bce7587fd89e59dffdecbc852bc70d9e14db4f14
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import traceback
25
26 if os.name == 'nt':
27     import ctypes
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_urllib_error,
38     compat_urllib_request,
39 )
40 from .utils import (
41     escape_url,
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     locked_file,
53     make_HTTPS_handler,
54     MaxDownloadsReached,
55     PagedList,
56     parse_filesize,
57     PerRequestProxyHandler,
58     PostProcessingError,
59     platform_name,
60     preferredencoding,
61     render_table,
62     SameFileError,
63     sanitize_filename,
64     sanitize_path,
65     std_headers,
66     subtitles_filename,
67     takewhile_inclusive,
68     UnavailableVideoError,
69     url_basename,
70     version_tuple,
71     write_json_file,
72     write_string,
73     YoutubeDLHandler,
74     prepend_extension,
75     args_to_str,
76     age_restricted,
77 )
78 from .cache import Cache
79 from .extractor import get_info_extractor, gen_extractors
80 from .downloader import get_suitable_downloader
81 from .downloader.rtmp import rtmpdump_version
82 from .postprocessor import (
83     FFmpegFixupM4aPP,
84     FFmpegFixupStretchedPP,
85     FFmpegMergerPP,
86     FFmpegPostProcessor,
87     get_postprocessor,
88 )
89 from .version import __version__
90
91
92 class YoutubeDL(object):
93     """YoutubeDL class.
94
95     YoutubeDL objects are the ones responsible for downloading the
96     actual video file and writing it to disk if the user has requested
97     it, among some other tasks. In most cases there should be one per
98     program. As, given a video URL, the downloader doesn't know how to
99     extract all the needed information, task that InfoExtractors do, it
100     has to pass the URL to one of them.
101
102     For this, YoutubeDL objects have a method that allows
103     InfoExtractors to be registered in a given order. When it is passed
104     a URL, the YoutubeDL object handles it to the first InfoExtractor it
105     finds that reports being able to handle it. The InfoExtractor extracts
106     all the information about the video or videos the URL refers to, and
107     YoutubeDL process the extracted information, possibly using a File
108     Downloader to download the video.
109
110     YoutubeDL objects accept a lot of parameters. In order not to saturate
111     the object constructor with arguments, it receives a dictionary of
112     options instead. These options are available through the params
113     attribute for the InfoExtractors to use. The YoutubeDL also
114     registers itself as the downloader in charge for the InfoExtractors
115     that are added to it, so this is a "mutual registration".
116
117     Available options:
118
119     username:          Username for authentication purposes.
120     password:          Password for authentication purposes.
121     videopassword:     Password for accessing a video.
122     usenetrc:          Use netrc for authentication instead.
123     verbose:           Print additional info to stdout.
124     quiet:             Do not print messages to stdout.
125     no_warnings:       Do not print out anything for warnings.
126     forceurl:          Force printing final URL.
127     forcetitle:        Force printing title.
128     forceid:           Force printing ID.
129     forcethumbnail:    Force printing thumbnail URL.
130     forcedescription:  Force printing description.
131     forcefilename:     Force printing final filename.
132     forceduration:     Force printing duration.
133     forcejson:         Force printing info_dict as JSON.
134     dump_single_json:  Force printing the info_dict of the whole playlist
135                        (or video) as a single JSON line.
136     simulate:          Do not download the video files.
137     format:            Video format code. See options.py for more information.
138     format_limit:      Highest quality format to try.
139     outtmpl:           Template for output names.
140     restrictfilenames: Do not allow "&" and spaces in file names
141     ignoreerrors:      Do not stop on download errors.
142     nooverwrites:      Prevent overwriting files.
143     playliststart:     Playlist item to start at.
144     playlistend:       Playlist item to end at.
145     playlist_items:    Specific indices of playlist to download.
146     playlistreverse:   Download playlist items in reverse order.
147     matchtitle:        Download only matching titles.
148     rejecttitle:       Reject downloads for matching titles.
149     logger:            Log messages to a logging.Logger instance.
150     logtostderr:       Log messages to stderr instead of stdout.
151     writedescription:  Write the video description to a .description file
152     writeinfojson:     Write the video description to a .info.json file
153     writeannotations:  Write the video annotations to a .annotations.xml file
154     writethumbnail:    Write the thumbnail image to a file
155     write_all_thumbnails:  Write all thumbnail formats to files
156     writesubtitles:    Write the video subtitles to a file
157     writeautomaticsub: Write the automatic subtitles to a file
158     allsubtitles:      Downloads all the subtitles of the video
159                        (requires writesubtitles or writeautomaticsub)
160     listsubtitles:     Lists all available subtitles for the video
161     subtitlesformat:   The format code for subtitles
162     subtitleslangs:    List of languages of the subtitles to download
163     keepvideo:         Keep the video file after post-processing
164     daterange:         A DateRange object, download only if the upload_date is in the range.
165     skip_download:     Skip the actual download of the video file
166     cachedir:          Location of the cache files in the filesystem.
167                        False to disable filesystem cache.
168     noplaylist:        Download single video instead of a playlist if in doubt.
169     age_limit:         An integer representing the user's age in years.
170                        Unsuitable videos for the given age are skipped.
171     min_views:         An integer representing the minimum view count the video
172                        must have in order to not be skipped.
173                        Videos without view count information are always
174                        downloaded. None for no limit.
175     max_views:         An integer representing the maximum view count.
176                        Videos that are more popular than that are not
177                        downloaded.
178                        Videos without view count information are always
179                        downloaded. None for no limit.
180     download_archive:  File name of a file where all downloads are recorded.
181                        Videos already present in the file are not downloaded
182                        again.
183     cookiefile:        File name where cookies should be read from and dumped to.
184     nocheckcertificate:Do not verify SSL certificates
185     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
186                        At the moment, this is only supported by YouTube.
187     proxy:             URL of the proxy server to use
188     cn_verification_proxy:  URL of the proxy to use for IP address verification
189                        on Chinese sites. (Experimental)
190     socket_timeout:    Time to wait for unresponsive hosts, in seconds
191     bidi_workaround:   Work around buggy terminals without bidirectional text
192                        support, using fribidi
193     debug_printtraffic:Print out sent and received HTTP traffic
194     include_ads:       Download ads as well
195     default_search:    Prepend this string if an input url is not valid.
196                        'auto' for elaborate guessing
197     encoding:          Use this encoding instead of the system-specified.
198     extract_flat:      Do not resolve URLs, return the immediate result.
199                        Pass in 'in_playlist' to only show this behavior for
200                        playlist items.
201     postprocessors:    A list of dictionaries, each with an entry
202                        * key:  The name of the postprocessor. See
203                                youtube_dl/postprocessor/__init__.py for a list.
204                        as well as any further keyword arguments for the
205                        postprocessor.
206     progress_hooks:    A list of functions that get called on download
207                        progress, with a dictionary with the entries
208                        * status: One of "downloading", "error", or "finished".
209                                  Check this first and ignore unknown values.
210
211                        If status is one of "downloading", or "finished", the
212                        following properties may also be present:
213                        * filename: The final filename (always present)
214                        * tmpfilename: The filename we're currently writing to
215                        * downloaded_bytes: Bytes on disk
216                        * total_bytes: Size of the whole file, None if unknown
217                        * total_bytes_estimate: Guess of the eventual file size,
218                                                None if unavailable.
219                        * elapsed: The number of seconds since download started.
220                        * eta: The estimated time in seconds, None if unknown
221                        * speed: The download speed in bytes/second, None if
222                                 unknown
223                        * fragment_index: The counter of the currently
224                                          downloaded video fragment.
225                        * fragment_count: The number of fragments (= individual
226                                          files that will be merged)
227
228                        Progress hooks are guaranteed to be called at least once
229                        (with status "finished") if the download is successful.
230     merge_output_format: Extension to use when merging formats.
231     fixup:             Automatically correct known faults of the file.
232                        One of:
233                        - "never": do nothing
234                        - "warn": only emit a warning
235                        - "detect_or_warn": check whether we can do anything
236                                            about it, warn otherwise (default)
237     source_address:    (Experimental) Client-side IP address to bind to.
238     call_home:         Boolean, true iff we are allowed to contact the
239                        youtube-dl servers for debugging.
240     sleep_interval:    Number of seconds to sleep before each download.
241     listformats:       Print an overview of available video formats and exit.
242     list_thumbnails:   Print a table of all thumbnails and exit.
243     match_filter:      A function that gets called with the info_dict of
244                        every video.
245                        If it returns a message, the video is ignored.
246                        If it returns None, the video is downloaded.
247                        match_filter_func in utils.py is one example for this.
248     no_color:          Do not emit color codes in output.
249
250     The following options determine which downloader is picked:
251     external_downloader: Executable of the external downloader to call.
252                        None or unset for standard (built-in) downloader.
253     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
254
255     The following parameters are not used by YoutubeDL itself, they are used by
256     the downloader (see youtube_dl/downloader/common.py):
257     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
258     noresizebuffer, retries, continuedl, noprogress, consoletitle,
259     xattr_set_filesize, external_downloader_args.
260
261     The following options are used by the post processors:
262     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
263                        otherwise prefer avconv.
264     exec_cmd:          Arbitrary command to run after downloading
265     """
266
267     params = None
268     _ies = []
269     _pps = []
270     _download_retcode = None
271     _num_downloads = None
272     _screen_file = None
273
274     def __init__(self, params=None, auto_init=True):
275         """Create a FileDownloader object with the given options."""
276         if params is None:
277             params = {}
278         self._ies = []
279         self._ies_instances = {}
280         self._pps = []
281         self._progress_hooks = []
282         self._download_retcode = 0
283         self._num_downloads = 0
284         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
285         self._err_file = sys.stderr
286         self.params = params
287         self.cache = Cache(self)
288
289         if params.get('bidi_workaround', False):
290             try:
291                 import pty
292                 master, slave = pty.openpty()
293                 width = compat_get_terminal_size().columns
294                 if width is None:
295                     width_args = []
296                 else:
297                     width_args = ['-w', str(width)]
298                 sp_kwargs = dict(
299                     stdin=subprocess.PIPE,
300                     stdout=slave,
301                     stderr=self._err_file)
302                 try:
303                     self._output_process = subprocess.Popen(
304                         ['bidiv'] + width_args, **sp_kwargs
305                     )
306                 except OSError:
307                     self._output_process = subprocess.Popen(
308                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
309                 self._output_channel = os.fdopen(master, 'rb')
310             except OSError as ose:
311                 if ose.errno == 2:
312                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
313                 else:
314                     raise
315
316         if (sys.version_info >= (3,) and sys.platform != 'win32' and
317                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
318                 not params.get('restrictfilenames', False)):
319             # On Python 3, the Unicode filesystem API will throw errors (#1474)
320             self.report_warning(
321                 'Assuming --restrict-filenames since file system encoding '
322                 'cannot encode all characters. '
323                 'Set the LC_ALL environment variable to fix this.')
324             self.params['restrictfilenames'] = True
325
326         if '%(stitle)s' in self.params.get('outtmpl', ''):
327             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
328
329         self._setup_opener()
330
331         if auto_init:
332             self.print_debug_header()
333             self.add_default_info_extractors()
334
335         for pp_def_raw in self.params.get('postprocessors', []):
336             pp_class = get_postprocessor(pp_def_raw['key'])
337             pp_def = dict(pp_def_raw)
338             del pp_def['key']
339             pp = pp_class(self, **compat_kwargs(pp_def))
340             self.add_post_processor(pp)
341
342         for ph in self.params.get('progress_hooks', []):
343             self.add_progress_hook(ph)
344
345     def warn_if_short_id(self, argv):
346         # short YouTube ID starting with dash?
347         idxs = [
348             i for i, a in enumerate(argv)
349             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
350         if idxs:
351             correct_argv = (
352                 ['youtube-dl'] +
353                 [a for i, a in enumerate(argv) if i not in idxs] +
354                 ['--'] + [argv[i] for i in idxs]
355             )
356             self.report_warning(
357                 'Long argument string detected. '
358                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
359                 args_to_str(correct_argv))
360
361     def add_info_extractor(self, ie):
362         """Add an InfoExtractor object to the end of the list."""
363         self._ies.append(ie)
364         self._ies_instances[ie.ie_key()] = ie
365         ie.set_downloader(self)
366
367     def get_info_extractor(self, ie_key):
368         """
369         Get an instance of an IE with name ie_key, it will try to get one from
370         the _ies list, if there's no instance it will create a new one and add
371         it to the extractor list.
372         """
373         ie = self._ies_instances.get(ie_key)
374         if ie is None:
375             ie = get_info_extractor(ie_key)()
376             self.add_info_extractor(ie)
377         return ie
378
379     def add_default_info_extractors(self):
380         """
381         Add the InfoExtractors returned by gen_extractors to the end of the list
382         """
383         for ie in gen_extractors():
384             self.add_info_extractor(ie)
385
386     def add_post_processor(self, pp):
387         """Add a PostProcessor object to the end of the chain."""
388         self._pps.append(pp)
389         pp.set_downloader(self)
390
391     def add_progress_hook(self, ph):
392         """Add the progress hook (currently only for the file downloader)"""
393         self._progress_hooks.append(ph)
394
395     def _bidi_workaround(self, message):
396         if not hasattr(self, '_output_channel'):
397             return message
398
399         assert hasattr(self, '_output_process')
400         assert isinstance(message, compat_str)
401         line_count = message.count('\n') + 1
402         self._output_process.stdin.write((message + '\n').encode('utf-8'))
403         self._output_process.stdin.flush()
404         res = ''.join(self._output_channel.readline().decode('utf-8')
405                       for _ in range(line_count))
406         return res[:-len('\n')]
407
408     def to_screen(self, message, skip_eol=False):
409         """Print message to stdout if not in quiet mode."""
410         return self.to_stdout(message, skip_eol, check_quiet=True)
411
412     def _write_string(self, s, out=None):
413         write_string(s, out=out, encoding=self.params.get('encoding'))
414
415     def to_stdout(self, message, skip_eol=False, check_quiet=False):
416         """Print message to stdout if not in quiet mode."""
417         if self.params.get('logger'):
418             self.params['logger'].debug(message)
419         elif not check_quiet or not self.params.get('quiet', False):
420             message = self._bidi_workaround(message)
421             terminator = ['\n', ''][skip_eol]
422             output = message + terminator
423
424             self._write_string(output, self._screen_file)
425
426     def to_stderr(self, message):
427         """Print message to stderr."""
428         assert isinstance(message, compat_str)
429         if self.params.get('logger'):
430             self.params['logger'].error(message)
431         else:
432             message = self._bidi_workaround(message)
433             output = message + '\n'
434             self._write_string(output, self._err_file)
435
436     def to_console_title(self, message):
437         if not self.params.get('consoletitle', False):
438             return
439         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
440             # c_wchar_p() might not be necessary if `message` is
441             # already of type unicode()
442             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
443         elif 'TERM' in os.environ:
444             self._write_string('\033]0;%s\007' % message, self._screen_file)
445
446     def save_console_title(self):
447         if not self.params.get('consoletitle', False):
448             return
449         if 'TERM' in os.environ:
450             # Save the title on stack
451             self._write_string('\033[22;0t', self._screen_file)
452
453     def restore_console_title(self):
454         if not self.params.get('consoletitle', False):
455             return
456         if 'TERM' in os.environ:
457             # Restore the title from stack
458             self._write_string('\033[23;0t', self._screen_file)
459
460     def __enter__(self):
461         self.save_console_title()
462         return self
463
464     def __exit__(self, *args):
465         self.restore_console_title()
466
467         if self.params.get('cookiefile') is not None:
468             self.cookiejar.save()
469
470     def trouble(self, message=None, tb=None):
471         """Determine action to take when a download problem appears.
472
473         Depending on if the downloader has been configured to ignore
474         download errors or not, this method may throw an exception or
475         not when errors are found, after printing the message.
476
477         tb, if given, is additional traceback information.
478         """
479         if message is not None:
480             self.to_stderr(message)
481         if self.params.get('verbose'):
482             if tb is None:
483                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
484                     tb = ''
485                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
486                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
487                     tb += compat_str(traceback.format_exc())
488                 else:
489                     tb_data = traceback.format_list(traceback.extract_stack())
490                     tb = ''.join(tb_data)
491             self.to_stderr(tb)
492         if not self.params.get('ignoreerrors', False):
493             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
494                 exc_info = sys.exc_info()[1].exc_info
495             else:
496                 exc_info = sys.exc_info()
497             raise DownloadError(message, exc_info)
498         self._download_retcode = 1
499
500     def report_warning(self, message):
501         '''
502         Print the message to stderr, it will be prefixed with 'WARNING:'
503         If stderr is a tty file the 'WARNING:' will be colored
504         '''
505         if self.params.get('logger') is not None:
506             self.params['logger'].warning(message)
507         else:
508             if self.params.get('no_warnings'):
509                 return
510             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
511                 _msg_header = '\033[0;33mWARNING:\033[0m'
512             else:
513                 _msg_header = 'WARNING:'
514             warning_message = '%s %s' % (_msg_header, message)
515             self.to_stderr(warning_message)
516
517     def report_error(self, message, tb=None):
518         '''
519         Do the same as trouble, but prefixes the message with 'ERROR:', colored
520         in red if stderr is a tty file.
521         '''
522         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
523             _msg_header = '\033[0;31mERROR:\033[0m'
524         else:
525             _msg_header = 'ERROR:'
526         error_message = '%s %s' % (_msg_header, message)
527         self.trouble(error_message, tb)
528
529     def report_file_already_downloaded(self, file_name):
530         """Report file has already been fully downloaded."""
531         try:
532             self.to_screen('[download] %s has already been downloaded' % file_name)
533         except UnicodeEncodeError:
534             self.to_screen('[download] The file has already been downloaded')
535
536     def prepare_filename(self, info_dict):
537         """Generate the output filename."""
538         try:
539             template_dict = dict(info_dict)
540
541             template_dict['epoch'] = int(time.time())
542             autonumber_size = self.params.get('autonumber_size')
543             if autonumber_size is None:
544                 autonumber_size = 5
545             autonumber_templ = '%0' + str(autonumber_size) + 'd'
546             template_dict['autonumber'] = autonumber_templ % self._num_downloads
547             if template_dict.get('playlist_index') is not None:
548                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
549             if template_dict.get('resolution') is None:
550                 if template_dict.get('width') and template_dict.get('height'):
551                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
552                 elif template_dict.get('height'):
553                     template_dict['resolution'] = '%sp' % template_dict['height']
554                 elif template_dict.get('width'):
555                     template_dict['resolution'] = '?x%d' % template_dict['width']
556
557             sanitize = lambda k, v: sanitize_filename(
558                 compat_str(v),
559                 restricted=self.params.get('restrictfilenames'),
560                 is_id=(k == 'id'))
561             template_dict = dict((k, sanitize(k, v))
562                                  for k, v in template_dict.items()
563                                  if v is not None)
564             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
565
566             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
567             tmpl = compat_expanduser(outtmpl)
568             filename = tmpl % template_dict
569             # Temporary fix for #4787
570             # 'Treat' all problem characters by passing filename through preferredencoding
571             # to workaround encoding issues with subprocess on python2 @ Windows
572             if sys.version_info < (3, 0) and sys.platform == 'win32':
573                 filename = encodeFilename(filename, True).decode(preferredencoding())
574             return filename
575         except ValueError as err:
576             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
577             return None
578
579     def _match_entry(self, info_dict, incomplete):
580         """ Returns None iff the file should be downloaded """
581
582         video_title = info_dict.get('title', info_dict.get('id', 'video'))
583         if 'title' in info_dict:
584             # This can happen when we're just evaluating the playlist
585             title = info_dict['title']
586             matchtitle = self.params.get('matchtitle', False)
587             if matchtitle:
588                 if not re.search(matchtitle, title, re.IGNORECASE):
589                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
590             rejecttitle = self.params.get('rejecttitle', False)
591             if rejecttitle:
592                 if re.search(rejecttitle, title, re.IGNORECASE):
593                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
594         date = info_dict.get('upload_date', None)
595         if date is not None:
596             dateRange = self.params.get('daterange', DateRange())
597             if date not in dateRange:
598                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
599         view_count = info_dict.get('view_count', None)
600         if view_count is not None:
601             min_views = self.params.get('min_views')
602             if min_views is not None and view_count < min_views:
603                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
604             max_views = self.params.get('max_views')
605             if max_views is not None and view_count > max_views:
606                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
607         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
608             return 'Skipping "%s" because it is age restricted' % video_title
609         if self.in_download_archive(info_dict):
610             return '%s has already been recorded in archive' % video_title
611
612         if not incomplete:
613             match_filter = self.params.get('match_filter')
614             if match_filter is not None:
615                 ret = match_filter(info_dict)
616                 if ret is not None:
617                     return ret
618
619         return None
620
621     @staticmethod
622     def add_extra_info(info_dict, extra_info):
623         '''Set the keys from extra_info in info dict if they are missing'''
624         for key, value in extra_info.items():
625             info_dict.setdefault(key, value)
626
627     def extract_info(self, url, download=True, ie_key=None, extra_info={},
628                      process=True):
629         '''
630         Returns a list with a dictionary for each video we find.
631         If 'download', also downloads the videos.
632         extra_info is a dict containing the extra values to add to each result
633          '''
634
635         if ie_key:
636             ies = [self.get_info_extractor(ie_key)]
637         else:
638             ies = self._ies
639
640         for ie in ies:
641             if not ie.suitable(url):
642                 continue
643
644             if not ie.working():
645                 self.report_warning('The program functionality for this site has been marked as broken, '
646                                     'and will probably not work.')
647
648             try:
649                 ie_result = ie.extract(url)
650                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
651                     break
652                 if isinstance(ie_result, list):
653                     # Backwards compatibility: old IE result format
654                     ie_result = {
655                         '_type': 'compat_list',
656                         'entries': ie_result,
657                     }
658                 self.add_default_extra_info(ie_result, ie, url)
659                 if process:
660                     return self.process_ie_result(ie_result, download, extra_info)
661                 else:
662                     return ie_result
663             except ExtractorError as de:  # An error we somewhat expected
664                 self.report_error(compat_str(de), de.format_traceback())
665                 break
666             except MaxDownloadsReached:
667                 raise
668             except Exception as e:
669                 if self.params.get('ignoreerrors', False):
670                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
671                     break
672                 else:
673                     raise
674         else:
675             self.report_error('no suitable InfoExtractor for URL %s' % url)
676
677     def add_default_extra_info(self, ie_result, ie, url):
678         self.add_extra_info(ie_result, {
679             'extractor': ie.IE_NAME,
680             'webpage_url': url,
681             'webpage_url_basename': url_basename(url),
682             'extractor_key': ie.ie_key(),
683         })
684
685     def process_ie_result(self, ie_result, download=True, extra_info={}):
686         """
687         Take the result of the ie(may be modified) and resolve all unresolved
688         references (URLs, playlist items).
689
690         It will also download the videos if 'download'.
691         Returns the resolved ie_result.
692         """
693
694         result_type = ie_result.get('_type', 'video')
695
696         if result_type in ('url', 'url_transparent'):
697             extract_flat = self.params.get('extract_flat', False)
698             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
699                     extract_flat is True):
700                 if self.params.get('forcejson', False):
701                     self.to_stdout(json.dumps(ie_result))
702                 return ie_result
703
704         if result_type == 'video':
705             self.add_extra_info(ie_result, extra_info)
706             return self.process_video_result(ie_result, download=download)
707         elif result_type == 'url':
708             # We have to add extra_info to the results because it may be
709             # contained in a playlist
710             return self.extract_info(ie_result['url'],
711                                      download,
712                                      ie_key=ie_result.get('ie_key'),
713                                      extra_info=extra_info)
714         elif result_type == 'url_transparent':
715             # Use the information from the embedding page
716             info = self.extract_info(
717                 ie_result['url'], ie_key=ie_result.get('ie_key'),
718                 extra_info=extra_info, download=False, process=False)
719
720             force_properties = dict(
721                 (k, v) for k, v in ie_result.items() if v is not None)
722             for f in ('_type', 'url'):
723                 if f in force_properties:
724                     del force_properties[f]
725             new_result = info.copy()
726             new_result.update(force_properties)
727
728             assert new_result.get('_type') != 'url_transparent'
729
730             return self.process_ie_result(
731                 new_result, download=download, extra_info=extra_info)
732         elif result_type == 'playlist' or result_type == 'multi_video':
733             # We process each entry in the playlist
734             playlist = ie_result.get('title', None) or ie_result.get('id', None)
735             self.to_screen('[download] Downloading playlist: %s' % playlist)
736
737             playlist_results = []
738
739             playliststart = self.params.get('playliststart', 1) - 1
740             playlistend = self.params.get('playlistend', None)
741             # For backwards compatibility, interpret -1 as whole list
742             if playlistend == -1:
743                 playlistend = None
744
745             playlistitems_str = self.params.get('playlist_items', None)
746             playlistitems = None
747             if playlistitems_str is not None:
748                 def iter_playlistitems(format):
749                     for string_segment in format.split(','):
750                         if '-' in string_segment:
751                             start, end = string_segment.split('-')
752                             for item in range(int(start), int(end) + 1):
753                                 yield int(item)
754                         else:
755                             yield int(string_segment)
756                 playlistitems = iter_playlistitems(playlistitems_str)
757
758             ie_entries = ie_result['entries']
759             if isinstance(ie_entries, list):
760                 n_all_entries = len(ie_entries)
761                 if playlistitems:
762                     entries = [ie_entries[i - 1] for i in playlistitems]
763                 else:
764                     entries = ie_entries[playliststart:playlistend]
765                 n_entries = len(entries)
766                 self.to_screen(
767                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
768                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
769             elif isinstance(ie_entries, PagedList):
770                 if playlistitems:
771                     entries = []
772                     for item in playlistitems:
773                         entries.extend(ie_entries.getslice(
774                             item - 1, item
775                         ))
776                 else:
777                     entries = ie_entries.getslice(
778                         playliststart, playlistend)
779                 n_entries = len(entries)
780                 self.to_screen(
781                     "[%s] playlist %s: Downloading %d videos" %
782                     (ie_result['extractor'], playlist, n_entries))
783             else:  # iterable
784                 if playlistitems:
785                     entry_list = list(ie_entries)
786                     entries = [entry_list[i - 1] for i in playlistitems]
787                 else:
788                     entries = list(itertools.islice(
789                         ie_entries, playliststart, playlistend))
790                 n_entries = len(entries)
791                 self.to_screen(
792                     "[%s] playlist %s: Downloading %d videos" %
793                     (ie_result['extractor'], playlist, n_entries))
794
795             if self.params.get('playlistreverse', False):
796                 entries = entries[::-1]
797
798             for i, entry in enumerate(entries, 1):
799                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
800                 extra = {
801                     'n_entries': n_entries,
802                     'playlist': playlist,
803                     'playlist_id': ie_result.get('id'),
804                     'playlist_title': ie_result.get('title'),
805                     'playlist_index': i + playliststart,
806                     'extractor': ie_result['extractor'],
807                     'webpage_url': ie_result['webpage_url'],
808                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
809                     'extractor_key': ie_result['extractor_key'],
810                 }
811
812                 reason = self._match_entry(entry, incomplete=True)
813                 if reason is not None:
814                     self.to_screen('[download] ' + reason)
815                     continue
816
817                 entry_result = self.process_ie_result(entry,
818                                                       download=download,
819                                                       extra_info=extra)
820                 playlist_results.append(entry_result)
821             ie_result['entries'] = playlist_results
822             return ie_result
823         elif result_type == 'compat_list':
824             self.report_warning(
825                 'Extractor %s returned a compat_list result. '
826                 'It needs to be updated.' % ie_result.get('extractor'))
827
828             def _fixup(r):
829                 self.add_extra_info(
830                     r,
831                     {
832                         'extractor': ie_result['extractor'],
833                         'webpage_url': ie_result['webpage_url'],
834                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
835                         'extractor_key': ie_result['extractor_key'],
836                     }
837                 )
838                 return r
839             ie_result['entries'] = [
840                 self.process_ie_result(_fixup(r), download, extra_info)
841                 for r in ie_result['entries']
842             ]
843             return ie_result
844         else:
845             raise Exception('Invalid result type: %s' % result_type)
846
847     def _apply_format_filter(self, format_spec, available_formats):
848         " Returns a tuple of the remaining format_spec and filtered formats "
849
850         OPERATORS = {
851             '<': operator.lt,
852             '<=': operator.le,
853             '>': operator.gt,
854             '>=': operator.ge,
855             '=': operator.eq,
856             '!=': operator.ne,
857         }
858         operator_rex = re.compile(r'''(?x)\s*\[
859             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
860             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
861             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
862             \]$
863             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
864         m = operator_rex.search(format_spec)
865         if m:
866             try:
867                 comparison_value = int(m.group('value'))
868             except ValueError:
869                 comparison_value = parse_filesize(m.group('value'))
870                 if comparison_value is None:
871                     comparison_value = parse_filesize(m.group('value') + 'B')
872                 if comparison_value is None:
873                     raise ValueError(
874                         'Invalid value %r in format specification %r' % (
875                             m.group('value'), format_spec))
876             op = OPERATORS[m.group('op')]
877
878         if not m:
879             STR_OPERATORS = {
880                 '=': operator.eq,
881                 '!=': operator.ne,
882             }
883             str_operator_rex = re.compile(r'''(?x)\s*\[
884                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
885                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
886                 \s*(?P<value>[a-zA-Z0-9_-]+)
887                 \s*\]$
888                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
889             m = str_operator_rex.search(format_spec)
890             if m:
891                 comparison_value = m.group('value')
892                 op = STR_OPERATORS[m.group('op')]
893
894         if not m:
895             raise ValueError('Invalid format specification %r' % format_spec)
896
897         def _filter(f):
898             actual_value = f.get(m.group('key'))
899             if actual_value is None:
900                 return m.group('none_inclusive')
901             return op(actual_value, comparison_value)
902         new_formats = [f for f in available_formats if _filter(f)]
903
904         new_format_spec = format_spec[:-len(m.group(0))]
905         if not new_format_spec:
906             new_format_spec = 'best'
907
908         return (new_format_spec, new_formats)
909
910     def select_format(self, format_spec, available_formats):
911         while format_spec.endswith(']'):
912             format_spec, available_formats = self._apply_format_filter(
913                 format_spec, available_formats)
914         if not available_formats:
915             return None
916
917         if format_spec == 'best' or format_spec is None:
918             return available_formats[-1]
919         elif format_spec == 'worst':
920             return available_formats[0]
921         elif format_spec == 'bestaudio':
922             audio_formats = [
923                 f for f in available_formats
924                 if f.get('vcodec') == 'none']
925             if audio_formats:
926                 return audio_formats[-1]
927         elif format_spec == 'worstaudio':
928             audio_formats = [
929                 f for f in available_formats
930                 if f.get('vcodec') == 'none']
931             if audio_formats:
932                 return audio_formats[0]
933         elif format_spec == 'bestvideo':
934             video_formats = [
935                 f for f in available_formats
936                 if f.get('acodec') == 'none']
937             if video_formats:
938                 return video_formats[-1]
939         elif format_spec == 'worstvideo':
940             video_formats = [
941                 f for f in available_formats
942                 if f.get('acodec') == 'none']
943             if video_formats:
944                 return video_formats[0]
945         else:
946             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
947             if format_spec in extensions:
948                 filter_f = lambda f: f['ext'] == format_spec
949             else:
950                 filter_f = lambda f: f['format_id'] == format_spec
951             matches = list(filter(filter_f, available_formats))
952             if matches:
953                 return matches[-1]
954         return None
955
956     def _calc_headers(self, info_dict):
957         res = std_headers.copy()
958
959         add_headers = info_dict.get('http_headers')
960         if add_headers:
961             res.update(add_headers)
962
963         cookies = self._calc_cookies(info_dict)
964         if cookies:
965             res['Cookie'] = cookies
966
967         return res
968
969     def _calc_cookies(self, info_dict):
970         pr = compat_urllib_request.Request(info_dict['url'])
971         self.cookiejar.add_cookie_header(pr)
972         return pr.get_header('Cookie')
973
974     def process_video_result(self, info_dict, download=True):
975         assert info_dict.get('_type', 'video') == 'video'
976
977         if 'id' not in info_dict:
978             raise ExtractorError('Missing "id" field in extractor result')
979         if 'title' not in info_dict:
980             raise ExtractorError('Missing "title" field in extractor result')
981
982         if 'playlist' not in info_dict:
983             # It isn't part of a playlist
984             info_dict['playlist'] = None
985             info_dict['playlist_index'] = None
986
987         thumbnails = info_dict.get('thumbnails')
988         if thumbnails is None:
989             thumbnail = info_dict.get('thumbnail')
990             if thumbnail:
991                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
992         if thumbnails:
993             thumbnails.sort(key=lambda t: (
994                 t.get('preference'), t.get('width'), t.get('height'),
995                 t.get('id'), t.get('url')))
996             for i, t in enumerate(thumbnails):
997                 if 'width' in t and 'height' in t:
998                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
999                 if t.get('id') is None:
1000                     t['id'] = '%d' % i
1001
1002         if thumbnails and 'thumbnail' not in info_dict:
1003             info_dict['thumbnail'] = thumbnails[-1]['url']
1004
1005         if 'display_id' not in info_dict and 'id' in info_dict:
1006             info_dict['display_id'] = info_dict['id']
1007
1008         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1009             # Working around negative timestamps in Windows
1010             # (see http://bugs.python.org/issue1646728)
1011             if info_dict['timestamp'] < 0 and os.name == 'nt':
1012                 info_dict['timestamp'] = 0
1013             upload_date = datetime.datetime.utcfromtimestamp(
1014                 info_dict['timestamp'])
1015             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1016
1017         if self.params.get('listsubtitles', False):
1018             if 'automatic_captions' in info_dict:
1019                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1020             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1021             return
1022         info_dict['requested_subtitles'] = self.process_subtitles(
1023             info_dict['id'], info_dict.get('subtitles'),
1024             info_dict.get('automatic_captions'))
1025
1026         # This extractors handle format selection themselves
1027         if info_dict['extractor'] in ['Youku']:
1028             if download:
1029                 self.process_info(info_dict)
1030             return info_dict
1031
1032         # We now pick which formats have to be downloaded
1033         if info_dict.get('formats') is None:
1034             # There's only one format available
1035             formats = [info_dict]
1036         else:
1037             formats = info_dict['formats']
1038
1039         if not formats:
1040             raise ExtractorError('No video formats found!')
1041
1042         # We check that all the formats have the format and format_id fields
1043         for i, format in enumerate(formats):
1044             if 'url' not in format:
1045                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1046
1047             if format.get('format_id') is None:
1048                 format['format_id'] = compat_str(i)
1049             if format.get('format') is None:
1050                 format['format'] = '{id} - {res}{note}'.format(
1051                     id=format['format_id'],
1052                     res=self.format_resolution(format),
1053                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1054                 )
1055             # Automatically determine file extension if missing
1056             if 'ext' not in format:
1057                 format['ext'] = determine_ext(format['url']).lower()
1058             # Add HTTP headers, so that external programs can use them from the
1059             # json output
1060             full_format_info = info_dict.copy()
1061             full_format_info.update(format)
1062             format['http_headers'] = self._calc_headers(full_format_info)
1063
1064         format_limit = self.params.get('format_limit', None)
1065         if format_limit:
1066             formats = list(takewhile_inclusive(
1067                 lambda f: f['format_id'] != format_limit, formats
1068             ))
1069
1070         # TODO Central sorting goes here
1071
1072         if formats[0] is not info_dict:
1073             # only set the 'formats' fields if the original info_dict list them
1074             # otherwise we end up with a circular reference, the first (and unique)
1075             # element in the 'formats' field in info_dict is info_dict itself,
1076             # wich can't be exported to json
1077             info_dict['formats'] = formats
1078         if self.params.get('listformats'):
1079             self.list_formats(info_dict)
1080             return
1081         if self.params.get('list_thumbnails'):
1082             self.list_thumbnails(info_dict)
1083             return
1084
1085         req_format = self.params.get('format')
1086         if req_format is None:
1087             req_format = 'best'
1088         formats_to_download = []
1089         # The -1 is for supporting YoutubeIE
1090         if req_format in ('-1', 'all'):
1091             formats_to_download = formats
1092         else:
1093             for rfstr in req_format.split(','):
1094                 # We can accept formats requested in the format: 34/5/best, we pick
1095                 # the first that is available, starting from left
1096                 req_formats = rfstr.split('/')
1097                 for rf in req_formats:
1098                     if re.match(r'.+?\+.+?', rf) is not None:
1099                         # Two formats have been requested like '137+139'
1100                         format_1, format_2 = rf.split('+')
1101                         formats_info = (self.select_format(format_1, formats),
1102                                         self.select_format(format_2, formats))
1103                         if all(formats_info):
1104                             # The first format must contain the video and the
1105                             # second the audio
1106                             if formats_info[0].get('vcodec') == 'none':
1107                                 self.report_error('The first format must '
1108                                                   'contain the video, try using '
1109                                                   '"-f %s+%s"' % (format_2, format_1))
1110                                 return
1111                             output_ext = (
1112                                 formats_info[0]['ext']
1113                                 if self.params.get('merge_output_format') is None
1114                                 else self.params['merge_output_format'])
1115                             selected_format = {
1116                                 'requested_formats': formats_info,
1117                                 'format': '%s+%s' % (formats_info[0].get('format'),
1118                                                      formats_info[1].get('format')),
1119                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1120                                                         formats_info[1].get('format_id')),
1121                                 'width': formats_info[0].get('width'),
1122                                 'height': formats_info[0].get('height'),
1123                                 'resolution': formats_info[0].get('resolution'),
1124                                 'fps': formats_info[0].get('fps'),
1125                                 'vcodec': formats_info[0].get('vcodec'),
1126                                 'vbr': formats_info[0].get('vbr'),
1127                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1128                                 'acodec': formats_info[1].get('acodec'),
1129                                 'abr': formats_info[1].get('abr'),
1130                                 'ext': output_ext,
1131                             }
1132                         else:
1133                             selected_format = None
1134                     else:
1135                         selected_format = self.select_format(rf, formats)
1136                     if selected_format is not None:
1137                         formats_to_download.append(selected_format)
1138                         break
1139         if not formats_to_download:
1140             raise ExtractorError('requested format not available',
1141                                  expected=True)
1142
1143         if download:
1144             if len(formats_to_download) > 1:
1145                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1146             for format in formats_to_download:
1147                 new_info = dict(info_dict)
1148                 new_info.update(format)
1149                 self.process_info(new_info)
1150         # We update the info dict with the best quality format (backwards compatibility)
1151         info_dict.update(formats_to_download[-1])
1152         return info_dict
1153
1154     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1155         """Select the requested subtitles and their format"""
1156         available_subs = {}
1157         if normal_subtitles and self.params.get('writesubtitles'):
1158             available_subs.update(normal_subtitles)
1159         if automatic_captions and self.params.get('writeautomaticsub'):
1160             for lang, cap_info in automatic_captions.items():
1161                 if lang not in available_subs:
1162                     available_subs[lang] = cap_info
1163
1164         if (not self.params.get('writesubtitles') and not
1165                 self.params.get('writeautomaticsub') or not
1166                 available_subs):
1167             return None
1168
1169         if self.params.get('allsubtitles', False):
1170             requested_langs = available_subs.keys()
1171         else:
1172             if self.params.get('subtitleslangs', False):
1173                 requested_langs = self.params.get('subtitleslangs')
1174             elif 'en' in available_subs:
1175                 requested_langs = ['en']
1176             else:
1177                 requested_langs = [list(available_subs.keys())[0]]
1178
1179         formats_query = self.params.get('subtitlesformat', 'best')
1180         formats_preference = formats_query.split('/') if formats_query else []
1181         subs = {}
1182         for lang in requested_langs:
1183             formats = available_subs.get(lang)
1184             if formats is None:
1185                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1186                 continue
1187             for ext in formats_preference:
1188                 if ext == 'best':
1189                     f = formats[-1]
1190                     break
1191                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1192                 if matches:
1193                     f = matches[-1]
1194                     break
1195             else:
1196                 f = formats[-1]
1197                 self.report_warning(
1198                     'No subtitle format found matching "%s" for language %s, '
1199                     'using %s' % (formats_query, lang, f['ext']))
1200             subs[lang] = f
1201         return subs
1202
1203     def process_info(self, info_dict):
1204         """Process a single resolved IE result."""
1205
1206         assert info_dict.get('_type', 'video') == 'video'
1207
1208         max_downloads = self.params.get('max_downloads')
1209         if max_downloads is not None:
1210             if self._num_downloads >= int(max_downloads):
1211                 raise MaxDownloadsReached()
1212
1213         info_dict['fulltitle'] = info_dict['title']
1214         if len(info_dict['title']) > 200:
1215             info_dict['title'] = info_dict['title'][:197] + '...'
1216
1217         # Keep for backwards compatibility
1218         info_dict['stitle'] = info_dict['title']
1219
1220         if 'format' not in info_dict:
1221             info_dict['format'] = info_dict['ext']
1222
1223         reason = self._match_entry(info_dict, incomplete=False)
1224         if reason is not None:
1225             self.to_screen('[download] ' + reason)
1226             return
1227
1228         self._num_downloads += 1
1229
1230         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1231
1232         # Forced printings
1233         if self.params.get('forcetitle', False):
1234             self.to_stdout(info_dict['fulltitle'])
1235         if self.params.get('forceid', False):
1236             self.to_stdout(info_dict['id'])
1237         if self.params.get('forceurl', False):
1238             if info_dict.get('requested_formats') is not None:
1239                 for f in info_dict['requested_formats']:
1240                     self.to_stdout(f['url'] + f.get('play_path', ''))
1241             else:
1242                 # For RTMP URLs, also include the playpath
1243                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1244         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1245             self.to_stdout(info_dict['thumbnail'])
1246         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1247             self.to_stdout(info_dict['description'])
1248         if self.params.get('forcefilename', False) and filename is not None:
1249             self.to_stdout(filename)
1250         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1251             self.to_stdout(formatSeconds(info_dict['duration']))
1252         if self.params.get('forceformat', False):
1253             self.to_stdout(info_dict['format'])
1254         if self.params.get('forcejson', False):
1255             self.to_stdout(json.dumps(info_dict))
1256
1257         # Do nothing else if in simulate mode
1258         if self.params.get('simulate', False):
1259             return
1260
1261         if filename is None:
1262             return
1263
1264         try:
1265             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1266             if dn and not os.path.exists(dn):
1267                 os.makedirs(dn)
1268         except (OSError, IOError) as err:
1269             self.report_error('unable to create directory ' + compat_str(err))
1270             return
1271
1272         if self.params.get('writedescription', False):
1273             descfn = filename + '.description'
1274             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1275                 self.to_screen('[info] Video description is already present')
1276             elif info_dict.get('description') is None:
1277                 self.report_warning('There\'s no description to write.')
1278             else:
1279                 try:
1280                     self.to_screen('[info] Writing video description to: ' + descfn)
1281                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1282                         descfile.write(info_dict['description'])
1283                 except (OSError, IOError):
1284                     self.report_error('Cannot write description file ' + descfn)
1285                     return
1286
1287         if self.params.get('writeannotations', False):
1288             annofn = filename + '.annotations.xml'
1289             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1290                 self.to_screen('[info] Video annotations are already present')
1291             else:
1292                 try:
1293                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1294                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1295                         annofile.write(info_dict['annotations'])
1296                 except (KeyError, TypeError):
1297                     self.report_warning('There are no annotations to write.')
1298                 except (OSError, IOError):
1299                     self.report_error('Cannot write annotations file: ' + annofn)
1300                     return
1301
1302         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1303                                        self.params.get('writeautomaticsub')])
1304
1305         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1306             # subtitles download errors are already managed as troubles in relevant IE
1307             # that way it will silently go on when used with unsupporting IE
1308             subtitles = info_dict['requested_subtitles']
1309             ie = self.get_info_extractor(info_dict['extractor_key'])
1310             for sub_lang, sub_info in subtitles.items():
1311                 sub_format = sub_info['ext']
1312                 if sub_info.get('data') is not None:
1313                     sub_data = sub_info['data']
1314                 else:
1315                     try:
1316                         sub_data = ie._download_webpage(
1317                             sub_info['url'], info_dict['id'], note=False)
1318                     except ExtractorError as err:
1319                         self.report_warning('Unable to download subtitle for "%s": %s' %
1320                                             (sub_lang, compat_str(err.cause)))
1321                         continue
1322                 try:
1323                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1324                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1325                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1326                     else:
1327                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1328                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1329                             subfile.write(sub_data)
1330                 except (OSError, IOError):
1331                     self.report_error('Cannot write subtitles file ' + sub_filename)
1332                     return
1333
1334         if self.params.get('writeinfojson', False):
1335             infofn = os.path.splitext(filename)[0] + '.info.json'
1336             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1337                 self.to_screen('[info] Video description metadata is already present')
1338             else:
1339                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1340                 try:
1341                     write_json_file(info_dict, infofn)
1342                 except (OSError, IOError):
1343                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1344                     return
1345
1346         self._write_thumbnails(info_dict, filename)
1347
1348         if not self.params.get('skip_download', False):
1349             try:
1350                 def dl(name, info):
1351                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1352                     for ph in self._progress_hooks:
1353                         fd.add_progress_hook(ph)
1354                     if self.params.get('verbose'):
1355                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1356                     return fd.download(name, info)
1357
1358                 if info_dict.get('requested_formats') is not None:
1359                     downloaded = []
1360                     success = True
1361                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1362                     if not merger.available:
1363                         postprocessors = []
1364                         self.report_warning('You have requested multiple '
1365                                             'formats but ffmpeg or avconv are not installed.'
1366                                             ' The formats won\'t be merged')
1367                     else:
1368                         postprocessors = [merger]
1369                     for f in info_dict['requested_formats']:
1370                         new_info = dict(info_dict)
1371                         new_info.update(f)
1372                         fname = self.prepare_filename(new_info)
1373                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1374                         downloaded.append(fname)
1375                         partial_success = dl(fname, new_info)
1376                         success = success and partial_success
1377                     info_dict['__postprocessors'] = postprocessors
1378                     info_dict['__files_to_merge'] = downloaded
1379                 else:
1380                     # Just a single file
1381                     success = dl(filename, info_dict)
1382             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1383                 self.report_error('unable to download video data: %s' % str(err))
1384                 return
1385             except (OSError, IOError) as err:
1386                 raise UnavailableVideoError(err)
1387             except (ContentTooShortError, ) as err:
1388                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1389                 return
1390
1391             if success:
1392                 # Fixup content
1393                 fixup_policy = self.params.get('fixup')
1394                 if fixup_policy is None:
1395                     fixup_policy = 'detect_or_warn'
1396
1397                 stretched_ratio = info_dict.get('stretched_ratio')
1398                 if stretched_ratio is not None and stretched_ratio != 1:
1399                     if fixup_policy == 'warn':
1400                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1401                             info_dict['id'], stretched_ratio))
1402                     elif fixup_policy == 'detect_or_warn':
1403                         stretched_pp = FFmpegFixupStretchedPP(self)
1404                         if stretched_pp.available:
1405                             info_dict.setdefault('__postprocessors', [])
1406                             info_dict['__postprocessors'].append(stretched_pp)
1407                         else:
1408                             self.report_warning(
1409                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1410                                     info_dict['id'], stretched_ratio))
1411                     else:
1412                         assert fixup_policy in ('ignore', 'never')
1413
1414                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1415                     if fixup_policy == 'warn':
1416                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1417                             info_dict['id']))
1418                     elif fixup_policy == 'detect_or_warn':
1419                         fixup_pp = FFmpegFixupM4aPP(self)
1420                         if fixup_pp.available:
1421                             info_dict.setdefault('__postprocessors', [])
1422                             info_dict['__postprocessors'].append(fixup_pp)
1423                         else:
1424                             self.report_warning(
1425                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1426                                     info_dict['id']))
1427                     else:
1428                         assert fixup_policy in ('ignore', 'never')
1429
1430                 try:
1431                     self.post_process(filename, info_dict)
1432                 except (PostProcessingError) as err:
1433                     self.report_error('postprocessing: %s' % str(err))
1434                     return
1435                 self.record_download_archive(info_dict)
1436
1437     def download(self, url_list):
1438         """Download a given list of URLs."""
1439         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1440         if (len(url_list) > 1 and
1441                 '%' not in outtmpl and
1442                 self.params.get('max_downloads') != 1):
1443             raise SameFileError(outtmpl)
1444
1445         for url in url_list:
1446             try:
1447                 # It also downloads the videos
1448                 res = self.extract_info(url)
1449             except UnavailableVideoError:
1450                 self.report_error('unable to download video')
1451             except MaxDownloadsReached:
1452                 self.to_screen('[info] Maximum number of downloaded files reached.')
1453                 raise
1454             else:
1455                 if self.params.get('dump_single_json', False):
1456                     self.to_stdout(json.dumps(res))
1457
1458         return self._download_retcode
1459
1460     def download_with_info_file(self, info_filename):
1461         with contextlib.closing(fileinput.FileInput(
1462                 [info_filename], mode='r',
1463                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1464             # FileInput doesn't have a read method, we can't call json.load
1465             info = json.loads('\n'.join(f))
1466         try:
1467             self.process_ie_result(info, download=True)
1468         except DownloadError:
1469             webpage_url = info.get('webpage_url')
1470             if webpage_url is not None:
1471                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1472                 return self.download([webpage_url])
1473             else:
1474                 raise
1475         return self._download_retcode
1476
1477     def post_process(self, filename, ie_info):
1478         """Run all the postprocessors on the given file."""
1479         info = dict(ie_info)
1480         info['filepath'] = filename
1481         pps_chain = []
1482         if ie_info.get('__postprocessors') is not None:
1483             pps_chain.extend(ie_info['__postprocessors'])
1484         pps_chain.extend(self._pps)
1485         for pp in pps_chain:
1486             keep_video = None
1487             old_filename = info['filepath']
1488             try:
1489                 keep_video_wish, info = pp.run(info)
1490                 if keep_video_wish is not None:
1491                     if keep_video_wish:
1492                         keep_video = keep_video_wish
1493                     elif keep_video is None:
1494                         # No clear decision yet, let IE decide
1495                         keep_video = keep_video_wish
1496             except PostProcessingError as e:
1497                 self.report_error(e.msg)
1498             if keep_video is False and not self.params.get('keepvideo', False):
1499                 try:
1500                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1501                     os.remove(encodeFilename(old_filename))
1502                 except (IOError, OSError):
1503                     self.report_warning('Unable to remove downloaded video file')
1504
1505     def _make_archive_id(self, info_dict):
1506         # Future-proof against any change in case
1507         # and backwards compatibility with prior versions
1508         extractor = info_dict.get('extractor_key')
1509         if extractor is None:
1510             if 'id' in info_dict:
1511                 extractor = info_dict.get('ie_key')  # key in a playlist
1512         if extractor is None:
1513             return None  # Incomplete video information
1514         return extractor.lower() + ' ' + info_dict['id']
1515
1516     def in_download_archive(self, info_dict):
1517         fn = self.params.get('download_archive')
1518         if fn is None:
1519             return False
1520
1521         vid_id = self._make_archive_id(info_dict)
1522         if vid_id is None:
1523             return False  # Incomplete video information
1524
1525         try:
1526             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1527                 for line in archive_file:
1528                     if line.strip() == vid_id:
1529                         return True
1530         except IOError as ioe:
1531             if ioe.errno != errno.ENOENT:
1532                 raise
1533         return False
1534
1535     def record_download_archive(self, info_dict):
1536         fn = self.params.get('download_archive')
1537         if fn is None:
1538             return
1539         vid_id = self._make_archive_id(info_dict)
1540         assert vid_id
1541         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1542             archive_file.write(vid_id + '\n')
1543
1544     @staticmethod
1545     def format_resolution(format, default='unknown'):
1546         if format.get('vcodec') == 'none':
1547             return 'audio only'
1548         if format.get('resolution') is not None:
1549             return format['resolution']
1550         if format.get('height') is not None:
1551             if format.get('width') is not None:
1552                 res = '%sx%s' % (format['width'], format['height'])
1553             else:
1554                 res = '%sp' % format['height']
1555         elif format.get('width') is not None:
1556             res = '?x%d' % format['width']
1557         else:
1558             res = default
1559         return res
1560
1561     def _format_note(self, fdict):
1562         res = ''
1563         if fdict.get('ext') in ['f4f', 'f4m']:
1564             res += '(unsupported) '
1565         if fdict.get('format_note') is not None:
1566             res += fdict['format_note'] + ' '
1567         if fdict.get('tbr') is not None:
1568             res += '%4dk ' % fdict['tbr']
1569         if fdict.get('container') is not None:
1570             if res:
1571                 res += ', '
1572             res += '%s container' % fdict['container']
1573         if (fdict.get('vcodec') is not None and
1574                 fdict.get('vcodec') != 'none'):
1575             if res:
1576                 res += ', '
1577             res += fdict['vcodec']
1578             if fdict.get('vbr') is not None:
1579                 res += '@'
1580         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1581             res += 'video@'
1582         if fdict.get('vbr') is not None:
1583             res += '%4dk' % fdict['vbr']
1584         if fdict.get('fps') is not None:
1585             res += ', %sfps' % fdict['fps']
1586         if fdict.get('acodec') is not None:
1587             if res:
1588                 res += ', '
1589             if fdict['acodec'] == 'none':
1590                 res += 'video only'
1591             else:
1592                 res += '%-5s' % fdict['acodec']
1593         elif fdict.get('abr') is not None:
1594             if res:
1595                 res += ', '
1596             res += 'audio'
1597         if fdict.get('abr') is not None:
1598             res += '@%3dk' % fdict['abr']
1599         if fdict.get('asr') is not None:
1600             res += ' (%5dHz)' % fdict['asr']
1601         if fdict.get('filesize') is not None:
1602             if res:
1603                 res += ', '
1604             res += format_bytes(fdict['filesize'])
1605         elif fdict.get('filesize_approx') is not None:
1606             if res:
1607                 res += ', '
1608             res += '~' + format_bytes(fdict['filesize_approx'])
1609         return res
1610
1611     def list_formats(self, info_dict):
1612         formats = info_dict.get('formats', [info_dict])
1613         table = [
1614             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1615             for f in formats
1616             if f.get('preference') is None or f['preference'] >= -1000]
1617         if len(formats) > 1:
1618             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1619
1620         header_line = ['format code', 'extension', 'resolution', 'note']
1621         self.to_screen(
1622             '[info] Available formats for %s:\n%s' %
1623             (info_dict['id'], render_table(header_line, table)))
1624
1625     def list_thumbnails(self, info_dict):
1626         thumbnails = info_dict.get('thumbnails')
1627         if not thumbnails:
1628             tn_url = info_dict.get('thumbnail')
1629             if tn_url:
1630                 thumbnails = [{'id': '0', 'url': tn_url}]
1631             else:
1632                 self.to_screen(
1633                     '[info] No thumbnails present for %s' % info_dict['id'])
1634                 return
1635
1636         self.to_screen(
1637             '[info] Thumbnails for %s:' % info_dict['id'])
1638         self.to_screen(render_table(
1639             ['ID', 'width', 'height', 'URL'],
1640             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1641
1642     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1643         if not subtitles:
1644             self.to_screen('%s has no %s' % (video_id, name))
1645             return
1646         self.to_screen(
1647             'Available %s for %s:' % (name, video_id))
1648         self.to_screen(render_table(
1649             ['Language', 'formats'],
1650             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1651                 for lang, formats in subtitles.items()]))
1652
1653     def urlopen(self, req):
1654         """ Start an HTTP download """
1655
1656         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1657         # always respected by websites, some tend to give out URLs with non percent-encoded
1658         # non-ASCII characters (see telemb.py, ard.py [#3412])
1659         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1660         # To work around aforementioned issue we will replace request's original URL with
1661         # percent-encoded one
1662         req_is_string = isinstance(req, compat_basestring)
1663         url = req if req_is_string else req.get_full_url()
1664         url_escaped = escape_url(url)
1665
1666         # Substitute URL if any change after escaping
1667         if url != url_escaped:
1668             if req_is_string:
1669                 req = url_escaped
1670             else:
1671                 req = compat_urllib_request.Request(
1672                     url_escaped, data=req.data, headers=req.headers,
1673                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1674
1675         return self._opener.open(req, timeout=self._socket_timeout)
1676
1677     def print_debug_header(self):
1678         if not self.params.get('verbose'):
1679             return
1680
1681         if type('') is not compat_str:
1682             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1683             self.report_warning(
1684                 'Your Python is broken! Update to a newer and supported version')
1685
1686         stdout_encoding = getattr(
1687             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1688         encoding_str = (
1689             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1690                 locale.getpreferredencoding(),
1691                 sys.getfilesystemencoding(),
1692                 stdout_encoding,
1693                 self.get_encoding()))
1694         write_string(encoding_str, encoding=None)
1695
1696         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1697         try:
1698             sp = subprocess.Popen(
1699                 ['git', 'rev-parse', '--short', 'HEAD'],
1700                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1701                 cwd=os.path.dirname(os.path.abspath(__file__)))
1702             out, err = sp.communicate()
1703             out = out.decode().strip()
1704             if re.match('[0-9a-f]+', out):
1705                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1706         except:
1707             try:
1708                 sys.exc_clear()
1709             except:
1710                 pass
1711         self._write_string('[debug] Python version %s - %s\n' % (
1712             platform.python_version(), platform_name()))
1713
1714         exe_versions = FFmpegPostProcessor.get_versions(self)
1715         exe_versions['rtmpdump'] = rtmpdump_version()
1716         exe_str = ', '.join(
1717             '%s %s' % (exe, v)
1718             for exe, v in sorted(exe_versions.items())
1719             if v
1720         )
1721         if not exe_str:
1722             exe_str = 'none'
1723         self._write_string('[debug] exe versions: %s\n' % exe_str)
1724
1725         proxy_map = {}
1726         for handler in self._opener.handlers:
1727             if hasattr(handler, 'proxies'):
1728                 proxy_map.update(handler.proxies)
1729         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1730
1731         if self.params.get('call_home', False):
1732             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1733             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1734             latest_version = self.urlopen(
1735                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1736             if version_tuple(latest_version) > version_tuple(__version__):
1737                 self.report_warning(
1738                     'You are using an outdated version (newest version: %s)! '
1739                     'See https://yt-dl.org/update if you need help updating.' %
1740                     latest_version)
1741
1742     def _setup_opener(self):
1743         timeout_val = self.params.get('socket_timeout')
1744         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1745
1746         opts_cookiefile = self.params.get('cookiefile')
1747         opts_proxy = self.params.get('proxy')
1748
1749         if opts_cookiefile is None:
1750             self.cookiejar = compat_cookiejar.CookieJar()
1751         else:
1752             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1753                 opts_cookiefile)
1754             if os.access(opts_cookiefile, os.R_OK):
1755                 self.cookiejar.load()
1756
1757         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1758             self.cookiejar)
1759         if opts_proxy is not None:
1760             if opts_proxy == '':
1761                 proxies = {}
1762             else:
1763                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1764         else:
1765             proxies = compat_urllib_request.getproxies()
1766             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1767             if 'http' in proxies and 'https' not in proxies:
1768                 proxies['https'] = proxies['http']
1769         proxy_handler = PerRequestProxyHandler(proxies)
1770
1771         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1772         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1773         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1774         opener = compat_urllib_request.build_opener(
1775             proxy_handler, https_handler, cookie_processor, ydlh)
1776
1777         # Delete the default user-agent header, which would otherwise apply in
1778         # cases where our custom HTTP handler doesn't come into play
1779         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1780         opener.addheaders = []
1781         self._opener = opener
1782
1783     def encode(self, s):
1784         if isinstance(s, bytes):
1785             return s  # Already encoded
1786
1787         try:
1788             return s.encode(self.get_encoding())
1789         except UnicodeEncodeError as err:
1790             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1791             raise
1792
1793     def get_encoding(self):
1794         encoding = self.params.get('encoding')
1795         if encoding is None:
1796             encoding = preferredencoding()
1797         return encoding
1798
1799     def _write_thumbnails(self, info_dict, filename):
1800         if self.params.get('writethumbnail', False):
1801             thumbnails = info_dict.get('thumbnails')
1802             if thumbnails:
1803                 thumbnails = [thumbnails[-1]]
1804         elif self.params.get('write_all_thumbnails', False):
1805             thumbnails = info_dict.get('thumbnails')
1806         else:
1807             return
1808
1809         if not thumbnails:
1810             # No thumbnails present, so return immediately
1811             return
1812
1813         for t in thumbnails:
1814             thumb_ext = determine_ext(t['url'], 'jpg')
1815             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1816             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1817             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1818
1819             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1820                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1821                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1822             else:
1823                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1824                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1825                 try:
1826                     uf = self.urlopen(t['url'])
1827                     with open(thumb_filename, 'wb') as thumbf:
1828                         shutil.copyfileobj(uf, thumbf)
1829                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1830                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1831                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1832                     self.report_warning('Unable to download thumbnail "%s": %s' %
1833                                         (t['url'], compat_str(err)))