[downloader/http] Remove wrong '_hook_progress' call (fixes #5117)
[youtube-dl] / youtube_dl / YoutubeDL.py
index 7319323e59ae34dbcfc5c3101d32fddb1ff46319..df2aebb59f836e7255ccaa6fcce545154d58b65d 100755 (executable)
@@ -4,8 +4,10 @@
 from __future__ import absolute_import, unicode_literals
 
 import collections
+import contextlib
 import datetime
 import errno
+import fileinput
 import io
 import itertools
 import json
@@ -28,6 +30,7 @@ from .compat import (
     compat_basestring,
     compat_cookiejar,
     compat_expanduser,
+    compat_get_terminal_size,
     compat_http_client,
     compat_kwargs,
     compat_str,
@@ -46,12 +49,12 @@ from .utils import (
     ExtractorError,
     format_bytes,
     formatSeconds,
-    get_term_width,
     locked_file,
     make_HTTPS_handler,
     MaxDownloadsReached,
     PagedList,
     parse_filesize,
+    PerRequestProxyHandler,
     PostProcessingError,
     platform_name,
     preferredencoding,
@@ -181,6 +184,8 @@ class YoutubeDL(object):
     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                        At the moment, this is only supported by YouTube.
     proxy:             URL of the proxy server to use
+    cn_verification_proxy:  URL of the proxy to use for IP address verification
+                       on Chinese sites. (Experimental)
     socket_timeout:    Time to wait for unresponsive hosts, in seconds
     bidi_workaround:   Work around buggy terminals without bidirectional text
                        support, using fridibi
@@ -199,18 +204,25 @@ class YoutubeDL(object):
                        postprocessor.
     progress_hooks:    A list of functions that get called on download
                        progress, with a dictionary with the entries
-                       * status: One of "downloading" and "finished".
+                       * status: One of "downloading", "error", or "finished".
                                  Check this first and ignore unknown values.
 
-                       If status is one of "downloading" or "finished", the
+                       If status is one of "downloading", or "finished", the
                        following properties may also be present:
                        * filename: The final filename (always present)
+                       * tmpfilename: The filename we're currently writing to
                        * downloaded_bytes: Bytes on disk
                        * total_bytes: Size of the whole file, None if unknown
-                       * tmpfilename: The filename we're currently writing to
+                       * total_bytes_estimate: Guess of the eventual file size,
+                                               None if unavailable.
+                       * elapsed: The number of seconds since download started.
                        * eta: The estimated time in seconds, None if unknown
                        * speed: The download speed in bytes/second, None if
                                 unknown
+                       * fragment_index: The counter of the currently
+                                         downloaded video fragment.
+                       * fragment_count: The number of fragments (= individual
+                                         files that will be merged)
 
                        Progress hooks are guaranteed to be called at least once
                        (with status "finished") if the download is successful.
@@ -225,7 +237,6 @@ class YoutubeDL(object):
     call_home:         Boolean, true iff we are allowed to contact the
                        youtube-dl servers for debugging.
     sleep_interval:    Number of seconds to sleep before each download.
-    external_downloader:  Executable of the external downloader to call.
     listformats:       Print an overview of available video formats and exit.
     list_thumbnails:   Print a table of all thumbnails and exit.
     match_filter:      A function that gets called with the info_dict of
@@ -235,12 +246,16 @@ class YoutubeDL(object):
                        match_filter_func in utils.py is one example for this.
     no_color:          Do not emit color codes in output.
 
+    The following options determine which downloader is picked:
+    external_downloader: Executable of the external downloader to call.
+                       None or unset for standard (built-in) downloader.
+    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 
     The following parameters are not used by YoutubeDL itself, they are used by
-    the FileDownloader:
+    the downloader (see youtube_dl/downloader/common.py):
     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
     noresizebuffer, retries, continuedl, noprogress, consoletitle,
-    xattr_set_filesize.
+    xattr_set_filesize, external_downloader_args.
 
     The following options are used by the post processors:
     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
@@ -274,7 +289,7 @@ class YoutubeDL(object):
             try:
                 import pty
                 master, slave = pty.openpty()
-                width = get_term_width()
+                width = compat_get_terminal_size().columns
                 if width is None:
                     width_args = []
                 else:
@@ -298,8 +313,8 @@ class YoutubeDL(object):
                     raise
 
         if (sys.version_info >= (3,) and sys.platform != 'win32' and
-                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
-                and not params.get('restrictfilenames', False)):
+                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
+                not params.get('restrictfilenames', False)):
             # On Python 3, the Unicode filesystem API will throw errors (#1474)
             self.report_warning(
                 'Assuming --restrict-filenames since file system encoding '
@@ -951,30 +966,9 @@ class YoutubeDL(object):
         return res
 
     def _calc_cookies(self, info_dict):
-        class _PseudoRequest(object):
-            def __init__(self, url):
-                self.url = url
-                self.headers = {}
-                self.unverifiable = False
-
-            def add_unredirected_header(self, k, v):
-                self.headers[k] = v
-
-            def get_full_url(self):
-                return self.url
-
-            def is_unverifiable(self):
-                return self.unverifiable
-
-            def has_header(self, h):
-                return h in self.headers
-
-            def get_header(self, h, default=None):
-                return self.headers.get(h, default)
-
-        pr = _PseudoRequest(info_dict['url'])
+        pr = compat_urllib_request.Request(info_dict['url'])
         self.cookiejar.add_cookie_header(pr)
-        return pr.headers.get('Cookie')
+        return pr.get_header('Cookie')
 
     def process_video_result(self, info_dict, download=True):
         assert info_dict.get('_type', 'video') == 'video'
@@ -1189,14 +1183,6 @@ class YoutubeDL(object):
             if formats is None:
                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
                 continue
-            if isinstance(formats, compat_str):
-                # TODO: convert all IE with subtitles support to the new format
-                # and remove this
-                subs[lang] = {
-                    'ext': formats_preference[0],
-                    'data': formats,
-                }
-                continue
             for ext in formats_preference:
                 if ext == 'best':
                     f = formats[-1]
@@ -1319,17 +1305,18 @@ class YoutubeDL(object):
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
             subtitles = info_dict['requested_subtitles']
+            ie = self.get_info_extractor(info_dict['extractor_key'])
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
                 if sub_info.get('data') is not None:
                     sub_data = sub_info['data']
                 else:
                     try:
-                        uf = self.urlopen(sub_info['url'])
-                        sub_data = uf.read().decode('utf-8')
-                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                        sub_data = ie._download_webpage(
+                            sub_info['url'], info_dict['id'], note=False)
+                    except ExtractorError as err:
                         self.report_warning('Unable to download subtitle for "%s": %s' %
-                                            (sub_lang, compat_str(err)))
+                                            (sub_lang, compat_str(err.cause)))
                         continue
                 try:
                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
@@ -1371,7 +1358,7 @@ class YoutubeDL(object):
                     downloaded = []
                     success = True
                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
-                    if not merger._executable:
+                    if not merger.available:
                         postprocessors = []
                         self.report_warning('You have requested multiple '
                                             'formats but ffmpeg or avconv are not installed.'
@@ -1450,8 +1437,8 @@ class YoutubeDL(object):
         """Download a given list of URLs."""
         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
         if (len(url_list) > 1 and
-                '%' not in outtmpl
-                and self.params.get('max_downloads') != 1):
+                '%' not in outtmpl and
+                self.params.get('max_downloads') != 1):
             raise SameFileError(outtmpl)
 
         for url in url_list:
@@ -1470,8 +1457,11 @@ class YoutubeDL(object):
         return self._download_retcode
 
     def download_with_info_file(self, info_filename):
-        with io.open(info_filename, 'r', encoding='utf-8') as f:
-            info = json.load(f)
+        with contextlib.closing(fileinput.FileInput(
+                [info_filename], mode='r',
+                openhook=fileinput.hook_encoded('utf-8'))) as f:
+            # FileInput doesn't have a read method, we can't call json.load
+            info = json.loads('\n'.join(f))
         try:
             self.process_ie_result(info, download=True)
         except DownloadError:
@@ -1618,29 +1608,18 @@ class YoutubeDL(object):
         return res
 
     def list_formats(self, info_dict):
-        def line(format, idlen=20):
-            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
-                format['format_id'],
-                format['ext'],
-                self.format_resolution(format),
-                self._format_note(format),
-            ))
-
         formats = info_dict.get('formats', [info_dict])
-        idlen = max(len('format code'),
-                    max(len(f['format_id']) for f in formats))
-        formats_s = [
-            line(f, idlen) for f in formats
+        table = [
+            [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
+            for f in formats
             if f.get('preference') is None or f['preference'] >= -1000]
         if len(formats) > 1:
-            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
+            table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
 
-        header_line = line({
-            'format_id': 'format code', 'ext': 'extension',
-            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
+        header_line = ['format code', 'extension', 'resolution', 'note']
         self.to_screen(
-            '[info] Available formats for %s:\n%s\n%s' %
-            (info_dict['id'], header_line, '\n'.join(formats_s)))
+            '[info] Available formats for %s:\n%s' %
+            (info_dict['id'], render_table(header_line, table)))
 
     def list_thumbnails(self, info_dict):
         thumbnails = info_dict.get('thumbnails')
@@ -1731,7 +1710,7 @@ class YoutubeDL(object):
         self._write_string('[debug] Python version %s - %s\n' % (
             platform.python_version(), platform_name()))
 
-        exe_versions = FFmpegPostProcessor.get_versions()
+        exe_versions = FFmpegPostProcessor.get_versions(self)
         exe_versions['rtmpdump'] = rtmpdump_version()
         exe_str = ', '.join(
             '%s %s' % (exe, v)
@@ -1786,13 +1765,14 @@ class YoutubeDL(object):
             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
             if 'http' in proxies and 'https' not in proxies:
                 proxies['https'] = proxies['http']
-        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+        proxy_handler = PerRequestProxyHandler(proxies)
 
         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
         opener = compat_urllib_request.build_opener(
-            https_handler, proxy_handler, cookie_processor, ydlh)
+            proxy_handler, https_handler, cookie_processor, ydlh)
+
         # Delete the default user-agent header, which would otherwise apply in
         # cases where our custom HTTP handler doesn't come into play
         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)