Move FileDownloader to its own module and create a new class for each download process
[youtube-dl] / youtube_dl / YoutubeDL.py
index e86e8a0907d39c061fb5f8a1def6487182f05056..414aa5a80cb575642ee4ff20e393c7c96afb4e14 100644 (file)
@@ -3,6 +3,7 @@
 
 from __future__ import absolute_import
 
+import collections
 import errno
 import io
 import json
@@ -22,7 +23,6 @@ if os.name == 'nt':
 from .utils import (
     compat_cookiejar,
     compat_http_client,
-    compat_print,
     compat_str,
     compat_urllib_error,
     compat_urllib_request,
@@ -34,6 +34,7 @@ from .utils import (
     encodeFilename,
     ExtractorError,
     format_bytes,
+    get_term_width,
     locked_file,
     make_HTTPS_handler,
     MaxDownloadsReached,
@@ -50,7 +51,7 @@ from .utils import (
     YoutubeDLHandler,
 )
 from .extractor import get_info_extractor, gen_extractors
-from .FileDownloader import FileDownloader
+from .downloader import get_suitable_downloader
 from .version import __version__
 
 
@@ -132,6 +133,9 @@ class YoutubeDL(object):
     cookiefile:        File name where cookies should be read from and dumped to.
     nocheckcertificate:Do not verify SSL certificates
     proxy:             URL of the proxy server to use
+    socket_timeout:    Time to wait for unresponsive hosts, in seconds
+    bidi_workaround:   Work around buggy terminals without bidirectional text
+                       support, using fribidi
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
@@ -146,15 +150,38 @@ class YoutubeDL(object):
     _num_downloads = None
     _screen_file = None
 
-    def __init__(self, params={}):
+    def __init__(self, params=None):
         """Create a FileDownloader object with the given options."""
         self._ies = []
         self._ies_instances = {}
         self._pps = []
-        self._progress_hooks = []
+        self._fd_progress_hooks = []
         self._download_retcode = 0
         self._num_downloads = 0
         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
+        self._err_file = sys.stderr
+        self.params = {} if params is None else params
+
+        if params.get('bidi_workaround', False):
+            try:
+                import pty
+                master, slave = pty.openpty()
+                width = get_term_width()
+                if width is None:
+                    width_args = []
+                else:
+                    width_args = ['-w', str(width)]
+                self._fribidi = subprocess.Popen(
+                    ['fribidi', '-c', 'UTF-8'] + width_args,
+                    stdin=subprocess.PIPE,
+                    stdout=slave,
+                    stderr=self._err_file)
+                self._fribidi_channel = os.fdopen(master, 'rb')
+            except OSError as ose:
+                if ose.errno == 2:
+                    self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
+                else:
+                    raise
 
         if (sys.version_info >= (3,) and sys.platform != 'win32' and
                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
@@ -164,10 +191,7 @@ class YoutubeDL(object):
                 u'Assuming --restrict-filenames since file system encoding '
                 u'cannot encode all charactes. '
                 u'Set the LC_ALL environment variable to fix this.')
-            params['restrictfilenames'] = True
-
-        self.params = params
-        self.fd = FileDownloader(self, self.params)
+            self.params['restrictfilenames'] = True
 
         if '%(stitle)s' in self.params.get('outtmpl', ''):
             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
@@ -204,13 +228,35 @@ class YoutubeDL(object):
         self._pps.append(pp)
         pp.set_downloader(self)
 
+    def add_downloader_progress_hook(self, ph):
+        """Add the progress hook to the file downloader"""
+        self._fd_progress_hooks.append(ph)
+
+    def _bidi_workaround(self, message):
+        if not hasattr(self, '_fribidi_channel'):
+            return message
+
+        assert type(message) == type(u'')
+        line_count = message.count(u'\n') + 1
+        self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
+        self._fribidi.stdin.flush()
+        res = u''.join(self._fribidi_channel.readline().decode('utf-8')
+                       for _ in range(line_count))
+        return res[:-len(u'\n')]
+
     def to_screen(self, message, skip_eol=False):
+        """Print message to stdout if not in quiet mode."""
+        return self.to_stdout(message, skip_eol, check_quiet=True)
+
+    def to_stdout(self, message, skip_eol=False, check_quiet=False):
         """Print message to stdout if not in quiet mode."""
         if self.params.get('logger'):
             self.params['logger'].debug(message)
-        elif not self.params.get('quiet', False):
+        elif not check_quiet or not self.params.get('quiet', False):
+            message = self._bidi_workaround(message)
             terminator = [u'\n', u''][skip_eol]
             output = message + terminator
+
             write_string(output, self._screen_file)
 
     def to_stderr(self, message):
@@ -219,10 +265,9 @@ class YoutubeDL(object):
         if self.params.get('logger'):
             self.params['logger'].error(message)
         else:
+            message = self._bidi_workaround(message)
             output = message + u'\n'
-            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
-                output = output.encode(preferredencoding())
-            sys.stderr.write(output)
+            write_string(output, self._err_file)
 
     def to_console_title(self, message):
         if not self.params.get('consoletitle', False):
@@ -293,7 +338,7 @@ class YoutubeDL(object):
         Print the message to stderr, it will be prefixed with 'WARNING:'
         If stderr is a tty file the 'WARNING:' will be colored
         '''
-        if sys.stderr.isatty() and os.name != 'nt':
+        if self._err_file.isatty() and os.name != 'nt':
             _msg_header = u'\033[0;33mWARNING:\033[0m'
         else:
             _msg_header = u'WARNING:'
@@ -305,7 +350,7 @@ class YoutubeDL(object):
         Do the same as trouble, but prefixes the message with 'ERROR:', colored
         in red if stderr is a tty file.
         '''
-        if sys.stderr.isatty() and os.name != 'nt':
+        if self._err_file.isatty() and os.name != 'nt':
             _msg_header = u'\033[0;31mERROR:\033[0m'
         else:
             _msg_header = u'ERROR:'
@@ -354,18 +399,17 @@ class YoutubeDL(object):
                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 
             sanitize = lambda k, v: sanitize_filename(
-                u'NA' if v is None else compat_str(v),
+                compat_str(v),
                 restricted=self.params.get('restrictfilenames'),
                 is_id=(k == u'id'))
             template_dict = dict((k, sanitize(k, v))
-                                 for k, v in template_dict.items())
+                                 for k, v in template_dict.items()
+                                 if v is not None)
+            template_dict = collections.defaultdict(lambda: u'NA', template_dict)
 
             tmpl = os.path.expanduser(self.params['outtmpl'])
             filename = tmpl % template_dict
             return filename
-        except KeyError as err:
-            self.report_error(u'Erroneous output template')
-            return None
         except ValueError as err:
             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
             return None
@@ -404,7 +448,8 @@ class YoutubeDL(object):
         for key, value in extra_info.items():
             info_dict.setdefault(key, value)
 
-    def extract_info(self, url, download=True, ie_key=None, extra_info={}):
+    def extract_info(self, url, download=True, ie_key=None, extra_info={},
+                     process=True):
         '''
         Returns a list with a dictionary for each video we find.
         If 'download', also downloads the videos.
@@ -440,7 +485,10 @@ class YoutubeDL(object):
                         'webpage_url': url,
                         'extractor_key': ie.ie_key(),
                     })
-                return self.process_ie_result(ie_result, download, extra_info)
+                if process:
+                    return self.process_ie_result(ie_result, download, extra_info)
+                else:
+                    return ie_result
             except ExtractorError as de: # An error we somewhat expected
                 self.report_error(compat_str(de), de.format_traceback())
                 break
@@ -473,8 +521,33 @@ class YoutubeDL(object):
                                      download,
                                      ie_key=ie_result.get('ie_key'),
                                      extra_info=extra_info)
+        elif result_type == 'url_transparent':
+            # Use the information from the embedding page
+            info = self.extract_info(
+                ie_result['url'], ie_key=ie_result.get('ie_key'),
+                extra_info=extra_info, download=False, process=False)
+
+            def make_result(embedded_info):
+                new_result = ie_result.copy()
+                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
+                          'entries', 'urlhandle', 'ie_key', 'duration',
+                          'subtitles', 'annotations', 'format',
+                          'thumbnail', 'thumbnails'):
+                    if f in new_result:
+                        del new_result[f]
+                    if f in embedded_info:
+                        new_result[f] = embedded_info[f]
+                return new_result
+            new_result = make_result(info)
+
+            assert new_result.get('_type') != 'url_transparent'
+            if new_result.get('_type') == 'compat_list':
+                new_result['entries'] = [
+                    make_result(e) for e in new_result['entries']]
+
+            return self.process_ie_result(
+                new_result, download=download, extra_info=extra_info)
         elif result_type == 'playlist':
-
             # We process each entry in the playlist
             playlist = ie_result.get('title', None) or ie_result.get('id', None)
             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -665,22 +738,23 @@ class YoutubeDL(object):
 
         # Forced printings
         if self.params.get('forcetitle', False):
-            compat_print(info_dict['fulltitle'])
+            self.to_stdout(info_dict['fulltitle'])
         if self.params.get('forceid', False):
-            compat_print(info_dict['id'])
+            self.to_stdout(info_dict['id'])
         if self.params.get('forceurl', False):
             # For RTMP URLs, also include the playpath
-            compat_print(info_dict['url'] + info_dict.get('play_path', u''))
+            self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
-            compat_print(info_dict['thumbnail'])
+            self.to_stdout(info_dict['thumbnail'])
         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
-            compat_print(info_dict['description'])
+            self.to_stdout(info_dict['description'])
         if self.params.get('forcefilename', False) and filename is not None:
-            compat_print(filename)
+            self.to_stdout(filename)
         if self.params.get('forceformat', False):
-            compat_print(info_dict['format'])
+            self.to_stdout(info_dict['format'])
         if self.params.get('forcejson', False):
-            compat_print(json.dumps(info_dict))
+            info_dict['_filename'] = filename
+            self.to_stdout(json.dumps(info_dict))
 
         # Do nothing else if in simulate mode
         if self.params.get('simulate', False):
@@ -755,7 +829,7 @@ class YoutubeDL(object):
         if self.params.get('writethumbnail', False):
             if info_dict.get('thumbnail') is not None:
                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
-                thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
+                thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                                (info_dict['extractor'], info_dict['id']))
                 try:
@@ -773,7 +847,10 @@ class YoutubeDL(object):
                 success = True
             else:
                 try:
-                    success = self.fd._do_download(filename, info_dict)
+                    fd = get_suitable_downloader(info_dict)(self, self.params)
+                    for ph in self._fd_progress_hooks:
+                        fd.add_progress_hook(ph)
+                    success = fd.download(filename, info_dict)
                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                     self.report_error(u'unable to download video data: %s' % str(err))
                     return
@@ -811,6 +888,20 @@ class YoutubeDL(object):
 
         return self._download_retcode
 
+    def download_with_info_file(self, info_filename):
+        with io.open(info_filename, 'r', encoding='utf-8') as f:
+            info = json.load(f)
+        try:
+            self.process_ie_result(info, download=True)
+        except DownloadError:
+            webpage_url = info.get('webpage_url')
+            if webpage_url is not None:
+                self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
+                return self.download([webpage_url])
+            else:
+                raise
+        return self._download_retcode
+
     def post_process(self, filename, ie_info):
         """Run all the postprocessors on the given file."""
         info = dict(ie_info)
@@ -969,7 +1060,10 @@ class YoutubeDL(object):
                 proxy_map.update(handler.proxies)
         write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
 
-    def _setup_opener(self, timeout=20):
+    def _setup_opener(self):
+        timeout_val = self.params.get('socket_timeout')
+        timeout = 600 if timeout_val is None else float(timeout_val)
+
         opts_cookiefile = self.params.get('cookiefile')
         opts_proxy = self.params.get('proxy')