X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FFileDownloader.py;h=03346ab04263a9e7c11f829053f8e6d96101781e;hb=2b35c9ef742bf261078ea10c6c0bba848db1a0df;hp=aad12fc1ff7737870335210051a09e15c2af082b;hpb=37c8fd48421ce8bbbe25da962b2e8d5ca2629dd0;p=youtube-dl diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index aad12fc1f..84a539b82 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -1,105 +1,58 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import absolute_import - import math import os import re -import socket import subprocess import sys import time -if os.name == 'nt': - import ctypes - -from .utils import * +from .utils import ( + compat_urllib_error, + compat_urllib_request, + ContentTooShortError, + determine_ext, + encodeFilename, + sanitize_open, + timeconvert, +) class FileDownloader(object): """File Downloader class. File downloader objects are the ones responsible of downloading the - actual video file and writing it to disk if the user has requested - it, among some other tasks. In most cases there should be one per - program. As, given a video URL, the downloader doesn't know how to - extract all the needed information, task that InfoExtractors do, it - has to pass the URL to one of them. - - For this, file downloader objects have a method that allows - InfoExtractors to be registered in a given order. When it is passed - a URL, the file downloader handles it to the first InfoExtractor it - finds that reports being able to handle it. The InfoExtractor extracts - all the information about the video or videos the URL refers to, and - asks the FileDownloader to process the video information, possibly - downloading the video. + actual video file and writing it to disk. File downloaders accept a lot of parameters. In order not to saturate the object constructor with arguments, it receives a dictionary of - options instead. These options are available through the params - attribute for the InfoExtractors to use. The FileDownloader also - registers itself as the downloader in charge for the InfoExtractors - that are added to it, so this is a "mutual registration". + options instead. Available options: - username: Username for authentication purposes. - password: Password for authentication purposes. - usenetrc: Use netrc for authentication instead. + verbose: Print additional info to stdout. quiet: Do not print messages to stdout. - forceurl: Force printing final URL. - forcetitle: Force printing title. - forcethumbnail: Force printing thumbnail URL. - forcedescription: Force printing description. - forcefilename: Force printing final filename. - simulate: Do not download the video files. - format: Video format code. - format_limit: Highest quality format to try. - outtmpl: Template for output names. - restrictfilenames: Do not allow "&" and spaces in file names - ignoreerrors: Do not stop on download errors. ratelimit: Download speed limit, in bytes/sec. - nooverwrites: Prevent overwriting files. retries: Number of times to retry for HTTP error 5xx buffersize: Size of download buffer in bytes. noresizebuffer: Do not automatically resize the download buffer. continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. - playliststart: Playlist item to start at. - playlistend: Playlist item to end at. - matchtitle: Download only matching titles. - rejecttitle: Reject downloads for matching titles. logtostderr: Log messages to stderr instead of stdout. consoletitle: Display progress in console window's titlebar. nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. - writedescription: Write the video description to a .description file - writeinfojson: Write the video description to a .info.json file - writesubtitles: Write the video subtitles to a .srt file - subtitleslang: Language of the subtitles to download test: Download only first bytes to test the downloader. + min_filesize: Skip files smaller than this size + max_filesize: Skip files larger than this size """ params = None - _ies = [] - _pps = [] - _download_retcode = None - _num_downloads = None - _screen_file = None - def __init__(self, params): + def __init__(self, ydl, params): """Create a FileDownloader object with the given options.""" - self._ies = [] - self._pps = [] - self._download_retcode = 0 - self._num_downloads = 0 - self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] + self.ydl = ydl + self._progress_hooks = [] self.params = params - if '%(stitle)s' in self.params['outtmpl']: - self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') - @staticmethod def format_bytes(bytes): if bytes is None: @@ -110,36 +63,61 @@ class FileDownloader(object): exponent = 0 else: exponent = int(math.log(bytes, 1024.0)) - suffix = 'bkMGTPEZY'[exponent] + suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent] converted = float(bytes) / float(1024 ** exponent) return '%.2f%s' % (converted, suffix) + @staticmethod + def format_seconds(seconds): + (mins, secs) = divmod(seconds, 60) + (hours, mins) = divmod(mins, 60) + if hours > 99: + return '--:--:--' + if hours == 0: + return '%02d:%02d' % (mins, secs) + else: + return '%02d:%02d:%02d' % (hours, mins, secs) + @staticmethod def calc_percent(byte_counter, data_len): if data_len is None: + return None + return float(byte_counter) / float(data_len) * 100.0 + + @staticmethod + def format_percent(percent): + if percent is None: return '---.-%' - return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0)) + return '%6s' % ('%3.1f%%' % percent) @staticmethod def calc_eta(start, now, total, current): if total is None: - return '--:--' + return None dif = now - start if current == 0 or dif < 0.001: # One millisecond - return '--:--' + return None rate = float(current) / dif - eta = int((float(total) - float(current)) / rate) - (eta_mins, eta_secs) = divmod(eta, 60) - if eta_mins > 99: + return int((float(total) - float(current)) / rate) + + @staticmethod + def format_eta(eta): + if eta is None: return '--:--' - return '%02d:%02d' % (eta_mins, eta_secs) + return FileDownloader.format_seconds(eta) @staticmethod def calc_speed(start, now, bytes): dif = now - start if bytes == 0 or dif < 0.001: # One millisecond + return None + return float(bytes) / dif + + @staticmethod + def format_speed(speed): + if speed is None: return '%10s' % '---b/s' - return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif)) + return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed)) @staticmethod def best_block_size(elapsed_time, bytes): @@ -164,62 +142,23 @@ class FileDownloader(object): multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return int(round(number * multiplier)) - def add_info_extractor(self, ie): - """Add an InfoExtractor object to the end of the list.""" - self._ies.append(ie) - ie.set_downloader(self) - - def add_post_processor(self, pp): - """Add a PostProcessor object to the end of the chain.""" - self._pps.append(pp) - pp.set_downloader(self) - - def to_screen(self, message, skip_eol=False): - """Print message to stdout if not in quiet mode.""" - assert type(message) == type(u'') - if not self.params.get('quiet', False): - terminator = [u'\n', u''][skip_eol] - output = message + terminator - if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr - output = output.encode(preferredencoding(), 'ignore') - self._screen_file.write(output) - self._screen_file.flush() + def to_screen(self, *args, **kargs): + self.ydl.to_screen(*args, **kargs) def to_stderr(self, message): - """Print message to stderr.""" - assert type(message) == type(u'') - output = message + u'\n' - if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr - output = output.encode(preferredencoding()) - sys.stderr.write(output) - - def to_cons_title(self, message): - """Set console/terminal window title to message.""" - if not self.params.get('consoletitle', False): - return - if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): - # c_wchar_p() might not be necessary if `message` is - # already of type unicode() - ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) - elif 'TERM' in os.environ: - sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding())) - - def fixed_template(self): - """Checks if the output template is fixed.""" - return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None) - - def trouble(self, message=None): - """Determine action to take when a download problem appears. - - Depending on if the downloader has been configured to ignore - download errors or not, this method may throw an exception or - not when errors are found, after printing the message. - """ - if message is not None: - self.to_stderr(message) - if not self.params.get('ignoreerrors', False): - raise DownloadError(message) - self._download_retcode = 1 + self.ydl.to_screen(message) + + def to_console_title(self, message): + self.ydl.to_console_title(message) + + def trouble(self, *args, **kargs): + self.ydl.trouble(*args, **kargs) + + def report_warning(self, *args, **kargs): + self.ydl.report_warning(*args, **kargs) + + def report_error(self, *args, **kargs): + self.ydl.report_error(*args, **kargs) def slow_down(self, start_time, byte_counter): """Sleep if the download speed is over the rate limit.""" @@ -251,8 +190,8 @@ class FileDownloader(object): if old_filename == new_filename: return os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) - except (IOError, OSError) as err: - self.trouble(u'ERROR: unable to rename file') + except (IOError, OSError): + self.report_error(u'unable to rename file') def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" @@ -266,35 +205,40 @@ class FileDownloader(object): filetime = timeconvert(timestr) if filetime is None: return filetime + # Ignore obviously invalid dates + if filetime == 0: + return try: os.utime(filename, (time.time(), filetime)) except: pass return filetime - def report_writedescription(self, descfn): - """ Report that the description file is being written """ - self.to_screen(u'[info] Writing video description to: ' + descfn) - - def report_writesubtitles(self, srtfn): - """ Report that the subtitles file is being written """ - self.to_screen(u'[info] Writing video subtitles to: ' + srtfn) - - def report_writeinfojson(self, infofn): - """ Report that the metadata file has been written """ - self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) - def report_destination(self, filename): """Report destination filename.""" self.to_screen(u'[download] Destination: ' + filename) - def report_progress(self, percent_str, data_len_str, speed_str, eta_str): + def report_progress(self, percent, data_len_str, speed, eta): """Report download progress.""" if self.params.get('noprogress', False): return - self.to_screen(u'\r[download] %s of %s at %s ETA %s' % - (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) - self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' % + clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'') + if eta is not None: + eta_str = self.format_eta(eta) + else: + eta_str = 'Unknown ETA' + if percent is not None: + percent_str = self.format_percent(percent) + else: + percent_str = 'Unknown %' + speed_str = self.format_speed(speed) + if self.params.get('progress_with_newline', False): + self.to_screen(u'[download] %s of %s at %s ETA %s' % + (percent_str, data_len_str, speed_str, eta_str)) + else: + self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' % + (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True) + self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' % (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip())) def report_resuming_byte(self, resume_len): @@ -309,242 +253,106 @@ class FileDownloader(object): """Report file has already been fully downloaded.""" try: self.to_screen(u'[download] %s has already been downloaded' % file_name) - except (UnicodeEncodeError) as err: + except UnicodeEncodeError: self.to_screen(u'[download] The file has already been downloaded') def report_unable_to_resume(self): """Report it was impossible to resume download.""" self.to_screen(u'[download] Unable to resume') - def report_finish(self): + def report_finish(self, data_len_str, tot_time): """Report download finished.""" if self.params.get('noprogress', False): self.to_screen(u'[download] Download completed') else: - self.to_screen(u'') - - def increment_downloads(self): - """Increment the ordinal that assigns a number to each file.""" - self._num_downloads += 1 - - def prepare_filename(self, info_dict): - """Generate the output filename.""" - try: - template_dict = dict(info_dict) - - template_dict['epoch'] = int(time.time()) - template_dict['autonumber'] = u'%05d' % self._num_downloads - - sanitize = lambda k,v: sanitize_filename( - u'NA' if v is None else compat_str(v), - restricted=self.params.get('restrictfilenames'), - is_id=(k==u'id')) - template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items()) - - filename = self.params['outtmpl'] % template_dict - return filename - except (ValueError, KeyError) as err: - self.trouble(u'ERROR: invalid system charset or erroneous output template') - return None - - def _match_entry(self, info_dict): - """ Returns None iff the file should be downloaded """ - - title = info_dict['title'] - matchtitle = self.params.get('matchtitle', False) - if matchtitle: - matchtitle = matchtitle.decode('utf8') - if not re.search(matchtitle, title, re.IGNORECASE): - return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' - rejecttitle = self.params.get('rejecttitle', False) - if rejecttitle: - rejecttitle = rejecttitle.decode('utf8') - if re.search(rejecttitle, title, re.IGNORECASE): - return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' - return None - - def process_info(self, info_dict): - """Process a single dictionary returned by an InfoExtractor.""" - - # Keep for backwards compatibility - info_dict['stitle'] = info_dict['title'] - - if not 'format' in info_dict: - info_dict['format'] = info_dict['ext'] - - reason = self._match_entry(info_dict) - if reason is not None: - self.to_screen(u'[download] ' + reason) - return - - max_downloads = self.params.get('max_downloads') - if max_downloads is not None: - if self._num_downloads > int(max_downloads): - raise MaxDownloadsReached() - - filename = self.prepare_filename(info_dict) - - # Forced printings - if self.params.get('forcetitle', False): - compat_print(info_dict['title']) - if self.params.get('forceurl', False): - compat_print(info_dict['url']) - if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: - compat_print(info_dict['thumbnail']) - if self.params.get('forcedescription', False) and 'description' in info_dict: - compat_print(info_dict['description']) - if self.params.get('forcefilename', False) and filename is not None: - compat_print(filename) - if self.params.get('forceformat', False): - compat_print(info_dict['format']) - - # Do nothing else if in simulate mode - if self.params.get('simulate', False): - return - - if filename is None: - return - - try: - dn = os.path.dirname(encodeFilename(filename)) - if dn != '' and not os.path.exists(dn): # dn is already encoded - os.makedirs(dn) - except (OSError, IOError) as err: - self.trouble(u'ERROR: unable to create directory ' + compat_str(err)) - return - - if self.params.get('writedescription', False): - try: - descfn = filename + u'.description' - self.report_writedescription(descfn) - descfile = open(encodeFilename(descfn), 'wb') - try: - descfile.write(info_dict['description'].encode('utf-8')) - finally: - descfile.close() - except (OSError, IOError): - self.trouble(u'ERROR: Cannot write description file ' + descfn) - return - - if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: - # subtitles download errors are already managed as troubles in relevant IE - # that way it will silently go on when used with unsupporting IE - try: - srtfn = filename.rsplit('.', 1)[0] + u'.srt' - self.report_writesubtitles(srtfn) - srtfile = open(encodeFilename(srtfn), 'wb') - try: - srtfile.write(info_dict['subtitles'].encode('utf-8')) - finally: - srtfile.close() - except (OSError, IOError): - self.trouble(u'ERROR: Cannot write subtitles file ' + descfn) - return - - if self.params.get('writeinfojson', False): - infofn = filename + u'.info.json' - self.report_writeinfojson(infofn) - try: - json.dump - except (NameError,AttributeError): - self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') - return - try: - infof = open(encodeFilename(infofn), 'wb') - try: - json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',)) - json.dump(json_info_dict, infof) - finally: - infof.close() - except (OSError, IOError): - self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn) - return - - if not self.params.get('skip_download', False): - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): - success = True - else: - try: - success = self._do_download(filename, info_dict) - except (OSError, IOError) as err: - raise UnavailableVideoError() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.trouble(u'ERROR: unable to download video data: %s' % str(err)) - return - except (ContentTooShortError, ) as err: - self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) - return - - if success: - try: - self.post_process(filename, info_dict) - except (PostProcessingError) as err: - self.trouble(u'ERROR: postprocessing: %s' % str(err)) - return - - def download(self, url_list): - """Download a given list of URLs.""" - if len(url_list) > 1 and self.fixed_template(): - raise SameFileError(self.params['outtmpl']) - - for url in url_list: - suitable_found = False - for ie in self._ies: - # Go to next InfoExtractor if not suitable - if not ie.suitable(url): + clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'') + self.to_screen(u'\r%s[download] 100%% of %s in %s' % + (clear_line, data_len_str, self.format_seconds(tot_time))) + + def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live): + def run_rtmpdump(args): + start = time.time() + resume_percent = None + resume_downloaded_data_len = None + proc = subprocess.Popen(args, stderr=subprocess.PIPE) + cursor_in_new_line = True + proc_stderr_closed = False + while not proc_stderr_closed: + # read line from stderr + line = u'' + while True: + char = proc.stderr.read(1) + if not char: + proc_stderr_closed = True + break + if char in [b'\r', b'\n']: + break + line += char.decode('ascii', 'replace') + if not line: + # proc_stderr_closed is True continue + mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line) + if mobj: + downloaded_data_len = int(float(mobj.group(1))*1024) + percent = float(mobj.group(2)) + if not resume_percent: + resume_percent = percent + resume_downloaded_data_len = downloaded_data_len + eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent) + speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len) + data_len = None + if percent > 0: + data_len = int(downloaded_data_len * 100 / percent) + data_len_str = u'~'+self.format_bytes(data_len) + self.report_progress(percent, data_len_str, speed, eta) + cursor_in_new_line = False + self._hook_progress({ + 'downloaded_bytes': downloaded_data_len, + 'total_bytes': data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'status': 'downloading', + 'eta': eta, + 'speed': speed, + }) + elif self.params.get('verbose', False): + if not cursor_in_new_line: + self.to_screen(u'') + cursor_in_new_line = True + self.to_screen(u'[rtmpdump] '+line) + proc.wait() + if not cursor_in_new_line: + self.to_screen(u'') + return proc.returncode - # Warn if the _WORKING attribute is False - if not ie.working(): - self.trouble(u'WARNING: the program functionality for this site has been marked as broken, ' - u'and will probably not work. If you want to go on, use the -i option.') - - # Suitable InfoExtractor found - suitable_found = True - - # Extract information from URL and process it - videos = ie.extract(url) - for video in videos or []: - video['extractor'] = ie.IE_NAME - try: - self.increment_downloads() - self.process_info(video) - except UnavailableVideoError: - self.trouble(u'\nERROR: unable to download video') - - # Suitable InfoExtractor had been found; go to next URL - break - - if not suitable_found: - self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) - - return self._download_retcode - - def post_process(self, filename, ie_info): - """Run the postprocessing chain on the given file.""" - info = dict(ie_info) - info['filepath'] = filename - for pp in self._pps: - info = pp.run(info) - if info is None: - break - - def _download_with_rtmpdump(self, filename, url, player_url): self.report_destination(filename) tmpfilename = self.temp_name(filename) + test = self.params.get('test', False) # Check for rtmpdump first try: - subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) except (OSError, IOError): - self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run') + self.report_error(u'RTMP download detected but "rtmpdump" could not be run') return False # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. - basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename] - args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)] + basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename] + if player_url is not None: + basic_args += ['--swfVfy', player_url] + if page_url is not None: + basic_args += ['--pageUrl', page_url] + if play_path is not None: + basic_args += ['--playpath', play_path] + if tc_url is not None: + basic_args += ['--tcUrl', url] + if test: + basic_args += ['--stop', '1'] + if live: + basic_args += ['--live'] + args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] if self.params.get('verbose', False): try: import pipes @@ -552,46 +360,140 @@ class FileDownloader(object): except ImportError: shell_quote = repr self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args)) - retval = subprocess.call(args) - while retval == 2 or retval == 1: + + retval = run_rtmpdump(args) + + while (retval == 2 or retval == 1) and not test: prevsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) + self.to_screen(u'[rtmpdump] %s bytes' % prevsize) time.sleep(5.0) # This seems to be needed - retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) + retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) cursize = os.path.getsize(encodeFilename(tmpfilename)) if prevsize == cursize and retval == 1: break # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those if prevsize == cursize and retval == 2 and cursize > 1024: - self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') + self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.') retval = 0 break + if retval == 0 or (test and retval == 2): + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'[rtmpdump] %s bytes' % fsize) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + return True + else: + self.to_stderr(u"\n") + self.report_error(u'rtmpdump exited with code %d' % retval) + return False + + def _download_with_mplayer(self, filename, url): + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url] + # Check for mplayer first + try: + subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + except (OSError, IOError): + self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] ) + return False + + # Download using mplayer. + retval = subprocess.call(args) + if retval == 0: + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize)) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + return True + else: + self.to_stderr(u"\n") + self.report_error(u'mplayer exited with code %d' % retval) + return False + + def _download_m3u8_with_ffmpeg(self, filename, url): + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy', + '-bsf:a', 'aac_adtstoasc', tmpfilename] + + for program in ['avconv', 'ffmpeg']: + try: + subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + break + except (OSError, IOError): + pass + else: + self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found') + cmd = [program] + args + + retval = subprocess.call(cmd) if retval == 0: - self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename))) + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize)) self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) return True else: - self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval) + self.to_stderr(u"\n") + self.report_error(u'ffmpeg exited with code %d' % retval) return False + def _do_download(self, filename, info_dict): url = info_dict['url'] - player_url = info_dict.get('player_url', None) # Check file already present if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False): self.report_file_already_downloaded(filename) + self._hook_progress({ + 'filename': filename, + 'status': 'finished', + 'total_bytes': os.path.getsize(encodeFilename(filename)), + }) return True # Attempt to download using rtmpdump if url.startswith('rtmp'): - return self._download_with_rtmpdump(filename, url, player_url) + return self._download_with_rtmpdump(filename, url, + info_dict.get('player_url', None), + info_dict.get('page_url', None), + info_dict.get('play_path', None), + info_dict.get('tc_url', None), + info_dict.get('rtmp_live', False)) + + # Attempt to download using mplayer + if url.startswith('mms') or url.startswith('rtsp'): + return self._download_with_mplayer(filename, url) + + # m3u8 manifest are downloaded with ffmpeg + if determine_ext(url) == u'm3u8': + return self._download_m3u8_with_ffmpeg(filename, url) tmpfilename = self.temp_name(filename) stream = None # Do not include the Accept-Encoding header headers = {'Youtubedl-no-compression': 'True'} + if 'user_agent' in info_dict: + headers['Youtubedl-user-agent'] = info_dict['user_agent'] basic_request = compat_urllib_request.Request(url, None, headers) request = compat_urllib_request.Request(url, None, headers) @@ -648,6 +550,10 @@ class FileDownloader(object): # the one in the hard drive. self.report_file_already_downloaded(filename) self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'filename': filename, + 'status': 'finished', + }) return True else: # The length does not match, we start the download over @@ -660,12 +566,21 @@ class FileDownloader(object): self.report_retry(count, retries) if count > retries: - self.trouble(u'ERROR: giving up after %s retries' % retries) + self.report_error(u'giving up after %s retries' % retries) return False data_len = data.info().get('Content-length', None) if data_len is not None: data_len = int(data_len) + resume_len + min_data_len = self.params.get("min_filesize", None) + max_data_len = self.params.get("max_filesize", None) + if min_data_len is not None and data_len < min_data_len: + self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) + return False + if max_data_len is not None and data_len > max_data_len: + self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) + return False + data_len_str = self.format_bytes(data_len) byte_counter = 0 + resume_len block_size = self.params.get('buffersize', 1024) @@ -687,33 +602,45 @@ class FileDownloader(object): filename = self.undo_temp_name(tmpfilename) self.report_destination(filename) except (OSError, IOError) as err: - self.trouble(u'ERROR: unable to open for writing: %s' % str(err)) + self.report_error(u'unable to open for writing: %s' % str(err)) return False try: stream.write(data_block) except (IOError, OSError) as err: - self.trouble(u'\nERROR: unable to write data: %s' % str(err)) + self.to_stderr(u"\n") + self.report_error(u'unable to write data: %s' % str(err)) return False if not self.params.get('noresizebuffer', False): block_size = self.best_block_size(after - before, len(data_block)) # Progress message - speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) + speed = self.calc_speed(start, time.time(), byte_counter - resume_len) if data_len is None: - self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA') + eta = percent = None else: - percent_str = self.calc_percent(byte_counter, data_len) - eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) - self.report_progress(percent_str, data_len_str, speed_str, eta_str) + percent = self.calc_percent(byte_counter, data_len) + eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) + self.report_progress(percent, data_len_str, speed, eta) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'status': 'downloading', + 'eta': eta, + 'speed': speed, + }) # Apply rate limit self.slow_down(start, byte_counter - resume_len) if stream is None: - self.trouble(u'\nERROR: Did not get any data blocks') + self.to_stderr(u"\n") + self.report_error(u'Did not get any data blocks') return False stream.close() - self.report_finish() + self.report_finish(data_len_str, (time.time() - start)) if data_len is not None and byte_counter != data_len: raise ContentTooShortError(byte_counter, int(data_len)) self.try_rename(tmpfilename, filename) @@ -722,4 +649,33 @@ class FileDownloader(object): if self.params.get('updatetime', True): info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None)) + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': byte_counter, + 'filename': filename, + 'status': 'finished', + }) + return True + + def _hook_progress(self, status): + for ph in self._progress_hooks: + ph(status) + + def add_progress_hook(self, ph): + """ ph gets called on download progress, with a dictionary with the entries + * filename: The final filename + * status: One of "downloading" and "finished" + + It can also have some of the following entries: + + * downloaded_bytes: Bytes on disks + * total_bytes: Total bytes, None if unknown + * tmpfilename: The filename we're currently writing to + * eta: The estimated time in seconds, None if unknown + * speed: The download speed in bytes/second, None if unknown + + Hooks are guaranteed to be called at least once (with status "finished") + if the download is successful. + """ + self._progress_hooks.append(ph)