from __future__ import absolute_import
import math
+import io
import os
import re
import socket
import subprocess
import sys
import time
+import traceback
if os.name == 'nt':
import ctypes
writeinfojson: Write the video description to a .info.json file
writesubtitles: Write the video subtitles to a .srt file
subtitleslang: Language of the subtitles to download
+ test: Download only first bytes to test the downloader.
+ keepvideo: Keep the video file after post-processing
+ min_filesize: Skip files smaller than this size
+ max_filesize: Skip files larger than this size
"""
params = None
"""Create a FileDownloader object with the given options."""
self._ies = []
self._pps = []
+ self._progress_hooks = []
self._download_retcode = 0
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
- sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
+ self.to_screen('\033]0;%s\007' % message, skip_eol=True)
def fixed_template(self):
"""Checks if the output template is fixed."""
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
- def trouble(self, message=None):
+ def trouble(self, message=None, tb=None):
"""Determine action to take when a download problem appears.
Depending on if the downloader has been configured to ignore
download errors or not, this method may throw an exception or
not when errors are found, after printing the message.
+
+ tb, if given, is additional traceback information.
"""
if message is not None:
self.to_stderr(message)
+ if self.params.get('verbose'):
+ if tb is None:
+ tb_data = traceback.format_list(traceback.extract_stack())
+ tb = u''.join(tb_data)
+ self.to_stderr(tb)
if not self.params.get('ignoreerrors', False):
raise DownloadError(message)
self._download_retcode = 1
"""Report download progress."""
if self.params.get('noprogress', False):
return
- self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
+ if self.params.get('progress_with_newline', False):
+ self.to_screen(u'[download] %s of %s at %s ETA %s' %
+ (percent_str, data_len_str, speed_str, eta_str))
+ else:
+ self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
template_dict['epoch'] = int(time.time())
template_dict['autonumber'] = u'%05d' % self._num_downloads
- template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items())
- template_dict = dict((k, sanitize_filename(compat_str(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items())
+ sanitize = lambda k,v: sanitize_filename(
+ u'NA' if v is None else compat_str(v),
+ restricted=self.params.get('restrictfilenames'),
+ is_id=(k==u'id'))
+ template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
filename = self.params['outtmpl'] % template_dict
return filename
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle:
- matchtitle = matchtitle.decode('utf8')
if not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle:
- rejecttitle = rejecttitle.decode('utf8')
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
return None
try:
descfn = filename + u'.description'
self.report_writedescription(descfn)
- descfile = open(encodeFilename(descfn), 'wb')
- try:
- descfile.write(info_dict['description'].encode('utf-8'))
- finally:
- descfile.close()
+ with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ descfile.write(info_dict['description'])
except (OSError, IOError):
self.trouble(u'ERROR: Cannot write description file ' + descfn)
return
try:
srtfn = filename.rsplit('.', 1)[0] + u'.srt'
self.report_writesubtitles(srtfn)
- srtfile = open(encodeFilename(srtfn), 'wb')
- try:
- srtfile.write(info_dict['subtitles'].encode('utf-8'))
- finally:
- srtfile.close()
+ with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile:
+ srtfile.write(info_dict['subtitles'])
except (OSError, IOError):
self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
return
infofn = filename + u'.info.json'
self.report_writeinfojson(infofn)
try:
- json.dump
- except (NameError,AttributeError):
- self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
- return
- try:
- infof = open(encodeFilename(infofn), 'wb')
- try:
- json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
- json.dump(json_info_dict, infof)
- finally:
- infof.close()
+ json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
+ write_json_file(json_info_dict, encodeFilename(infofn))
except (OSError, IOError):
self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
return
try:
success = self._do_download(filename, info_dict)
except (OSError, IOError) as err:
- raise UnavailableVideoError
+ raise UnavailableVideoError()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.trouble(u'ERROR: unable to download video data: %s' % str(err))
return
# Warn if the _WORKING attribute is False
if not ie.working():
- self.trouble(u'WARNING: the program functionality for this site has been marked as broken, '
- u'and will probably not work. If you want to go on, use the -i option.')
+ self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
+ u'and will probably not work. If you want to go on, use the -i option.')
# Suitable InfoExtractor found
suitable_found = True
# Extract information from URL and process it
- videos = ie.extract(url)
+ try:
+ videos = ie.extract(url)
+ except ExtractorError as de: # An error we somewhat expected
+ self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
+ break
+ except Exception as e:
+ if self.params.get('ignoreerrors', False):
+ self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc()))
+ break
+ else:
+ raise
+
+ if len(videos or []) > 1 and self.fixed_template():
+ raise SameFileError(self.params['outtmpl'])
+
for video in videos or []:
video['extractor'] = ie.IE_NAME
try:
return self._download_retcode
def post_process(self, filename, ie_info):
- """Run the postprocessing chain on the given file."""
+ """Run all the postprocessors on the given file."""
info = dict(ie_info)
info['filepath'] = filename
+ keep_video = None
for pp in self._pps:
- info = pp.run(info)
- if info is None:
- break
+ try:
+ keep_video_wish,new_info = pp.run(info)
+ if keep_video_wish is not None:
+ if keep_video_wish:
+ keep_video = keep_video_wish
+ elif keep_video is None:
+ # No clear decision yet, let IE decide
+ keep_video = keep_video_wish
+ except PostProcessingError as e:
+ self.to_stderr(u'ERROR: ' + e.msg)
+ if keep_video is False and not self.params.get('keepvideo', False):
+ try:
+ self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
+ os.remove(encodeFilename(filename))
+ except (IOError, OSError):
+ self.to_stderr(u'WARNING: Unable to remove downloaded video file')
- def _download_with_rtmpdump(self, filename, url, player_url):
+ def _download_with_rtmpdump(self, filename, url, player_url, page_url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
- basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
+ basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
+ if player_url is not None:
+ basic_args += ['-W', player_url]
+ if page_url is not None:
+ basic_args += ['--pageUrl', page_url]
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
if self.params.get('verbose', False):
try:
retval = 0
break
if retval == 0:
- self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
+ fsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
+ self._hook_progress({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ 'filename': filename,
+ 'status': 'finished',
+ })
return True
else:
self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
def _do_download(self, filename, info_dict):
url = info_dict['url']
- player_url = info_dict.get('player_url', None)
# Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename)
+ self._hook_progress({
+ 'filename': filename,
+ 'status': 'finished',
+ })
return True
# Attempt to download using rtmpdump
if url.startswith('rtmp'):
- return self._download_with_rtmpdump(filename, url, player_url)
+ return self._download_with_rtmpdump(filename, url,
+ info_dict.get('player_url', None),
+ info_dict.get('page_url', None))
tmpfilename = self.temp_name(filename)
stream = None
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
+ if 'user_agent' in info_dict:
+ headers['Youtubedl-user-agent'] = info_dict['user_agent']
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
+ if self.params.get('test', False):
+ request.add_header('Range','bytes=0-10240')
+
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
resume_len = os.path.getsize(encodeFilename(tmpfilename))
# the one in the hard drive.
self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename)
+ self._hook_progress({
+ 'filename': filename,
+ 'status': 'finished',
+ })
return True
else:
# The length does not match, we start the download over
data_len = data.info().get('Content-length', None)
if data_len is not None:
data_len = int(data_len) + resume_len
+ min_data_len = self.params.get("min_filesize", None)
+ max_data_len = self.params.get("max_filesize", None)
+ if min_data_len is not None and data_len < min_data_len:
+ self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+ return False
+ if max_data_len is not None and data_len > max_data_len:
+ self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+ return False
+
data_len_str = self.format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent_str, data_len_str, speed_str, eta_str)
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': data_len,
+ 'tmpfilename': tmpfilename,
+ 'filename': filename,
+ 'status': 'downloading',
+ })
+
# Apply rate limit
self.slow_down(start, byte_counter - resume_len)
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': byte_counter,
+ 'filename': filename,
+ 'status': 'finished',
+ })
+
return True
+
+ def _hook_progress(self, status):
+ for ph in self._progress_hooks:
+ ph(status)
+
+ def add_progress_hook(self, ph):
+ """ ph gets called on download progress, with a dictionary with the entries
+ * filename: The final filename
+ * status: One of "downloading" and "finished"
+
+ It can also have some of the following entries:
+
+ * downloaded_bytes: Bytes on disks
+ * total_bytes: Total bytes, None if unknown
+ * tmpfilename: The filename we're currently writing to
+
+ Hooks are guaranteed to be called at least once (with status "finished")
+ if the download is successful.
+ """
+ self._progress_hooks.append(ph)