import traceback
import random
+from string import ascii_letters
+
from .compat import (
compat_basestring,
compat_cookiejar,
write_string,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
+ PhantomJSwrapper,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
FORMAT_RE.format(numeric_field),
r'%({0})s'.format(numeric_field), outtmpl)
- filename = expand_path(outtmpl % template_dict)
+ # expand_path translates '%%' into '%' and '$$' into '$', which is
+ # not what we want here: '%%' must be kept intact for the template
+ # dict substitution step. Work around this by temporarily splitting
+ # each pair with a random, boundary-like separator.
+ sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
+ outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+
+ # outtmpl should be expand_path'ed before template dict substitution
+ # because meta fields may contain env variables we don't want to
+ # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
+ # title "Hello $PATH", we don't want `$PATH` to be expanded.
+ filename = expand_path(outtmpl).replace(sep, '') % template_dict
+
# Temporary fix for #4787
# 'Treat' all problem characters by passing filename through preferredencoding
# to workaround encoding issues with subprocess on python2 @ Windows
force_properties = dict(
(k, v) for k, v in ie_result.items() if v is not None)
- for f in ('_type', 'url', 'ie_key'):
+ for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
if f in force_properties:
del force_properties[f]
new_result = info.copy()
return op(actual_value, comparison_value)
return _filter
+ # Build the format spec used when no explicit format was requested.
+ # Returns 'bestvideo+bestaudio/best' when separately selected video and
+ # audio formats are acceptable (see can_have_partial_formats below),
+ # otherwise just 'best'.
+ #   info_dict: dict whose 'is_live' entry is consulted below.
+ #   download:  when false, partial formats are always allowed.
+ def _default_format_spec(self, info_dict, download=True):
+ req_format_list = []
+
+ # Decides whether 'bestvideo+bestaudio' may be offered ahead of 'best'.
+ def can_have_partial_formats():
+ # Simulating or not downloading: allowed without consulting the merger.
+ if self.params.get('simulate', False):
+ return True
+ if not download:
+ return True
+ # An output template of '-' rules partial formats out
+ # (presumably '-' means writing to stdout - TODO confirm).
+ if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+ return False
+ # Live streams rule partial formats out as well.
+ if info_dict.get('is_live'):
+ return False
+ # Otherwise it depends on the merger post-processor reporting itself
+ # as available and able to merge.
+ merger = FFmpegMergerPP(self)
+ return merger.available and merger.can_merge()
+ if can_have_partial_formats():
+ req_format_list.append('bestvideo+bestaudio')
+ # 'best' is always appended last, so it is the final '/' alternative.
+ req_format_list.append('best')
+ return '/'.join(req_format_list)
+
def build_format_selector(self, format_spec):
def syntax_error(note, start):
message = (
+ # Predicate telling whether format dict `f` carries a usable 'url'.
+ # Returns False (after emitting a warning) when the URL is missing or
+ # empty, True otherwise.
def is_wellformed(f):
url = f.get('url')
- valid_url = url and isinstance(url, compat_str)
- if not valid_url:
+ # A missing or empty URL makes the format unusable: warn and reject it.
+ if not url:
self.report_warning(
'"url" field is missing or empty - skipping format, '
'there is an error in extractor')
- return valid_url
+ return False
+ # A bytes URL is handed to sanitize_string_field instead of being
+ # rejected (presumably it is converted to text there - TODO confirm).
+ if isinstance(url, bytes):
+ sanitize_string_field(f, 'url')
+ return True
# Filter out malformed formats for better extraction robustness
formats = list(filter(is_wellformed, formats))
sanitize_string_field(format, 'format_id')
sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url'])
- if format.get('format_id') is None:
+ if not format.get('format_id'):
format['format_id'] = compat_str(i)
else:
# Sanitize format_id from characters used in format selector expression
req_format = self.params.get('format')
if req_format is None:
- req_format_list = []
- if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
- not info_dict.get('is_live')):
- merger = FFmpegMergerPP(self)
- if merger.available and merger.can_merge():
- req_format_list.append('bestvideo+bestaudio')
- req_format_list.append('best')
- req_format = '/'.join(req_format_list)
+ req_format = self._default_format_spec(info_dict, download=download)
+ if self.params.get('verbose'):
+ self.to_stdout('[debug] Default format spec: %s' % req_format)
+
format_selector = self.build_format_selector(req_format)
# While in format selection we may need to have an access to the original
if filename is None:
return
- try:
- dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
- if dn and not os.path.exists(dn):
- os.makedirs(dn)
- except (OSError, IOError) as err:
- self.report_error('unable to create directory ' + error_to_compat_str(err))
+ # Make sure the parent directory of `path` exists, creating it (and any
+ # missing intermediate directories) when necessary.
+ # Returns True on success. On OSError/IOError the failure is reported
+ # through self.report_error and False is returned.
+ def ensure_dir_exists(path):
+ try:
+ dn = os.path.dirname(path)
+ # An empty dirname means `path` has no directory component, so
+ # there is nothing to create.
+ # NOTE(review): exists() followed by makedirs() is not atomic; if the
+ # directory appears in between, makedirs raises and the call is
+ # reported as a failure - confirm whether that matters here.
+ if dn and not os.path.exists(dn):
+ os.makedirs(dn)
+ return True
+ except (OSError, IOError) as err:
+ self.report_error('unable to create directory ' + error_to_compat_str(err))
+ return False
+
+ if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
return
if self.params.get('writedescription', False):
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
- if sub_info.get('data') is not None:
- sub_data = sub_info['data']
+ sub_filename = subtitles_filename(filename, sub_lang, sub_format)
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+ self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else:
- try:
- sub_data = ie._download_webpage(
- sub_info['url'], info_dict['id'], note=False)
- except ExtractorError as err:
- self.report_warning('Unable to download subtitle for "%s": %s' %
- (sub_lang, error_to_compat_str(err.cause)))
- continue
- try:
- sub_filename = subtitles_filename(filename, sub_lang, sub_format)
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
- self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
+ self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
+ if sub_info.get('data') is not None:
+ try:
+ # Use newline='' to prevent conversion of newline characters
+ # See https://github.com/rg3/youtube-dl/issues/10268
+ with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+ subfile.write(sub_info['data'])
+ except (OSError, IOError):
+ self.report_error('Cannot write subtitles file ' + sub_filename)
+ return
else:
- self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
- # Use newline='' to prevent conversion of newline characters
- # See https://github.com/rg3/youtube-dl/issues/10268
- with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
- subfile.write(sub_data)
- except (OSError, IOError):
- self.report_error('Cannot write subtitles file ' + sub_filename)
- return
+ try:
+ sub_data = ie._request_webpage(
+ sub_info['url'], info_dict['id'], note=False).read()
+ with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+ subfile.write(sub_data)
+ except (ExtractorError, IOError, OSError, ValueError) as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, error_to_compat_str(err)))
+ continue
if self.params.get('writeinfojson', False):
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
for f in requested_formats:
new_info = dict(info_dict)
new_info.update(f)
- fname = self.prepare_filename(new_info)
- fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+ fname = prepend_extension(
+ self.prepare_filename(new_info),
+ 'f%s' % f['format_id'], new_info['ext'])
+ if not ensure_dir_exists(fname):
+ return
downloaded.append(fname)
partial_success = dl(fname, new_info)
success = success and partial_success
info_dict.get('protocol') == 'm3u8' and
self.params.get('hls_prefer_native')):
if fixup_policy == 'warn':
- self.report_warning('%s: malformated aac bitstream.' % (
+ self.report_warning('%s: malformed AAC bitstream detected.' % (
info_dict['id']))
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM3u8PP(self)
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
- '%s: malformated aac bitstream. %s'
+ '%s: malformed AAC bitstream detected. %s'
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
else:
assert fixup_policy in ('ignore', 'never')
exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
+ exe_versions['phantomjs'] = PhantomJSwrapper._version()
exe_str = ', '.join(
'%s %s' % (exe, v)
for exe, v in sorted(exe_versions.items())