import ctypes
from .compat import (
+ compat_basestring,
compat_cookiejar,
compat_expanduser,
compat_http_client,
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
- subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
+ subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
external_downloader: Executable of the external downloader to call.
listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit.
+ match_filter: A function that gets called with the info_dict of
+ every video.
+ If it returns a message, the video is ignored.
+ If it returns None, the video is downloaded.
+ match_filter_func in utils.py is one example for this.
+ no_color: Do not emit color codes in output.
The following parameters are not used by YoutubeDL itself, they are used by
else:
if self.params.get('no_warnings'):
return
- if self._err_file.isatty() and os.name != 'nt':
+ if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = 'WARNING:'
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
- if self._err_file.isatty() and os.name != 'nt':
+ if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;31mERROR:\033[0m'
else:
_msg_header = 'ERROR:'
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
- def _match_entry(self, info_dict):
+ def _match_entry(self, info_dict, incomplete):
""" Returns None iff the file should be downloaded """
video_title = info_dict.get('title', info_dict.get('id', 'video'))
if max_views is not None and view_count > max_views:
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
- return 'Skipping "%s" because it is age restricted' % title
+ return 'Skipping "%s" because it is age restricted' % video_title
if self.in_download_archive(info_dict):
return '%s has already been recorded in archive' % video_title
+
+ if not incomplete:
+ match_filter = self.params.get('match_filter')
+ if match_filter is not None:
+ ret = match_filter(info_dict)
+ if ret is not None:
+ return ret
+
return None
@staticmethod
'extractor_key': ie_result['extractor_key'],
}
- reason = self._match_entry(entry)
+ reason = self._match_entry(entry, incomplete=True)
if reason is not None:
self.to_screen('[download] ' + reason)
continue
'!=': operator.ne,
}
operator_rex = re.compile(r'''(?x)\s*\[
- (?P<key>width|height|tbr|abr|vbr|filesize|fps)
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
\]$
''' % '|'.join(map(re.escape, OPERATORS.keys())))
m = operator_rex.search(format_spec)
+ if m:
+ try:
+ comparison_value = int(m.group('value'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('value'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('value') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid value %r in format specification %r' % (
+ m.group('value'), format_spec))
+ op = OPERATORS[m.group('op')]
+
if not m:
- raise ValueError('Invalid format specification %r' % format_spec)
+ STR_OPERATORS = {
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ str_operator_rex = re.compile(r'''(?x)\s*\[
+ \s*(?P<key>ext|acodec|vcodec|container|protocol)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
+ \s*(?P<value>[a-zA-Z0-9_-]+)
+ \s*\]$
+ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+ m = str_operator_rex.search(format_spec)
+ if m:
+ comparison_value = m.group('value')
+ op = STR_OPERATORS[m.group('op')]
- try:
- comparison_value = int(m.group('value'))
- except ValueError:
- comparison_value = parse_filesize(m.group('value'))
- if comparison_value is None:
- comparison_value = parse_filesize(m.group('value') + 'B')
- if comparison_value is None:
- raise ValueError(
- 'Invalid value %r in format specification %r' % (
- m.group('value'), format_spec))
- op = OPERATORS[m.group('op')]
+ if not m:
+ raise ValueError('Invalid format specification %r' % format_spec)
def _filter(f):
actual_value = f.get(m.group('key'))
def has_header(self, h):
return h in self.headers
+ def get_header(self, h, default=None):
+ return self.headers.get(h, default)
+
pr = _PseudoRequest(info_dict['url'])
self.cookiejar.add_cookie_header(pr)
return pr.headers.get('Cookie')
thumbnails.sort(key=lambda t: (
t.get('preference'), t.get('width'), t.get('height'),
t.get('id'), t.get('url')))
- for t in thumbnails:
+ for i, t in enumerate(thumbnails):
if 'width' in t and 'height' in t:
t['resolution'] = '%dx%d' % (t['width'], t['height'])
+ if t.get('id') is None:
+ t['id'] = '%d' % i
if thumbnails and 'thumbnail' not in info_dict:
info_dict['thumbnail'] = thumbnails[-1]['url']
info_dict['timestamp'])
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+ if self.params.get('listsubtitles', False):
+ if 'automatic_captions' in info_dict:
+ self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+ self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+ return
+ info_dict['requested_subtitles'] = self.process_subtitles(
+ info_dict['id'], info_dict.get('subtitles'),
+ info_dict.get('automatic_captions'))
+
# This extractors handle format selection themselves
if info_dict['extractor'] in ['Youku']:
if download:
else self.params['merge_output_format'])
selected_format = {
'requested_formats': formats_info,
- 'format': rf,
- 'format_id': rf,
- 'ext': formats_info[0]['ext'],
+ 'format': '%s+%s' % (formats_info[0].get('format'),
+ formats_info[1].get('format')),
+ 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
+ formats_info[1].get('format_id')),
'width': formats_info[0].get('width'),
'height': formats_info[0].get('height'),
'resolution': formats_info[0].get('resolution'),
info_dict.update(formats_to_download[-1])
return info_dict
+ def process_subtitles(self, video_id, available_subs, available_autocaps):
+ """Select the requested subtitles and their format"""
+ if available_autocaps and self.params.get('writeautomaticsub'):
+ available_subs = available_subs.copy()
+ for lang, cap_info in available_autocaps.items():
+ if lang not in available_subs:
+ available_subs[lang] = cap_info
+
+ if not available_subs:
+ return available_subs
+
+ if self.params.get('allsubtitles', False):
+ requested_langs = available_subs.keys()
+ else:
+ if self.params.get('subtitleslangs', False):
+ requested_langs = self.params.get('subtitleslangs')
+ elif 'en' in available_subs:
+ requested_langs = ['en']
+ else:
+ requested_langs = [list(available_subs.keys())[0]]
+
+ formats_query = self.params.get('subtitlesformat', 'best')
+ formats_preference = formats_query.split('/') if formats_query else []
+ subs = {}
+ for lang in requested_langs:
+ formats = available_subs.get(lang)
+ if formats is None:
+ self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+ continue
+ if isinstance(formats, compat_str):
+ # TODO: convert all IE with subtitles support to the new format
+ # and remove this
+ subs[lang] = {
+ 'ext': formats_preference[0],
+ 'data': formats,
+ }
+ continue
+ for ext in formats_preference:
+ if ext == 'best':
+ f = formats[-1]
+ break
+ matches = list(filter(lambda f: f['ext'] == ext, formats))
+ if matches:
+ f = matches[-1]
+ break
+ else:
+ f = formats[-1]
+ self.report_warning(
+ 'No subtitle format found matching "%s" for language %s, '
+ 'using %s' % (formats_query, lang, f['ext']))
+ subs[lang] = f
+ return subs
+
def process_info(self, info_dict):
"""Process a single resolved IE result."""
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
- reason = self._match_entry(info_dict)
+ reason = self._match_entry(info_dict, incomplete=False)
if reason is not None:
self.to_screen('[download] ' + reason)
return
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
- if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
+ if subtitles_are_requested and info_dict.get('requested_subtitles'):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
- subtitles = info_dict['subtitles']
- sub_format = self.params.get('subtitlesformat', 'srt')
- for sub_lang in subtitles.keys():
- sub = subtitles[sub_lang]
- if sub is None:
- continue
+ subtitles = info_dict['requested_subtitles']
+ for sub_lang, sub_info in subtitles.items():
+ sub_format = sub_info['ext']
+ if sub_info.get('data') is not None:
+ sub_data = sub_info['data']
+ else:
+ try:
+ uf = self.urlopen(sub_info['url'])
+ sub_data = uf.read().decode('utf-8')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, compat_str(err)))
+ continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
- subfile.write(sub)
+ subfile.write(sub_data)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
line(f, idlen) for f in formats
if f.get('preference') is None or f['preference'] >= -1000]
if len(formats) > 1:
- formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
header_line = line({
['ID', 'width', 'height', 'URL'],
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
+ def list_subtitles(self, video_id, subtitles, name='subtitles'):
+ if not subtitles:
+ self.to_screen('%s has no %s' % (video_id, name))
+ return
+ self.to_screen(
+ 'Available %s for %s:' % (name, video_id))
+ self.to_screen(render_table(
+ ['Language', 'formats'],
+ [[lang, ', '.join(f['ext'] for f in reversed(formats))]
+ for lang, formats in subtitles.items()]))
+
def urlopen(self, req):
""" Start an HTTP download """
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
# To work around aforementioned issue we will replace request's original URL with
# percent-encoded one
- req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
+ req_is_string = isinstance(req, compat_basestring)
url = req if req_is_string else req.get_full_url()
url_escaped = escape_url(url)