linux-brain/fs/xfs/xfs_xattr.c

240 lines
5.4 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2008 Christoph Hellwig.
* Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
*/
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_attr.h"
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
static int
xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused,
struct inode *inode, const char *name, void *value, size_t size)
{
int xflags = handler->flags;
struct xfs_inode *ip = XFS_I(inode);
int error, asize = size;
/* Convert Linux syscall to XFS internal ATTR flags */
if (!size) {
xflags |= ATTR_KERNOVAL;
value = NULL;
}
xfs: allocate xattr buffer on demand When doing file lookups and checking for permissions, we end up in xfs_get_acl() to see if there are any ACLs on the inode. This requires and xattr lookup, and to do that we have to supply a buffer large enough to hold an maximum sized xattr. On workloads were we are accessing a wide range of cache cold files under memory pressure (e.g. NFS fileservers) we end up spending a lot of time allocating the buffer. The buffer is 64k in length, so is a contiguous multi-page allocation, and if that then fails we fall back to vmalloc(). Hence the allocation here is /expensive/ when we are looking up hundreds of thousands of files a second. Initial numbers from a bpf trace show average time in xfs_get_acl() is ~32us, with ~19us of that in the memory allocation. Note these are average times, so there are going to be affected by the worst case allocations more than the common fast case... To avoid this, we could just do a "null" lookup to see if the ACL xattr exists and then only do the allocation if it exists. This, however, optimises the path for the "no ACL present" case at the expense of the "acl present" case. i.e. we can halve the time in xfs_get_acl() for the no acl case (i.e down to ~10-15us), but that then increases the ACL case by 30% (i.e. up to 40-45us). To solve this and speed up both cases, drive the xattr buffer allocation into the attribute code once we know what the actual xattr length is. For the no-xattr case, we avoid the allocation completely, speeding up that case. For the common ACL case, we'll end up with a fast heap allocation (because it'll be smaller than a page), and only for the rarer "we have a remote xattr" will we have a multi-page allocation occur. Hence the common ACL case will be much faster, too. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2019-08-30 01:04:10 +09:00
error = xfs_attr_get(ip, name, (unsigned char **)&value, &asize, xflags);
if (error)
return error;
return asize;
}
void
xfs_forget_acl(
struct inode *inode,
const char *name,
int xflags)
{
/*
* Invalidate any cached ACLs if the user has bypassed the ACL
* interface. We don't validate the content whatsoever so it is caller
* responsibility to provide data in valid format and ensure i_mode is
* consistent.
*/
if (xflags & ATTR_ROOT) {
#ifdef CONFIG_XFS_POSIX_ACL
if (!strcmp(name, SGI_ACL_FILE))
forget_cached_acl(inode, ACL_TYPE_ACCESS);
else if (!strcmp(name, SGI_ACL_DEFAULT))
forget_cached_acl(inode, ACL_TYPE_DEFAULT);
#endif
}
}
static int
xfs_xattr_set(const struct xattr_handler *handler, struct dentry *unused,
struct inode *inode, const char *name, const void *value,
size_t size, int flags)
{
int xflags = handler->flags;
struct xfs_inode *ip = XFS_I(inode);
xfs: invalidate cached acl if set directly via xattr ACLs are stored as extended attributes of the inode to which they apply. XFS converts the standard "system.posix_acl_[access|default]" attribute names used to control ACLs to "trusted.SGI_ACL_[FILE|DEFAULT]" as stored on-disk. These xattrs are directly exposed in on-disk format via getxattr/setxattr, without any ACL aware code in the path to perform validation, etc. This is partly historical and supports backup/restore applications such as xfsdump to back up and restore the binary blob that represents ACLs as-is. Andreas reports that the ACLs observed via the getfacl interface is not consistent when ACLs are set directly via the setxattr path. This occurs because the ACLs are cached in-core against the inode and the xattr path has no knowledge that the operation relates to ACLs. Update the xattr set codepath to trap writes of the special XFS ACL attributes and invalidate the associated cached ACL when this occurs. This ensures that the correct ACLs are used on a subsequent operation through the actual ACL interface. Note that this does not update or add support for setting the ACL xattrs directly beyond the restore use case that requires a correctly formatted binary blob and to restore a consistent i_mode at the same time. It is still possible for a root user to set an invalid or inconsistent (with i_mode) ACL blob on-disk and potentially cause corruption. [ With fixes from Andreas Gruenbacher. ] Reported-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-11-03 10:40:59 +09:00
int error;
/* Convert Linux syscall to XFS internal ATTR flags */
if (flags & XATTR_CREATE)
xflags |= ATTR_CREATE;
if (flags & XATTR_REPLACE)
xflags |= ATTR_REPLACE;
if (!value)
return xfs_attr_remove(ip, (unsigned char *)name, xflags);
xfs: invalidate cached acl if set directly via xattr ACLs are stored as extended attributes of the inode to which they apply. XFS converts the standard "system.posix_acl_[access|default]" attribute names used to control ACLs to "trusted.SGI_ACL_[FILE|DEFAULT]" as stored on-disk. These xattrs are directly exposed in on-disk format via getxattr/setxattr, without any ACL aware code in the path to perform validation, etc. This is partly historical and supports backup/restore applications such as xfsdump to back up and restore the binary blob that represents ACLs as-is. Andreas reports that the ACLs observed via the getfacl interface is not consistent when ACLs are set directly via the setxattr path. This occurs because the ACLs are cached in-core against the inode and the xattr path has no knowledge that the operation relates to ACLs. Update the xattr set codepath to trap writes of the special XFS ACL attributes and invalidate the associated cached ACL when this occurs. This ensures that the correct ACLs are used on a subsequent operation through the actual ACL interface. Note that this does not update or add support for setting the ACL xattrs directly beyond the restore use case that requires a correctly formatted binary blob and to restore a consistent i_mode at the same time. It is still possible for a root user to set an invalid or inconsistent (with i_mode) ACL blob on-disk and potentially cause corruption. [ With fixes from Andreas Gruenbacher. ] Reported-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-11-03 10:40:59 +09:00
error = xfs_attr_set(ip, (unsigned char *)name,
(void *)value, size, xflags);
if (!error)
xfs_forget_acl(inode, name, xflags);
xfs: invalidate cached acl if set directly via xattr ACLs are stored as extended attributes of the inode to which they apply. XFS converts the standard "system.posix_acl_[access|default]" attribute names used to control ACLs to "trusted.SGI_ACL_[FILE|DEFAULT]" as stored on-disk. These xattrs are directly exposed in on-disk format via getxattr/setxattr, without any ACL aware code in the path to perform validation, etc. This is partly historical and supports backup/restore applications such as xfsdump to back up and restore the binary blob that represents ACLs as-is. Andreas reports that the ACLs observed via the getfacl interface is not consistent when ACLs are set directly via the setxattr path. This occurs because the ACLs are cached in-core against the inode and the xattr path has no knowledge that the operation relates to ACLs. Update the xattr set codepath to trap writes of the special XFS ACL attributes and invalidate the associated cached ACL when this occurs. This ensures that the correct ACLs are used on a subsequent operation through the actual ACL interface. Note that this does not update or add support for setting the ACL xattrs directly beyond the restore use case that requires a correctly formatted binary blob and to restore a consistent i_mode at the same time. It is still possible for a root user to set an invalid or inconsistent (with i_mode) ACL blob on-disk and potentially cause corruption. [ With fixes from Andreas Gruenbacher. ] Reported-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-11-03 10:40:59 +09:00
return error;
}
static const struct xattr_handler xfs_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
.flags = 0, /* no flags implies user namespace */
.get = xfs_xattr_get,
.set = xfs_xattr_set,
};
static const struct xattr_handler xfs_xattr_trusted_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
.flags = ATTR_ROOT,
.get = xfs_xattr_get,
.set = xfs_xattr_set,
};
static const struct xattr_handler xfs_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.flags = ATTR_SECURE,
.get = xfs_xattr_get,
.set = xfs_xattr_set,
};
const struct xattr_handler *xfs_xattr_handlers[] = {
&xfs_xattr_user_handler,
&xfs_xattr_trusted_handler,
&xfs_xattr_security_handler,
#ifdef CONFIG_XFS_POSIX_ACL
&posix_acl_access_xattr_handler,
&posix_acl_default_xattr_handler,
#endif
NULL
};
static void
__xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
char *prefix,
int prefix_len,
unsigned char *name,
int namelen)
{
char *offset;
int arraytop;
if (context->count < 0 || context->seen_enough)
return;
if (!context->alist)
goto compute_size;
arraytop = context->count + prefix_len + namelen + 1;
if (arraytop > context->firstu) {
context->count = -1; /* insufficient space */
context->seen_enough = 1;
return;
}
offset = (char *)context->alist + context->count;
strncpy(offset, prefix, prefix_len);
offset += prefix_len;
strncpy(offset, (char *)name, namelen); /* real name */
offset += namelen;
*offset = '\0';
compute_size:
context->count += prefix_len + namelen + 1;
return;
}
static void
xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
int flags,
unsigned char *name,
int namelen,
int valuelen)
{
char *prefix;
int prefix_len;
ASSERT(context->count >= 0);
if (flags & XFS_ATTR_ROOT) {
#ifdef CONFIG_XFS_POSIX_ACL
if (namelen == SGI_ACL_FILE_SIZE &&
strncmp(name, SGI_ACL_FILE,
SGI_ACL_FILE_SIZE) == 0) {
__xfs_xattr_put_listent(
context, XATTR_SYSTEM_PREFIX,
XATTR_SYSTEM_PREFIX_LEN,
XATTR_POSIX_ACL_ACCESS,
strlen(XATTR_POSIX_ACL_ACCESS));
} else if (namelen == SGI_ACL_DEFAULT_SIZE &&
strncmp(name, SGI_ACL_DEFAULT,
SGI_ACL_DEFAULT_SIZE) == 0) {
__xfs_xattr_put_listent(
context, XATTR_SYSTEM_PREFIX,
XATTR_SYSTEM_PREFIX_LEN,
XATTR_POSIX_ACL_DEFAULT,
strlen(XATTR_POSIX_ACL_DEFAULT));
}
#endif
/*
* Only show root namespace entries if we are actually allowed to
* see them.
*/
if (!capable(CAP_SYS_ADMIN))
return;
prefix = XATTR_TRUSTED_PREFIX;
prefix_len = XATTR_TRUSTED_PREFIX_LEN;
} else if (flags & XFS_ATTR_SECURE) {
prefix = XATTR_SECURITY_PREFIX;
prefix_len = XATTR_SECURITY_PREFIX_LEN;
} else {
prefix = XATTR_USER_PREFIX;
prefix_len = XATTR_USER_PREFIX_LEN;
}
__xfs_xattr_put_listent(context, prefix, prefix_len, name,
namelen);
return;
}
ssize_t
xfs_vn_listxattr(
struct dentry *dentry,
char *data,
size_t size)
{
struct xfs_attr_list_context context;
struct attrlist_cursor_kern cursor = { 0 };
struct inode *inode = d_inode(dentry);
int error;
/*
* First read the regular on-disk attributes.
*/
memset(&context, 0, sizeof(context));
context.dp = XFS_I(inode);
context.cursor = &cursor;
context.resynch = 1;
context.alist = size ? data : NULL;
context.bufsize = size;
context.firstu = context.bufsize;
context.put_listent = xfs_xattr_put_listent;
error = xfs_attr_list_int(&context);
if (error)
return error;
if (context.count < 0)
return -ERANGE;
return context.count;
}