linux-brain/fs/internal.h
Paul Gortmaker eef99860c6 cgroup1: fix leaked context root causing sporadic NULL deref in LTP
commit 1e7107c5ef44431bc1ebbd4c353f1d7c22e5f2ec upstream.

Richard reported sporadic (roughly one in 10 or so) null dereferences and
other strange behaviour for a set of automated LTP tests.  Things like:

   BUG: kernel NULL pointer dereference, address: 0000000000000008
   #PF: supervisor read access in kernel mode
   #PF: error_code(0x0000) - not-present page
   PGD 0 P4D 0
   Oops: 0000 [#1] PREEMPT SMP PTI
   CPU: 0 PID: 1516 Comm: umount Not tainted 5.10.0-yocto-standard #1
   Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014
   RIP: 0010:kernfs_sop_show_path+0x1b/0x60

...or these others:

   RIP: 0010:do_mkdirat+0x6a/0xf0
   RIP: 0010:d_alloc_parallel+0x98/0x510
   RIP: 0010:do_readlinkat+0x86/0x120

There were other less common instances of some kind of a general scribble
but the common theme was mount and cgroup and a dubious dentry triggering
the NULL dereference.  I was only able to reproduce it under qemu by
replicating Richard's setup as closely as possible - I never did get it
to happen on bare metal, even while keeping everything else the same.

In commit 71d883c37e ("cgroup_do_mount(): massage calling conventions")
we see this as a part of the overall change:

   --------------
           struct cgroup_subsys *ss;
   -       struct dentry *dentry;

   [...]

   -       dentry = cgroup_do_mount(&cgroup_fs_type, fc->sb_flags, root,
   -                                CGROUP_SUPER_MAGIC, ns);

   [...]

   -       if (percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
   -               struct super_block *sb = dentry->d_sb;
   -               dput(dentry);
   +       ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns);
   +       if (!ret && percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
   +               struct super_block *sb = fc->root->d_sb;
   +               dput(fc->root);
                   deactivate_locked_super(sb);
                   msleep(10);
                   return restart_syscall();
           }
   --------------

In changing from the local "*dentry" variable to using fc->root, we now
export/leave that dentry pointer in the file context after doing the dput()
in the unlikely "is_dying" case.   With LTP doing a crazy amount of back to
back mount/unmount [testcases/bin/cgroup_regression_5_1.sh] the unlikely
becomes slightly likely and then bad things happen.

A fix would be to not leave the stale reference in fc->root as follows:

   --------------
                  dput(fc->root);
  +               fc->root = NULL;
                  deactivate_locked_super(sb);
   --------------

...but then we are just open-coding a duplicate of fc_drop_locked() so we
simply use that instead.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: stable@vger.kernel.org      # v5.1+
Reported-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Fixes: 71d883c37e ("cgroup_do_mount(): massage calling conventions")
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2021-07-31 08:19:37 +02:00

191 lines
5.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/* fs/ internal definitions
*
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
struct super_block;
struct file_system_type;
struct iomap;
struct iomap_ops;
struct linux_binprm;
struct path;
struct mount;
struct shrink_control;
struct fs_context;
struct user_namespace;
/*
* block_dev.c
*/
#ifdef CONFIG_BLOCK
extern void __init bdev_cache_init(void);
extern int __sync_blockdev(struct block_device *bdev, int wait);
#else
static inline void bdev_cache_init(void)
{
}
static inline int __sync_blockdev(struct block_device *bdev, int wait)
{
return 0;
}
#endif
/*
* buffer.c
*/
extern void guard_bio_eod(struct bio *bio);
extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap);
/*
* char_dev.c
*/
extern void __init chrdev_init(void);
/*
* fs_context.c
*/
extern const struct fs_context_operations legacy_fs_context_ops;
extern int parse_monolithic_mount_data(struct fs_context *, void *);
extern void vfs_clean_context(struct fs_context *fc);
extern int finish_clean_context(struct fs_context *fc);
/*
* namei.c
*/
extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root);
extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
long do_mknodat(int dfd, const char __user *filename, umode_t mode,
unsigned int dev);
long do_mkdirat(int dfd, const char __user *pathname, umode_t mode);
long do_rmdir(int dfd, const char __user *pathname);
long do_unlinkat(int dfd, struct filename *name);
long do_symlinkat(const char __user *oldname, int newdfd,
const char __user *newname);
int do_linkat(int olddfd, const char __user *oldname, int newdfd,
const char __user *newname, int flags);
/*
* namespace.c
*/
extern void *copy_mount_options(const void __user *);
extern char *copy_mount_string(const void __user *);
extern struct vfsmount *lookup_mnt(const struct path *);
extern int finish_automount(struct vfsmount *, struct path *);
extern int sb_prepare_remount_readonly(struct super_block *);
extern void __init mnt_init(void);
extern int __mnt_want_write_file(struct file *);
extern void __mnt_drop_write_file(struct file *);
extern void dissolve_on_fput(struct vfsmount *);
/*
* fs_struct.c
*/
extern void chroot_fs_refs(const struct path *, const struct path *);
/*
* file_table.c
*/
extern struct file *alloc_empty_file(int, const struct cred *);
extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
/*
* super.c
*/
extern int reconfigure_super(struct fs_context *);
extern bool trylock_super(struct super_block *sb);
extern struct super_block *user_get_super(dev_t);
extern bool mount_capable(struct fs_context *);
/*
* open.c
*/
struct open_flags {
int open_flag;
umode_t mode;
int acc_mode;
int intent;
int lookup_flags;
};
extern struct file *do_filp_open(int dfd, struct filename *pathname,
const struct open_flags *op);
extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
const char *, const struct open_flags *);
long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
long do_faccessat(int dfd, const char __user *filename, int mode);
int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
extern int vfs_open(const struct path *, struct file *);
/*
* inode.c
*/
extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
extern void inode_add_lru(struct inode *inode);
extern int dentry_needs_remove_privs(struct dentry *dentry);
/*
* fs-writeback.c
*/
extern void inode_io_list_del(struct inode *inode);
extern long get_nr_dirty_inodes(void);
extern int invalidate_inodes(struct super_block *, bool);
/*
* dcache.c
*/
extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
extern int d_set_mounted(struct dentry *dentry);
extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
extern struct dentry *d_alloc_cursor(struct dentry *);
extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
extern char *simple_dname(struct dentry *, char *, int);
extern void dput_to_list(struct dentry *, struct list_head *);
extern void shrink_dentry_list(struct list_head *);
/*
* read_write.c
*/
extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
/*
* pipe.c
*/
extern const struct file_operations pipefifo_fops;
/*
* fs_pin.c
*/
extern void group_pin_kill(struct hlist_head *p);
extern void mnt_pin_kill(struct mount *m);
/*
* fs/nsfs.c
*/
extern const struct dentry_operations ns_dentry_operations;
/*
* fs/ioctl.c
*/
extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
unsigned long arg);
/* direct-io.c: */
int sb_init_dio_done_wq(struct super_block *sb);