linux-brain/drivers/infiniband/core/restrack.c

364 lines
8.3 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
*/
#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/restrack.h>
#include <rdma/rdma_counter.h>
#include <linux/mutex.h>
#include <linux/sched/task.h>
#include <linux/pid_namespace.h>
#include "cma_priv.h"
#include "restrack.h"
/**
* rdma_restrack_init() - initialize and allocate resource tracking
* @dev: IB device
*
* Return: 0 on success
*/
int rdma_restrack_init(struct ib_device *dev)
{
struct rdma_restrack_root *rt;
int i;
dev->res = kcalloc(RDMA_RESTRACK_MAX, sizeof(*rt), GFP_KERNEL);
if (!dev->res)
return -ENOMEM;
rt = dev->res;
for (i = 0; i < RDMA_RESTRACK_MAX; i++)
xa_init_flags(&rt[i].xa, XA_FLAGS_ALLOC);
return 0;
}
static const char *type2str(enum rdma_restrack_type type)
{
static const char * const names[RDMA_RESTRACK_MAX] = {
[RDMA_RESTRACK_PD] = "PD",
[RDMA_RESTRACK_CQ] = "CQ",
[RDMA_RESTRACK_QP] = "QP",
[RDMA_RESTRACK_CM_ID] = "CM_ID",
[RDMA_RESTRACK_MR] = "MR",
[RDMA_RESTRACK_CTX] = "CTX",
[RDMA_RESTRACK_COUNTER] = "COUNTER",
};
return names[type];
};
/**
* rdma_restrack_clean() - clean resource tracking
* @dev: IB device
*/
void rdma_restrack_clean(struct ib_device *dev)
{
struct rdma_restrack_root *rt = dev->res;
struct rdma_restrack_entry *e;
char buf[TASK_COMM_LEN];
bool found = false;
const char *owner;
int i;
for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
struct xarray *xa = &dev->res[i].xa;
if (!xa_empty(xa)) {
unsigned long index;
if (!found) {
pr_err("restrack: %s", CUT_HERE);
dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
}
xa_for_each(xa, index, e) {
if (rdma_is_kernel_res(e)) {
owner = e->kern_name;
} else {
/*
* There is no need to call get_task_struct here,
* because we can be here only if there are more
* get_task_struct() call than put_task_struct().
*/
get_task_comm(buf, e->task);
owner = buf;
}
pr_err("restrack: %s %s object allocated by %s is not freed\n",
rdma_is_kernel_res(e) ? "Kernel" :
"User",
type2str(e->type), owner);
}
found = true;
}
xa_destroy(xa);
}
if (found)
pr_err("restrack: %s", CUT_HERE);
kfree(rt);
}
/**
* rdma_restrack_count() - the current usage of specific object
* @dev: IB device
* @type: actual type of object to operate
*/
int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type)
{
struct rdma_restrack_root *rt = &dev->res[type];
struct rdma_restrack_entry *e;
XA_STATE(xas, &rt->xa, 0);
u32 cnt = 0;
xa_lock(&rt->xa);
xas_for_each(&xas, e, U32_MAX) {
if (!rdma_is_visible_in_pid_ns(e))
continue;
cnt++;
}
xa_unlock(&rt->xa);
return cnt;
}
EXPORT_SYMBOL(rdma_restrack_count);
static void set_kern_name(struct rdma_restrack_entry *res)
{
struct ib_pd *pd;
switch (res->type) {
case RDMA_RESTRACK_QP:
pd = container_of(res, struct ib_qp, res)->pd;
if (!pd) {
WARN_ONCE(true, "XRC QPs are not supported\n");
/* Survive, despite the programmer's error */
res->kern_name = " ";
}
break;
case RDMA_RESTRACK_MR:
pd = container_of(res, struct ib_mr, res)->pd;
break;
default:
/* Other types set kern_name directly */
pd = NULL;
break;
}
if (pd)
res->kern_name = pd->res.kern_name;
}
static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
{
switch (res->type) {
case RDMA_RESTRACK_PD:
return container_of(res, struct ib_pd, res)->device;
case RDMA_RESTRACK_CQ:
return container_of(res, struct ib_cq, res)->device;
case RDMA_RESTRACK_QP:
return container_of(res, struct ib_qp, res)->device;
case RDMA_RESTRACK_CM_ID:
return container_of(res, struct rdma_id_private,
res)->id.device;
case RDMA_RESTRACK_MR:
return container_of(res, struct ib_mr, res)->device;
case RDMA_RESTRACK_CTX:
return container_of(res, struct ib_ucontext, res)->device;
case RDMA_RESTRACK_COUNTER:
return container_of(res, struct rdma_counter, res)->device;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return NULL;
}
}
void rdma_restrack_set_task(struct rdma_restrack_entry *res,
const char *caller)
{
if (caller) {
res->kern_name = caller;
return;
}
if (res->task)
put_task_struct(res->task);
get_task_struct(current);
res->task = current;
}
EXPORT_SYMBOL(rdma_restrack_set_task);
/**
* rdma_restrack_attach_task() - attach the task onto this resource
* @res: resource entry
* @task: the task to attach, the current task will be used if it is NULL.
*/
void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
struct task_struct *task)
{
if (res->task)
put_task_struct(res->task);
get_task_struct(task);
res->task = task;
}
static void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
struct rdma_restrack_root *rt;
int ret;
if (!dev)
return;
rt = &dev->res[res->type];
kref_init(&res->kref);
init_completion(&res->comp);
if (res->type == RDMA_RESTRACK_QP) {
/* Special case to ensure that LQPN points to right QP */
struct ib_qp *qp = container_of(res, struct ib_qp, res);
ret = xa_insert(&rt->xa, qp->qp_num, res, GFP_KERNEL);
res->id = ret ? 0 : qp->qp_num;
} else if (res->type == RDMA_RESTRACK_COUNTER) {
/* Special case to ensure that cntn points to right counter */
struct rdma_counter *counter;
counter = container_of(res, struct rdma_counter, res);
ret = xa_insert(&rt->xa, counter->id, res, GFP_KERNEL);
res->id = ret ? 0 : counter->id;
} else {
ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b,
&rt->next_id, GFP_KERNEL);
}
if (!ret)
res->valid = true;
}
/**
* rdma_restrack_kadd() - add kernel object to the reource tracking database
* @res: resource entry
*/
void rdma_restrack_kadd(struct rdma_restrack_entry *res)
{
res->task = NULL;
set_kern_name(res);
res->user = false;
rdma_restrack_add(res);
}
EXPORT_SYMBOL(rdma_restrack_kadd);
/**
* rdma_restrack_uadd() - add user object to the reource tracking database
* @res: resource entry
*/
void rdma_restrack_uadd(struct rdma_restrack_entry *res)
{
if ((res->type != RDMA_RESTRACK_CM_ID) &&
(res->type != RDMA_RESTRACK_COUNTER))
res->task = NULL;
if (!res->task)
rdma_restrack_set_task(res, NULL);
res->kern_name = NULL;
res->user = true;
rdma_restrack_add(res);
}
EXPORT_SYMBOL(rdma_restrack_uadd);
int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
{
return kref_get_unless_zero(&res->kref);
}
EXPORT_SYMBOL(rdma_restrack_get);
/**
* rdma_restrack_get_byid() - translate from ID to restrack object
* @dev: IB device
* @type: resource track type
* @id: ID to take a look
*
* Return: Pointer to restrack entry or -ENOENT in case of error.
*/
struct rdma_restrack_entry *
rdma_restrack_get_byid(struct ib_device *dev,
enum rdma_restrack_type type, u32 id)
{
struct rdma_restrack_root *rt = &dev->res[type];
struct rdma_restrack_entry *res;
xa_lock(&rt->xa);
res = xa_load(&rt->xa, id);
if (!res || !rdma_restrack_get(res))
res = ERR_PTR(-ENOENT);
xa_unlock(&rt->xa);
return res;
}
EXPORT_SYMBOL(rdma_restrack_get_byid);
static void restrack_release(struct kref *kref)
{
struct rdma_restrack_entry *res;
res = container_of(kref, struct rdma_restrack_entry, kref);
complete(&res->comp);
}
int rdma_restrack_put(struct rdma_restrack_entry *res)
{
return kref_put(&res->kref, restrack_release);
}
EXPORT_SYMBOL(rdma_restrack_put);
void rdma_restrack_del(struct rdma_restrack_entry *res)
{
struct rdma_restrack_entry *old;
struct rdma_restrack_root *rt;
struct ib_device *dev;
if (!res->valid)
goto out;
dev = res_to_dev(res);
if (WARN_ON(!dev))
return;
rt = &dev->res[res->type];
old = xa_erase(&rt->xa, res->id);
WARN_ON(old != res);
res->valid = false;
rdma_restrack_put(res);
wait_for_completion(&res->comp);
out:
RDMA/restrack: Protect from reentry to resource return path Nullify the resource task struct pointer to ensure that subsequent calls won't try to release task_struct again. ------------[ cut here ]------------ ODEBUG: free active (active state 1) object type: rcu_head hint: (null) WARNING: CPU: 0 PID: 6048 at lib/debugobjects.c:329 debug_print_object+0x16a/0x210 lib/debugobjects.c:326 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 6048 Comm: syz-executor022 Not tainted 4.19.0-rc7-next-20181008+ #89 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x244/0x3ab lib/dump_stack.c:113 panic+0x238/0x4e7 kernel/panic.c:184 __warn.cold.8+0x163/0x1ba kernel/panic.c:536 report_bug+0x254/0x2d0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:178 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:271 do_invalid_op+0x36/0x40 arch/x86/kernel/traps.c:290 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:969 RIP: 0010:debug_print_object+0x16a/0x210 lib/debugobjects.c:326 Code: 41 88 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 92 00 00 00 48 8b 14 dd 60 02 41 88 4c 89 fe 48 c7 c7 00 f8 40 88 e8 36 2f b4 fd <0f> 0b 83 05 a9 f4 5e 06 01 48 83 c4 18 5b 41 5c 41 5d 41 5e 41 5f RSP: 0018:ffff8801d8c3eda8 EFLAGS: 00010086 RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff8164d235 RDI: 0000000000000005 RBP: ffff8801d8c3ede8 R08: ffff8801d70aa280 R09: ffffed003b5c3eda R10: ffffed003b5c3eda R11: ffff8801dae1f6d7 R12: 0000000000000001 R13: ffffffff8939a760 R14: 0000000000000000 R15: ffffffff8840fca0 __debug_check_no_obj_freed lib/debugobjects.c:786 [inline] debug_check_no_obj_freed+0x3ae/0x58d lib/debugobjects.c:818 kmem_cache_free+0x202/0x290 mm/slab.c:3759 free_task_struct kernel/fork.c:163 [inline] free_task+0x16e/0x1f0 kernel/fork.c:457 __put_task_struct+0x2e6/0x620 kernel/fork.c:730 put_task_struct include/linux/sched/task.h:96 [inline] finish_task_switch+0x66c/0x900 kernel/sched/core.c:2715 context_switch kernel/sched/core.c:2834 [inline] __schedule+0x8d7/0x21d0 kernel/sched/core.c:3480 schedule+0xfe/0x460 kernel/sched/core.c:3524 freezable_schedule include/linux/freezer.h:172 [inline] futex_wait_queue_me+0x3f9/0x840 kernel/futex.c:2530 futex_wait+0x45c/0xa50 kernel/futex.c:2645 do_futex+0x31a/0x26d0 kernel/futex.c:3528 __do_sys_futex kernel/futex.c:3589 [inline] __se_sys_futex kernel/futex.c:3557 [inline] __x64_sys_futex+0x472/0x6a0 kernel/futex.c:3557 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x446549 Code: e8 2c b3 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b 09 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f3a998f5da8 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca RAX: ffffffffffffffda RBX: 00000000006dbc38 RCX: 0000000000446549 RDX: 0000000000000000 RSI: 0000000000000080 RDI: 00000000006dbc38 RBP: 00000000006dbc30 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dbc3c R13: 2f646e6162696e69 R14: 666e692f7665642f R15: 00000000006dbd2c Kernel Offset: disabled Reported-by: syzbot+71aff6ea121ffefc280f@syzkaller.appspotmail.com Fixes: ed7a01fd3fd7 ("RDMA/restrack: Release task struct which was hold by CM_ID object") Signed-off-by: Leon Romanovsky <leonro@mellanox.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2018-10-12 04:10:10 +09:00
if (res->task) {
put_task_struct(res->task);
RDMA/restrack: Protect from reentry to resource return path Nullify the resource task struct pointer to ensure that subsequent calls won't try to release task_struct again. ------------[ cut here ]------------ ODEBUG: free active (active state 1) object type: rcu_head hint: (null) WARNING: CPU: 0 PID: 6048 at lib/debugobjects.c:329 debug_print_object+0x16a/0x210 lib/debugobjects.c:326 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 6048 Comm: syz-executor022 Not tainted 4.19.0-rc7-next-20181008+ #89 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x244/0x3ab lib/dump_stack.c:113 panic+0x238/0x4e7 kernel/panic.c:184 __warn.cold.8+0x163/0x1ba kernel/panic.c:536 report_bug+0x254/0x2d0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:178 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:271 do_invalid_op+0x36/0x40 arch/x86/kernel/traps.c:290 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:969 RIP: 0010:debug_print_object+0x16a/0x210 lib/debugobjects.c:326 Code: 41 88 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 92 00 00 00 48 8b 14 dd 60 02 41 88 4c 89 fe 48 c7 c7 00 f8 40 88 e8 36 2f b4 fd <0f> 0b 83 05 a9 f4 5e 06 01 48 83 c4 18 5b 41 5c 41 5d 41 5e 41 5f RSP: 0018:ffff8801d8c3eda8 EFLAGS: 00010086 RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff8164d235 RDI: 0000000000000005 RBP: ffff8801d8c3ede8 R08: ffff8801d70aa280 R09: ffffed003b5c3eda R10: ffffed003b5c3eda R11: ffff8801dae1f6d7 R12: 0000000000000001 R13: ffffffff8939a760 R14: 0000000000000000 R15: ffffffff8840fca0 __debug_check_no_obj_freed lib/debugobjects.c:786 [inline] debug_check_no_obj_freed+0x3ae/0x58d lib/debugobjects.c:818 kmem_cache_free+0x202/0x290 mm/slab.c:3759 free_task_struct kernel/fork.c:163 [inline] free_task+0x16e/0x1f0 kernel/fork.c:457 __put_task_struct+0x2e6/0x620 kernel/fork.c:730 put_task_struct include/linux/sched/task.h:96 [inline] finish_task_switch+0x66c/0x900 kernel/sched/core.c:2715 context_switch kernel/sched/core.c:2834 [inline] __schedule+0x8d7/0x21d0 kernel/sched/core.c:3480 schedule+0xfe/0x460 kernel/sched/core.c:3524 freezable_schedule include/linux/freezer.h:172 [inline] futex_wait_queue_me+0x3f9/0x840 kernel/futex.c:2530 futex_wait+0x45c/0xa50 kernel/futex.c:2645 do_futex+0x31a/0x26d0 kernel/futex.c:3528 __do_sys_futex kernel/futex.c:3589 [inline] __se_sys_futex kernel/futex.c:3557 [inline] __x64_sys_futex+0x472/0x6a0 kernel/futex.c:3557 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x446549 Code: e8 2c b3 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b 09 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f3a998f5da8 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca RAX: ffffffffffffffda RBX: 00000000006dbc38 RCX: 0000000000446549 RDX: 0000000000000000 RSI: 0000000000000080 RDI: 00000000006dbc38 RBP: 00000000006dbc30 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dbc3c R13: 2f646e6162696e69 R14: 666e692f7665642f R15: 00000000006dbd2c Kernel Offset: disabled Reported-by: syzbot+71aff6ea121ffefc280f@syzkaller.appspotmail.com Fixes: ed7a01fd3fd7 ("RDMA/restrack: Release task struct which was hold by CM_ID object") Signed-off-by: Leon Romanovsky <leonro@mellanox.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2018-10-12 04:10:10 +09:00
res->task = NULL;
}
}
EXPORT_SYMBOL(rdma_restrack_del);
bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res)
{
/*
* 1. Kern resources should be visible in init
* namespace only
* 2. Present only resources visible in the current
* namespace
*/
if (rdma_is_kernel_res(res))
return task_active_pid_ns(current) == &init_pid_ns;
/* PID 0 means that resource is not found in current namespace */
return task_pid_vnr(res->task);
}