linux-brain/net/core/dst.c
Jason A. Donenfeld adecda5bee net: print proper warning on dst underflow
Proper warnings with stack traces make it much easier to figure out
what's doing the double free and create more meaningful bug reports from
users.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-09-26 09:05:56 +02:00

333 lines
7.9 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* net/core/dst.c Protocol independent destination cache.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>
#include <net/net_namespace.h>
#include <linux/sched.h>
#include <linux/prefetch.h>
#include <net/lwtunnel.h>
#include <net/xfrm.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
EXPORT_SYMBOL(dst_discard_out);
const struct dst_metrics dst_default_metrics = {
/* This initializer is needed to force linker to place this variable
* into const section. Otherwise it might end into bss section.
* We really want to avoid false sharing on this variable, and catch
* any writes on it.
*/
.refcnt = REFCOUNT_INIT(1),
};
EXPORT_SYMBOL(dst_default_metrics);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
struct net_device *dev, int initial_ref, int initial_obsolete,
unsigned short flags)
{
dst->dev = dev;
if (dev)
dev_hold(dev);
dst->ops = ops;
dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
#endif
dst->input = dst_discard;
dst->output = dst_discard_out;
dst->error = 0;
dst->obsolete = initial_obsolete;
dst->header_len = 0;
dst->trailer_len = 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
dst->tclassid = 0;
#endif
dst->lwtstate = NULL;
atomic_set(&dst->__refcnt, initial_ref);
dst->__use = 0;
dst->lastuse = jiffies;
dst->flags = flags;
if (!(flags & DST_NOCOUNT))
dst_entries_add(ops, 1);
}
EXPORT_SYMBOL(dst_init);
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
int initial_ref, int initial_obsolete, unsigned short flags)
{
struct dst_entry *dst;
if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
if (ops->gc(ops)) {
printk_ratelimited(KERN_NOTICE "Route cache is full: "
"consider increasing sysctl "
"net.ipv[4|6].route.max_size.\n");
return NULL;
}
}
dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
if (!dst)
return NULL;
dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags);
return dst;
}
EXPORT_SYMBOL(dst_alloc);
struct dst_entry *dst_destroy(struct dst_entry * dst)
{
struct dst_entry *child = NULL;
smp_rmb();
#ifdef CONFIG_XFRM
if (dst->xfrm) {
struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
child = xdst->child;
}
#endif
if (!(dst->flags & DST_NOCOUNT))
dst_entries_add(dst->ops, -1);
if (dst->ops->destroy)
dst->ops->destroy(dst);
if (dst->dev)
dev_put(dst->dev);
lwtstate_put(dst->lwtstate);
if (dst->flags & DST_METADATA)
metadata_dst_free((struct metadata_dst *)dst);
else
kmem_cache_free(dst->ops->kmem_cachep, dst);
dst = child;
if (dst)
dst_release_immediate(dst);
return NULL;
}
EXPORT_SYMBOL(dst_destroy);
static void dst_destroy_rcu(struct rcu_head *head)
{
struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
dst = dst_destroy(dst);
}
/* Operations to mark dst as DEAD and clean up the net device referenced
* by dst:
* 1. put the dst under loopback interface and discard all tx/rx packets
* on this route.
* 2. release the net_device
* This function should be called when removing routes from the fib tree
* in preparation for a NETDEV_DOWN/NETDEV_UNREGISTER event and also to
* make the next dst_ops->check() fail.
*/
void dst_dev_put(struct dst_entry *dst)
{
struct net_device *dev = dst->dev;
dst->obsolete = DST_OBSOLETE_DEAD;
if (dst->ops->ifdown)
dst->ops->ifdown(dst, dev, true);
dst->input = dst_discard;
dst->output = dst_discard_out;
dst->dev = blackhole_netdev;
dev_hold(dst->dev);
dev_put(dev);
}
EXPORT_SYMBOL(dst_dev_put);
void dst_release(struct dst_entry *dst)
{
if (dst) {
int newrefcnt;
newrefcnt = atomic_dec_return(&dst->__refcnt);
if (WARN_ONCE(newrefcnt < 0, "dst_release underflow"))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
if (!newrefcnt)
call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
EXPORT_SYMBOL(dst_release);
void dst_release_immediate(struct dst_entry *dst)
{
if (dst) {
int newrefcnt;
newrefcnt = atomic_dec_return(&dst->__refcnt);
if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow"))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
if (!newrefcnt)
dst_destroy(dst);
}
}
EXPORT_SYMBOL(dst_release_immediate);
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
{
struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC);
if (p) {
struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
unsigned long prev, new;
refcount_set(&p->refcnt, 1);
memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
new = (unsigned long) p;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev != old) {
kfree(p);
p = (struct dst_metrics *)__DST_METRICS_PTR(prev);
if (prev & DST_METRICS_READ_ONLY)
p = NULL;
} else if (prev & DST_METRICS_REFCOUNTED) {
if (refcount_dec_and_test(&old_p->refcnt))
kfree(old_p);
}
}
BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0);
return (u32 *)p;
}
EXPORT_SYMBOL(dst_cow_metrics_generic);
/* Caller asserts that dst_metrics_read_only(dst) is false. */
void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
{
unsigned long prev, new;
new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev == old)
kfree(__DST_METRICS_PTR(old));
}
EXPORT_SYMBOL(__dst_destroy_metrics_generic);
static struct dst_ops md_dst_ops = {
.family = AF_UNSPEC,
};
static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
WARN_ONCE(1, "Attempting to call output on metadata dst\n");
kfree_skb(skb);
return 0;
}
static int dst_md_discard(struct sk_buff *skb)
{
WARN_ONCE(1, "Attempting to call input on metadata dst\n");
kfree_skb(skb);
return 0;
}
static void __metadata_dst_init(struct metadata_dst *md_dst,
enum metadata_type type, u8 optslen)
{
struct dst_entry *dst;
dst = &md_dst->dst;
dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
DST_METADATA | DST_NOCOUNT);
dst->input = dst_md_discard;
dst->output = dst_md_discard_out;
memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
md_dst->type = type;
}
struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type,
gfp_t flags)
{
struct metadata_dst *md_dst;
md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
if (!md_dst)
return NULL;
__metadata_dst_init(md_dst, type, optslen);
return md_dst;
}
EXPORT_SYMBOL_GPL(metadata_dst_alloc);
void metadata_dst_free(struct metadata_dst *md_dst)
{
#ifdef CONFIG_DST_CACHE
if (md_dst->type == METADATA_IP_TUNNEL)
dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
#endif
kfree(md_dst);
}
EXPORT_SYMBOL_GPL(metadata_dst_free);
struct metadata_dst __percpu *
metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
{
int cpu;
struct metadata_dst __percpu *md_dst;
md_dst = __alloc_percpu_gfp(sizeof(struct metadata_dst) + optslen,
__alignof__(struct metadata_dst), flags);
if (!md_dst)
return NULL;
for_each_possible_cpu(cpu)
__metadata_dst_init(per_cpu_ptr(md_dst, cpu), type, optslen);
return md_dst;
}
EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
{
#ifdef CONFIG_DST_CACHE
int cpu;
for_each_possible_cpu(cpu) {
struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
if (one_md_dst->type == METADATA_IP_TUNNEL)
dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
}
#endif
free_percpu(md_dst);
}
EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);