xsk: add sock_diag interface for AF_XDP

This patch adds the sock_diag interface for querying sockets from user
space. Tools like iproute2 ss(8) can use this interface to list open
AF_XDP sockets.

The user-space ABI is defined in linux/xdp_diag.h and includes netlink
request and response structs. The request can query sockets and the
response contains socket information about the rings, umems, inode and
more.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
Björn Töpel 2019-01-24 19:59:39 +01:00 committed by Daniel Borkmann
parent 50e74c0131
commit a36b38aa2a
6 changed files with 285 additions and 5 deletions

View File

@ -0,0 +1,72 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* xdp_diag: interface for query/monitor XDP sockets
* Copyright(c) 2019 Intel Corporation.
*/
#ifndef _LINUX_XDP_DIAG_H
#define _LINUX_XDP_DIAG_H
#include <linux/types.h>
struct xdp_diag_req {
__u8 sdiag_family;
__u8 sdiag_protocol;
__u16 pad;
__u32 xdiag_ino;
__u32 xdiag_show;
__u32 xdiag_cookie[2];
};
struct xdp_diag_msg {
__u8 xdiag_family;
__u8 xdiag_type;
__u16 pad;
__u32 xdiag_ino;
__u32 xdiag_cookie[2];
};
#define XDP_SHOW_INFO (1 << 0) /* Basic information */
#define XDP_SHOW_RING_CFG (1 << 1)
#define XDP_SHOW_UMEM (1 << 2)
#define XDP_SHOW_MEMINFO (1 << 3)
enum {
XDP_DIAG_NONE,
XDP_DIAG_INFO,
XDP_DIAG_UID,
XDP_DIAG_RX_RING,
XDP_DIAG_TX_RING,
XDP_DIAG_UMEM,
XDP_DIAG_UMEM_FILL_RING,
XDP_DIAG_UMEM_COMPLETION_RING,
XDP_DIAG_MEMINFO,
__XDP_DIAG_MAX,
};
#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1)
struct xdp_diag_info {
__u32 ifindex;
__u32 queue_id;
};
struct xdp_diag_ring {
__u32 entries; /*num descs */
};
#define XDP_DU_F_ZEROCOPY (1 << 0)
struct xdp_diag_umem {
__u64 size;
__u32 id;
__u32 num_pages;
__u32 chunk_size;
__u32 headroom;
__u32 ifindex;
__u32 queue_id;
__u32 flags;
__u32 refs;
};
#endif /* _LINUX_XDP_DIAG_H */

View File

@ -5,3 +5,11 @@ config XDP_SOCKETS
help
XDP sockets allows a channel between XDP programs and
userspace applications.
config XDP_SOCKETS_DIAG
tristate "XDP sockets: monitoring interface"
depends on XDP_SOCKETS
default n
help
Support for PF_XDP sockets monitoring interface used by the ss tool.
If unsure, say Y.

View File

@ -1 +1,2 @@
obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o

View File

@ -27,14 +27,10 @@
#include "xsk_queue.h"
#include "xdp_umem.h"
#include "xsk.h"
#define TX_BATCH_SIZE 16
static struct xdp_sock *xdp_sk(struct sock *sk)
{
return (struct xdp_sock *)sk;
}
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&

12
net/xdp/xsk.h Normal file
View File

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright(c) 2019 Intel Corporation. */
#ifndef XSK_H_
#define XSK_H_
static inline struct xdp_sock *xdp_sk(struct sock *sk)
{
return (struct xdp_sock *)sk;
}
#endif /* XSK_H_ */

191
net/xdp/xsk_diag.c Normal file
View File

@ -0,0 +1,191 @@
// SPDX-License-Identifier: GPL-2.0
/* XDP sockets monitoring support
*
* Copyright(c) 2019 Intel Corporation.
*
* Author: Björn Töpel <bjorn.topel@intel.com>
*/
#include <linux/module.h>
#include <net/xdp_sock.h>
#include <linux/xdp_diag.h>
#include <linux/sock_diag.h>
#include "xsk_queue.h"
#include "xsk.h"
static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb)
{
struct xdp_diag_info di = {};
di.ifindex = xs->dev ? xs->dev->ifindex : 0;
di.queue_id = xs->queue_id;
return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di);
}
static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type,
struct sk_buff *nlskb)
{
struct xdp_diag_ring dr = {};
dr.entries = queue->nentries;
return nla_put(nlskb, nl_type, sizeof(dr), &dr);
}
static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs,
struct sk_buff *nlskb)
{
int err = 0;
if (xs->rx)
err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb);
if (!err && xs->tx)
err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb);
return err;
}
static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
{
struct xdp_umem *umem = xs->umem;
struct xdp_diag_umem du = {};
int err;
if (!umem)
return 0;
du.id = umem->id;
du.size = umem->size;
du.num_pages = umem->npgs;
du.chunk_size = (__u32)(~umem->chunk_mask + 1);
du.headroom = umem->headroom;
du.ifindex = umem->dev ? umem->dev->ifindex : 0;
du.queue_id = umem->queue_id;
du.flags = 0;
if (umem->zc)
du.flags |= XDP_DU_F_ZEROCOPY;
du.refs = refcount_read(&umem->users);
err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
if (!err && umem->fq)
err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb);
if (!err && umem->cq) {
err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING,
nlskb);
}
return err;
}
static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
struct xdp_diag_req *req,
struct user_namespace *user_ns,
u32 portid, u32 seq, u32 flags, int sk_ino)
{
struct xdp_sock *xs = xdp_sk(sk);
struct xdp_diag_msg *msg;
struct nlmsghdr *nlh;
nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg),
flags);
if (!nlh)
return -EMSGSIZE;
msg = nlmsg_data(nlh);
memset(msg, 0, sizeof(*msg));
msg->xdiag_family = AF_XDP;
msg->xdiag_type = sk->sk_type;
msg->xdiag_ino = sk_ino;
sock_diag_save_cookie(sk, msg->xdiag_cookie);
if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
goto out_nlmsg_trim;
if ((req->xdiag_show & XDP_SHOW_INFO) &&
nla_put_u32(nlskb, XDP_DIAG_UID,
from_kuid_munged(user_ns, sock_i_uid(sk))))
goto out_nlmsg_trim;
if ((req->xdiag_show & XDP_SHOW_RING_CFG) &&
xsk_diag_put_rings_cfg(xs, nlskb))
goto out_nlmsg_trim;
if ((req->xdiag_show & XDP_SHOW_UMEM) &&
xsk_diag_put_umem(xs, nlskb))
goto out_nlmsg_trim;
if ((req->xdiag_show & XDP_SHOW_MEMINFO) &&
sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
goto out_nlmsg_trim;
nlmsg_end(nlskb, nlh);
return 0;
out_nlmsg_trim:
nlmsg_cancel(nlskb, nlh);
return -EMSGSIZE;
}
static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb)
{
struct xdp_diag_req *req = nlmsg_data(cb->nlh);
struct net *net = sock_net(nlskb->sk);
int num = 0, s_num = cb->args[0];
struct sock *sk;
mutex_lock(&net->xdp.lock);
sk_for_each(sk, &net->xdp.list) {
if (!net_eq(sock_net(sk), net))
continue;
if (num++ < s_num)
continue;
if (xsk_diag_fill(sk, nlskb, req,
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
sock_i_ino(sk)) < 0) {
num--;
break;
}
}
mutex_unlock(&net->xdp.lock);
cb->args[0] = num;
return nlskb->len;
}
static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
{
struct netlink_dump_control c = { .dump = xsk_diag_dump };
int hdrlen = sizeof(struct xdp_diag_req);
struct net *net = sock_net(nlskb->sk);
if (nlmsg_len(hdr) < hdrlen)
return -EINVAL;
if (!(hdr->nlmsg_flags & NLM_F_DUMP))
return -EOPNOTSUPP;
return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c);
}
static const struct sock_diag_handler xsk_diag_handler = {
.family = AF_XDP,
.dump = xsk_diag_handler_dump,
};
static int __init xsk_diag_init(void)
{
return sock_diag_register(&xsk_diag_handler);
}
static void __exit xsk_diag_exit(void)
{
sock_diag_unregister(&xsk_diag_handler);
}
module_init(xsk_diag_init);
module_exit(xsk_diag_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);