Merge branch 'sk_buff-add-extension-infrastructure'

Florian Westphal says:

====================
sk_buff: add extension infrastructure

TL;DR:
 - objdiff shows no change if CONFIG_XFRM=n && BR_NETFILTER=n
 - small size reduction when one or both options are set
 - no changes in ipsec performance

 Changes since v1:
 - Allocate entire extension space from a kmem_cache.
 - Avoid atomic_dec_and_test operation on skb_ext_put() for refcnt == 1 case.
   (similar to kfree_skbmem() fclone_ref use).

This adds an optional extension infrastructure, with ipsec (xfrm) and
bridge netfilter as first users.

The third (future) user is Multipath TCP which is still out-of-tree.
MPTCP needs to map logical mptcp sequence numbers to the tcp sequence
numbers used by individual subflows.

This DSS mapping is read/written from tcp option space on receive and
written to tcp option space on transmitted tcp packets that are part of
an MPTCP connection.

Extending skb_shared_info or adding a private data field to skb fclones
doesn't work for incoming skb, so a different DSS propagation method would
be required for the receive side.

MPTCP has the same requirements as secpath/bridge netfilter:

1. extension memory is released when the sk_buff is free'd.
2. data is shared after cloning an skb (clone inherits extension)
3. adding extension to an skb will COW the extension buffer if needed.

Two new members are added to sk_buff:
1. 'active_extensions' byte (filling a hole), telling which extensions
   are available for this skb.
   This has two purposes.
   a) avoids the need to initialize the pointer.
   b) allows to "delete" an extension by clearing its bit
   value in ->active_extensions.

   While it would be possible to store the active_extensions byte
   in the extension struct instead of sk_buff, there is one problem
   with this:
    When an extension has to be disabled, we can always clear the
    bit in skb->active_extensions.  But in case it would be stored in the
    extension buffer itself, we might have to COW it first, if
    we are dealing with a cloned skb.  On kmalloc failure we would
    be unable to turn an extension off.
2. extension pointer, located at the end of the sk_buff.
   If the active_extensions byte is 0, the pointer is undefined,
   it is not initialized on skb allocation.

This adds extra code to skb clone and free paths (to deal with
refcount/free of extension area) but this replaces similar code that
manages skb->nf_bridge and skb->sp structs in the followup patches of
the series.

It is possible to add support for extensions that are not preserved on
clones/copies:

1. define a bitmask of all extensions that need copy/cow on clone
2. change __skb_ext_copy() to check
   ->active_extensions & SKB_EXT_PRESERVE_ON_CLONE
3. set clone->active_extensions to 0 if test is false.

This isn't done here because all extensions that get added here
need the copy/cow semantics.

Last patch converts skb->sp, secpath information gets stored as
new SKB_EXT_SEC_PATH, so the 'sp' pointer is removed from skbuff.

Extra code added to skb clone and free paths (to deal with refcount/free
of extension area) replaces the existing code that does the same for
skb->nf_bridge and skb->secpath.

I don't see any other in-tree users that could benefit from this
infrastructure, it doesn't make sense to add an extension just for the sake
of a single flag bit (like skb->nf_trace).

Adding a new extension is a good fit if all of the following are true:

1. Data is related to the skb/packet aggregate
2. Data should be freed when the skb is free'd
3. Data is not going to be relevant/needed in normal case (udp, tcp,
   forwarding workloads, ...)
4. There are no fancy action(s) needed on clone/free, such as callbacks
   into kernel modules.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-12-19 11:21:45 -08:00
commit 4a54877ee7
39 changed files with 564 additions and 286 deletions

View File

@ -111,9 +111,10 @@ the stack in xfrm_input().
xfrm_state_hold(xs);
store the state information into the skb
skb->sp = secpath_dup(skb->sp);
skb->sp->xvec[skb->sp->len++] = xs;
skb->sp->olen++;
sp = secpath_set(skb);
if (!sp) return;
sp->xvec[sp->len++] = xs;
sp->olen++;
indicate the success and/or error status of the offload
xo = xfrm_offload(skb);

View File

@ -570,6 +570,7 @@ int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev)
struct sge_eth_txq *q;
struct port_info *pi;
dma_addr_t addr[MAX_SKB_FRAGS + 1];
struct sec_path *sp;
bool immediate = false;
if (!x->xso.offload_handle)
@ -578,7 +579,8 @@ int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev)
sa_entry = (struct ipsec_sa_entry *)x->xso.offload_handle;
kctx_len = sa_entry->kctx_len;
if (skb->sp->len != 1) {
sp = skb_sec_path(skb);
if (sp->len != 1) {
out_free: dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}

View File

@ -1065,11 +1065,13 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev);
struct ixgbe_ipsec *ipsec = adapter->ipsec;
struct xfrm_state *xs;
struct sec_path *sp;
struct tx_sa *tsa;
if (unlikely(!first->skb->sp->len)) {
sp = skb_sec_path(first->skb);
if (unlikely(!sp->len)) {
netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
__func__, first->skb->sp->len);
__func__, sp->len);
return 0;
}
@ -1159,6 +1161,7 @@ void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
struct xfrm_state *xs = NULL;
struct ipv6hdr *ip6 = NULL;
struct iphdr *ip4 = NULL;
struct sec_path *sp;
void *daddr;
__be32 spi;
u8 *c_hdr;
@ -1198,12 +1201,12 @@ void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
if (unlikely(!xs))
return;
skb->sp = secpath_dup(skb->sp);
if (unlikely(!skb->sp))
sp = secpath_set(skb);
if (unlikely(!sp))
return;
skb->sp->xvec[skb->sp->len++] = xs;
skb->sp->olen++;
sp->xvec[sp->len++] = xs;
sp->olen++;
xo = xfrm_offload(skb);
xo->flags = CRYPTO_DONE;
xo->status = CRYPTO_SUCCESS;

View File

@ -8695,7 +8695,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
#endif /* IXGBE_FCOE */
#ifdef CONFIG_IXGBE_IPSEC
if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
if (secpath_exists(skb) &&
!ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
goto out_drop;
#endif
tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx);
@ -10192,7 +10193,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
*/
if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) {
#ifdef CONFIG_IXGBE_IPSEC
if (!skb->sp)
if (!secpath_exists(skb))
#endif
features &= ~NETIF_F_TSO;
}

View File

@ -450,12 +450,14 @@ int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev);
struct ixgbevf_ipsec *ipsec = adapter->ipsec;
struct xfrm_state *xs;
struct sec_path *sp;
struct tx_sa *tsa;
u16 sa_idx;
if (unlikely(!first->skb->sp->len)) {
sp = skb_sec_path(first->skb);
if (unlikely(!sp->len)) {
netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
__func__, first->skb->sp->len);
__func__, sp->len);
return 0;
}
@ -546,6 +548,7 @@ void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
struct xfrm_state *xs = NULL;
struct ipv6hdr *ip6 = NULL;
struct iphdr *ip4 = NULL;
struct sec_path *sp;
void *daddr;
__be32 spi;
u8 *c_hdr;
@ -585,12 +588,12 @@ void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
if (unlikely(!xs))
return;
skb->sp = secpath_dup(skb->sp);
if (unlikely(!skb->sp))
sp = secpath_set(skb);
if (unlikely(!sp))
return;
skb->sp->xvec[skb->sp->len++] = xs;
skb->sp->olen++;
sp->xvec[sp->len++] = xs;
sp->olen++;
xo = xfrm_offload(skb);
xo->flags = CRYPTO_DONE;
xo->status = CRYPTO_SUCCESS;

View File

@ -4157,7 +4157,7 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
first->protocol = vlan_get_protocol(skb);
#ifdef CONFIG_IXGBEVF_IPSEC
if (skb->sp && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx))
if (secpath_exists(skb) && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx))
goto out_drop;
#endif
tso = ixgbevf_tso(tx_ring, first, &hdr_len, &ipsec_tx);

View File

@ -254,11 +254,13 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
struct mlx5e_ipsec_metadata *mdata;
struct mlx5e_ipsec_sa_entry *sa_entry;
struct xfrm_state *x;
struct sec_path *sp;
if (!xo)
return skb;
if (unlikely(skb->sp->len != 1)) {
sp = skb_sec_path(skb);
if (unlikely(sp->len != 1)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
goto drop;
}
@ -305,10 +307,11 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
struct mlx5e_priv *priv = netdev_priv(netdev);
struct xfrm_offload *xo;
struct xfrm_state *xs;
struct sec_path *sp;
u32 sa_handle;
skb->sp = secpath_dup(skb->sp);
if (unlikely(!skb->sp)) {
sp = secpath_set(skb);
if (unlikely(!sp)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
return NULL;
}
@ -320,8 +323,9 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
return NULL;
}
skb->sp->xvec[skb->sp->len++] = xs;
skb->sp->olen++;
sp = skb_sec_path(skb);
sp->xvec[sp->len++] = xs;
sp->olen++;
xo = xfrm_offload(skb);
xo->flags = CRYPTO_DONE;
@ -372,10 +376,11 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
netdev_features_t features)
{
struct sec_path *sp = skb_sec_path(skb);
struct xfrm_state *x;
if (skb->sp && skb->sp->len) {
x = skb->sp->xvec[0];
if (sp && sp->len) {
x = sp->xvec[0];
if (x && x->xso.offload_handle)
return true;
}

View File

@ -227,18 +227,19 @@ static const struct xfrmdev_ops nsim_xfrmdev_ops = {
bool nsim_ipsec_tx(struct netdevsim *ns, struct sk_buff *skb)
{
struct sec_path *sp = skb_sec_path(skb);
struct nsim_ipsec *ipsec = &ns->ipsec;
struct xfrm_state *xs;
struct nsim_sa *tsa;
u32 sa_idx;
/* do we even need to check this packet? */
if (!skb->sp)
if (!sp)
return true;
if (unlikely(!skb->sp->len)) {
if (unlikely(!sp->len)) {
netdev_err(ns->netdev, "no xfrm state len = %d\n",
skb->sp->len);
sp->len);
return false;
}

View File

@ -17,43 +17,58 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb)
skb_dst_drop(skb);
}
/* Look up the bridge-netfilter extension attached to @skb.
 * Returns whatever skb_ext_find() yields for SKB_EXT_BRIDGE_NF, i.e. NULL
 * when that extension is not active on the skb.
 */
static inline struct nf_bridge_info *
nf_bridge_info_get(const struct sk_buff *skb)
{
return skb_ext_find(skb, SKB_EXT_BRIDGE_NF);
}
/* True when the SKB_EXT_BRIDGE_NF extension is active on @skb. */
static inline bool nf_bridge_info_exists(const struct sk_buff *skb)
{
return skb_ext_exist(skb, SKB_EXT_BRIDGE_NF);
}
static inline int nf_bridge_get_physinif(const struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge;
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (skb->nf_bridge == NULL)
if (!nf_bridge)
return 0;
nf_bridge = skb->nf_bridge;
return nf_bridge->physindev ? nf_bridge->physindev->ifindex : 0;
}
static inline int nf_bridge_get_physoutif(const struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge;
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (skb->nf_bridge == NULL)
if (!nf_bridge)
return 0;
nf_bridge = skb->nf_bridge;
return nf_bridge->physoutdev ? nf_bridge->physoutdev->ifindex : 0;
}
static inline struct net_device *
nf_bridge_get_physindev(const struct sk_buff *skb)
{
return skb->nf_bridge ? skb->nf_bridge->physindev : NULL;
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
return nf_bridge ? nf_bridge->physindev : NULL;
}
static inline struct net_device *
nf_bridge_get_physoutdev(const struct sk_buff *skb)
{
return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL;
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
return nf_bridge ? nf_bridge->physoutdev : NULL;
}
static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb)
{
return skb->nf_bridge && skb->nf_bridge->in_prerouting;
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
return nf_bridge && nf_bridge->in_prerouting;
}
#else
#define br_drop_fake_rtable(skb) do { } while (0)

View File

@ -245,6 +245,7 @@ struct iov_iter;
struct napi_struct;
struct bpf_prog;
union bpf_attr;
struct skb_ext;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack {
@ -254,7 +255,6 @@ struct nf_conntrack {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info {
refcount_t use;
enum {
BRNF_PROTO_UNCHANGED,
BRNF_PROTO_8021Q,
@ -636,6 +636,7 @@ typedef unsigned char *sk_buff_data_t;
* @queue_mapping: Queue mapping for multiqueue devices
* @xmit_more: More SKBs are pending for this queue
* @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
* @active_extensions: active extensions (skb_ext_id types)
* @ndisc_nodetype: router type (from link layer)
* @ooo_okay: allow the mapping of a socket to a queue to be changed
* @l4_hash: indicate hash is a canonical 4-tuple hash over transport
@ -665,6 +666,7 @@ typedef unsigned char *sk_buff_data_t;
* @data: Data head pointer
* @truesize: Buffer size
* @users: User count - see {datagram,tcp}.c
* @extensions: allocated extensions, valid if active_extensions is nonzero
*/
struct sk_buff {
@ -712,14 +714,8 @@ struct sk_buff {
struct list_head tcp_tsorted_anchor;
};
#ifdef CONFIG_XFRM
struct sec_path *sp;
#endif
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
unsigned long _nfct;
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info *nf_bridge;
#endif
unsigned int len,
data_len;
@ -747,7 +743,9 @@ struct sk_buff {
head_frag:1,
xmit_more:1,
pfmemalloc:1;
#ifdef CONFIG_SKB_EXTENSIONS
__u8 active_extensions;
#endif
/* fields enclosed in headers_start/headers_end are copied
* using a single memcpy() in __copy_skb_header()
*/
@ -869,6 +867,11 @@ struct sk_buff {
*data;
unsigned int truesize;
refcount_t users;
#ifdef CONFIG_SKB_EXTENSIONS
/* only useable after checking ->active_extensions != 0 */
struct skb_ext *extensions;
#endif
};
#ifdef __KERNEL__
@ -3896,18 +3899,108 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
atomic_inc(&nfct->use);
}
#endif
#ifdef CONFIG_SKB_EXTENSIONS
enum skb_ext_id {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
SKB_EXT_BRIDGE_NF,
#endif
#ifdef CONFIG_XFRM
SKB_EXT_SEC_PATH,
#endif
SKB_EXT_NUM, /* must be last */
};
/**
 * struct skb_ext - sk_buff extensions
 * @refcnt: 1 on allocation, deallocated on 0
 * @offset: per-extension offset, in 8-byte chunks, counted from the start
 *          of this struct (the lookup helpers add it to the struct address,
 *          not to @data); 0 is treated as "no space assigned" by
 *          __skb_ext_exist()
 * @chunks: size currently in use, including this header, in 8-byte chunks
 * @data: start of extension data, variable sized
 *
 * Note: offsets/lengths are stored in chunks of 8 bytes; this allows the
 * use of 'u8' types while still addressing up to 2kb worth of extension
 * data.
 */
struct skb_ext {
refcount_t refcnt;
u8 offset[SKB_EXT_NUM]; /* in chunks of 8 bytes */
u8 chunks; /* same */
char data[0] __aligned(8);
};
void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_put(struct skb_ext *ext);
static inline void skb_ext_put(struct sk_buff *skb)
{
if (nf_bridge && refcount_dec_and_test(&nf_bridge->use))
kfree(nf_bridge);
if (skb->active_extensions)
__skb_ext_put(skb->extensions);
}
static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
static inline void skb_ext_get(struct sk_buff *skb)
{
if (nf_bridge)
refcount_inc(&nf_bridge->use);
if (skb->active_extensions) {
struct skb_ext *ext = skb->extensions;
if (ext)
refcount_inc(&ext->refcnt);
}
}
#endif /* CONFIG_BRIDGE_NETFILTER */
/* Make @dst share the extension area of @src.
 *
 * The active-extension bitmap is always copied.  The extension pointer is
 * only copied (and a reference taken on the shared area) when at least one
 * extension is active, because the pointer is undefined otherwise.
 * Whatever @dst referenced before is not released here.
 */
static inline void __skb_ext_copy(struct sk_buff *dst,
				  const struct sk_buff *src)
{
	dst->active_extensions = src->active_extensions;

	if (!src->active_extensions)
		return;

	refcount_inc(&src->extensions->refcnt);
	dst->extensions = src->extensions;
}
/* Replace the extensions of @dst with a shared reference to those of @src. */
static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *src)
{
	/* Drop what @dst currently holds before its fields are overwritten. */
	skb_ext_put(dst);

	__skb_ext_copy(dst, src);
}
/* True when extension @i has a non-zero offset recorded in area @ext. */
static inline bool __skb_ext_exist(const struct skb_ext *ext, enum skb_ext_id i)
{
	return ext->offset[i] != 0;
}
/* True when the bit for extension @id is set in skb->active_extensions. */
static inline bool skb_ext_exist(const struct sk_buff *skb, enum skb_ext_id id)
{
	return (skb->active_extensions & (1 << id)) != 0;
}
/* Remove extension @id from @skb; a no-op when it is not active. */
static inline void skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
{
	if (!skb_ext_exist(skb, id))
		return;

	__skb_ext_del(skb, id);
}
/* Return a pointer to the data of extension @id, or NULL when that
 * extension is not active on @skb.
 */
static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id)
{
	struct skb_ext *area;

	if (!skb_ext_exist(skb, id))
		return NULL;

	area = skb->extensions;

	/* offset[] is kept in 8-byte chunks relative to the area start */
	return (void *)area + (area->offset[id] << 3);
}
#else
static inline void skb_ext_put(struct sk_buff *skb) {}
static inline void skb_ext_get(struct sk_buff *skb) {}
static inline void skb_ext_del(struct sk_buff *skb, int unused) {}
static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {}
static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {}
#endif /* CONFIG_SKB_EXTENSIONS */
static inline void nf_reset(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@ -3915,8 +4008,7 @@ static inline void nf_reset(struct sk_buff *skb)
skb->_nfct = 0;
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
skb->nf_bridge = NULL;
skb_ext_del(skb, SKB_EXT_BRIDGE_NF);
#endif
}
@ -3934,7 +4026,7 @@ static inline void ipvs_reset(struct sk_buff *skb)
#endif
}
/* Note: This doesn't put any conntrack and bridge info in dst. */
/* Note: This doesn't put any conntrack info in dst. */
static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
bool copy)
{
@ -3942,10 +4034,6 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
dst->_nfct = src->_nfct;
nf_conntrack_get(skb_nfct(src));
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dst->nf_bridge = src->nf_bridge;
nf_bridge_get(src->nf_bridge);
#endif
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
if (copy)
dst->nf_trace = src->nf_trace;
@ -3956,9 +4044,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(skb_nfct(dst));
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(dst->nf_bridge);
#endif
__nf_copy(dst, src, true);
}
@ -3981,12 +4066,19 @@ static inline void skb_init_secmark(struct sk_buff *skb)
{ }
#endif
/* Non-zero when @skb carries an active SKB_EXT_SEC_PATH extension.
 * Always 0 when CONFIG_XFRM is not set.
 */
static inline int secpath_exists(const struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
return skb_ext_exist(skb, SKB_EXT_SEC_PATH);
#else
return 0;
#endif
}
static inline bool skb_irq_freeable(const struct sk_buff *skb)
{
return !skb->destructor &&
#if IS_ENABLED(CONFIG_XFRM)
!skb->sp &&
#endif
!secpath_exists(skb) &&
!skb_nfct(skb) &&
!skb->_skb_refdst &&
!skb_has_frag_list(skb);
@ -4032,10 +4124,10 @@ static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb)
return skb->dst_pending_confirm != 0;
}
static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
static inline struct sec_path *skb_sec_path(const struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
return skb->sp;
return skb_ext_find(skb, SKB_EXT_SEC_PATH);
#else
return NULL;
#endif

View File

@ -6,12 +6,12 @@
static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
{
skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC);
struct nf_bridge_info *b = skb_ext_add(skb, SKB_EXT_BRIDGE_NF);
if (likely(skb->nf_bridge))
refcount_set(&(skb->nf_bridge->use), 1);
if (b)
memset(b, 0, sizeof(*b));
return skb->nf_bridge;
return b;
}
void nf_bridge_update_protocol(struct sk_buff *skb);
@ -22,12 +22,6 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk,
int (*okfn)(struct net *, struct sock *,
struct sk_buff *));
static inline struct nf_bridge_info *
nf_bridge_info_get(const struct sk_buff *skb)
{
return skb->nf_bridge;
}
unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb);
static inline void nf_bridge_push_encap_header(struct sk_buff *skb)

View File

@ -1096,7 +1096,6 @@ struct xfrm_offload {
};
struct sec_path {
refcount_t refcnt;
int len;
int olen;
@ -1104,41 +1103,13 @@ struct sec_path {
struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH];
};
static inline int secpath_exists(struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
return skb->sp != NULL;
#else
return 0;
#endif
}
static inline struct sec_path *
secpath_get(struct sec_path *sp)
{
if (sp)
refcount_inc(&sp->refcnt);
return sp;
}
void __secpath_destroy(struct sec_path *sp);
static inline void
secpath_put(struct sec_path *sp)
{
if (sp && refcount_dec_and_test(&sp->refcnt))
__secpath_destroy(sp);
}
struct sec_path *secpath_dup(struct sec_path *src);
int secpath_set(struct sk_buff *skb);
struct sec_path *secpath_set(struct sk_buff *skb);
static inline void
secpath_reset(struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
secpath_put(skb->sp);
skb->sp = NULL;
skb_ext_del(skb, SKB_EXT_SEC_PATH);
#endif
}
@ -1194,7 +1165,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
if (sk && sk->sk_policy[XFRM_POLICY_IN])
return __xfrm_policy_check(sk, ndir, skb, family);
return (!net->xfrm.policy_count[dir] && !skb->sp) ||
return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ||
(skb_dst(skb)->flags & DST_NOPOLICY) ||
__xfrm_policy_check(sk, ndir, skb, family);
}
@ -1905,14 +1876,16 @@ static inline void xfrm_states_delete(struct xfrm_state **states, int n)
#ifdef CONFIG_XFRM
static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
{
return skb->sp->xvec[skb->sp->len - 1];
struct sec_path *sp = skb_sec_path(skb);
return sp->xvec[sp->len - 1];
}
#endif
static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
struct sec_path *sp = skb->sp;
struct sec_path *sp = skb_sec_path(skb);
if (!sp || !sp->olen || sp->len != sp->olen)
return NULL;

View File

@ -51,6 +51,9 @@ config NET_INGRESS
config NET_EGRESS
bool
config SKB_EXTENSIONS
bool
menu "Networking options"
source "net/packet/Kconfig"
@ -184,6 +187,7 @@ config BRIDGE_NETFILTER
depends on NETFILTER && INET
depends on NETFILTER_ADVANCED
select NETFILTER_FAMILY_BRIDGE
select SKB_EXTENSIONS
default m
---help---
Enabling this option will let arptables resp. iptables see bridged

View File

@ -132,10 +132,7 @@ static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
static void nf_bridge_info_free(struct sk_buff *skb)
{
if (skb->nf_bridge) {
nf_bridge_put(skb->nf_bridge);
skb->nf_bridge = NULL;
}
skb_ext_del(skb, SKB_EXT_BRIDGE_NF);
}
static inline struct net_device *bridge_parent(const struct net_device *dev)
@ -148,19 +145,7 @@ static inline struct net_device *bridge_parent(const struct net_device *dev)
static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
if (refcount_read(&nf_bridge->use) > 1) {
struct nf_bridge_info *tmp = nf_bridge_alloc(skb);
if (tmp) {
memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
refcount_set(&tmp->use, 1);
}
nf_bridge_put(nf_bridge);
nf_bridge = tmp;
}
return nf_bridge;
return skb_ext_add(skb, SKB_EXT_BRIDGE_NF);
}
unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
@ -247,7 +232,9 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
void nf_bridge_update_protocol(struct sk_buff *skb)
{
switch (skb->nf_bridge->orig_proto) {
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
switch (nf_bridge->orig_proto) {
case BRNF_PROTO_8021Q:
skb->protocol = htons(ETH_P_8021Q);
break;
@ -506,7 +493,6 @@ static unsigned int br_nf_pre_routing(void *priv,
if (br_validate_ipv4(state->net, skb))
return NF_DROP;
nf_bridge_put(skb->nf_bridge);
if (!nf_bridge_alloc(skb))
return NF_DROP;
if (!setup_pre_routing(skb))
@ -569,7 +555,8 @@ static unsigned int br_nf_forward_ip(void *priv,
struct net_device *parent;
u_int8_t pf;
if (!skb->nf_bridge)
nf_bridge = nf_bridge_info_get(skb);
if (!nf_bridge)
return NF_ACCEPT;
/* Need exclusive nf_bridge_info since we might have multiple
@ -701,7 +688,9 @@ br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
return PPPOE_SES_HLEN;
return 0;
}
@ -839,7 +828,9 @@ static unsigned int ip_sabotage_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
if (skb->nf_bridge && !skb->nf_bridge->in_prerouting &&
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (nf_bridge && !nf_bridge->in_prerouting &&
!netif_is_l3_master(skb->dev)) {
state->okfn(state->net, state->sk, skb);
return NF_STOLEN;
@ -877,7 +868,9 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
static int br_nf_dev_xmit(struct sk_buff *skb)
{
if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) {
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (nf_bridge && nf_bridge->bridged_dnat) {
br_nf_pre_routing_finish_bridge_slow(skb);
return 1;
}

View File

@ -224,8 +224,8 @@ unsigned int br_nf_pre_routing_ipv6(void *priv,
if (br_validate_ipv6(state->net, skb))
return NF_DROP;
nf_bridge_put(skb->nf_bridge);
if (!nf_bridge_alloc(skb))
nf_bridge = nf_bridge_alloc(skb);
if (!nf_bridge)
return NF_DROP;
if (!setup_pre_routing(skb))
return NF_DROP;

View File

@ -79,6 +79,9 @@
struct kmem_cache *skbuff_head_cache __ro_after_init;
static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
#ifdef CONFIG_SKB_EXTENSIONS
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#endif
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
@ -606,7 +609,6 @@ static void kfree_skbmem(struct sk_buff *skb)
void skb_release_head_state(struct sk_buff *skb)
{
skb_dst_drop(skb);
secpath_reset(skb);
if (skb->destructor) {
WARN_ON(in_irq());
skb->destructor(skb);
@ -614,9 +616,7 @@ void skb_release_head_state(struct sk_buff *skb)
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_conntrack_put(skb_nfct(skb));
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
#endif
skb_ext_put(skb);
}
/* Free everything but the sk_buff shell. */
@ -796,9 +796,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->dev = old->dev;
memcpy(new->cb, old->cb, sizeof(old->cb));
skb_dst_copy(new, old);
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
__skb_ext_copy(new, old);
__nf_copy(new, old, false);
/* Note : this field could be in headers_start/headers_end section
@ -3902,6 +3900,46 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(skb_gro_receive);
#ifdef CONFIG_SKB_EXTENSIONS
/* Extension sizes and offsets are counted in chunks of this many bytes. */
#define SKB_EXT_ALIGN_VALUE 8
/* Size of @x rounded up to whole chunks, expressed in chunks. */
#define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE)
/* Per-extension payload size in chunks, indexed by enum skb_ext_id. */
static const u8 skb_ext_type_len[] = {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
[SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info),
#endif
#ifdef CONFIG_XFRM
[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
#endif
};
/* Size, in chunks, of an extension area holding the struct skb_ext header
 * plus every extension type that is compiled in.
 */
static __always_inline unsigned int skb_ext_total_length(void)
{
return SKB_EXT_CHUNKSIZEOF(struct skb_ext) +
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
skb_ext_type_len[SKB_EXT_BRIDGE_NF] +
#endif
#ifdef CONFIG_XFRM
skb_ext_type_len[SKB_EXT_SEC_PATH] +
#endif
0;
}
/* Create the slab cache that extension areas are allocated from.
 * Each object is sized for the worst case, i.e. all compiled-in extensions
 * present at once (skb_ext_total_length() chunks).
 */
static void skb_extensions_init(void)
{
/* one bit per extension id must fit the 8-bit skb->active_extensions */
BUILD_BUG_ON(SKB_EXT_NUM >= 8);
/* struct skb_ext stores offsets and total size as u8 chunk counts */
BUILD_BUG_ON(skb_ext_total_length() > 255);
skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
}
#else
static void skb_extensions_init(void) {}
#endif
void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache",
@ -3916,6 +3954,7 @@ void __init skb_init(void)
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
skb_extensions_init();
}
static int
@ -5554,3 +5593,151 @@ void skb_condense(struct sk_buff *skb)
*/
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
}
#ifdef CONFIG_SKB_EXTENSIONS
/* Translate the chunk offset stored for @id into a pointer inside @ext. */
static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
{
	unsigned int byte_off = ext->offset[id] * SKB_EXT_ALIGN_VALUE;

	return (void *)ext + byte_off;
}
static struct skb_ext *skb_ext_alloc(void)
{
struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
if (new) {
memset(new->offset, 0, sizeof(new->offset));
refcount_set(&new->refcnt, 1);
}
return new;
}
/* Give the caller a private, writable extension area.
 *
 * @old:        area currently referenced by the caller
 * @old_active: the caller's skb->active_extensions bitmap
 *
 * If @old has a single reference it is returned unchanged.  Otherwise a new
 * area is allocated, the used part of @old is copied into it and the
 * caller's reference on @old is dropped.
 *
 * Returns the area to use, or NULL on allocation failure; in the failure
 * case @old is left untouched and the caller still holds its reference.
 */
static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
unsigned int old_active)
{
struct skb_ext *new;
/* sole owner: nothing to copy */
if (refcount_read(&old->refcnt) == 1)
return old;
new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
if (!new)
return NULL;
/* ->chunks counts the header too, so this copies header and payload */
memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
/* the memcpy also duplicated the shared refcount; reset it */
refcount_set(&new->refcnt, 1);
#ifdef CONFIG_XFRM
if (old_active & (1 << SKB_EXT_SEC_PATH)) {
struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH);
unsigned int i;
/* the copy now points at the same xfrm states: take a hold for each */
for (i = 0; i < sp->len; i++)
xfrm_state_hold(sp->xvec[i]);
}
#endif
/* drop the caller's reference only after @old has been fully read */
__skb_ext_put(old);
return new;
}
/**
 * skb_ext_add - allocate space for given extension, COW if needed
 * @skb: buffer
 * @id: extension to allocate space for
 *
 * Allocates enough space for the given extension.
 * If the extension is already present, a pointer to that extension
 * is returned.
 *
 * If the skb was cloned, COW applies and the returned memory can be
 * modified without changing the extension space of cloned buffers.
 *
 * On failure the skb is left unmodified: it keeps its previous extension
 * area and active_extensions bitmap.
 *
 * Returns pointer to the extension or NULL on allocation failure.
 */
void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
{
	struct skb_ext *new;
	unsigned int newlen, newoff;

	if (skb->active_extensions) {
		new = skb_ext_maybe_cow(skb->extensions,
					skb->active_extensions);
		if (!new)
			return NULL;

		/* If a copy was made, skb_ext_maybe_cow() has already dropped
		 * our reference on the previous area, so another owner may
		 * free it at any time.  Only look at @new from here on; its
		 * header is either the same memory or an exact copy.
		 */
		if (__skb_ext_exist(new, id))
			goto set_active;

		/* append the new extension behind what is already in use */
		newoff = new->chunks;
	} else {
		/* first extension: payload starts right after the header
		 * (sizeof does not evaluate *new, so this is safe here)
		 */
		newoff = SKB_EXT_CHUNKSIZEOF(*new);

		new = skb_ext_alloc();
		if (!new)
			return NULL;
	}

	newlen = newoff + skb_ext_type_len[id];
	new->chunks = newlen;
	new->offset[id] = newoff;
set_active:
	skb->extensions = new;
	skb->active_extensions |= 1 << id;
	return skb_ext_get_ptr(new, id);
}
EXPORT_SYMBOL(skb_ext_add);
#ifdef CONFIG_XFRM
/* Drop one reference on each of the sp->len xfrm states recorded in @sp. */
static void skb_ext_put_sp(struct sec_path *sp)
{
	unsigned int idx = 0;

	while (idx < sp->len) {
		xfrm_state_put(sp->xvec[idx]);
		idx++;
	}
}
#endif
/* Deactivate extension @id on @skb.
 *
 * The extension's bit is cleared in skb->active_extensions.  When no
 * extension remains active, the skb's reference on the extension area is
 * dropped.  Otherwise the area is kept; the space used by @id is not
 * reclaimed.
 */
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
{
struct skb_ext *ext = skb->extensions;
skb->active_extensions &= ~(1 << id);
if (skb->active_extensions == 0) {
skb->extensions = NULL;
__skb_ext_put(ext);
#ifdef CONFIG_XFRM
} else if (id == SKB_EXT_SEC_PATH &&
refcount_read(&ext->refcnt) == 1) {
/* sole owner of the area: release the xfrm states now and zero the
 * length so a later skb_ext_put_sp() on this area puts nothing
 */
struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH);
skb_ext_put_sp(sp);
sp->len = 0;
#endif
}
}
EXPORT_SYMBOL(__skb_ext_del);
/* Drop one reference on extension area @ext and free it once the last
 * reference is gone, releasing the xfrm states of a contained secpath
 * first.
 */
void __skb_ext_put(struct skb_ext *ext)
{
	/* If this is the last clone, nothing can increment the count after
	 * the read below, so the atomic decrement is skipped in that case.
	 * Otherwise decrement and bail out unless we dropped the final
	 * reference.
	 */
	if (refcount_read(&ext->refcnt) != 1 &&
	    !refcount_dec_and_test(&ext->refcnt))
		return;

#ifdef CONFIG_XFRM
	if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
		skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
#endif

	kmem_cache_free(skbuff_ext_cache, ext);
}
EXPORT_SYMBOL(__skb_ext_put);
#endif /* CONFIG_SKB_EXTENSIONS */

View File

@ -125,10 +125,13 @@ static void esp_output_done(struct crypto_async_request *base, int err)
void *tmp;
struct xfrm_state *x;
if (xo && (xo->flags & XFRM_DEV_RESUME))
x = skb->sp->xvec[skb->sp->len - 1];
else
if (xo && (xo->flags & XFRM_DEV_RESUME)) {
struct sec_path *sp = skb_sec_path(skb);
x = sp->xvec[sp->len - 1];
} else {
x = skb_dst(skb)->xfrm;
}
tmp = ESP_SKB_CB(skb)->tmp;
esp_ssg_unref(x, tmp);

View File

@ -46,11 +46,12 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
xo = xfrm_offload(skb);
if (!xo || !(xo->flags & CRYPTO_DONE)) {
err = secpath_set(skb);
if (err)
struct sec_path *sp = secpath_set(skb);
if (!sp)
goto out;
if (skb->sp->len == XFRM_MAX_DEPTH)
if (sp->len == XFRM_MAX_DEPTH)
goto out;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
@ -59,8 +60,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
if (!x)
goto out;
skb->sp->xvec[skb->sp->len++] = x;
skb->sp->olen++;
sp->xvec[sp->len++] = x;
sp->olen++;
xo = xfrm_offload(skb);
if (!xo) {
@ -114,6 +115,7 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
struct crypto_aead *aead;
netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
struct sec_path *sp;
if (!xo)
return ERR_PTR(-EINVAL);
@ -121,7 +123,8 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
return ERR_PTR(-EINVAL);
x = skb->sp->xvec[skb->sp->len - 1];
sp = skb_sec_path(skb);
x = sp->xvec[sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);

View File

@ -533,6 +533,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
skb_ext_copy(to, from);
#if IS_ENABLED(CONFIG_IP_VS)
to->ipvs_property = from->ipvs_property;
#endif

View File

@ -102,6 +102,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
/* Send RST reply */
void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
{
struct net_device *br_indev __maybe_unused;
struct sk_buff *nskb;
struct iphdr *niph;
const struct tcphdr *oth;
@ -147,10 +148,11 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
if (oldskb->nf_bridge) {
br_indev = nf_bridge_get_physindev(oldskb);
if (br_indev) {
struct ethhdr *oeth = eth_hdr(oldskb);
nskb->dev = nf_bridge_get_physindev(oldskb);
nskb->dev = br_indev;
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),

View File

@ -145,10 +145,13 @@ static void esp_output_done(struct crypto_async_request *base, int err)
void *tmp;
struct xfrm_state *x;
if (xo && (xo->flags & XFRM_DEV_RESUME))
x = skb->sp->xvec[skb->sp->len - 1];
else
if (xo && (xo->flags & XFRM_DEV_RESUME)) {
struct sec_path *sp = skb_sec_path(skb);
x = sp->xvec[sp->len - 1];
} else {
x = skb_dst(skb)->xfrm;
}
tmp = ESP_SKB_CB(skb)->tmp;
esp_ssg_unref(x, tmp);

View File

@ -68,11 +68,12 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
xo = xfrm_offload(skb);
if (!xo || !(xo->flags & CRYPTO_DONE)) {
err = secpath_set(skb);
if (err)
struct sec_path *sp = secpath_set(skb);
if (!sp)
goto out;
if (skb->sp->len == XFRM_MAX_DEPTH)
if (sp->len == XFRM_MAX_DEPTH)
goto out;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
@ -81,8 +82,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
if (!x)
goto out;
skb->sp->xvec[skb->sp->len++] = x;
skb->sp->olen++;
sp->xvec[sp->len++] = x;
sp->olen++;
xo = xfrm_offload(skb);
if (!xo) {
@ -141,6 +142,7 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
struct crypto_aead *aead;
netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
struct sec_path *sp;
if (!xo)
return ERR_PTR(-EINVAL);
@ -148,7 +150,8 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
return ERR_PTR(-EINVAL);
x = skb->sp->xvec[skb->sp->len - 1];
sp = skb_sec_path(skb);
x = sp->xvec[sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);

View File

@ -581,6 +581,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
skb_ext_copy(to, from);
skb_copy_secmark(to, from);
}

View File

@ -131,6 +131,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put);
void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
{
struct net_device *br_indev __maybe_unused;
struct sk_buff *nskb;
struct tcphdr _otcph;
const struct tcphdr *otcph;
@ -197,15 +198,18 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
if (oldskb->nf_bridge) {
br_indev = nf_bridge_get_physindev(oldskb);
if (br_indev) {
struct ethhdr *oeth = eth_hdr(oldskb);
nskb->dev = nf_bridge_get_physindev(oldskb);
nskb->dev = br_indev;
nskb->protocol = htons(ETH_P_IPV6);
ip6h->payload_len = htons(sizeof(struct tcphdr));
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
oeth->h_source, oeth->h_dest, nskb->len) < 0)
oeth->h_source, oeth->h_dest, nskb->len) < 0) {
kfree_skb(nskb);
return;
}
dev_queue_xmit(nskb);
} else
#endif

View File

@ -86,14 +86,16 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
{
struct net *net = dev_net(skb->dev);
struct xfrm_state *x = NULL;
struct sec_path *sp;
int i = 0;
if (secpath_set(skb)) {
sp = secpath_set(skb);
if (!sp) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
goto drop;
}
if (1 + skb->sp->len == XFRM_MAX_DEPTH) {
if (1 + sp->len == XFRM_MAX_DEPTH) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
goto drop;
}
@ -145,7 +147,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
goto drop;
}
skb->sp->xvec[skb->sp->len++] = x;
sp->xvec[sp->len++] = x;
spin_lock(&x->lock);

View File

@ -156,22 +156,20 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
const struct net_device *out,
const struct nf_loginfo *loginfo, const char *prefix)
{
const struct net_device *physoutdev __maybe_unused;
const struct net_device *physindev __maybe_unused;
nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ",
'0' + loginfo->u.log.level, prefix,
in ? in->name : "",
out ? out->name : "");
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (skb->nf_bridge) {
const struct net_device *physindev;
const struct net_device *physoutdev;
physindev = nf_bridge_get_physindev(skb);
if (physindev && in != physindev)
nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
physoutdev = nf_bridge_get_physoutdev(skb);
if (physoutdev && out != physoutdev)
nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
}
physindev = nf_bridge_get_physindev(skb);
if (physindev && in != physindev)
nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
physoutdev = nf_bridge_get_physoutdev(skb);
if (physoutdev && out != physoutdev)
nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
#endif
}
EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);

View File

@ -46,6 +46,24 @@ void nf_unregister_queue_handler(struct net *net)
}
EXPORT_SYMBOL(nf_unregister_queue_handler);
/* Call dev_put() on the bridge-netfilter physical input and output
 * devices of @skb, when the skb carries bridge netfilter info and the
 * respective device is set.  Compiles to an empty function unless
 * CONFIG_BRIDGE_NETFILTER is enabled.
 */
static void nf_queue_entry_release_br_nf_refs(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	struct net_device *indev, *outdev;

	/* No bridge netfilter info attached: nothing to release. */
	if (!nf_bridge_info_get(skb))
		return;

	indev = nf_bridge_get_physindev(skb);
	if (indev)
		dev_put(indev);

	outdev = nf_bridge_get_physoutdev(skb);
	if (outdev)
		dev_put(outdev);
#endif
}
void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
@ -57,20 +75,28 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
dev_put(state->out);
if (state->sk)
sock_put(state->sk);
nf_queue_entry_release_br_nf_refs(entry->skb);
}
EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
static void nf_queue_entry_get_br_nf_refs(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (nf_bridge) {
struct net_device *physdev;
physdev = nf_bridge_get_physindev(entry->skb);
physdev = nf_bridge_get_physindev(skb);
if (physdev)
dev_put(physdev);
physdev = nf_bridge_get_physoutdev(entry->skb);
dev_hold(physdev);
physdev = nf_bridge_get_physoutdev(skb);
if (physdev)
dev_put(physdev);
dev_hold(physdev);
}
#endif
}
EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
/* Bump dev refs so they don't vanish while packet is out */
void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
@ -83,18 +109,8 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
dev_hold(state->out);
if (state->sk)
sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
struct net_device *physdev;
physdev = nf_bridge_get_physindev(entry->skb);
if (physdev)
dev_hold(physdev);
physdev = nf_bridge_get_physoutdev(entry->skb);
if (physdev)
dev_hold(physdev);
}
#endif
nf_queue_entry_get_br_nf_refs(entry->skb);
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);

View File

@ -727,13 +727,13 @@ nf_queue_entry_dup(struct nf_queue_entry *e)
*/
static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
{
if (skb->nf_bridge)
if (nf_bridge_info_get(skb))
__skb_push(skb, skb->network_header - skb->mac_header);
}
static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
{
if (skb->nf_bridge)
if (nf_bridge_info_get(skb))
__skb_pull(skb, skb->network_header - skb->mac_header);
}
#else
@ -904,23 +904,22 @@ nfqnl_set_mode(struct nfqnl_instance *queue,
static int
dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
{
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
int physinif, physoutif;
physinif = nf_bridge_get_physinif(entry->skb);
physoutif = nf_bridge_get_physoutif(entry->skb);
if (physinif == ifindex || physoutif == ifindex)
return 1;
#endif
if (entry->state.in)
if (entry->state.in->ifindex == ifindex)
return 1;
if (entry->state.out)
if (entry->state.out->ifindex == ifindex)
return 1;
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
int physinif, physoutif;
physinif = nf_bridge_get_physinif(entry->skb);
physoutif = nf_bridge_get_physoutif(entry->skb);
if (physinif == ifindex || physoutif == ifindex)
return 1;
}
#endif
return 0;
}

View File

@ -229,7 +229,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
}
#ifdef CONFIG_XFRM
case NFT_META_SECPATH:
nft_reg_store8(dest, !!skb->sp);
nft_reg_store8(dest, secpath_exists(skb));
break;
#endif
#ifdef CONFIG_NF_TABLES_BRIDGE

View File

@ -161,7 +161,7 @@ static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct sec_path *sp = pkt->skb->sp;
const struct sec_path *sp = skb_sec_path(pkt->skb);
const struct xfrm_state *state;
if (sp == NULL || sp->len <= priv->spnum) {

View File

@ -33,7 +33,7 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
/* Not a bridged IP packet or no info available yet:
* LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
* the destination device will be a bridge. */
if (!skb->nf_bridge) {
if (!nf_bridge_info_exists(skb)) {
/* Return MATCH if the invert flags of the used options are on */
if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
!(info->invert & XT_PHYSDEV_OP_BRIDGED))

View File

@ -56,7 +56,7 @@ match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info,
unsigned short family)
{
const struct xt_policy_elem *e;
const struct sec_path *sp = skb->sp;
const struct sec_path *sp = skb_sec_path(skb);
int strict = info->flags & XT_POLICY_MATCH_STRICT;
int i, pos;

View File

@ -5,6 +5,7 @@ config XFRM
bool
depends on NET
select GRO_CELLS
select SKB_EXTENSIONS
config XFRM_OFFLOAD
bool

View File

@ -32,6 +32,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
struct softnet_data *sd;
netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
struct sec_path *sp;
if (!xo)
return skb;
@ -39,7 +40,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
if (!(features & NETIF_F_HW_ESP))
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
x = skb->sp->xvec[skb->sp->len - 1];
sp = skb_sec_path(skb);
x = sp->xvec[sp->len - 1];
if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
return skb;

View File

@ -38,8 +38,6 @@ struct xfrm_trans_cb {
#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
static struct kmem_cache *secpath_cachep __ro_after_init;
static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
@ -111,56 +109,24 @@ static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol,
return ret;
}
void __secpath_destroy(struct sec_path *sp)
struct sec_path *secpath_set(struct sk_buff *skb)
{
int i;
for (i = 0; i < sp->len; i++)
xfrm_state_put(sp->xvec[i]);
kmem_cache_free(secpath_cachep, sp);
}
EXPORT_SYMBOL(__secpath_destroy);
struct sec_path *sp, *tmp = skb_ext_find(skb, SKB_EXT_SEC_PATH);
struct sec_path *secpath_dup(struct sec_path *src)
{
struct sec_path *sp;
sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC);
sp = skb_ext_add(skb, SKB_EXT_SEC_PATH);
if (!sp)
return NULL;
sp->len = 0;
sp->olen = 0;
if (tmp) /* reused existing one (was COW'd if needed) */
return sp;
/* allocated new secpath */
memset(sp->ovec, 0, sizeof(sp->ovec));
sp->olen = 0;
sp->len = 0;
if (src) {
int i;
memcpy(sp, src, sizeof(*sp));
for (i = 0; i < sp->len; i++)
xfrm_state_hold(sp->xvec[i]);
}
refcount_set(&sp->refcnt, 1);
return sp;
}
EXPORT_SYMBOL(secpath_dup);
int secpath_set(struct sk_buff *skb)
{
struct sec_path *sp;
/* Allocate new secpath or COW existing one. */
if (!skb->sp || refcount_read(&skb->sp->refcnt) != 1) {
sp = secpath_dup(skb->sp);
if (!sp)
return -ENOMEM;
if (skb->sp)
secpath_put(skb->sp);
skb->sp = sp;
}
return 0;
}
EXPORT_SYMBOL(secpath_set);
/* Fetch spi and seq from ipsec header */
@ -236,6 +202,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
bool xfrm_gro = false;
bool crypto_done = false;
struct xfrm_offload *xo = xfrm_offload(skb);
struct sec_path *sp;
if (encap_type < 0) {
x = xfrm_input_state(skb);
@ -312,8 +279,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
break;
}
err = secpath_set(skb);
if (err) {
sp = secpath_set(skb);
if (!sp) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
goto drop;
}
@ -328,7 +295,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
daddr = (xfrm_address_t *)(skb_network_header(skb) +
XFRM_SPI_SKB_CB(skb)->daddroff);
do {
if (skb->sp->len == XFRM_MAX_DEPTH) {
sp = skb_sec_path(skb);
if (sp->len == XFRM_MAX_DEPTH) {
secpath_reset(skb);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
goto drop;
@ -344,7 +313,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
skb->mark = xfrm_smark_get(skb->mark, x);
skb->sp->xvec[skb->sp->len++] = x;
sp->xvec[sp->len++] = x;
lock:
spin_lock(&x->lock);
@ -468,8 +437,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
nf_reset(skb);
if (decaps) {
if (skb->sp)
skb->sp->olen = 0;
sp = skb_sec_path(skb);
if (sp)
sp->olen = 0;
skb_dst_drop(skb);
gro_cells_receive(&gro_cells, skb);
return 0;
@ -480,8 +450,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
if (xfrm_gro) {
if (skb->sp)
skb->sp->olen = 0;
sp = skb_sec_path(skb);
if (sp)
sp->olen = 0;
skb_dst_drop(skb);
gro_cells_receive(&gro_cells, skb);
return err;
@ -546,11 +517,6 @@ void __init xfrm_input_init(void)
if (err)
gro_cells.cells = NULL;
secpath_cachep = kmem_cache_create("secpath_cache",
sizeof(struct sec_path),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
for_each_possible_cpu(i) {
struct xfrm_trans_tasklet *trans;

View File

@ -251,7 +251,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
struct xfrm_if *xi;
bool xnet;
if (err && !skb->sp)
if (err && !secpath_exists(skb))
return 0;
x = xfrm_input_state(skb);

View File

@ -218,19 +218,16 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
if (xfrm_dev_offload_ok(skb, x)) {
struct sec_path *sp;
sp = secpath_dup(skb->sp);
sp = secpath_set(skb);
if (!sp) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
kfree_skb(skb);
return -ENOMEM;
}
if (skb->sp)
secpath_put(skb->sp);
skb->sp = sp;
skb->encapsulation = 1;
sp->olen++;
sp->xvec[skb->sp->len++] = x;
sp->xvec[sp->len++] = x;
xfrm_state_hold(x);
if (skb_is_gso(skb)) {

View File

@ -3200,11 +3200,12 @@ EXPORT_SYMBOL(xfrm_lookup_route);
static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
{
struct sec_path *sp = skb_sec_path(skb);
struct xfrm_state *x;
if (!skb->sp || idx < 0 || idx >= skb->sp->len)
if (!sp || idx < 0 || idx >= sp->len)
return 0;
x = skb->sp->xvec[idx];
x = sp->xvec[idx];
if (!x->type->reject)
return 0;
return x->type->reject(x, skb, fl);
@ -3304,6 +3305,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
struct flowi fl;
int xerr_idx = -1;
const struct xfrm_if_cb *ifcb;
struct sec_path *sp;
struct xfrm_if *xi;
u32 if_id = 0;
@ -3328,11 +3330,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
nf_nat_decode_session(skb, &fl, family);
/* First, check used SA against their selectors. */
if (skb->sp) {
sp = skb_sec_path(skb);
if (sp) {
int i;
for (i = skb->sp->len-1; i >= 0; i--) {
struct xfrm_state *x = skb->sp->xvec[i];
for (i = sp->len - 1; i >= 0; i--) {
struct xfrm_state *x = sp->xvec[i];
if (!xfrm_selector_match(&x->sel, &fl, family)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
return 0;
@ -3359,7 +3362,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
if (!pol) {
if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) {
xfrm_secpath_reject(xerr_idx, skb, &fl);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
return 0;
@ -3388,7 +3391,6 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
#endif
if (pol->action == XFRM_POLICY_ALLOW) {
struct sec_path *sp;
static struct sec_path dummy;
struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
@ -3396,7 +3398,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
int ti = 0;
int i, k;
if ((sp = skb->sp) == NULL)
sp = skb_sec_path(skb);
if (!sp)
sp = &dummy;
for (pi = 0; pi < npols; pi++) {

View File

@ -230,7 +230,7 @@ static int selinux_xfrm_skb_sid_ingress(struct sk_buff *skb,
u32 *sid, int ckall)
{
u32 sid_session = SECSID_NULL;
struct sec_path *sp = skb->sp;
struct sec_path *sp = skb_sec_path(skb);
if (sp) {
int i;
@ -408,7 +408,7 @@ int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
struct common_audit_data *ad)
{
int i;
struct sec_path *sp = skb->sp;
struct sec_path *sp = skb_sec_path(skb);
u32 peer_sid = SECINITSID_UNLABELED;
if (sp) {