linux-brain/net/sched/em_ipt.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/em_ipt.c IPtables matches Ematch
 *
 * (c) 2018 Eyal Birger <eyal.birger@gmail.com>
 */

#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/tc_ematch/tc_em_ipt.h>
#include <linux/netfilter.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/pkt_cls.h>

struct em_ipt_match {
	const struct xt_match *match;
	u32 hook;
	u8 nfproto;
	u8 match_data[0] __aligned(8);
};

struct em_ipt_xt_match {
	char *match_name;
	int (*validate_match_data)(struct nlattr **tb, u8 mrev);
};

static const struct nla_policy em_ipt_policy[TCA_EM_IPT_MAX + 1] = {
	[TCA_EM_IPT_MATCH_NAME] = { .type = NLA_STRING,
				    .len = XT_EXTENSION_MAXNAMELEN },
	[TCA_EM_IPT_MATCH_REVISION] = { .type = NLA_U8 },
	[TCA_EM_IPT_HOOK] = { .type = NLA_U32 },
	[TCA_EM_IPT_NFPROTO] = { .type = NLA_U8 },
	[TCA_EM_IPT_MATCH_DATA] = { .type = NLA_UNSPEC },
};
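
/* Validate the user-supplied match data by calling xt_check_match() with a
 * zeroed dummy ipt/ip6t "filter" table entry as the enclosing rule.
 */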
static int check_match(struct net *net, struct em_ipt_match *im, int mdata_len)
{
	struct xt_mtchk_param mtpar = {};
	union {
		struct ipt_entry e4;
		struct ip6t_entry e6;
	} e = {};

	mtpar.net = net;
	mtpar.table = "filter";
	mtpar.hook_mask = 1 << im->hook;
	mtpar.family = im->match->family;
	mtpar.match = im->match;
	mtpar.entryinfo = &e;
	mtpar.matchinfo = (void *)im->match_data;

	return xt_check_match(&mtpar, mdata_len, 0, 0);
}

static int policy_validate_match_data(struct nlattr **tb, u8 mrev)
{
	if (mrev != 0) {
		pr_err("only policy match revision 0 supported");
		return -EINVAL;
	}

	if (nla_get_u32(tb[TCA_EM_IPT_HOOK]) != NF_INET_PRE_ROUTING) {
		pr_err("policy can only be matched on NF_INET_PRE_ROUTING");
		return -EINVAL;
	}

	return 0;
}

static int addrtype_validate_match_data(struct nlattr **tb, u8 mrev)
{
	if (mrev != 1) {
		pr_err("only addrtype match revision 1 supported");
		return -EINVAL;
	}

	return 0;
}
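
/* xt match extensions usable from this ematch, each paired with a callback
 * that validates the netlink attributes supplied for it.
 */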
static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
	{
		.match_name = "policy",
		.validate_match_data = policy_validate_match_data
	},
	{
		.match_name = "addrtype",
		.validate_match_data = addrtype_validate_match_data
	},
	{}
};
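
/* Resolve the requested extension: find it by name in em_ipt_xt_matches,
 * validate the supplied attributes/revision, then look up (and, if needed,
 * load) the xt match for the requested nfproto.
 */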
static struct xt_match *get_xt_match(struct nlattr **tb)
{
	const struct em_ipt_xt_match *m;
	struct nlattr *mname_attr;
	u8 nfproto, mrev = 0;
	int ret;

	mname_attr = tb[TCA_EM_IPT_MATCH_NAME];
	for (m = em_ipt_xt_matches; m->match_name; m++) {
		if (!nla_strcmp(mname_attr, m->match_name))
			break;
	}

	if (!m->match_name) {
		pr_err("Unsupported xt match");
		return ERR_PTR(-EINVAL);
	}

	if (tb[TCA_EM_IPT_MATCH_REVISION])
		mrev = nla_get_u8(tb[TCA_EM_IPT_MATCH_REVISION]);

	ret = m->validate_match_data(tb, mrev);
	if (ret < 0)
		return ERR_PTR(ret);

	nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
	return xt_request_find_match(nfproto, m->match_name, mrev);
}
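
/* Parse the TCA_EM_IPT_* attributes, resolve and check the xt match, and
 * stash its configuration in the ematch private data.
 */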
static int em_ipt_change(struct net *net, void *data, int data_len,
			 struct tcf_ematch *em)
{
	struct nlattr *tb[TCA_EM_IPT_MAX + 1];
	struct em_ipt_match *im = NULL;
	struct xt_match *match;
	int mdata_len, ret;
	u8 nfproto;

	ret = nla_parse_deprecated(tb, TCA_EM_IPT_MAX, data, data_len,
				   em_ipt_policy, NULL);
	if (ret < 0)
		return ret;

	if (!tb[TCA_EM_IPT_HOOK] || !tb[TCA_EM_IPT_MATCH_NAME] ||
	    !tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO])
		return -EINVAL;

	nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
	switch (nfproto) {
	case NFPROTO_IPV4:
	case NFPROTO_IPV6:
		break;
	default:
		return -EINVAL;
	}

	match = get_xt_match(tb);
	if (IS_ERR(match)) {
		pr_err("unable to load match\n");
		return PTR_ERR(match);
	}

	mdata_len = XT_ALIGN(nla_len(tb[TCA_EM_IPT_MATCH_DATA]));
	im = kzalloc(sizeof(*im) + mdata_len, GFP_KERNEL);
	if (!im) {
		ret = -ENOMEM;
		goto err;
	}

	im->match = match;
	im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
	im->nfproto = nfproto;
	nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);

	ret = check_match(net, im, mdata_len);
	if (ret)
		goto err;

	em->datalen = sizeof(*im) + mdata_len;
	em->data = (unsigned long)im;
	return 0;

err:
	kfree(im);
	module_put(match->me);
	return ret;
}
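
/* Undo em_ipt_change(): run the match destructor (if any), drop the module
 * reference and free the private data.
 */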
static void em_ipt_destroy(struct tcf_ematch *em)
{
	struct em_ipt_match *im = (void *)em->data;

	if (!im)
		return;

	if (im->match->destroy) {
		struct xt_mtdtor_param par = {
			.net = em->net,
			.match = im->match,
			.matchinfo = im->match_data,
			.family = im->match->family
		};
		im->match->destroy(&par);
	}
	module_put(im->match->me);
	kfree((void *)im);
}
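
/* Classification path: build an nf_hook_state for the configured hook and
 * run the xt match against the skb; non-IPv4/IPv6 packets never match.
 */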
static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
			struct tcf_pkt_info *info)
{
	const struct em_ipt_match *im = (const void *)em->data;
	struct xt_action_param acpar = {};
	struct net_device *indev = NULL;
	u8 nfproto = im->match->family;
	struct nf_hook_state state;
	int ret;

	switch (skb_protocol(skb, true)) {
	case htons(ETH_P_IP):
		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
			return 0;
		if (nfproto == NFPROTO_UNSPEC)
			nfproto = NFPROTO_IPV4;
		break;
	case htons(ETH_P_IPV6):
		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
			return 0;
		if (nfproto == NFPROTO_UNSPEC)
			nfproto = NFPROTO_IPV6;
		break;
	default:
		return 0;
	}

	rcu_read_lock();

	if (skb->skb_iif)
		indev = dev_get_by_index_rcu(em->net, skb->skb_iif);

	nf_hook_state_init(&state, im->hook, nfproto,
			   indev ?: skb->dev, skb->dev, NULL, em->net, NULL);

	acpar.match = im->match;
	acpar.matchinfo = im->match_data;
	acpar.state = &state;

	ret = im->match->match(skb, &acpar);

	rcu_read_unlock();

	return ret;
}
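
/* Dump the current configuration back to user space as TCA_EM_IPT_*
 * attributes.
 */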
static int em_ipt_dump(struct sk_buff *skb, struct tcf_ematch *em)
{
	struct em_ipt_match *im = (void *)em->data;

	if (nla_put_string(skb, TCA_EM_IPT_MATCH_NAME, im->match->name) < 0)
		return -EMSGSIZE;
	if (nla_put_u32(skb, TCA_EM_IPT_HOOK, im->hook) < 0)
		return -EMSGSIZE;
	if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
		return -EMSGSIZE;
	if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->nfproto) < 0)
		return -EMSGSIZE;
	if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
		    im->match->usersize ?: im->match->matchsize,
		    im->match_data) < 0)
		return -EMSGSIZE;

	return 0;
}

static struct tcf_ematch_ops em_ipt_ops = {
	.kind = TCF_EM_IPT,
	.change = em_ipt_change,
	.destroy = em_ipt_destroy,
	.match = em_ipt_match,
	.dump = em_ipt_dump,
	.owner = THIS_MODULE,
	.link = LIST_HEAD_INIT(em_ipt_ops.link)
};

static int __init init_em_ipt(void)
{
	return tcf_em_register(&em_ipt_ops);
}

static void __exit exit_em_ipt(void)
{
	tcf_em_unregister(&em_ipt_ops);
}

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eyal Birger <eyal.birger@gmail.com>");
MODULE_DESCRIPTION("TC extended match for IPtables matches");

module_init(init_em_ipt);
module_exit(exit_em_ipt);

MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPT);