linux-brain/net/sched/cls_matchall.c

441 lines
10 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/sched/cls_matchll.c Match-all classifier
*
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
struct cls_mall_head {
struct tcf_exts exts;
struct tcf_result res;
u32 handle;
u32 flags;
unsigned int in_hw_count;
struct tc_matchall_pcnt __percpu *pf;
struct rcu_work rwork;
bool deleting;
};
static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_mall_head *head = rcu_dereference_bh(tp->root);
cls_matchall: avoid panic when receiving a packet before filter set When a matchall classifier is added, there is a small time interval in which tp->root is NULL. If we receive a packet in this small time slice a NULL pointer dereference will happen, leading to a kernel panic: # tc qdisc replace dev eth0 ingress # tc filter add dev eth0 parent ffff: matchall action gact drop Unable to handle kernel NULL pointer dereference at virtual address 0000000000000034 Mem abort info: ESR = 0x96000005 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000005 CM = 0, WnR = 0 user pgtable: 4k pages, 39-bit VAs, pgdp = 00000000a623d530 [0000000000000034] pgd=0000000000000000, pud=0000000000000000 Internal error: Oops: 96000005 [#1] SMP Modules linked in: cls_matchall sch_ingress nls_iso8859_1 nls_cp437 vfat fat m25p80 spi_nor mtd xhci_plat_hcd xhci_hcd phy_generic sfp mdio_i2c usbcore i2c_mv64xxx marvell10g mvpp2 usb_common spi_orion mvmdio i2c_core sbsa_gwdt phylink ip_tables x_tables autofs4 Process ksoftirqd/0 (pid: 9, stack limit = 0x0000000009de7d62) CPU: 0 PID: 9 Comm: ksoftirqd/0 Not tainted 5.1.0-rc6 #21 Hardware name: Marvell 8040 MACCHIATOBin Double-shot (DT) pstate: 40000005 (nZcv daif -PAN -UAO) pc : mall_classify+0x28/0x78 [cls_matchall] lr : tcf_classify+0x78/0x138 sp : ffffff80109db9d0 x29: ffffff80109db9d0 x28: ffffffc426058800 x27: 0000000000000000 x26: ffffffc425b0dd00 x25: 0000000020000000 x24: 0000000000000000 x23: ffffff80109dbac0 x22: 0000000000000001 x21: ffffffc428ab5100 x20: ffffffc425b0dd00 x19: ffffff80109dbac0 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: ffffffbf108ad288 x12: dead000000000200 x11: 00000000f0000000 x10: 0000000000000001 x9 : ffffffbf1089a220 x8 : 0000000000000001 x7 : ffffffbebffaa950 x6 : 0000000000000000 x5 : 000000442d6ba000 x4 : 0000000000000000 x3 : ffffff8008735ad8 x2 : ffffff80109dbac0 x1 : ffffffc425b0dd00 x0 : ffffff8010592078 Call trace: mall_classify+0x28/0x78 [cls_matchall] tcf_classify+0x78/0x138 __netif_receive_skb_core+0x29c/0xa20 __netif_receive_skb_one_core+0x34/0x60 __netif_receive_skb+0x28/0x78 netif_receive_skb_internal+0x2c/0xc0 napi_gro_receive+0x1a0/0x1d8 mvpp2_poll+0x928/0xb18 [mvpp2] net_rx_action+0x108/0x378 __do_softirq+0x128/0x320 run_ksoftirqd+0x44/0x60 smpboot_thread_fn+0x168/0x1b0 kthread+0x12c/0x130 ret_from_fork+0x10/0x1c Code: aa0203f3 aa1e03e0 d503201f f9400684 (b9403480) ---[ end trace fc71e2ef7b8ab5a5 ]--- Kernel panic - not syncing: Fatal exception in interrupt SMP: stopping secondary CPUs Kernel Offset: disabled CPU features: 0x002,00002000 Memory Limit: none Rebooting in 1 seconds.. Fix this by adding a NULL check in mall_classify(). Fixes: ed76f5edccc9 ("net: sched: protect filter_chain list with filter_chain_lock mutex") Signed-off-by: Matteo Croce <mcroce@redhat.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-05-02 17:51:05 +09:00
if (unlikely(!head))
return -1;
if (tc_skip_sw(head->flags))
return -1;
net/sched: cls_matchall: fix crash when used with classful qdisc this script, edited from Linux Advanced Routing and Traffic Control guide tc q a dev en0 root handle 1: htb default a tc c a dev en0 parent 1: classid 1:1 htb rate 6mbit burst 15k tc c a dev en0 parent 1:1 classid 1:a htb rate 5mbit ceil 6mbit burst 15k tc c a dev en0 parent 1:1 classid 1:b htb rate 1mbit ceil 6mbit burst 15k tc f a dev en0 parent 1:0 prio 1 $clsname $clsargs classid 1:b ping $address -c1 tc -s c s dev en0 classifies traffic to 1:b or 1:a, depending on whether the packet matches or not the pattern $clsargs of filter $clsname. However, when $clsname is 'matchall', a systematic crash can be observed in htb_classify(). HTB and classful qdiscs don't assign initial value to struct tcf_result, but then they expect it to contain valid values after filters have been run. Thus, current 'matchall' ignores the TCA_MATCHALL_CLASSID attribute, configured by user, and makes HTB (and classful qdiscs) dereference random pointers. By assigning head->res to *res in mall_classify(), before the actions are invoked, we fix this crash and enable TCA_MATCHALL_CLASSID functionality, that had no effect on 'matchall' classifier since its first introduction. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1460213 Reported-by: Jiri Benc <jbenc@redhat.com> Fixes: b87f7936a932 ("net/sched: introduce Match-all classifier") Signed-off-by: Davide Caratti <dcaratti@redhat.com> Acked-by: Yotam Gigi <yotamg@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-09-16 21:02:21 +09:00
*res = head->res;
__this_cpu_inc(head->pf->rhit);
return tcf_exts_exec(skb, &head->exts, res);
}
static int mall_init(struct tcf_proto *tp)
{
return 0;
}
static void __mall_destroy(struct cls_mall_head *head)
{
tcf_exts_destroy(&head->exts);
tcf_exts_put_net(&head->exts);
free_percpu(head->pf);
kfree(head);
}
static void mall_destroy_work(struct work_struct *work)
{
struct cls_mall_head *head = container_of(to_rcu_work(work),
struct cls_mall_head,
rwork);
rtnl_lock();
__mall_destroy(head);
rtnl_unlock();
}
static void mall_destroy_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long cookie,
struct netlink_ext_ack *extack)
{
struct tc_cls_matchall_offload cls_mall = {};
struct tcf_block *block = tp->chain->block;
tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
cls_mall.command = TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = cookie;
net: sched: refactor block offloads counter usage Without rtnl lock protection filters can no longer safely manage block offloads counter themselves. Refactor cls API to protect block offloadcnt with tcf_block->cb_lock that is already used to protect driver callback list and nooffloaddevcnt counter. The counter can be modified by concurrent tasks by new functions that execute block callbacks (which is safe with previous patch that changed its type to atomic_t), however, block bind/unbind code that checks the counter value takes cb_lock in write mode to exclude any concurrent modifications. This approach prevents race conditions between bind/unbind and callback execution code but allows for concurrency for tc rule update path. Move block offload counter, filter in hardware counter and filter flags management from classifiers into cls hardware offloads API. Make functions tcf_block_offload_{inc|dec}() and tc_cls_offload_cnt_update() to be cls API private. Implement following new cls API to be used instead: tc_setup_cb_add() - non-destructive filter add. If filter that wasn't already in hardware is successfully offloaded, increment block offloads counter, set filter in hardware counter and flag. On failure, previously offloaded filter is considered to be intact and offloads counter is not decremented. tc_setup_cb_replace() - destructive filter replace. Release existing filter block offload counter and reset its in hardware counter and flag. Set new filter in hardware counter and flag. On failure, previously offloaded filter is considered to be destroyed and offload counter is decremented. tc_setup_cb_destroy() - filter destroy. Unconditionally decrement block offloads counter. tc_setup_cb_reoffload() - reoffload filter to single cb. Execute cb() and call tc_cls_offload_cnt_update() if cb() didn't return an error. Refactor all offload-capable classifiers to atomically offload filters to hardware, change block offload counter, and set filter in hardware counter and flag by means of the new cls API functions. Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-08-26 22:44:59 +09:00
tc_setup_cb_destroy(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, false,
&head->flags, &head->in_hw_count, true);
}
static int mall_replace_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long cookie,
struct netlink_ext_ack *extack)
{
struct tc_cls_matchall_offload cls_mall = {};
struct tcf_block *block = tp->chain->block;
bool skip_sw = tc_skip_sw(head->flags);
int err;
cls_mall.rule = flow_rule_alloc(tcf_exts_num_actions(&head->exts));
if (!cls_mall.rule)
return -ENOMEM;
tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
cls_mall.command = TC_CLSMATCHALL_REPLACE;
cls_mall.cookie = cookie;
err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true);
if (err) {
kfree(cls_mall.rule);
mall_destroy_hw_filter(tp, head, cookie, NULL);
if (skip_sw)
NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
else
err = 0;
return err;
}
net: sched: refactor block offloads counter usage Without rtnl lock protection filters can no longer safely manage block offloads counter themselves. Refactor cls API to protect block offloadcnt with tcf_block->cb_lock that is already used to protect driver callback list and nooffloaddevcnt counter. The counter can be modified by concurrent tasks by new functions that execute block callbacks (which is safe with previous patch that changed its type to atomic_t), however, block bind/unbind code that checks the counter value takes cb_lock in write mode to exclude any concurrent modifications. This approach prevents race conditions between bind/unbind and callback execution code but allows for concurrency for tc rule update path. Move block offload counter, filter in hardware counter and filter flags management from classifiers into cls hardware offloads API. Make functions tcf_block_offload_{inc|dec}() and tc_cls_offload_cnt_update() to be cls API private. Implement following new cls API to be used instead: tc_setup_cb_add() - non-destructive filter add. If filter that wasn't already in hardware is successfully offloaded, increment block offloads counter, set filter in hardware counter and flag. On failure, previously offloaded filter is considered to be intact and offloads counter is not decremented. tc_setup_cb_replace() - destructive filter replace. Release existing filter block offload counter and reset its in hardware counter and flag. Set new filter in hardware counter and flag. On failure, previously offloaded filter is considered to be destroyed and offload counter is decremented. tc_setup_cb_destroy() - filter destroy. Unconditionally decrement block offloads counter. tc_setup_cb_reoffload() - reoffload filter to single cb. Execute cb() and call tc_cls_offload_cnt_update() if cb() didn't return an error. Refactor all offload-capable classifiers to atomically offload filters to hardware, change block offload counter, and set filter in hardware counter and flag by means of the new cls API functions. Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-08-26 22:44:59 +09:00
err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall,
skip_sw, &head->flags, &head->in_hw_count, true);
tc_cleanup_flow_action(&cls_mall.rule->action);
kfree(cls_mall.rule);
net: sched: refactor block offloads counter usage Without rtnl lock protection filters can no longer safely manage block offloads counter themselves. Refactor cls API to protect block offloadcnt with tcf_block->cb_lock that is already used to protect driver callback list and nooffloaddevcnt counter. The counter can be modified by concurrent tasks by new functions that execute block callbacks (which is safe with previous patch that changed its type to atomic_t), however, block bind/unbind code that checks the counter value takes cb_lock in write mode to exclude any concurrent modifications. This approach prevents race conditions between bind/unbind and callback execution code but allows for concurrency for tc rule update path. Move block offload counter, filter in hardware counter and filter flags management from classifiers into cls hardware offloads API. Make functions tcf_block_offload_{inc|dec}() and tc_cls_offload_cnt_update() to be cls API private. Implement following new cls API to be used instead: tc_setup_cb_add() - non-destructive filter add. If filter that wasn't already in hardware is successfully offloaded, increment block offloads counter, set filter in hardware counter and flag. On failure, previously offloaded filter is considered to be intact and offloads counter is not decremented. tc_setup_cb_replace() - destructive filter replace. Release existing filter block offload counter and reset its in hardware counter and flag. Set new filter in hardware counter and flag. On failure, previously offloaded filter is considered to be destroyed and offload counter is decremented. tc_setup_cb_destroy() - filter destroy. Unconditionally decrement block offloads counter. tc_setup_cb_reoffload() - reoffload filter to single cb. Execute cb() and call tc_cls_offload_cnt_update() if cb() didn't return an error. Refactor all offload-capable classifiers to atomically offload filters to hardware, change block offload counter, and set filter in hardware counter and flag by means of the new cls API functions. Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-08-26 22:44:59 +09:00
if (err) {
mall_destroy_hw_filter(tp, head, cookie, NULL);
return err;
}
if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
return -EINVAL;
return 0;
}
static void mall_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
if (!head)
return;
tcf_unbind_filter(tp, &head->res);
if (!tc_skip_hw(head->flags))
mall_destroy_hw_filter(tp, head, (unsigned long) head, extack);
if (tcf_exts_get_net(&head->exts))
tcf_queue_work(&head->rwork, mall_destroy_work);
else
__mall_destroy(head);
}
static void *mall_get(struct tcf_proto *tp, u32 handle)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
if (head && head->handle == handle)
return head;
return NULL;
}
static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
[TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC },
[TCA_MATCHALL_CLASSID] = { .type = NLA_U32 },
[TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
};
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr,
struct netlink_ext_ack *extack)
{
int err;
err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
extack);
if (err < 0)
return err;
if (tb[TCA_MATCHALL_CLASSID]) {
head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
tcf_bind_filter(tp, &head->res, base);
}
return 0;
}
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
void **arg, bool ovr, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
struct cls_mall_head *new;
u32 flags = 0;
int err;
if (!tca[TCA_OPTIONS])
return -EINVAL;
if (head)
return -EEXIST;
netlink: make validation more configurable for future strictness We currently have two levels of strict validation: 1) liberal (default) - undefined (type >= max) & NLA_UNSPEC attributes accepted - attribute length >= expected accepted - garbage at end of message accepted 2) strict (opt-in) - NLA_UNSPEC attributes accepted - attribute length >= expected accepted Split out parsing strictness into four different options: * TRAILING - check that there's no trailing data after parsing attributes (in message or nested) * MAXTYPE - reject attrs > max known type * UNSPEC - reject attributes with NLA_UNSPEC policy entries * STRICT_ATTRS - strictly validate attribute size The default for future things should be *everything*. The current *_strict() is a combination of TRAILING and MAXTYPE, and is renamed to _deprecated_strict(). The current regular parsing has none of this, and is renamed to *_parse_deprecated(). Additionally it allows us to selectively set one of the new flags even on old policies. Notably, the UNSPEC flag could be useful in this case, since it can be arranged (by filling in the policy) to not be an incompatible userspace ABI change, but would then going forward prevent forgetting attribute entries. Similar can apply to the POLICY flag. We end up with the following renames: * nla_parse -> nla_parse_deprecated * nla_parse_strict -> nla_parse_deprecated_strict * nlmsg_parse -> nlmsg_parse_deprecated * nlmsg_parse_strict -> nlmsg_parse_deprecated_strict * nla_parse_nested -> nla_parse_nested_deprecated * nla_validate_nested -> nla_validate_nested_deprecated Using spatch, of course: @@ expression TB, MAX, HEAD, LEN, POL, EXT; @@ -nla_parse(TB, MAX, HEAD, LEN, POL, EXT) +nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression TB, MAX, NLA, POL, EXT; @@ -nla_parse_nested(TB, MAX, NLA, POL, EXT) +nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT) @@ expression START, MAX, POL, EXT; @@ -nla_validate_nested(START, MAX, POL, EXT) +nla_validate_nested_deprecated(START, MAX, POL, EXT) @@ expression NLH, HDRLEN, MAX, POL, EXT; @@ -nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT) +nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT) For this patch, don't actually add the strict, non-renamed versions yet so that it breaks compile if I get it wrong. Also, while at it, make nla_validate and nla_parse go down to a common __nla_validate_parse() function to avoid code duplication. Ultimately, this allows us to have very strict validation for every new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the next patch, while existing things will continue to work as is. In effect then, this adds fully strict validation for any new command. Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 21:07:28 +09:00
err = nla_parse_nested_deprecated(tb, TCA_MATCHALL_MAX,
tca[TCA_OPTIONS], mall_policy, NULL);
if (err < 0)
return err;
if (tb[TCA_MATCHALL_FLAGS]) {
flags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
if (!tc_flags_valid(flags))
return -EINVAL;
}
new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new)
return -ENOBUFS;
err = tcf_exts_init(&new->exts, net, TCA_MATCHALL_ACT, 0);
if (err)
goto err_exts_init;
if (!handle)
handle = 1;
new->handle = handle;
new->flags = flags;
new->pf = alloc_percpu(struct tc_matchall_pcnt);
if (!new->pf) {
err = -ENOMEM;
goto err_alloc_percpu;
}
err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
extack);
if (err)
goto err_set_parms;
if (!tc_skip_hw(new->flags)) {
err = mall_replace_hw_filter(tp, new, (unsigned long)new,
extack);
if (err)
goto err_replace_hw_filter;
}
if (!tc_in_hw(new->flags))
new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
*arg = head;
rcu_assign_pointer(tp->root, new);
return 0;
err_replace_hw_filter:
err_set_parms:
free_percpu(new->pf);
err_alloc_percpu:
tcf_exts_destroy(&new->exts);
err_exts_init:
kfree(new);
return err;
}
static int mall_delete(struct tcf_proto *tp, void *arg, bool *last,
bool rtnl_held, struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
head->deleting = true;
*last = true;
return 0;
}
static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg,
bool rtnl_held)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
if (arg->count < arg->skip)
goto skip;
if (!head || head->deleting)
return;
if (arg->fn(tp, head, arg) < 0)
arg->stop = 1;
skip:
arg->count++;
}
static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct tc_cls_matchall_offload cls_mall = {};
struct tcf_block *block = tp->chain->block;
int err;
if (tc_skip_hw(head->flags))
return 0;
cls_mall.rule = flow_rule_alloc(tcf_exts_num_actions(&head->exts));
if (!cls_mall.rule)
return -ENOMEM;
tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
cls_mall.command = add ?
TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = (unsigned long)head;
err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true);
if (err) {
kfree(cls_mall.rule);
if (add && tc_skip_sw(head->flags)) {
NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
return err;
}
return 0;
}
net: sched: refactor block offloads counter usage Without rtnl lock protection filters can no longer safely manage block offloads counter themselves. Refactor cls API to protect block offloadcnt with tcf_block->cb_lock that is already used to protect driver callback list and nooffloaddevcnt counter. The counter can be modified by concurrent tasks by new functions that execute block callbacks (which is safe with previous patch that changed its type to atomic_t), however, block bind/unbind code that checks the counter value takes cb_lock in write mode to exclude any concurrent modifications. This approach prevents race conditions between bind/unbind and callback execution code but allows for concurrency for tc rule update path. Move block offload counter, filter in hardware counter and filter flags management from classifiers into cls hardware offloads API. Make functions tcf_block_offload_{inc|dec}() and tc_cls_offload_cnt_update() to be cls API private. Implement following new cls API to be used instead: tc_setup_cb_add() - non-destructive filter add. If filter that wasn't already in hardware is successfully offloaded, increment block offloads counter, set filter in hardware counter and flag. On failure, previously offloaded filter is considered to be intact and offloads counter is not decremented. tc_setup_cb_replace() - destructive filter replace. Release existing filter block offload counter and reset its in hardware counter and flag. Set new filter in hardware counter and flag. On failure, previously offloaded filter is considered to be destroyed and offload counter is decremented. tc_setup_cb_destroy() - filter destroy. Unconditionally decrement block offloads counter. tc_setup_cb_reoffload() - reoffload filter to single cb. Execute cb() and call tc_cls_offload_cnt_update() if cb() didn't return an error. Refactor all offload-capable classifiers to atomically offload filters to hardware, change block offload counter, and set filter in hardware counter and flag by means of the new cls API functions. Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-08-26 22:44:59 +09:00
err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL,
&cls_mall, cb_priv, &head->flags,
&head->in_hw_count);
tc_cleanup_flow_action(&cls_mall.rule->action);
kfree(cls_mall.rule);
net: sched: refactor block offloads counter usage Without rtnl lock protection filters can no longer safely manage block offloads counter themselves. Refactor cls API to protect block offloadcnt with tcf_block->cb_lock that is already used to protect driver callback list and nooffloaddevcnt counter. The counter can be modified by concurrent tasks by new functions that execute block callbacks (which is safe with previous patch that changed its type to atomic_t), however, block bind/unbind code that checks the counter value takes cb_lock in write mode to exclude any concurrent modifications. This approach prevents race conditions between bind/unbind and callback execution code but allows for concurrency for tc rule update path. Move block offload counter, filter in hardware counter and filter flags management from classifiers into cls hardware offloads API. Make functions tcf_block_offload_{inc|dec}() and tc_cls_offload_cnt_update() to be cls API private. Implement following new cls API to be used instead: tc_setup_cb_add() - non-destructive filter add. If filter that wasn't already in hardware is successfully offloaded, increment block offloads counter, set filter in hardware counter and flag. On failure, previously offloaded filter is considered to be intact and offloads counter is not decremented. tc_setup_cb_replace() - destructive filter replace. Release existing filter block offload counter and reset its in hardware counter and flag. Set new filter in hardware counter and flag. On failure, previously offloaded filter is considered to be destroyed and offload counter is decremented. tc_setup_cb_destroy() - filter destroy. Unconditionally decrement block offloads counter. tc_setup_cb_reoffload() - reoffload filter to single cb. Execute cb() and call tc_cls_offload_cnt_update() if cb() didn't return an error. Refactor all offload-capable classifiers to atomically offload filters to hardware, change block offload counter, and set filter in hardware counter and flag by means of the new cls API functions. Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-08-26 22:44:59 +09:00
if (err)
return err;
return 0;
}
static void mall_stats_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long cookie)
{
struct tc_cls_matchall_offload cls_mall = {};
struct tcf_block *block = tp->chain->block;
tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, NULL);
cls_mall.command = TC_CLSMATCHALL_STATS;
cls_mall.cookie = cookie;
net: sched: refactor block offloads counter usage Without rtnl lock protection filters can no longer safely manage block offloads counter themselves. Refactor cls API to protect block offloadcnt with tcf_block->cb_lock that is already used to protect driver callback list and nooffloaddevcnt counter. The counter can be modified by concurrent tasks by new functions that execute block callbacks (which is safe with previous patch that changed its type to atomic_t), however, block bind/unbind code that checks the counter value takes cb_lock in write mode to exclude any concurrent modifications. This approach prevents race conditions between bind/unbind and callback execution code but allows for concurrency for tc rule update path. Move block offload counter, filter in hardware counter and filter flags management from classifiers into cls hardware offloads API. Make functions tcf_block_offload_{inc|dec}() and tc_cls_offload_cnt_update() to be cls API private. Implement following new cls API to be used instead: tc_setup_cb_add() - non-destructive filter add. If filter that wasn't already in hardware is successfully offloaded, increment block offloads counter, set filter in hardware counter and flag. On failure, previously offloaded filter is considered to be intact and offloads counter is not decremented. tc_setup_cb_replace() - destructive filter replace. Release existing filter block offload counter and reset its in hardware counter and flag. Set new filter in hardware counter and flag. On failure, previously offloaded filter is considered to be destroyed and offload counter is decremented. tc_setup_cb_destroy() - filter destroy. Unconditionally decrement block offloads counter. tc_setup_cb_reoffload() - reoffload filter to single cb. Execute cb() and call tc_cls_offload_cnt_update() if cb() didn't return an error. Refactor all offload-capable classifiers to atomically offload filters to hardware, change block offload counter, and set filter in hardware counter and flag by means of the new cls API functions. Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-08-26 22:44:59 +09:00
tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true);
tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes,
cls_mall.stats.pkts, cls_mall.stats.lastused);
}
static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct tc_matchall_pcnt gpf = {};
struct cls_mall_head *head = fh;
struct nlattr *nest;
int cpu;
if (!head)
return skb->len;
if (!tc_skip_hw(head->flags))
mall_stats_hw_filter(tp, head, (unsigned long)head);
t->tcm_handle = head->handle;
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (!nest)
goto nla_put_failure;
if (head->res.classid &&
nla_put_u32(skb, TCA_MATCHALL_CLASSID, head->res.classid))
goto nla_put_failure;
if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags))
goto nla_put_failure;
for_each_possible_cpu(cpu) {
struct tc_matchall_pcnt *pf = per_cpu_ptr(head->pf, cpu);
gpf.rhit += pf->rhit;
}
if (nla_put_64bit(skb, TCA_MATCHALL_PCNT,
sizeof(struct tc_matchall_pcnt),
&gpf, TCA_MATCHALL_PAD))
goto nla_put_failure;
if (tcf_exts_dump(skb, &head->exts))
goto nla_put_failure;
nla_nest_end(skb, nest);
if (tcf_exts_dump_stats(skb, &head->exts) < 0)
goto nla_put_failure;
return skb->len;
nla_put_failure:
nla_nest_cancel(skb, nest);
return -1;
}
static void mall_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
unsigned long base)
{
struct cls_mall_head *head = fh;
if (head && head->res.classid == classid) {
if (cl)
__tcf_bind_filter(q, &head->res, base);
else
__tcf_unbind_filter(q, &head->res);
}
}
static struct tcf_proto_ops cls_mall_ops __read_mostly = {
.kind = "matchall",
.classify = mall_classify,
.init = mall_init,
.destroy = mall_destroy,
.get = mall_get,
.change = mall_change,
.delete = mall_delete,
.walk = mall_walk,
.reoffload = mall_reoffload,
.dump = mall_dump,
.bind_class = mall_bind_class,
.owner = THIS_MODULE,
};
static int __init cls_mall_init(void)
{
return register_tcf_proto_ops(&cls_mall_ops);
}
static void __exit cls_mall_exit(void)
{
unregister_tcf_proto_ops(&cls_mall_ops);
}
module_init(cls_mall_init);
module_exit(cls_mall_exit);
MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
MODULE_DESCRIPTION("Match-all classifier");
MODULE_LICENSE("GPL v2");