linux-brain/net/openvswitch/flow_table.c
Thomas Gleixner c942299924 treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 269
Based on 1 normalized pattern(s):

  this program is free software you can redistribute it and or modify
  it under the terms of version 2 of the gnu general public license as
  published by the free software foundation this program is
  distributed in the hope that it will be useful but without any
  warranty without even the implied warranty of merchantability or
  fitness for a particular purpose see the gnu general public license
  for more details you should have received a copy of the gnu general
  public license along with this program if not write to the free
  software foundation inc 51 franklin street fifth floor boston ma
  02110 1301 usa

extracted by the scancode license scanner the SPDX license identifier

  GPL-2.0-only

has been chosen to replace the boilerplate/reference in 21 file(s).

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexios Zavras <alexios.zavras@intel.com>
Reviewed-by: Allison Randal <allison@lohutok.net>
Reviewed-by: Richard Fontana <rfontana@redhat.com>
Cc: linux-spdx@vger.kernel.org
Link: https://lkml.kernel.org/r/20190529141334.228102212@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-06-05 17:30:29 +02:00

738 lines
18 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2007-2014 Nicira, Inc.
*/
#include "flow.h"
#include "datapath.h"
#include "flow_netlink.h"
#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
#include <linux/cpumask.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#define TBL_MIN_BUCKETS 1024
#define REHASH_INTERVAL (10 * 60 * HZ)
static struct kmem_cache *flow_cache;
struct kmem_cache *flow_stats_cache __read_mostly;
static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
return range->end - range->start;
}
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
bool full, const struct sw_flow_mask *mask)
{
int start = full ? 0 : mask->range.start;
int len = full ? sizeof *dst : range_n_bytes(&mask->range);
const long *m = (const long *)((const u8 *)&mask->key + start);
const long *s = (const long *)((const u8 *)src + start);
long *d = (long *)((u8 *)dst + start);
int i;
/* If 'full' is true then all of 'dst' is fully initialized. Otherwise,
* if 'full' is false the memory outside of the 'mask->range' is left
* uninitialized. This can be used as an optimization when further
* operations on 'dst' only use contents within 'mask->range'.
*/
for (i = 0; i < len; i += sizeof(long))
*d++ = *s++ & *m++;
}
struct sw_flow *ovs_flow_alloc(void)
{
struct sw_flow *flow;
struct flow_stats *stats;
flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
if (!flow)
return ERR_PTR(-ENOMEM);
flow->stats_last_writer = -1;
/* Initialize the default stat node. */
stats = kmem_cache_alloc_node(flow_stats_cache,
GFP_KERNEL | __GFP_ZERO,
node_online(0) ? 0 : NUMA_NO_NODE);
if (!stats)
goto err;
spin_lock_init(&stats->lock);
RCU_INIT_POINTER(flow->stats[0], stats);
cpumask_set_cpu(0, &flow->cpu_used_mask);
return flow;
err:
kmem_cache_free(flow_cache, flow);
return ERR_PTR(-ENOMEM);
}
int ovs_flow_tbl_count(const struct flow_table *table)
{
return table->count;
}
static void flow_free(struct sw_flow *flow)
{
int cpu;
if (ovs_identifier_is_key(&flow->id))
kfree(flow->id.unmasked_key);
if (flow->sf_acts)
ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
/* We open code this to make sure cpu 0 is always considered */
for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
(struct flow_stats __force *)flow->stats[cpu]);
kmem_cache_free(flow_cache, flow);
}
static void rcu_free_flow_callback(struct rcu_head *rcu)
{
struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
flow_free(flow);
}
void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
if (!flow)
return;
if (deferred)
call_rcu(&flow->rcu, rcu_free_flow_callback);
else
flow_free(flow);
}
static void __table_instance_destroy(struct table_instance *ti)
{
kvfree(ti->buckets);
kfree(ti);
}
static struct table_instance *table_instance_alloc(int new_size)
{
struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
int i;
if (!ti)
return NULL;
ti->buckets = kvmalloc_array(new_size, sizeof(struct hlist_head),
GFP_KERNEL);
if (!ti->buckets) {
kfree(ti);
return NULL;
}
for (i = 0; i < new_size; i++)
INIT_HLIST_HEAD(&ti->buckets[i]);
ti->n_buckets = new_size;
ti->node_ver = 0;
ti->keep_flows = false;
get_random_bytes(&ti->hash_seed, sizeof(u32));
return ti;
}
int ovs_flow_tbl_init(struct flow_table *table)
{
struct table_instance *ti, *ufid_ti;
ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ti)
return -ENOMEM;
ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ufid_ti)
goto free_ti;
rcu_assign_pointer(table->ti, ti);
rcu_assign_pointer(table->ufid_ti, ufid_ti);
INIT_LIST_HEAD(&table->mask_list);
table->last_rehash = jiffies;
table->count = 0;
table->ufid_count = 0;
return 0;
free_ti:
__table_instance_destroy(ti);
return -ENOMEM;
}
static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
{
struct table_instance *ti = container_of(rcu, struct table_instance, rcu);
__table_instance_destroy(ti);
}
static void table_instance_destroy(struct table_instance *ti,
struct table_instance *ufid_ti,
bool deferred)
{
int i;
if (!ti)
return;
BUG_ON(!ufid_ti);
if (ti->keep_flows)
goto skip_flows;
for (i = 0; i < ti->n_buckets; i++) {
struct sw_flow *flow;
struct hlist_head *head = &ti->buckets[i];
struct hlist_node *n;
int ver = ti->node_ver;
int ufid_ver = ufid_ti->node_ver;
hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
hlist_del_rcu(&flow->flow_table.node[ver]);
if (ovs_identifier_is_ufid(&flow->id))
hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
ovs_flow_free(flow, deferred);
}
}
skip_flows:
if (deferred) {
call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
} else {
__table_instance_destroy(ti);
__table_instance_destroy(ufid_ti);
}
}
/* No need for locking this function is called from RCU callback or
* error path.
*/
void ovs_flow_tbl_destroy(struct flow_table *table)
{
struct table_instance *ti = rcu_dereference_raw(table->ti);
struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
table_instance_destroy(ti, ufid_ti, false);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
u32 *bucket, u32 *last)
{
struct sw_flow *flow;
struct hlist_head *head;
int ver;
int i;
ver = ti->node_ver;
while (*bucket < ti->n_buckets) {
i = 0;
head = &ti->buckets[*bucket];
hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
if (i < *last) {
i++;
continue;
}
*last = i + 1;
return flow;
}
(*bucket)++;
*last = 0;
}
return NULL;
}
static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
{
hash = jhash_1word(hash, ti->hash_seed);
return &ti->buckets[hash & (ti->n_buckets - 1)];
}
static void table_instance_insert(struct table_instance *ti,
struct sw_flow *flow)
{
struct hlist_head *head;
head = find_bucket(ti, flow->flow_table.hash);
hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head);
}
static void ufid_table_instance_insert(struct table_instance *ti,
struct sw_flow *flow)
{
struct hlist_head *head;
head = find_bucket(ti, flow->ufid_table.hash);
hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);
}
static void flow_table_copy_flows(struct table_instance *old,
struct table_instance *new, bool ufid)
{
int old_ver;
int i;
old_ver = old->node_ver;
new->node_ver = !old_ver;
/* Insert in new table. */
for (i = 0; i < old->n_buckets; i++) {
struct sw_flow *flow;
struct hlist_head *head = &old->buckets[i];
if (ufid)
hlist_for_each_entry(flow, head,
ufid_table.node[old_ver])
ufid_table_instance_insert(new, flow);
else
hlist_for_each_entry(flow, head,
flow_table.node[old_ver])
table_instance_insert(new, flow);
}
old->keep_flows = true;
}
static struct table_instance *table_instance_rehash(struct table_instance *ti,
int n_buckets, bool ufid)
{
struct table_instance *new_ti;
new_ti = table_instance_alloc(n_buckets);
if (!new_ti)
return NULL;
flow_table_copy_flows(ti, new_ti, ufid);
return new_ti;
}
int ovs_flow_tbl_flush(struct flow_table *flow_table)
{
struct table_instance *old_ti, *new_ti;
struct table_instance *old_ufid_ti, *new_ufid_ti;
new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!new_ti)
return -ENOMEM;
new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!new_ufid_ti)
goto err_free_ti;
old_ti = ovsl_dereference(flow_table->ti);
old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
rcu_assign_pointer(flow_table->ti, new_ti);
rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
flow_table->last_rehash = jiffies;
flow_table->count = 0;
flow_table->ufid_count = 0;
table_instance_destroy(old_ti, old_ufid_ti, true);
return 0;
err_free_ti:
__table_instance_destroy(new_ti);
return -ENOMEM;
}
static u32 flow_hash(const struct sw_flow_key *key,
const struct sw_flow_key_range *range)
{
int key_start = range->start;
int key_end = range->end;
const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
int hash_u32s = (key_end - key_start) >> 2;
/* Make sure number of hash bytes are multiple of u32. */
BUILD_BUG_ON(sizeof(long) % sizeof(u32));
return jhash2(hash_key, hash_u32s, 0);
}
static int flow_key_start(const struct sw_flow_key *key)
{
if (key->tun_proto)
return 0;
else
return rounddown(offsetof(struct sw_flow_key, phy),
sizeof(long));
}
static bool cmp_key(const struct sw_flow_key *key1,
const struct sw_flow_key *key2,
int key_start, int key_end)
{
const long *cp1 = (const long *)((const u8 *)key1 + key_start);
const long *cp2 = (const long *)((const u8 *)key2 + key_start);
long diffs = 0;
int i;
for (i = key_start; i < key_end; i += sizeof(long))
diffs |= *cp1++ ^ *cp2++;
return diffs == 0;
}
static bool flow_cmp_masked_key(const struct sw_flow *flow,
const struct sw_flow_key *key,
const struct sw_flow_key_range *range)
{
return cmp_key(&flow->key, key, range->start, range->end);
}
static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
const struct sw_flow_match *match)
{
struct sw_flow_key *key = match->key;
int key_start = flow_key_start(key);
int key_end = match->range.end;
BUG_ON(ovs_identifier_is_ufid(&flow->id));
return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
}
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
const struct sw_flow_key *unmasked,
const struct sw_flow_mask *mask)
{
struct sw_flow *flow;
struct hlist_head *head;
u32 hash;
struct sw_flow_key masked_key;
ovs_flow_mask_key(&masked_key, unmasked, false, mask);
hash = flow_hash(&masked_key, &mask->range);
head = find_bucket(ti, hash);
hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
if (flow->mask == mask && flow->flow_table.hash == hash &&
flow_cmp_masked_key(flow, &masked_key, &mask->range))
return flow;
}
return NULL;
}
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
const struct sw_flow_key *key,
u32 *n_mask_hit)
{
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
struct sw_flow_mask *mask;
struct sw_flow *flow;
*n_mask_hit = 0;
list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
(*n_mask_hit)++;
flow = masked_flow_lookup(ti, key, mask);
if (flow) /* Found */
return flow;
}
return NULL;
}
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
const struct sw_flow_key *key)
{
u32 __always_unused n_mask_hit;
return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
}
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
const struct sw_flow_match *match)
{
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
struct sw_flow_mask *mask;
struct sw_flow *flow;
/* Always called under ovs-mutex. */
list_for_each_entry(mask, &tbl->mask_list, list) {
flow = masked_flow_lookup(ti, match->key, mask);
if (flow && ovs_identifier_is_key(&flow->id) &&
ovs_flow_cmp_unmasked_key(flow, match))
return flow;
}
return NULL;
}
static u32 ufid_hash(const struct sw_flow_id *sfid)
{
return jhash(sfid->ufid, sfid->ufid_len, 0);
}
static bool ovs_flow_cmp_ufid(const struct sw_flow *flow,
const struct sw_flow_id *sfid)
{
if (flow->id.ufid_len != sfid->ufid_len)
return false;
return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len);
}
bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match)
{
if (ovs_identifier_is_ufid(&flow->id))
return flow_cmp_masked_key(flow, match->key, &match->range);
return ovs_flow_cmp_unmasked_key(flow, match);
}
struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
const struct sw_flow_id *ufid)
{
struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
struct sw_flow *flow;
struct hlist_head *head;
u32 hash;
hash = ufid_hash(ufid);
head = find_bucket(ti, hash);
hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) {
if (flow->ufid_table.hash == hash &&
ovs_flow_cmp_ufid(flow, ufid))
return flow;
}
return NULL;
}
int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
struct sw_flow_mask *mask;
int num = 0;
list_for_each_entry(mask, &table->mask_list, list)
num++;
return num;
}
static struct table_instance *table_instance_expand(struct table_instance *ti,
bool ufid)
{
return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}
/* Remove 'mask' from the mask list, if it is not needed any more. */
static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
{
if (mask) {
/* ovs-lock is required to protect mask-refcount and
* mask list.
*/
ASSERT_OVSL();
BUG_ON(!mask->ref_count);
mask->ref_count--;
if (!mask->ref_count) {
list_del_rcu(&mask->list);
kfree_rcu(mask, rcu);
}
}
}
/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *ti = ovsl_dereference(table->ti);
struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
BUG_ON(table->count == 0);
hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
table->count--;
if (ovs_identifier_is_ufid(&flow->id)) {
hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
table->ufid_count--;
}
/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
* accessible as long as the RCU read lock is held.
*/
flow_mask_remove(table, flow->mask);
}
static struct sw_flow_mask *mask_alloc(void)
{
struct sw_flow_mask *mask;
mask = kmalloc(sizeof(*mask), GFP_KERNEL);
if (mask)
mask->ref_count = 1;
return mask;
}
static bool mask_equal(const struct sw_flow_mask *a,
const struct sw_flow_mask *b)
{
const u8 *a_ = (const u8 *)&a->key + a->range.start;
const u8 *b_ = (const u8 *)&b->key + b->range.start;
return (a->range.end == b->range.end)
&& (a->range.start == b->range.start)
&& (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
}
static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
const struct sw_flow_mask *mask)
{
struct list_head *ml;
list_for_each(ml, &tbl->mask_list) {
struct sw_flow_mask *m;
m = container_of(ml, struct sw_flow_mask, list);
if (mask_equal(mask, m))
return m;
}
return NULL;
}
/* Add 'mask' into the mask list, if it is not already there. */
static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
const struct sw_flow_mask *new)
{
struct sw_flow_mask *mask;
mask = flow_mask_find(tbl, new);
if (!mask) {
/* Allocate a new mask if none exsits. */
mask = mask_alloc();
if (!mask)
return -ENOMEM;
mask->key = new->key;
mask->range = new->range;
list_add_rcu(&mask->list, &tbl->mask_list);
} else {
BUG_ON(!mask->ref_count);
mask->ref_count++;
}
flow->mask = mask;
return 0;
}
/* Must be called with OVS mutex held. */
static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *new_ti = NULL;
struct table_instance *ti;
flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
ti = ovsl_dereference(table->ti);
table_instance_insert(ti, flow);
table->count++;
/* Expand table, if necessary, to make room. */
if (table->count > ti->n_buckets)
new_ti = table_instance_expand(ti, false);
else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
new_ti = table_instance_rehash(ti, ti->n_buckets, false);
if (new_ti) {
rcu_assign_pointer(table->ti, new_ti);
call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
table->last_rehash = jiffies;
}
}
/* Must be called with OVS mutex held. */
static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *ti;
flow->ufid_table.hash = ufid_hash(&flow->id);
ti = ovsl_dereference(table->ufid_ti);
ufid_table_instance_insert(ti, flow);
table->ufid_count++;
/* Expand table, if necessary, to make room. */
if (table->ufid_count > ti->n_buckets) {
struct table_instance *new_ti;
new_ti = table_instance_expand(ti, true);
if (new_ti) {
rcu_assign_pointer(table->ufid_ti, new_ti);
call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
}
}
}
/* Must be called with OVS mutex held. */
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
const struct sw_flow_mask *mask)
{
int err;
err = flow_mask_insert(table, flow, mask);
if (err)
return err;
flow_key_insert(table, flow);
if (ovs_identifier_is_ufid(&flow->id))
flow_ufid_insert(table, flow);
return 0;
}
/* Initializes the flow module.
* Returns zero if successful or a negative error code. */
int ovs_flow_init(void)
{
BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+ (nr_cpu_ids
* sizeof(struct flow_stats *)),
0, 0, NULL);
if (flow_cache == NULL)
return -ENOMEM;
flow_stats_cache
= kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
0, SLAB_HWCACHE_ALIGN, NULL);
if (flow_stats_cache == NULL) {
kmem_cache_destroy(flow_cache);
flow_cache = NULL;
return -ENOMEM;
}
return 0;
}
/* Uninitializes the flow module. */
void ovs_flow_exit(void)
{
kmem_cache_destroy(flow_stats_cache);
kmem_cache_destroy(flow_cache);
}