Commit 18021360 authored by David S. Miller

Merge branch 'ovs-meter-tables'

Tonghao Zhang says:

====================
openvswitch: expand meter tables and fix bug

The patch set expands or shrinks the meter table when necessary,
and other patches fix bugs or improve the code.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents efcd549d e5735887
...@@ -82,7 +82,7 @@ struct datapath { ...@@ -82,7 +82,7 @@ struct datapath {
u32 max_headroom; u32 max_headroom;
/* Switch meters. */ /* Switch meters. */
struct hlist_head *meters; struct dp_meter_table meter_tbl;
}; };
/** /**
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/openvswitch.h> #include <linux/openvswitch.h>
#include <linux/netlink.h> #include <linux/netlink.h>
#include <linux/rculist.h> #include <linux/rculist.h>
#include <linux/swap.h>
#include <net/netlink.h> #include <net/netlink.h>
#include <net/genetlink.h> #include <net/genetlink.h>
...@@ -19,8 +20,6 @@ ...@@ -19,8 +20,6 @@
#include "datapath.h" #include "datapath.h"
#include "meter.h" #include "meter.h"
#define METER_HASH_BUCKETS 1024
static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = { static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
[OVS_METER_ATTR_ID] = { .type = NLA_U32, }, [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
[OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG }, [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
...@@ -39,6 +38,11 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = { ...@@ -39,6 +38,11 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
[OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
}; };
/* Map a meter id to a slot index in @ti.
 *
 * The index is the id modulo the number of slots the instance holds, so
 * the result is always a valid index into ti->dp_meters[].
 */
static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
{
	const u32 n_slots = ti->n_meters;

	return id % n_slots;
}
static void ovs_meter_free(struct dp_meter *meter) static void ovs_meter_free(struct dp_meter *meter)
{ {
if (!meter) if (!meter)
...@@ -47,40 +51,162 @@ static void ovs_meter_free(struct dp_meter *meter) ...@@ -47,40 +51,162 @@ static void ovs_meter_free(struct dp_meter *meter)
kfree_rcu(meter, rcu); kfree_rcu(meter, rcu);
} }
static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
u32 meter_id)
{
return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
}
/* Call with ovs_mutex or RCU read lock. */ /* Call with ovs_mutex or RCU read lock. */
static struct dp_meter *lookup_meter(const struct datapath *dp, static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
u32 meter_id) u32 meter_id)
{ {
struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
u32 hash = meter_hash(ti, meter_id);
struct dp_meter *meter; struct dp_meter *meter;
struct hlist_head *head;
head = meter_hash_bucket(dp, meter_id); meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
hlist_for_each_entry_rcu(meter, head, dp_hash_node, if (meter && likely(meter->id == meter_id))
lockdep_ovsl_is_held()) { return meter;
if (meter->id == meter_id)
return meter;
}
return NULL; return NULL;
} }
static void attach_meter(struct datapath *dp, struct dp_meter *meter) static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
{
struct dp_meter_instance *ti;
ti = kvzalloc(sizeof(*ti) +
sizeof(struct dp_meter *) * size,
GFP_KERNEL);
if (!ti)
return NULL;
ti->n_meters = size;
return ti;
}
/* Release a meter instance right away with kvfree().
 *
 * No RCU grace period is waited for here; the resize path uses
 * dp_meter_instance_free_rcu() through call_rcu() instead.
 */
static void dp_meter_instance_free(struct dp_meter_instance *ti)
{
kvfree(ti);
}
static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
{ {
struct hlist_head *head = meter_hash_bucket(dp, meter->id); struct dp_meter_instance *ti;
hlist_add_head_rcu(&meter->dp_hash_node, head); ti = container_of(rcu, struct dp_meter_instance, rcu);
kvfree(ti);
} }
static void detach_meter(struct dp_meter *meter) static int
dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
{
struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
int n_meters = min(size, ti->n_meters);
struct dp_meter_instance *new_ti;
int i;
new_ti = dp_meter_instance_alloc(size);
if (!new_ti)
return -ENOMEM;
for (i = 0; i < n_meters; i++)
new_ti->dp_meters[i] =
rcu_dereference_ovsl(ti->dp_meters[i]);
rcu_assign_pointer(tbl->ti, new_ti);
call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
return 0;
}
/* Publish @meter in the slot of @ti that its id maps to.
 *
 * The slot is written with rcu_assign_pointer(). Occupancy is not checked
 * here, so whatever the slot held before is overwritten.
 */
static void dp_meter_instance_insert(struct dp_meter_instance *ti,
				     struct dp_meter *meter)
{
	const u32 slot = meter_hash(ti, meter->id);

	rcu_assign_pointer(ti->dp_meters[slot], meter);
}
static void dp_meter_instance_remove(struct dp_meter_instance *ti,
struct dp_meter *meter)
{ {
u32 hash;
hash = meter_hash(ti, meter->id);
RCU_INIT_POINTER(ti->dp_meters[hash], NULL);
}
/* Add @meter to @tbl.
 *
 * The meter is placed in the slot selected by its id, the table count is
 * bumped, and the array is doubled once the count reaches the number of
 * slots. If a limit check or the resize fails, the meter is taken out of
 * its slot again and the count is restored.
 *
 * Returns 0 on success, -EBUSY if the slot is already occupied, -EFBIG if
 * the new count reaches tbl->max_meters_allowed, or -ENOMEM if growing the
 * array fails.
 *
 * NOTE(review): the meter is stored in its slot before the limit checks
 * run and is removed again on failure. Presumably a caller that frees a
 * rejected meter has to take that window into account; confirm against
 * the call site.
 */
static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
{
	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
	u32 slot = meter_hash(ti, meter->id);
	int err = 0;

	/* Slots are normally free because userspace hands out ids from an
	 * id pool; an occupied slot is reported as busy.
	 */
	if (unlikely(rcu_dereference_ovsl(ti->dp_meters[slot])))
		return -EBUSY;

	dp_meter_instance_insert(ti, meter);
	tbl->count++;

	if (tbl->count >= tbl->max_meters_allowed)
		err = -EFBIG;
	else if (tbl->count >= ti->n_meters &&
		 dp_meter_instance_realloc(tbl, ti->n_meters * 2))
		err = -ENOMEM;

	if (err) {
		/* Roll back: empty the slot and undo the count bump. */
		dp_meter_instance_remove(ti, meter);
		tbl->count--;
	}

	return err;
}
static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
{
struct dp_meter_instance *ti;
ASSERT_OVSL(); ASSERT_OVSL();
if (meter) if (!meter)
hlist_del_rcu(&meter->dp_hash_node); return 0;
ti = rcu_dereference_ovsl(tbl->ti);
dp_meter_instance_remove(ti, meter);
tbl->count--;
/* Shrink the meter array if necessary. */
if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
tbl->count <= (ti->n_meters / 4)) {
int half_size = ti->n_meters / 2;
int i;
/* Avoid hash collision, don't move slots to other place.
* Make sure there are no references of meters in array
* which will be released.
*/
for (i = half_size; i < ti->n_meters; i++)
if (rcu_dereference_ovsl(ti->dp_meters[i]))
goto out;
if (dp_meter_instance_realloc(tbl, half_size))
goto shrink_err;
}
out:
return 0;
shrink_err:
dp_meter_instance_insert(ti, meter);
tbl->count++;
return -ENOMEM;
} }
static struct sk_buff * static struct sk_buff *
...@@ -116,12 +242,11 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, ...@@ -116,12 +242,11 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id)) if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
goto error; goto error;
if (!meter)
return 0;
if (nla_put(reply, OVS_METER_ATTR_STATS, if (nla_put(reply, OVS_METER_ATTR_STATS,
sizeof(struct ovs_flow_stats), &meter->stats) || sizeof(struct ovs_flow_stats), &meter->stats))
nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used, goto error;
if (nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
OVS_METER_ATTR_PAD)) OVS_METER_ATTR_PAD))
goto error; goto error;
...@@ -150,18 +275,32 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, ...@@ -150,18 +275,32 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
{ {
struct sk_buff *reply; struct ovs_header *ovs_header = info->userhdr;
struct ovs_header *ovs_reply_header; struct ovs_header *ovs_reply_header;
struct nlattr *nla, *band_nla; struct nlattr *nla, *band_nla;
int err; struct sk_buff *reply;
struct datapath *dp;
int err = -EMSGSIZE;
reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES, reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
&ovs_reply_header); &ovs_reply_header);
if (IS_ERR(reply)) if (IS_ERR(reply))
return PTR_ERR(reply); return PTR_ERR(reply);
if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) || ovs_lock();
nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp) {
err = -ENODEV;
goto exit_unlock;
}
if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS,
dp->meter_tbl.max_meters_allowed))
goto exit_unlock;
ovs_unlock();
if (nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
goto nla_put_failure; goto nla_put_failure;
nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
...@@ -180,9 +319,10 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) ...@@ -180,9 +319,10 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
genlmsg_end(reply, ovs_reply_header); genlmsg_end(reply, ovs_reply_header);
return genlmsg_reply(reply, info); return genlmsg_reply(reply, info);
exit_unlock:
ovs_unlock();
nla_put_failure: nla_put_failure:
nlmsg_free(reply); nlmsg_free(reply);
err = -EMSGSIZE;
return err; return err;
} }
...@@ -252,7 +392,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) ...@@ -252,7 +392,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
* *
* Start with a full bucket. * Start with a full bucket.
*/ */
band->bucket = (band->burst_size + band->rate) * 1000; band->bucket = (band->burst_size + band->rate) * 1000ULL;
band_max_delta_t = band->bucket / band->rate; band_max_delta_t = band->bucket / band->rate;
if (band_max_delta_t > meter->max_delta_t) if (band_max_delta_t > meter->max_delta_t)
meter->max_delta_t = band_max_delta_t; meter->max_delta_t = band_max_delta_t;
...@@ -273,14 +413,14 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) ...@@ -273,14 +413,14 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *reply; struct sk_buff *reply;
struct ovs_header *ovs_reply_header; struct ovs_header *ovs_reply_header;
struct ovs_header *ovs_header = info->userhdr; struct ovs_header *ovs_header = info->userhdr;
struct dp_meter_table *meter_tbl;
struct datapath *dp; struct datapath *dp;
int err; int err;
u32 meter_id; u32 meter_id;
bool failed; bool failed;
if (!a[OVS_METER_ATTR_ID]) { if (!a[OVS_METER_ATTR_ID])
return -ENODEV; return -EINVAL;
}
meter = dp_meter_create(a); meter = dp_meter_create(a);
if (IS_ERR_OR_NULL(meter)) if (IS_ERR_OR_NULL(meter))
...@@ -300,12 +440,18 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) ...@@ -300,12 +440,18 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock; goto exit_unlock;
} }
meter_tbl = &dp->meter_tbl;
meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
/* Cannot fail after this. */ old_meter = lookup_meter(meter_tbl, meter_id);
old_meter = lookup_meter(dp, meter_id); err = detach_meter(meter_tbl, old_meter);
detach_meter(old_meter); if (err)
attach_meter(dp, meter); goto exit_unlock;
err = attach_meter(meter_tbl, meter);
if (err)
goto exit_unlock;
ovs_unlock(); ovs_unlock();
/* Build response with the meter_id and stats from /* Build response with the meter_id and stats from
...@@ -337,14 +483,14 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) ...@@ -337,14 +483,14 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
{ {
struct nlattr **a = info->attrs;
u32 meter_id;
struct ovs_header *ovs_header = info->userhdr; struct ovs_header *ovs_header = info->userhdr;
struct ovs_header *ovs_reply_header; struct ovs_header *ovs_reply_header;
struct nlattr **a = info->attrs;
struct dp_meter *meter;
struct sk_buff *reply;
struct datapath *dp; struct datapath *dp;
u32 meter_id;
int err; int err;
struct sk_buff *reply;
struct dp_meter *meter;
if (!a[OVS_METER_ATTR_ID]) if (!a[OVS_METER_ATTR_ID])
return -EINVAL; return -EINVAL;
...@@ -365,7 +511,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) ...@@ -365,7 +511,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
} }
/* Locate meter, copy stats. */ /* Locate meter, copy stats. */
meter = lookup_meter(dp, meter_id); meter = lookup_meter(&dp->meter_tbl, meter_id);
if (!meter) { if (!meter) {
err = -ENOENT; err = -ENOENT;
goto exit_unlock; goto exit_unlock;
...@@ -390,18 +536,17 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) ...@@ -390,18 +536,17 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
{ {
struct nlattr **a = info->attrs;
u32 meter_id;
struct ovs_header *ovs_header = info->userhdr; struct ovs_header *ovs_header = info->userhdr;
struct ovs_header *ovs_reply_header; struct ovs_header *ovs_reply_header;
struct nlattr **a = info->attrs;
struct dp_meter *old_meter;
struct sk_buff *reply;
struct datapath *dp; struct datapath *dp;
u32 meter_id;
int err; int err;
struct sk_buff *reply;
struct dp_meter *old_meter;
if (!a[OVS_METER_ATTR_ID]) if (!a[OVS_METER_ATTR_ID])
return -EINVAL; return -EINVAL;
meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL, reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
&ovs_reply_header); &ovs_reply_header);
...@@ -416,14 +561,19 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) ...@@ -416,14 +561,19 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock; goto exit_unlock;
} }
old_meter = lookup_meter(dp, meter_id); meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
old_meter = lookup_meter(&dp->meter_tbl, meter_id);
if (old_meter) { if (old_meter) {
spin_lock_bh(&old_meter->lock); spin_lock_bh(&old_meter->lock);
err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter); err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
WARN_ON(err); WARN_ON(err);
spin_unlock_bh(&old_meter->lock); spin_unlock_bh(&old_meter->lock);
detach_meter(old_meter);
err = detach_meter(&dp->meter_tbl, old_meter);
if (err)
goto exit_unlock;
} }
ovs_unlock(); ovs_unlock();
ovs_meter_free(old_meter); ovs_meter_free(old_meter);
genlmsg_end(reply, ovs_reply_header); genlmsg_end(reply, ovs_reply_header);
...@@ -443,16 +593,16 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) ...@@ -443,16 +593,16 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, u32 meter_id) struct sw_flow_key *key, u32 meter_id)
{ {
struct dp_meter *meter;
struct dp_meter_band *band;
long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000); long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
long long int long_delta_ms; long long int long_delta_ms;
u32 delta_ms; struct dp_meter_band *band;
u32 cost; struct dp_meter *meter;
int i, band_exceeded_max = -1; int i, band_exceeded_max = -1;
u32 band_exceeded_rate = 0; u32 band_exceeded_rate = 0;
u32 delta_ms;
u32 cost;
meter = lookup_meter(dp, meter_id); meter = lookup_meter(&dp->meter_tbl, meter_id);
/* Do not drop the packet when there is no meter. */ /* Do not drop the packet when there is no meter. */
if (!meter) if (!meter)
return false; return false;
...@@ -570,32 +720,39 @@ struct genl_family dp_meter_genl_family __ro_after_init = { ...@@ -570,32 +720,39 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
int ovs_meters_init(struct datapath *dp) int ovs_meters_init(struct datapath *dp)
{ {
int i; struct dp_meter_table *tbl = &dp->meter_tbl;
struct dp_meter_instance *ti;
dp->meters = kmalloc_array(METER_HASH_BUCKETS, unsigned long free_mem_bytes;
sizeof(struct hlist_head), GFP_KERNEL);
if (!dp->meters) ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
if (!ti)
return -ENOMEM; return -ENOMEM;
for (i = 0; i < METER_HASH_BUCKETS; i++) /* Allow meters in a datapath to use ~3.12% of physical memory. */
INIT_HLIST_HEAD(&dp->meters[i]); free_mem_bytes = nr_free_buffer_pages() * (PAGE_SIZE >> 5);
tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter),
DP_METER_NUM_MAX);
if (!tbl->max_meters_allowed)
goto out_err;
rcu_assign_pointer(tbl->ti, ti);
tbl->count = 0;
return 0; return 0;
out_err:
dp_meter_instance_free(ti);
return -ENOMEM;
} }
void ovs_meters_exit(struct datapath *dp) void ovs_meters_exit(struct datapath *dp)
{ {
struct dp_meter_table *tbl = &dp->meter_tbl;
struct dp_meter_instance *ti = rcu_dereference_raw(tbl->ti);
int i; int i;
for (i = 0; i < METER_HASH_BUCKETS; i++) { for (i = 0; i < ti->n_meters; i++)
struct hlist_head *head = &dp->meters[i]; ovs_meter_free(ti->dp_meters[i]);
struct dp_meter *meter;
struct hlist_node *n;
hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
kfree(meter);
}
kfree(dp->meters); dp_meter_instance_free(ti);
} }
...@@ -13,26 +13,26 @@ ...@@ -13,26 +13,26 @@
#include <linux/openvswitch.h> #include <linux/openvswitch.h>
#include <linux/genetlink.h> #include <linux/genetlink.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/bits.h>
#include "flow.h" #include "flow.h"
struct datapath; struct datapath;
#define DP_MAX_BANDS 1 #define DP_MAX_BANDS 1
#define DP_METER_ARRAY_SIZE_MIN BIT_ULL(10)
#define DP_METER_NUM_MAX (200000UL)
struct dp_meter_band { struct dp_meter_band {
u32 type; u32 type;
u32 rate; u32 rate;
u32 burst_size; u32 burst_size;
u32 bucket; /* 1/1000 packets, or in bits */ u64 bucket; /* 1/1000 packets, or in bits */
struct ovs_flow_stats stats; struct ovs_flow_stats stats;
}; };
struct dp_meter { struct dp_meter {
spinlock_t lock; /* Per meter lock */ spinlock_t lock; /* Per meter lock */
struct rcu_head rcu; struct rcu_head rcu;
struct hlist_node dp_hash_node; /*Element in datapath->meters
* hash table.
*/
u32 id; u32 id;
u16 kbps:1, keep_stats:1; u16 kbps:1, keep_stats:1;
u16 n_bands; u16 n_bands;
...@@ -42,6 +42,18 @@ struct dp_meter { ...@@ -42,6 +42,18 @@ struct dp_meter {
struct dp_meter_band bands[]; struct dp_meter_band bands[];
}; };
/* One allocation of the meter slot array.
 *
 * A resize allocates a new instance, copies the slots over and frees the
 * old instance through call_rcu().
 */
struct dp_meter_instance {
/* Passed to call_rcu() when a resized copy replaces this instance. */
struct rcu_head rcu;
/* Number of slots in dp_meters[]; also the modulus used by meter_hash(). */
u32 n_meters;
/* Slot array indexed by meter_hash(); a slot holds a meter or NULL. */
struct dp_meter __rcu *dp_meters[];
};
/* Per-datapath meter table (embedded in struct datapath as meter_tbl). */
struct dp_meter_table {
/* Current slot array; replaced with rcu_assign_pointer() on resize. */
struct dp_meter_instance __rcu *ti;
/* Number of attached meters: incremented on attach, decremented on detach. */
u32 count;
/* Upper bound on meters, computed in ovs_meters_init() from free memory
 * and capped at DP_METER_NUM_MAX.
 */
u32 max_meters_allowed;
};
extern struct genl_family dp_meter_genl_family; extern struct genl_family dp_meter_genl_family;
int ovs_meters_init(struct datapath *dp); int ovs_meters_init(struct datapath *dp);
void ovs_meters_exit(struct datapath *dp); void ovs_meters_exit(struct datapath *dp);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment