Commit 1109c005, authored by John Fastabend, committed by David S. Miller

net: sched: RCU cls_route

RCUify the route classifier. For now, however, spinlocks are used to
protect the fastmap cache.

The issue here is the fastmap may be read by one CPU while the
cache is being updated by another. An array of pointers could be
one possible solution.
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e35a8ee5
...@@ -29,25 +29,26 @@ ...@@ -29,25 +29,26 @@
* are mutually exclusive. * are mutually exclusive.
* 3. "to TAG from ANY" has higher priority, than "to ANY from XXX" * 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
*/ */
struct route4_fastmap { struct route4_fastmap {
struct route4_filter *filter; struct route4_filter *filter;
u32 id; u32 id;
int iif; int iif;
}; };
struct route4_head { struct route4_head {
struct route4_fastmap fastmap[16]; struct route4_fastmap fastmap[16];
struct route4_bucket *table[256 + 1]; struct route4_bucket __rcu *table[256 + 1];
struct rcu_head rcu;
}; };
struct route4_bucket { struct route4_bucket {
/* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */ /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
struct route4_filter *ht[16 + 16 + 1]; struct route4_filter __rcu *ht[16 + 16 + 1];
struct rcu_head rcu;
}; };
struct route4_filter { struct route4_filter {
struct route4_filter *next; struct route4_filter __rcu *next;
u32 id; u32 id;
int iif; int iif;
...@@ -55,6 +56,8 @@ struct route4_filter { ...@@ -55,6 +56,8 @@ struct route4_filter {
struct tcf_exts exts; struct tcf_exts exts;
u32 handle; u32 handle;
struct route4_bucket *bkt; struct route4_bucket *bkt;
struct tcf_proto *tp;
struct rcu_head rcu;
}; };
#define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) #define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
...@@ -64,14 +67,13 @@ static inline int route4_fastmap_hash(u32 id, int iif) ...@@ -64,14 +67,13 @@ static inline int route4_fastmap_hash(u32 id, int iif)
return id & 0xF; return id & 0xF;
} }
static DEFINE_SPINLOCK(fastmap_lock);
static void static void
route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) route4_reset_fastmap(struct route4_head *head)
{ {
spinlock_t *root_lock = qdisc_root_sleeping_lock(q); spin_lock_bh(&fastmap_lock);
spin_lock_bh(root_lock);
memset(head->fastmap, 0, sizeof(head->fastmap)); memset(head->fastmap, 0, sizeof(head->fastmap));
spin_unlock_bh(root_lock); spin_unlock_bh(&fastmap_lock);
} }
static void static void
...@@ -80,9 +82,12 @@ route4_set_fastmap(struct route4_head *head, u32 id, int iif, ...@@ -80,9 +82,12 @@ route4_set_fastmap(struct route4_head *head, u32 id, int iif,
{ {
int h = route4_fastmap_hash(id, iif); int h = route4_fastmap_hash(id, iif);
/* fastmap updates must look atomic to align id, iif, filter */
spin_lock_bh(&fastmap_lock);
head->fastmap[h].id = id; head->fastmap[h].id = id;
head->fastmap[h].iif = iif; head->fastmap[h].iif = iif;
head->fastmap[h].filter = f; head->fastmap[h].filter = f;
spin_unlock_bh(&fastmap_lock);
} }
static inline int route4_hash_to(u32 id) static inline int route4_hash_to(u32 id)
...@@ -123,7 +128,7 @@ static inline int route4_hash_wild(void) ...@@ -123,7 +128,7 @@ static inline int route4_hash_wild(void)
static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res) struct tcf_result *res)
{ {
struct route4_head *head = tp->root; struct route4_head *head = rcu_dereference_bh(tp->root);
struct dst_entry *dst; struct dst_entry *dst;
struct route4_bucket *b; struct route4_bucket *b;
struct route4_filter *f; struct route4_filter *f;
...@@ -141,32 +146,43 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, ...@@ -141,32 +146,43 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
iif = inet_iif(skb); iif = inet_iif(skb);
h = route4_fastmap_hash(id, iif); h = route4_fastmap_hash(id, iif);
spin_lock(&fastmap_lock);
if (id == head->fastmap[h].id && if (id == head->fastmap[h].id &&
iif == head->fastmap[h].iif && iif == head->fastmap[h].iif &&
(f = head->fastmap[h].filter) != NULL) { (f = head->fastmap[h].filter) != NULL) {
if (f == ROUTE4_FAILURE) if (f == ROUTE4_FAILURE) {
spin_unlock(&fastmap_lock);
goto failure; goto failure;
}
*res = f->res; *res = f->res;
spin_unlock(&fastmap_lock);
return 0; return 0;
} }
spin_unlock(&fastmap_lock);
h = route4_hash_to(id); h = route4_hash_to(id);
restart: restart:
b = head->table[h]; b = rcu_dereference_bh(head->table[h]);
if (b) { if (b) {
for (f = b->ht[route4_hash_from(id)]; f; f = f->next) for (f = rcu_dereference_bh(b->ht[route4_hash_from(id)]);
f;
f = rcu_dereference_bh(f->next))
if (f->id == id) if (f->id == id)
ROUTE4_APPLY_RESULT(); ROUTE4_APPLY_RESULT();
for (f = b->ht[route4_hash_iif(iif)]; f; f = f->next) for (f = rcu_dereference_bh(b->ht[route4_hash_iif(iif)]);
f;
f = rcu_dereference_bh(f->next))
if (f->iif == iif) if (f->iif == iif)
ROUTE4_APPLY_RESULT(); ROUTE4_APPLY_RESULT();
for (f = b->ht[route4_hash_wild()]; f; f = f->next) for (f = rcu_dereference_bh(b->ht[route4_hash_wild()]);
f;
f = rcu_dereference_bh(f->next))
ROUTE4_APPLY_RESULT(); ROUTE4_APPLY_RESULT();
} }
if (h < 256) { if (h < 256) {
h = 256; h = 256;
...@@ -213,7 +229,7 @@ static inline u32 from_hash(u32 id) ...@@ -213,7 +229,7 @@ static inline u32 from_hash(u32 id)
static unsigned long route4_get(struct tcf_proto *tp, u32 handle) static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
{ {
struct route4_head *head = tp->root; struct route4_head *head = rtnl_dereference(tp->root);
struct route4_bucket *b; struct route4_bucket *b;
struct route4_filter *f; struct route4_filter *f;
unsigned int h1, h2; unsigned int h1, h2;
...@@ -229,9 +245,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) ...@@ -229,9 +245,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
if (h2 > 32) if (h2 > 32)
return 0; return 0;
b = head->table[h1]; b = rtnl_dereference(head->table[h1]);
if (b) { if (b) {
for (f = b->ht[h2]; f; f = f->next) for (f = rtnl_dereference(b->ht[h2]);
f;
f = rtnl_dereference(f->next))
if (f->handle == handle) if (f->handle == handle)
return (unsigned long)f; return (unsigned long)f;
} }
...@@ -248,8 +266,11 @@ static int route4_init(struct tcf_proto *tp) ...@@ -248,8 +266,11 @@ static int route4_init(struct tcf_proto *tp)
} }
static void static void
route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) route4_delete_filter(struct rcu_head *head)
{ {
struct route4_filter *f = container_of(head, struct route4_filter, rcu);
struct tcf_proto *tp = f->tp;
tcf_unbind_filter(tp, &f->res); tcf_unbind_filter(tp, &f->res);
tcf_exts_destroy(tp, &f->exts); tcf_exts_destroy(tp, &f->exts);
kfree(f); kfree(f);
...@@ -257,7 +278,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) ...@@ -257,7 +278,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
static void route4_destroy(struct tcf_proto *tp) static void route4_destroy(struct tcf_proto *tp)
{ {
struct route4_head *head = tp->root; struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2; int h1, h2;
if (head == NULL) if (head == NULL)
...@@ -266,28 +287,35 @@ static void route4_destroy(struct tcf_proto *tp) ...@@ -266,28 +287,35 @@ static void route4_destroy(struct tcf_proto *tp)
for (h1 = 0; h1 <= 256; h1++) { for (h1 = 0; h1 <= 256; h1++) {
struct route4_bucket *b; struct route4_bucket *b;
b = head->table[h1]; b = rtnl_dereference(head->table[h1]);
if (b) { if (b) {
for (h2 = 0; h2 <= 32; h2++) { for (h2 = 0; h2 <= 32; h2++) {
struct route4_filter *f; struct route4_filter *f;
while ((f = b->ht[h2]) != NULL) { while ((f = rtnl_dereference(b->ht[h2])) != NULL) {
b->ht[h2] = f->next; struct route4_filter *next;
route4_delete_filter(tp, f);
next = rtnl_dereference(f->next);
RCU_INIT_POINTER(b->ht[h2], next);
call_rcu(&f->rcu, route4_delete_filter);
} }
} }
kfree(b); RCU_INIT_POINTER(head->table[h1], NULL);
kfree_rcu(b, rcu);
} }
} }
kfree(head); RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
} }
static int route4_delete(struct tcf_proto *tp, unsigned long arg) static int route4_delete(struct tcf_proto *tp, unsigned long arg)
{ {
struct route4_head *head = tp->root; struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter **fp, *f = (struct route4_filter *)arg; struct route4_filter *f = (struct route4_filter *)arg;
unsigned int h = 0; struct route4_filter __rcu **fp;
struct route4_filter *nf;
struct route4_bucket *b; struct route4_bucket *b;
unsigned int h = 0;
int i; int i;
if (!head || !f) if (!head || !f)
...@@ -296,27 +324,35 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) ...@@ -296,27 +324,35 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
h = f->handle; h = f->handle;
b = f->bkt; b = f->bkt;
for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) { fp = &b->ht[from_hash(h >> 16)];
if (*fp == f) { for (nf = rtnl_dereference(*fp); nf;
tcf_tree_lock(tp); fp = &nf->next, nf = rtnl_dereference(*fp)) {
*fp = f->next; if (nf == f) {
tcf_tree_unlock(tp); /* unlink it */
RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
route4_reset_fastmap(tp->q, head, f->id); /* Remove any fastmap lookups that might ref filter
route4_delete_filter(tp, f); * notice we unlink'd the filter so we can't get it
* back in the fastmap.
*/
route4_reset_fastmap(head);
/* Strip tree */ /* Delete it */
call_rcu(&f->rcu, route4_delete_filter);
for (i = 0; i <= 32; i++) /* Strip RTNL protected tree */
if (b->ht[i]) for (i = 0; i <= 32; i++) {
struct route4_filter *rt;
rt = rtnl_dereference(b->ht[i]);
if (rt)
return 0; return 0;
}
/* OK, session has no flows */ /* OK, session has no flows */
tcf_tree_lock(tp); RCU_INIT_POINTER(head->table[to_hash(h)], NULL);
head->table[to_hash(h)] = NULL; kfree_rcu(b, rcu);
tcf_tree_unlock(tp);
kfree(b);
return 0; return 0;
} }
} }
...@@ -380,26 +416,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, ...@@ -380,26 +416,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
} }
h1 = to_hash(nhandle); h1 = to_hash(nhandle);
b = head->table[h1]; b = rtnl_dereference(head->table[h1]);
if (!b) { if (!b) {
err = -ENOBUFS; err = -ENOBUFS;
b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
if (b == NULL) if (b == NULL)
goto errout; goto errout;
tcf_tree_lock(tp); rcu_assign_pointer(head->table[h1], b);
head->table[h1] = b;
tcf_tree_unlock(tp);
} else { } else {
unsigned int h2 = from_hash(nhandle >> 16); unsigned int h2 = from_hash(nhandle >> 16);
err = -EEXIST; err = -EEXIST;
for (fp = b->ht[h2]; fp; fp = fp->next) for (fp = rtnl_dereference(b->ht[h2]);
fp;
fp = rtnl_dereference(fp->next))
if (fp->handle == f->handle) if (fp->handle == f->handle)
goto errout; goto errout;
} }
tcf_tree_lock(tp);
if (tb[TCA_ROUTE4_TO]) if (tb[TCA_ROUTE4_TO])
f->id = to; f->id = to;
...@@ -410,7 +445,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, ...@@ -410,7 +445,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
f->handle = nhandle; f->handle = nhandle;
f->bkt = b; f->bkt = b;
tcf_tree_unlock(tp); f->tp = tp;
if (tb[TCA_ROUTE4_CLASSID]) { if (tb[TCA_ROUTE4_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_ROUTE4_CLASSID]); f->res.classid = nla_get_u32(tb[TCA_ROUTE4_CLASSID]);
...@@ -431,14 +466,15 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, ...@@ -431,14 +466,15 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca, struct nlattr **tca,
unsigned long *arg, bool ovr) unsigned long *arg, bool ovr)
{ {
struct route4_head *head = tp->root; struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter *f, *f1, **fp; struct route4_filter __rcu **fp;
struct route4_filter *fold, *f1, *pfp, *f = NULL;
struct route4_bucket *b; struct route4_bucket *b;
struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_ROUTE4_MAX + 1]; struct nlattr *tb[TCA_ROUTE4_MAX + 1];
unsigned int h, th; unsigned int h, th;
u32 old_handle = 0;
int err; int err;
bool new = true;
if (opt == NULL) if (opt == NULL)
return handle ? -EINVAL : 0; return handle ? -EINVAL : 0;
...@@ -447,70 +483,70 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, ...@@ -447,70 +483,70 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (err < 0) if (err < 0)
return err; return err;
f = (struct route4_filter *)*arg; fold = (struct route4_filter *)*arg;
if (f) { if (fold && handle && fold->handle != handle)
if (f->handle != handle && handle)
return -EINVAL; return -EINVAL;
if (f->bkt)
old_handle = f->handle;
err = route4_set_parms(net, tp, base, f, handle, head, tb,
tca[TCA_RATE], 0, ovr);
if (err < 0)
return err;
goto reinsert;
}
err = -ENOBUFS; err = -ENOBUFS;
if (head == NULL) { if (head == NULL) {
head = kzalloc(sizeof(struct route4_head), GFP_KERNEL); head = kzalloc(sizeof(struct route4_head), GFP_KERNEL);
if (head == NULL) if (head == NULL)
goto errout; goto errout;
rcu_assign_pointer(tp->root, head);
tcf_tree_lock(tp);
tp->root = head;
tcf_tree_unlock(tp);
} }
f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL); f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL);
if (f == NULL) if (!f)
goto errout; goto errout;
tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
if (fold) {
f->id = fold->id;
f->iif = fold->iif;
f->res = fold->res;
f->handle = fold->handle;
f->tp = fold->tp;
f->bkt = fold->bkt;
new = false;
}
err = route4_set_parms(net, tp, base, f, handle, head, tb, err = route4_set_parms(net, tp, base, f, handle, head, tb,
tca[TCA_RATE], 1, ovr); tca[TCA_RATE], new, ovr);
if (err < 0) if (err < 0)
goto errout; goto errout;
reinsert:
h = from_hash(f->handle >> 16); h = from_hash(f->handle >> 16);
for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next) fp = &f->bkt->ht[h];
for (pfp = rtnl_dereference(*fp);
(f1 = rtnl_dereference(*fp)) != NULL;
fp = &f1->next)
if (f->handle < f1->handle) if (f->handle < f1->handle)
break; break;
f->next = f1; rcu_assign_pointer(f->next, f1);
tcf_tree_lock(tp); rcu_assign_pointer(*fp, f);
*fp = f;
if (old_handle && f->handle != old_handle) { if (fold && fold->handle && f->handle != fold->handle) {
th = to_hash(old_handle); th = to_hash(fold->handle);
h = from_hash(old_handle >> 16); h = from_hash(fold->handle >> 16);
b = head->table[th]; b = rtnl_dereference(head->table[th]);
if (b) { if (b) {
for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) { fp = &b->ht[h];
if (*fp == f) { for (pfp = rtnl_dereference(*fp); pfp;
fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
if (pfp == f) {
*fp = f->next; *fp = f->next;
break; break;
} }
} }
} }
} }
tcf_tree_unlock(tp);
route4_reset_fastmap(tp->q, head, f->id); route4_reset_fastmap(head);
*arg = (unsigned long)f; *arg = (unsigned long)f;
if (fold)
call_rcu(&fold->rcu, route4_delete_filter);
return 0; return 0;
errout: errout:
...@@ -520,7 +556,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, ...@@ -520,7 +556,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{ {
struct route4_head *head = tp->root; struct route4_head *head = rtnl_dereference(tp->root);
unsigned int h, h1; unsigned int h, h1;
if (head == NULL) if (head == NULL)
...@@ -530,13 +566,15 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) ...@@ -530,13 +566,15 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
return; return;
for (h = 0; h <= 256; h++) { for (h = 0; h <= 256; h++) {
struct route4_bucket *b = head->table[h]; struct route4_bucket *b = rtnl_dereference(head->table[h]);
if (b) { if (b) {
for (h1 = 0; h1 <= 32; h1++) { for (h1 = 0; h1 <= 32; h1++) {
struct route4_filter *f; struct route4_filter *f;
for (f = b->ht[h1]; f; f = f->next) { for (f = rtnl_dereference(b->ht[h1]);
f;
f = rtnl_dereference(f->next)) {
if (arg->count < arg->skip) { if (arg->count < arg->skip) {
arg->count++; arg->count++;
continue; continue;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment