Commit 7d0742da, authored by Patrick McHardy, committed by David S. Miller

[NETFILTER]: nf_conntrack_expect: use RCU for expectation hash

Use RCU for the expectation hash. This doesn't buy much for conntrack
runtime performance, but it reduces the use of nf_conntrack_lock
for /proc and nf_netlink_conntrack.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c52fbb41
...@@ -49,6 +49,8 @@ struct nf_conntrack_expect ...@@ -49,6 +49,8 @@ struct nf_conntrack_expect
/* Direction relative to the master connection. */ /* Direction relative to the master connection. */
enum ip_conntrack_dir dir; enum ip_conntrack_dir dir;
#endif #endif
struct rcu_head rcu;
}; };
#define NF_CT_EXPECT_PERMANENT 0x1 #define NF_CT_EXPECT_PERMANENT 0x1
......
...@@ -191,10 +191,12 @@ struct ct_expect_iter_state { ...@@ -191,10 +191,12 @@ struct ct_expect_iter_state {
static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{ {
struct ct_expect_iter_state *st = seq->private; struct ct_expect_iter_state *st = seq->private;
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
return nf_ct_expect_hash[st->bucket].first; if (n)
return n;
} }
return NULL; return NULL;
} }
...@@ -204,11 +206,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, ...@@ -204,11 +206,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
{ {
struct ct_expect_iter_state *st = seq->private; struct ct_expect_iter_state *st = seq->private;
head = head->next; head = rcu_dereference(head->next);
while (head == NULL) { while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize) if (++st->bucket >= nf_ct_expect_hsize)
return NULL; return NULL;
head = nf_ct_expect_hash[st->bucket].first; head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
} }
return head; return head;
} }
...@@ -225,7 +227,7 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) ...@@ -225,7 +227,7 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
static void *exp_seq_start(struct seq_file *seq, loff_t *pos) static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
{ {
read_lock_bh(&nf_conntrack_lock); rcu_read_lock();
return ct_expect_get_idx(seq, *pos); return ct_expect_get_idx(seq, *pos);
} }
...@@ -237,7 +239,7 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) ...@@ -237,7 +239,7 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void exp_seq_stop(struct seq_file *seq, void *v) static void exp_seq_stop(struct seq_file *seq, void *v)
{ {
read_unlock_bh(&nf_conntrack_lock); rcu_read_unlock();
} }
static int exp_seq_show(struct seq_file *s, void *v) static int exp_seq_show(struct seq_file *s, void *v)
......
...@@ -50,7 +50,7 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) ...@@ -50,7 +50,7 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
NF_CT_ASSERT(master_help); NF_CT_ASSERT(master_help);
NF_CT_ASSERT(!timer_pending(&exp->timeout)); NF_CT_ASSERT(!timer_pending(&exp->timeout));
hlist_del(&exp->hnode); hlist_del_rcu(&exp->hnode);
nf_ct_expect_count--; nf_ct_expect_count--;
hlist_del(&exp->lnode); hlist_del(&exp->lnode);
...@@ -97,7 +97,7 @@ __nf_ct_expect_find(const struct nf_conntrack_tuple *tuple) ...@@ -97,7 +97,7 @@ __nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
return NULL; return NULL;
h = nf_ct_expect_dst_hash(tuple); h = nf_ct_expect_dst_hash(tuple);
hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) {
if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
return i; return i;
} }
...@@ -111,11 +111,11 @@ nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple) ...@@ -111,11 +111,11 @@ nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
{ {
struct nf_conntrack_expect *i; struct nf_conntrack_expect *i;
read_lock_bh(&nf_conntrack_lock); rcu_read_lock();
i = __nf_ct_expect_find(tuple); i = __nf_ct_expect_find(tuple);
if (i) if (i && !atomic_inc_not_zero(&i->use))
atomic_inc(&i->use); i = NULL;
read_unlock_bh(&nf_conntrack_lock); rcu_read_unlock();
return i; return i;
} }
...@@ -223,6 +223,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) ...@@ -223,6 +223,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
new->master = me; new->master = me;
atomic_set(&new->use, 1); atomic_set(&new->use, 1);
INIT_RCU_HEAD(&new->rcu);
return new; return new;
} }
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
...@@ -278,10 +279,18 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family, ...@@ -278,10 +279,18 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
} }
EXPORT_SYMBOL_GPL(nf_ct_expect_init); EXPORT_SYMBOL_GPL(nf_ct_expect_init);
static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
struct nf_conntrack_expect *exp;
exp = container_of(head, struct nf_conntrack_expect, rcu);
kmem_cache_free(nf_ct_expect_cachep, exp);
}
void nf_ct_expect_put(struct nf_conntrack_expect *exp) void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{ {
if (atomic_dec_and_test(&exp->use)) if (atomic_dec_and_test(&exp->use))
kmem_cache_free(nf_ct_expect_cachep, exp); call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
} }
EXPORT_SYMBOL_GPL(nf_ct_expect_put); EXPORT_SYMBOL_GPL(nf_ct_expect_put);
...@@ -295,7 +304,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) ...@@ -295,7 +304,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
hlist_add_head(&exp->lnode, &master_help->expectations); hlist_add_head(&exp->lnode, &master_help->expectations);
master_help->expecting++; master_help->expecting++;
hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]); hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
nf_ct_expect_count++; nf_ct_expect_count++;
setup_timer(&exp->timeout, nf_ct_expectation_timed_out, setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
...@@ -394,10 +403,12 @@ struct ct_expect_iter_state { ...@@ -394,10 +403,12 @@ struct ct_expect_iter_state {
static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{ {
struct ct_expect_iter_state *st = seq->private; struct ct_expect_iter_state *st = seq->private;
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
return nf_ct_expect_hash[st->bucket].first; if (n)
return n;
} }
return NULL; return NULL;
} }
...@@ -407,11 +418,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, ...@@ -407,11 +418,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
{ {
struct ct_expect_iter_state *st = seq->private; struct ct_expect_iter_state *st = seq->private;
head = head->next; head = rcu_dereference(head->next);
while (head == NULL) { while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize) if (++st->bucket >= nf_ct_expect_hsize)
return NULL; return NULL;
head = nf_ct_expect_hash[st->bucket].first; head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
} }
return head; return head;
} }
...@@ -427,9 +438,9 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) ...@@ -427,9 +438,9 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
} }
static void *exp_seq_start(struct seq_file *seq, loff_t *pos) static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(nf_conntrack_lock) __acquires(RCU)
{ {
read_lock_bh(&nf_conntrack_lock); rcu_read_lock();
return ct_expect_get_idx(seq, *pos); return ct_expect_get_idx(seq, *pos);
} }
...@@ -440,9 +451,9 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) ...@@ -440,9 +451,9 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
} }
static void exp_seq_stop(struct seq_file *seq, void *v) static void exp_seq_stop(struct seq_file *seq, void *v)
__releases(nf_conntrack_lock) __releases(RCU)
{ {
read_unlock_bh(&nf_conntrack_lock); rcu_read_unlock();
} }
static int exp_seq_show(struct seq_file *s, void *v) static int exp_seq_show(struct seq_file *s, void *v)
......
...@@ -1471,7 +1471,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -1471,7 +1471,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct hlist_node *n; struct hlist_node *n;
u_int8_t l3proto = nfmsg->nfgen_family; u_int8_t l3proto = nfmsg->nfgen_family;
read_lock_bh(&nf_conntrack_lock); rcu_read_lock();
last = (struct nf_conntrack_expect *)cb->args[1]; last = (struct nf_conntrack_expect *)cb->args[1];
for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
restart: restart:
...@@ -1488,7 +1488,8 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -1488,7 +1488,8 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
cb->nlh->nlmsg_seq, cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW, IPCTNL_MSG_EXP_NEW,
1, exp) < 0) { 1, exp) < 0) {
atomic_inc(&exp->use); if (!atomic_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp; cb->args[1] = (unsigned long)exp;
goto out; goto out;
} }
...@@ -1499,7 +1500,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -1499,7 +1500,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
} }
} }
out: out:
read_unlock_bh(&nf_conntrack_lock); rcu_read_unlock();
if (last) if (last)
nf_ct_expect_put(last); nf_ct_expect_put(last);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment