Commit 942e4a2b authored by Stephen Hemminger, committed by David S. Miller

netfilter: revised locking for x_tables

The x_tables are organized with a table structure and per-cpu copies
of the counters and rules. On older kernels there was a reader/writer
lock per table, which was a performance bottleneck. In 2.6.30-rc, this
was converted to use RCU for the counters/rules, which solved the performance
problems for do_table but made replacing rules much slower because of
the necessary RCU grace period.

This version uses a per-cpu set of spinlocks and counters to allow
table processing to proceed without the cache thrashing of a global
reader lock, and keeps the same performance for table updates.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent bf368e4e
...@@ -354,9 +354,6 @@ struct xt_table ...@@ -354,9 +354,6 @@ struct xt_table
/* What hooks you will enter on */ /* What hooks you will enter on */
unsigned int valid_hooks; unsigned int valid_hooks;
/* Lock for the curtain */
struct mutex lock;
/* Man behind the curtain... */ /* Man behind the curtain... */
struct xt_table_info *private; struct xt_table_info *private;
...@@ -434,8 +431,74 @@ extern void xt_proto_fini(struct net *net, u_int8_t af); ...@@ -434,8 +431,74 @@ extern void xt_proto_fini(struct net *net, u_int8_t af);
extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
extern void xt_free_table_info(struct xt_table_info *info); extern void xt_free_table_info(struct xt_table_info *info);
extern void xt_table_entry_swap_rcu(struct xt_table_info *old,
struct xt_table_info *new); /*
* Per-CPU spinlock associated with per-cpu table entries, and
* with a counter for the "reading" side that allows a recursive
* reader to avoid taking the lock and deadlocking.
*
* "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu.
* It needs to ensure that the rules are not being changed while the packet
* is being processed. In some cases, the read lock will be acquired
* twice on the same CPU; this is okay because of the count.
*
* "writing" is used when reading counters.
* During replace any readers that are using the old tables have to complete
* before freeing the old table. This is handled by the write locking
* necessary for reading the counters.
*/
/* Per-CPU lock state; one instance exists per CPU (see xt_info_locks). */
struct xt_info_lock {
/* Taken by the outermost reader on its own CPU, and by writers for a chosen CPU. */
spinlock_t lock;
/* Read-side nesting depth on this CPU; a reader holds the spinlock while non-zero. */
unsigned char readers;
};
DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
/*
* Note: we need to ensure that preemption is disabled before acquiring
* the per-cpu-variable, so we do it as a two step process rather than
* using "spin_lock_bh()".
*
* We _also_ need to disable bottom half processing before updating our
* nesting count, to make sure that the only kind of re-entrancy is this
* code being called by itself: since the count+lock is not an atomic
* operation, we can allow no races.
*
* _Only_ that special combination of being per-cpu and never getting
* re-entered asynchronously means that the count is safe.
*/
/* Enter the read side on the current CPU; calls may nest on the same CPU. */
static inline void xt_info_rdlock_bh(void)
{
struct xt_info_lock *lock;
/* Bottom halves are disabled before the per-cpu state is looked up or modified. */
local_bh_disable();
lock = &__get_cpu_var(xt_info_locks);
/* Only the outermost entry (count was zero) takes the spinlock; nested entries just bump the count. */
if (!lock->readers++)
spin_lock(&lock->lock);
}
/* Leave the read side on the current CPU; undoes one xt_info_rdlock_bh(). */
static inline void xt_info_rdunlock_bh(void)
{
struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
/* Release the spinlock only when the outermost reader exits (count drops to zero). */
if (!--lock->readers)
spin_unlock(&lock->lock);
/* Re-enable bottom halves last, mirroring the order used on entry. */
local_bh_enable();
}
/*
* The "writer" side needs to get exclusive access to the lock,
* regardless of readers. This must be called with bottom half
* processing (and thus also preemption) disabled.
*/
/* Acquire the spinlock in @cpu's xt_info_locks entry; the reader count is not touched. */
static inline void xt_info_wrlock(unsigned int cpu)
{
spin_lock(&per_cpu(xt_info_locks, cpu).lock);
}
/* Release the spinlock in @cpu's xt_info_locks entry taken by xt_info_wrlock(). */
static inline void xt_info_wrunlock(unsigned int cpu)
{
spin_unlock(&per_cpu(xt_info_locks, cpu).lock);
}
/* /*
* This helper is performance critical and must be inlined * This helper is performance critical and must be inlined
......
...@@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, ...@@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
indev = in ? in->name : nulldevname; indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname; outdev = out ? out->name : nulldevname;
rcu_read_lock_bh(); xt_info_rdlock_bh();
private = rcu_dereference(table->private); private = table->private;
table_base = rcu_dereference(private->entries[smp_processor_id()]); table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]); e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]); back = get_entry(table_base, private->underflow[hook]);
...@@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, ...@@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
(2 * skb->dev->addr_len); (2 * skb->dev->addr_len);
ADD_COUNTER(e->counters, hdr_len, 1); ADD_COUNTER(e->counters, hdr_len, 1);
t = arpt_get_target(e); t = arpt_get_target(e);
...@@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, ...@@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
e = (void *)e + e->next_offset; e = (void *)e + e->next_offset;
} }
} while (!hotdrop); } while (!hotdrop);
xt_info_rdunlock_bh();
rcu_read_unlock_bh();
if (hotdrop) if (hotdrop)
return NF_DROP; return NF_DROP;
...@@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t, ...@@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t,
/* Instead of clearing (by a previous call to memset()) /* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters * the counters and using adds, we set the counters
* with data used by 'current' CPU * with data used by 'current' CPU
* We dont care about preemption here. *
* Bottom half has to be disabled to prevent deadlock
* if new softirq were to run and call ipt_do_table
*/ */
curcpu = raw_smp_processor_id(); local_bh_disable();
curcpu = smp_processor_id();
i = 0; i = 0;
ARPT_ENTRY_ITERATE(t->entries[curcpu], ARPT_ENTRY_ITERATE(t->entries[curcpu],
...@@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t, ...@@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t,
if (cpu == curcpu) if (cpu == curcpu)
continue; continue;
i = 0; i = 0;
xt_info_wrlock(cpu);
ARPT_ENTRY_ITERATE(t->entries[cpu], ARPT_ENTRY_ITERATE(t->entries[cpu],
t->size, t->size,
add_entry_to_counter, add_entry_to_counter,
counters, counters,
&i); &i);
xt_info_wrunlock(cpu);
} }
}
/* We're lazy, and add to the first CPU; overflow works its fey magic
* and everything is OK. */
static int
add_counter_to_entry(struct arpt_entry *e,
const struct xt_counters addme[],
unsigned int *i)
{
ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
(*i)++;
return 0;
}
/* Take values from counters and add them back onto the current cpu */
static void put_counters(struct xt_table_info *t,
const struct xt_counters counters[])
{
unsigned int i, cpu;
local_bh_disable();
cpu = smp_processor_id();
i = 0;
ARPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_counter_to_entry,
counters,
&i);
local_bh_enable(); local_bh_enable();
} }
static inline int
zero_entry_counter(struct arpt_entry *e, void *arg)
{
e->counters.bcnt = 0;
e->counters.pcnt = 0;
return 0;
}
static void
clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
{
unsigned int cpu;
const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
for_each_possible_cpu(cpu) {
memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
zero_entry_counter, NULL);
}
}
static struct xt_counters *alloc_counters(struct xt_table *table) static struct xt_counters *alloc_counters(struct xt_table *table)
{ {
unsigned int countersize; unsigned int countersize;
struct xt_counters *counters; struct xt_counters *counters;
struct xt_table_info *private = table->private; struct xt_table_info *private = table->private;
struct xt_table_info *info;
/* We need atomic snapshot of counters: rest doesn't change /* We need atomic snapshot of counters: rest doesn't change
* (other than comefrom, which userspace doesn't care * (other than comefrom, which userspace doesn't care
...@@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) ...@@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
counters = vmalloc_node(countersize, numa_node_id()); counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL) if (counters == NULL)
goto nomem; return ERR_PTR(-ENOMEM);
info = xt_alloc_table_info(private->size);
if (!info)
goto free_counters;
clone_counters(info, private);
mutex_lock(&table->lock);
xt_table_entry_swap_rcu(private, info);
synchronize_net(); /* Wait until smoke has cleared */
get_counters(info, counters);
put_counters(private, counters);
mutex_unlock(&table->lock);
xt_free_table_info(info); get_counters(private, counters);
return counters; return counters;
free_counters:
vfree(counters);
nomem:
return ERR_PTR(-ENOMEM);
} }
static int copy_entries_to_user(unsigned int total_size, static int copy_entries_to_user(unsigned int total_size,
...@@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name, ...@@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name,
(newinfo->number <= oldinfo->initial_entries)) (newinfo->number <= oldinfo->initial_entries))
module_put(t->me); module_put(t->me);
/* Get the old counters. */ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters); get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */ /* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
...@@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) ...@@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
return ret; return ret;
} }
/* We're lazy, and add to the first CPU; overflow works its fey magic
* and everything is OK. */
static int
add_counter_to_entry(struct arpt_entry *e,
const struct xt_counters addme[],
unsigned int *i)
{
ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
(*i)++;
return 0;
}
static int do_add_counters(struct net *net, void __user *user, unsigned int len, static int do_add_counters(struct net *net, void __user *user, unsigned int len,
int compat) int compat)
{ {
unsigned int i; unsigned int i, curcpu;
struct xt_counters_info tmp; struct xt_counters_info tmp;
struct xt_counters *paddc; struct xt_counters *paddc;
unsigned int num_counters; unsigned int num_counters;
...@@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, ...@@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
goto free; goto free;
} }
mutex_lock(&t->lock); local_bh_disable();
private = t->private; private = t->private;
if (private->number != num_counters) { if (private->number != num_counters) {
ret = -EINVAL; ret = -EINVAL;
goto unlock_up_free; goto unlock_up_free;
} }
preempt_disable();
i = 0; i = 0;
/* Choose the copy that is on our node */ /* Choose the copy that is on our node */
loc_cpu_entry = private->entries[smp_processor_id()]; curcpu = smp_processor_id();
loc_cpu_entry = private->entries[curcpu];
xt_info_wrlock(curcpu);
ARPT_ENTRY_ITERATE(loc_cpu_entry, ARPT_ENTRY_ITERATE(loc_cpu_entry,
private->size, private->size,
add_counter_to_entry, add_counter_to_entry,
paddc, paddc,
&i); &i);
preempt_enable(); xt_info_wrunlock(curcpu);
unlock_up_free: unlock_up_free:
mutex_unlock(&t->lock); local_bh_enable();
xt_table_unlock(t); xt_table_unlock(t);
module_put(t->me); module_put(t->me);
free: free:
......
...@@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb, ...@@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb,
tgpar.hooknum = hook; tgpar.hooknum = hook;
IP_NF_ASSERT(table->valid_hooks & (1 << hook)); IP_NF_ASSERT(table->valid_hooks & (1 << hook));
xt_info_rdlock_bh();
rcu_read_lock_bh(); private = table->private;
private = rcu_dereference(table->private); table_base = private->entries[smp_processor_id()];
table_base = rcu_dereference(private->entries[smp_processor_id()]);
e = get_entry(table_base, private->hook_entry[hook]); e = get_entry(table_base, private->hook_entry[hook]);
...@@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb, ...@@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb,
e = (void *)e + e->next_offset; e = (void *)e + e->next_offset;
} }
} while (!hotdrop); } while (!hotdrop);
xt_info_rdunlock_bh();
rcu_read_unlock_bh();
#ifdef DEBUG_ALLOW_ALL #ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT; return NF_ACCEPT;
...@@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t, ...@@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t,
/* Instead of clearing (by a previous call to memset()) /* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters * the counters and using adds, we set the counters
* with data used by 'current' CPU * with data used by 'current' CPU.
* We dont care about preemption here. *
* Bottom half has to be disabled to prevent deadlock
* if new softirq were to run and call ipt_do_table
*/ */
curcpu = raw_smp_processor_id(); local_bh_disable();
curcpu = smp_processor_id();
i = 0; i = 0;
IPT_ENTRY_ITERATE(t->entries[curcpu], IPT_ENTRY_ITERATE(t->entries[curcpu],
...@@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t, ...@@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t,
if (cpu == curcpu) if (cpu == curcpu)
continue; continue;
i = 0; i = 0;
xt_info_wrlock(cpu);
IPT_ENTRY_ITERATE(t->entries[cpu], IPT_ENTRY_ITERATE(t->entries[cpu],
t->size, t->size,
add_entry_to_counter, add_entry_to_counter,
counters, counters,
&i); &i);
xt_info_wrunlock(cpu);
} }
}
/* We're lazy, and add to the first CPU; overflow works its fey magic
* and everything is OK. */
static int
add_counter_to_entry(struct ipt_entry *e,
const struct xt_counters addme[],
unsigned int *i)
{
ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
(*i)++;
return 0;
}
/* Take values from counters and add them back onto the current cpu */
static void put_counters(struct xt_table_info *t,
const struct xt_counters counters[])
{
unsigned int i, cpu;
local_bh_disable();
cpu = smp_processor_id();
i = 0;
IPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_counter_to_entry,
counters,
&i);
local_bh_enable(); local_bh_enable();
} }
static inline int
zero_entry_counter(struct ipt_entry *e, void *arg)
{
e->counters.bcnt = 0;
e->counters.pcnt = 0;
return 0;
}
static void
clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
{
unsigned int cpu;
const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
for_each_possible_cpu(cpu) {
memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
zero_entry_counter, NULL);
}
}
static struct xt_counters * alloc_counters(struct xt_table *table) static struct xt_counters * alloc_counters(struct xt_table *table)
{ {
unsigned int countersize; unsigned int countersize;
struct xt_counters *counters; struct xt_counters *counters;
struct xt_table_info *private = table->private; struct xt_table_info *private = table->private;
struct xt_table_info *info;
/* We need atomic snapshot of counters: rest doesn't change /* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care (other than comefrom, which userspace doesn't care
...@@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) ...@@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
counters = vmalloc_node(countersize, numa_node_id()); counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL) if (counters == NULL)
goto nomem; return ERR_PTR(-ENOMEM);
info = xt_alloc_table_info(private->size);
if (!info)
goto free_counters;
clone_counters(info, private);
mutex_lock(&table->lock);
xt_table_entry_swap_rcu(private, info);
synchronize_net(); /* Wait until smoke has cleared */
get_counters(info, counters); get_counters(private, counters);
put_counters(private, counters);
mutex_unlock(&table->lock);
xt_free_table_info(info);
return counters; return counters;
free_counters:
vfree(counters);
nomem:
return ERR_PTR(-ENOMEM);
} }
static int static int
...@@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, ...@@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries)) (newinfo->number <= oldinfo->initial_entries))
module_put(t->me); module_put(t->me);
/* Get the old counters. */ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters); get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */ /* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
...@@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len) ...@@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len)
return ret; return ret;
} }
/* We're lazy, and add to the first CPU; overflow works its fey magic
* and everything is OK. */
static int
add_counter_to_entry(struct ipt_entry *e,
const struct xt_counters addme[],
unsigned int *i)
{
ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
(*i)++;
return 0;
}
static int static int
do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
{ {
unsigned int i; unsigned int i, curcpu;
struct xt_counters_info tmp; struct xt_counters_info tmp;
struct xt_counters *paddc; struct xt_counters *paddc;
unsigned int num_counters; unsigned int num_counters;
...@@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat ...@@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
goto free; goto free;
} }
mutex_lock(&t->lock); local_bh_disable();
private = t->private; private = t->private;
if (private->number != num_counters) { if (private->number != num_counters) {
ret = -EINVAL; ret = -EINVAL;
goto unlock_up_free; goto unlock_up_free;
} }
preempt_disable();
i = 0; i = 0;
/* Choose the copy that is on our node */ /* Choose the copy that is on our node */
loc_cpu_entry = private->entries[raw_smp_processor_id()]; curcpu = smp_processor_id();
loc_cpu_entry = private->entries[curcpu];
xt_info_wrlock(curcpu);
IPT_ENTRY_ITERATE(loc_cpu_entry, IPT_ENTRY_ITERATE(loc_cpu_entry,
private->size, private->size,
add_counter_to_entry, add_counter_to_entry,
paddc, paddc,
&i); &i);
preempt_enable(); xt_info_wrunlock(curcpu);
unlock_up_free: unlock_up_free:
mutex_unlock(&t->lock); local_bh_enable();
xt_table_unlock(t); xt_table_unlock(t);
module_put(t->me); module_put(t->me);
free: free:
......
...@@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb,
IP_NF_ASSERT(table->valid_hooks & (1 << hook)); IP_NF_ASSERT(table->valid_hooks & (1 << hook));
rcu_read_lock_bh(); xt_info_rdlock_bh();
private = rcu_dereference(table->private); private = table->private;
table_base = rcu_dereference(private->entries[smp_processor_id()]); table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]); e = get_entry(table_base, private->hook_entry[hook]);
...@@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb,
#ifdef CONFIG_NETFILTER_DEBUG #ifdef CONFIG_NETFILTER_DEBUG
((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
#endif #endif
rcu_read_unlock_bh(); xt_info_rdunlock_bh();
#ifdef DEBUG_ALLOW_ALL #ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT; return NF_ACCEPT;
...@@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t, ...@@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t,
/* Instead of clearing (by a previous call to memset()) /* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters * the counters and using adds, we set the counters
* with data used by 'current' CPU * with data used by 'current' CPU
* We dont care about preemption here. *
* Bottom half has to be disabled to prevent deadlock
* if new softirq were to run and call ipt_do_table
*/ */
curcpu = raw_smp_processor_id(); local_bh_disable();
curcpu = smp_processor_id();
i = 0; i = 0;
IP6T_ENTRY_ITERATE(t->entries[curcpu], IP6T_ENTRY_ITERATE(t->entries[curcpu],
...@@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t, ...@@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t,
if (cpu == curcpu) if (cpu == curcpu)
continue; continue;
i = 0; i = 0;
xt_info_wrlock(cpu);
IP6T_ENTRY_ITERATE(t->entries[cpu], IP6T_ENTRY_ITERATE(t->entries[cpu],
t->size, t->size,
add_entry_to_counter, add_entry_to_counter,
counters, counters,
&i); &i);
xt_info_wrunlock(cpu);
} }
}
/* We're lazy, and add to the first CPU; overflow works its fey magic
* and everything is OK. */
static int
add_counter_to_entry(struct ip6t_entry *e,
const struct xt_counters addme[],
unsigned int *i)
{
ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
(*i)++;
return 0;
}
/* Take values from counters and add them back onto the current cpu */
static void put_counters(struct xt_table_info *t,
const struct xt_counters counters[])
{
unsigned int i, cpu;
local_bh_disable();
cpu = smp_processor_id();
i = 0;
IP6T_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_counter_to_entry,
counters,
&i);
local_bh_enable(); local_bh_enable();
} }
static inline int
zero_entry_counter(struct ip6t_entry *e, void *arg)
{
e->counters.bcnt = 0;
e->counters.pcnt = 0;
return 0;
}
static void
clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
{
unsigned int cpu;
const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
for_each_possible_cpu(cpu) {
memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
zero_entry_counter, NULL);
}
}
static struct xt_counters *alloc_counters(struct xt_table *table) static struct xt_counters *alloc_counters(struct xt_table *table)
{ {
unsigned int countersize; unsigned int countersize;
struct xt_counters *counters; struct xt_counters *counters;
struct xt_table_info *private = table->private; struct xt_table_info *private = table->private;
struct xt_table_info *info;
/* We need atomic snapshot of counters: rest doesn't change /* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care (other than comefrom, which userspace doesn't care
...@@ -1015,30 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) ...@@ -1015,30 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
counters = vmalloc_node(countersize, numa_node_id()); counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL) if (counters == NULL)
goto nomem; return ERR_PTR(-ENOMEM);
info = xt_alloc_table_info(private->size);
if (!info)
goto free_counters;
clone_counters(info, private);
mutex_lock(&table->lock);
xt_table_entry_swap_rcu(private, info);
synchronize_net(); /* Wait until smoke has cleared */
get_counters(info, counters);
put_counters(private, counters);
mutex_unlock(&table->lock);
xt_free_table_info(info); get_counters(private, counters);
return counters; return counters;
free_counters:
vfree(counters);
nomem:
return ERR_PTR(-ENOMEM);
} }
static int static int
...@@ -1334,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, ...@@ -1334,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries)) (newinfo->number <= oldinfo->initial_entries))
module_put(t->me); module_put(t->me);
/* Get the old counters. */ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters); get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */ /* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
...@@ -1405,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len) ...@@ -1405,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len)
return ret; return ret;
} }
/* We're lazy, and add to the first CPU; overflow works its fey magic
* and everything is OK. */
static int
add_counter_to_entry(struct ip6t_entry *e,
const struct xt_counters addme[],
unsigned int *i)
{
ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
(*i)++;
return 0;
}
static int static int
do_add_counters(struct net *net, void __user *user, unsigned int len, do_add_counters(struct net *net, void __user *user, unsigned int len,
int compat) int compat)
{ {
unsigned int i; unsigned int i, curcpu;
struct xt_counters_info tmp; struct xt_counters_info tmp;
struct xt_counters *paddc; struct xt_counters *paddc;
unsigned int num_counters; unsigned int num_counters;
...@@ -1465,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, ...@@ -1465,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
goto free; goto free;
} }
mutex_lock(&t->lock);
local_bh_disable();
private = t->private; private = t->private;
if (private->number != num_counters) { if (private->number != num_counters) {
ret = -EINVAL; ret = -EINVAL;
goto unlock_up_free; goto unlock_up_free;
} }
preempt_disable();
i = 0; i = 0;
/* Choose the copy that is on our node */ /* Choose the copy that is on our node */
loc_cpu_entry = private->entries[raw_smp_processor_id()]; curcpu = smp_processor_id();
xt_info_wrlock(curcpu);
loc_cpu_entry = private->entries[curcpu];
IP6T_ENTRY_ITERATE(loc_cpu_entry, IP6T_ENTRY_ITERATE(loc_cpu_entry,
private->size, private->size,
add_counter_to_entry, add_counter_to_entry,
paddc, paddc,
&i); &i);
preempt_enable(); xt_info_wrunlock(curcpu);
unlock_up_free: unlock_up_free:
mutex_unlock(&t->lock); local_bh_enable();
xt_table_unlock(t); xt_table_unlock(t);
module_put(t->me); module_put(t->me);
free: free:
......
...@@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info) ...@@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info)
} }
EXPORT_SYMBOL(xt_free_table_info); EXPORT_SYMBOL(xt_free_table_info);
void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo,
struct xt_table_info *newinfo)
{
unsigned int cpu;
for_each_possible_cpu(cpu) {
void *p = oldinfo->entries[cpu];
rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]);
newinfo->entries[cpu] = p;
}
}
EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu);
/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
const char *name) const char *name)
...@@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af) ...@@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af)
EXPORT_SYMBOL_GPL(xt_compat_unlock); EXPORT_SYMBOL_GPL(xt_compat_unlock);
#endif #endif
DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
struct xt_table_info * struct xt_table_info *
xt_replace_table(struct xt_table *table, xt_replace_table(struct xt_table *table,
unsigned int num_counters, unsigned int num_counters,
struct xt_table_info *newinfo, struct xt_table_info *newinfo,
int *error) int *error)
{ {
struct xt_table_info *oldinfo, *private; struct xt_table_info *private;
/* Do the substitution. */ /* Do the substitution. */
mutex_lock(&table->lock); local_bh_disable();
private = table->private; private = table->private;
/* Check inside lock: is the old number correct? */ /* Check inside lock: is the old number correct? */
if (num_counters != private->number) { if (num_counters != private->number) {
duprintf("num_counters != table->private->number (%u/%u)\n", duprintf("num_counters != table->private->number (%u/%u)\n",
num_counters, private->number); num_counters, private->number);
mutex_unlock(&table->lock); local_bh_enable();
*error = -EAGAIN; *error = -EAGAIN;
return NULL; return NULL;
} }
oldinfo = private;
rcu_assign_pointer(table->private, newinfo);
newinfo->initial_entries = oldinfo->initial_entries;
mutex_unlock(&table->lock);
synchronize_net(); table->private = newinfo;
return oldinfo; newinfo->initial_entries = private->initial_entries;
/*
* Even though table entries have now been swapped, other CPUs
* may still be using the old entries. This is okay, because
* resynchronization happens because of the locking done
* during the get_counters() routine.
*/
local_bh_enable();
return private;
} }
EXPORT_SYMBOL_GPL(xt_replace_table); EXPORT_SYMBOL_GPL(xt_replace_table);
...@@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, ...@@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table,
/* Simplifies replace_table code. */ /* Simplifies replace_table code. */
table->private = bootstrap; table->private = bootstrap;
mutex_init(&table->lock);
if (!xt_replace_table(table, 0, newinfo, &ret)) if (!xt_replace_table(table, 0, newinfo, &ret))
goto unlock; goto unlock;
...@@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = { ...@@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = {
static int __init xt_init(void) static int __init xt_init(void)
{ {
int i, rv; unsigned int i;
int rv;
for_each_possible_cpu(i) {
struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
spin_lock_init(&lock->lock);
lock->readers = 0;
}
xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
if (!xt) if (!xt)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment