Commit 0b6b098e authored by Mathias Krause, committed by Herbert Xu

padata: make the sequence counter an atomic_t

Using a spinlock to atomically increase a counter sounds wrong -- we have
atomic_t for this!

Also move 'seq_nr' to a different cache line than 'lock' to reduce cache
line thrashing. This has the nice side effect of decreasing the size of
struct parallel_data from 192 to 128 bytes for an x86-64 build, i.e.
occupying only two instead of three cache lines.

Those changes result in a 5% performance increase on an IPsec test run
using pcrypt.

Btw. the seq_lock spinlock was never explicitly initialized -- one more
reason to get rid of it.
Signed-off-by: Mathias Krause <mathias.krause@secunet.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent cfc6f11b
...@@ -129,10 +129,9 @@ struct parallel_data { ...@@ -129,10 +129,9 @@ struct parallel_data {
struct padata_serial_queue __percpu *squeue; struct padata_serial_queue __percpu *squeue;
atomic_t reorder_objects; atomic_t reorder_objects;
atomic_t refcnt; atomic_t refcnt;
atomic_t seq_nr;
struct padata_cpumask cpumask; struct padata_cpumask cpumask;
spinlock_t lock ____cacheline_aligned; spinlock_t lock ____cacheline_aligned;
spinlock_t seq_lock;
unsigned int seq_nr;
unsigned int processed; unsigned int processed;
struct timer_list timer; struct timer_list timer;
}; };
......
...@@ -46,6 +46,7 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) ...@@ -46,6 +46,7 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
static int padata_cpu_hash(struct parallel_data *pd) static int padata_cpu_hash(struct parallel_data *pd)
{ {
unsigned int seq_nr;
int cpu_index; int cpu_index;
/* /*
...@@ -53,10 +54,8 @@ static int padata_cpu_hash(struct parallel_data *pd) ...@@ -53,10 +54,8 @@ static int padata_cpu_hash(struct parallel_data *pd)
* seq_nr mod. number of cpus in use. * seq_nr mod. number of cpus in use.
*/ */
spin_lock(&pd->seq_lock); seq_nr = atomic_inc_return(&pd->seq_nr);
cpu_index = pd->seq_nr % cpumask_weight(pd->cpumask.pcpu); cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
pd->seq_nr++;
spin_unlock(&pd->seq_lock);
return padata_index_to_cpu(pd, cpu_index); return padata_index_to_cpu(pd, cpu_index);
} }
...@@ -429,7 +428,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, ...@@ -429,7 +428,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
padata_init_pqueues(pd); padata_init_pqueues(pd);
padata_init_squeues(pd); padata_init_squeues(pd);
setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
pd->seq_nr = 0; atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0); atomic_set(&pd->reorder_objects, 0);
atomic_set(&pd->refcnt, 0); atomic_set(&pd->refcnt, 0);
pd->pinst = pinst; pd->pinst = pinst;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment