Commit ba8e8755, authored by Andrew Morton and committed by James Bottomley

[PATCH] percpu_counters: approximate but scalable counters

Several places in ext2 and ext3 use filesystem-wide counters that rely on
global locking, mainly for the Orlov allocator's heuristics.

To solve the contention which this causes we can trade off accuracy against
speed.

This patch introduces a "percpu_counter" library type in which the counts are
per-cpu and are periodically spilled into a global counter.  Readers only
read the global counter.

These objects are *large*.  On a 32-CPU P4 each one is 4 kbytes; on a 4-way
P3, 128 bytes.
parent f688c084
/*
* A simple "approximate counter" for use in ext2 and ext3 superblocks.
*
* WARNING: these things are HUGE. 4 kbytes per counter on 32-way P4.
*/
#include <linux/config.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/preempt.h>
#ifdef CONFIG_SMP
/*
 * One CPU's private slot of a percpu_counter.  It holds the amount that
 * percpu_counter_mod() has accumulated on that CPU and not yet added to the
 * shared total.
 *
 * NOTE(review): the ____cacheline_aligned annotation is presumably what
 * pads each slot out to a full cache line (and makes the containing counter
 * so large) - confirm against the definition of that macro.
 */
struct __percpu_counter {
long count;
} ____cacheline_aligned;
/*
 * SMP flavour of the approximate counter.
 *
 * lock:     held by percpu_counter_mod() while it adds a batch to count
 * count:    the shared total; the only field the read helpers look at
 * counters: one private slot per CPU index, NR_CPUS entries in all
 */
struct percpu_counter {
spinlock_t lock;
long count;
struct __percpu_counter counters[NR_CPUS];
};
/*
 * FBC_BATCH: how far a CPU-local count may move away from zero, in either
 * direction, before percpu_counter_mod() folds it into the shared total.
 * Builds with fewer than 16 CPUs use four times NR_CPUS; larger builds use
 * twice NR_CPUS.
 */
#if NR_CPUS < 16
#define FBC_BATCH (NR_CPUS*4)
#else
#define FBC_BATCH (NR_CPUS*2)
#endif
/*
 * percpu_counter_init - put a counter into its initial, all-zero state.
 * @fbc: counter to initialise
 *
 * Sets up the spinlock, zeroes the shared total and clears each of the
 * NR_CPUS per-CPU slots.  The function takes no lock itself.
 */
static inline void percpu_counter_init(struct percpu_counter *fbc)
{
	int cpu = NR_CPUS;

	spin_lock_init(&fbc->lock);
	fbc->count = 0;

	/* Walk the slot array from the top down; every entry ends up zero. */
	while (cpu-- > 0)
		fbc->counters[cpu].count = 0;
}
/*
 * Add @amount to the counter.  On SMP builds this is an out-of-line
 * function; it is declared here so that the inline helpers in this header
 * (percpu_counter_inc() and percpu_counter_dec()) can call it.
 */
void percpu_counter_mod(struct percpu_counter *fbc, long amount);
/*
 * percpu_counter_read - return the shared total of the counter.
 * @fbc: counter to read
 *
 * Reads fbc->count directly, without taking fbc->lock.  Amounts that are
 * still held in the per-CPU slots are not included, so the result is only
 * an approximation of the true value.
 */
static inline long percpu_counter_read(struct percpu_counter *fbc)
{
return fbc->count;
}
/*
 * percpu_counter_read_positive - read a counter that should never be negative.
 * @fbc: counter to sample
 *
 * Because percpu_counter_read() ignores amounts still held in the per-CPU
 * slots, it can report a small negative number for such a counter.  This
 * variant samples the shared total once and returns it only when it is
 * strictly positive; for a zero or negative sample it returns 1.
 */
static inline long percpu_counter_read_positive(struct percpu_counter *fbc)
{
	long snapshot = fbc->count;

	/* Prevent reloads of fbc->count: test and return the same sample. */
	barrier();
	return (snapshot > 0) ? snapshot : 1;
}
#else
/*
 * Uniprocessor flavour of the counter: with CONFIG_SMP unset there are no
 * per-CPU slots and no lock, only a single running total.
 */
struct percpu_counter {
long count;
};
/*
 * percpu_counter_init - reset the counter's total to zero.
 * @fbc: counter to initialise
 */
static inline void percpu_counter_init(struct percpu_counter *fbc)
{
fbc->count = 0;
}
/*
 * percpu_counter_mod - add a signed amount to the counter.
 * @fbc:    counter to adjust
 * @amount: value to add; pass a negative value to subtract
 *
 * The total is updated in place between preempt_disable() and
 * preempt_enable().
 * NOTE(review): presumably this keeps another task from being scheduled in
 * the middle of the read-modify-write - confirm that no interrupt-context
 * caller relies on this helper.
 */
static inline void
percpu_counter_mod(struct percpu_counter *fbc, long amount)
{
preempt_disable();
fbc->count += amount;
preempt_enable();
}
/*
 * percpu_counter_read - return the counter's current total.
 * @fbc: counter to read
 */
static inline long percpu_counter_read(struct percpu_counter *fbc)
{
return fbc->count;
}
/*
 * percpu_counter_read_positive - uniprocessor counterpart of the SMP helper.
 * @fbc: counter to read
 *
 * Returns the stored total unchanged.  Unlike the SMP variant, no clamping
 * is applied here, so a zero or negative total is returned as-is.
 */
static inline long percpu_counter_read_positive(struct percpu_counter *fbc)
{
return fbc->count;
}
#endif /* CONFIG_SMP */
/*
 * percpu_counter_inc - add one to the counter.
 * @fbc: counter to adjust
 *
 * Shared by the SMP and uniprocessor builds; forwards to
 * percpu_counter_mod() with an amount of 1.
 */
static inline void percpu_counter_inc(struct percpu_counter *fbc)
{
percpu_counter_mod(fbc, 1);
}
/*
 * percpu_counter_dec - subtract one from the counter.
 * @fbc: counter to adjust
 *
 * Shared by the SMP and uniprocessor builds; forwards to
 * percpu_counter_mod() with an amount of -1.
 */
static inline void percpu_counter_dec(struct percpu_counter *fbc)
{
percpu_counter_mod(fbc, -1);
}
...@@ -58,6 +58,7 @@ ...@@ -58,6 +58,7 @@
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/percpu_counter.h>
#include <asm/checksum.h> #include <asm/checksum.h>
#if defined(CONFIG_PROC_FS) #if defined(CONFIG_PROC_FS)
...@@ -100,6 +101,7 @@ EXPORT_SYMBOL(kfree); ...@@ -100,6 +101,7 @@ EXPORT_SYMBOL(kfree);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
EXPORT_SYMBOL(kmalloc_percpu); EXPORT_SYMBOL(kmalloc_percpu);
EXPORT_SYMBOL(kfree_percpu); EXPORT_SYMBOL(kfree_percpu);
EXPORT_SYMBOL(percpu_counter_mod);
#endif #endif
EXPORT_SYMBOL(vfree); EXPORT_SYMBOL(vfree);
EXPORT_SYMBOL(__vmalloc); EXPORT_SYMBOL(__vmalloc);
......
...@@ -14,6 +14,7 @@ obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o \ ...@@ -14,6 +14,7 @@ obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o \
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
obj-$(CONFIG_SMP) += percpu_counter.o
ifneq ($(CONFIG_HAVE_DEC_LOCK),y) ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
obj-y += dec_and_lock.o obj-y += dec_and_lock.o
......
#include <linux/percpu_counter.h>
/*
 * percpu_counter_mod - add a signed amount to an approximate counter.
 * @fbc:    counter to adjust
 * @amount: value to add; may be negative
 *
 * The amount is accumulated in the calling CPU's private slot, which is
 * selected with get_cpu() and released with put_cpu() once the update is
 * complete.  When the slot's value reaches FBC_BATCH or -FBC_BATCH, the
 * whole value is added to the shared total under fbc->lock and the slot is
 * reset to zero.
 */
void percpu_counter_mod(struct percpu_counter *fbc, long amount)
{
	int this_cpu = get_cpu();
	long local = fbc->counters[this_cpu].count + amount;

	if (local > -FBC_BATCH && local < FBC_BATCH) {
		/* Still inside the batch window: keep the delta CPU-local. */
		fbc->counters[this_cpu].count = local;
	} else {
		/* Spill the whole local delta into the shared total. */
		spin_lock(&fbc->lock);
		fbc->count += local;
		spin_unlock(&fbc->lock);
		fbc->counters[this_cpu].count = 0;
	}
	put_cpu();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment