Commit 68978ee7 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] slab: add kmem_cache_alloc_node

From: Manfred Spraul <manfred@colorfullife.com>

The attached patch adds a simple kmem_cache_alloc_node function that allocates
memory on a given NUMA node.  The function is intended for CPU-bound structures;
it is used for alloc_percpu and for the slab-internal per-cpu structures.
Jack Steiner reported a ~3% performance increase for AIM7 on a 64-way
Itanium 2.
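
As a usage illustration only (not part of the patch): a caller with a CPU-bound
structure can request the object on the CPU's home node and fall back to an
ordinary allocation if that fails, which is the pattern alloc_arraycache() in
the diff below follows.  The helper name alloc_on_cpu_node here is hypothetical.

    /* Hypothetical example: allocate a CPU-bound object from @cachep on the
     * memory node that owns @cpu, falling back to any node on failure. */
    static void *alloc_on_cpu_node(kmem_cache_t *cachep, int cpu)
    {
            void *obj;

            obj = kmem_cache_alloc_node(cachep, cpu_to_node(cpu));
            if (!obj)
                    obj = kmem_cache_alloc(cachep, GFP_KERNEL);
            return obj;
    }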

Port maintainers: The patch could cause problems if CPU_UP_PREPARE is
called for a CPU on a node before that node's memory is attached, and/or
if alloc_pages_node doesn't fall back to memory from another node when
there is no memory in the requested node.  I think no one does that, but
I'm not sure.
parent a3e754c2
@@ -60,6 +60,7 @@ extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned lo
 extern int kmem_cache_destroy(kmem_cache_t *);
 extern int kmem_cache_shrink(kmem_cache_t *);
 extern void *kmem_cache_alloc(kmem_cache_t *, int);
+extern void *kmem_cache_alloc_node(kmem_cache_t *, int);
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
...
@@ -612,6 +612,26 @@ static void stop_cpu_timer(int cpu)
 }
 #endif
 
+static struct array_cache *alloc_arraycache(int cpu, int entries, int batchcount)
+{
+	int memsize = sizeof(void*)*entries+sizeof(struct array_cache);
+	struct array_cache *nc = NULL;
+
+	if (cpu != -1) {
+		nc = kmem_cache_alloc_node(kmem_find_general_cachep(memsize,
+					GFP_KERNEL), cpu_to_node(cpu));
+	}
+	if (!nc)
+		nc = kmalloc(memsize, GFP_KERNEL);
+	if (nc) {
+		nc->avail = 0;
+		nc->limit = entries;
+		nc->batchcount = batchcount;
+		nc->touched = 0;
+	}
+	return nc;
+}
+
 static int __devinit cpuup_callback(struct notifier_block *nfb,
 				  unsigned long action,
 				  void *hcpu)
@@ -623,17 +643,11 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 	case CPU_UP_PREPARE:
 		down(&cache_chain_sem);
 		list_for_each_entry(cachep, &cache_chain, next) {
-			int memsize;
 			struct array_cache *nc;
 
-			memsize = sizeof(void*)*cachep->limit+sizeof(struct array_cache);
-			nc = kmalloc(memsize, GFP_KERNEL);
+			nc = alloc_arraycache(cpu, cachep->limit, cachep->batchcount);
 			if (!nc)
 				goto bad;
-			nc->avail = 0;
-			nc->limit = cachep->limit;
-			nc->batchcount = cachep->batchcount;
-			nc->touched = 0;
 
 			spin_lock_irq(&cachep->spinlock);
 			cachep->array[cpu] = nc;
@@ -829,23 +843,32 @@ __initcall(cpucache_init);
  * did not request dmaable memory, we might get it, but that
  * would be relatively rare and ignorable.
  */
-static inline void *kmem_getpages(kmem_cache_t *cachep, unsigned long flags)
+static void *kmem_getpages(kmem_cache_t *cachep, int flags, int nodeid)
 {
+	struct page *page;
 	void *addr;
+	int i;
 
 	flags |= cachep->gfpflags;
-	addr = (void*)__get_free_pages(flags, cachep->gfporder);
-	if (addr) {
-		int i = (1 << cachep->gfporder);
-		struct page *page = virt_to_page(addr);
-
-		if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-			atomic_add(i, &slab_reclaim_pages);
-		add_page_state(nr_slab, i);
-		while (i--) {
-			SetPageSlab(page);
-			page++;
-		}
+	if (likely(nodeid == -1)) {
+		addr = (void*)__get_free_pages(flags, cachep->gfporder);
+		if (!addr)
+			return NULL;
+		page = virt_to_page(addr);
+	} else {
+		page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+		if (!page)
+			return NULL;
+		addr = page_address(page);
+	}
+	i = (1 << cachep->gfporder);
+	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+		atomic_add(i, &slab_reclaim_pages);
+	add_page_state(nr_slab, i);
+	while (i--) {
+		SetPageSlab(page);
+		page++;
 	}
 	return addr;
 }
@@ -1650,6 +1673,21 @@ static void kmem_flagcheck(kmem_cache_t *cachep, int flags)
 	}
 }
 
+static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp)
+{
+	int i;
+	struct page *page;
+
+	/* Nasty!!!!!! I hope this is OK. */
+	i = 1 << cachep->gfporder;
+	page = virt_to_page(objp);
+	do {
+		SET_PAGE_CACHE(page, cachep);
+		SET_PAGE_SLAB(page, slabp);
+		page++;
+	} while (--i);
+}
+
 /*
  * Grow (by 1) the number of slabs within a cache. This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
@@ -1657,10 +1695,9 @@ static void kmem_flagcheck(kmem_cache_t *cachep, int flags)
 static int cache_grow (kmem_cache_t * cachep, int flags)
 {
 	struct slab *slabp;
-	struct page *page;
 	void *objp;
 	size_t offset;
-	unsigned int i, local_flags;
+	int local_flags;
 	unsigned long ctor_flags;
 
 	/* Be lazy and only check for valid flags here,
@@ -1706,21 +1743,14 @@ static int cache_grow (kmem_cache_t * cachep, int flags)
 	/* Get mem for the objs. */
-	if (!(objp = kmem_getpages(cachep, flags)))
+	if (!(objp = kmem_getpages(cachep, flags, -1)))
 		goto failed;
 
 	/* Get slab management. */
 	if (!(slabp = alloc_slabmgmt(cachep, objp, offset, local_flags)))
 		goto opps1;
 
-	/* Nasty!!!!!! I hope this is OK. */
-	i = 1 << cachep->gfporder;
-	page = virt_to_page(objp);
-	do {
-		SET_PAGE_CACHE(page, cachep);
-		SET_PAGE_SLAB(page, slabp);
-		page++;
-	} while (--i);
+	set_slab_attr(cachep, slabp, objp);
 
 	cache_init_objs(cachep, slabp, ctor_flags);
@@ -2225,6 +2255,81 @@ int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)
 	return 0;
 }
 
+/**
+ * kmem_cache_alloc_node - Allocate an object on the specified node
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ * @nodeid: node number of the target node.
+ *
+ * Identical to kmem_cache_alloc, except that this function is slow
+ * and can sleep. And it will allocate memory on the given node, which
+ * can improve the performance for cpu bound structures.
+ */
+void *kmem_cache_alloc_node(kmem_cache_t *cachep, int nodeid)
+{
+	size_t offset;
+	void *objp;
+	struct slab *slabp;
+	kmem_bufctl_t next;
+
+	/* The main algorithms are not node aware, thus we have to cheat:
+	 * We bypass all caches and allocate a new slab.
+	 * The following code is a streamlined copy of cache_grow().
+	 */
+
+	/* Get colour for the slab, and update the next value. */
+	spin_lock_irq(&cachep->spinlock);
+	offset = cachep->colour_next;
+	cachep->colour_next++;
+	if (cachep->colour_next >= cachep->colour)
+		cachep->colour_next = 0;
+	offset *= cachep->colour_off;
+	spin_unlock_irq(&cachep->spinlock);
+
+	/* Get mem for the objs. */
+	if (!(objp = kmem_getpages(cachep, GFP_KERNEL, nodeid)))
+		goto failed;
+
+	/* Get slab management. */
+	if (!(slabp = alloc_slabmgmt(cachep, objp, offset, GFP_KERNEL)))
+		goto opps1;
+
+	set_slab_attr(cachep, slabp, objp);
+	cache_init_objs(cachep, slabp, SLAB_CTOR_CONSTRUCTOR);
+
+	/* The first object is ours: */
+	objp = slabp->s_mem + slabp->free*cachep->objsize;
+	slabp->inuse++;
+	next = slab_bufctl(slabp)[slabp->free];
+#if DEBUG
+	slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
+#endif
+	slabp->free = next;
+
+	/* add the remaining objects into the cache */
+	spin_lock_irq(&cachep->spinlock);
+	check_slabp(cachep, slabp);
+	STATS_INC_GROWN(cachep);
+	/* Make slab active. */
+	if (slabp->free == BUFCTL_END) {
+		list_add_tail(&slabp->list, &(list3_data(cachep)->slabs_full));
+	} else {
+		list_add_tail(&slabp->list,
+				&(list3_data(cachep)->slabs_partial));
+		list3_data(cachep)->free_objects += cachep->num-1;
+	}
+	spin_unlock_irq(&cachep->spinlock);
+	objp = cache_alloc_debugcheck_after(cachep, GFP_KERNEL, objp,
+					__builtin_return_address(0));
+	return objp;
+opps1:
+	kmem_freepages(cachep, objp);
+failed:
+	return NULL;
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node);
+
 /**
  * kmalloc - allocate memory
  * @size: how many bytes of memory are required.
@@ -2289,7 +2394,10 @@ void *__alloc_percpu(size_t size, size_t align)
 	for (i = 0; i < NR_CPUS; i++) {
 		if (!cpu_possible(i))
 			continue;
-		pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
+		pdata->ptrs[i] = kmem_cache_alloc_node(
+				kmem_find_general_cachep(size, GFP_KERNEL),
+				cpu_to_node(i));
+
 		if (!pdata->ptrs[i])
 			goto unwind_oom;
 		memset(pdata->ptrs[i], 0, size);
@@ -2428,19 +2536,15 @@ static int do_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount, in
 	memset(&new.new,0,sizeof(new.new));
 	for (i = 0; i < NR_CPUS; i++) {
-		struct array_cache *ccnew;
-
-		ccnew = kmalloc(sizeof(void*)*limit+
-				sizeof(struct array_cache), GFP_KERNEL);
-		if (!ccnew) {
-			for (i--; i >= 0; i--) kfree(new.new[i]);
-			return -ENOMEM;
+		if (cpu_online(i)) {
+			new.new[i] = alloc_arraycache(i, limit, batchcount);
+			if (!new.new[i]) {
+				for (i--; i >= 0; i--) kfree(new.new[i]);
+				return -ENOMEM;
+			}
+		} else {
+			new.new[i] = NULL;
 		}
-		ccnew->avail = 0;
-		ccnew->limit = limit;
-		ccnew->batchcount = batchcount;
-		ccnew->touched = 0;
-		new.new[i] = ccnew;
 	}
 
 	new.cachep = cachep;
@@ -2462,14 +2566,9 @@ static int do_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount, in
 		spin_unlock_irq(&cachep->spinlock);
 		kfree(ccold);
 	}
-	new_shared = kmalloc(sizeof(void*)*batchcount*shared+
-			sizeof(struct array_cache), GFP_KERNEL);
+	new_shared = alloc_arraycache(-1, batchcount*shared, 0xbaadf00d);
 	if (new_shared) {
 		struct array_cache *old;
-		new_shared->avail = 0;
-		new_shared->limit = batchcount*shared;
-		new_shared->batchcount = 0xbaadf00d;
-		new_shared->touched = 0;
 
 		spin_lock_irq(&cachep->spinlock);
 		old = cachep->lists.shared;
...