Commit 6296e5d3 authored by Linus Torvalds

Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux

* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux:
  slub: disallow changing cpu_partial from userspace for debug caches
  slub: add missed accounting
  slub: Extract get_freelist from __slab_alloc
  slub: Switch per cpu partial page support off for debugging
  slub: fix a possible memleak in __slab_alloc()
  slub: fix slub_max_order Documentation
  slub: add missed accounting
  slab: add taint flag outputting to debug paths.
  slub: add taint flag outputting to debug paths
  slab: introduce slab_max_order kernel parameter
  slab: rename slab_break_gfp_order to slab_max_order
parents c086ae4e 5878cf43
Documentation/kernel-parameters.txt
@@ -2395,6 +2395,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	slram=		[HW,MTD]

+	slab_max_order=	[MM, SLAB]
+			Determines the maximum allowed order for slabs.
+			A high setting may cause OOMs due to memory
+			fragmentation. Defaults to 1 for systems with
+			more than 32MB of RAM, 0 otherwise.
+
 	slub_debug[=options[,slabs]]	[MM, SLUB]
 			Enabling slub_debug allows one to determine the
 			culprit if slab objects become corrupted. Enabling
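As a usage illustration (the value 2 below is only an example, not a recommendation), the new option is simply appended to the kernel boot command line; it affects only the SLAB allocator, while SLUB keeps its own slub_max_order parameter documented in Documentation/vm/slub.txt:

	slab_max_order=2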
Documentation/vm/slub.txt
@@ -117,7 +117,7 @@ can be influenced by kernel parameters:
 slub_min_objects=x		(default 4)
 slub_min_order=x		(default 0)
-slub_max_order=x		(default 1)
+slub_max_order=x		(default 3 (PAGE_ALLOC_COSTLY_ORDER))

 slub_min_objects allows to specify how many objects must at least fit
 into one slab in order for the allocation order to be acceptable.
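(For readers unfamiliar with the term: "order" here is the page-allocation order, so an order-n slab spans 2^n contiguous pages. The corrected default of 3 therefore allows slabs of up to 2^3 = 8 pages, i.e. 32 KiB with the common 4 KiB page size.)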
mm/slab.c
@@ -481,11 +481,13 @@ EXPORT_SYMBOL(slab_buffer_size);
 #endif

 /*
- * Do not go above this order unless 0 objects fit into the slab.
+ * Do not go above this order unless 0 objects fit into the slab or
+ * overridden on the command line.
  */
-#define	BREAK_GFP_ORDER_HI	1
-#define	BREAK_GFP_ORDER_LO	0
-static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+#define	SLAB_MAX_ORDER_HI	1
+#define	SLAB_MAX_ORDER_LO	0
+static int slab_max_order = SLAB_MAX_ORDER_LO;
+static bool slab_max_order_set __initdata;

 /*
  * Functions for storing/retrieving the cachep and or slab from the page
@@ -854,6 +856,17 @@ static int __init noaliencache_setup(char *s)
 }
 __setup("noaliencache", noaliencache_setup);

+static int __init slab_max_order_setup(char *str)
+{
+	get_option(&str, &slab_max_order);
+	slab_max_order = slab_max_order < 0 ? 0 :
+				min(slab_max_order, MAX_ORDER - 1);
+	slab_max_order_set = true;
+
+	return 1;
+}
+__setup("slab_max_order=", slab_max_order_setup);
+
 #ifdef CONFIG_NUMA
 /*
  * Special reaping functions for NUMA systems called from cache_reap().
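To make the clamping rule in slab_max_order_setup() above concrete, here is a small standalone userspace sketch (not kernel code; it assumes the common MAX_ORDER value of 11, which individual kernel configs may override) that mirrors the same arithmetic:

	#include <stdio.h>

	#define MAX_ORDER 11			/* assumed value for illustration */
	#define min(a, b) ((a) < (b) ? (a) : (b))

	/* Same clamp as slab_max_order_setup(): negative -> 0, huge -> MAX_ORDER - 1 */
	static int clamp_slab_max_order(int requested)
	{
		return requested < 0 ? 0 : min(requested, MAX_ORDER - 1);
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       clamp_slab_max_order(-1),	/* 0  */
		       clamp_slab_max_order(5),		/* 5  */
		       clamp_slab_max_order(99));	/* 10 */
		return 0;
	}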
@@ -1502,10 +1515,11 @@ void __init kmem_cache_init(void)
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
-	 * page orders on machines with more than 32MB of memory.
+	 * page orders on machines with more than 32MB of memory if
+	 * not overridden on the command line.
 	 */
-	if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
-		slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+	if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
+		slab_max_order = SLAB_MAX_ORDER_HI;

 	/* Bootstrap is tricky, because several objects are allocated
 	 * from caches that do not exist yet:
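(For reference, (32 << 20) >> PAGE_SHIFT is just 32 MiB expressed in pages: with the usual 4 KiB page size PAGE_SHIFT is 12, so the threshold works out to 33554432 / 4096 = 8192 pages.)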
@@ -1932,8 +1946,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 		/* Print header */
 		if (lines == 0) {
 			printk(KERN_ERR
-				"Slab corruption: %s start=%p, len=%d\n",
-				cachep->name, realobj, size);
+				"Slab corruption (%s): %s start=%p, len=%d\n",
+				print_tainted(), cachep->name, realobj, size);
 			print_objinfo(cachep, objp, 0);
 		}
 		/* Hexdump the affected line */
@@ -2117,7 +2131,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 		 * Large number of objects is good, but very large slabs are
 		 * currently bad for the gfp()s.
 		 */
-		if (gfporder >= slab_break_gfp_order)
+		if (gfporder >= slab_max_order)
 			break;

 		/*
@@ -3042,8 +3056,9 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
 	if (entries != cachep->num - slabp->inuse) {
 bad:
 		printk(KERN_ERR "slab: Internal list corruption detected in "
-			"cache '%s'(%d), slabp %p(%d). Hexdump:\n",
-			cachep->name, cachep->num, slabp, slabp->inuse);
+			"cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
+			cachep->name, cachep->num, slabp, slabp->inuse,
+			print_tainted());
 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
 			sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
 			1);
mm/slub.c
@@ -570,7 +570,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	va_end(args);
 	printk(KERN_ERR "========================================"
 			"=====================================\n");
-	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
+	printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
 	printk(KERN_ERR "----------------------------------------"
 			"-------------------------------------\n\n");
 }
@@ -1901,11 +1901,14 @@ static void unfreeze_partials(struct kmem_cache *s)
 		}

 		if (l != m) {
-			if (l == M_PARTIAL)
+			if (l == M_PARTIAL) {
 				remove_partial(n, page);
-			else
+				stat(s, FREE_REMOVE_PARTIAL);
+			} else {
 				add_partial(n, page,
 					DEACTIVATE_TO_TAIL);
+				stat(s, FREE_ADD_PARTIAL);
+			}

 			l = m;
 		}
@@ -2123,6 +2126,37 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
 	return object;
 }

+/*
+ * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist
+ * or deactivate the page.
+ *
+ * The page is still frozen if the return value is not NULL.
+ *
+ * If this function returns NULL then the page has been unfrozen.
+ */
+static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+{
+	struct page new;
+	unsigned long counters;
+	void *freelist;
+
+	do {
+		freelist = page->freelist;
+		counters = page->counters;
+		new.counters = counters;
+		VM_BUG_ON(!new.frozen);
+
+		new.inuse = page->objects;
+		new.frozen = freelist != NULL;
+
+	} while (!cmpxchg_double_slab(s, page,
+		freelist, counters,
+		NULL, new.counters,
+		"get_freelist"));
+
+	return freelist;
+}
+
 /*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
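The retry loop in get_freelist() follows a common lock-free pattern: snapshot the current state, compute the replacement state, then publish it with a compare-and-swap and retry if another CPU raced in between. A rough userspace analogy using C11 atomics (an illustrative sketch only, not how the kernel implements cmpxchg_double_slab, which also updates the counters word atomically) might look like this:

	#include <stdatomic.h>
	#include <stddef.h>
	#include <stdio.h>

	struct node { struct node *next; };

	static _Atomic(struct node *) list_head = NULL;

	/* Push one node onto the shared list (classic Treiber-stack push). */
	static void push(struct node *n)
	{
		n->next = atomic_load(&list_head);
		while (!atomic_compare_exchange_weak(&list_head, &n->next, n))
			;	/* on failure, n->next was refreshed with the current head */
	}

	/* Detach the whole list in one step, loosely analogous to taking
	 * page->freelist while leaving NULL behind. */
	static struct node *take_all(void)
	{
		struct node *old = atomic_load(&list_head);

		while (!atomic_compare_exchange_weak(&list_head, &old, NULL))
			;	/* someone raced with us; 'old' now holds the new head, retry */
		return old;
	}

	int main(void)
	{
		struct node a = { 0 }, b = { 0 };

		push(&a);
		push(&b);
		printf("detached list starts at %p\n", (void *)take_all());
		return 0;
	}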
@@ -2144,8 +2178,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
 	void **object;
 	unsigned long flags;
-	struct page new;
-	unsigned long counters;

 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPT
@@ -2166,31 +2198,14 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 		goto new_slab;
 	}

-	stat(s, ALLOC_SLOWPATH);
-
-	do {
-		object = c->page->freelist;
-		counters = c->page->counters;
-		new.counters = counters;
-		VM_BUG_ON(!new.frozen);
-
-		/*
-		 * If there is no object left then we use this loop to
-		 * deactivate the slab which is simple since no objects
-		 * are left in the slab and therefore we do not need to
-		 * put the page back onto the partial list.
-		 *
-		 * If there are objects left then we retrieve them
-		 * and use them to refill the per cpu queue.
-		 */
-
-		new.inuse = c->page->objects;
-		new.frozen = object != NULL;
-
-	} while (!__cmpxchg_double_slab(s, c->page,
-			object, counters,
-			NULL, new.counters,
-			"__slab_alloc"));
+	/* must check again c->freelist in case of cpu migration or IRQ */
+	object = c->freelist;
+	if (object)
+		goto load_freelist;
+
+	stat(s, ALLOC_SLOWPATH);
+
+	object = get_freelist(s, c->page);

 	if (!object) {
 		c->page = NULL;
@@ -3028,7 +3043,9 @@ static int kmem_cache_open(struct kmem_cache *s,
 	 * per node list when we run out of per cpu objects. We only fetch 50%
 	 * to keep some capacity around for frees.
 	 */
-	if (s->size >= PAGE_SIZE)
+	if (kmem_cache_debug(s))
+		s->cpu_partial = 0;
+	else if (s->size >= PAGE_SIZE)
 		s->cpu_partial = 2;
 	else if (s->size >= 1024)
 		s->cpu_partial = 6;
@@ -4637,6 +4654,8 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 	err = strict_strtoul(buf, 10, &objects);
 	if (err)
 		return err;
+	if (objects && kmem_cache_debug(s))
+		return -EINVAL;

 	s->cpu_partial = objects;
 	flush_all(s);
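In practice, cpu_partial is exposed per cache through SLUB's sysfs interface under /sys/kernel/slab/. The following hedged sketch (the cache name is an example, and it must run as root) simply writes a non-zero value and reports the error it gets back; for a cache created with debugging enabled (e.g. via slub_debug), that write now fails with EINVAL as added above:

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* Cache name is illustrative; pick any entry under /sys/kernel/slab/. */
		const char *path = "/sys/kernel/slab/kmalloc-64/cpu_partial";
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, "8\n", 2) < 0)
			/* EINVAL here indicates a debug cache refusing per-cpu partials. */
			fprintf(stderr, "write: %s\n", strerror(errno));
		close(fd);
		return 0;
	}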