Commit bb4f6b0c authored by Pekka Enberg

Merge branches 'slab/align', 'slab/cleanups', 'slab/fixes', 'slab/memhotadd'...

Merge branches 'slab/align', 'slab/cleanups', 'slab/fixes', 'slab/memhotadd' and 'slub/fixes' into slab-for-linus
...@@ -99,13 +99,7 @@ ...@@ -99,13 +99,7 @@
* as arm where pointers are 32-bit aligned but there are data types such as * as arm where pointers are 32-bit aligned but there are data types such as
* u64 which require 64-bit alignment. * u64 which require 64-bit alignment.
*/ */
#if defined(ARCH_KMALLOC_MINALIGN)
#define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN #define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN
#elif defined(ARCH_SLAB_MINALIGN)
#define CRYPTO_MINALIGN ARCH_SLAB_MINALIGN
#else
#define CRYPTO_MINALIGN __alignof__(unsigned long long)
#endif
#define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN))) #define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN)))
......
...@@ -16,6 +16,30 @@ ...@@ -16,6 +16,30 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/kmemtrace.h> #include <linux/kmemtrace.h>
#ifndef ARCH_KMALLOC_MINALIGN
/*
 * Enforce a minimum alignment for the kmalloc caches.
 * Usually, the kmalloc caches are cache_line_size() aligned, except when
 * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
 * Some archs want to perform DMA into kmalloc caches and need a guaranteed
 * alignment larger than the alignment of a 64-bit integer.
 * ARCH_KMALLOC_MINALIGN allows that.
 * Note that increasing this value may disable some debug features.
 *
 * This fallback applies only when ARCH_KMALLOC_MINALIGN has not been
 * defined before this point; it then becomes the alignment of
 * unsigned long long.
 */
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
#ifndef ARCH_SLAB_MINALIGN
/*
 * Enforce a minimum alignment for all caches.
 * Intended for archs that get misalignment faults even for BYTES_PER_WORD
 * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
 * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
 * some debug features.
 *
 * This fallback applies only when ARCH_SLAB_MINALIGN has not been defined
 * before this point; it is then 0.
 */
#define ARCH_SLAB_MINALIGN 0
#endif
/* /*
* struct kmem_cache * struct kmem_cache
* *
......
#ifndef __LINUX_SLOB_DEF_H #ifndef __LINUX_SLOB_DEF_H
#define __LINUX_SLOB_DEF_H #define __LINUX_SLOB_DEF_H
/*
 * Fallback minimum alignments for this allocator header. Each macro is
 * defined here only if it has not been defined earlier, and both fall back
 * to the alignment of unsigned long.
 */
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
#endif
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
#endif
void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep, static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
......
...@@ -116,6 +116,14 @@ struct kmem_cache { ...@@ -116,6 +116,14 @@ struct kmem_cache {
#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) #define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
/*
 * Fallback minimum alignments for this allocator header. Each macro is
 * defined here only if it has not been defined earlier, and both fall back
 * to the alignment of unsigned long long.
 */
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
/* /*
* Maximum kmalloc object size handled by SLUB. Larger object allocations * Maximum kmalloc object size handled by SLUB. Larger object allocations
* are passed through to the page allocator. The page allocator "fastpath" * are passed through to the page allocator. The page allocator "fastpath"
......
...@@ -115,6 +115,7 @@ ...@@ -115,6 +115,7 @@
#include <linux/reciprocal_div.h> #include <linux/reciprocal_div.h>
#include <linux/debugobjects.h> #include <linux/debugobjects.h>
#include <linux/kmemcheck.h> #include <linux/kmemcheck.h>
#include <linux/memory.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
...@@ -144,30 +145,6 @@ ...@@ -144,30 +145,6 @@
#define BYTES_PER_WORD sizeof(void *) #define BYTES_PER_WORD sizeof(void *)
#define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long)) #define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long))
#ifndef ARCH_KMALLOC_MINALIGN
/*
* Enforce a minimum alignment for the kmalloc caches.
* Usually, the kmalloc caches are cache_line_size() aligned, except when
* DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
* ARCH_KMALLOC_MINALIGN allows that.
* Note that increasing this value may disable some debug features.
*/
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
#ifndef ARCH_SLAB_MINALIGN
/*
* Enforce a minimum alignment for all caches.
* Intended for archs that get misalignment faults even for BYTES_PER_WORD
* aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
* If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
* some debug features.
*/
#define ARCH_SLAB_MINALIGN 0
#endif
#ifndef ARCH_KMALLOC_FLAGS #ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif #endif
...@@ -1102,6 +1079,52 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) ...@@ -1102,6 +1079,52 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
} }
#endif #endif
/*
 * Make sure every slab cache on cache_chain has a kmem_list3 for @node and
 * refresh that list's free_limit. Shared by the cpu and memory hotplug
 * paths. For memory hot-add the kmem_list3 ends up off-node because the
 * new node's memory is not online yet. A nodelist that already exists is
 * kept as is; only its free_limit is recomputed.
 *
 * Returns 0 on success or -ENOMEM if a kmem_list3 could not be allocated
 * (caches visited before the failure keep their new nodelists).
 *
 * Must hold cache_chain_mutex.
 */
static int init_cache_nodelists_node(int node)
{
	struct kmem_cache *cache;

	list_for_each_entry(cache, &cache_chain, next) {
		struct kmem_list3 *lists = cache->nodelists[node];

		/*
		 * Only allocate when nothing has installed a nodelist for
		 * this node yet (e.g. another cpu on the same node).
		 */
		if (!lists) {
			lists = kmalloc_node(sizeof(struct kmem_list3),
					     GFP_KERNEL, node);
			if (!lists)
				return -ENOMEM;

			kmem_list3_init(lists);
			lists->next_reap = jiffies + REAPTIMEOUT_LIST3 +
				((unsigned long)cache) % REAPTIMEOUT_LIST3;

			/*
			 * Nodelists are not torn down when CPUs go away,
			 * so publishing the pointer under cache_chain_mutex
			 * is enough.
			 */
			cache->nodelists[node] = lists;
		}

		/* Recompute the free limit under the list lock. */
		spin_lock_irq(&lists->list_lock);
		lists->free_limit = (1 + nr_cpus_node(node)) *
					cache->batchcount + cache->num;
		spin_unlock_irq(&lists->list_lock);
	}

	return 0;
}
static void __cpuinit cpuup_canceled(long cpu) static void __cpuinit cpuup_canceled(long cpu)
{ {
struct kmem_cache *cachep; struct kmem_cache *cachep;
...@@ -1172,7 +1195,7 @@ static int __cpuinit cpuup_prepare(long cpu) ...@@ -1172,7 +1195,7 @@ static int __cpuinit cpuup_prepare(long cpu)
struct kmem_cache *cachep; struct kmem_cache *cachep;
struct kmem_list3 *l3 = NULL; struct kmem_list3 *l3 = NULL;
int node = cpu_to_node(cpu); int node = cpu_to_node(cpu);
const int memsize = sizeof(struct kmem_list3); int err;
/* /*
* We need to do this right in the beginning since * We need to do this right in the beginning since
...@@ -1180,35 +1203,9 @@ static int __cpuinit cpuup_prepare(long cpu) ...@@ -1180,35 +1203,9 @@ static int __cpuinit cpuup_prepare(long cpu)
* kmalloc_node allows us to add the slab to the right * kmalloc_node allows us to add the slab to the right
* kmem_list3 and not this cpu's kmem_list3 * kmem_list3 and not this cpu's kmem_list3
*/ */
err = init_cache_nodelists_node(node);
list_for_each_entry(cachep, &cache_chain, next) { if (err < 0)
/* goto bad;
* Set up the size64 kmemlist for cpu before we can
* begin anything. Make sure some other cpu on this
* node has not already allocated this
*/
if (!cachep->nodelists[node]) {
l3 = kmalloc_node(memsize, GFP_KERNEL, node);
if (!l3)
goto bad;
kmem_list3_init(l3);
l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
((unsigned long)cachep) % REAPTIMEOUT_LIST3;
/*
* The l3s don't come and go as CPUs come and
* go. cache_chain_mutex is sufficient
* protection here.
*/
cachep->nodelists[node] = l3;
}
spin_lock_irq(&cachep->nodelists[node]->list_lock);
cachep->nodelists[node]->free_limit =
(1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
spin_unlock_irq(&cachep->nodelists[node]->list_lock);
}
/* /*
* Now we can go ahead with allocating the shared arrays and * Now we can go ahead with allocating the shared arrays and
...@@ -1331,11 +1328,75 @@ static struct notifier_block __cpuinitdata cpucache_notifier = { ...@@ -1331,11 +1328,75 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
&cpuup_callback, NULL, 0 &cpuup_callback, NULL, 0
}; };
#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
/*
 * Memory hot-remove helper: for each slab cache on cache_chain that has a
 * kmem_list3 for @node, drain its free list. Stops at the first cache that
 * still has full or partial slabs afterwards.
 *
 * Returns 0 if every cache could be drained, or -EBUSY if some cache still
 * holds objects on @node, so that the node is not removed.
 *
 * Must hold cache_chain_mutex.
 */
static int __meminit drain_cache_nodelists_node(int node)
{
	struct kmem_cache *cache;

	list_for_each_entry(cache, &cache_chain, next) {
		struct kmem_list3 *lists = cache->nodelists[node];

		if (lists) {
			drain_freelist(cache, lists, lists->free_objects);

			/* Slabs still in use on this node: give up. */
			if (!(list_empty(&lists->slabs_full) &&
			      list_empty(&lists->slabs_partial)))
				return -EBUSY;
		}
	}

	return 0;
}
/*
 * Memory hotplug notifier for the slab allocator.
 *
 * @self:   notifier block this callback was registered with (unused here)
 * @action: memory hotplug event (MEM_GOING_ONLINE, MEM_GOING_OFFLINE, ...)
 * @arg:    struct memory_notify describing the change
 *
 * Nothing is done when status_change_nid is negative. MEM_GOING_ONLINE sets
 * up the per-cache nodelists for the node, and MEM_GOING_OFFLINE drains
 * them; both run under cache_chain_mutex. All other events are ignored.
 *
 * Returns NOTIFY_OK when no error occurred, otherwise the error passed
 * through notifier_from_errno().
 */
static int __meminit slab_memory_callback(struct notifier_block *self,
					  unsigned long action, void *arg)
{
	struct memory_notify *notify = arg;
	int nid = notify->status_change_nid;
	int err = 0;

	if (nid < 0)
		return NOTIFY_OK;

	if (action == MEM_GOING_ONLINE) {
		mutex_lock(&cache_chain_mutex);
		err = init_cache_nodelists_node(nid);
		mutex_unlock(&cache_chain_mutex);
	} else if (action == MEM_GOING_OFFLINE) {
		mutex_lock(&cache_chain_mutex);
		err = drain_cache_nodelists_node(nid);
		mutex_unlock(&cache_chain_mutex);
	}
	/*
	 * MEM_ONLINE, MEM_OFFLINE, MEM_CANCEL_ONLINE and MEM_CANCEL_OFFLINE
	 * need no slab work.
	 */

	if (!err)
		return NOTIFY_OK;
	return notifier_from_errno(err);
}
#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
/* /*
* swap the static kmem_list3 with kmalloced memory * swap the static kmem_list3 with kmalloced memory
*/ */
static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
int nodeid) int nodeid)
{ {
struct kmem_list3 *ptr; struct kmem_list3 *ptr;
...@@ -1580,6 +1641,14 @@ void __init kmem_cache_init_late(void) ...@@ -1580,6 +1641,14 @@ void __init kmem_cache_init_late(void)
*/ */
register_cpu_notifier(&cpucache_notifier); register_cpu_notifier(&cpucache_notifier);
#ifdef CONFIG_NUMA
/*
* Register a memory hotplug callback that initializes and frees
* nodelists.
*/
hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif
/* /*
* The reap timers are started later, with a module init call: That part * The reap timers are started later, with a module init call: That part
* of the kernel is not yet operational. * of the kernel is not yet operational.
...@@ -2220,8 +2289,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, ...@@ -2220,8 +2289,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
if (ralign < align) { if (ralign < align) {
ralign = align; ralign = align;
} }
/* disable debug if necessary */ /* disable debug if not aligning with REDZONE_ALIGN */
if (ralign > __alignof__(unsigned long long)) if (ralign & (__alignof__(unsigned long long) - 1))
flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
/* /*
* 4) Store it. * 4) Store it.
...@@ -2247,8 +2316,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, ...@@ -2247,8 +2316,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
*/ */
if (flags & SLAB_RED_ZONE) { if (flags & SLAB_RED_ZONE) {
/* add space for red zone words */ /* add space for red zone words */
cachep->obj_offset += sizeof(unsigned long long); cachep->obj_offset += align;
size += 2 * sizeof(unsigned long long); size += align + sizeof(unsigned long long);
} }
if (flags & SLAB_STORE_USER) { if (flags & SLAB_STORE_USER) {
/* user store requires one word storage behind the end of /* user store requires one word storage behind the end of
...@@ -4216,10 +4285,11 @@ static int s_show(struct seq_file *m, void *p) ...@@ -4216,10 +4285,11 @@ static int s_show(struct seq_file *m, void *p)
unsigned long node_frees = cachep->node_frees; unsigned long node_frees = cachep->node_frees;
unsigned long overflows = cachep->node_overflow; unsigned long overflows = cachep->node_overflow;
seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
%4lu %4lu %4lu %4lu %4lu", allocs, high, grown, "%4lu %4lu %4lu %4lu %4lu",
reaped, errors, max_freeable, node_allocs, allocs, high, grown,
node_frees, overflows); reaped, errors, max_freeable, node_allocs,
node_frees, overflows);
} }
/* cpu stats */ /* cpu stats */
{ {
......
...@@ -467,14 +467,6 @@ static void slob_free(void *block, int size) ...@@ -467,14 +467,6 @@ static void slob_free(void *block, int size)
* End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend. * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
*/ */
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
#endif
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
#endif
void *__kmalloc_node(size_t size, gfp_t gfp, int node) void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{ {
unsigned int *m; unsigned int *m;
......
...@@ -157,14 +157,6 @@ ...@@ -157,14 +157,6 @@
#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
SLAB_CACHE_DMA | SLAB_NOTRACK) SLAB_CACHE_DMA | SLAB_NOTRACK)
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
#define OO_SHIFT 16 #define OO_SHIFT 16
#define OO_MASK ((1 << OO_SHIFT) - 1) #define OO_MASK ((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */ #define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */
...@@ -1084,7 +1076,7 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node, ...@@ -1084,7 +1076,7 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node,
if (node == -1) if (node == -1)
return alloc_pages(flags, order); return alloc_pages(flags, order);
else else
return alloc_pages_node(node, flags, order); return alloc_pages_exact_node(node, flags, order);
} }
static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
...@@ -2429,9 +2421,11 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, ...@@ -2429,9 +2421,11 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
#ifdef CONFIG_SLUB_DEBUG #ifdef CONFIG_SLUB_DEBUG
void *addr = page_address(page); void *addr = page_address(page);
void *p; void *p;
DECLARE_BITMAP(map, page->objects); long *map = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long),
GFP_ATOMIC);
bitmap_zero(map, page->objects); if (!map)
return;
slab_err(s, page, "%s", text); slab_err(s, page, "%s", text);
slab_lock(page); slab_lock(page);
for_each_free_object(p, s, page->freelist) for_each_free_object(p, s, page->freelist)
...@@ -2446,6 +2440,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, ...@@ -2446,6 +2440,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
} }
} }
slab_unlock(page); slab_unlock(page);
kfree(map);
#endif #endif
} }
...@@ -3338,8 +3333,15 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, ...@@ -3338,8 +3333,15 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
struct kmem_cache *s; struct kmem_cache *s;
void *ret; void *ret;
if (unlikely(size > SLUB_MAX_SIZE)) if (unlikely(size > SLUB_MAX_SIZE)) {
return kmalloc_large_node(size, gfpflags, node); ret = kmalloc_large_node(size, gfpflags, node);
trace_kmalloc_node(caller, ret,
size, PAGE_SIZE << get_order(size),
gfpflags, node);
return ret;
}
s = get_slab(size, gfpflags); s = get_slab(size, gfpflags);
...@@ -3651,10 +3653,10 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, ...@@ -3651,10 +3653,10 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
} }
static void process_slab(struct loc_track *t, struct kmem_cache *s, static void process_slab(struct loc_track *t, struct kmem_cache *s,
struct page *page, enum track_item alloc) struct page *page, enum track_item alloc,
long *map)
{ {
void *addr = page_address(page); void *addr = page_address(page);
DECLARE_BITMAP(map, page->objects);
void *p; void *p;
bitmap_zero(map, page->objects); bitmap_zero(map, page->objects);
...@@ -3673,11 +3675,14 @@ static int list_locations(struct kmem_cache *s, char *buf, ...@@ -3673,11 +3675,14 @@ static int list_locations(struct kmem_cache *s, char *buf,
unsigned long i; unsigned long i;
struct loc_track t = { 0, 0, NULL }; struct loc_track t = { 0, 0, NULL };
int node; int node;
unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
sizeof(unsigned long), GFP_KERNEL);
if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
GFP_TEMPORARY)) GFP_TEMPORARY)) {
kfree(map);
return sprintf(buf, "Out of memory\n"); return sprintf(buf, "Out of memory\n");
}
/* Push back cpu slabs */ /* Push back cpu slabs */
flush_all(s); flush_all(s);
...@@ -3691,9 +3696,9 @@ static int list_locations(struct kmem_cache *s, char *buf, ...@@ -3691,9 +3696,9 @@ static int list_locations(struct kmem_cache *s, char *buf,
spin_lock_irqsave(&n->list_lock, flags); spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru) list_for_each_entry(page, &n->partial, lru)
process_slab(&t, s, page, alloc); process_slab(&t, s, page, alloc, map);
list_for_each_entry(page, &n->full, lru) list_for_each_entry(page, &n->full, lru)
process_slab(&t, s, page, alloc); process_slab(&t, s, page, alloc, map);
spin_unlock_irqrestore(&n->list_lock, flags); spin_unlock_irqrestore(&n->list_lock, flags);
} }
...@@ -3744,6 +3749,7 @@ static int list_locations(struct kmem_cache *s, char *buf, ...@@ -3744,6 +3749,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
} }
free_loc_track(&t); free_loc_track(&t);
kfree(map);
if (!t.count) if (!t.count)
len += sprintf(buf, "No data\n"); len += sprintf(buf, "No data\n");
return len; return len;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment