Commit ae664dba authored by Linus Torvalds

Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux

Pull SLAB changes from Pekka Enberg:
 "This contains preparational work from Christoph Lameter and Glauber
  Costa for SLAB memcg and cleanups and improvements from Ezequiel
  Garcia and Joonsoo Kim.

  Please note that the SLOB cleanup commit from Arnd Bergmann already
  appears in your tree but I had also merged it myself which is why it
  shows up in the shortlog."

* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux:
  mm/sl[aou]b: Common alignment code
  slab: Use the new create_boot_cache function to simplify bootstrap
  slub: Use statically allocated kmem_cache boot structure for bootstrap
  mm, sl[au]b: create common functions for boot slab creation
  slab: Simplify bootstrap
  slub: Use correct cpu_slab on dead cpu
  mm: fix slab.c kernel-doc warnings
  mm/slob: use min_t() to compare ARCH_SLAB_MINALIGN
  slab: Ignore internal flags in cache creation
  mm/slob: Use free_page instead of put_page for page-size kmalloc allocations
  mm/sl[aou]b: Move common kmem_cache_size() to slab.h
  mm/slob: Use object_size field in kmem_cache_size()
  mm/slob: Drop usage of page->private for storing page-sized allocations
  slub: Commonize slab_cache field in struct page
  sl[au]b: Process slabinfo_show in common code
  mm/sl[au]b: Move print_slabinfo_header to slab_common.c
  mm/sl[au]b: Move slabinfo processing to slab_common.c
  slub: remove one code path and reduce lock contention in __slab_free()
parents a2faf2fc 08afe22c
@@ -128,10 +128,7 @@ struct page {
         };
         struct list_head list;          /* slobs list of pages */
-        struct {                        /* slab fields */
-                struct kmem_cache *slab_cache;
-                struct slab *slab_page;
-        };
+        struct slab *slab_page;         /* slab fields */
     };
     /* Remainder is not double word aligned */
@@ -146,7 +143,7 @@ struct page {
 #if USE_SPLIT_PTLOCKS
         spinlock_t ptl;
 #endif
-        struct kmem_cache *slab;        /* SLUB: Pointer to slab */
+        struct kmem_cache *slab_cache;  /* SL[AU]B: Pointer to slab */
         struct page *first_page;        /* Compound tail pages */
     };
...
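For orientation, the effect of commonizing this field: after the change both SLAB and SLUB locate the owning cache of an object through the same page->slab_cache member. The helper below is only an illustrative sketch (the function name is invented); virt_to_head_page() and PageSlab() are existing primitives.

#include <linux/mm.h>
#include <linux/page-flags.h>

/* Illustrative sketch only: resolve the cache that owns an allocated object. */
static struct kmem_cache *cache_of_obj(const void *obj)
{
        struct page *page = virt_to_head_page(obj);

        if (!PageSlab(page))
                return NULL;            /* large kmalloc: not a slab page */

        return page->slab_cache;        /* common to SLAB and SLUB after this merge */
}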
@@ -128,7 +128,6 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
-unsigned int kmem_cache_size(struct kmem_cache *);

 /*
  * Please use this macro to create slab caches. Simply specify the
@@ -388,6 +387,14 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
        return kmalloc_node(size, flags | __GFP_ZERO, node);
 }

+/*
+ * Determine the size of a slab object
+ */
+static inline unsigned int kmem_cache_size(struct kmem_cache *s)
+{
+       return s->object_size;
+}
+
 void __init kmem_cache_init_late(void);

 #endif  /* _LINUX_SLAB_H */
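The hunk above removes the per-allocator kmem_cache_size() implementations and replaces them with a single inline that reports object_size. A brief usage sketch; the cache name and struct are invented for illustration:

#include <linux/slab.h>
#include <linux/module.h>

struct foo { int a; char b; };                 /* hypothetical object type */

static struct kmem_cache *foo_cachep;

static int __init foo_example_init(void)
{
        foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
                                       0, SLAB_HWCACHE_ALIGN, NULL);
        if (!foo_cachep)
                return -ENOMEM;

        /* Reports the object size (s->object_size), not the padded slab size. */
        pr_info("foo object size: %u\n", kmem_cache_size(foo_cachep));
        return 0;
}
module_init(foo_example_init);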
@@ -89,9 +89,13 @@ struct kmem_cache {
         * (see kmem_cache_init())
         * We still use [NR_CPUS] and not [1] or [0] because cache_cache
         * is statically defined, so we reserve the max number of cpus.
+        *
+        * We also need to guarantee that the list is able to accommodate a
+        * pointer for each node since "nodelists" uses the remainder of
+        * available pointers.
         */
        struct kmem_list3 **nodelists;
-       struct array_cache *array[NR_CPUS];
+       struct array_cache *array[NR_CPUS + MAX_NUMNODES];
        /*
         * Do not add fields after array[]
         */
...
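The added comment documents a bootstrap trick: array[] gains MAX_NUMNODES trailing slots so the statically defined boot cache can use them as its nodelists table before any dynamic allocation works. The actual wiring lives in the SLAB allocator proper (one of the collapsed diffs); the fragment below is a minimal sketch of the idea, not the real code, and assumes the first nr_cpu_ids slots are taken by the per-CPU array caches.

/* Sketch only: park the per-node list pointers in the unused tail of array[]. */
static void __init sketch_setup_boot_nodelists(struct kmem_cache *cachep)
{
        cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
}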
[collapsed diff not shown]
@@ -32,9 +32,17 @@ extern struct list_head slab_caches;
 /* The slab cache that manages slab cache information */
 extern struct kmem_cache *kmem_cache;

+unsigned long calculate_alignment(unsigned long flags,
+               unsigned long align, unsigned long size);
+
 /* Functions provided by the slab allocators */
 extern int __kmem_cache_create(struct kmem_cache *, unsigned long flags);

+extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size,
+                       unsigned long flags);
+extern void create_boot_cache(struct kmem_cache *, const char *name,
+                       size_t size, unsigned long flags);
+
 #ifdef CONFIG_SLUB
 struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
        size_t align, unsigned long flags, void (*ctor)(void *));
@@ -45,6 +53,51 @@ static inline struct kmem_cache *__kmem_cache_alias(const char *name, size_t siz
 #endif

+/* Legal flag mask for kmem_cache_create(), for various configurations */
+#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
+                        SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS )
+
+#if defined(CONFIG_DEBUG_SLAB)
+#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
+#elif defined(CONFIG_SLUB_DEBUG)
+#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
+                         SLAB_TRACE | SLAB_DEBUG_FREE)
+#else
+#define SLAB_DEBUG_FLAGS (0)
+#endif
+
+#if defined(CONFIG_SLAB)
+#define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
+                         SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | SLAB_NOTRACK)
+#elif defined(CONFIG_SLUB)
+#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
+                         SLAB_TEMPORARY | SLAB_NOTRACK)
+#else
+#define SLAB_CACHE_FLAGS (0)
+#endif
+
+#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
+
 int __kmem_cache_shutdown(struct kmem_cache *);

+struct seq_file;
+struct file;
+
+struct slabinfo {
+       unsigned long active_objs;
+       unsigned long num_objs;
+       unsigned long active_slabs;
+       unsigned long num_slabs;
+       unsigned long shared_avail;
+       unsigned int limit;
+       unsigned int batchcount;
+       unsigned int shared;
+       unsigned int objects_per_slab;
+       unsigned int cache_order;
+};
+
+void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);
+void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
+ssize_t slabinfo_write(struct file *file, const char __user *buffer,
+                      size_t count, loff_t *ppos);
 #endif
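CACHE_CREATE_MASK is consumed in kmem_cache_create() (see the mm/slab_common.c hunks below): flags the compiled-in allocator cannot honor are silently masked off. A hedged illustration, assuming a SLUB build with CONFIG_SLUB_DEBUG disabled, where SLAB_DEBUG_FLAGS is (0):

/* Illustration only: debug flags disappear when SLAB_DEBUG_FLAGS is (0). */
static unsigned long example_sanitize(void)
{
        unsigned long flags = SLAB_HWCACHE_ALIGN | SLAB_RED_ZONE | SLAB_POISON;

        /* SLAB_HWCACHE_ALIGN is a core flag and survives; SLAB_RED_ZONE and
         * SLAB_POISON are stripped because they are not in the mask here. */
        return flags & CACHE_CREATE_MASK;
}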
@@ -13,6 +13,8 @@
 #include <linux/module.h>
 #include <linux/cpu.h>
 #include <linux/uaccess.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
@@ -70,6 +72,34 @@ static inline int kmem_cache_sanity_check(const char *name, size_t size)
 }
 #endif

+/*
+ * Figure out what the alignment of the objects will be given a set of
+ * flags, a user specified alignment and the size of the objects.
+ */
+unsigned long calculate_alignment(unsigned long flags,
+               unsigned long align, unsigned long size)
+{
+       /*
+        * If the user wants hardware cache aligned objects then follow that
+        * suggestion if the object is sufficiently large.
+        *
+        * The hardware cache alignment cannot override the specified
+        * alignment though. If that is greater then use it.
+        */
+       if (flags & SLAB_HWCACHE_ALIGN) {
+               unsigned long ralign = cache_line_size();
+
+               while (size <= ralign / 2)
+                       ralign /= 2;
+               align = max(align, ralign);
+       }
+
+       if (align < ARCH_SLAB_MINALIGN)
+               align = ARCH_SLAB_MINALIGN;
+
+       return ALIGN(align, sizeof(void *));
+}
+
 /*
  * kmem_cache_create - Create a cache.
  * @name: A string which is used in /proc/slabinfo to identify this cache.
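To make the rule above concrete, here is the same logic recast as a small standalone user-space program with assumed values (a 64-byte cache line and an ARCH_SLAB_MINALIGN of 8; in the kernel these come from cache_line_size() and the architecture):

#include <stdio.h>

#define SLAB_HWCACHE_ALIGN      0x00002000UL    /* same value as the kernel flag */
#define ARCH_SLAB_MINALIGN      8UL             /* assumed for this example */
#define CACHE_LINE_SIZE         64UL            /* assumed for this example */
#define ALIGN(x, a)             (((x) + (a) - 1) & ~((a) - 1))

static unsigned long calc_align(unsigned long flags, unsigned long align,
                                unsigned long size)
{
        if (flags & SLAB_HWCACHE_ALIGN) {
                unsigned long ralign = CACHE_LINE_SIZE;

                /* Small objects get a fraction of a cache line. */
                while (size <= ralign / 2)
                        ralign /= 2;
                align = align > ralign ? align : ralign;
        }
        if (align < ARCH_SLAB_MINALIGN)
                align = ARCH_SLAB_MINALIGN;
        return ALIGN(align, sizeof(void *));
}

int main(void)
{
        /* 24-byte objects: the 64-byte line is halved once (24 <= 32), so 32. */
        printf("%lu\n", calc_align(SLAB_HWCACHE_ALIGN, 0, 24));   /* prints 32 */
        /* Without SLAB_HWCACHE_ALIGN the minimum alignment of 8 applies. */
        printf("%lu\n", calc_align(0, 0, 24));                    /* prints 8 */
        return 0;
}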
@@ -107,6 +137,13 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
        if (!kmem_cache_sanity_check(name, size) == 0)
                goto out_locked;

+       /*
+        * Some allocators will constrain the set of valid flags to a subset
+        * of all flags. We expect them to define CACHE_CREATE_MASK in this
+        * case, and we'll just provide them with a sanitized version of the
+        * passed flags.
+        */
+       flags &= CACHE_CREATE_MASK;
+
        s = __kmem_cache_alias(name, size, align, flags, ctor);
        if (s)
@@ -115,7 +152,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
        s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
        if (s) {
                s->object_size = s->size = size;
-               s->align = align;
+               s->align = calculate_alignment(flags, align, size);
                s->ctor = ctor;
                s->name = kstrdup(name, GFP_KERNEL);
                if (!s->name) {
@@ -192,3 +229,146 @@ int slab_is_available(void)
 {
        return slab_state >= UP;
 }
+
+#ifndef CONFIG_SLOB
+/* Create a cache during boot when no slab services are available yet */
+void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
+               unsigned long flags)
+{
+       int err;
+
+       s->name = name;
+       s->size = s->object_size = size;
+       s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+       err = __kmem_cache_create(s, flags);
+
+       if (err)
+               panic("Creation of kmalloc slab %s size=%zd failed. Reason %d\n",
+                                       name, size, err);
+
+       s->refcount = -1;       /* Exempt from merging for now */
+}
+
+struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
+                               unsigned long flags)
+{
+       struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+
+       if (!s)
+               panic("Out of memory when creating slab %s\n", name);
+
+       create_boot_cache(s, name, size, flags);
+       list_add(&s->list, &slab_caches);
+       s->refcount = 1;
+       return s;
+}
+#endif /* !CONFIG_SLOB */
+
+#ifdef CONFIG_SLABINFO
+static void print_slabinfo_header(struct seq_file *m)
+{
+       /*
+        * Output format version, so at least we can change it
+        * without _too_ many complaints.
+        */
+#ifdef CONFIG_DEBUG_SLAB
+       seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
+#else
+       seq_puts(m, "slabinfo - version: 2.1\n");
+#endif
+       seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
+                   "<objperslab> <pagesperslab>");
+       seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
+       seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
+#ifdef CONFIG_DEBUG_SLAB
+       seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
+                   "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
+       seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
+#endif
+       seq_putc(m, '\n');
+}
+
+static void *s_start(struct seq_file *m, loff_t *pos)
+{
+       loff_t n = *pos;
+
+       mutex_lock(&slab_mutex);
+       if (!n)
+               print_slabinfo_header(m);
+
+       return seq_list_start(&slab_caches, *pos);
+}
+
+static void *s_next(struct seq_file *m, void *p, loff_t *pos)
+{
+       return seq_list_next(p, &slab_caches, pos);
+}
+
+static void s_stop(struct seq_file *m, void *p)
+{
+       mutex_unlock(&slab_mutex);
+}
+
+static int s_show(struct seq_file *m, void *p)
+{
+       struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
+       struct slabinfo sinfo;
+
+       memset(&sinfo, 0, sizeof(sinfo));
+       get_slabinfo(s, &sinfo);
+
+       seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
+                  s->name, sinfo.active_objs, sinfo.num_objs, s->size,
+                  sinfo.objects_per_slab, (1 << sinfo.cache_order));
+
+       seq_printf(m, " : tunables %4u %4u %4u",
+                  sinfo.limit, sinfo.batchcount, sinfo.shared);
+       seq_printf(m, " : slabdata %6lu %6lu %6lu",
+                  sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
+       slabinfo_show_stats(m, s);
+       seq_putc(m, '\n');
+       return 0;
+}
+
+/*
+ * slabinfo_op - iterator that generates /proc/slabinfo
+ *
+ * Output layout:
+ * cache-name
+ * num-active-objs
+ * total-objs
+ * object size
+ * num-active-slabs
+ * total-slabs
+ * num-pages-per-slab
+ * + further values on SMP and with statistics enabled
+ */
+static const struct seq_operations slabinfo_op = {
+       .start = s_start,
+       .next = s_next,
+       .stop = s_stop,
+       .show = s_show,
+};
+
+static int slabinfo_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &slabinfo_op);
+}
+
+static const struct file_operations proc_slabinfo_operations = {
+       .open           = slabinfo_open,
+       .read           = seq_read,
+       .write          = slabinfo_write,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+};
+
+static int __init slab_proc_init(void)
+{
+       proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
+       return 0;
+}
+module_init(slab_proc_init);
+#endif /* CONFIG_SLABINFO */
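For context, a hedged sketch of how an allocator can use the two new helpers during bootstrap, in the spirit of the "statically allocated kmem_cache boot structure" change in the shortlog: a static struct kmem_cache is brought up with create_boot_cache() before dynamic allocation exists, and later kmalloc caches come from create_kmalloc_cache(). The names, size, and flags below are illustrative, not the actual mm/slub.c code.

/* Sketch only, not the real allocator bootstrap. */
static struct kmem_cache boot_kmem_cache;

void __init sketch_kmem_cache_init(void)
{
        /* Stage 1: the cache that backs struct kmem_cache itself, built on a
         * static object because nothing can be allocated yet. */
        kmem_cache = &boot_kmem_cache;
        create_boot_cache(kmem_cache, "kmem_cache", sizeof(struct kmem_cache),
                          SLAB_HWCACHE_ALIGN | SLAB_PANIC);

        /* Stage 2: with kmem_cache working, create_kmalloc_cache() can
         * allocate its struct kmem_cache dynamically. */
        create_kmalloc_cache("kmalloc-64", 64, 0);
}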
@@ -28,9 +28,8 @@
  * from kmalloc are prepended with a 4-byte header with the kmalloc size.
  * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
  * alloc_pages() directly, allocating compound pages so the page order
- * does not have to be separately tracked, and also stores the exact
- * allocation size in page->private so that it can be used to accurately
- * provide ksize(). These objects are detected in kfree() because slob_page()
+ * does not have to be separately tracked.
+ * These objects are detected in kfree() because PageSlab()
  * is false for them.
  *
  * SLAB is emulated on top of SLOB by simply calling constructors and
@@ -124,7 +123,6 @@ static inline void clear_slob_page_free(struct page *sp)
 #define SLOB_UNIT sizeof(slob_t)
 #define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
-#define SLOB_ALIGN L1_CACHE_BYTES

 /*
  * struct slob_rcu is inserted at the tail of allocated slob blocks, which
@@ -455,11 +453,6 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
                if (likely(order))
                        gfp |= __GFP_COMP;
                ret = slob_new_pages(gfp, order, node);
-               if (ret) {
-                       struct page *page;
-                       page = virt_to_page(ret);
-                       page->private = size;
-               }

                trace_kmalloc_node(caller, ret,
                                   size, PAGE_SIZE << order, gfp, node);
@@ -506,7 +499,7 @@ void kfree(const void *block)
                unsigned int *m = (unsigned int *)(block - align);
                slob_free(m, *m + align);
        } else
-               put_page(sp);
+               __free_pages(sp, compound_order(sp));
 }
 EXPORT_SYMBOL(kfree);
@@ -514,37 +507,30 @@ EXPORT_SYMBOL(kfree);
 size_t ksize(const void *block)
 {
        struct page *sp;
+       int align;
+       unsigned int *m;

        BUG_ON(!block);
        if (unlikely(block == ZERO_SIZE_PTR))
                return 0;

        sp = virt_to_page(block);
-       if (PageSlab(sp)) {
-               int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
-               unsigned int *m = (unsigned int *)(block - align);
-               return SLOB_UNITS(*m) * SLOB_UNIT;
-       } else
-               return sp->private;
+       if (unlikely(!PageSlab(sp)))
+               return PAGE_SIZE << compound_order(sp);
+
+       align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+       m = (unsigned int *)(block - align);
+       return SLOB_UNITS(*m) * SLOB_UNIT;
 }
 EXPORT_SYMBOL(ksize);

 int __kmem_cache_create(struct kmem_cache *c, unsigned long flags)
 {
-       size_t align = c->size;
-
        if (flags & SLAB_DESTROY_BY_RCU) {
                /* leave room for rcu footer at the end of object */
                c->size += sizeof(struct slob_rcu);
        }
        c->flags = flags;
-       /* ignore alignment unless it's forced */
-       c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
-       if (c->align < ARCH_SLAB_MINALIGN)
-               c->align = ARCH_SLAB_MINALIGN;
-       if (c->align < align)
-               c->align = align;
        return 0;
 }
@@ -558,12 +544,12 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
        if (c->size < PAGE_SIZE) {
                b = slob_alloc(c->size, flags, c->align, node);
-               trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
+               trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
                                            SLOB_UNITS(c->size) * SLOB_UNIT,
                                            flags, node);
        } else {
                b = slob_new_pages(flags, get_order(c->size), node);
-               trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
+               trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
                                            PAGE_SIZE << get_order(c->size),
                                            flags, node);
        }
@@ -608,12 +594,6 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
 }
 EXPORT_SYMBOL(kmem_cache_free);

-unsigned int kmem_cache_size(struct kmem_cache *c)
-{
-       return c->size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 int __kmem_cache_shutdown(struct kmem_cache *c)
 {
        /* No way to check for remaining objects */
...
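The net effect of the SLOB changes: page-order kmalloc allocations no longer record their exact request size in page->private, so ksize() now reports the whole compound allocation and kfree() hands the pages straight back with __free_pages(). A small illustration, assuming a SLOB-configured kernel with 4K pages and an invented caller:

#include <linux/slab.h>
#include <linux/kernel.h>

static void slob_large_kmalloc_example(void)
{
        /* 5000 bytes exceeds PAGE_SIZE, so SLOB takes the alloc_pages() path
         * and hands back an order-1 compound page. */
        void *buf = kmalloc(5000, GFP_KERNEL);

        if (!buf)
                return;

        /* Before this merge: 5000 (read from page->private).
         * After: PAGE_SIZE << compound_order(page), i.e. 8192. */
        pr_info("usable size: %zu\n", ksize(buf));

        kfree(buf);     /* now ends in __free_pages() on the compound page */
}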
[collapsed diff not shown]