Commit d30e51aa authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'slab-for-6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

 - SLUB: delayed freezing of CPU partial slabs (Chengming Zhou)

   Freezing is an operation involving double_cmpxchg() that makes a slab
   exclusive for a particular CPU. Chengming noticed that we use it also
   in situations where we are not yet installing the slab as the CPU
   slab, because freezing also indicates that the slab is not on the
   shared list. This results in redundant freeze/unfreeze operation and
   can be avoided by marking separately the shared list presence by
   reusing the PG_workingset flag.

   This approach neatly avoids the issues described in 9b1ea29b
   ("Revert "mm, slub: consider rest of partial list if acquire_slab()
   fails"") as we can now grab a slab from the shared list in a quick
   and guaranteed way without the cmpxchg_double() operation that
   amplifies the lock contention and can fail.

   As a result, lkp has reported 34.2% improvement of
   stress-ng.rawudp.ops_per_sec

 - SLAB removal and SLUB cleanups (Vlastimil Babka)

   The SLAB allocator has been deprecated since 6.5 and nobody has
   objected so far. We agreed at LSF/MM to wait until the next LTS,
   which is 6.6, so we should be good to go now.

   This doesn't yet erase all traces of SLAB outside of mm/ so some dead
   code, comments or documentation remain, and will be cleaned up
   gradually (some series are already in the works).

   Removing the choice of allocators has already allowed to simplify and
   optimize the code wiring up the kmalloc APIs to the SLUB
   implementation.

* tag 'slab-for-6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: (34 commits)
  mm/slub: free KFENCE objects in slab_free_hook()
  mm/slub: handle bulk and single object freeing separately
  mm/slub: introduce __kmem_cache_free_bulk() without free hooks
  mm/slub: fix bulk alloc and free stats
  mm/slub: optimize free fast path code layout
  mm/slub: optimize alloc fastpath code layout
  mm/slub: remove slab_alloc() and __kmem_cache_alloc_lru() wrappers
  mm/slab: move kmalloc() functions from slab_common.c to slub.c
  mm/slab: move kmalloc_slab() to mm/slab.h
  mm/slab: move kfree() from slab_common.c to slub.c
  mm/slab: move struct kmem_cache_node from slab.h to slub.c
  mm/slab: move memcg related functions from slab.h to slub.c
  mm/slab: move pre/post-alloc hooks from slab.h to slub.c
  mm/slab: consolidate includes in the internal mm/slab.h
  mm/slab: move the rest of slub_def.h to mm/slab.h
  mm/slab: move struct kmem_cache_cpu declaration to slub.c
  mm/slab: remove mm/slab.c and slab_def.h
  mm/mempool/dmapool: remove CONFIG_DEBUG_SLAB ifdefs
  mm/slab: remove CONFIG_SLAB code from slab common code
  cpu/hotplug: remove CPUHP_SLAB_PREPARE hooks
  ...
parents 9f8413c4 61d7e367
......@@ -9,10 +9,6 @@
Linus
----------
N: Matt Mackal
E: mpm@selenic.com
D: SLOB slab allocator
N: Matti Aarnio
E: mea@nic.funet.fi
D: Alpha systems hacking, IPv6 and other network related stuff
......@@ -1572,6 +1568,10 @@ S: Ampferstr. 50 / 4
S: 6020 Innsbruck
S: Austria
N: Mark Hemment
E: markhe@nextd.demon.co.uk
D: SLAB allocator implementation
N: Richard Henderson
E: rth@twiddle.net
E: rth@cygnus.com
......@@ -2445,6 +2445,10 @@ D: work on suspend-to-ram/disk, killing duplicates from ioctl32,
D: Altera SoCFPGA and Nokia N900 support.
S: Czech Republic
N: Olivia Mackall
E: olivia@selenic.com
D: SLOB slab allocator
N: Paul Mackerras
E: paulus@samba.org
D: PPP driver
......
......@@ -37,7 +37,7 @@ The Slab Cache
.. kernel-doc:: include/linux/slab.h
:internal:
.. kernel-doc:: mm/slab.c
.. kernel-doc:: mm/slub.c
:export:
.. kernel-doc:: mm/slab_common.c
......
......@@ -154,7 +154,7 @@ config ARM64
select HAVE_MOVE_PUD
select HAVE_PCI
select HAVE_ACPI_APEI if (ACPI && EFI)
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
select HAVE_ARCH_COMPILER_H
......
......@@ -146,7 +146,7 @@ config S390
select GENERIC_TIME_VSYSCALL
select GENERIC_VDSO_TIME_NS
select GENERIC_IOREMAP if PCI
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
......
......@@ -169,7 +169,7 @@ config X86
select HAS_IOPORT
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
select HAVE_ARCH_HUGE_VMALLOC if X86_64
......
......@@ -104,7 +104,6 @@ enum cpuhp_state {
CPUHP_X2APIC_PREPARE,
CPUHP_SMPCFD_PREPARE,
CPUHP_RELAY_PREPARE,
CPUHP_SLAB_PREPARE,
CPUHP_MD_RAID5_PREPARE,
CPUHP_RCUTREE_PREP,
CPUHP_CPUIDLE_COUPLED_PREPARE,
......
......@@ -24,7 +24,7 @@
/*
* Flags to pass to kmem_cache_create().
* The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set.
* The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op
*/
/* DEBUG: Perform (expensive) checks on alloc/free */
#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U)
......@@ -302,25 +302,15 @@ static inline unsigned int arch_slab_minalign(void)
* Kmalloc array related definitions
*/
#ifdef CONFIG_SLAB
/*
* SLAB and SLUB directly allocates requests fitting in to an order-1 page
* SLUB directly allocates requests fitting in to an order-1 page
* (PAGE_SIZE*2). Larger requests are passed to the page allocator.
*/
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 5
#endif
#endif
#ifdef CONFIG_SLUB
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 3
#endif
#endif
/* Maximum allocatable size */
#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX)
......@@ -788,12 +778,4 @@ size_t kmalloc_size_roundup(size_t size);
void __init kmem_cache_init_late(void);
#if defined(CONFIG_SMP) && defined(CONFIG_SLAB)
int slab_prepare_cpu(unsigned int cpu);
int slab_dead_cpu(unsigned int cpu);
#else
#define slab_prepare_cpu NULL
#define slab_dead_cpu NULL
#endif
#endif /* _LINUX_SLAB_H */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SLAB_DEF_H
#define _LINUX_SLAB_DEF_H
#include <linux/kfence.h>
#include <linux/reciprocal_div.h>
/*
* Definitions unique to the original Linux SLAB allocator.
*/
struct kmem_cache {
struct array_cache __percpu *cpu_cache;
/* 1) Cache tunables. Protected by slab_mutex */
unsigned int batchcount;
unsigned int limit;
unsigned int shared;
unsigned int size;
struct reciprocal_value reciprocal_buffer_size;
/* 2) touched by every alloc & free from the backend */
slab_flags_t flags; /* constant flags */
unsigned int num; /* # of objs per slab */
/* 3) cache_grow/shrink */
/* order of pgs per slab (2^n) */
unsigned int gfporder;
/* force GFP flags, e.g. GFP_DMA */
gfp_t allocflags;
size_t colour; /* cache colouring range */
unsigned int colour_off; /* colour offset */
unsigned int freelist_size;
/* constructor func */
void (*ctor)(void *obj);
/* 4) cache creation/removal */
const char *name;
struct list_head list;
int refcount;
int object_size;
int align;
/* 5) statistics */
#ifdef CONFIG_DEBUG_SLAB
unsigned long num_active;
unsigned long num_allocations;
unsigned long high_mark;
unsigned long grown;
unsigned long reaped;
unsigned long errors;
unsigned long max_freeable;
unsigned long node_allocs;
unsigned long node_frees;
unsigned long node_overflow;
atomic_t allochit;
atomic_t allocmiss;
atomic_t freehit;
atomic_t freemiss;
/*
* If debugging is enabled, then the allocator can add additional
* fields and/or padding to every object. 'size' contains the total
* object size including these internal fields, while 'obj_offset'
* and 'object_size' contain the offset to the user object and its
* size.
*/
int obj_offset;
#endif /* CONFIG_DEBUG_SLAB */
#ifdef CONFIG_KASAN_GENERIC
struct kasan_cache kasan_info;
#endif
#ifdef CONFIG_SLAB_FREELIST_RANDOM
unsigned int *random_seq;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x)
{
void *object = x - (x - slab->s_mem) % cache->size;
void *last_object = slab->s_mem + (cache->num - 1) * cache->size;
if (unlikely(object > last_object))
return last_object;
else
return object;
}
/*
* We want to avoid an expensive divide : (offset / cache->size)
* Using the fact that size is a constant for a particular cache,
* we can replace (offset / cache->size) by
* reciprocal_divide(offset, cache->reciprocal_buffer_size)
*/
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct slab *slab, void *obj)
{
u32 offset = (obj - slab->s_mem);
return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
if (is_kfence_address(slab_address(slab)))
return 1;
return cache->num;
}
#endif /* _LINUX_SLAB_DEF_H */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SLUB_DEF_H
#define _LINUX_SLUB_DEF_H
/*
* SLUB : A Slab allocator without object queues.
*
* (C) 2007 SGI, Christoph Lameter
*/
#include <linux/kfence.h>
#include <linux/kobject.h>
#include <linux/reciprocal_div.h>
#include <linux/local_lock.h>
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
FREE_FASTPATH, /* Free to cpu slab */
FREE_SLOWPATH, /* Freeing not to cpu slab */
FREE_FROZEN, /* Freeing to frozen slab */
FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
FREE_REMOVE_PARTIAL, /* Freeing removes last object */
ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */
ALLOC_SLAB, /* Cpu slab acquired from page allocator */
ALLOC_REFILL, /* Refill cpu slab from slab freelist */
ALLOC_NODE_MISMATCH, /* Switching cpu slab */
FREE_SLAB, /* Slab freed to the page allocator */
CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */
DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
DEACTIVATE_BYPASS, /* Implicit deactivation */
ORDER_FALLBACK, /* Number of times fallback was necessary */
CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
CMPXCHG_DOUBLE_FAIL, /* Number of times that cmpxchg double did not match */
CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */
CPU_PARTIAL_FREE, /* Refill cpu partial on free */
CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
NR_SLUB_STAT_ITEMS
};
#ifndef CONFIG_SLUB_TINY
/*
* When changing the layout, make sure freelist and tid are still compatible
* with this_cpu_cmpxchg_double() alignment requirements.
*/
struct kmem_cache_cpu {
union {
struct {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
};
freelist_aba_t freelist_tid;
};
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct slab *partial; /* Partially allocated frozen slabs */
#endif
local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
};
#endif /* CONFIG_SLUB_TINY */
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c) ((c)->partial)
#define slub_set_percpu_partial(c, p) \
({ \
slub_percpu_partial(c) = (p)->next; \
})
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c) NULL
#define slub_set_percpu_partial(c, p)
#define slub_percpu_partial_read_once(c) NULL
#endif // CONFIG_SLUB_CPU_PARTIAL
/*
* Word size structure that can be atomically updated or read and that
* contains both the order and the number of objects that a slab of the
* given order would contain.
*/
struct kmem_cache_order_objects {
unsigned int x;
};
/*
* Slab cache management.
*/
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
struct kmem_cache_cpu __percpu *cpu_slab;
#endif
/* Used for retrieving partial slabs, etc. */
slab_flags_t flags;
unsigned long min_partial;
unsigned int size; /* The size of an object including metadata */
unsigned int object_size;/* The size of an object without metadata */
struct reciprocal_value reciprocal_size;
unsigned int offset; /* Free pointer offset */
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
/* Number of per cpu partial slabs to keep around */
unsigned int cpu_partial_slabs;
#endif
struct kmem_cache_order_objects oo;
/* Allocation and freeing of slabs */
struct kmem_cache_order_objects min;
gfp_t allocflags; /* gfp flags to use on each alloc */
int refcount; /* Refcount for slab cache destroy */
void (*ctor)(void *);
unsigned int inuse; /* Offset to metadata */
unsigned int align; /* Alignment */
unsigned int red_left_pad; /* Left redzone padding size */
const char *name; /* Name (only for display!) */
struct list_head list; /* List of slab caches */
#ifdef CONFIG_SYSFS
struct kobject kobj; /* For sysfs */
#endif
#ifdef CONFIG_SLAB_FREELIST_HARDENED
unsigned long random;
#endif
#ifdef CONFIG_NUMA
/*
* Defragmentation by allocating from a remote node.
*/
unsigned int remote_node_defrag_ratio;
#endif
#ifdef CONFIG_SLAB_FREELIST_RANDOM
unsigned int *random_seq;
#endif
#ifdef CONFIG_KASAN_GENERIC
struct kasan_cache kasan_info;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *);
void sysfs_slab_release(struct kmem_cache *);
#else
static inline void sysfs_slab_unlink(struct kmem_cache *s)
{
}
static inline void sysfs_slab_release(struct kmem_cache *s)
{
}
#endif
void *fixup_red_left(struct kmem_cache *s, void *p);
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x) {
void *object = x - (x - slab_address(slab)) % cache->size;
void *last_object = slab_address(slab) +
(slab->objects - 1) * cache->size;
void *result = (unlikely(object > last_object)) ? last_object : object;
result = fixup_red_left(cache, result);
return result;
}
/* Determine object index from a given position */
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
void *addr, void *obj)
{
return reciprocal_divide(kasan_reset_tag(obj) - addr,
cache->reciprocal_size);
}
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct slab *slab, void *obj)
{
if (is_kfence_address(obj))
return 0;
return __obj_to_index(cache, slab_address(slab), obj);
}
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
return slab->objects;
}
#endif /* _LINUX_SLUB_DEF_H */
......@@ -2125,11 +2125,6 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = relay_prepare_cpu,
.teardown.single = NULL,
},
[CPUHP_SLAB_PREPARE] = {
.name = "slab:prepare",
.startup.single = slab_prepare_cpu,
.teardown.single = slab_dead_cpu,
},
[CPUHP_RCUTREE_PREP] = {
.name = "RCU/tree:prepare",
.startup.single = rcutree_prepare_cpu,
......
......@@ -1970,7 +1970,6 @@ config FAULT_INJECTION
config FAILSLAB
bool "Fault-injection capability for kmalloc"
depends on FAULT_INJECTION
depends on SLAB || SLUB
help
Provide fault-injection capability for kmalloc.
......
......@@ -37,7 +37,7 @@ menuconfig KASAN
(HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \
CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
HAVE_ARCH_KASAN_HW_TAGS
depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB)
depends on SYSFS && !SLUB_TINY
select STACKDEPOT_ALWAYS_INIT
help
Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety
......@@ -78,7 +78,7 @@ config KASAN_GENERIC
bool "Generic KASAN"
depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC
depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS
select SLUB_DEBUG if SLUB
select SLUB_DEBUG
select CONSTRUCTORS
help
Enables Generic KASAN.
......@@ -89,13 +89,11 @@ config KASAN_GENERIC
overhead of ~50% for dynamic allocations.
The performance slowdown is ~x3.
(Incompatible with CONFIG_DEBUG_SLAB: the kernel does not boot.)
config KASAN_SW_TAGS
bool "Software Tag-Based KASAN"
depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS
depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS
select SLUB_DEBUG if SLUB
select SLUB_DEBUG
select CONSTRUCTORS
help
Enables Software Tag-Based KASAN.
......@@ -110,12 +108,9 @@ config KASAN_SW_TAGS
May potentially introduce problems related to pointer casting and
comparison, as it embeds a tag into the top byte of each pointer.
(Incompatible with CONFIG_DEBUG_SLAB: the kernel does not boot.)
config KASAN_HW_TAGS
bool "Hardware Tag-Based KASAN"
depends on HAVE_ARCH_KASAN_HW_TAGS
depends on SLUB
help
Enables Hardware Tag-Based KASAN.
......
......@@ -5,7 +5,7 @@ config HAVE_ARCH_KFENCE
menuconfig KFENCE
bool "KFENCE: low-overhead sampling-based memory safety error detector"
depends on HAVE_ARCH_KFENCE && (SLAB || SLUB)
depends on HAVE_ARCH_KFENCE
select STACKTRACE
select IRQ_WORK
help
......
......@@ -11,7 +11,7 @@ config HAVE_KMSAN_COMPILER
config KMSAN
bool "KMSAN: detector of uninitialized values use"
depends on HAVE_ARCH_KMSAN && HAVE_KMSAN_COMPILER
depends on SLUB && DEBUG_KERNEL && !KASAN && !KCSAN
depends on DEBUG_KERNEL && !KASAN && !KCSAN
depends on !PREEMPT_RT
select STACKDEPOT
select STACKDEPOT_ALWAYS_INIT
......
......@@ -226,52 +226,17 @@ config ZSMALLOC_CHAIN_SIZE
For more information, see zsmalloc documentation.
menu "SLAB allocator options"
choice
prompt "Choose SLAB allocator"
default SLUB
help
This option allows to select a slab allocator.
config SLAB_DEPRECATED
bool "SLAB (DEPRECATED)"
depends on !PREEMPT_RT
help
Deprecated and scheduled for removal in a few cycles. Replaced by
SLUB.
If you cannot migrate to SLUB, please contact linux-mm@kvack.org
and the people listed in the SLAB ALLOCATOR section of MAINTAINERS
file, explaining why.
The regular slab allocator that is established and known to work
well in all environments. It organizes cache hot objects in
per cpu and per node queues.
menu "Slab allocator options"
config SLUB
bool "SLUB (Unqueued Allocator)"
help
SLUB is a slab allocator that minimizes cache line usage
instead of managing queues of cached objects (SLAB approach).
Per cpu caching is realized using slabs of objects instead
of queues of objects. SLUB can use memory efficiently
and has enhanced diagnostics. SLUB is the default choice for
a slab allocator.
endchoice
config SLAB
bool
default y
depends on SLAB_DEPRECATED
def_bool y
config SLUB_TINY
bool "Configure SLUB for minimal memory footprint"
depends on SLUB && EXPERT
bool "Configure for minimal memory footprint"
depends on EXPERT
select SLAB_MERGE_DEFAULT
help
Configures the SLUB allocator in a way to achieve minimal memory
Configures the slab allocator in a way to achieve minimal memory
footprint, sacrificing scalability, debugging and other features.
This is intended only for the smallest system that had used the
SLOB allocator and is not recommended for systems with more than
......@@ -282,7 +247,6 @@ config SLUB_TINY
config SLAB_MERGE_DEFAULT
bool "Allow slab caches to be merged"
default y
depends on SLAB || SLUB
help
For reduced kernel memory fragmentation, slab caches can be
merged when they share the same size and other characteristics.
......@@ -296,7 +260,7 @@ config SLAB_MERGE_DEFAULT
config SLAB_FREELIST_RANDOM
bool "Randomize slab freelist"
depends on SLAB || (SLUB && !SLUB_TINY)
depends on !SLUB_TINY
help
Randomizes the freelist order used on creating new pages. This
security feature reduces the predictability of the kernel slab
......@@ -304,21 +268,19 @@ config SLAB_FREELIST_RANDOM
config SLAB_FREELIST_HARDENED
bool "Harden slab freelist metadata"
depends on SLAB || (SLUB && !SLUB_TINY)
depends on !SLUB_TINY
help
Many kernel heap attacks try to target slab cache metadata and
other infrastructure. This options makes minor performance
sacrifices to harden the kernel slab allocator against common
freelist exploit methods. Some slab implementations have more
sanity-checking than others. This option is most effective with
CONFIG_SLUB.
freelist exploit methods.
config SLUB_STATS
default n
bool "Enable SLUB performance statistics"
depends on SLUB && SYSFS && !SLUB_TINY
bool "Enable performance statistics"
depends on SYSFS && !SLUB_TINY
help
SLUB statistics are useful to debug SLUBs allocation behavior in
The statistics are useful to debug slab allocation behavior in
order find ways to optimize the allocator. This should never be
enabled for production use since keeping statistics slows down
the allocator by a few percentage points. The slabinfo command
......@@ -328,8 +290,8 @@ config SLUB_STATS
config SLUB_CPU_PARTIAL
default y
depends on SLUB && SMP && !SLUB_TINY
bool "SLUB per cpu partial cache"
depends on SMP && !SLUB_TINY
bool "Enable per cpu partial caches"
help
Per cpu partial caches accelerate objects allocation and freeing
that is local to a processor at the price of more indeterminism
......@@ -339,7 +301,7 @@ config SLUB_CPU_PARTIAL
config RANDOM_KMALLOC_CACHES
default n
depends on SLUB && !SLUB_TINY
depends on !SLUB_TINY
bool "Randomize slab caches for normal kmalloc"
help
A hardening feature that creates multiple copies of slab caches for
......@@ -354,7 +316,7 @@ config RANDOM_KMALLOC_CACHES
limited degree of memory and CPU overhead that relates to hardware and
system workload.
endmenu # SLAB allocator options
endmenu # Slab allocator options
config SHUFFLE_PAGE_ALLOCATOR
bool "Page allocator randomization"
......
......@@ -45,18 +45,10 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT
Enable debug page memory allocations by default? This value
can be overridden by debug_pagealloc=off|on.
config DEBUG_SLAB
bool "Debug slab memory allocations"
depends on DEBUG_KERNEL && SLAB
help
Say Y here to have the kernel do limited verification on memory
allocation as well as poisoning memory on free to catch use of freed
memory. This can make kmalloc/kfree-intensive workloads much slower.
config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EXPERT
depends on SLUB && SYSFS && !SLUB_TINY
depends on SYSFS && !SLUB_TINY
select STACKDEPOT if STACKTRACE_SUPPORT
help
SLUB has extensive debug support features. Disabling these can
......@@ -66,7 +58,7 @@ config SLUB_DEBUG
config SLUB_DEBUG_ON
bool "SLUB debugging on by default"
depends on SLUB && SLUB_DEBUG
depends on SLUB_DEBUG
select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT
default n
help
......@@ -231,8 +223,8 @@ config DEBUG_KMEMLEAK
allocations. See Documentation/dev-tools/kmemleak.rst for more
details.
Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances
of finding leaks due to the slab objects poisoning.
Enabling SLUB_DEBUG may increase the chances of finding leaks
due to the slab objects poisoning.
In order to access the kmemleak file, debugfs needs to be
mounted (usually at /sys/kernel/debug).
......
......@@ -4,7 +4,6 @@
#
KASAN_SANITIZE_slab_common.o := n
KASAN_SANITIZE_slab.o := n
KASAN_SANITIZE_slub.o := n
KCSAN_SANITIZE_kmemleak.o := n
......@@ -12,7 +11,6 @@ KCSAN_SANITIZE_kmemleak.o := n
# the same word but accesses to different bits of that word. Re-enable KCSAN
# for these when we have more consensus on what to do about them.
KCSAN_SANITIZE_slab_common.o := n
KCSAN_SANITIZE_slab.o := n
KCSAN_SANITIZE_slub.o := n
KCSAN_SANITIZE_page_alloc.o := n
# But enable explicit instrumentation for memory barriers.
......@@ -22,7 +20,6 @@ KCSAN_INSTRUMENT_BARRIERS := y
# flaky coverage that is not a function of syscall inputs. E.g. slab is out of
# free pages, or a task is migrated between nodes.
KCOV_INSTRUMENT_slab_common.o := n
KCOV_INSTRUMENT_slab.o := n
KCOV_INSTRUMENT_slub.o := n
KCOV_INSTRUMENT_page_alloc.o := n
KCOV_INSTRUMENT_debug-pagealloc.o := n
......@@ -66,6 +63,7 @@ obj-y += page-alloc.o
obj-y += init-mm.o
obj-y += memblock.o
obj-y += $(memory-hotplug-y)
obj-y += slub.o
ifdef CONFIG_MMU
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
......@@ -82,8 +80,6 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
obj-$(CONFIG_KSM) += ksm.o
obj-$(CONFIG_PAGE_POISONING) += page_poison.o
obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_SLUB) += slub.o
obj-$(CONFIG_KASAN) += kasan/
obj-$(CONFIG_KFENCE) += kfence/
obj-$(CONFIG_KMSAN) += kmsan/
......
......@@ -36,7 +36,7 @@
#include <linux/types.h>
#include <linux/wait.h>
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
#ifdef CONFIG_SLUB_DEBUG_ON
#define DMAPOOL_DEBUG 1
#endif
......
......@@ -153,10 +153,6 @@ void __kasan_poison_object_data(struct kmem_cache *cache, void *object)
* 2. A cache might be SLAB_TYPESAFE_BY_RCU, which means objects can be
* accessed after being freed. We preassign tags for objects in these
* caches as well.
* 3. For SLAB allocator we can't preassign tags randomly since the freelist
* is stored as an array of indexes instead of a linked list. Assign tags
* based on objects indexes, so that objects that are next to each other
* get different tags.
*/
static inline u8 assign_tag(struct kmem_cache *cache,
const void *object, bool init)
......@@ -171,17 +167,12 @@ static inline u8 assign_tag(struct kmem_cache *cache,
if (!cache->ctor && !(cache->flags & SLAB_TYPESAFE_BY_RCU))
return init ? KASAN_TAG_KERNEL : kasan_random_tag();
/* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
#ifdef CONFIG_SLAB
/* For SLAB assign tags based on the object index in the freelist. */
return (u8)obj_to_index(cache, virt_to_slab(object), (void *)object);
#else
/*
* For SLUB assign a random tag during slab creation, otherwise reuse
* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU,
* assign a random tag during slab creation, otherwise reuse
* the already assigned tag.
*/
return init ? kasan_random_tag() : get_tag(object);
#endif
}
void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
......
......@@ -373,8 +373,7 @@ void kasan_set_track(struct kasan_track *track, gfp_t flags);
void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags);
void kasan_save_free_info(struct kmem_cache *cache, void *object);
#if defined(CONFIG_KASAN_GENERIC) && \
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
#ifdef CONFIG_KASAN_GENERIC
bool kasan_quarantine_put(struct kmem_cache *cache, void *object);
void kasan_quarantine_reduce(void);
void kasan_quarantine_remove_cache(struct kmem_cache *cache);
......
......@@ -144,10 +144,6 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
{
void *object = qlink_to_object(qlink, cache);
struct kasan_free_meta *meta = kasan_get_free_meta(cache, object);
unsigned long flags;
if (IS_ENABLED(CONFIG_SLAB))
local_irq_save(flags);
/*
* If init_on_free is enabled and KASAN's free metadata is stored in
......@@ -166,9 +162,6 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
*(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE;
___cache_free(cache, object, _THIS_IP_);
if (IS_ENABLED(CONFIG_SLAB))
local_irq_restore(flags);
}
static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache)
......
......@@ -23,6 +23,7 @@
#include <linux/stacktrace.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
#include <linux/kasan.h>
#include <linux/module.h>
#include <linux/sched/task_stack.h>
......
......@@ -463,11 +463,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
/* Set required slab fields. */
slab = virt_to_slab((void *)meta->addr);
slab->slab_cache = cache;
#if defined(CONFIG_SLUB)
slab->objects = 1;
#elif defined(CONFIG_SLAB)
slab->s_mem = addr;
#endif
/* Memory initialization. */
set_canary(meta);
......
......@@ -64,6 +64,7 @@
#include <linux/psi.h>
#include <linux/seq_buf.h>
#include <linux/sched/isolation.h>
#include <linux/kmemleak.h>
#include "internal.h"
#include <net/sock.h>
#include <net/ip.h>
......@@ -5150,7 +5151,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
return ret;
}
#if defined(CONFIG_MEMCG_KMEM) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG)
static int mem_cgroup_slab_show(struct seq_file *m, void *p)
{
/*
......@@ -5259,8 +5260,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
.write = mem_cgroup_reset,
.read_u64 = mem_cgroup_read_u64,
},
#if defined(CONFIG_MEMCG_KMEM) && \
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG)
{
.name = "kmem.slabinfo",
.seq_show = mem_cgroup_slab_show,
......
......@@ -20,7 +20,7 @@
#include <linux/writeback.h>
#include "slab.h"
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
#ifdef CONFIG_SLUB_DEBUG_ON
static void poison_error(mempool_t *pool, void *element, size_t size,
size_t byte)
{
......@@ -95,14 +95,14 @@ static void poison_element(mempool_t *pool, void *element)
kunmap_atomic(addr);
}
}
#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
#else /* CONFIG_SLUB_DEBUG_ON */
static inline void check_element(mempool_t *pool, void *element)
{
}
static inline void poison_element(mempool_t *pool, void *element)
{
}
#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
#endif /* CONFIG_SLUB_DEBUG_ON */
static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
{
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
......@@ -21,6 +21,7 @@
#include <linux/swiotlb.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <linux/kmemleak.h>
#include <linux/kasan.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
......@@ -71,10 +72,8 @@ static int __init setup_slab_merge(char *str)
return 1;
}
#ifdef CONFIG_SLUB
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
__setup_param("slub_merge", slub_merge, setup_slab_merge, 0);
#endif
__setup("slab_nomerge", setup_slab_nomerge);
__setup("slab_merge", setup_slab_merge);
......@@ -197,10 +196,6 @@ struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
if (s->size - size >= sizeof(void *))
continue;
if (IS_ENABLED(CONFIG_SLAB) && align &&
(align > s->align || s->align % align))
continue;
return s;
}
return NULL;
......@@ -670,7 +665,7 @@ EXPORT_SYMBOL(random_kmalloc_seed);
* of two cache sizes there. The size of larger slabs can be determined using
* fls.
*/
static u8 size_index[24] __ro_after_init = {
u8 kmalloc_size_index[24] __ro_after_init = {
3, /* 8 */
4, /* 16 */
5, /* 24 */
......@@ -697,33 +692,6 @@ static u8 size_index[24] __ro_after_init = {
2 /* 192 */
};
static inline unsigned int size_index_elem(unsigned int bytes)
{
return (bytes - 1) / 8;
}
/*
* Find the kmem_cache structure that serves a given size of
* allocation
*/
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
{
unsigned int index;
if (size <= 192) {
if (!size)
return ZERO_SIZE_PTR;
index = size_index[size_index_elem(size)];
} else {
if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE))
return NULL;
index = fls(size - 1);
}
return kmalloc_caches[kmalloc_type(flags, caller)][index];
}
size_t kmalloc_size_roundup(size_t size)
{
if (size && size <= KMALLOC_MAX_CACHE_SIZE) {
......@@ -848,9 +816,9 @@ void __init setup_kmalloc_cache_index_table(void)
for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
unsigned int elem = size_index_elem(i);
if (elem >= ARRAY_SIZE(size_index))
if (elem >= ARRAY_SIZE(kmalloc_size_index))
break;
size_index[elem] = KMALLOC_SHIFT_LOW;
kmalloc_size_index[elem] = KMALLOC_SHIFT_LOW;
}
if (KMALLOC_MIN_SIZE >= 64) {
......@@ -859,7 +827,7 @@ void __init setup_kmalloc_cache_index_table(void)
* is 64 byte.
*/
for (i = 64 + 8; i <= 96; i += 8)
size_index[size_index_elem(i)] = 7;
kmalloc_size_index[size_index_elem(i)] = 7;
}
......@@ -870,7 +838,7 @@ void __init setup_kmalloc_cache_index_table(void)
* instead.
*/
for (i = 128 + 8; i <= 192; i += 8)
size_index[size_index_elem(i)] = 8;
kmalloc_size_index[size_index_elem(i)] = 8;
}
}
......@@ -968,95 +936,6 @@ void __init create_kmalloc_caches(slab_flags_t flags)
slab_state = UP;
}
void free_large_kmalloc(struct folio *folio, void *object)
{
unsigned int order = folio_order(folio);
if (WARN_ON_ONCE(order == 0))
pr_warn_once("object pointer: 0x%p\n", object);
kmemleak_free(object);
kasan_kfree_large(object);
kmsan_kfree_large(object);
mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(folio_page(folio, 0), order);
}
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node);
static __always_inline
void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
{
struct kmem_cache *s;
void *ret;
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = __kmalloc_large_node(size, flags, node);
trace_kmalloc(caller, ret, size,
PAGE_SIZE << get_order(size), flags, node);
return ret;
}
s = kmalloc_slab(size, flags, caller);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
ret = __kmem_cache_alloc_node(s, flags, node, size, caller);
ret = kasan_kmalloc(s, ret, size, flags);
trace_kmalloc(caller, ret, size, s->size, flags, node);
return ret;
}
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
return __do_kmalloc_node(size, flags, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node);
void *__kmalloc(size_t size, gfp_t flags)
{
return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc);
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
int node, unsigned long caller)
{
return __do_kmalloc_node(size, flags, node, caller);
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
/**
* kfree - free previously allocated memory
* @object: pointer returned by kmalloc() or kmem_cache_alloc()
*
* If @object is NULL, no operation is performed.
*/
void kfree(const void *object)
{
struct folio *folio;
struct slab *slab;
struct kmem_cache *s;
trace_kfree(_RET_IP_, object);
if (unlikely(ZERO_OR_NULL_PTR(object)))
return;
folio = virt_to_folio(object);
if (unlikely(!folio_test_slab(folio))) {
free_large_kmalloc(folio, (void *)object);
return;
}
slab = folio_slab(folio);
s = slab->slab_cache;
__kmem_cache_free(s, (void *)object, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
/**
* __ksize -- Report full size of underlying allocation
* @object: pointer to the object
......@@ -1093,30 +972,6 @@ size_t __ksize(const void *object)
return slab_ksize(folio_slab(folio)->slab_cache);
}
void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE,
size, _RET_IP_);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_trace);
void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size)
{
void *ret = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_node_trace);
gfp_t kmalloc_fix_flags(gfp_t flags)
{
gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
......@@ -1129,57 +984,6 @@ gfp_t kmalloc_fix_flags(gfp_t flags)
return flags;
}
/*
* To avoid unnecessary overhead, we pass through large allocation requests
* directly to the page allocator. We use __GFP_COMP, because we will need to
* know the allocation order to free the pages properly in kfree.
*/
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
{
struct page *page;
void *ptr = NULL;
unsigned int order = get_order(size);
if (unlikely(flags & GFP_SLAB_BUG_MASK))
flags = kmalloc_fix_flags(flags);
flags |= __GFP_COMP;
page = alloc_pages_node(node, flags, order);
if (page) {
ptr = page_address(page);
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
PAGE_SIZE << order);
}
ptr = kasan_kmalloc_large(ptr, size, flags);
/* As ptr might get tagged, call kmemleak hook after KASAN. */
kmemleak_alloc(ptr, size, 1, flags);
kmsan_kmalloc_large(ptr, size, flags);
return ptr;
}
void *kmalloc_large(size_t size, gfp_t flags)
{
void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, NUMA_NO_NODE);
return ret;
}
EXPORT_SYMBOL(kmalloc_large);
void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{
void *ret = __kmalloc_large_node(size, flags, node);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, node);
return ret;
}
EXPORT_SYMBOL(kmalloc_large_node);
#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Randomize a generic freelist */
static void freelist_randomize(unsigned int *list,
......@@ -1222,12 +1026,8 @@ void cache_random_seq_destroy(struct kmem_cache *cachep)
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (0600)
#else
#ifdef CONFIG_SLUB_DEBUG
#define SLABINFO_RIGHTS (0400)
#endif
static void print_slabinfo_header(struct seq_file *m)
{
......@@ -1235,18 +1035,10 @@ static void print_slabinfo_header(struct seq_file *m)
* Output format version, so at least we can change it
* without _too_ many complaints.
*/
#ifdef CONFIG_DEBUG_SLAB
seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
seq_puts(m, "slabinfo - version: 2.1\n");
#endif
seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
seq_putc(m, '\n');
}
......@@ -1370,7 +1162,7 @@ static int __init slab_proc_init(void)
}
module_init(slab_proc_init);
#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */
#endif /* CONFIG_SLUB_DEBUG */
static __always_inline __realloc_size(2) void *
__do_krealloc(const void *p, size_t new_size, gfp_t flags)
......@@ -1488,10 +1280,3 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
if (__should_failslab(s, gfpflags))
return -ENOMEM;
return 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment