Commit 61d7e367 authored by Vlastimil Babka

Merge branch 'slab/for-6.8/slub-hook-cleanups' into slab/for-next

Merge the SLAB allocator removal and a number of subsequent SLUB
cleanups and optimizations.
parents 31bda717 782f8906
...@@ -9,10 +9,6 @@ ...@@ -9,10 +9,6 @@
Linus Linus
---------- ----------
N: Matt Mackall
E: mpm@selenic.com
D: SLOB slab allocator
N: Matti Aarnio N: Matti Aarnio
E: mea@nic.funet.fi E: mea@nic.funet.fi
D: Alpha systems hacking, IPv6 and other network related stuff D: Alpha systems hacking, IPv6 and other network related stuff
...@@ -1572,6 +1568,10 @@ S: Ampferstr. 50 / 4 ...@@ -1572,6 +1568,10 @@ S: Ampferstr. 50 / 4
S: 6020 Innsbruck S: 6020 Innsbruck
S: Austria S: Austria
N: Mark Hemment
E: markhe@nextd.demon.co.uk
D: SLAB allocator implementation
N: Richard Henderson N: Richard Henderson
E: rth@twiddle.net E: rth@twiddle.net
E: rth@cygnus.com E: rth@cygnus.com
...@@ -2437,6 +2437,10 @@ D: work on suspend-to-ram/disk, killing duplicates from ioctl32, ...@@ -2437,6 +2437,10 @@ D: work on suspend-to-ram/disk, killing duplicates from ioctl32,
D: Altera SoCFPGA and Nokia N900 support. D: Altera SoCFPGA and Nokia N900 support.
S: Czech Republic S: Czech Republic
N: Olivia Mackall
E: olivia@selenic.com
D: SLOB slab allocator
N: Paul Mackerras N: Paul Mackerras
E: paulus@samba.org E: paulus@samba.org
D: PPP driver D: PPP driver
......
...@@ -37,7 +37,7 @@ The Slab Cache ...@@ -37,7 +37,7 @@ The Slab Cache
.. kernel-doc:: include/linux/slab.h .. kernel-doc:: include/linux/slab.h
:internal: :internal:
.. kernel-doc:: mm/slab.c .. kernel-doc:: mm/slub.c
:export: :export:
.. kernel-doc:: mm/slab_common.c .. kernel-doc:: mm/slab_common.c
......
...@@ -154,7 +154,7 @@ config ARM64 ...@@ -154,7 +154,7 @@ config ARM64
select HAVE_MOVE_PUD select HAVE_MOVE_PUD
select HAVE_PCI select HAVE_PCI
select HAVE_ACPI_APEI if (ACPI && EFI) select HAVE_ACPI_APEI if (ACPI && EFI)
select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE select HAVE_ARCH_BITREVERSE
select HAVE_ARCH_COMPILER_H select HAVE_ARCH_COMPILER_H
......
...@@ -146,7 +146,7 @@ config S390 ...@@ -146,7 +146,7 @@ config S390
select GENERIC_TIME_VSYSCALL select GENERIC_TIME_VSYSCALL
select GENERIC_VDSO_TIME_NS select GENERIC_VDSO_TIME_NS
select GENERIC_IOREMAP if PCI select GENERIC_IOREMAP if PCI
select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_JUMP_LABEL_RELATIVE
......
...@@ -169,7 +169,7 @@ config X86 ...@@ -169,7 +169,7 @@ config X86
select HAS_IOPORT select HAS_IOPORT
select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI select HAVE_ACPI_APEI_NMI if ACPI
select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
select HAVE_ARCH_HUGE_VMALLOC if X86_64 select HAVE_ARCH_HUGE_VMALLOC if X86_64
......
...@@ -108,7 +108,6 @@ enum cpuhp_state { ...@@ -108,7 +108,6 @@ enum cpuhp_state {
CPUHP_X2APIC_PREPARE, CPUHP_X2APIC_PREPARE,
CPUHP_SMPCFD_PREPARE, CPUHP_SMPCFD_PREPARE,
CPUHP_RELAY_PREPARE, CPUHP_RELAY_PREPARE,
CPUHP_SLAB_PREPARE,
CPUHP_MD_RAID5_PREPARE, CPUHP_MD_RAID5_PREPARE,
CPUHP_RCUTREE_PREP, CPUHP_RCUTREE_PREP,
CPUHP_CPUIDLE_COUPLED_PREPARE, CPUHP_CPUIDLE_COUPLED_PREPARE,
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
/* /*
* Flags to pass to kmem_cache_create(). * Flags to pass to kmem_cache_create().
* The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. * The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise they are no-ops
*/ */
/* DEBUG: Perform (expensive) checks on alloc/free */ /* DEBUG: Perform (expensive) checks on alloc/free */
#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U) #define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U)
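A minimal usage sketch of the flags above (the cache name, object struct and init function are hypothetical; the DEBUG-class flags only take effect on a CONFIG_SLUB_DEBUG build):

struct example_obj {
        int a;
        int b;
};

static struct kmem_cache *example_cache;

static int __init example_cache_init(void)
{
        /* Consistency checks and red zoning are no-ops without CONFIG_SLUB_DEBUG. */
        example_cache = kmem_cache_create("example_cache", sizeof(struct example_obj),
                                          0, SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE,
                                          NULL);
        return example_cache ? 0 : -ENOMEM;
}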
...@@ -302,25 +302,15 @@ static inline unsigned int arch_slab_minalign(void) ...@@ -302,25 +302,15 @@ static inline unsigned int arch_slab_minalign(void)
* Kmalloc array related definitions * Kmalloc array related definitions
*/ */
#ifdef CONFIG_SLAB
/* /*
* SLAB and SLUB directly allocates requests fitting in to an order-1 page * SLUB directly allocates requests fitting in to an order-1 page
* (PAGE_SIZE*2). Larger requests are passed to the page allocator. * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
*/ */
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) #define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
#ifndef KMALLOC_SHIFT_LOW #ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 5
#endif
#endif
#ifdef CONFIG_SLUB
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 3 #define KMALLOC_SHIFT_LOW 3
#endif #endif
#endif
/* Maximum allocatable size */ /* Maximum allocatable size */
#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX)
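Worked example of the bounds above, assuming 4 KiB pages (PAGE_SHIFT == 12):

/*
 * KMALLOC_SHIFT_HIGH = PAGE_SHIFT + 1 = 13, so KMALLOC_MAX_CACHE_SIZE is
 * 8 KiB: kmalloc(8192) is still served from a kmalloc cache backed by an
 * order-1 slab, while anything larger falls through to the page allocator.
 * KMALLOC_SHIFT_LOW = 3 makes the smallest kmalloc cache 8 bytes.
 */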
...@@ -788,12 +778,4 @@ size_t kmalloc_size_roundup(size_t size); ...@@ -788,12 +778,4 @@ size_t kmalloc_size_roundup(size_t size);
void __init kmem_cache_init_late(void); void __init kmem_cache_init_late(void);
#if defined(CONFIG_SMP) && defined(CONFIG_SLAB)
int slab_prepare_cpu(unsigned int cpu);
int slab_dead_cpu(unsigned int cpu);
#else
#define slab_prepare_cpu NULL
#define slab_dead_cpu NULL
#endif
#endif /* _LINUX_SLAB_H */ #endif /* _LINUX_SLAB_H */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SLAB_DEF_H
#define _LINUX_SLAB_DEF_H
#include <linux/kfence.h>
#include <linux/reciprocal_div.h>
/*
* Definitions unique to the original Linux SLAB allocator.
*/
struct kmem_cache {
struct array_cache __percpu *cpu_cache;
/* 1) Cache tunables. Protected by slab_mutex */
unsigned int batchcount;
unsigned int limit;
unsigned int shared;
unsigned int size;
struct reciprocal_value reciprocal_buffer_size;
/* 2) touched by every alloc & free from the backend */
slab_flags_t flags; /* constant flags */
unsigned int num; /* # of objs per slab */
/* 3) cache_grow/shrink */
/* order of pgs per slab (2^n) */
unsigned int gfporder;
/* force GFP flags, e.g. GFP_DMA */
gfp_t allocflags;
size_t colour; /* cache colouring range */
unsigned int colour_off; /* colour offset */
unsigned int freelist_size;
/* constructor func */
void (*ctor)(void *obj);
/* 4) cache creation/removal */
const char *name;
struct list_head list;
int refcount;
int object_size;
int align;
/* 5) statistics */
#ifdef CONFIG_DEBUG_SLAB
unsigned long num_active;
unsigned long num_allocations;
unsigned long high_mark;
unsigned long grown;
unsigned long reaped;
unsigned long errors;
unsigned long max_freeable;
unsigned long node_allocs;
unsigned long node_frees;
unsigned long node_overflow;
atomic_t allochit;
atomic_t allocmiss;
atomic_t freehit;
atomic_t freemiss;
/*
* If debugging is enabled, then the allocator can add additional
* fields and/or padding to every object. 'size' contains the total
* object size including these internal fields, while 'obj_offset'
* and 'object_size' contain the offset to the user object and its
* size.
*/
int obj_offset;
#endif /* CONFIG_DEBUG_SLAB */
#ifdef CONFIG_KASAN_GENERIC
struct kasan_cache kasan_info;
#endif
#ifdef CONFIG_SLAB_FREELIST_RANDOM
unsigned int *random_seq;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x)
{
void *object = x - (x - slab->s_mem) % cache->size;
void *last_object = slab->s_mem + (cache->num - 1) * cache->size;
if (unlikely(object > last_object))
return last_object;
else
return object;
}
/*
* We want to avoid an expensive divide : (offset / cache->size)
* Using the fact that size is a constant for a particular cache,
* we can replace (offset / cache->size) by
* reciprocal_divide(offset, cache->reciprocal_buffer_size)
*/
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct slab *slab, void *obj)
{
u32 offset = (obj - slab->s_mem);
return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
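The reciprocal trick above survives the SLAB removal (SLUB keeps it as cache->reciprocal_size); a self-contained sketch of the equivalence, with made-up numbers:

#include <linux/reciprocal_div.h>

static void reciprocal_div_example(void)
{
        /* Precomputed once per cache; 192 stands in for the object size. */
        struct reciprocal_value rv = reciprocal_value(192);
        unsigned int offset = 192 * 7;

        /* Multiply + shift instead of a divide, same result as offset / 192. */
        WARN_ON(reciprocal_divide(offset, rv) != offset / 192);
}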
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
if (is_kfence_address(slab_address(slab)))
return 1;
return cache->num;
}
#endif /* _LINUX_SLAB_DEF_H */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SLUB_DEF_H
#define _LINUX_SLUB_DEF_H
/*
* SLUB : A Slab allocator without object queues.
*
* (C) 2007 SGI, Christoph Lameter
*/
#include <linux/kfence.h>
#include <linux/kobject.h>
#include <linux/reciprocal_div.h>
#include <linux/local_lock.h>
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
FREE_FASTPATH, /* Free to cpu slab */
FREE_SLOWPATH, /* Freeing not to cpu slab */
FREE_FROZEN, /* Freeing to frozen slab */
FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
FREE_REMOVE_PARTIAL, /* Freeing removes last object */
ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */
ALLOC_SLAB, /* Cpu slab acquired from page allocator */
ALLOC_REFILL, /* Refill cpu slab from slab freelist */
ALLOC_NODE_MISMATCH, /* Switching cpu slab */
FREE_SLAB, /* Slab freed to the page allocator */
CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */
DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
DEACTIVATE_BYPASS, /* Implicit deactivation */
ORDER_FALLBACK, /* Number of times fallback was necessary */
CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
CMPXCHG_DOUBLE_FAIL, /* Number of times that cmpxchg double did not match */
CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */
CPU_PARTIAL_FREE, /* Refill cpu partial on free */
CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
NR_SLUB_STAT_ITEMS
};
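With CONFIG_SLUB_STATS these counters are bumped per cpu and exported under /sys/kernel/slab/<cache>/; the increment helper in mm/slub.c is roughly the following (paraphrased sketch, not the verbatim implementation):

static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
        /* Per-cpu counters keep the hot paths free of cache line bouncing. */
        raw_cpu_inc(s->cpu_slab->stat[si]);
#endif
}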
#ifndef CONFIG_SLUB_TINY
/*
* When changing the layout, make sure freelist and tid are still compatible
* with this_cpu_cmpxchg_double() alignment requirements.
*/
struct kmem_cache_cpu {
union {
struct {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
};
freelist_aba_t freelist_tid;
};
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct slab *partial; /* Partially allocated frozen slabs */
#endif
local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
};
#endif /* CONFIG_SLUB_TINY */
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c) ((c)->partial)
#define slub_set_percpu_partial(c, p) \
({ \
slub_percpu_partial(c) = (p)->next; \
})
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c) NULL
#define slub_set_percpu_partial(c, p)
#define slub_percpu_partial_read_once(c) NULL
#endif // CONFIG_SLUB_CPU_PARTIAL
/*
* Word size structure that can be atomically updated or read and that
* contains both the order and the number of objects that a slab of the
* given order would contain.
*/
struct kmem_cache_order_objects {
unsigned int x;
};
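A sketch of how the packing works; mm/slub.c uses helpers along these lines, with the shift width (16 here) treated as an assumption for illustration:

#define EX_OO_SHIFT     16
#define EX_OO_MASK      ((1U << EX_OO_SHIFT) - 1)

static inline struct kmem_cache_order_objects ex_oo_make(unsigned int order,
                                                         unsigned int objects)
{
        struct kmem_cache_order_objects x = { (order << EX_OO_SHIFT) + objects };

        return x;
}

static inline unsigned int ex_oo_order(struct kmem_cache_order_objects x)
{
        return x.x >> EX_OO_SHIFT;
}

static inline unsigned int ex_oo_objects(struct kmem_cache_order_objects x)
{
        return x.x & EX_OO_MASK;
}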
/*
* Slab cache management.
*/
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
struct kmem_cache_cpu __percpu *cpu_slab;
#endif
/* Used for retrieving partial slabs, etc. */
slab_flags_t flags;
unsigned long min_partial;
unsigned int size; /* The size of an object including metadata */
unsigned int object_size;/* The size of an object without metadata */
struct reciprocal_value reciprocal_size;
unsigned int offset; /* Free pointer offset */
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
/* Number of per cpu partial slabs to keep around */
unsigned int cpu_partial_slabs;
#endif
struct kmem_cache_order_objects oo;
/* Allocation and freeing of slabs */
struct kmem_cache_order_objects min;
gfp_t allocflags; /* gfp flags to use on each alloc */
int refcount; /* Refcount for slab cache destroy */
void (*ctor)(void *);
unsigned int inuse; /* Offset to metadata */
unsigned int align; /* Alignment */
unsigned int red_left_pad; /* Left redzone padding size */
const char *name; /* Name (only for display!) */
struct list_head list; /* List of slab caches */
#ifdef CONFIG_SYSFS
struct kobject kobj; /* For sysfs */
#endif
#ifdef CONFIG_SLAB_FREELIST_HARDENED
unsigned long random;
#endif
#ifdef CONFIG_NUMA
/*
* Defragmentation by allocating from a remote node.
*/
unsigned int remote_node_defrag_ratio;
#endif
#ifdef CONFIG_SLAB_FREELIST_RANDOM
unsigned int *random_seq;
#endif
#ifdef CONFIG_KASAN_GENERIC
struct kasan_cache kasan_info;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *);
void sysfs_slab_release(struct kmem_cache *);
#else
static inline void sysfs_slab_unlink(struct kmem_cache *s)
{
}
static inline void sysfs_slab_release(struct kmem_cache *s)
{
}
#endif
void *fixup_red_left(struct kmem_cache *s, void *p);
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x) {
void *object = x - (x - slab_address(slab)) % cache->size;
void *last_object = slab_address(slab) +
(slab->objects - 1) * cache->size;
void *result = (unlikely(object > last_object)) ? last_object : object;
result = fixup_red_left(cache, result);
return result;
}
/* Determine object index from a given position */
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
void *addr, void *obj)
{
return reciprocal_divide(kasan_reset_tag(obj) - addr,
cache->reciprocal_size);
}
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct slab *slab, void *obj)
{
if (is_kfence_address(obj))
return 0;
return __obj_to_index(cache, slab_address(slab), obj);
}
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
return slab->objects;
}
#endif /* _LINUX_SLUB_DEF_H */
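Worked example for the helpers above (illustrative numbers): with cache->size == 256, an object pointer at slab_address(slab) + 0x340 gives obj_to_index() == 3 via the reciprocal divide, and nearest_obj() rounds the same pointer down to slot 3 before fixup_red_left() skips the left red zone (when SLAB_RED_ZONE is active), so the returned pointer is the object itself rather than its red zone.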
...@@ -2125,11 +2125,6 @@ static struct cpuhp_step cpuhp_hp_states[] = { ...@@ -2125,11 +2125,6 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = relay_prepare_cpu, .startup.single = relay_prepare_cpu,
.teardown.single = NULL, .teardown.single = NULL,
}, },
[CPUHP_SLAB_PREPARE] = {
.name = "slab:prepare",
.startup.single = slab_prepare_cpu,
.teardown.single = slab_dead_cpu,
},
[CPUHP_RCUTREE_PREP] = { [CPUHP_RCUTREE_PREP] = {
.name = "RCU/tree:prepare", .name = "RCU/tree:prepare",
.startup.single = rcutree_prepare_cpu, .startup.single = rcutree_prepare_cpu,
......
...@@ -1985,7 +1985,6 @@ config FAULT_INJECTION ...@@ -1985,7 +1985,6 @@ config FAULT_INJECTION
config FAILSLAB config FAILSLAB
bool "Fault-injection capability for kmalloc" bool "Fault-injection capability for kmalloc"
depends on FAULT_INJECTION depends on FAULT_INJECTION
depends on SLAB || SLUB
help help
Provide fault-injection capability for kmalloc. Provide fault-injection capability for kmalloc.
......
...@@ -37,7 +37,7 @@ menuconfig KASAN ...@@ -37,7 +37,7 @@ menuconfig KASAN
(HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \ (HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \
CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \ CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
HAVE_ARCH_KASAN_HW_TAGS HAVE_ARCH_KASAN_HW_TAGS
depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB) depends on SYSFS && !SLUB_TINY
select STACKDEPOT_ALWAYS_INIT select STACKDEPOT_ALWAYS_INIT
help help
Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety
...@@ -78,7 +78,7 @@ config KASAN_GENERIC ...@@ -78,7 +78,7 @@ config KASAN_GENERIC
bool "Generic KASAN" bool "Generic KASAN"
depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC
depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS
select SLUB_DEBUG if SLUB select SLUB_DEBUG
select CONSTRUCTORS select CONSTRUCTORS
help help
Enables Generic KASAN. Enables Generic KASAN.
...@@ -89,13 +89,11 @@ config KASAN_GENERIC ...@@ -89,13 +89,11 @@ config KASAN_GENERIC
overhead of ~50% for dynamic allocations. overhead of ~50% for dynamic allocations.
The performance slowdown is ~x3. The performance slowdown is ~x3.
(Incompatible with CONFIG_DEBUG_SLAB: the kernel does not boot.)
config KASAN_SW_TAGS config KASAN_SW_TAGS
bool "Software Tag-Based KASAN" bool "Software Tag-Based KASAN"
depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS
depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS
select SLUB_DEBUG if SLUB select SLUB_DEBUG
select CONSTRUCTORS select CONSTRUCTORS
help help
Enables Software Tag-Based KASAN. Enables Software Tag-Based KASAN.
...@@ -110,12 +108,9 @@ config KASAN_SW_TAGS ...@@ -110,12 +108,9 @@ config KASAN_SW_TAGS
May potentially introduce problems related to pointer casting and May potentially introduce problems related to pointer casting and
comparison, as it embeds a tag into the top byte of each pointer. comparison, as it embeds a tag into the top byte of each pointer.
(Incompatible with CONFIG_DEBUG_SLAB: the kernel does not boot.)
config KASAN_HW_TAGS config KASAN_HW_TAGS
bool "Hardware Tag-Based KASAN" bool "Hardware Tag-Based KASAN"
depends on HAVE_ARCH_KASAN_HW_TAGS depends on HAVE_ARCH_KASAN_HW_TAGS
depends on SLUB
help help
Enables Hardware Tag-Based KASAN. Enables Hardware Tag-Based KASAN.
......
...@@ -5,7 +5,7 @@ config HAVE_ARCH_KFENCE ...@@ -5,7 +5,7 @@ config HAVE_ARCH_KFENCE
menuconfig KFENCE menuconfig KFENCE
bool "KFENCE: low-overhead sampling-based memory safety error detector" bool "KFENCE: low-overhead sampling-based memory safety error detector"
depends on HAVE_ARCH_KFENCE && (SLAB || SLUB) depends on HAVE_ARCH_KFENCE
select STACKTRACE select STACKTRACE
select IRQ_WORK select IRQ_WORK
help help
......
...@@ -11,7 +11,7 @@ config HAVE_KMSAN_COMPILER ...@@ -11,7 +11,7 @@ config HAVE_KMSAN_COMPILER
config KMSAN config KMSAN
bool "KMSAN: detector of uninitialized values use" bool "KMSAN: detector of uninitialized values use"
depends on HAVE_ARCH_KMSAN && HAVE_KMSAN_COMPILER depends on HAVE_ARCH_KMSAN && HAVE_KMSAN_COMPILER
depends on SLUB && DEBUG_KERNEL && !KASAN && !KCSAN depends on DEBUG_KERNEL && !KASAN && !KCSAN
depends on !PREEMPT_RT depends on !PREEMPT_RT
select STACKDEPOT select STACKDEPOT
select STACKDEPOT_ALWAYS_INIT select STACKDEPOT_ALWAYS_INIT
......
...@@ -226,52 +226,17 @@ config ZSMALLOC_CHAIN_SIZE ...@@ -226,52 +226,17 @@ config ZSMALLOC_CHAIN_SIZE
For more information, see zsmalloc documentation. For more information, see zsmalloc documentation.
menu "SLAB allocator options" menu "Slab allocator options"
choice
prompt "Choose SLAB allocator"
default SLUB
help
This option allows to select a slab allocator.
config SLAB_DEPRECATED
bool "SLAB (DEPRECATED)"
depends on !PREEMPT_RT
help
Deprecated and scheduled for removal in a few cycles. Replaced by
SLUB.
If you cannot migrate to SLUB, please contact linux-mm@kvack.org
and the people listed in the SLAB ALLOCATOR section of MAINTAINERS
file, explaining why.
The regular slab allocator that is established and known to work
well in all environments. It organizes cache hot objects in
per cpu and per node queues.
config SLUB config SLUB
bool "SLUB (Unqueued Allocator)" def_bool y
help
SLUB is a slab allocator that minimizes cache line usage
instead of managing queues of cached objects (SLAB approach).
Per cpu caching is realized using slabs of objects instead
of queues of objects. SLUB can use memory efficiently
and has enhanced diagnostics. SLUB is the default choice for
a slab allocator.
endchoice
config SLAB
bool
default y
depends on SLAB_DEPRECATED
config SLUB_TINY config SLUB_TINY
bool "Configure SLUB for minimal memory footprint" bool "Configure for minimal memory footprint"
depends on SLUB && EXPERT depends on EXPERT
select SLAB_MERGE_DEFAULT select SLAB_MERGE_DEFAULT
help help
Configures the SLUB allocator in a way to achieve minimal memory Configures the slab allocator in a way to achieve minimal memory
footprint, sacrificing scalability, debugging and other features. footprint, sacrificing scalability, debugging and other features.
This is intended only for the smallest system that had used the This is intended only for the smallest system that had used the
SLOB allocator and is not recommended for systems with more than SLOB allocator and is not recommended for systems with more than
...@@ -282,7 +247,6 @@ config SLUB_TINY ...@@ -282,7 +247,6 @@ config SLUB_TINY
config SLAB_MERGE_DEFAULT config SLAB_MERGE_DEFAULT
bool "Allow slab caches to be merged" bool "Allow slab caches to be merged"
default y default y
depends on SLAB || SLUB
help help
For reduced kernel memory fragmentation, slab caches can be For reduced kernel memory fragmentation, slab caches can be
merged when they share the same size and other characteristics. merged when they share the same size and other characteristics.
...@@ -296,7 +260,7 @@ config SLAB_MERGE_DEFAULT ...@@ -296,7 +260,7 @@ config SLAB_MERGE_DEFAULT
config SLAB_FREELIST_RANDOM config SLAB_FREELIST_RANDOM
bool "Randomize slab freelist" bool "Randomize slab freelist"
depends on SLAB || (SLUB && !SLUB_TINY) depends on !SLUB_TINY
help help
Randomizes the freelist order used on creating new pages. This Randomizes the freelist order used on creating new pages. This
security feature reduces the predictability of the kernel slab security feature reduces the predictability of the kernel slab
...@@ -304,21 +268,19 @@ config SLAB_FREELIST_RANDOM ...@@ -304,21 +268,19 @@ config SLAB_FREELIST_RANDOM
config SLAB_FREELIST_HARDENED config SLAB_FREELIST_HARDENED
bool "Harden slab freelist metadata" bool "Harden slab freelist metadata"
depends on SLAB || (SLUB && !SLUB_TINY) depends on !SLUB_TINY
help help
Many kernel heap attacks try to target slab cache metadata and Many kernel heap attacks try to target slab cache metadata and
other infrastructure. This options makes minor performance other infrastructure. This options makes minor performance
sacrifices to harden the kernel slab allocator against common sacrifices to harden the kernel slab allocator against common
freelist exploit methods. Some slab implementations have more freelist exploit methods.
sanity-checking than others. This option is most effective with
CONFIG_SLUB.
config SLUB_STATS config SLUB_STATS
default n default n
bool "Enable SLUB performance statistics" bool "Enable performance statistics"
depends on SLUB && SYSFS && !SLUB_TINY depends on SYSFS && !SLUB_TINY
help help
SLUB statistics are useful to debug SLUBs allocation behavior in The statistics are useful to debug slab allocation behavior in
order find ways to optimize the allocator. This should never be order find ways to optimize the allocator. This should never be
enabled for production use since keeping statistics slows down enabled for production use since keeping statistics slows down
the allocator by a few percentage points. The slabinfo command the allocator by a few percentage points. The slabinfo command
...@@ -328,8 +290,8 @@ config SLUB_STATS ...@@ -328,8 +290,8 @@ config SLUB_STATS
config SLUB_CPU_PARTIAL config SLUB_CPU_PARTIAL
default y default y
depends on SLUB && SMP && !SLUB_TINY depends on SMP && !SLUB_TINY
bool "SLUB per cpu partial cache" bool "Enable per cpu partial caches"
help help
Per cpu partial caches accelerate objects allocation and freeing Per cpu partial caches accelerate objects allocation and freeing
that is local to a processor at the price of more indeterminism that is local to a processor at the price of more indeterminism
...@@ -339,7 +301,7 @@ config SLUB_CPU_PARTIAL ...@@ -339,7 +301,7 @@ config SLUB_CPU_PARTIAL
config RANDOM_KMALLOC_CACHES config RANDOM_KMALLOC_CACHES
default n default n
depends on SLUB && !SLUB_TINY depends on !SLUB_TINY
bool "Randomize slab caches for normal kmalloc" bool "Randomize slab caches for normal kmalloc"
help help
A hardening feature that creates multiple copies of slab caches for A hardening feature that creates multiple copies of slab caches for
...@@ -354,7 +316,7 @@ config RANDOM_KMALLOC_CACHES ...@@ -354,7 +316,7 @@ config RANDOM_KMALLOC_CACHES
limited degree of memory and CPU overhead that relates to hardware and limited degree of memory and CPU overhead that relates to hardware and
system workload. system workload.
endmenu # SLAB allocator options endmenu # Slab allocator options
config SHUFFLE_PAGE_ALLOCATOR config SHUFFLE_PAGE_ALLOCATOR
bool "Page allocator randomization" bool "Page allocator randomization"
......
...@@ -45,18 +45,10 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT ...@@ -45,18 +45,10 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT
Enable debug page memory allocations by default? This value Enable debug page memory allocations by default? This value
can be overridden by debug_pagealloc=off|on. can be overridden by debug_pagealloc=off|on.
config DEBUG_SLAB
bool "Debug slab memory allocations"
depends on DEBUG_KERNEL && SLAB
help
Say Y here to have the kernel do limited verification on memory
allocation as well as poisoning memory on free to catch use of freed
memory. This can make kmalloc/kfree-intensive workloads much slower.
config SLUB_DEBUG config SLUB_DEBUG
default y default y
bool "Enable SLUB debugging support" if EXPERT bool "Enable SLUB debugging support" if EXPERT
depends on SLUB && SYSFS && !SLUB_TINY depends on SYSFS && !SLUB_TINY
select STACKDEPOT if STACKTRACE_SUPPORT select STACKDEPOT if STACKTRACE_SUPPORT
help help
SLUB has extensive debug support features. Disabling these can SLUB has extensive debug support features. Disabling these can
...@@ -66,7 +58,7 @@ config SLUB_DEBUG ...@@ -66,7 +58,7 @@ config SLUB_DEBUG
config SLUB_DEBUG_ON config SLUB_DEBUG_ON
bool "SLUB debugging on by default" bool "SLUB debugging on by default"
depends on SLUB && SLUB_DEBUG depends on SLUB_DEBUG
select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT
default n default n
help help
...@@ -231,8 +223,8 @@ config DEBUG_KMEMLEAK ...@@ -231,8 +223,8 @@ config DEBUG_KMEMLEAK
allocations. See Documentation/dev-tools/kmemleak.rst for more allocations. See Documentation/dev-tools/kmemleak.rst for more
details. details.
Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances Enabling SLUB_DEBUG may increase the chances of finding leaks
of finding leaks due to the slab objects poisoning. due to the slab objects poisoning.
In order to access the kmemleak file, debugfs needs to be In order to access the kmemleak file, debugfs needs to be
mounted (usually at /sys/kernel/debug). mounted (usually at /sys/kernel/debug).
......
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
# #
KASAN_SANITIZE_slab_common.o := n KASAN_SANITIZE_slab_common.o := n
KASAN_SANITIZE_slab.o := n
KASAN_SANITIZE_slub.o := n KASAN_SANITIZE_slub.o := n
KCSAN_SANITIZE_kmemleak.o := n KCSAN_SANITIZE_kmemleak.o := n
...@@ -12,7 +11,6 @@ KCSAN_SANITIZE_kmemleak.o := n ...@@ -12,7 +11,6 @@ KCSAN_SANITIZE_kmemleak.o := n
# the same word but accesses to different bits of that word. Re-enable KCSAN # the same word but accesses to different bits of that word. Re-enable KCSAN
# for these when we have more consensus on what to do about them. # for these when we have more consensus on what to do about them.
KCSAN_SANITIZE_slab_common.o := n KCSAN_SANITIZE_slab_common.o := n
KCSAN_SANITIZE_slab.o := n
KCSAN_SANITIZE_slub.o := n KCSAN_SANITIZE_slub.o := n
KCSAN_SANITIZE_page_alloc.o := n KCSAN_SANITIZE_page_alloc.o := n
# But enable explicit instrumentation for memory barriers. # But enable explicit instrumentation for memory barriers.
...@@ -22,7 +20,6 @@ KCSAN_INSTRUMENT_BARRIERS := y ...@@ -22,7 +20,6 @@ KCSAN_INSTRUMENT_BARRIERS := y
# flaky coverage that is not a function of syscall inputs. E.g. slab is out of # flaky coverage that is not a function of syscall inputs. E.g. slab is out of
# free pages, or a task is migrated between nodes. # free pages, or a task is migrated between nodes.
KCOV_INSTRUMENT_slab_common.o := n KCOV_INSTRUMENT_slab_common.o := n
KCOV_INSTRUMENT_slab.o := n
KCOV_INSTRUMENT_slub.o := n KCOV_INSTRUMENT_slub.o := n
KCOV_INSTRUMENT_page_alloc.o := n KCOV_INSTRUMENT_page_alloc.o := n
KCOV_INSTRUMENT_debug-pagealloc.o := n KCOV_INSTRUMENT_debug-pagealloc.o := n
...@@ -66,6 +63,7 @@ obj-y += page-alloc.o ...@@ -66,6 +63,7 @@ obj-y += page-alloc.o
obj-y += init-mm.o obj-y += init-mm.o
obj-y += memblock.o obj-y += memblock.o
obj-y += $(memory-hotplug-y) obj-y += $(memory-hotplug-y)
obj-y += slub.o
ifdef CONFIG_MMU ifdef CONFIG_MMU
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
...@@ -82,8 +80,6 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o ...@@ -82,8 +80,6 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
obj-$(CONFIG_KSM) += ksm.o obj-$(CONFIG_KSM) += ksm.o
obj-$(CONFIG_PAGE_POISONING) += page_poison.o obj-$(CONFIG_PAGE_POISONING) += page_poison.o
obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_SLUB) += slub.o
obj-$(CONFIG_KASAN) += kasan/ obj-$(CONFIG_KASAN) += kasan/
obj-$(CONFIG_KFENCE) += kfence/ obj-$(CONFIG_KFENCE) += kfence/
obj-$(CONFIG_KMSAN) += kmsan/ obj-$(CONFIG_KMSAN) += kmsan/
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/wait.h> #include <linux/wait.h>
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON) #ifdef CONFIG_SLUB_DEBUG_ON
#define DMAPOOL_DEBUG 1 #define DMAPOOL_DEBUG 1
#endif #endif
......
...@@ -153,10 +153,6 @@ void __kasan_poison_object_data(struct kmem_cache *cache, void *object) ...@@ -153,10 +153,6 @@ void __kasan_poison_object_data(struct kmem_cache *cache, void *object)
* 2. A cache might be SLAB_TYPESAFE_BY_RCU, which means objects can be * 2. A cache might be SLAB_TYPESAFE_BY_RCU, which means objects can be
* accessed after being freed. We preassign tags for objects in these * accessed after being freed. We preassign tags for objects in these
* caches as well. * caches as well.
* 3. For SLAB allocator we can't preassign tags randomly since the freelist
* is stored as an array of indexes instead of a linked list. Assign tags
* based on objects indexes, so that objects that are next to each other
* get different tags.
*/ */
static inline u8 assign_tag(struct kmem_cache *cache, static inline u8 assign_tag(struct kmem_cache *cache,
const void *object, bool init) const void *object, bool init)
...@@ -171,17 +167,12 @@ static inline u8 assign_tag(struct kmem_cache *cache, ...@@ -171,17 +167,12 @@ static inline u8 assign_tag(struct kmem_cache *cache,
if (!cache->ctor && !(cache->flags & SLAB_TYPESAFE_BY_RCU)) if (!cache->ctor && !(cache->flags & SLAB_TYPESAFE_BY_RCU))
return init ? KASAN_TAG_KERNEL : kasan_random_tag(); return init ? KASAN_TAG_KERNEL : kasan_random_tag();
/* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
#ifdef CONFIG_SLAB
/* For SLAB assign tags based on the object index in the freelist. */
return (u8)obj_to_index(cache, virt_to_slab(object), (void *)object);
#else
/* /*
* For SLUB assign a random tag during slab creation, otherwise reuse * For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU,
* assign a random tag during slab creation, otherwise reuse
* the already assigned tag. * the already assigned tag.
*/ */
return init ? kasan_random_tag() : get_tag(object); return init ? kasan_random_tag() : get_tag(object);
#endif
} }
void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
......
...@@ -373,8 +373,7 @@ void kasan_set_track(struct kasan_track *track, gfp_t flags); ...@@ -373,8 +373,7 @@ void kasan_set_track(struct kasan_track *track, gfp_t flags);
void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags); void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags);
void kasan_save_free_info(struct kmem_cache *cache, void *object); void kasan_save_free_info(struct kmem_cache *cache, void *object);
#if defined(CONFIG_KASAN_GENERIC) && \ #ifdef CONFIG_KASAN_GENERIC
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
bool kasan_quarantine_put(struct kmem_cache *cache, void *object); bool kasan_quarantine_put(struct kmem_cache *cache, void *object);
void kasan_quarantine_reduce(void); void kasan_quarantine_reduce(void);
void kasan_quarantine_remove_cache(struct kmem_cache *cache); void kasan_quarantine_remove_cache(struct kmem_cache *cache);
......
...@@ -144,10 +144,6 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache) ...@@ -144,10 +144,6 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
{ {
void *object = qlink_to_object(qlink, cache); void *object = qlink_to_object(qlink, cache);
struct kasan_free_meta *meta = kasan_get_free_meta(cache, object); struct kasan_free_meta *meta = kasan_get_free_meta(cache, object);
unsigned long flags;
if (IS_ENABLED(CONFIG_SLAB))
local_irq_save(flags);
/* /*
* If init_on_free is enabled and KASAN's free metadata is stored in * If init_on_free is enabled and KASAN's free metadata is stored in
...@@ -166,9 +162,6 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache) ...@@ -166,9 +162,6 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
*(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE; *(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE;
___cache_free(cache, object, _THIS_IP_); ___cache_free(cache, object, _THIS_IP_);
if (IS_ENABLED(CONFIG_SLAB))
local_irq_restore(flags);
} }
static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache) static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache)
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/stacktrace.h> #include <linux/stacktrace.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/vmalloc.h>
#include <linux/kasan.h> #include <linux/kasan.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/sched/task_stack.h> #include <linux/sched/task_stack.h>
......
...@@ -463,11 +463,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g ...@@ -463,11 +463,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
/* Set required slab fields. */ /* Set required slab fields. */
slab = virt_to_slab((void *)meta->addr); slab = virt_to_slab((void *)meta->addr);
slab->slab_cache = cache; slab->slab_cache = cache;
#if defined(CONFIG_SLUB)
slab->objects = 1; slab->objects = 1;
#elif defined(CONFIG_SLAB)
slab->s_mem = addr;
#endif
/* Memory initialization. */ /* Memory initialization. */
set_canary(meta); set_canary(meta);
......
...@@ -64,6 +64,7 @@ ...@@ -64,6 +64,7 @@
#include <linux/psi.h> #include <linux/psi.h>
#include <linux/seq_buf.h> #include <linux/seq_buf.h>
#include <linux/sched/isolation.h> #include <linux/sched/isolation.h>
#include <linux/kmemleak.h>
#include "internal.h" #include "internal.h"
#include <net/sock.h> #include <net/sock.h>
#include <net/ip.h> #include <net/ip.h>
...@@ -5149,7 +5150,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, ...@@ -5149,7 +5150,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
return ret; return ret;
} }
#if defined(CONFIG_MEMCG_KMEM) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)) #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG)
static int mem_cgroup_slab_show(struct seq_file *m, void *p) static int mem_cgroup_slab_show(struct seq_file *m, void *p)
{ {
/* /*
...@@ -5258,8 +5259,7 @@ static struct cftype mem_cgroup_legacy_files[] = { ...@@ -5258,8 +5259,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
.write = mem_cgroup_reset, .write = mem_cgroup_reset,
.read_u64 = mem_cgroup_read_u64, .read_u64 = mem_cgroup_read_u64,
}, },
#if defined(CONFIG_MEMCG_KMEM) && \ #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG)
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
{ {
.name = "kmem.slabinfo", .name = "kmem.slabinfo",
.seq_show = mem_cgroup_slab_show, .seq_show = mem_cgroup_slab_show,
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include <linux/writeback.h> #include <linux/writeback.h>
#include "slab.h" #include "slab.h"
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON) #ifdef CONFIG_SLUB_DEBUG_ON
static void poison_error(mempool_t *pool, void *element, size_t size, static void poison_error(mempool_t *pool, void *element, size_t size,
size_t byte) size_t byte)
{ {
...@@ -95,14 +95,14 @@ static void poison_element(mempool_t *pool, void *element) ...@@ -95,14 +95,14 @@ static void poison_element(mempool_t *pool, void *element)
kunmap_atomic(addr); kunmap_atomic(addr);
} }
} }
#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */ #else /* CONFIG_SLUB_DEBUG_ON */
static inline void check_element(mempool_t *pool, void *element) static inline void check_element(mempool_t *pool, void *element)
{ {
} }
static inline void poison_element(mempool_t *pool, void *element) static inline void poison_element(mempool_t *pool, void *element)
{ {
} }
#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */ #endif /* CONFIG_SLUB_DEBUG_ON */
static __always_inline void kasan_poison_element(mempool_t *pool, void *element) static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
{ {
......
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
#ifndef MM_SLAB_H #ifndef MM_SLAB_H
#define MM_SLAB_H #define MM_SLAB_H
#include <linux/reciprocal_div.h>
#include <linux/list_lru.h>
#include <linux/local_lock.h>
#include <linux/random.h>
#include <linux/kobject.h>
#include <linux/sched/mm.h>
#include <linux/memcontrol.h>
#include <linux/kfence.h>
#include <linux/kasan.h>
/* /*
* Internal slab definitions * Internal slab definitions
*/ */
void __init kmem_cache_init(void);
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
# ifdef system_has_cmpxchg128 # ifdef system_has_cmpxchg128
...@@ -42,21 +52,6 @@ typedef union { ...@@ -42,21 +52,6 @@ typedef union {
struct slab { struct slab {
unsigned long __page_flags; unsigned long __page_flags;
#if defined(CONFIG_SLAB)
struct kmem_cache *slab_cache;
union {
struct {
struct list_head slab_list;
void *freelist; /* array of free object indexes */
void *s_mem; /* first object */
};
struct rcu_head rcu_head;
};
unsigned int active;
#elif defined(CONFIG_SLUB)
struct kmem_cache *slab_cache; struct kmem_cache *slab_cache;
union { union {
struct { struct {
...@@ -91,10 +86,6 @@ struct slab { ...@@ -91,10 +86,6 @@ struct slab {
}; };
unsigned int __unused; unsigned int __unused;
#else
#error "Unexpected slab allocator configured"
#endif
atomic_t __page_refcount; atomic_t __page_refcount;
#ifdef CONFIG_MEMCG #ifdef CONFIG_MEMCG
unsigned long memcg_data; unsigned long memcg_data;
...@@ -111,7 +102,7 @@ SLAB_MATCH(memcg_data, memcg_data); ...@@ -111,7 +102,7 @@ SLAB_MATCH(memcg_data, memcg_data);
#endif #endif
#undef SLAB_MATCH #undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page)); static_assert(sizeof(struct slab) <= sizeof(struct page));
#if defined(system_has_freelist_aba) && defined(CONFIG_SLUB) #if defined(system_has_freelist_aba)
static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t))); static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)));
#endif #endif
...@@ -228,21 +219,138 @@ static inline size_t slab_size(const struct slab *slab) ...@@ -228,21 +219,138 @@ static inline size_t slab_size(const struct slab *slab)
return PAGE_SIZE << slab_order(slab); return PAGE_SIZE << slab_order(slab);
} }
#ifdef CONFIG_SLAB #ifdef CONFIG_SLUB_CPU_PARTIAL
#include <linux/slab_def.h> #define slub_percpu_partial(c) ((c)->partial)
#define slub_set_percpu_partial(c, p) \
({ \
slub_percpu_partial(c) = (p)->next; \
})
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c) NULL
#define slub_set_percpu_partial(c, p)
#define slub_percpu_partial_read_once(c) NULL
#endif // CONFIG_SLUB_CPU_PARTIAL
/*
* Word size structure that can be atomically updated or read and that
* contains both the order and the number of objects that a slab of the
* given order would contain.
*/
struct kmem_cache_order_objects {
unsigned int x;
};
/*
* Slab cache management.
*/
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
struct kmem_cache_cpu __percpu *cpu_slab;
#endif
/* Used for retrieving partial slabs, etc. */
slab_flags_t flags;
unsigned long min_partial;
unsigned int size; /* Object size including metadata */
unsigned int object_size; /* Object size without metadata */
struct reciprocal_value reciprocal_size;
unsigned int offset; /* Free pointer offset */
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
/* Number of per cpu partial slabs to keep around */
unsigned int cpu_partial_slabs;
#endif
struct kmem_cache_order_objects oo;
/* Allocation and freeing of slabs */
struct kmem_cache_order_objects min;
gfp_t allocflags; /* gfp flags to use on each alloc */
int refcount; /* Refcount for slab cache destroy */
void (*ctor)(void *object); /* Object constructor */
unsigned int inuse; /* Offset to metadata */
unsigned int align; /* Alignment */
unsigned int red_left_pad; /* Left redzone padding size */
const char *name; /* Name (only for display!) */
struct list_head list; /* List of slab caches */
#ifdef CONFIG_SYSFS
struct kobject kobj; /* For sysfs */
#endif
#ifdef CONFIG_SLAB_FREELIST_HARDENED
unsigned long random;
#endif #endif
#ifdef CONFIG_SLUB #ifdef CONFIG_NUMA
#include <linux/slub_def.h> /*
* Defragmentation by allocating from a remote node.
*/
unsigned int remote_node_defrag_ratio;
#endif #endif
#include <linux/memcontrol.h> #ifdef CONFIG_SLAB_FREELIST_RANDOM
#include <linux/fault-inject.h> unsigned int *random_seq;
#include <linux/kasan.h> #endif
#include <linux/kmemleak.h>
#include <linux/random.h> #ifdef CONFIG_KASAN_GENERIC
#include <linux/sched/mm.h> struct kasan_cache kasan_info;
#include <linux/list_lru.h> #endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *s);
void sysfs_slab_release(struct kmem_cache *s);
#else
static inline void sysfs_slab_unlink(struct kmem_cache *s) { }
static inline void sysfs_slab_release(struct kmem_cache *s) { }
#endif
void *fixup_red_left(struct kmem_cache *s, void *p);
static inline void *nearest_obj(struct kmem_cache *cache,
const struct slab *slab, void *x)
{
void *object = x - (x - slab_address(slab)) % cache->size;
void *last_object = slab_address(slab) +
(slab->objects - 1) * cache->size;
void *result = (unlikely(object > last_object)) ? last_object : object;
result = fixup_red_left(cache, result);
return result;
}
/* Determine object index from a given position */
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
void *addr, void *obj)
{
return reciprocal_divide(kasan_reset_tag(obj) - addr,
cache->reciprocal_size);
}
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct slab *slab, void *obj)
{
if (is_kfence_address(obj))
return 0;
return __obj_to_index(cache, slab_address(slab), obj);
}
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
return slab->objects;
}
/* /*
* State of the slab allocator. * State of the slab allocator.
...@@ -281,19 +389,39 @@ extern const struct kmalloc_info_struct { ...@@ -281,19 +389,39 @@ extern const struct kmalloc_info_struct {
void setup_kmalloc_cache_index_table(void); void setup_kmalloc_cache_index_table(void);
void create_kmalloc_caches(slab_flags_t); void create_kmalloc_caches(slab_flags_t);
/* Find the kmalloc slab corresponding for a certain size */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller);
void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
                              int node, size_t orig_size,
                              unsigned long caller);
void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller);
extern u8 kmalloc_size_index[24];
static inline unsigned int size_index_elem(unsigned int bytes)
{
        return (bytes - 1) / 8;
}
/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 *
 * This assumes size is larger than zero and not larger than
 * KMALLOC_MAX_CACHE_SIZE and the caller must check that.
 */
static inline struct kmem_cache *
kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
{
        unsigned int index;
        if (size <= 192)
                index = kmalloc_size_index[size_index_elem(size)];
        else
                index = fls(size - 1);
        return kmalloc_caches[kmalloc_type(flags, caller)][index];
}
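Worked example for the new inline lookup (assuming the standard kmalloc size classes): a 100-byte request takes the table path, where kmalloc_size_index[(100 - 1) / 8] == kmalloc_size_index[12] selects the kmalloc-128 cache; a 300-byte request takes the fls() path, and fls(299) == 9 selects kmalloc-512.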
gfp_t kmalloc_fix_flags(gfp_t flags); gfp_t kmalloc_fix_flags(gfp_t flags);
/* Functions provided by the slab allocators */ /* Functions provided by the slab allocators */
int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags); int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
void __init kmem_cache_init(void);
void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type, void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
slab_flags_t flags); slab_flags_t flags);
extern void create_boot_cache(struct kmem_cache *, const char *name, extern void create_boot_cache(struct kmem_cache *, const char *name,
...@@ -320,26 +448,16 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s) ...@@ -320,26 +448,16 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
SLAB_CACHE_DMA32 | SLAB_PANIC | \ SLAB_CACHE_DMA32 | SLAB_PANIC | \
SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS ) SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
#if defined(CONFIG_DEBUG_SLAB) #ifdef CONFIG_SLUB_DEBUG
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
#elif defined(CONFIG_SLUB_DEBUG)
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
SLAB_TRACE | SLAB_CONSISTENCY_CHECKS) SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)
#else #else
#define SLAB_DEBUG_FLAGS (0) #define SLAB_DEBUG_FLAGS (0)
#endif #endif
#if defined(CONFIG_SLAB)
#define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \
SLAB_ACCOUNT | SLAB_NO_MERGE)
#elif defined(CONFIG_SLUB)
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | SLAB_ACCOUNT | \ SLAB_TEMPORARY | SLAB_ACCOUNT | \
SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE) SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE)
#else
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
#endif
/* Common flags available with current configuration */ /* Common flags available with current configuration */
#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
...@@ -387,12 +505,6 @@ void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s); ...@@ -387,12 +505,6 @@ void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
ssize_t slabinfo_write(struct file *file, const char __user *buffer, ssize_t slabinfo_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos); size_t count, loff_t *ppos);
static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
{
return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
}
#ifdef CONFIG_SLUB_DEBUG #ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG_ON #ifdef CONFIG_SLUB_DEBUG_ON
DECLARE_STATIC_KEY_TRUE(slub_debug_enabled); DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
...@@ -452,238 +564,32 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, ...@@ -452,238 +564,32 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
gfp_t gfp, bool new_slab); gfp_t gfp, bool new_slab);
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr); enum node_stat_item idx, int nr);
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
kfree(slab_objcgs(slab));
slab->memcg_data = 0;
}
static inline size_t obj_full_size(struct kmem_cache *s)
{
/*
* For each accounted object there is an extra space which is used
* to store obj_cgroup membership. Charge it too.
*/
return s->size + sizeof(struct obj_cgroup *);
}
/*
* Returns false if the allocation should fail.
*/
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t objects, gfp_t flags)
{
struct obj_cgroup *objcg;
if (!memcg_kmem_online())
return true;
if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))
return true;
/*
* The obtained objcg pointer is safe to use within the current scope,
* defined by current task or set_active_memcg() pair.
* obj_cgroup_get() is used to get a permanent reference.
*/
objcg = current_obj_cgroup();
if (!objcg)
return true;
if (lru) {
int ret;
struct mem_cgroup *memcg;
memcg = get_mem_cgroup_from_objcg(objcg);
ret = memcg_list_lru_alloc(memcg, lru, flags);
css_put(&memcg->css);
if (ret)
return false;
}
if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
return false;
*objcgp = objcg;
return true;
}
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg,
gfp_t flags, size_t size,
void **p)
{
struct slab *slab;
unsigned long off;
size_t i;
if (!memcg_kmem_online() || !objcg)
return;
for (i = 0; i < size; i++) {
if (likely(p[i])) {
slab = virt_to_slab(p[i]);
if (!slab_objcgs(slab) &&
memcg_alloc_slab_cgroups(slab, s, flags,
false)) {
obj_cgroup_uncharge(objcg, obj_full_size(s));
continue;
}
off = obj_to_index(s, slab, p[i]);
obj_cgroup_get(objcg);
slab_objcgs(slab)[off] = objcg;
mod_objcg_state(objcg, slab_pgdat(slab),
cache_vmstat_idx(s), obj_full_size(s));
} else {
obj_cgroup_uncharge(objcg, obj_full_size(s));
}
}
}
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects)
{
struct obj_cgroup **objcgs;
int i;
if (!memcg_kmem_online())
return;
objcgs = slab_objcgs(slab);
if (!objcgs)
return;
for (i = 0; i < objects; i++) {
struct obj_cgroup *objcg;
unsigned int off;
off = obj_to_index(s, slab, p[i]);
objcg = objcgs[off];
if (!objcg)
continue;
objcgs[off] = NULL;
obj_cgroup_uncharge(objcg, obj_full_size(s));
mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
-obj_full_size(s));
obj_cgroup_put(objcg);
}
}
#else /* CONFIG_MEMCG_KMEM */ #else /* CONFIG_MEMCG_KMEM */
static inline struct obj_cgroup **slab_objcgs(struct slab *slab) static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
{ {
return NULL; return NULL;
} }
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
{
return NULL;
}
static inline int memcg_alloc_slab_cgroups(struct slab *slab, static inline int memcg_alloc_slab_cgroups(struct slab *slab,
struct kmem_cache *s, gfp_t gfp, struct kmem_cache *s, gfp_t gfp,
bool new_slab) bool new_slab)
{ {
return 0; return 0;
} }
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
}
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t objects, gfp_t flags)
{
return true;
}
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg,
gfp_t flags, size_t size,
void **p)
{
}
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects)
{
}
#endif /* CONFIG_MEMCG_KMEM */ #endif /* CONFIG_MEMCG_KMEM */
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
struct slab *slab;
slab = virt_to_slab(obj);
if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n",
__func__))
return NULL;
return slab->slab_cache;
}
static __always_inline void account_slab(struct slab *slab, int order,
struct kmem_cache *s, gfp_t gfp)
{
if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
memcg_alloc_slab_cgroups(slab, s, gfp, true);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
PAGE_SIZE << order);
}
static __always_inline void unaccount_slab(struct slab *slab, int order,
struct kmem_cache *s)
{
if (memcg_kmem_online())
memcg_free_slab_cgroups(slab);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
}
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{
struct kmem_cache *cachep;
if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
!kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
return s;
cachep = virt_to_cache(x);
if (WARN(cachep && cachep != s,
"%s: Wrong slab cache. %s but object is from %s\n",
__func__, s->name, cachep->name))
print_tracking(cachep, x);
return cachep;
}
void free_large_kmalloc(struct folio *folio, void *object);
size_t __ksize(const void *objp); size_t __ksize(const void *objp);
static inline size_t slab_ksize(const struct kmem_cache *s) static inline size_t slab_ksize(const struct kmem_cache *s)
{ {
#ifndef CONFIG_SLUB #ifdef CONFIG_SLUB_DEBUG
return s->object_size;
#else /* CONFIG_SLUB */
# ifdef CONFIG_SLUB_DEBUG
/* /*
* Debugging requires use of the padding between object * Debugging requires use of the padding between object
* and whatever may come after it. * and whatever may come after it.
*/ */
if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
return s->object_size; return s->object_size;
# endif #endif
if (s->flags & SLAB_KASAN) if (s->flags & SLAB_KASAN)
return s->object_size; return s->object_size;
/* /*
...@@ -697,128 +603,9 @@ static inline size_t slab_ksize(const struct kmem_cache *s) ...@@ -697,128 +603,9 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
* Else we can use all the padding etc for the allocation * Else we can use all the padding etc for the allocation
*/ */
return s->size; return s->size;
#endif
}
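From the caller's side this is what ksize() ends up reporting; a hypothetical sketch (function name and sizes are assumptions):

static void ksize_example(void)
{
        char *p = kmalloc(100, GFP_KERNEL);

        if (p) {
                /* Backed by kmalloc-128, so usually 128 unless debug red zones claim the padding. */
                pr_info("usable size: %zu\n", ksize(p));
                kfree(p);
        }
}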
static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t size, gfp_t flags)
{
flags &= gfp_allowed_mask;
might_alloc(flags);
if (should_failslab(s, flags))
return NULL;
if (!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags))
return NULL;
return s;
}
static inline void slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg, gfp_t flags,
size_t size, void **p, bool init,
unsigned int orig_size)
{
unsigned int zero_size = s->object_size;
bool kasan_init = init;
size_t i;
flags &= gfp_allowed_mask;
/*
* For kmalloc object, the allocated memory size(object_size) is likely
* larger than the requested size(orig_size). If redzone check is
* enabled for the extra space, don't zero it, as it will be redzoned
* soon. The redzone operation for this extra space could be seen as a
* replacement of current poisoning under certain debug option, and
* won't break other sanity checks.
*/
if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
(s->flags & SLAB_KMALLOC))
zero_size = orig_size;
/*
* When slub_debug is enabled, avoid memory initialization integrated
* into KASAN and instead zero out the memory via the memset below with
* the proper size. Otherwise, KASAN might overwrite SLUB redzones and
* cause false-positive reports. This does not lead to a performance
* penalty on production builds, as slub_debug is not intended to be
* enabled there.
*/
if (__slub_debug_enabled())
kasan_init = false;
/*
* As memory initialization might be integrated into KASAN,
* kasan_slab_alloc and initialization memset must be
* kept together to avoid discrepancies in behavior.
*
* As p[i] might get tagged, memset and kmemleak hook come after KASAN.
*/
for (i = 0; i < size; i++) {
p[i] = kasan_slab_alloc(s, p[i], flags, kasan_init);
if (p[i] && init && (!kasan_init || !kasan_has_integrated_init()))
memset(p[i], 0, zero_size);
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, flags);
kmsan_slab_alloc(s, p[i], flags);
}
memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
} }
/*
* The slab lists for all objects.
*/
struct kmem_cache_node {
#ifdef CONFIG_SLAB
raw_spinlock_t list_lock;
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
unsigned long total_slabs; /* length of all slab lists */
unsigned long free_slabs; /* length of free slab list only */
unsigned long free_objects;
unsigned int free_limit;
unsigned int colour_next; /* Per-node cache coloring */
struct array_cache *shared; /* shared per node */
struct alien_cache **alien; /* on other nodes */
unsigned long next_reap; /* updated without locking */
int free_touched; /* updated without locking */
#endif
#ifdef CONFIG_SLUB
spinlock_t list_lock;
unsigned long nr_partial;
struct list_head partial;
#ifdef CONFIG_SLUB_DEBUG #ifdef CONFIG_SLUB_DEBUG
atomic_long_t nr_slabs;
atomic_long_t total_objects;
struct list_head full;
#endif
#endif
};
static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
return s->node[node];
}
/*
* Iterator over all nodes. The body will be executed for each node that has
* a kmem_cache_node structure allocated (which is true for all online nodes)
*/
#define for_each_kmem_cache_node(__s, __node, __n) \
for (__node = 0; __node < nr_node_ids; __node++) \
if ((__n = get_node(__s, __node)))
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
void dump_unreclaimable_slab(void); void dump_unreclaimable_slab(void);
#else #else
static inline void dump_unreclaimable_slab(void) static inline void dump_unreclaimable_slab(void)
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <linux/swiotlb.h> #include <linux/swiotlb.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/kmemleak.h>
#include <linux/kasan.h> #include <linux/kasan.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
...@@ -71,10 +72,8 @@ static int __init setup_slab_merge(char *str) ...@@ -71,10 +72,8 @@ static int __init setup_slab_merge(char *str)
return 1; return 1;
} }
#ifdef CONFIG_SLUB
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0); __setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
__setup_param("slub_merge", slub_merge, setup_slab_merge, 0); __setup_param("slub_merge", slub_merge, setup_slab_merge, 0);
#endif
__setup("slab_nomerge", setup_slab_nomerge); __setup("slab_nomerge", setup_slab_nomerge);
__setup("slab_merge", setup_slab_merge); __setup("slab_merge", setup_slab_merge);
...@@ -197,10 +196,6 @@ struct kmem_cache *find_mergeable(unsigned int size, unsigned int align, ...@@ -197,10 +196,6 @@ struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
if (s->size - size >= sizeof(void *)) if (s->size - size >= sizeof(void *))
continue; continue;
if (IS_ENABLED(CONFIG_SLAB) && align &&
(align > s->align || s->align % align))
continue;
return s; return s;
} }
return NULL; return NULL;
...@@ -670,7 +665,7 @@ EXPORT_SYMBOL(random_kmalloc_seed); ...@@ -670,7 +665,7 @@ EXPORT_SYMBOL(random_kmalloc_seed);
* of two cache sizes there. The size of larger slabs can be determined using * of two cache sizes there. The size of larger slabs can be determined using
* fls. * fls.
*/ */
static u8 size_index[24] __ro_after_init = { u8 kmalloc_size_index[24] __ro_after_init = {
3, /* 8 */ 3, /* 8 */
4, /* 16 */ 4, /* 16 */
5, /* 24 */ 5, /* 24 */
...@@ -697,33 +692,6 @@ static u8 size_index[24] __ro_after_init = { ...@@ -697,33 +692,6 @@ static u8 size_index[24] __ro_after_init = {
2 /* 192 */ 2 /* 192 */
}; };
static inline unsigned int size_index_elem(unsigned int bytes)
{
return (bytes - 1) / 8;
}
/*
* Find the kmem_cache structure that serves a given size of
* allocation
*/
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
{
unsigned int index;
if (size <= 192) {
if (!size)
return ZERO_SIZE_PTR;
index = size_index[size_index_elem(size)];
} else {
if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE))
return NULL;
index = fls(size - 1);
}
return kmalloc_caches[kmalloc_type(flags, caller)][index];
}
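The lookup above maps a request size to a kmalloc cache index: sizes up to 192 bytes go through the 8-byte-granular table, larger ones use fls(). A minimal userspace sketch of the same shape follows; demo_size_index, demo_fls and demo_kmalloc_index are invented names, and the table values are reproduced from memory, so treat them as illustrative rather than authoritative.

/* demo_kmalloc_index.c -- illustrative only, not kernel code.
 * Build: cc -o demo demo_kmalloc_index.c */
#include <stdio.h>

/* Hypothetical analogue of kmalloc_size_index[]: entry k covers request
 * sizes 8*k+1 .. 8*k+8. Index 1 stands for a 96-byte cache, 2 for 192. */
static const unsigned char demo_size_index[24] = {
	3, 4, 5, 5, 6, 6, 6, 6,		/* requests of   1..64  bytes */
	1, 1, 1, 1, 7, 7, 7, 7,		/* requests of  65..128 bytes */
	2, 2, 2, 2, 2, 2, 2, 2		/* requests of 129..192 bytes */
};

static unsigned int demo_fls(unsigned int x)
{
	return x ? 32 - (unsigned int)__builtin_clz(x) : 0;
}

/* Mirrors the lookup shape: small sizes use the table, larger ones fls(). */
static unsigned int demo_kmalloc_index(unsigned int size)
{
	if (size <= 192)
		return demo_size_index[(size - 1) / 8];
	return demo_fls(size - 1);
}

int main(void)
{
	unsigned int sizes[] = { 8, 24, 100, 192, 200, 1000, 4096 };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("size %4u -> index %u\n", sizes[i],
		       demo_kmalloc_index(sizes[i]));
	return 0;
}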
size_t kmalloc_size_roundup(size_t size) size_t kmalloc_size_roundup(size_t size)
{ {
if (size && size <= KMALLOC_MAX_CACHE_SIZE) { if (size && size <= KMALLOC_MAX_CACHE_SIZE) {
...@@ -848,9 +816,9 @@ void __init setup_kmalloc_cache_index_table(void) ...@@ -848,9 +816,9 @@ void __init setup_kmalloc_cache_index_table(void)
for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
unsigned int elem = size_index_elem(i); unsigned int elem = size_index_elem(i);
if (elem >= ARRAY_SIZE(size_index)) if (elem >= ARRAY_SIZE(kmalloc_size_index))
break; break;
size_index[elem] = KMALLOC_SHIFT_LOW; kmalloc_size_index[elem] = KMALLOC_SHIFT_LOW;
} }
if (KMALLOC_MIN_SIZE >= 64) { if (KMALLOC_MIN_SIZE >= 64) {
...@@ -859,7 +827,7 @@ void __init setup_kmalloc_cache_index_table(void) ...@@ -859,7 +827,7 @@ void __init setup_kmalloc_cache_index_table(void)
* is 64 byte. * is 64 byte.
*/ */
for (i = 64 + 8; i <= 96; i += 8) for (i = 64 + 8; i <= 96; i += 8)
size_index[size_index_elem(i)] = 7; kmalloc_size_index[size_index_elem(i)] = 7;
} }
...@@ -870,7 +838,7 @@ void __init setup_kmalloc_cache_index_table(void) ...@@ -870,7 +838,7 @@ void __init setup_kmalloc_cache_index_table(void)
* instead. * instead.
*/ */
for (i = 128 + 8; i <= 192; i += 8) for (i = 128 + 8; i <= 192; i += 8)
size_index[size_index_elem(i)] = 8; kmalloc_size_index[size_index_elem(i)] = 8;
} }
} }
...@@ -968,95 +936,6 @@ void __init create_kmalloc_caches(slab_flags_t flags) ...@@ -968,95 +936,6 @@ void __init create_kmalloc_caches(slab_flags_t flags)
slab_state = UP; slab_state = UP;
} }
void free_large_kmalloc(struct folio *folio, void *object)
{
unsigned int order = folio_order(folio);
if (WARN_ON_ONCE(order == 0))
pr_warn_once("object pointer: 0x%p\n", object);
kmemleak_free(object);
kasan_kfree_large(object);
kmsan_kfree_large(object);
mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(folio_page(folio, 0), order);
}
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node);
static __always_inline
void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
{
struct kmem_cache *s;
void *ret;
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = __kmalloc_large_node(size, flags, node);
trace_kmalloc(caller, ret, size,
PAGE_SIZE << get_order(size), flags, node);
return ret;
}
s = kmalloc_slab(size, flags, caller);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
ret = __kmem_cache_alloc_node(s, flags, node, size, caller);
ret = kasan_kmalloc(s, ret, size, flags);
trace_kmalloc(caller, ret, size, s->size, flags, node);
return ret;
}
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
return __do_kmalloc_node(size, flags, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node);
void *__kmalloc(size_t size, gfp_t flags)
{
return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc);
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
int node, unsigned long caller)
{
return __do_kmalloc_node(size, flags, node, caller);
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
/**
* kfree - free previously allocated memory
* @object: pointer returned by kmalloc() or kmem_cache_alloc()
*
* If @object is NULL, no operation is performed.
*/
void kfree(const void *object)
{
struct folio *folio;
struct slab *slab;
struct kmem_cache *s;
trace_kfree(_RET_IP_, object);
if (unlikely(ZERO_OR_NULL_PTR(object)))
return;
folio = virt_to_folio(object);
if (unlikely(!folio_test_slab(folio))) {
free_large_kmalloc(folio, (void *)object);
return;
}
slab = folio_slab(folio);
s = slab->slab_cache;
__kmem_cache_free(s, (void *)object, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
/** /**
* __ksize -- Report full size of underlying allocation * __ksize -- Report full size of underlying allocation
* @object: pointer to the object * @object: pointer to the object
...@@ -1093,30 +972,6 @@ size_t __ksize(const void *object) ...@@ -1093,30 +972,6 @@ size_t __ksize(const void *object)
return slab_ksize(folio_slab(folio)->slab_cache); return slab_ksize(folio_slab(folio)->slab_cache);
} }
void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE,
size, _RET_IP_);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_trace);
void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size)
{
void *ret = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_node_trace);
gfp_t kmalloc_fix_flags(gfp_t flags) gfp_t kmalloc_fix_flags(gfp_t flags)
{ {
gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK; gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
...@@ -1129,57 +984,6 @@ gfp_t kmalloc_fix_flags(gfp_t flags) ...@@ -1129,57 +984,6 @@ gfp_t kmalloc_fix_flags(gfp_t flags)
return flags; return flags;
} }
/*
* To avoid unnecessary overhead, we pass through large allocation requests
* directly to the page allocator. We use __GFP_COMP, because we will need to
* know the allocation order to free the pages properly in kfree.
*/
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
{
struct page *page;
void *ptr = NULL;
unsigned int order = get_order(size);
if (unlikely(flags & GFP_SLAB_BUG_MASK))
flags = kmalloc_fix_flags(flags);
flags |= __GFP_COMP;
page = alloc_pages_node(node, flags, order);
if (page) {
ptr = page_address(page);
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
PAGE_SIZE << order);
}
ptr = kasan_kmalloc_large(ptr, size, flags);
/* As ptr might get tagged, call kmemleak hook after KASAN. */
kmemleak_alloc(ptr, size, 1, flags);
kmsan_kmalloc_large(ptr, size, flags);
return ptr;
}
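Because such allocations bypass the slab layer entirely, the only thing needed to free them later is the page order recorded with the compound page. A small userspace sketch of the order calculation, assuming a 4 KiB page size and using invented demo_* names:

/* demo_large_order.c -- sketch of the get_order() idea for large kmalloc()
 * requests. Build: cc -o demo demo_large_order.c */
#include <stdio.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_PAGE_SIZE  (1UL << DEMO_PAGE_SHIFT)

/* Smallest order such that (PAGE_SIZE << order) >= size. */
static unsigned int demo_get_order(unsigned long size)
{
	unsigned int order = 0;

	while ((DEMO_PAGE_SIZE << order) < size)
		order++;
	return order;
}

int main(void)
{
	unsigned long sizes[] = { 5000, 8192, 70000, 1UL << 20 };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned int order = demo_get_order(sizes[i]);

		printf("size %8lu -> order %u (%lu bytes allocated)\n",
		       sizes[i], order, DEMO_PAGE_SIZE << order);
	}
	return 0;
}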
void *kmalloc_large(size_t size, gfp_t flags)
{
void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, NUMA_NO_NODE);
return ret;
}
EXPORT_SYMBOL(kmalloc_large);
void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{
void *ret = __kmalloc_large_node(size, flags, node);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, node);
return ret;
}
EXPORT_SYMBOL(kmalloc_large_node);
#ifdef CONFIG_SLAB_FREELIST_RANDOM #ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Randomize a generic freelist */ /* Randomize a generic freelist */
static void freelist_randomize(unsigned int *list, static void freelist_randomize(unsigned int *list,
...@@ -1222,12 +1026,8 @@ void cache_random_seq_destroy(struct kmem_cache *cachep) ...@@ -1222,12 +1026,8 @@ void cache_random_seq_destroy(struct kmem_cache *cachep)
} }
#endif /* CONFIG_SLAB_FREELIST_RANDOM */ #endif /* CONFIG_SLAB_FREELIST_RANDOM */
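For reference, the randomization above is essentially a Fisher-Yates shuffle of the object index list. A hedged userspace sketch (using rand() rather than the kernel's own PRNG state, and invented names):

/* demo_freelist_shuffle.c -- illustrative shuffle of object indices, the
 * same idea CONFIG_SLAB_FREELIST_RANDOM applies to a new slab's freelist
 * order. Build: cc -o demo demo_freelist_shuffle.c */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static void demo_freelist_randomize(unsigned int *list, unsigned int count)
{
	for (unsigned int i = count - 1; i > 0; i--) {
		unsigned int j = (unsigned int)rand() % (i + 1);
		unsigned int tmp = list[i];

		list[i] = list[j];
		list[j] = tmp;
	}
}

int main(void)
{
	unsigned int list[16];

	srand((unsigned int)time(NULL));
	for (unsigned int i = 0; i < 16; i++)
		list[i] = i;
	demo_freelist_randomize(list, 16);
	for (unsigned int i = 0; i < 16; i++)
		printf("%u ", list[i]);
	printf("\n");
	return 0;
}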
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG) #ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (0600)
#else
#define SLABINFO_RIGHTS (0400) #define SLABINFO_RIGHTS (0400)
#endif
static void print_slabinfo_header(struct seq_file *m) static void print_slabinfo_header(struct seq_file *m)
{ {
...@@ -1235,18 +1035,10 @@ static void print_slabinfo_header(struct seq_file *m) ...@@ -1235,18 +1035,10 @@ static void print_slabinfo_header(struct seq_file *m)
* Output format version, so at least we can change it * Output format version, so at least we can change it
* without _too_ many complaints. * without _too_ many complaints.
*/ */
#ifdef CONFIG_DEBUG_SLAB
seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
seq_puts(m, "slabinfo - version: 2.1\n"); seq_puts(m, "slabinfo - version: 2.1\n");
#endif
seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>"); seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
seq_putc(m, '\n'); seq_putc(m, '\n');
} }
...@@ -1370,7 +1162,7 @@ static int __init slab_proc_init(void) ...@@ -1370,7 +1162,7 @@ static int __init slab_proc_init(void)
} }
module_init(slab_proc_init); module_init(slab_proc_init);
#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */ #endif /* CONFIG_SLUB_DEBUG */
static __always_inline __realloc_size(2) void * static __always_inline __realloc_size(2) void *
__do_krealloc(const void *p, size_t new_size, gfp_t flags) __do_krealloc(const void *p, size_t new_size, gfp_t flags)
...@@ -1488,10 +1280,3 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); ...@@ -1488,10 +1280,3 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kfree); EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
if (__should_failslab(s, gfpflags))
return -ENOMEM;
return 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include <linux/memory.h> #include <linux/memory.h>
#include <linux/math64.h> #include <linux/math64.h>
#include <linux/fault-inject.h> #include <linux/fault-inject.h>
#include <linux/kmemleak.h>
#include <linux/stacktrace.h> #include <linux/stacktrace.h>
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <linux/memcontrol.h> #include <linux/memcontrol.h>
...@@ -345,6 +346,60 @@ static void debugfs_slab_add(struct kmem_cache *); ...@@ -345,6 +346,60 @@ static void debugfs_slab_add(struct kmem_cache *);
static inline void debugfs_slab_add(struct kmem_cache *s) { } static inline void debugfs_slab_add(struct kmem_cache *s) { }
#endif #endif
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
FREE_FASTPATH, /* Free to cpu slab */
FREE_SLOWPATH, /* Freeing not to cpu slab */
FREE_FROZEN, /* Freeing to frozen slab */
FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
FREE_REMOVE_PARTIAL, /* Freeing removes last object */
ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */
ALLOC_SLAB, /* Cpu slab acquired from page allocator */
ALLOC_REFILL, /* Refill cpu slab from slab freelist */
ALLOC_NODE_MISMATCH, /* Switching cpu slab */
FREE_SLAB, /* Slab freed to the page allocator */
CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */
DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
DEACTIVATE_BYPASS, /* Implicit deactivation */
ORDER_FALLBACK, /* Number of times fallback was necessary */
CMPXCHG_DOUBLE_CPU_FAIL,/* Failures of this_cpu_cmpxchg_double */
CMPXCHG_DOUBLE_FAIL, /* Failures of slab freelist update */
CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */
CPU_PARTIAL_FREE, /* Refill cpu partial on free */
CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
NR_SLUB_STAT_ITEMS
};
#ifndef CONFIG_SLUB_TINY
/*
* When changing the layout, make sure freelist and tid are still compatible
* with this_cpu_cmpxchg_double() alignment requirements.
*/
struct kmem_cache_cpu {
union {
struct {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
};
freelist_aba_t freelist_tid;
};
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct slab *partial; /* Partially allocated frozen slabs */
#endif
local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
unsigned int stat[NR_SLUB_STAT_ITEMS];
#endif
};
#endif /* CONFIG_SLUB_TINY */
static inline void stat(const struct kmem_cache *s, enum stat_item si) static inline void stat(const struct kmem_cache *s, enum stat_item si)
{ {
#ifdef CONFIG_SLUB_STATS #ifdef CONFIG_SLUB_STATS
...@@ -356,6 +411,41 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) ...@@ -356,6 +411,41 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
#endif #endif
} }
static inline
void stat_add(const struct kmem_cache *s, enum stat_item si, int v)
{
#ifdef CONFIG_SLUB_STATS
raw_cpu_add(s->cpu_slab->stat[si], v);
#endif
}
/*
* The slab lists for all objects.
*/
struct kmem_cache_node {
spinlock_t list_lock;
unsigned long nr_partial;
struct list_head partial;
#ifdef CONFIG_SLUB_DEBUG
atomic_long_t nr_slabs;
atomic_long_t total_objects;
struct list_head full;
#endif
};
static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
return s->node[node];
}
/*
* Iterator over all nodes. The body will be executed for each node that has
* a kmem_cache_node structure allocated (which is true for all online nodes)
*/
#define for_each_kmem_cache_node(__s, __node, __n) \
for (__node = 0; __node < nr_node_ids; __node++) \
if ((__n = get_node(__s, __node)))
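A userspace mock of how such an iterator is typically used, skipping nodes whose kmem_cache_node pointer is NULL; everything here (the demo_* names, the node count) is hypothetical:

/* demo_node_iter.c -- Build: cc -o demo demo_node_iter.c */
#include <stdio.h>
#include <stddef.h>

#define DEMO_NR_NODE_IDS 4

struct demo_cache_node {
	unsigned long nr_partial;
};

struct demo_cache {
	struct demo_cache_node *node[DEMO_NR_NODE_IDS];
};

static struct demo_cache_node *demo_get_node(struct demo_cache *s, int node)
{
	return s->node[node];
}

/* Body runs only for nodes that actually have a node structure. */
#define demo_for_each_cache_node(__s, __node, __n)		\
	for (__node = 0; __node < DEMO_NR_NODE_IDS; __node++)	\
		if ((__n = demo_get_node(__s, __node)))

int main(void)
{
	struct demo_cache_node n0 = { .nr_partial = 3 };
	struct demo_cache_node n2 = { .nr_partial = 7 };
	struct demo_cache cache = { .node = { &n0, NULL, &n2, NULL } };
	struct demo_cache_node *n;
	int node;

	demo_for_each_cache_node(&cache, node, n)
		printf("node %d: %lu partial slabs\n", node, n->nr_partial);
	return 0;
}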
/* /*
* Tracks for which NUMA nodes we have kmem_cache_nodes allocated. * Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
* Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily * Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily
...@@ -1774,12 +1864,214 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, ...@@ -1774,12 +1864,214 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
#endif #endif
#endif /* CONFIG_SLUB_DEBUG */ #endif /* CONFIG_SLUB_DEBUG */
static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
{
return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
}
#ifdef CONFIG_MEMCG_KMEM
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
kfree(slab_objcgs(slab));
slab->memcg_data = 0;
}
static inline size_t obj_full_size(struct kmem_cache *s)
{
/*
* For each accounted object there is an extra space which is used
* to store obj_cgroup membership. Charge it too.
*/
return s->size + sizeof(struct obj_cgroup *);
}
/*
* Returns false if the allocation should fail.
*/
static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t objects, gfp_t flags)
{
/*
* The obtained objcg pointer is safe to use within the current scope,
* defined by current task or set_active_memcg() pair.
* obj_cgroup_get() is used to get a permanent reference.
*/
struct obj_cgroup *objcg = current_obj_cgroup();
if (!objcg)
return true;
if (lru) {
int ret;
struct mem_cgroup *memcg;
memcg = get_mem_cgroup_from_objcg(objcg);
ret = memcg_list_lru_alloc(memcg, lru, flags);
css_put(&memcg->css);
if (ret)
return false;
}
if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
return false;
*objcgp = objcg;
return true;
}
/*
* Returns false if the allocation should fail.
*/
static __fastpath_inline
bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
struct obj_cgroup **objcgp, size_t objects,
gfp_t flags)
{
if (!memcg_kmem_online())
return true;
if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
return true;
return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects,
flags));
}
static void __memcg_slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg,
gfp_t flags, size_t size,
void **p)
{
struct slab *slab;
unsigned long off;
size_t i;
flags &= gfp_allowed_mask;
for (i = 0; i < size; i++) {
if (likely(p[i])) {
slab = virt_to_slab(p[i]);
if (!slab_objcgs(slab) &&
memcg_alloc_slab_cgroups(slab, s, flags, false)) {
obj_cgroup_uncharge(objcg, obj_full_size(s));
continue;
}
off = obj_to_index(s, slab, p[i]);
obj_cgroup_get(objcg);
slab_objcgs(slab)[off] = objcg;
mod_objcg_state(objcg, slab_pgdat(slab),
cache_vmstat_idx(s), obj_full_size(s));
} else {
obj_cgroup_uncharge(objcg, obj_full_size(s));
}
}
}
static __fastpath_inline
void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
gfp_t flags, size_t size, void **p)
{
if (likely(!memcg_kmem_online() || !objcg))
return;
return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
}
static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects,
struct obj_cgroup **objcgs)
{
for (int i = 0; i < objects; i++) {
struct obj_cgroup *objcg;
unsigned int off;
off = obj_to_index(s, slab, p[i]);
objcg = objcgs[off];
if (!objcg)
continue;
objcgs[off] = NULL;
obj_cgroup_uncharge(objcg, obj_full_size(s));
mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
-obj_full_size(s));
obj_cgroup_put(objcg);
}
}
static __fastpath_inline
void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
int objects)
{
struct obj_cgroup **objcgs;
if (!memcg_kmem_online())
return;
objcgs = slab_objcgs(slab);
if (likely(!objcgs))
return;
__memcg_slab_free_hook(s, slab, p, objects, objcgs);
}
static inline
void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
struct obj_cgroup *objcg)
{
if (objcg)
obj_cgroup_uncharge(objcg, objects * obj_full_size(s));
}
#else /* CONFIG_MEMCG_KMEM */
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
{
return NULL;
}
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
}
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t objects, gfp_t flags)
{
return true;
}
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg,
gfp_t flags, size_t size,
void **p)
{
}
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects)
{
}
static inline
void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
struct obj_cgroup *objcg)
{
}
#endif /* CONFIG_MEMCG_KMEM */
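As a rough model of the bookkeeping in these hooks: every accounted object is charged obj_full_size(), i.e. the object plus one extra pointer for the obj_cgroup reference, and objects that cannot be placed are uncharged again. A toy sketch with invented names and sizes:

/* demo_objcg_charge.c -- Build: cc -o demo demo_objcg_charge.c */
#include <stdio.h>
#include <stddef.h>

struct demo_group {
	long charged;			/* bytes charged to this group */
};

struct demo_cache {
	size_t size;			/* object size including metadata */
};

static size_t demo_obj_full_size(const struct demo_cache *s)
{
	/* object itself + the back-pointer stored alongside it */
	return s->size + sizeof(void *);
}

static void demo_charge(struct demo_group *g, const struct demo_cache *s,
			size_t objects)
{
	g->charged += (long)(objects * demo_obj_full_size(s));
}

static void demo_uncharge(struct demo_group *g, const struct demo_cache *s,
			  size_t objects)
{
	g->charged -= (long)(objects * demo_obj_full_size(s));
}

int main(void)
{
	struct demo_cache cache = { .size = 256 };
	struct demo_group group = { 0 };

	demo_charge(&group, &cache, 8);		/* bulk charge of 8 objects */
	demo_uncharge(&group, &cache, 2);	/* two of them failed placement */
	printf("charged: %ld bytes\n", group.charged);
	return 0;
}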
/* /*
* Hooks for other subsystems that check memory allocations. In a typical * Hooks for other subsystems that check memory allocations. In a typical
* production configuration these hooks all should produce no code at all. * production configuration these hooks all should produce no code at all.
*
* Returns true if freeing of the object can proceed, false if its reuse
* was delayed by KASAN quarantine, or it was returned to KFENCE.
*/ */
static __always_inline bool slab_free_hook(struct kmem_cache *s, static __always_inline
void *x, bool init) bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
{ {
kmemleak_free_recursive(x, s->flags); kmemleak_free_recursive(x, s->flags);
kmsan_slab_free(s, x); kmsan_slab_free(s, x);
...@@ -1794,6 +2086,9 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, ...@@ -1794,6 +2086,9 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
__kcsan_check_access(x, s->object_size, __kcsan_check_access(x, s->object_size,
KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT); KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
if (kfence_free(x))
return false;
/* /*
* As memory initialization might be integrated into KASAN, * As memory initialization might be integrated into KASAN,
* kasan_slab_free and initialization memset's must be * kasan_slab_free and initialization memset's must be
...@@ -1802,7 +2097,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, ...@@ -1802,7 +2097,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
* The initialization memset's clear the object and the metadata, * The initialization memset's clear the object and the metadata,
* but don't touch the SLAB redzone. * but don't touch the SLAB redzone.
*/ */
if (init) { if (unlikely(init)) {
int rsize; int rsize;
if (!kasan_has_integrated_init()) if (!kasan_has_integrated_init())
...@@ -1812,7 +2107,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, ...@@ -1812,7 +2107,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
s->size - s->inuse - rsize); s->size - s->inuse - rsize);
} }
/* KASAN might put x into memory quarantine, delaying its reuse. */ /* KASAN might put x into memory quarantine, delaying its reuse. */
return kasan_slab_free(s, x, init); return !kasan_slab_free(s, x, init);
} }
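The contract is the one spelled out in the comment above: a true return means the caller may recycle the object immediately, false means reuse is delayed (quarantine, KFENCE). A toy userspace model of that contract, with entirely invented names and a fake "every third object is quarantined" rule:

/* demo_free_hook.c -- Build: cc -o demo demo_free_hook.c */
#include <stdbool.h>
#include <stdio.h>

struct demo_obj {
	int id;
	struct demo_obj *next;
};

static struct demo_obj *freelist;	/* objects ready for reuse */
static struct demo_obj *quarantine;	/* objects whose reuse is delayed */

/* Pretend every third object is quarantined. */
static bool demo_free_hook(struct demo_obj *obj)
{
	if (obj->id % 3 == 0) {
		obj->next = quarantine;
		quarantine = obj;
		return false;		/* caller must not reuse it yet */
	}
	return true;			/* freeing can proceed */
}

static void demo_free(struct demo_obj *obj)
{
	if (!demo_free_hook(obj))
		return;
	obj->next = freelist;
	freelist = obj;
}

int main(void)
{
	struct demo_obj objs[6];

	for (int i = 0; i < 6; i++) {
		objs[i].id = i;
		demo_free(&objs[i]);
	}
	for (struct demo_obj *o = freelist; o; o = o->next)
		printf("freelist: obj %d\n", o->id);
	for (struct demo_obj *o = quarantine; o; o = o->next)
		printf("quarantine: obj %d\n", o->id);
	return 0;
}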
static inline bool slab_free_freelist_hook(struct kmem_cache *s, static inline bool slab_free_freelist_hook(struct kmem_cache *s,
...@@ -1822,23 +2117,26 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, ...@@ -1822,23 +2117,26 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
void *object; void *object;
void *next = *head; void *next = *head;
void *old_tail = *tail ? *tail : *head; void *old_tail = *tail;
bool init;
if (is_kfence_address(next)) { if (is_kfence_address(next)) {
slab_free_hook(s, next, false); slab_free_hook(s, next, false);
return true; return false;
} }
/* Head and tail of the reconstructed freelist */ /* Head and tail of the reconstructed freelist */
*head = NULL; *head = NULL;
*tail = NULL; *tail = NULL;
init = slab_want_init_on_free(s);
do { do {
object = next; object = next;
next = get_freepointer(s, object); next = get_freepointer(s, object);
/* If object's reuse doesn't have to be delayed */ /* If object's reuse doesn't have to be delayed */
if (!slab_free_hook(s, object, slab_want_init_on_free(s))) { if (likely(slab_free_hook(s, object, init))) {
/* Move object to the new freelist */ /* Move object to the new freelist */
set_freepointer(s, object, *head); set_freepointer(s, object, *head);
*head = object; *head = object;
...@@ -1853,9 +2151,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, ...@@ -1853,9 +2151,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
} }
} while (object != old_tail); } while (object != old_tail);
if (*head == *tail)
*tail = NULL;
return *head != NULL; return *head != NULL;
} }
...@@ -2008,6 +2303,26 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) ...@@ -2008,6 +2303,26 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
} }
#endif /* CONFIG_SLAB_FREELIST_RANDOM */ #endif /* CONFIG_SLAB_FREELIST_RANDOM */
static __always_inline void account_slab(struct slab *slab, int order,
struct kmem_cache *s, gfp_t gfp)
{
if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
memcg_alloc_slab_cgroups(slab, s, gfp, true);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
PAGE_SIZE << order);
}
static __always_inline void unaccount_slab(struct slab *slab, int order,
struct kmem_cache *s)
{
if (memcg_kmem_online())
memcg_free_slab_cgroups(slab);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
}
static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{ {
struct slab *slab; struct slab *slab;
...@@ -3420,6 +3735,86 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, ...@@ -3420,6 +3735,86 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
0, sizeof(void *)); 0, sizeof(void *));
} }
noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
if (__should_failslab(s, gfpflags))
return -ENOMEM;
return 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
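should_failslab() is an error-injection point: with failslab configured, allocations can be made to fail on purpose so callers' error paths get exercised. A loose userspace analogue (not the kernel fault-injection framework; the names and the every-Nth policy are made up):

/* demo_failslab.c -- Build: cc -o demo demo_failslab.c */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

static unsigned long demo_alloc_count;
static unsigned long demo_fail_every;	/* 0 = never inject failures */

static int demo_should_fail(void)
{
	if (!demo_fail_every)
		return 0;
	return (++demo_alloc_count % demo_fail_every) == 0 ? -ENOMEM : 0;
}

static void *demo_alloc(size_t size)
{
	if (demo_should_fail())
		return NULL;		/* injected failure */
	return malloc(size);
}

int main(void)
{
	demo_fail_every = 3;		/* fail every third allocation */

	for (int i = 0; i < 6; i++) {
		void *p = demo_alloc(64);

		printf("alloc %d: %s\n", i, p ? "ok" : "injected failure");
		free(p);
	}
	return 0;
}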
static __fastpath_inline
struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t size, gfp_t flags)
{
flags &= gfp_allowed_mask;
might_alloc(flags);
if (unlikely(should_failslab(s, flags)))
return NULL;
if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags)))
return NULL;
return s;
}
static __fastpath_inline
void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
gfp_t flags, size_t size, void **p, bool init,
unsigned int orig_size)
{
unsigned int zero_size = s->object_size;
bool kasan_init = init;
size_t i;
gfp_t init_flags = flags & gfp_allowed_mask;
/*
* For kmalloc object, the allocated memory size(object_size) is likely
* larger than the requested size(orig_size). If redzone check is
* enabled for the extra space, don't zero it, as it will be redzoned
* soon. The redzone operation for this extra space could be seen as a
* replacement of current poisoning under certain debug option, and
* won't break other sanity checks.
*/
if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
(s->flags & SLAB_KMALLOC))
zero_size = orig_size;
/*
* When slub_debug is enabled, avoid memory initialization integrated
* into KASAN and instead zero out the memory via the memset below with
* the proper size. Otherwise, KASAN might overwrite SLUB redzones and
* cause false-positive reports. This does not lead to a performance
* penalty on production builds, as slub_debug is not intended to be
* enabled there.
*/
if (__slub_debug_enabled())
kasan_init = false;
/*
* As memory initialization might be integrated into KASAN,
* kasan_slab_alloc and initialization memset must be
* kept together to avoid discrepancies in behavior.
*
* As p[i] might get tagged, memset and kmemleak hook come after KASAN.
*/
for (i = 0; i < size; i++) {
p[i] = kasan_slab_alloc(s, p[i], init_flags, kasan_init);
if (p[i] && init && (!kasan_init ||
!kasan_has_integrated_init()))
memset(p[i], 0, zero_size);
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, init_flags);
kmsan_slab_alloc(s, p[i], init_flags);
}
memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
}
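The zero_size logic above means a debug kmalloc object is only zeroed up to the requested size, with the tail left to the redzone machinery. A tiny userspace sketch of that split, with arbitrary sizes and an arbitrary poison value:

/* demo_partial_zero.c -- Build: cc -o demo demo_partial_zero.c */
#include <stdio.h>
#include <string.h>

#define OBJECT_SIZE	64	/* size of the backing cache object */
#define REQUESTED	40	/* what the caller actually asked for */
#define REDZONE_BYTE	0xcc

int main(void)
{
	unsigned char obj[OBJECT_SIZE];

	/* Debug build: redzone the whole object first... */
	memset(obj, REDZONE_BYTE, sizeof(obj));
	/* ...then zero only the requested bytes, like zero_size = orig_size. */
	memset(obj, 0, REQUESTED);

	for (int i = 0; i < OBJECT_SIZE; i++)
		printf("%02x%s", obj[i], (i % 16 == 15) ? "\n" : " ");
	return 0;
}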
/* /*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call * have the fastpath folded into their functions. So no function call
...@@ -3438,7 +3833,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list ...@@ -3438,7 +3833,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
bool init = false; bool init = false;
s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags); s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
if (!s) if (unlikely(!s))
return NULL; return NULL;
object = kfence_alloc(s, orig_size, gfpflags); object = kfence_alloc(s, orig_size, gfpflags);
...@@ -3460,53 +3855,169 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list ...@@ -3460,53 +3855,169 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
return object; return object;
} }
static __fastpath_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru, void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
gfp_t gfpflags, unsigned long addr, size_t orig_size)
{ {
return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size); void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_,
s->object_size);
trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE);
return ret;
} }
EXPORT_SYMBOL(kmem_cache_alloc);
static __fastpath_inline void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags)
gfp_t gfpflags)
{ {
void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size); void *ret = slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, _RET_IP_,
s->object_size);
trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE); trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE);
return ret; return ret;
} }
EXPORT_SYMBOL(kmem_cache_alloc_lru);
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) /**
* kmem_cache_alloc_node - Allocate an object on the specified node
* @s: The cache to allocate from.
* @gfpflags: See kmalloc().
* @node: node number of the target node.
*
* Identical to kmem_cache_alloc but it will allocate memory on the given
* node, which can improve the performance for cpu bound structures.
*
* Fallback to other node is possible if __GFP_THISNODE is not set.
*
* Return: pointer to the new object or %NULL in case of error
*/
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{ {
return __kmem_cache_alloc_lru(s, NULL, gfpflags); void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, node);
return ret;
} }
EXPORT_SYMBOL(kmem_cache_alloc); EXPORT_SYMBOL(kmem_cache_alloc_node);
void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, /*
gfp_t gfpflags) * To avoid unnecessary overhead, we pass through large allocation requests
* directly to the page allocator. We use __GFP_COMP, because we will need to
* know the allocation order to free the pages properly in kfree.
*/
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
{ {
return __kmem_cache_alloc_lru(s, lru, gfpflags); struct page *page;
void *ptr = NULL;
unsigned int order = get_order(size);
if (unlikely(flags & GFP_SLAB_BUG_MASK))
flags = kmalloc_fix_flags(flags);
flags |= __GFP_COMP;
page = alloc_pages_node(node, flags, order);
if (page) {
ptr = page_address(page);
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
PAGE_SIZE << order);
}
ptr = kasan_kmalloc_large(ptr, size, flags);
/* As ptr might get tagged, call kmemleak hook after KASAN. */
kmemleak_alloc(ptr, size, 1, flags);
kmsan_kmalloc_large(ptr, size, flags);
return ptr;
} }
EXPORT_SYMBOL(kmem_cache_alloc_lru);
void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, void *kmalloc_large(size_t size, gfp_t flags)
int node, size_t orig_size, {
unsigned long caller) void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, NUMA_NO_NODE);
return ret;
}
EXPORT_SYMBOL(kmalloc_large);
void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{ {
return slab_alloc_node(s, NULL, gfpflags, node, void *ret = __kmalloc_large_node(size, flags, node);
caller, orig_size);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, node);
return ret;
} }
EXPORT_SYMBOL(kmalloc_large_node);
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) static __always_inline
void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
unsigned long caller)
{ {
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size); struct kmem_cache *s;
void *ret;
trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, node); if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = __kmalloc_large_node(size, flags, node);
trace_kmalloc(caller, ret, size,
PAGE_SIZE << get_order(size), flags, node);
return ret;
}
if (unlikely(!size))
return ZERO_SIZE_PTR;
s = kmalloc_slab(size, flags, caller);
ret = slab_alloc_node(s, NULL, flags, node, caller, size);
ret = kasan_kmalloc(s, ret, size, flags);
trace_kmalloc(caller, ret, size, s->size, flags, node);
return ret; return ret;
} }
EXPORT_SYMBOL(kmem_cache_alloc_node);
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
return __do_kmalloc_node(size, flags, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node);
void *__kmalloc(size_t size, gfp_t flags)
{
return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc);
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
int node, unsigned long caller)
{
return __do_kmalloc_node(size, flags, node, caller);
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE,
_RET_IP_, size);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_trace);
void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_node_trace);
static noinline void free_to_partial_list( static noinline void free_to_partial_list(
struct kmem_cache *s, struct slab *slab, struct kmem_cache *s, struct slab *slab,
...@@ -3592,9 +4103,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, ...@@ -3592,9 +4103,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
stat(s, FREE_SLOWPATH); stat(s, FREE_SLOWPATH);
if (kfence_free(head))
return;
if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
free_to_partial_list(s, slab, head, tail, cnt, addr); free_to_partial_list(s, slab, head, tail, cnt, addr);
return; return;
...@@ -3716,7 +4224,6 @@ static __always_inline void do_slab_free(struct kmem_cache *s, ...@@ -3716,7 +4224,6 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
struct slab *slab, void *head, void *tail, struct slab *slab, void *head, void *tail,
int cnt, unsigned long addr) int cnt, unsigned long addr)
{ {
void *tail_obj = tail ? : head;
struct kmem_cache_cpu *c; struct kmem_cache_cpu *c;
unsigned long tid; unsigned long tid;
void **freelist; void **freelist;
...@@ -3735,14 +4242,14 @@ static __always_inline void do_slab_free(struct kmem_cache *s, ...@@ -3735,14 +4242,14 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
barrier(); barrier();
if (unlikely(slab != c->slab)) { if (unlikely(slab != c->slab)) {
__slab_free(s, slab, head, tail_obj, cnt, addr); __slab_free(s, slab, head, tail, cnt, addr);
return; return;
} }
if (USE_LOCKLESS_FAST_PATH()) { if (USE_LOCKLESS_FAST_PATH()) {
freelist = READ_ONCE(c->freelist); freelist = READ_ONCE(c->freelist);
set_freepointer(s, tail_obj, freelist); set_freepointer(s, tail, freelist);
if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) { if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
note_cmpxchg_failure("slab_free", s, tid); note_cmpxchg_failure("slab_free", s, tid);
...@@ -3759,60 +4266,143 @@ static __always_inline void do_slab_free(struct kmem_cache *s, ...@@ -3759,60 +4266,143 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
tid = c->tid; tid = c->tid;
freelist = c->freelist; freelist = c->freelist;
set_freepointer(s, tail_obj, freelist); set_freepointer(s, tail, freelist);
c->freelist = head; c->freelist = head;
c->tid = next_tid(tid); c->tid = next_tid(tid);
local_unlock(&s->cpu_slab->lock); local_unlock(&s->cpu_slab->lock);
} }
stat(s, FREE_FASTPATH); stat_add(s, FREE_FASTPATH, cnt);
} }
#else /* CONFIG_SLUB_TINY */ #else /* CONFIG_SLUB_TINY */
static void do_slab_free(struct kmem_cache *s, static void do_slab_free(struct kmem_cache *s,
struct slab *slab, void *head, void *tail, struct slab *slab, void *head, void *tail,
int cnt, unsigned long addr) int cnt, unsigned long addr)
{ {
void *tail_obj = tail ? : head; __slab_free(s, slab, head, tail, cnt, addr);
__slab_free(s, slab, head, tail_obj, cnt, addr);
} }
#endif /* CONFIG_SLUB_TINY */ #endif /* CONFIG_SLUB_TINY */
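Ignoring the tid/cmpxchg machinery, the fastpath free boils down to splicing a prebuilt head..tail chain onto the per-CPU freelist and bumping FREE_FASTPATH by the chain length. A single-threaded sketch of just that splice, with invented names:

/* demo_freelist_splice.c -- Build: cc -o demo demo_freelist_splice.c */
#include <stdio.h>
#include <stddef.h>

struct demo_obj {
	struct demo_obj *next;	/* stand-in for the embedded free pointer */
	int id;
};

static struct demo_obj *cpu_freelist;
static unsigned long free_fastpath_stat;

static void demo_do_free(struct demo_obj *head, struct demo_obj *tail, int cnt)
{
	tail->next = cpu_freelist;	/* set_freepointer(s, tail, freelist) */
	cpu_freelist = head;		/* c->freelist = head */
	free_fastpath_stat += cnt;	/* stat_add(s, FREE_FASTPATH, cnt) */
}

int main(void)
{
	struct demo_obj objs[3] = {
		{ .next = &objs[1], .id = 0 },
		{ .next = &objs[2], .id = 1 },
		{ .next = NULL,     .id = 2 },
	};

	demo_do_free(&objs[0], &objs[2], 3);

	for (struct demo_obj *o = cpu_freelist; o; o = o->next)
		printf("freelist obj %d\n", o->id);
	printf("FREE_FASTPATH count: %lu\n", free_fastpath_stat);
	return 0;
}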
static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab, static __fastpath_inline
void *head, void *tail, void **p, int cnt, void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
unsigned long addr) unsigned long addr)
{
memcg_slab_free_hook(s, slab, &object, 1);
if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
do_slab_free(s, slab, object, object, 1, addr);
}
static __fastpath_inline
void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
void *tail, void **p, int cnt, unsigned long addr)
{ {
memcg_slab_free_hook(s, slab, p, cnt); memcg_slab_free_hook(s, slab, p, cnt);
/* /*
* With KASAN enabled slab_free_freelist_hook modifies the freelist * With KASAN enabled slab_free_freelist_hook modifies the freelist
* to remove objects, whose reuse must be delayed. * to remove objects, whose reuse must be delayed.
*/ */
if (slab_free_freelist_hook(s, &head, &tail, &cnt)) if (likely(slab_free_freelist_hook(s, &head, &tail, &cnt)))
do_slab_free(s, slab, head, tail, cnt, addr); do_slab_free(s, slab, head, tail, cnt, addr);
} }
#ifdef CONFIG_KASAN_GENERIC #ifdef CONFIG_KASAN_GENERIC
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr) void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
{ {
do_slab_free(cache, virt_to_slab(x), x, NULL, 1, addr); do_slab_free(cache, virt_to_slab(x), x, x, 1, addr);
} }
#endif #endif
void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller) static inline struct kmem_cache *virt_to_cache(const void *obj)
{
struct slab *slab;
slab = virt_to_slab(obj);
if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n", __func__))
return NULL;
return slab->slab_cache;
}
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{ {
slab_free(s, virt_to_slab(x), x, NULL, &x, 1, caller); struct kmem_cache *cachep;
if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
!kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
return s;
cachep = virt_to_cache(x);
if (WARN(cachep && cachep != s,
"%s: Wrong slab cache. %s but object is from %s\n",
__func__, s->name, cachep->name))
print_tracking(cachep, x);
return cachep;
} }
/**
* kmem_cache_free - Deallocate an object
* @s: The cache the allocation was from.
* @x: The previously allocated object.
*
* Free an object which was previously allocated from this
* cache.
*/
void kmem_cache_free(struct kmem_cache *s, void *x) void kmem_cache_free(struct kmem_cache *s, void *x)
{ {
s = cache_from_obj(s, x); s = cache_from_obj(s, x);
if (!s) if (!s)
return; return;
trace_kmem_cache_free(_RET_IP_, x, s); trace_kmem_cache_free(_RET_IP_, x, s);
slab_free(s, virt_to_slab(x), x, NULL, &x, 1, _RET_IP_); slab_free(s, virt_to_slab(x), x, _RET_IP_);
} }
EXPORT_SYMBOL(kmem_cache_free); EXPORT_SYMBOL(kmem_cache_free);
static void free_large_kmalloc(struct folio *folio, void *object)
{
unsigned int order = folio_order(folio);
if (WARN_ON_ONCE(order == 0))
pr_warn_once("object pointer: 0x%p\n", object);
kmemleak_free(object);
kasan_kfree_large(object);
kmsan_kfree_large(object);
mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(folio_page(folio, 0), order);
}
/**
* kfree - free previously allocated memory
* @object: pointer returned by kmalloc() or kmem_cache_alloc()
*
* If @object is NULL, no operation is performed.
*/
void kfree(const void *object)
{
struct folio *folio;
struct slab *slab;
struct kmem_cache *s;
void *x = (void *)object;
trace_kfree(_RET_IP_, object);
if (unlikely(ZERO_OR_NULL_PTR(object)))
return;
folio = virt_to_folio(object);
if (unlikely(!folio_test_slab(folio))) {
free_large_kmalloc(folio, (void *)object);
return;
}
slab = folio_slab(folio);
s = slab->slab_cache;
slab_free(s, slab, x, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
struct detached_freelist { struct detached_freelist {
struct slab *slab; struct slab *slab;
void *tail; void *tail;
...@@ -3892,6 +4482,27 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, ...@@ -3892,6 +4482,27 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
return same; return same;
} }
/*
* Internal bulk free of objects that were not initialised by the post alloc
* hooks and thus should not be processed by the free hooks
*/
static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
{
if (!size)
return;
do {
struct detached_freelist df;
size = build_detached_freelist(s, size, p, &df);
if (!df.slab)
continue;
do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt,
_RET_IP_);
} while (likely(size));
}
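The detached-freelist trick groups the pointers passed to a bulk free by the slab they belong to, so each slab is handled in one call. A hedged userspace sketch of the grouping idea (using the containing 4 KiB page as a stand-in for the slab; this is not the kernel algorithm verbatim):

/* demo_detached_freelist.c -- Build: cc -o demo demo_detached_freelist.c */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

struct demo_obj {
	struct demo_obj *next;
	int id;
};

static uintptr_t demo_slab_of(void *p)
{
	return (uintptr_t)p >> 12;	/* same page => same pretend slab */
}

/* Build one chain of all objects in p[] that live on the same slab as
 * p[size-1]; compact the rest to the front and return the new size. */
static size_t demo_build_chain(void **p, size_t size,
			       struct demo_obj **head, int *cnt)
{
	uintptr_t slab = demo_slab_of(p[size - 1]);
	size_t kept = 0;

	*head = NULL;
	*cnt = 0;
	for (size_t i = 0; i < size; i++) {
		struct demo_obj *obj = p[i];

		if (demo_slab_of(obj) == slab) {
			obj->next = *head;
			*head = obj;
			(*cnt)++;
		} else {
			p[kept++] = obj;
		}
	}
	return kept;
}

int main(void)
{
	/* Two 4 KiB "slabs", three objects carved out of each. */
	struct demo_obj *slab_a = aligned_alloc(4096, 4096);
	struct demo_obj *slab_b = aligned_alloc(4096, 4096);
	void *p[6];
	size_t size = 6;

	if (!slab_a || !slab_b)
		return 1;
	for (int i = 0; i < 3; i++) {
		slab_a[i].id = i;
		slab_b[i].id = i + 3;
	}
	/* Interleave objects from the two slabs, as a caller might. */
	p[0] = &slab_a[0]; p[1] = &slab_b[0]; p[2] = &slab_a[1];
	p[3] = &slab_b[1]; p[4] = &slab_a[2]; p[5] = &slab_b[2];

	while (size) {
		struct demo_obj *head;
		int cnt;

		size = demo_build_chain(p, size, &head, &cnt);
		printf("chain of %d object(s):", cnt);
		for (struct demo_obj *o = head; o; o = o->next)
			printf(" %d", o->id);
		printf("\n");
	}
	free(slab_a);
	free(slab_b);
	return 0;
}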
/* Note that interrupts must be enabled when calling this function. */ /* Note that interrupts must be enabled when calling this function. */
void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
{ {
...@@ -3905,15 +4516,16 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) ...@@ -3905,15 +4516,16 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
if (!df.slab) if (!df.slab)
continue; continue;
slab_free(df.s, df.slab, df.freelist, df.tail, &p[size], df.cnt, slab_free_bulk(df.s, df.slab, df.freelist, df.tail, &p[size],
_RET_IP_); df.cnt, _RET_IP_);
} while (likely(size)); } while (likely(size));
} }
EXPORT_SYMBOL(kmem_cache_free_bulk); EXPORT_SYMBOL(kmem_cache_free_bulk);
#ifndef CONFIG_SLUB_TINY #ifndef CONFIG_SLUB_TINY
static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, static inline
size_t size, void **p, struct obj_cgroup *objcg) int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
{ {
struct kmem_cache_cpu *c; struct kmem_cache_cpu *c;
unsigned long irqflags; unsigned long irqflags;
...@@ -3967,6 +4579,7 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, ...@@ -3967,6 +4579,7 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
c->freelist = get_freepointer(s, object); c->freelist = get_freepointer(s, object);
p[i] = object; p[i] = object;
maybe_wipe_obj_freeptr(s, p[i]); maybe_wipe_obj_freeptr(s, p[i]);
stat(s, ALLOC_FASTPATH);
} }
c->tid = next_tid(c->tid); c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, irqflags); local_unlock_irqrestore(&s->cpu_slab->lock, irqflags);
...@@ -3976,14 +4589,13 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, ...@@ -3976,14 +4589,13 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
error: error:
slub_put_cpu_ptr(s->cpu_slab); slub_put_cpu_ptr(s->cpu_slab);
slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size); __kmem_cache_free_bulk(s, i, p);
kmem_cache_free_bulk(s, i, p);
return 0; return 0;
} }
#else /* CONFIG_SLUB_TINY */ #else /* CONFIG_SLUB_TINY */
static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
size_t size, void **p, struct obj_cgroup *objcg) size_t size, void **p)
{ {
int i; int i;
...@@ -4006,8 +4618,7 @@ static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, ...@@ -4006,8 +4618,7 @@ static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
return i; return i;
error: error:
slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size); __kmem_cache_free_bulk(s, i, p);
kmem_cache_free_bulk(s, i, p);
return 0; return 0;
} }
#endif /* CONFIG_SLUB_TINY */ #endif /* CONFIG_SLUB_TINY */
...@@ -4027,15 +4638,19 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, ...@@ -4027,15 +4638,19 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
if (unlikely(!s)) if (unlikely(!s))
return 0; return 0;
i = __kmem_cache_alloc_bulk(s, flags, size, p, objcg); i = __kmem_cache_alloc_bulk(s, flags, size, p);
/* /*
* memcg and kmem_cache debug support and memory initialization. * memcg and kmem_cache debug support and memory initialization.
* Done outside of the IRQ disabled fastpath loop. * Done outside of the IRQ disabled fastpath loop.
*/ */
if (i != 0) if (likely(i != 0)) {
slab_post_alloc_hook(s, objcg, flags, size, p, slab_post_alloc_hook(s, objcg, flags, size, p,
slab_want_init_on_alloc(flags, s), s->object_size); slab_want_init_on_alloc(flags, s), s->object_size);
} else {
memcg_slab_alloc_error_hook(s, size, objcg);
}
return i; return i;
} }
EXPORT_SYMBOL(kmem_cache_alloc_bulk); EXPORT_SYMBOL(kmem_cache_alloc_bulk);
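From the caller's perspective the bulk allocation path above behaves all-or-nothing: on failure part way through, the objects already obtained are released internally and 0 is returned. A userspace sketch of that contract, with malloc()/free() standing in for the slab allocator and invented names:

/* demo_bulk_alloc.c -- Build: cc -o demo demo_bulk_alloc.c */
#include <stdio.h>
#include <stdlib.h>

static int demo_alloc_bulk(size_t objsize, size_t nr, void **p)
{
	for (size_t i = 0; i < nr; i++) {
		p[i] = malloc(objsize);
		if (!p[i]) {
			/* Roll back: free what was already allocated. */
			while (i--)
				free(p[i]);
			return 0;
		}
	}
	return (int)nr;		/* either everything or nothing */
}

int main(void)
{
	void *p[8];
	int got = demo_alloc_bulk(128, 8, p);

	printf("allocated %d object(s)\n", got);
	for (int i = 0; i < got; i++)
		free(p[i]);
	return 0;
}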
......