Commit d30e51aa authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'slab-for-6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

 - SLUB: delayed freezing of CPU partial slabs (Chengming Zhou)

   Freezing is an operation involving double_cmpxchg() that makes a slab
   exclusive for a particular CPU. Chengming noticed that we use it also
   in situations where we are not yet installing the slab as the CPU
   slab, because freezing also indicates that the slab is not on the
   shared list. This results in redundant freeze/unfreeze operation and
   can be avoided by marking separately the shared list presence by
   reusing the PG_workingset flag.

   This approach neatly avoids the issues described in 9b1ea29b
   ("Revert "mm, slub: consider rest of partial list if acquire_slab()
   fails"") as we can now grab a slab from the shared list in a quick
   and guaranteed way without the cmpxchg_double() operation that
   amplifies the lock contention and can fail.

   As a result, lkp has reported 34.2% improvement of
   stress-ng.rawudp.ops_per_sec

 - SLAB removal and SLUB cleanups (Vlastimil Babka)

   The SLAB allocator has been deprecated since 6.5 and nobody has
   objected so far. We agreed at LSF/MM to wait until the next LTS,
   which is 6.6, so we should be good to go now.

   This doesn't yet erase all traces of SLAB outside of mm/ so some dead
   code, comments or documentation remain, and will be cleaned up
   gradually (some series are already in the works).

   Removing the choice of allocators has already allowed to simplify and
   optimize the code wiring up the kmalloc APIs to the SLUB
   implementation.

* tag 'slab-for-6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: (34 commits)
  mm/slub: free KFENCE objects in slab_free_hook()
  mm/slub: handle bulk and single object freeing separately
  mm/slub: introduce __kmem_cache_free_bulk() without free hooks
  mm/slub: fix bulk alloc and free stats
  mm/slub: optimize free fast path code layout
  mm/slub: optimize alloc fastpath code layout
  mm/slub: remove slab_alloc() and __kmem_cache_alloc_lru() wrappers
  mm/slab: move kmalloc() functions from slab_common.c to slub.c
  mm/slab: move kmalloc_slab() to mm/slab.h
  mm/slab: move kfree() from slab_common.c to slub.c
  mm/slab: move struct kmem_cache_node from slab.h to slub.c
  mm/slab: move memcg related functions from slab.h to slub.c
  mm/slab: move pre/post-alloc hooks from slab.h to slub.c
  mm/slab: consolidate includes in the internal mm/slab.h
  mm/slab: move the rest of slub_def.h to mm/slab.h
  mm/slab: move struct kmem_cache_cpu declaration to slub.c
  mm/slab: remove mm/slab.c and slab_def.h
  mm/mempool/dmapool: remove CONFIG_DEBUG_SLAB ifdefs
  mm/slab: remove CONFIG_SLAB code from slab common code
  cpu/hotplug: remove CPUHP_SLAB_PREPARE hooks
  ...
parents 9f8413c4 61d7e367
......@@ -9,10 +9,6 @@
Linus
----------
N: Matt Mackal
E: mpm@selenic.com
D: SLOB slab allocator
N: Matti Aarnio
E: mea@nic.funet.fi
D: Alpha systems hacking, IPv6 and other network related stuff
......@@ -1572,6 +1568,10 @@ S: Ampferstr. 50 / 4
S: 6020 Innsbruck
S: Austria
N: Mark Hemment
E: markhe@nextd.demon.co.uk
D: SLAB allocator implementation
N: Richard Henderson
E: rth@twiddle.net
E: rth@cygnus.com
......@@ -2445,6 +2445,10 @@ D: work on suspend-to-ram/disk, killing duplicates from ioctl32,
D: Altera SoCFPGA and Nokia N900 support.
S: Czech Republic
N: Olivia Mackall
E: olivia@selenic.com
D: SLOB slab allocator
N: Paul Mackerras
E: paulus@samba.org
D: PPP driver
......
......@@ -37,7 +37,7 @@ The Slab Cache
.. kernel-doc:: include/linux/slab.h
:internal:
.. kernel-doc:: mm/slab.c
.. kernel-doc:: mm/slub.c
:export:
.. kernel-doc:: mm/slab_common.c
......
......@@ -154,7 +154,7 @@ config ARM64
select HAVE_MOVE_PUD
select HAVE_PCI
select HAVE_ACPI_APEI if (ACPI && EFI)
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
select HAVE_ARCH_COMPILER_H
......
......@@ -146,7 +146,7 @@ config S390
select GENERIC_TIME_VSYSCALL
select GENERIC_VDSO_TIME_NS
select GENERIC_IOREMAP if PCI
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
......
......@@ -169,7 +169,7 @@ config X86
select HAS_IOPORT
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
select HAVE_ARCH_HUGE_VMALLOC if X86_64
......
......@@ -104,7 +104,6 @@ enum cpuhp_state {
CPUHP_X2APIC_PREPARE,
CPUHP_SMPCFD_PREPARE,
CPUHP_RELAY_PREPARE,
CPUHP_SLAB_PREPARE,
CPUHP_MD_RAID5_PREPARE,
CPUHP_RCUTREE_PREP,
CPUHP_CPUIDLE_COUPLED_PREPARE,
......
......@@ -24,7 +24,7 @@
/*
* Flags to pass to kmem_cache_create().
* The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set.
* The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op
*/
/* DEBUG: Perform (expensive) checks on alloc/free */
#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U)
......@@ -302,25 +302,15 @@ static inline unsigned int arch_slab_minalign(void)
* Kmalloc array related definitions
*/
#ifdef CONFIG_SLAB
/*
* SLAB and SLUB directly allocates requests fitting in to an order-1 page
* SLUB directly allocates requests fitting in to an order-1 page
* (PAGE_SIZE*2). Larger requests are passed to the page allocator.
*/
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 5
#endif
#endif
#ifdef CONFIG_SLUB
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 3
#endif
#endif
/* Maximum allocatable size */
#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX)
......@@ -788,12 +778,4 @@ size_t kmalloc_size_roundup(size_t size);
void __init kmem_cache_init_late(void);
#if defined(CONFIG_SMP) && defined(CONFIG_SLAB)
int slab_prepare_cpu(unsigned int cpu);
int slab_dead_cpu(unsigned int cpu);
#else
#define slab_prepare_cpu NULL
#define slab_dead_cpu NULL
#endif
#endif /* _LINUX_SLAB_H */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SLAB_DEF_H
#define _LINUX_SLAB_DEF_H
#include <linux/kfence.h>
#include <linux/reciprocal_div.h>
/*
* Definitions unique to the original Linux SLAB allocator.
*/
struct kmem_cache {
struct array_cache __percpu *cpu_cache;
/* 1) Cache tunables. Protected by slab_mutex */
unsigned int batchcount;
unsigned int limit;
unsigned int shared;
unsigned int size;
struct reciprocal_value reciprocal_buffer_size;
/* 2) touched by every alloc & free from the backend */
slab_flags_t flags; /* constant flags */
unsigned int num; /* # of objs per slab */
/* 3) cache_grow/shrink */
/* order of pgs per slab (2^n) */
unsigned int gfporder;
/* force GFP flags, e.g. GFP_DMA */
gfp_t allocflags;
size_t colour; /* cache colouring range */
unsigned int colour_off; /* colour offset */
unsigned int freelist_size;
/* constructor func */
void (*ctor)(void *obj);
/* 4) cache creation/removal */
const char *name;
struct list_head list;
int refcount;
int object_size;
int align;
/* 5) statistics */
#ifdef CONFIG_DEBUG_SLAB
unsigned long num_active;
unsigned long num_allocations;
unsigned long high_mark;
unsigned long grown;
unsigned long reaped;
unsigned long errors;
unsigned long max_freeable;
unsigned long node_allocs;
unsigned long node_frees;
unsigned long node_overflow;
atomic_t allochit;
atomic_t allocmiss;
atomic_t freehit;
atomic_t freemiss;
/*
* If debugging is enabled, then the allocator can add additional
* fields and/or padding to every object. 'size' contains the total
* object size including these internal fields, while 'obj_offset'
* and 'object_size' contain the offset to the user object and its
* size.
*/
int obj_offset;
#endif /* CONFIG_DEBUG_SLAB */
#ifdef CONFIG_KASAN_GENERIC
struct kasan_cache kasan_info;
#endif
#ifdef CONFIG_SLAB_FREELIST_RANDOM
unsigned int *random_seq;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x)
{
void *object = x - (x - slab->s_mem) % cache->size;
void *last_object = slab->s_mem + (cache->num - 1) * cache->size;
if (unlikely(object > last_object))
return last_object;
else
return object;
}
/*
* We want to avoid an expensive divide : (offset / cache->size)
* Using the fact that size is a constant for a particular cache,
* we can replace (offset / cache->size) by
* reciprocal_divide(offset, cache->reciprocal_buffer_size)
*/
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct slab *slab, void *obj)
{
u32 offset = (obj - slab->s_mem);
return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
if (is_kfence_address(slab_address(slab)))
return 1;
return cache->num;
}
#endif /* _LINUX_SLAB_DEF_H */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SLUB_DEF_H
#define _LINUX_SLUB_DEF_H
/*
* SLUB : A Slab allocator without object queues.
*
* (C) 2007 SGI, Christoph Lameter
*/
#include <linux/kfence.h>
#include <linux/kobject.h>
#include <linux/reciprocal_div.h>
#include <linux/local_lock.h>
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
FREE_FASTPATH, /* Free to cpu slab */
FREE_SLOWPATH, /* Freeing not to cpu slab */
FREE_FROZEN, /* Freeing to frozen slab */
FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
FREE_REMOVE_PARTIAL, /* Freeing removes last object */
ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */
ALLOC_SLAB, /* Cpu slab acquired from page allocator */
ALLOC_REFILL, /* Refill cpu slab from slab freelist */
ALLOC_NODE_MISMATCH, /* Switching cpu slab */
FREE_SLAB, /* Slab freed to the page allocator */
CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */
DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
DEACTIVATE_BYPASS, /* Implicit deactivation */
ORDER_FALLBACK, /* Number of times fallback was necessary */
CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
CMPXCHG_DOUBLE_FAIL, /* Number of times that cmpxchg double did not match */
CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */
CPU_PARTIAL_FREE, /* Refill cpu partial on free */
CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
NR_SLUB_STAT_ITEMS
};
#ifndef CONFIG_SLUB_TINY
/*
* When changing the layout, make sure freelist and tid are still compatible
* with this_cpu_cmpxchg_double() alignment requirements.
*/
struct kmem_cache_cpu {
union {
struct {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
};
freelist_aba_t freelist_tid;
};
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct slab *partial; /* Partially allocated frozen slabs */
#endif
local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
};
#endif /* CONFIG_SLUB_TINY */
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c) ((c)->partial)
#define slub_set_percpu_partial(c, p) \
({ \
slub_percpu_partial(c) = (p)->next; \
})
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c) NULL
#define slub_set_percpu_partial(c, p)
#define slub_percpu_partial_read_once(c) NULL
#endif // CONFIG_SLUB_CPU_PARTIAL
/*
* Word size structure that can be atomically updated or read and that
* contains both the order and the number of objects that a slab of the
* given order would contain.
*/
struct kmem_cache_order_objects {
unsigned int x;
};
/*
* Slab cache management.
*/
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
struct kmem_cache_cpu __percpu *cpu_slab;
#endif
/* Used for retrieving partial slabs, etc. */
slab_flags_t flags;
unsigned long min_partial;
unsigned int size; /* The size of an object including metadata */
unsigned int object_size;/* The size of an object without metadata */
struct reciprocal_value reciprocal_size;
unsigned int offset; /* Free pointer offset */
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
/* Number of per cpu partial slabs to keep around */
unsigned int cpu_partial_slabs;
#endif
struct kmem_cache_order_objects oo;
/* Allocation and freeing of slabs */
struct kmem_cache_order_objects min;
gfp_t allocflags; /* gfp flags to use on each alloc */
int refcount; /* Refcount for slab cache destroy */
void (*ctor)(void *);
unsigned int inuse; /* Offset to metadata */
unsigned int align; /* Alignment */
unsigned int red_left_pad; /* Left redzone padding size */
const char *name; /* Name (only for display!) */
struct list_head list; /* List of slab caches */
#ifdef CONFIG_SYSFS
struct kobject kobj; /* For sysfs */
#endif
#ifdef CONFIG_SLAB_FREELIST_HARDENED
unsigned long random;
#endif
#ifdef CONFIG_NUMA
/*
* Defragmentation by allocating from a remote node.
*/
unsigned int remote_node_defrag_ratio;
#endif
#ifdef CONFIG_SLAB_FREELIST_RANDOM
unsigned int *random_seq;
#endif
#ifdef CONFIG_KASAN_GENERIC
struct kasan_cache kasan_info;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *);
void sysfs_slab_release(struct kmem_cache *);
#else
static inline void sysfs_slab_unlink(struct kmem_cache *s)
{
}
static inline void sysfs_slab_release(struct kmem_cache *s)
{
}
#endif
void *fixup_red_left(struct kmem_cache *s, void *p);
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x) {
void *object = x - (x - slab_address(slab)) % cache->size;
void *last_object = slab_address(slab) +
(slab->objects - 1) * cache->size;
void *result = (unlikely(object > last_object)) ? last_object : object;
result = fixup_red_left(cache, result);
return result;
}
/* Determine object index from a given position */
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
void *addr, void *obj)
{
return reciprocal_divide(kasan_reset_tag(obj) - addr,
cache->reciprocal_size);
}
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct slab *slab, void *obj)
{
if (is_kfence_address(obj))
return 0;
return __obj_to_index(cache, slab_address(slab), obj);
}
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
return slab->objects;
}
#endif /* _LINUX_SLUB_DEF_H */
......@@ -2125,11 +2125,6 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = relay_prepare_cpu,
.teardown.single = NULL,
},
[CPUHP_SLAB_PREPARE] = {
.name = "slab:prepare",
.startup.single = slab_prepare_cpu,
.teardown.single = slab_dead_cpu,
},
[CPUHP_RCUTREE_PREP] = {
.name = "RCU/tree:prepare",
.startup.single = rcutree_prepare_cpu,
......
......@@ -1970,7 +1970,6 @@ config FAULT_INJECTION
config FAILSLAB
bool "Fault-injection capability for kmalloc"
depends on FAULT_INJECTION
depends on SLAB || SLUB
help
Provide fault-injection capability for kmalloc.
......
......@@ -37,7 +37,7 @@ menuconfig KASAN
(HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \
CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
HAVE_ARCH_KASAN_HW_TAGS
depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB)
depends on SYSFS && !SLUB_TINY
select STACKDEPOT_ALWAYS_INIT
help
Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety
......@@ -78,7 +78,7 @@ config KASAN_GENERIC
bool "Generic KASAN"
depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC
depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS
select SLUB_DEBUG if SLUB
select SLUB_DEBUG
select CONSTRUCTORS
help
Enables Generic KASAN.
......@@ -89,13 +89,11 @@ config KASAN_GENERIC
overhead of ~50% for dynamic allocations.
The performance slowdown is ~x3.
(Incompatible with CONFIG_DEBUG_SLAB: the kernel does not boot.)
config KASAN_SW_TAGS
bool "Software Tag-Based KASAN"
depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS
depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS
select SLUB_DEBUG if SLUB
select SLUB_DEBUG
select CONSTRUCTORS
help
Enables Software Tag-Based KASAN.
......@@ -110,12 +108,9 @@ config KASAN_SW_TAGS
May potentially introduce problems related to pointer casting and
comparison, as it embeds a tag into the top byte of each pointer.
(Incompatible with CONFIG_DEBUG_SLAB: the kernel does not boot.)
config KASAN_HW_TAGS
bool "Hardware Tag-Based KASAN"
depends on HAVE_ARCH_KASAN_HW_TAGS
depends on SLUB
help
Enables Hardware Tag-Based KASAN.
......
......@@ -5,7 +5,7 @@ config HAVE_ARCH_KFENCE
menuconfig KFENCE
bool "KFENCE: low-overhead sampling-based memory safety error detector"
depends on HAVE_ARCH_KFENCE && (SLAB || SLUB)
depends on HAVE_ARCH_KFENCE
select STACKTRACE
select IRQ_WORK
help
......
......@@ -11,7 +11,7 @@ config HAVE_KMSAN_COMPILER
config KMSAN
bool "KMSAN: detector of uninitialized values use"
depends on HAVE_ARCH_KMSAN && HAVE_KMSAN_COMPILER
depends on SLUB && DEBUG_KERNEL && !KASAN && !KCSAN
depends on DEBUG_KERNEL && !KASAN && !KCSAN
depends on !PREEMPT_RT
select STACKDEPOT
select STACKDEPOT_ALWAYS_INIT
......
......@@ -226,52 +226,17 @@ config ZSMALLOC_CHAIN_SIZE
For more information, see zsmalloc documentation.
menu "SLAB allocator options"
choice
prompt "Choose SLAB allocator"
default SLUB
help
This option allows to select a slab allocator.
config SLAB_DEPRECATED
bool "SLAB (DEPRECATED)"
depends on !PREEMPT_RT
help
Deprecated and scheduled for removal in a few cycles. Replaced by
SLUB.
If you cannot migrate to SLUB, please contact linux-mm@kvack.org
and the people listed in the SLAB ALLOCATOR section of MAINTAINERS
file, explaining why.
The regular slab allocator that is established and known to work
well in all environments. It organizes cache hot objects in
per cpu and per node queues.
menu "Slab allocator options"
config SLUB
bool "SLUB (Unqueued Allocator)"
help
SLUB is a slab allocator that minimizes cache line usage
instead of managing queues of cached objects (SLAB approach).
Per cpu caching is realized using slabs of objects instead
of queues of objects. SLUB can use memory efficiently
and has enhanced diagnostics. SLUB is the default choice for
a slab allocator.
endchoice
config SLAB
bool
default y
depends on SLAB_DEPRECATED
def_bool y
config SLUB_TINY
bool "Configure SLUB for minimal memory footprint"
depends on SLUB && EXPERT
bool "Configure for minimal memory footprint"
depends on EXPERT
select SLAB_MERGE_DEFAULT
help
Configures the SLUB allocator in a way to achieve minimal memory
Configures the slab allocator in a way to achieve minimal memory
footprint, sacrificing scalability, debugging and other features.
This is intended only for the smallest system that had used the
SLOB allocator and is not recommended for systems with more than
......@@ -282,7 +247,6 @@ config SLUB_TINY
config SLAB_MERGE_DEFAULT
bool "Allow slab caches to be merged"
default y
depends on SLAB || SLUB
help
For reduced kernel memory fragmentation, slab caches can be
merged when they share the same size and other characteristics.
......@@ -296,7 +260,7 @@ config SLAB_MERGE_DEFAULT
config SLAB_FREELIST_RANDOM
bool "Randomize slab freelist"
depends on SLAB || (SLUB && !SLUB_TINY)
depends on !SLUB_TINY
help
Randomizes the freelist order used on creating new pages. This
security feature reduces the predictability of the kernel slab
......@@ -304,21 +268,19 @@ config SLAB_FREELIST_RANDOM
config SLAB_FREELIST_HARDENED
bool "Harden slab freelist metadata"
depends on SLAB || (SLUB && !SLUB_TINY)
depends on !SLUB_TINY
help
Many kernel heap attacks try to target slab cache metadata and
other infrastructure. This options makes minor performance
sacrifices to harden the kernel slab allocator against common
freelist exploit methods. Some slab implementations have more
sanity-checking than others. This option is most effective with
CONFIG_SLUB.
freelist exploit methods.
config SLUB_STATS
default n
bool "Enable SLUB performance statistics"
depends on SLUB && SYSFS && !SLUB_TINY
bool "Enable performance statistics"
depends on SYSFS && !SLUB_TINY
help
SLUB statistics are useful to debug SLUBs allocation behavior in
The statistics are useful to debug slab allocation behavior in
order find ways to optimize the allocator. This should never be
enabled for production use since keeping statistics slows down
the allocator by a few percentage points. The slabinfo command
......@@ -328,8 +290,8 @@ config SLUB_STATS
config SLUB_CPU_PARTIAL
default y
depends on SLUB && SMP && !SLUB_TINY
bool "SLUB per cpu partial cache"
depends on SMP && !SLUB_TINY
bool "Enable per cpu partial caches"
help
Per cpu partial caches accelerate objects allocation and freeing
that is local to a processor at the price of more indeterminism
......@@ -339,7 +301,7 @@ config SLUB_CPU_PARTIAL
config RANDOM_KMALLOC_CACHES
default n
depends on SLUB && !SLUB_TINY
depends on !SLUB_TINY
bool "Randomize slab caches for normal kmalloc"
help
A hardening feature that creates multiple copies of slab caches for
......@@ -354,7 +316,7 @@ config RANDOM_KMALLOC_CACHES
limited degree of memory and CPU overhead that relates to hardware and
system workload.
endmenu # SLAB allocator options
endmenu # Slab allocator options
config SHUFFLE_PAGE_ALLOCATOR
bool "Page allocator randomization"
......
......@@ -45,18 +45,10 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT
Enable debug page memory allocations by default? This value
can be overridden by debug_pagealloc=off|on.
config DEBUG_SLAB
bool "Debug slab memory allocations"
depends on DEBUG_KERNEL && SLAB
help
Say Y here to have the kernel do limited verification on memory
allocation as well as poisoning memory on free to catch use of freed
memory. This can make kmalloc/kfree-intensive workloads much slower.
config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EXPERT
depends on SLUB && SYSFS && !SLUB_TINY
depends on SYSFS && !SLUB_TINY
select STACKDEPOT if STACKTRACE_SUPPORT
help
SLUB has extensive debug support features. Disabling these can
......@@ -66,7 +58,7 @@ config SLUB_DEBUG
config SLUB_DEBUG_ON
bool "SLUB debugging on by default"
depends on SLUB && SLUB_DEBUG
depends on SLUB_DEBUG
select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT
default n
help
......@@ -231,8 +223,8 @@ config DEBUG_KMEMLEAK
allocations. See Documentation/dev-tools/kmemleak.rst for more
details.
Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances
of finding leaks due to the slab objects poisoning.
Enabling SLUB_DEBUG may increase the chances of finding leaks
due to the slab objects poisoning.
In order to access the kmemleak file, debugfs needs to be
mounted (usually at /sys/kernel/debug).
......
......@@ -4,7 +4,6 @@
#
KASAN_SANITIZE_slab_common.o := n
KASAN_SANITIZE_slab.o := n
KASAN_SANITIZE_slub.o := n
KCSAN_SANITIZE_kmemleak.o := n
......@@ -12,7 +11,6 @@ KCSAN_SANITIZE_kmemleak.o := n
# the same word but accesses to different bits of that word. Re-enable KCSAN
# for these when we have more consensus on what to do about them.
KCSAN_SANITIZE_slab_common.o := n
KCSAN_SANITIZE_slab.o := n
KCSAN_SANITIZE_slub.o := n
KCSAN_SANITIZE_page_alloc.o := n
# But enable explicit instrumentation for memory barriers.
......@@ -22,7 +20,6 @@ KCSAN_INSTRUMENT_BARRIERS := y
# flaky coverage that is not a function of syscall inputs. E.g. slab is out of
# free pages, or a task is migrated between nodes.
KCOV_INSTRUMENT_slab_common.o := n
KCOV_INSTRUMENT_slab.o := n
KCOV_INSTRUMENT_slub.o := n
KCOV_INSTRUMENT_page_alloc.o := n
KCOV_INSTRUMENT_debug-pagealloc.o := n
......@@ -66,6 +63,7 @@ obj-y += page-alloc.o
obj-y += init-mm.o
obj-y += memblock.o
obj-y += $(memory-hotplug-y)
obj-y += slub.o
ifdef CONFIG_MMU
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
......@@ -82,8 +80,6 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
obj-$(CONFIG_KSM) += ksm.o
obj-$(CONFIG_PAGE_POISONING) += page_poison.o
obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_SLUB) += slub.o
obj-$(CONFIG_KASAN) += kasan/
obj-$(CONFIG_KFENCE) += kfence/
obj-$(CONFIG_KMSAN) += kmsan/
......
......@@ -36,7 +36,7 @@
#include <linux/types.h>
#include <linux/wait.h>
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
#ifdef CONFIG_SLUB_DEBUG_ON
#define DMAPOOL_DEBUG 1
#endif
......
......@@ -153,10 +153,6 @@ void __kasan_poison_object_data(struct kmem_cache *cache, void *object)
* 2. A cache might be SLAB_TYPESAFE_BY_RCU, which means objects can be
* accessed after being freed. We preassign tags for objects in these
* caches as well.
* 3. For SLAB allocator we can't preassign tags randomly since the freelist
* is stored as an array of indexes instead of a linked list. Assign tags
* based on objects indexes, so that objects that are next to each other
* get different tags.
*/
static inline u8 assign_tag(struct kmem_cache *cache,
const void *object, bool init)
......@@ -171,17 +167,12 @@ static inline u8 assign_tag(struct kmem_cache *cache,
if (!cache->ctor && !(cache->flags & SLAB_TYPESAFE_BY_RCU))
return init ? KASAN_TAG_KERNEL : kasan_random_tag();
/* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
#ifdef CONFIG_SLAB
/* For SLAB assign tags based on the object index in the freelist. */
return (u8)obj_to_index(cache, virt_to_slab(object), (void *)object);
#else
/*
* For SLUB assign a random tag during slab creation, otherwise reuse
* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU,
* assign a random tag during slab creation, otherwise reuse
* the already assigned tag.
*/
return init ? kasan_random_tag() : get_tag(object);
#endif
}
void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
......
......@@ -373,8 +373,7 @@ void kasan_set_track(struct kasan_track *track, gfp_t flags);
void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags);
void kasan_save_free_info(struct kmem_cache *cache, void *object);
#if defined(CONFIG_KASAN_GENERIC) && \
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
#ifdef CONFIG_KASAN_GENERIC
bool kasan_quarantine_put(struct kmem_cache *cache, void *object);
void kasan_quarantine_reduce(void);
void kasan_quarantine_remove_cache(struct kmem_cache *cache);
......
......@@ -144,10 +144,6 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
{
void *object = qlink_to_object(qlink, cache);
struct kasan_free_meta *meta = kasan_get_free_meta(cache, object);
unsigned long flags;
if (IS_ENABLED(CONFIG_SLAB))
local_irq_save(flags);
/*
* If init_on_free is enabled and KASAN's free metadata is stored in
......@@ -166,9 +162,6 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
*(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE;
___cache_free(cache, object, _THIS_IP_);
if (IS_ENABLED(CONFIG_SLAB))
local_irq_restore(flags);
}
static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache)
......
......@@ -23,6 +23,7 @@
#include <linux/stacktrace.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
#include <linux/kasan.h>
#include <linux/module.h>
#include <linux/sched/task_stack.h>
......
......@@ -463,11 +463,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
/* Set required slab fields. */
slab = virt_to_slab((void *)meta->addr);
slab->slab_cache = cache;
#if defined(CONFIG_SLUB)
slab->objects = 1;
#elif defined(CONFIG_SLAB)
slab->s_mem = addr;
#endif
/* Memory initialization. */
set_canary(meta);
......
......@@ -64,6 +64,7 @@
#include <linux/psi.h>
#include <linux/seq_buf.h>
#include <linux/sched/isolation.h>
#include <linux/kmemleak.h>
#include "internal.h"
#include <net/sock.h>
#include <net/ip.h>
......@@ -5150,7 +5151,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
return ret;
}
#if defined(CONFIG_MEMCG_KMEM) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG)
static int mem_cgroup_slab_show(struct seq_file *m, void *p)
{
/*
......@@ -5259,8 +5260,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
.write = mem_cgroup_reset,
.read_u64 = mem_cgroup_read_u64,
},
#if defined(CONFIG_MEMCG_KMEM) && \
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG)
{
.name = "kmem.slabinfo",
.seq_show = mem_cgroup_slab_show,
......
......@@ -20,7 +20,7 @@
#include <linux/writeback.h>
#include "slab.h"
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
#ifdef CONFIG_SLUB_DEBUG_ON
static void poison_error(mempool_t *pool, void *element, size_t size,
size_t byte)
{
......@@ -95,14 +95,14 @@ static void poison_element(mempool_t *pool, void *element)
kunmap_atomic(addr);
}
}
#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
#else /* CONFIG_SLUB_DEBUG_ON */
static inline void check_element(mempool_t *pool, void *element)
{
}
static inline void poison_element(mempool_t *pool, void *element)
{
}
#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
#endif /* CONFIG_SLUB_DEBUG_ON */
static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
{
......
This source diff could not be displayed because it is too large. You can view the blob instead.
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef MM_SLAB_H
#define MM_SLAB_H
#include <linux/reciprocal_div.h>
#include <linux/list_lru.h>
#include <linux/local_lock.h>
#include <linux/random.h>
#include <linux/kobject.h>
#include <linux/sched/mm.h>
#include <linux/memcontrol.h>
#include <linux/kfence.h>
#include <linux/kasan.h>
/*
* Internal slab definitions
*/
void __init kmem_cache_init(void);
#ifdef CONFIG_64BIT
# ifdef system_has_cmpxchg128
......@@ -42,21 +52,6 @@ typedef union {
struct slab {
unsigned long __page_flags;
#if defined(CONFIG_SLAB)
struct kmem_cache *slab_cache;
union {
struct {
struct list_head slab_list;
void *freelist; /* array of free object indexes */
void *s_mem; /* first object */
};
struct rcu_head rcu_head;
};
unsigned int active;
#elif defined(CONFIG_SLUB)
struct kmem_cache *slab_cache;
union {
struct {
......@@ -91,10 +86,6 @@ struct slab {
};
unsigned int __unused;
#else
#error "Unexpected slab allocator configured"
#endif
atomic_t __page_refcount;
#ifdef CONFIG_MEMCG
unsigned long memcg_data;
......@@ -111,7 +102,7 @@ SLAB_MATCH(memcg_data, memcg_data);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
#if defined(system_has_freelist_aba) && defined(CONFIG_SLUB)
#if defined(system_has_freelist_aba)
static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)));
#endif
......@@ -228,21 +219,138 @@ static inline size_t slab_size(const struct slab *slab)
return PAGE_SIZE << slab_order(slab);
}
#ifdef CONFIG_SLAB
#include <linux/slab_def.h>
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c) ((c)->partial)
#define slub_set_percpu_partial(c, p) \
({ \
slub_percpu_partial(c) = (p)->next; \
})
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c) NULL
#define slub_set_percpu_partial(c, p)
#define slub_percpu_partial_read_once(c) NULL
#endif // CONFIG_SLUB_CPU_PARTIAL
/*
* Word size structure that can be atomically updated or read and that
* contains both the order and the number of objects that a slab of the
* given order would contain.
*/
struct kmem_cache_order_objects {
unsigned int x;
};
/*
* Slab cache management.
*/
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
struct kmem_cache_cpu __percpu *cpu_slab;
#endif
/* Used for retrieving partial slabs, etc. */
slab_flags_t flags;
unsigned long min_partial;
unsigned int size; /* Object size including metadata */
unsigned int object_size; /* Object size without metadata */
struct reciprocal_value reciprocal_size;
unsigned int offset; /* Free pointer offset */
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
/* Number of per cpu partial slabs to keep around */
unsigned int cpu_partial_slabs;
#endif
struct kmem_cache_order_objects oo;
/* Allocation and freeing of slabs */
struct kmem_cache_order_objects min;
gfp_t allocflags; /* gfp flags to use on each alloc */
int refcount; /* Refcount for slab cache destroy */
void (*ctor)(void *object); /* Object constructor */
unsigned int inuse; /* Offset to metadata */
unsigned int align; /* Alignment */
unsigned int red_left_pad; /* Left redzone padding size */
const char *name; /* Name (only for display!) */
struct list_head list; /* List of slab caches */
#ifdef CONFIG_SYSFS
struct kobject kobj; /* For sysfs */
#endif
#ifdef CONFIG_SLAB_FREELIST_HARDENED
unsigned long random;
#endif
#ifdef CONFIG_SLUB
#include <linux/slub_def.h>
#ifdef CONFIG_NUMA
/*
* Defragmentation by allocating from a remote node.
*/
unsigned int remote_node_defrag_ratio;
#endif
#include <linux/memcontrol.h>
#include <linux/fault-inject.h>
#include <linux/kasan.h>
#include <linux/kmemleak.h>
#include <linux/random.h>
#include <linux/sched/mm.h>
#include <linux/list_lru.h>
#ifdef CONFIG_SLAB_FREELIST_RANDOM
unsigned int *random_seq;
#endif
#ifdef CONFIG_KASAN_GENERIC
struct kasan_cache kasan_info;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *s);
void sysfs_slab_release(struct kmem_cache *s);
#else
static inline void sysfs_slab_unlink(struct kmem_cache *s) { }
static inline void sysfs_slab_release(struct kmem_cache *s) { }
#endif
void *fixup_red_left(struct kmem_cache *s, void *p);
static inline void *nearest_obj(struct kmem_cache *cache,
const struct slab *slab, void *x)
{
void *object = x - (x - slab_address(slab)) % cache->size;
void *last_object = slab_address(slab) +
(slab->objects - 1) * cache->size;
void *result = (unlikely(object > last_object)) ? last_object : object;
result = fixup_red_left(cache, result);
return result;
}
/* Determine object index from a given position */
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
void *addr, void *obj)
{
return reciprocal_divide(kasan_reset_tag(obj) - addr,
cache->reciprocal_size);
}
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct slab *slab, void *obj)
{
if (is_kfence_address(obj))
return 0;
return __obj_to_index(cache, slab_address(slab), obj);
}
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
return slab->objects;
}
/*
* State of the slab allocator.
......@@ -281,19 +389,39 @@ extern const struct kmalloc_info_struct {
void setup_kmalloc_cache_index_table(void);
void create_kmalloc_caches(slab_flags_t);
/* Find the kmalloc slab corresponding for a certain size */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller);
extern u8 kmalloc_size_index[24];
static inline unsigned int size_index_elem(unsigned int bytes)
{
return (bytes - 1) / 8;
}
/*
* Find the kmem_cache structure that serves a given size of
* allocation
*
* This assumes size is larger than zero and not larger than
* KMALLOC_MAX_CACHE_SIZE and the caller must check that.
*/
static inline struct kmem_cache *
kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
{
unsigned int index;
if (size <= 192)
index = kmalloc_size_index[size_index_elem(size)];
else
index = fls(size - 1);
void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t orig_size,
unsigned long caller);
void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller);
return kmalloc_caches[kmalloc_type(flags, caller)][index];
}
gfp_t kmalloc_fix_flags(gfp_t flags);
/* Functions provided by the slab allocators */
int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
void __init kmem_cache_init(void);
void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
slab_flags_t flags);
extern void create_boot_cache(struct kmem_cache *, const char *name,
......@@ -320,26 +448,16 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
SLAB_CACHE_DMA32 | SLAB_PANIC | \
SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
#if defined(CONFIG_DEBUG_SLAB)
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
#elif defined(CONFIG_SLUB_DEBUG)
#ifdef CONFIG_SLUB_DEBUG
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)
#else
#define SLAB_DEBUG_FLAGS (0)
#endif
#if defined(CONFIG_SLAB)
#define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \
SLAB_ACCOUNT | SLAB_NO_MERGE)
#elif defined(CONFIG_SLUB)
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | SLAB_ACCOUNT | \
SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE)
#else
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
#endif
/* Common flags available with current configuration */
#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
......@@ -387,12 +505,6 @@ void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
ssize_t slabinfo_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos);
static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
{
return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
}
#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG_ON
DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
......@@ -452,238 +564,32 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
gfp_t gfp, bool new_slab);
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr);
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
kfree(slab_objcgs(slab));
slab->memcg_data = 0;
}
static inline size_t obj_full_size(struct kmem_cache *s)
{
/*
* For each accounted object there is an extra space which is used
* to store obj_cgroup membership. Charge it too.
*/
return s->size + sizeof(struct obj_cgroup *);
}
/*
* Returns false if the allocation should fail.
*/
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t objects, gfp_t flags)
{
struct obj_cgroup *objcg;
if (!memcg_kmem_online())
return true;
if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))
return true;
/*
* The obtained objcg pointer is safe to use within the current scope,
* defined by current task or set_active_memcg() pair.
* obj_cgroup_get() is used to get a permanent reference.
*/
objcg = current_obj_cgroup();
if (!objcg)
return true;
if (lru) {
int ret;
struct mem_cgroup *memcg;
memcg = get_mem_cgroup_from_objcg(objcg);
ret = memcg_list_lru_alloc(memcg, lru, flags);
css_put(&memcg->css);
if (ret)
return false;
}
if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
return false;
*objcgp = objcg;
return true;
}
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg,
gfp_t flags, size_t size,
void **p)
{
struct slab *slab;
unsigned long off;
size_t i;
if (!memcg_kmem_online() || !objcg)
return;
for (i = 0; i < size; i++) {
if (likely(p[i])) {
slab = virt_to_slab(p[i]);
if (!slab_objcgs(slab) &&
memcg_alloc_slab_cgroups(slab, s, flags,
false)) {
obj_cgroup_uncharge(objcg, obj_full_size(s));
continue;
}
off = obj_to_index(s, slab, p[i]);
obj_cgroup_get(objcg);
slab_objcgs(slab)[off] = objcg;
mod_objcg_state(objcg, slab_pgdat(slab),
cache_vmstat_idx(s), obj_full_size(s));
} else {
obj_cgroup_uncharge(objcg, obj_full_size(s));
}
}
}
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects)
{
struct obj_cgroup **objcgs;
int i;
if (!memcg_kmem_online())
return;
objcgs = slab_objcgs(slab);
if (!objcgs)
return;
for (i = 0; i < objects; i++) {
struct obj_cgroup *objcg;
unsigned int off;
off = obj_to_index(s, slab, p[i]);
objcg = objcgs[off];
if (!objcg)
continue;
objcgs[off] = NULL;
obj_cgroup_uncharge(objcg, obj_full_size(s));
mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
-obj_full_size(s));
obj_cgroup_put(objcg);
}
}
#else /* CONFIG_MEMCG_KMEM */
static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
{
return NULL;
}
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
{
return NULL;
}
static inline int memcg_alloc_slab_cgroups(struct slab *slab,
struct kmem_cache *s, gfp_t gfp,
bool new_slab)
{
return 0;
}
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
}
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t objects, gfp_t flags)
{
return true;
}
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg,
gfp_t flags, size_t size,
void **p)
{
}
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects)
{
}
#endif /* CONFIG_MEMCG_KMEM */
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
struct slab *slab;
slab = virt_to_slab(obj);
if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n",
__func__))
return NULL;
return slab->slab_cache;
}
static __always_inline void account_slab(struct slab *slab, int order,
struct kmem_cache *s, gfp_t gfp)
{
if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
memcg_alloc_slab_cgroups(slab, s, gfp, true);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
PAGE_SIZE << order);
}
static __always_inline void unaccount_slab(struct slab *slab, int order,
struct kmem_cache *s)
{
if (memcg_kmem_online())
memcg_free_slab_cgroups(slab);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
}
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{
struct kmem_cache *cachep;
if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
!kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
return s;
cachep = virt_to_cache(x);
if (WARN(cachep && cachep != s,
"%s: Wrong slab cache. %s but object is from %s\n",
__func__, s->name, cachep->name))
print_tracking(cachep, x);
return cachep;
}
void free_large_kmalloc(struct folio *folio, void *object);
size_t __ksize(const void *objp);
static inline size_t slab_ksize(const struct kmem_cache *s)
{
#ifndef CONFIG_SLUB
return s->object_size;
#else /* CONFIG_SLUB */
# ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG
/*
* Debugging requires use of the padding between object
* and whatever may come after it.
*/
if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
return s->object_size;
# endif
#endif
if (s->flags & SLAB_KASAN)
return s->object_size;
/*
......@@ -697,128 +603,9 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
* Else we can use all the padding etc for the allocation
*/
return s->size;
#endif
}
static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t size, gfp_t flags)
{
flags &= gfp_allowed_mask;
might_alloc(flags);
if (should_failslab(s, flags))
return NULL;
if (!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags))
return NULL;
return s;
}
static inline void slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg, gfp_t flags,
size_t size, void **p, bool init,
unsigned int orig_size)
{
unsigned int zero_size = s->object_size;
bool kasan_init = init;
size_t i;
flags &= gfp_allowed_mask;
/*
* For kmalloc object, the allocated memory size(object_size) is likely
* larger than the requested size(orig_size). If redzone check is
* enabled for the extra space, don't zero it, as it will be redzoned
* soon. The redzone operation for this extra space could be seen as a
* replacement of current poisoning under certain debug option, and
* won't break other sanity checks.
*/
if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
(s->flags & SLAB_KMALLOC))
zero_size = orig_size;
/*
* When slub_debug is enabled, avoid memory initialization integrated
* into KASAN and instead zero out the memory via the memset below with
* the proper size. Otherwise, KASAN might overwrite SLUB redzones and
* cause false-positive reports. This does not lead to a performance
* penalty on production builds, as slub_debug is not intended to be
* enabled there.
*/
if (__slub_debug_enabled())
kasan_init = false;
/*
* As memory initialization might be integrated into KASAN,
* kasan_slab_alloc and initialization memset must be
* kept together to avoid discrepancies in behavior.
*
* As p[i] might get tagged, memset and kmemleak hook come after KASAN.
*/
for (i = 0; i < size; i++) {
p[i] = kasan_slab_alloc(s, p[i], flags, kasan_init);
if (p[i] && init && (!kasan_init || !kasan_has_integrated_init()))
memset(p[i], 0, zero_size);
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, flags);
kmsan_slab_alloc(s, p[i], flags);
}
memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
}
/*
* The slab lists for all objects.
*/
struct kmem_cache_node {
#ifdef CONFIG_SLAB
raw_spinlock_t list_lock;
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
unsigned long total_slabs; /* length of all slab lists */
unsigned long free_slabs; /* length of free slab list only */
unsigned long free_objects;
unsigned int free_limit;
unsigned int colour_next; /* Per-node cache coloring */
struct array_cache *shared; /* shared per node */
struct alien_cache **alien; /* on other nodes */
unsigned long next_reap; /* updated without locking */
int free_touched; /* updated without locking */
#endif
#ifdef CONFIG_SLUB
spinlock_t list_lock;
unsigned long nr_partial;
struct list_head partial;
#ifdef CONFIG_SLUB_DEBUG
atomic_long_t nr_slabs;
atomic_long_t total_objects;
struct list_head full;
#endif
#endif
};
static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
return s->node[node];
}
/*
* Iterator over all nodes. The body will be executed for each node that has
* a kmem_cache_node structure allocated (which is true for all online nodes)
*/
#define for_each_kmem_cache_node(__s, __node, __n) \
for (__node = 0; __node < nr_node_ids; __node++) \
if ((__n = get_node(__s, __node)))
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
void dump_unreclaimable_slab(void);
#else
static inline void dump_unreclaimable_slab(void)
......
......@@ -21,6 +21,7 @@
#include <linux/swiotlb.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <linux/kmemleak.h>
#include <linux/kasan.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
......@@ -71,10 +72,8 @@ static int __init setup_slab_merge(char *str)
return 1;
}
#ifdef CONFIG_SLUB
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
__setup_param("slub_merge", slub_merge, setup_slab_merge, 0);
#endif
__setup("slab_nomerge", setup_slab_nomerge);
__setup("slab_merge", setup_slab_merge);
......@@ -197,10 +196,6 @@ struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
if (s->size - size >= sizeof(void *))
continue;
if (IS_ENABLED(CONFIG_SLAB) && align &&
(align > s->align || s->align % align))
continue;
return s;
}
return NULL;
......@@ -670,7 +665,7 @@ EXPORT_SYMBOL(random_kmalloc_seed);
* of two cache sizes there. The size of larger slabs can be determined using
* fls.
*/
static u8 size_index[24] __ro_after_init = {
u8 kmalloc_size_index[24] __ro_after_init = {
3, /* 8 */
4, /* 16 */
5, /* 24 */
......@@ -697,33 +692,6 @@ static u8 size_index[24] __ro_after_init = {
2 /* 192 */
};
static inline unsigned int size_index_elem(unsigned int bytes)
{
return (bytes - 1) / 8;
}
/*
* Find the kmem_cache structure that serves a given size of
* allocation
*/
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
{
unsigned int index;
if (size <= 192) {
if (!size)
return ZERO_SIZE_PTR;
index = size_index[size_index_elem(size)];
} else {
if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE))
return NULL;
index = fls(size - 1);
}
return kmalloc_caches[kmalloc_type(flags, caller)][index];
}
size_t kmalloc_size_roundup(size_t size)
{
if (size && size <= KMALLOC_MAX_CACHE_SIZE) {
......@@ -848,9 +816,9 @@ void __init setup_kmalloc_cache_index_table(void)
for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
unsigned int elem = size_index_elem(i);
if (elem >= ARRAY_SIZE(size_index))
if (elem >= ARRAY_SIZE(kmalloc_size_index))
break;
size_index[elem] = KMALLOC_SHIFT_LOW;
kmalloc_size_index[elem] = KMALLOC_SHIFT_LOW;
}
if (KMALLOC_MIN_SIZE >= 64) {
......@@ -859,7 +827,7 @@ void __init setup_kmalloc_cache_index_table(void)
* is 64 byte.
*/
for (i = 64 + 8; i <= 96; i += 8)
size_index[size_index_elem(i)] = 7;
kmalloc_size_index[size_index_elem(i)] = 7;
}
......@@ -870,7 +838,7 @@ void __init setup_kmalloc_cache_index_table(void)
* instead.
*/
for (i = 128 + 8; i <= 192; i += 8)
size_index[size_index_elem(i)] = 8;
kmalloc_size_index[size_index_elem(i)] = 8;
}
}
......@@ -968,95 +936,6 @@ void __init create_kmalloc_caches(slab_flags_t flags)
slab_state = UP;
}
void free_large_kmalloc(struct folio *folio, void *object)
{
unsigned int order = folio_order(folio);
if (WARN_ON_ONCE(order == 0))
pr_warn_once("object pointer: 0x%p\n", object);
kmemleak_free(object);
kasan_kfree_large(object);
kmsan_kfree_large(object);
mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(folio_page(folio, 0), order);
}
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node);
static __always_inline
void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
{
struct kmem_cache *s;
void *ret;
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = __kmalloc_large_node(size, flags, node);
trace_kmalloc(caller, ret, size,
PAGE_SIZE << get_order(size), flags, node);
return ret;
}
s = kmalloc_slab(size, flags, caller);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
ret = __kmem_cache_alloc_node(s, flags, node, size, caller);
ret = kasan_kmalloc(s, ret, size, flags);
trace_kmalloc(caller, ret, size, s->size, flags, node);
return ret;
}
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
return __do_kmalloc_node(size, flags, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node);
void *__kmalloc(size_t size, gfp_t flags)
{
return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc);
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
int node, unsigned long caller)
{
return __do_kmalloc_node(size, flags, node, caller);
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
/**
* kfree - free previously allocated memory
* @object: pointer returned by kmalloc() or kmem_cache_alloc()
*
* If @object is NULL, no operation is performed.
*/
void kfree(const void *object)
{
struct folio *folio;
struct slab *slab;
struct kmem_cache *s;
trace_kfree(_RET_IP_, object);
if (unlikely(ZERO_OR_NULL_PTR(object)))
return;
folio = virt_to_folio(object);
if (unlikely(!folio_test_slab(folio))) {
free_large_kmalloc(folio, (void *)object);
return;
}
slab = folio_slab(folio);
s = slab->slab_cache;
__kmem_cache_free(s, (void *)object, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
/**
* __ksize -- Report full size of underlying allocation
* @object: pointer to the object
......@@ -1093,30 +972,6 @@ size_t __ksize(const void *object)
return slab_ksize(folio_slab(folio)->slab_cache);
}
void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE,
size, _RET_IP_);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_trace);
void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size)
{
void *ret = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_node_trace);
gfp_t kmalloc_fix_flags(gfp_t flags)
{
gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
......@@ -1129,57 +984,6 @@ gfp_t kmalloc_fix_flags(gfp_t flags)
return flags;
}
/*
* To avoid unnecessary overhead, we pass through large allocation requests
* directly to the page allocator. We use __GFP_COMP, because we will need to
* know the allocation order to free the pages properly in kfree.
*/
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
{
struct page *page;
void *ptr = NULL;
unsigned int order = get_order(size);
if (unlikely(flags & GFP_SLAB_BUG_MASK))
flags = kmalloc_fix_flags(flags);
flags |= __GFP_COMP;
page = alloc_pages_node(node, flags, order);
if (page) {
ptr = page_address(page);
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
PAGE_SIZE << order);
}
ptr = kasan_kmalloc_large(ptr, size, flags);
/* As ptr might get tagged, call kmemleak hook after KASAN. */
kmemleak_alloc(ptr, size, 1, flags);
kmsan_kmalloc_large(ptr, size, flags);
return ptr;
}
void *kmalloc_large(size_t size, gfp_t flags)
{
void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, NUMA_NO_NODE);
return ret;
}
EXPORT_SYMBOL(kmalloc_large);
void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{
void *ret = __kmalloc_large_node(size, flags, node);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, node);
return ret;
}
EXPORT_SYMBOL(kmalloc_large_node);
#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Randomize a generic freelist */
static void freelist_randomize(unsigned int *list,
......@@ -1222,12 +1026,8 @@ void cache_random_seq_destroy(struct kmem_cache *cachep)
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (0600)
#else
#ifdef CONFIG_SLUB_DEBUG
#define SLABINFO_RIGHTS (0400)
#endif
static void print_slabinfo_header(struct seq_file *m)
{
......@@ -1235,18 +1035,10 @@ static void print_slabinfo_header(struct seq_file *m)
* Output format version, so at least we can change it
* without _too_ many complaints.
*/
#ifdef CONFIG_DEBUG_SLAB
seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
seq_puts(m, "slabinfo - version: 2.1\n");
#endif
seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
seq_putc(m, '\n');
}
......@@ -1370,7 +1162,7 @@ static int __init slab_proc_init(void)
}
module_init(slab_proc_init);
#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */
#endif /* CONFIG_SLUB_DEBUG */
static __always_inline __realloc_size(2) void *
__do_krealloc(const void *p, size_t new_size, gfp_t flags)
......@@ -1488,10 +1280,3 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
if (__should_failslab(s, gfpflags))
return -ENOMEM;
return 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
......@@ -34,6 +34,7 @@
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/kmemleak.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
#include <linux/memcontrol.h>
......@@ -76,13 +77,28 @@
*
* Frozen slabs
*
* If a slab is frozen then it is exempt from list management. It is not
* on any list except per cpu partial list. The processor that froze the
* If a slab is frozen then it is exempt from list management. It is
* the cpu slab which is actively allocated from by the processor that
* froze it and it is not on any list. The processor that froze the
* slab is the one who can perform list operations on the slab. Other
* processors may put objects onto the freelist but the processor that
* froze the slab is the only one that can retrieve the objects from the
* slab's freelist.
*
* CPU partial slabs
*
* The partially empty slabs cached on the CPU partial list are used
* for performance reasons, which speeds up the allocation process.
* These slabs are not frozen, but are also exempt from list management,
* by clearing the PG_workingset flag when moving out of the node
* partial list. Please see __slab_free() for more details.
*
* To sum up, the current scheme is:
* - node partial slab: PG_Workingset && !frozen
* - cpu partial slab: !PG_Workingset && !frozen
* - cpu slab: !PG_Workingset && frozen
* - full slab: !PG_Workingset && !frozen
*
* list_lock
*
* The list_lock protects the partial and full list on each node and
......@@ -204,9 +220,9 @@ DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
/* Structure holding parameters for get_partial() call chain */
struct partial_context {
struct slab **slab;
gfp_t flags;
unsigned int orig_size;
void *object;
};
static inline bool kmem_cache_debug(struct kmem_cache *s)
......@@ -330,6 +346,60 @@ static void debugfs_slab_add(struct kmem_cache *);
static inline void debugfs_slab_add(struct kmem_cache *s) { }
#endif
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
FREE_FASTPATH, /* Free to cpu slab */
FREE_SLOWPATH, /* Freeing not to cpu slab */
FREE_FROZEN, /* Freeing to frozen slab */
FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
FREE_REMOVE_PARTIAL, /* Freeing removes last object */
ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */
ALLOC_SLAB, /* Cpu slab acquired from page allocator */
ALLOC_REFILL, /* Refill cpu slab from slab freelist */
ALLOC_NODE_MISMATCH, /* Switching cpu slab */
FREE_SLAB, /* Slab freed to the page allocator */
CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */
DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
DEACTIVATE_BYPASS, /* Implicit deactivation */
ORDER_FALLBACK, /* Number of times fallback was necessary */
CMPXCHG_DOUBLE_CPU_FAIL,/* Failures of this_cpu_cmpxchg_double */
CMPXCHG_DOUBLE_FAIL, /* Failures of slab freelist update */
CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */
CPU_PARTIAL_FREE, /* Refill cpu partial on free */
CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
NR_SLUB_STAT_ITEMS
};
#ifndef CONFIG_SLUB_TINY
/*
* When changing the layout, make sure freelist and tid are still compatible
* with this_cpu_cmpxchg_double() alignment requirements.
*/
struct kmem_cache_cpu {
union {
struct {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
};
freelist_aba_t freelist_tid;
};
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct slab *partial; /* Partially allocated frozen slabs */
#endif
local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
unsigned int stat[NR_SLUB_STAT_ITEMS];
#endif
};
#endif /* CONFIG_SLUB_TINY */
static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
......@@ -341,6 +411,41 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
#endif
}
static inline
void stat_add(const struct kmem_cache *s, enum stat_item si, int v)
{
#ifdef CONFIG_SLUB_STATS
raw_cpu_add(s->cpu_slab->stat[si], v);
#endif
}
/*
* The slab lists for all objects.
*/
struct kmem_cache_node {
spinlock_t list_lock;
unsigned long nr_partial;
struct list_head partial;
#ifdef CONFIG_SLUB_DEBUG
atomic_long_t nr_slabs;
atomic_long_t total_objects;
struct list_head full;
#endif
};
static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
return s->node[node];
}
/*
* Iterator over all nodes. The body will be executed for each node that has
* a kmem_cache_node structure allocated (which is true for all online nodes)
*/
#define for_each_kmem_cache_node(__s, __node, __n) \
for (__node = 0; __node < nr_node_ids; __node++) \
if ((__n = get_node(__s, __node)))
/*
* Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
* Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily
......@@ -522,7 +627,7 @@ static __always_inline void slab_unlock(struct slab *slab)
struct page *page = slab_page(slab);
VM_BUG_ON_PAGE(PageTail(page), page);
__bit_spin_unlock(PG_locked, &page->flags);
bit_spin_unlock(PG_locked, &page->flags);
}
static inline bool
......@@ -1759,12 +1864,214 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
#endif
#endif /* CONFIG_SLUB_DEBUG */
static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
{
return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
}
#ifdef CONFIG_MEMCG_KMEM
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
kfree(slab_objcgs(slab));
slab->memcg_data = 0;
}
static inline size_t obj_full_size(struct kmem_cache *s)
{
/*
* For each accounted object there is an extra space which is used
* to store obj_cgroup membership. Charge it too.
*/
return s->size + sizeof(struct obj_cgroup *);
}
/*
* Returns false if the allocation should fail.
*/
static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t objects, gfp_t flags)
{
/*
* The obtained objcg pointer is safe to use within the current scope,
* defined by current task or set_active_memcg() pair.
* obj_cgroup_get() is used to get a permanent reference.
*/
struct obj_cgroup *objcg = current_obj_cgroup();
if (!objcg)
return true;
if (lru) {
int ret;
struct mem_cgroup *memcg;
memcg = get_mem_cgroup_from_objcg(objcg);
ret = memcg_list_lru_alloc(memcg, lru, flags);
css_put(&memcg->css);
if (ret)
return false;
}
if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
return false;
*objcgp = objcg;
return true;
}
/*
* Returns false if the allocation should fail.
*/
static __fastpath_inline
bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
struct obj_cgroup **objcgp, size_t objects,
gfp_t flags)
{
if (!memcg_kmem_online())
return true;
if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
return true;
return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects,
flags));
}
static void __memcg_slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg,
gfp_t flags, size_t size,
void **p)
{
struct slab *slab;
unsigned long off;
size_t i;
flags &= gfp_allowed_mask;
for (i = 0; i < size; i++) {
if (likely(p[i])) {
slab = virt_to_slab(p[i]);
if (!slab_objcgs(slab) &&
memcg_alloc_slab_cgroups(slab, s, flags, false)) {
obj_cgroup_uncharge(objcg, obj_full_size(s));
continue;
}
off = obj_to_index(s, slab, p[i]);
obj_cgroup_get(objcg);
slab_objcgs(slab)[off] = objcg;
mod_objcg_state(objcg, slab_pgdat(slab),
cache_vmstat_idx(s), obj_full_size(s));
} else {
obj_cgroup_uncharge(objcg, obj_full_size(s));
}
}
}
static __fastpath_inline
void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
gfp_t flags, size_t size, void **p)
{
if (likely(!memcg_kmem_online() || !objcg))
return;
return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
}
static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects,
struct obj_cgroup **objcgs)
{
for (int i = 0; i < objects; i++) {
struct obj_cgroup *objcg;
unsigned int off;
off = obj_to_index(s, slab, p[i]);
objcg = objcgs[off];
if (!objcg)
continue;
objcgs[off] = NULL;
obj_cgroup_uncharge(objcg, obj_full_size(s));
mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
-obj_full_size(s));
obj_cgroup_put(objcg);
}
}
static __fastpath_inline
void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
int objects)
{
struct obj_cgroup **objcgs;
if (!memcg_kmem_online())
return;
objcgs = slab_objcgs(slab);
if (likely(!objcgs))
return;
__memcg_slab_free_hook(s, slab, p, objects, objcgs);
}
static inline
void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
struct obj_cgroup *objcg)
{
if (objcg)
obj_cgroup_uncharge(objcg, objects * obj_full_size(s));
}
#else /* CONFIG_MEMCG_KMEM */
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
{
return NULL;
}
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
}
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t objects, gfp_t flags)
{
return true;
}
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg,
gfp_t flags, size_t size,
void **p)
{
}
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects)
{
}
static inline
void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
struct obj_cgroup *objcg)
{
}
#endif /* CONFIG_MEMCG_KMEM */
/*
* Hooks for other subsystems that check memory allocations. In a typical
* production configuration these hooks all should produce no code at all.
*
* Returns true if freeing of the object can proceed, false if its reuse
* was delayed by KASAN quarantine, or it was returned to KFENCE.
*/
static __always_inline bool slab_free_hook(struct kmem_cache *s,
void *x, bool init)
static __always_inline
bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
{
kmemleak_free_recursive(x, s->flags);
kmsan_slab_free(s, x);
......@@ -1779,6 +2086,9 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
__kcsan_check_access(x, s->object_size,
KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
if (kfence_free(x))
return false;
/*
* As memory initialization might be integrated into KASAN,
* kasan_slab_free and initialization memset's must be
......@@ -1787,7 +2097,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
* The initialization memset's clear the object and the metadata,
* but don't touch the SLAB redzone.
*/
if (init) {
if (unlikely(init)) {
int rsize;
if (!kasan_has_integrated_init())
......@@ -1797,7 +2107,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
s->size - s->inuse - rsize);
}
/* KASAN might put x into memory quarantine, delaying its reuse. */
return kasan_slab_free(s, x, init);
return !kasan_slab_free(s, x, init);
}
static inline bool slab_free_freelist_hook(struct kmem_cache *s,
......@@ -1807,23 +2117,26 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
void *object;
void *next = *head;
void *old_tail = *tail ? *tail : *head;
void *old_tail = *tail;
bool init;
if (is_kfence_address(next)) {
slab_free_hook(s, next, false);
return true;
return false;
}
/* Head and tail of the reconstructed freelist */
*head = NULL;
*tail = NULL;
init = slab_want_init_on_free(s);
do {
object = next;
next = get_freepointer(s, object);
/* If object's reuse doesn't have to be delayed */
if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
if (likely(slab_free_hook(s, object, init))) {
/* Move object to the new freelist */
set_freepointer(s, object, *head);
*head = object;
......@@ -1838,9 +2151,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
}
} while (object != old_tail);
if (*head == *tail)
*tail = NULL;
return *head != NULL;
}
......@@ -1993,6 +2303,26 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
static __always_inline void account_slab(struct slab *slab, int order,
struct kmem_cache *s, gfp_t gfp)
{
if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
memcg_alloc_slab_cgroups(slab, s, gfp, true);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
PAGE_SIZE << order);
}
static __always_inline void unaccount_slab(struct slab *slab, int order,
struct kmem_cache *s)
{
if (memcg_kmem_online())
memcg_free_slab_cgroups(slab);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
}
static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
struct slab *slab;
......@@ -2116,6 +2446,25 @@ static void discard_slab(struct kmem_cache *s, struct slab *slab)
free_slab(s, slab);
}
/*
* SLUB reuses PG_workingset bit to keep track of whether it's on
* the per-node partial list.
*/
static inline bool slab_test_node_partial(const struct slab *slab)
{
return folio_test_workingset((struct folio *)slab_folio(slab));
}
static inline void slab_set_node_partial(struct slab *slab)
{
set_bit(PG_workingset, folio_flags(slab_folio(slab), 0));
}
static inline void slab_clear_node_partial(struct slab *slab)
{
clear_bit(PG_workingset, folio_flags(slab_folio(slab), 0));
}
/*
* Management of partially allocated slabs.
*/
......@@ -2127,6 +2476,7 @@ __add_partial(struct kmem_cache_node *n, struct slab *slab, int tail)
list_add_tail(&slab->slab_list, &n->partial);
else
list_add(&slab->slab_list, &n->partial);
slab_set_node_partial(slab);
}
static inline void add_partial(struct kmem_cache_node *n,
......@@ -2141,11 +2491,12 @@ static inline void remove_partial(struct kmem_cache_node *n,
{
lockdep_assert_held(&n->list_lock);
list_del(&slab->slab_list);
slab_clear_node_partial(slab);
n->nr_partial--;
}
/*
* Called only for kmem_cache_debug() caches instead of acquire_slab(), with a
* Called only for kmem_cache_debug() caches instead of remove_partial(), with a
* slab from the n->partial list. Remove only a single object from the slab, do
* the alloc_debug_processing() checks and leave the slab on the list, or move
* it to full list if it was the last free object.
......@@ -2213,51 +2564,6 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s,
return object;
}
/*
* Remove slab from the partial list, freeze it and
* return the pointer to the freelist.
*
* Returns a list of objects or NULL if it fails.
*/
static inline void *acquire_slab(struct kmem_cache *s,
struct kmem_cache_node *n, struct slab *slab,
int mode)
{
void *freelist;
unsigned long counters;
struct slab new;
lockdep_assert_held(&n->list_lock);
/*
* Zap the freelist and set the frozen bit.
* The old freelist is the list of objects for the
* per cpu allocation list.
*/
freelist = slab->freelist;
counters = slab->counters;
new.counters = counters;
if (mode) {
new.inuse = slab->objects;
new.freelist = NULL;
} else {
new.freelist = freelist;
}
VM_BUG_ON(new.frozen);
new.frozen = 1;
if (!__slab_update_freelist(s, slab,
freelist, counters,
new.freelist, new.counters,
"acquire_slab"))
return NULL;
remove_partial(n, slab);
WARN_ON(!freelist);
return freelist;
}
#ifdef CONFIG_SLUB_CPU_PARTIAL
static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain);
#else
......@@ -2269,11 +2575,11 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags);
/*
* Try to allocate a partial slab from a specific node.
*/
static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
struct partial_context *pc)
static struct slab *get_partial_node(struct kmem_cache *s,
struct kmem_cache_node *n,
struct partial_context *pc)
{
struct slab *slab, *slab2;
void *object = NULL;
struct slab *slab, *slab2, *partial = NULL;
unsigned long flags;
unsigned int partial_slabs = 0;
......@@ -2288,27 +2594,25 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) {
void *t;
if (!pfmemalloc_match(slab, pc->flags))
continue;
if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
object = alloc_single_from_partial(s, n, slab,
void *object = alloc_single_from_partial(s, n, slab,
pc->orig_size);
if (object)
if (object) {
partial = slab;
pc->object = object;
break;
}
continue;
}
t = acquire_slab(s, n, slab, object == NULL);
if (!t)
break;
remove_partial(n, slab);
if (!object) {
*pc->slab = slab;
if (!partial) {
partial = slab;
stat(s, ALLOC_FROM_PARTIAL);
object = t;
} else {
put_cpu_partial(s, slab, 0);
stat(s, CPU_PARTIAL_NODE);
......@@ -2324,20 +2628,21 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
}
spin_unlock_irqrestore(&n->list_lock, flags);
return object;
return partial;
}
/*
* Get a slab from somewhere. Search in increasing NUMA distances.
*/
static void *get_any_partial(struct kmem_cache *s, struct partial_context *pc)
static struct slab *get_any_partial(struct kmem_cache *s,
struct partial_context *pc)
{
#ifdef CONFIG_NUMA
struct zonelist *zonelist;
struct zoneref *z;
struct zone *zone;
enum zone_type highest_zoneidx = gfp_zone(pc->flags);
void *object;
struct slab *slab;
unsigned int cpuset_mems_cookie;
/*
......@@ -2372,8 +2677,8 @@ static void *get_any_partial(struct kmem_cache *s, struct partial_context *pc)
if (n && cpuset_zone_allowed(zone, pc->flags) &&
n->nr_partial > s->min_partial) {
object = get_partial_node(s, n, pc);
if (object) {
slab = get_partial_node(s, n, pc);
if (slab) {
/*
* Don't check read_mems_allowed_retry()
* here - if mems_allowed was updated in
......@@ -2381,7 +2686,7 @@ static void *get_any_partial(struct kmem_cache *s, struct partial_context *pc)
* between allocation and the cpuset
* update
*/
return object;
return slab;
}
}
}
......@@ -2393,17 +2698,18 @@ static void *get_any_partial(struct kmem_cache *s, struct partial_context *pc)
/*
* Get a partial slab, lock it and return it.
*/
static void *get_partial(struct kmem_cache *s, int node, struct partial_context *pc)
static struct slab *get_partial(struct kmem_cache *s, int node,
struct partial_context *pc)
{
void *object;
struct slab *slab;
int searchnode = node;
if (node == NUMA_NO_NODE)
searchnode = numa_mem_id();
object = get_partial_node(s, get_node(s, searchnode), pc);
if (object || node != NUMA_NO_NODE)
return object;
slab = get_partial_node(s, get_node(s, searchnode), pc);
if (slab || node != NUMA_NO_NODE)
return slab;
return get_any_partial(s, pc);
}
......@@ -2492,10 +2798,8 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
void *freelist)
{
enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
struct kmem_cache_node *n = get_node(s, slab_nid(slab));
int free_delta = 0;
enum slab_modes mode = M_NONE;
void *nextfree, *freelist_iter, *freelist_tail;
int tail = DEACTIVATE_TO_HEAD;
unsigned long flags = 0;
......@@ -2533,80 +2837,52 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
/*
* Stage two: Unfreeze the slab while splicing the per-cpu
* freelist to the head of slab's freelist.
*
* Ensure that the slab is unfrozen while the list presence
* reflects the actual number of objects during unfreeze.
*
* We first perform cmpxchg holding lock and insert to list
* when it succeed. If there is mismatch then the slab is not
* unfrozen and number of objects in the slab may have changed.
* Then release lock and retry cmpxchg again.
*/
redo:
old.freelist = READ_ONCE(slab->freelist);
old.counters = READ_ONCE(slab->counters);
VM_BUG_ON(!old.frozen);
/* Determine target state of the slab */
new.counters = old.counters;
if (freelist_tail) {
new.inuse -= free_delta;
set_freepointer(s, freelist_tail, old.freelist);
new.freelist = freelist;
} else
new.freelist = old.freelist;
new.frozen = 0;
do {
old.freelist = READ_ONCE(slab->freelist);
old.counters = READ_ONCE(slab->counters);
VM_BUG_ON(!old.frozen);
/* Determine target state of the slab */
new.counters = old.counters;
new.frozen = 0;
if (freelist_tail) {
new.inuse -= free_delta;
set_freepointer(s, freelist_tail, old.freelist);
new.freelist = freelist;
} else {
new.freelist = old.freelist;
}
} while (!slab_update_freelist(s, slab,
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab"));
/*
* Stage three: Manipulate the slab list based on the updated state.
*/
if (!new.inuse && n->nr_partial >= s->min_partial) {
mode = M_FREE;
stat(s, DEACTIVATE_EMPTY);
discard_slab(s, slab);
stat(s, FREE_SLAB);
} else if (new.freelist) {
mode = M_PARTIAL;
/*
* Taking the spinlock removes the possibility that
* acquire_slab() will see a slab that is frozen
*/
spin_lock_irqsave(&n->list_lock, flags);
} else {
mode = M_FULL_NOLIST;
}
if (!slab_update_freelist(s, slab,
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab")) {
if (mode == M_PARTIAL)
spin_unlock_irqrestore(&n->list_lock, flags);
goto redo;
}
if (mode == M_PARTIAL) {
add_partial(n, slab, tail);
spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, tail);
} else if (mode == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
discard_slab(s, slab);
stat(s, FREE_SLAB);
} else if (mode == M_FULL_NOLIST) {
} else {
stat(s, DEACTIVATE_FULL);
}
}
#ifdef CONFIG_SLUB_CPU_PARTIAL
static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
static void __put_partials(struct kmem_cache *s, struct slab *partial_slab)
{
struct kmem_cache_node *n = NULL, *n2 = NULL;
struct slab *slab, *slab_to_discard = NULL;
unsigned long flags = 0;
while (partial_slab) {
struct slab new;
struct slab old;
slab = partial_slab;
partial_slab = slab->next;
......@@ -2619,23 +2895,7 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
spin_lock_irqsave(&n->list_lock, flags);
}
do {
old.freelist = slab->freelist;
old.counters = slab->counters;
VM_BUG_ON(!old.frozen);
new.counters = old.counters;
new.freelist = old.freelist;
new.frozen = 0;
} while (!__slab_update_freelist(s, slab,
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab"));
if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
if (unlikely(!slab->inuse && n->nr_partial >= s->min_partial)) {
slab->next = slab_to_discard;
slab_to_discard = slab;
} else {
......@@ -2658,9 +2918,9 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
}
/*
* Unfreeze all the cpu partial slabs.
* Put all the cpu partial slabs to the node partial list.
*/
static void unfreeze_partials(struct kmem_cache *s)
static void put_partials(struct kmem_cache *s)
{
struct slab *partial_slab;
unsigned long flags;
......@@ -2671,11 +2931,11 @@ static void unfreeze_partials(struct kmem_cache *s)
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
if (partial_slab)
__unfreeze_partials(s, partial_slab);
__put_partials(s, partial_slab);
}
static void unfreeze_partials_cpu(struct kmem_cache *s,
struct kmem_cache_cpu *c)
static void put_partials_cpu(struct kmem_cache *s,
struct kmem_cache_cpu *c)
{
struct slab *partial_slab;
......@@ -2683,12 +2943,11 @@ static void unfreeze_partials_cpu(struct kmem_cache *s,
c->partial = NULL;
if (partial_slab)
__unfreeze_partials(s, partial_slab);
__put_partials(s, partial_slab);
}
/*
* Put a slab that was just frozen (in __slab_free|get_partial_node) into a
* partial slab slot if available.
* Put a slab into a partial slab slot if available.
*
* If we did not find a slot then simply move all the partials to the
* per node partial list.
......@@ -2696,7 +2955,7 @@ static void unfreeze_partials_cpu(struct kmem_cache *s,
static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
{
struct slab *oldslab;
struct slab *slab_to_unfreeze = NULL;
struct slab *slab_to_put = NULL;
unsigned long flags;
int slabs = 0;
......@@ -2711,7 +2970,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
* per node partial list. Postpone the actual unfreezing
* outside of the critical section.
*/
slab_to_unfreeze = oldslab;
slab_to_put = oldslab;
oldslab = NULL;
} else {
slabs = oldslab->slabs;
......@@ -2727,17 +2986,17 @@ static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
if (slab_to_unfreeze) {
__unfreeze_partials(s, slab_to_unfreeze);
if (slab_to_put) {
__put_partials(s, slab_to_put);
stat(s, CPU_PARTIAL_DRAIN);
}
}
#else /* CONFIG_SLUB_CPU_PARTIAL */
static inline void unfreeze_partials(struct kmem_cache *s) { }
static inline void unfreeze_partials_cpu(struct kmem_cache *s,
struct kmem_cache_cpu *c) { }
static inline void put_partials(struct kmem_cache *s) { }
static inline void put_partials_cpu(struct kmem_cache *s,
struct kmem_cache_cpu *c) { }
#endif /* CONFIG_SLUB_CPU_PARTIAL */
......@@ -2779,7 +3038,7 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
stat(s, CPUSLAB_FLUSH);
}
unfreeze_partials_cpu(s, c);
put_partials_cpu(s, c);
}
struct slub_flush_work {
......@@ -2807,7 +3066,7 @@ static void flush_cpu_slab(struct work_struct *w)
if (c->slab)
flush_slab(s, c);
unfreeze_partials(s);
put_partials(s);
}
static bool has_cpu_slab(int cpu, struct kmem_cache *s)
......@@ -3073,6 +3332,33 @@ static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
return freelist;
}
/*
* Freeze the partial slab and return the pointer to the freelist.
*/
static inline void *freeze_slab(struct kmem_cache *s, struct slab *slab)
{
struct slab new;
unsigned long counters;
void *freelist;
do {
freelist = slab->freelist;
counters = slab->counters;
new.counters = counters;
VM_BUG_ON(new.frozen);
new.inuse = slab->objects;
new.frozen = 1;
} while (!slab_update_freelist(s, slab,
freelist, counters,
NULL, new.counters,
"freeze_slab"));
return freelist;
}
/*
* Slow path. The lockless freelist is empty or we need to perform
* debugging duties.
......@@ -3115,7 +3401,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
node = NUMA_NO_NODE;
goto new_slab;
}
redo:
if (unlikely(!node_match(slab, node))) {
/*
......@@ -3191,7 +3476,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
new_slab:
if (slub_percpu_partial(c)) {
#ifdef CONFIG_SLUB_CPU_PARTIAL
while (slub_percpu_partial(c)) {
local_lock_irqsave(&s->cpu_slab->lock, flags);
if (unlikely(c->slab)) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
......@@ -3203,21 +3489,45 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
goto new_objects;
}
slab = c->slab = slub_percpu_partial(c);
slab = slub_percpu_partial(c);
slub_set_percpu_partial(c, slab);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
stat(s, CPU_PARTIAL_ALLOC);
goto redo;
if (unlikely(!node_match(slab, node) ||
!pfmemalloc_match(slab, gfpflags))) {
slab->next = NULL;
__put_partials(s, slab);
continue;
}
freelist = freeze_slab(s, slab);
goto retry_load_slab;
}
#endif
new_objects:
pc.flags = gfpflags;
pc.slab = &slab;
pc.orig_size = orig_size;
freelist = get_partial(s, node, &pc);
if (freelist)
goto check_new_slab;
slab = get_partial(s, node, &pc);
if (slab) {
if (kmem_cache_debug(s)) {
freelist = pc.object;
/*
* For debug caches here we had to go through
* alloc_single_from_partial() so just store the
* tracking info and return the object.
*/
if (s->flags & SLAB_STORE_USER)
set_track(s, freelist, TRACK_ALLOC, addr);
return freelist;
}
freelist = freeze_slab(s, slab);
goto retry_load_slab;
}
slub_put_cpu_ptr(s->cpu_slab);
slab = new_slab(s, gfpflags, node);
......@@ -3253,20 +3563,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
inc_slabs_node(s, slab_nid(slab), slab->objects);
check_new_slab:
if (kmem_cache_debug(s)) {
/*
* For debug caches here we had to go through
* alloc_single_from_partial() so just store the tracking info
* and return the object
*/
if (s->flags & SLAB_STORE_USER)
set_track(s, freelist, TRACK_ALLOC, addr);
return freelist;
}
if (unlikely(!pfmemalloc_match(slab, gfpflags))) {
/*
* For !pfmemalloc_match() case we don't load freelist so that
......@@ -3409,12 +3705,11 @@ static void *__slab_alloc_node(struct kmem_cache *s,
void *object;
pc.flags = gfpflags;
pc.slab = &slab;
pc.orig_size = orig_size;
object = get_partial(s, node, &pc);
slab = get_partial(s, node, &pc);
if (object)
return object;
if (slab)
return pc.object;
slab = new_slab(s, gfpflags, node);
if (unlikely(!slab)) {
......@@ -3440,6 +3735,86 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
0, sizeof(void *));
}
noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
if (__should_failslab(s, gfpflags))
return -ENOMEM;
return 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
static __fastpath_inline
struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
struct obj_cgroup **objcgp,
size_t size, gfp_t flags)
{
flags &= gfp_allowed_mask;
might_alloc(flags);
if (unlikely(should_failslab(s, flags)))
return NULL;
if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags)))
return NULL;
return s;
}
static __fastpath_inline
void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
gfp_t flags, size_t size, void **p, bool init,
unsigned int orig_size)
{
unsigned int zero_size = s->object_size;
bool kasan_init = init;
size_t i;
gfp_t init_flags = flags & gfp_allowed_mask;
/*
* For kmalloc object, the allocated memory size(object_size) is likely
* larger than the requested size(orig_size). If redzone check is
* enabled for the extra space, don't zero it, as it will be redzoned
* soon. The redzone operation for this extra space could be seen as a
* replacement of current poisoning under certain debug option, and
* won't break other sanity checks.
*/
if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
(s->flags & SLAB_KMALLOC))
zero_size = orig_size;
/*
* When slub_debug is enabled, avoid memory initialization integrated
* into KASAN and instead zero out the memory via the memset below with
* the proper size. Otherwise, KASAN might overwrite SLUB redzones and
* cause false-positive reports. This does not lead to a performance
* penalty on production builds, as slub_debug is not intended to be
* enabled there.
*/
if (__slub_debug_enabled())
kasan_init = false;
/*
* As memory initialization might be integrated into KASAN,
* kasan_slab_alloc and initialization memset must be
* kept together to avoid discrepancies in behavior.
*
* As p[i] might get tagged, memset and kmemleak hook come after KASAN.
*/
for (i = 0; i < size; i++) {
p[i] = kasan_slab_alloc(s, p[i], init_flags, kasan_init);
if (p[i] && init && (!kasan_init ||
!kasan_has_integrated_init()))
memset(p[i], 0, zero_size);
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, init_flags);
kmsan_slab_alloc(s, p[i], init_flags);
}
memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
}
/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
......@@ -3458,7 +3833,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
bool init = false;
s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
if (!s)
if (unlikely(!s))
return NULL;
object = kfence_alloc(s, orig_size, gfpflags);
......@@ -3480,53 +3855,169 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
return object;
}
static __fastpath_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags, unsigned long addr, size_t orig_size)
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size);
void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_,
s->object_size);
trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE);
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
static __fastpath_inline
void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags)
void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags)
{
void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
void *ret = slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, _RET_IP_,
s->object_size);
trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE);
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_lru);
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
/**
* kmem_cache_alloc_node - Allocate an object on the specified node
* @s: The cache to allocate from.
* @gfpflags: See kmalloc().
* @node: node number of the target node.
*
* Identical to kmem_cache_alloc but it will allocate memory on the given
* node, which can improve the performance for cpu bound structures.
*
* Fallback to other node is possible if __GFP_THISNODE is not set.
*
* Return: pointer to the new object or %NULL in case of error
*/
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
return __kmem_cache_alloc_lru(s, NULL, gfpflags);
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, node);
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
EXPORT_SYMBOL(kmem_cache_alloc_node);
void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags)
/*
* To avoid unnecessary overhead, we pass through large allocation requests
* directly to the page allocator. We use __GFP_COMP, because we will need to
* know the allocation order to free the pages properly in kfree.
*/
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
{
return __kmem_cache_alloc_lru(s, lru, gfpflags);
struct page *page;
void *ptr = NULL;
unsigned int order = get_order(size);
if (unlikely(flags & GFP_SLAB_BUG_MASK))
flags = kmalloc_fix_flags(flags);
flags |= __GFP_COMP;
page = alloc_pages_node(node, flags, order);
if (page) {
ptr = page_address(page);
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
PAGE_SIZE << order);
}
ptr = kasan_kmalloc_large(ptr, size, flags);
/* As ptr might get tagged, call kmemleak hook after KASAN. */
kmemleak_alloc(ptr, size, 1, flags);
kmsan_kmalloc_large(ptr, size, flags);
return ptr;
}
EXPORT_SYMBOL(kmem_cache_alloc_lru);
void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t orig_size,
unsigned long caller)
void *kmalloc_large(size_t size, gfp_t flags)
{
return slab_alloc_node(s, NULL, gfpflags, node,
caller, orig_size);
void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, NUMA_NO_NODE);
return ret;
}
EXPORT_SYMBOL(kmalloc_large);
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
void *ret = __kmalloc_large_node(size, flags, node);
trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, node);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, node);
return ret;
}
EXPORT_SYMBOL(kmalloc_large_node);
static __always_inline
void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
unsigned long caller)
{
struct kmem_cache *s;
void *ret;
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = __kmalloc_large_node(size, flags, node);
trace_kmalloc(caller, ret, size,
PAGE_SIZE << get_order(size), flags, node);
return ret;
}
if (unlikely(!size))
return ZERO_SIZE_PTR;
s = kmalloc_slab(size, flags, caller);
ret = slab_alloc_node(s, NULL, flags, node, caller, size);
ret = kasan_kmalloc(s, ret, size, flags);
trace_kmalloc(caller, ret, size, s->size, flags, node);
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
return __do_kmalloc_node(size, flags, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node);
void *__kmalloc(size_t size, gfp_t flags)
{
return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc);
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
int node, unsigned long caller)
{
return __do_kmalloc_node(size, flags, node, caller);
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE,
_RET_IP_, size);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_trace);
void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_node_trace);
static noinline void free_to_partial_list(
struct kmem_cache *s, struct slab *slab,
......@@ -3608,12 +4099,10 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
unsigned long counters;
struct kmem_cache_node *n = NULL;
unsigned long flags;
bool on_node_partial;
stat(s, FREE_SLOWPATH);
if (kfence_free(head))
return;
if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
free_to_partial_list(s, slab, head, tail, cnt, addr);
return;
......@@ -3631,18 +4120,8 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
was_frozen = new.frozen;
new.inuse -= cnt;
if ((!new.inuse || !prior) && !was_frozen) {
if (kmem_cache_has_cpu_partial(s) && !prior) {
/*
* Slab was on no list before and will be
* partially empty
* We can defer the list move and instead
* freeze it.
*/
new.frozen = 1;
} else { /* Needs to be taken off a list */
/* Needs to be taken off a list */
if (!kmem_cache_has_cpu_partial(s) || prior) {
n = get_node(s, slab_nid(slab));
/*
......@@ -3655,6 +4134,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
*/
spin_lock_irqsave(&n->list_lock, flags);
on_node_partial = slab_test_node_partial(slab);
}
}
......@@ -3671,9 +4151,9 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
* activity can be necessary.
*/
stat(s, FREE_FROZEN);
} else if (new.frozen) {
} else if (kmem_cache_has_cpu_partial(s) && !prior) {
/*
* If we just froze the slab then put it onto the
* If we started with a full slab then put it onto the
* per cpu partial list.
*/
put_cpu_partial(s, slab, 1);
......@@ -3683,6 +4163,15 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
return;
}
/*
* This slab was partially empty but not on the per-node partial list,
* in which case we shouldn't manipulate its list, just return.
*/
if (prior && !on_node_partial) {
spin_unlock_irqrestore(&n->list_lock, flags);
return;
}
if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
goto slab_empty;
......@@ -3735,7 +4224,6 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
struct slab *slab, void *head, void *tail,
int cnt, unsigned long addr)
{
void *tail_obj = tail ? : head;
struct kmem_cache_cpu *c;
unsigned long tid;
void **freelist;
......@@ -3754,14 +4242,14 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
barrier();
if (unlikely(slab != c->slab)) {
__slab_free(s, slab, head, tail_obj, cnt, addr);
__slab_free(s, slab, head, tail, cnt, addr);
return;
}
if (USE_LOCKLESS_FAST_PATH()) {
freelist = READ_ONCE(c->freelist);
set_freepointer(s, tail_obj, freelist);
set_freepointer(s, tail, freelist);
if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
note_cmpxchg_failure("slab_free", s, tid);
......@@ -3778,60 +4266,143 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
tid = c->tid;
freelist = c->freelist;
set_freepointer(s, tail_obj, freelist);
set_freepointer(s, tail, freelist);
c->freelist = head;
c->tid = next_tid(tid);
local_unlock(&s->cpu_slab->lock);
}
stat(s, FREE_FASTPATH);
stat_add(s, FREE_FASTPATH, cnt);
}
#else /* CONFIG_SLUB_TINY */
static void do_slab_free(struct kmem_cache *s,
struct slab *slab, void *head, void *tail,
int cnt, unsigned long addr)
{
void *tail_obj = tail ? : head;
__slab_free(s, slab, head, tail_obj, cnt, addr);
__slab_free(s, slab, head, tail, cnt, addr);
}
#endif /* CONFIG_SLUB_TINY */
static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab,
void *head, void *tail, void **p, int cnt,
unsigned long addr)
static __fastpath_inline
void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
unsigned long addr)
{
memcg_slab_free_hook(s, slab, &object, 1);
if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
do_slab_free(s, slab, object, object, 1, addr);
}
static __fastpath_inline
void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
void *tail, void **p, int cnt, unsigned long addr)
{
memcg_slab_free_hook(s, slab, p, cnt);
/*
* With KASAN enabled slab_free_freelist_hook modifies the freelist
* to remove objects, whose reuse must be delayed.
*/
if (slab_free_freelist_hook(s, &head, &tail, &cnt))
if (likely(slab_free_freelist_hook(s, &head, &tail, &cnt)))
do_slab_free(s, slab, head, tail, cnt, addr);
}
#ifdef CONFIG_KASAN_GENERIC
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
{
do_slab_free(cache, virt_to_slab(x), x, NULL, 1, addr);
do_slab_free(cache, virt_to_slab(x), x, x, 1, addr);
}
#endif
void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller)
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
slab_free(s, virt_to_slab(x), x, NULL, &x, 1, caller);
struct slab *slab;
slab = virt_to_slab(obj);
if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n", __func__))
return NULL;
return slab->slab_cache;
}
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{
struct kmem_cache *cachep;
if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
!kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
return s;
cachep = virt_to_cache(x);
if (WARN(cachep && cachep != s,
"%s: Wrong slab cache. %s but object is from %s\n",
__func__, s->name, cachep->name))
print_tracking(cachep, x);
return cachep;
}
/**
* kmem_cache_free - Deallocate an object
* @s: The cache the allocation was from.
* @x: The previously allocated object.
*
* Free an object which was previously allocated from this
* cache.
*/
void kmem_cache_free(struct kmem_cache *s, void *x)
{
s = cache_from_obj(s, x);
if (!s)
return;
trace_kmem_cache_free(_RET_IP_, x, s);
slab_free(s, virt_to_slab(x), x, NULL, &x, 1, _RET_IP_);
slab_free(s, virt_to_slab(x), x, _RET_IP_);
}
EXPORT_SYMBOL(kmem_cache_free);
static void free_large_kmalloc(struct folio *folio, void *object)
{
unsigned int order = folio_order(folio);
if (WARN_ON_ONCE(order == 0))
pr_warn_once("object pointer: 0x%p\n", object);
kmemleak_free(object);
kasan_kfree_large(object);
kmsan_kfree_large(object);
mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(folio_page(folio, 0), order);
}
/**
* kfree - free previously allocated memory
* @object: pointer returned by kmalloc() or kmem_cache_alloc()
*
* If @object is NULL, no operation is performed.
*/
void kfree(const void *object)
{
struct folio *folio;
struct slab *slab;
struct kmem_cache *s;
void *x = (void *)object;
trace_kfree(_RET_IP_, object);
if (unlikely(ZERO_OR_NULL_PTR(object)))
return;
folio = virt_to_folio(object);
if (unlikely(!folio_test_slab(folio))) {
free_large_kmalloc(folio, (void *)object);
return;
}
slab = folio_slab(folio);
s = slab->slab_cache;
slab_free(s, slab, x, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
struct detached_freelist {
struct slab *slab;
void *tail;
......@@ -3911,6 +4482,27 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
return same;
}
/*
* Internal bulk free of objects that were not initialised by the post alloc
* hooks and thus should not be processed by the free hooks
*/
static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
{
if (!size)
return;
do {
struct detached_freelist df;
size = build_detached_freelist(s, size, p, &df);
if (!df.slab)
continue;
do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt,
_RET_IP_);
} while (likely(size));
}
/* Note that interrupts must be enabled when calling this function. */
void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
{
......@@ -3924,15 +4516,16 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
if (!df.slab)
continue;
slab_free(df.s, df.slab, df.freelist, df.tail, &p[size], df.cnt,
_RET_IP_);
slab_free_bulk(df.s, df.slab, df.freelist, df.tail, &p[size],
df.cnt, _RET_IP_);
} while (likely(size));
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
#ifndef CONFIG_SLUB_TINY
static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
size_t size, void **p, struct obj_cgroup *objcg)
static inline
int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
{
struct kmem_cache_cpu *c;
unsigned long irqflags;
......@@ -3986,6 +4579,7 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
c->freelist = get_freepointer(s, object);
p[i] = object;
maybe_wipe_obj_freeptr(s, p[i]);
stat(s, ALLOC_FASTPATH);
}
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, irqflags);
......@@ -3995,14 +4589,13 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
error:
slub_put_cpu_ptr(s->cpu_slab);
slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
kmem_cache_free_bulk(s, i, p);
__kmem_cache_free_bulk(s, i, p);
return 0;
}
#else /* CONFIG_SLUB_TINY */
static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
size_t size, void **p, struct obj_cgroup *objcg)
size_t size, void **p)
{
int i;
......@@ -4025,8 +4618,7 @@ static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
return i;
error:
slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
kmem_cache_free_bulk(s, i, p);
__kmem_cache_free_bulk(s, i, p);
return 0;
}
#endif /* CONFIG_SLUB_TINY */
......@@ -4046,15 +4638,19 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
if (unlikely(!s))
return 0;
i = __kmem_cache_alloc_bulk(s, flags, size, p, objcg);
i = __kmem_cache_alloc_bulk(s, flags, size, p);
/*
* memcg and kmem_cache debug support and memory initialization.
* Done outside of the IRQ disabled fastpath loop.
*/
if (i != 0)
if (likely(i != 0)) {
slab_post_alloc_hook(s, objcg, flags, size, p,
slab_want_init_on_alloc(flags, s), s->object_size);
} else {
memcg_slab_alloc_error_hook(s, size, objcg);
}
return i;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
......@@ -4831,6 +5427,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
if (free == slab->objects) {
list_move(&slab->slab_list, &discard);
slab_clear_node_partial(slab);
n->nr_partial--;
dec_slabs_node(s, node, slab->objects);
} else if (free <= SHRINK_PROMOTE_MAX)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment