Commit 893660b0 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'slab-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

 - SLOB deprecation and SLUB_TINY

   The SLOB allocator adds maintenance burden and stands in the way of
   API improvements [1]. Deprecate it by renaming the config option (to
   make users notice) to CONFIG_SLOB_DEPRECATED with updated help text.
   SLUB should be used instead as SLAB will be the next on the removal
   list.

   Based on reports from a riscv k210 board with 8MB RAM, add a
   CONFIG_SLUB_TINY option to minimize SLUB's memory usage at the
   expense of scalability. This has resolved the k210 regression [2] so
   in case there are no others (that wouldn't be resolvable by further
   tweaks to SLUB_TINY) plan is to remove SLOB in a few cycles.

   Existing defconfigs with CONFIG_SLOB are converted to
   CONFIG_SLUB_TINY.

 - kmalloc() slub_debug redzone improvements

   A series from Feng Tang that builds on the tracking or requested size
   for kmalloc() allocations (for caches with debugging enabled) added
   in 6.1, to make redzone checks consider the requested size and not
   the rounded up one, in order to catch more subtle buffer overruns.
   Includes new slub_kunit test.

 - struct slab fields reordering to accomodate larger rcu_head

   RCU folks would like to grow rcu_head with debugging options, which
   breaks current struct slab layout's assumptions, so reorganize it to
   make this possible.

 - Miscellaneous improvements/fixes:
     - __alloc_size checking compiler workaround (Kees Cook)
     - Optimize and cleanup SLUB's sysfs init (Rasmus Villemoes)
     - Make SLAB compatible with PROVE_RAW_LOCK_NESTING (Jiri Kosina)
     - Correct SLUB's percpu allocation estimates (Baoquan He)
     - Re-enableS LUB's run-time failslab sysfs control (Alexander Atanasov)
     - Make tools/vm/slabinfo more user friendly when not run as root (Rong Tao)
     - Dead code removal in SLUB (Hyeonggon Yoo)

* tag 'slab-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: (31 commits)
  mm, slob: rename CONFIG_SLOB to CONFIG_SLOB_DEPRECATED
  mm, slub: don't aggressively inline with CONFIG_SLUB_TINY
  mm, slub: remove percpu slabs with CONFIG_SLUB_TINY
  mm, slub: split out allocations from pre/post hooks
  mm/slub, kunit: Add a test case for kmalloc redzone check
  mm/slub, kunit: add SLAB_SKIP_KFENCE flag for cache creation
  mm, slub: refactor free debug processing
  mm, slab: ignore SLAB_RECLAIM_ACCOUNT with CONFIG_SLUB_TINY
  mm, slub: don't create kmalloc-rcl caches with CONFIG_SLUB_TINY
  mm, slub: lower the default slub_max_order with CONFIG_SLUB_TINY
  mm, slub: retain no free slabs on partial list with CONFIG_SLUB_TINY
  mm, slub: disable SYSFS support with CONFIG_SLUB_TINY
  mm, slub: add CONFIG_SLUB_TINY
  mm, slab: ignore hardened usercopy parameters when disabled
  slab: Remove special-casing of const 0 size allocations
  slab: Clean up SLOB vs kmalloc() definition
  mm/sl[au]b: rearrange struct slab fields to allow larger rcu_head
  mm/migrate: make isolate_movable_page() skip slab pages
  mm/slab: move and adjust kernel-doc for kmem_cache_alloc
  mm/slub, percpu: correct the calculation of early percpu allocation size
  ...
parents 98d0052d dc19745a
......@@ -116,6 +116,8 @@ options from the ``slub_debug`` parameter translate to the following files::
T trace
A failslab
failslab file is writable, so writing 1 or 0 will enable or disable
the option at runtime. Write returns -EINVAL if cache is an alias.
Careful with tracing: It may spew out lots of information and never stop if
used on the wrong slab.
......
......@@ -14,7 +14,8 @@ CONFIG_ARCH_EDB7211=y
CONFIG_ARCH_P720T=y
CONFIG_AEABI=y
# CONFIG_COREDUMP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
......
......@@ -13,7 +13,8 @@ CONFIG_CMDLINE="noinitrd root=/dev/mtdblock2 rootfstype=jffs2 fbcon=rotate:1"
CONFIG_FPE_NWFPE=y
CONFIG_PM=y
# CONFIG_SWAP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
......
......@@ -25,7 +25,8 @@ CONFIG_ARM_CLPS711X_CPUIDLE=y
CONFIG_JUMP_LABEL=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_COREDUMP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
......
......@@ -42,7 +42,8 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_BINFMT_MISC=y
# CONFIG_SWAP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_NET=y
CONFIG_PACKET=y
......
......@@ -49,7 +49,8 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_LDM_PARTITION=y
CONFIG_CMDLINE_PARTITION=y
CONFIG_BINFMT_MISC=y
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_COMPACTION is not set
CONFIG_NET=y
CONFIG_PACKET=y
......
......@@ -19,7 +19,8 @@ CONFIG_FPE_NWFPE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_SWAP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
......
......@@ -26,7 +26,8 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
# CONFIG_BLOCK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_COMPAT_BRK is not set
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_NET=y
......
......@@ -10,7 +10,8 @@ CONFIG_EXPERT=y
# CONFIG_AIO is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_MODULES=y
# CONFIG_BLOCK is not set
CONFIG_OPENRISC_BUILTIN_DTB="or1ksim"
......
......@@ -16,7 +16,8 @@ CONFIG_EXPERT=y
# CONFIG_AIO is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_MODULES=y
# CONFIG_BLOCK is not set
CONFIG_OPENRISC_BUILTIN_DTB="simple_smp"
......
......@@ -25,7 +25,8 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
CONFIG_NONPORTABLE=y
......
......@@ -17,7 +17,8 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
CONFIG_NONPORTABLE=y
......
......@@ -22,7 +22,8 @@ CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_MMU is not set
CONFIG_SOC_VIRT=y
CONFIG_NONPORTABLE=y
......
......@@ -10,7 +10,8 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_AIO is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
......
......@@ -11,7 +11,8 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
......
......@@ -21,7 +21,8 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_ELF_CORE is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
......
......@@ -9,7 +9,8 @@ CONFIG_LOG_BUF_SHIFT=14
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SHMEM is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_CPU_SUBTYPE_SH7706=y
CONFIG_MEMORY_START=0x0c000000
......
......@@ -20,7 +20,8 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_KALLSYMS_ALL=y
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
......
......@@ -302,7 +302,7 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
#ifdef CONFIG_KASAN_GENERIC
size_t kasan_metadata_size(struct kmem_cache *cache);
size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object);
slab_flags_t kasan_never_merge(void);
void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
slab_flags_t *flags);
......@@ -315,7 +315,8 @@ void kasan_record_aux_stack_noalloc(void *ptr);
#else /* CONFIG_KASAN_GENERIC */
/* Tag-based KASAN modes do not use per-object metadata. */
static inline size_t kasan_metadata_size(struct kmem_cache *cache)
static inline size_t kasan_metadata_size(struct kmem_cache *cache,
bool in_object)
{
return 0;
}
......
......@@ -42,7 +42,7 @@
* larger than PERCPU_DYNAMIC_EARLY_SIZE.
*/
#define PERCPU_DYNAMIC_EARLY_SLOTS 128
#define PERCPU_DYNAMIC_EARLY_SIZE (12 << 10)
#define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10)
/*
* PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
......
......@@ -140,7 +140,11 @@
/* The following flags affect the page allocator grouping pages by mobility */
/* Objects are reclaimable */
#ifndef CONFIG_SLUB_TINY
#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U)
#else
#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0)
#endif
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
/*
......@@ -347,12 +351,17 @@ enum kmalloc_cache_type {
#endif
#ifndef CONFIG_MEMCG_KMEM
KMALLOC_CGROUP = KMALLOC_NORMAL,
#else
KMALLOC_CGROUP,
#endif
#ifdef CONFIG_SLUB_TINY
KMALLOC_RECLAIM = KMALLOC_NORMAL,
#else
KMALLOC_RECLAIM,
#endif
#ifdef CONFIG_ZONE_DMA
KMALLOC_DMA,
#endif
#ifdef CONFIG_MEMCG_KMEM
KMALLOC_CGROUP,
#endif
NR_KMALLOC_TYPES
};
......@@ -452,7 +461,18 @@ static_assert(PAGE_SHIFT <= 20);
#endif /* !CONFIG_SLOB */
void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc;
/**
* kmem_cache_alloc - Allocate an object
* @cachep: The cache to allocate from.
* @flags: See kmalloc().
*
* Allocate an object from this cache.
* See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags.
*
* Return: pointer to the new object or %NULL in case of error
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc;
void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags) __assume_slab_alignment __malloc;
void kmem_cache_free(struct kmem_cache *s, void *objp);
......@@ -494,9 +514,9 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align
__alloc_size(1);
/**
* kmalloc - allocate memory
* kmalloc - allocate kernel memory
* @size: how many bytes of memory are required.
* @flags: the type of memory to allocate.
* @flags: describe the allocation context
*
* kmalloc is the normal method of allocating memory
* for objects smaller than page size in the kernel.
......@@ -523,12 +543,12 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align
* %GFP_ATOMIC
* Allocation will not sleep. May use emergency pools.
*
* %GFP_HIGHUSER
* Allocate memory from high memory on behalf of user.
*
* Also it is possible to set different flags by OR'ing
* in one or more of the following additional @flags:
*
* %__GFP_ZERO
* Zero the allocated memory before returning. Also see kzalloc().
*
* %__GFP_HIGH
* This allocation has high priority and may use emergency pools.
*
......@@ -547,42 +567,42 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align
* Try really hard to succeed the allocation but fail
* eventually.
*/
#ifndef CONFIG_SLOB
static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size)) {
#ifndef CONFIG_SLOB
if (__builtin_constant_p(size) && size) {
unsigned int index;
#endif
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags);
#ifndef CONFIG_SLOB
index = kmalloc_index(size);
if (!index)
return ZERO_SIZE_PTR;
index = kmalloc_index(size);
return kmalloc_trace(
kmalloc_caches[kmalloc_type(flags)][index],
flags, size);
#endif
}
return __kmalloc(size, flags);
}
#else
static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags);
return __kmalloc(size, flags);
}
#endif
#ifndef CONFIG_SLOB
static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
{
if (__builtin_constant_p(size)) {
if (__builtin_constant_p(size) && size) {
unsigned int index;
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large_node(size, flags, node);
index = kmalloc_index(size);
if (!index)
return ZERO_SIZE_PTR;
return kmalloc_node_trace(
kmalloc_caches[kmalloc_type(flags)][index],
flags, node, size);
......
......@@ -80,8 +80,10 @@ struct kmem_cache {
unsigned int *random_seq;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
......
......@@ -41,6 +41,7 @@ enum stat_item {
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
NR_SLUB_STAT_ITEMS };
#ifndef CONFIG_SLUB_TINY
/*
* When changing the layout, make sure freelist and tid are still compatible
* with this_cpu_cmpxchg_double() alignment requirements.
......@@ -57,6 +58,7 @@ struct kmem_cache_cpu {
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
};
#endif /* CONFIG_SLUB_TINY */
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c) ((c)->partial)
......@@ -88,7 +90,9 @@ struct kmem_cache_order_objects {
* Slab cache management.
*/
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
struct kmem_cache_cpu __percpu *cpu_slab;
#endif
/* Used for retrieving partial slabs, etc. */
slab_flags_t flags;
unsigned long min_partial;
......@@ -136,13 +140,15 @@ struct kmem_cache {
struct kasan_cache kasan_info;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
#ifdef CONFIG_SYSFS
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *);
void sysfs_slab_release(struct kmem_cache *);
......
......@@ -7,5 +7,6 @@ CONFIG_KERNEL_XZ=y
# CONFIG_KERNEL_LZO is not set
# CONFIG_KERNEL_LZ4 is not set
# CONFIG_SLAB is not set
# CONFIG_SLUB is not set
CONFIG_SLOB=y
# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
......@@ -37,7 +37,7 @@ menuconfig KASAN
(HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \
CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
HAVE_ARCH_KASAN_HW_TAGS
depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB)
select STACKDEPOT_ALWAYS_INIT
help
Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety
......
......@@ -9,10 +9,25 @@
static struct kunit_resource resource;
static int slab_errors;
/*
* Wrapper function for kmem_cache_create(), which reduces 2 parameters:
* 'align' and 'ctor', and sets SLAB_SKIP_KFENCE flag to avoid getting an
* object from kfence pool, where the operation could be caught by both
* our test and kfence sanity check.
*/
static struct kmem_cache *test_kmem_cache_create(const char *name,
unsigned int size, slab_flags_t flags)
{
struct kmem_cache *s = kmem_cache_create(name, size, 0,
(flags | SLAB_NO_USER_FLAGS), NULL);
s->flags |= SLAB_SKIP_KFENCE;
return s;
}
static void test_clobber_zone(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_alloc", 64,
SLAB_RED_ZONE);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kasan_disable_current();
......@@ -29,8 +44,8 @@ static void test_clobber_zone(struct kunit *test)
#ifndef CONFIG_KASAN
static void test_next_pointer(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
struct kmem_cache *s = test_kmem_cache_create("TestSlub_next_ptr_free",
64, SLAB_POISON);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
unsigned long tmp;
unsigned long *ptr_addr;
......@@ -74,8 +89,8 @@ static void test_next_pointer(struct kunit *test)
static void test_first_word(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
struct kmem_cache *s = test_kmem_cache_create("TestSlub_1th_word_free",
64, SLAB_POISON);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kmem_cache_free(s, p);
......@@ -89,8 +104,8 @@ static void test_first_word(struct kunit *test)
static void test_clobber_50th_byte(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
struct kmem_cache *s = test_kmem_cache_create("TestSlub_50th_word_free",
64, SLAB_POISON);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kmem_cache_free(s, p);
......@@ -105,8 +120,8 @@ static void test_clobber_50th_byte(struct kunit *test)
static void test_clobber_redzone_free(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_free", 64,
SLAB_RED_ZONE);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kasan_disable_current();
......@@ -120,6 +135,27 @@ static void test_clobber_redzone_free(struct kunit *test)
kmem_cache_destroy(s);
}
static void test_kmalloc_redzone_access(struct kunit *test)
{
struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32,
SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);
u8 *p = kmalloc_trace(s, GFP_KERNEL, 18);
kasan_disable_current();
/* Suppress the -Warray-bounds warning */
OPTIMIZER_HIDE_VAR(p);
p[18] = 0xab;
p[19] = 0xab;
validate_slab_cache(s);
KUNIT_EXPECT_EQ(test, 2, slab_errors);
kasan_enable_current();
kmem_cache_free(s, p);
kmem_cache_destroy(s);
}
static int test_init(struct kunit *test)
{
slab_errors = 0;
......@@ -139,6 +175,7 @@ static struct kunit_case test_cases[] = {
#endif
KUNIT_CASE(test_clobber_redzone_free),
KUNIT_CASE(test_kmalloc_redzone_access),
{}
};
......
......@@ -219,17 +219,43 @@ config SLUB
and has enhanced diagnostics. SLUB is the default choice for
a slab allocator.
config SLOB
config SLOB_DEPRECATED
depends on EXPERT
bool "SLOB (Simple Allocator)"
bool "SLOB (Simple Allocator - DEPRECATED)"
depends on !PREEMPT_RT
help
Deprecated and scheduled for removal in a few cycles. SLUB
recommended as replacement. CONFIG_SLUB_TINY can be considered
on systems with 16MB or less RAM.
If you need SLOB to stay, please contact linux-mm@kvack.org and
people listed in the SLAB ALLOCATOR section of MAINTAINERS file,
with your use case.
SLOB replaces the stock allocator with a drastically simpler
allocator. SLOB is generally more space efficient but
does not perform as well on large systems.
endchoice
config SLOB
bool
default y
depends on SLOB_DEPRECATED
config SLUB_TINY
bool "Configure SLUB for minimal memory footprint"
depends on SLUB && EXPERT
select SLAB_MERGE_DEFAULT
help
Configures the SLUB allocator in a way to achieve minimal memory
footprint, sacrificing scalability, debugging and other features.
This is intended only for the smallest system that had used the
SLOB allocator and is not recommended for systems with more than
16MB RAM.
If unsure, say N.
config SLAB_MERGE_DEFAULT
bool "Allow slab caches to be merged"
default y
......@@ -247,7 +273,7 @@ config SLAB_MERGE_DEFAULT
config SLAB_FREELIST_RANDOM
bool "Randomize slab freelist"
depends on SLAB || SLUB
depends on SLAB || (SLUB && !SLUB_TINY)
help
Randomizes the freelist order used on creating new pages. This
security feature reduces the predictability of the kernel slab
......@@ -255,7 +281,7 @@ config SLAB_FREELIST_RANDOM
config SLAB_FREELIST_HARDENED
bool "Harden slab freelist metadata"
depends on SLAB || SLUB
depends on SLAB || (SLUB && !SLUB_TINY)
help
Many kernel heap attacks try to target slab cache metadata and
other infrastructure. This options makes minor performance
......@@ -267,7 +293,7 @@ config SLAB_FREELIST_HARDENED
config SLUB_STATS
default n
bool "Enable SLUB performance statistics"
depends on SLUB && SYSFS
depends on SLUB && SYSFS && !SLUB_TINY
help
SLUB statistics are useful to debug SLUBs allocation behavior in
order find ways to optimize the allocator. This should never be
......@@ -279,7 +305,7 @@ config SLUB_STATS
config SLUB_CPU_PARTIAL
default y
depends on SLUB && SMP
depends on SLUB && SMP && !SLUB_TINY
bool "SLUB per cpu partial cache"
help
Per cpu partial caches accelerate objects allocation and freeing
......
......@@ -56,7 +56,7 @@ config DEBUG_SLAB
config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EXPERT
depends on SLUB && SYSFS
depends on SLUB && SYSFS && !SLUB_TINY
select STACKDEPOT if STACKTRACE_SUPPORT
help
SLUB has extensive debug support features. Disabling these can
......
......@@ -450,15 +450,22 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
__memset(alloc_meta, 0, sizeof(*alloc_meta));
}
size_t kasan_metadata_size(struct kmem_cache *cache)
size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object)
{
struct kasan_cache *info = &cache->kasan_info;
if (!kasan_requires_meta())
return 0;
return (cache->kasan_info.alloc_meta_offset ?
sizeof(struct kasan_alloc_meta) : 0) +
((cache->kasan_info.free_meta_offset &&
cache->kasan_info.free_meta_offset != KASAN_NO_FREE_META) ?
sizeof(struct kasan_free_meta) : 0);
if (in_object)
return (info->free_meta_offset ?
0 : sizeof(struct kasan_free_meta));
else
return (info->alloc_meta_offset ?
sizeof(struct kasan_alloc_meta) : 0) +
((info->free_meta_offset &&
info->free_meta_offset != KASAN_NO_FREE_META) ?
sizeof(struct kasan_free_meta) : 0);
}
static void __kasan_record_aux_stack(void *addr, bool can_alloc)
......
......@@ -74,13 +74,22 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode)
if (unlikely(!get_page_unless_zero(page)))
goto out;
if (unlikely(PageSlab(page)))
goto out_putpage;
/* Pairs with smp_wmb() in slab freeing, e.g. SLUB's __free_slab() */
smp_rmb();
/*
* Check PageMovable before holding a PG_lock because page's owner
* assumes anybody doesn't touch PG_lock of newly allocated page
* so unconditionally grabbing the lock ruins page's owner side.
* Check movable flag before taking the page lock because
* we use non-atomic bitops on newly allocated page flags so
* unconditionally grabbing the lock ruins page's owner side.
*/
if (unlikely(!__PageMovable(page)))
goto out_putpage;
/* Pairs with smp_wmb() in slab allocation, e.g. SLUB's alloc_slab_page() */
smp_rmb();
if (unlikely(PageSlab(page)))
goto out_putpage;
/*
* As movable pages are not isolated from LRU lists, concurrent
* compaction threads can race against page migration functions
......
......@@ -234,7 +234,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
parent->shared = NULL;
parent->alien = NULL;
parent->colour_next = 0;
spin_lock_init(&parent->list_lock);
raw_spin_lock_init(&parent->list_lock);
parent->free_objects = 0;
parent->free_touched = 0;
}
......@@ -559,9 +559,9 @@ static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
slab_node = slab_nid(slab);
n = get_node(cachep, slab_node);
spin_lock(&n->list_lock);
raw_spin_lock(&n->list_lock);
free_block(cachep, &objp, 1, slab_node, &list);
spin_unlock(&n->list_lock);
raw_spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
}
......@@ -684,7 +684,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
struct kmem_cache_node *n = get_node(cachep, node);
if (ac->avail) {
spin_lock(&n->list_lock);
raw_spin_lock(&n->list_lock);
/*
* Stuff objects into the remote nodes shared array first.
* That way we could avoid the overhead of putting the objects
......@@ -695,7 +695,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
free_block(cachep, ac->entry, ac->avail, node, list);
ac->avail = 0;
spin_unlock(&n->list_lock);
raw_spin_unlock(&n->list_lock);
}
}
......@@ -768,9 +768,9 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
slabs_destroy(cachep, &list);
} else {
n = get_node(cachep, slab_node);
spin_lock(&n->list_lock);
raw_spin_lock(&n->list_lock);
free_block(cachep, &objp, 1, slab_node, &list);
spin_unlock(&n->list_lock);
raw_spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
}
return 1;
......@@ -811,10 +811,10 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
*/
n = get_node(cachep, node);
if (n) {
spin_lock_irq(&n->list_lock);
raw_spin_lock_irq(&n->list_lock);
n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
cachep->num;
spin_unlock_irq(&n->list_lock);
raw_spin_unlock_irq(&n->list_lock);
return 0;
}
......@@ -893,7 +893,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
goto fail;
n = get_node(cachep, node);
spin_lock_irq(&n->list_lock);
raw_spin_lock_irq(&n->list_lock);
if (n->shared && force_change) {
free_block(cachep, n->shared->entry,
n->shared->avail, node, &list);
......@@ -911,7 +911,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
new_alien = NULL;
}
spin_unlock_irq(&n->list_lock);
raw_spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
/*
......@@ -950,7 +950,7 @@ static void cpuup_canceled(long cpu)
if (!n)
continue;
spin_lock_irq(&n->list_lock);
raw_spin_lock_irq(&n->list_lock);
/* Free limit for this kmem_cache_node */
n->free_limit -= cachep->batchcount;
......@@ -961,7 +961,7 @@ static void cpuup_canceled(long cpu)
nc->avail = 0;
if (!cpumask_empty(mask)) {
spin_unlock_irq(&n->list_lock);
raw_spin_unlock_irq(&n->list_lock);
goto free_slab;
}
......@@ -975,7 +975,7 @@ static void cpuup_canceled(long cpu)
alien = n->alien;
n->alien = NULL;
spin_unlock_irq(&n->list_lock);
raw_spin_unlock_irq(&n->list_lock);
kfree(shared);
if (alien) {
......@@ -1159,7 +1159,7 @@ static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *
/*
* Do not assume that spinlocks can be initialized via memcpy:
*/
spin_lock_init(&ptr->list_lock);
raw_spin_lock_init(&ptr->list_lock);
MAKE_ALL_LISTS(cachep, ptr, nodeid);
cachep->node[nodeid] = ptr;
......@@ -1330,11 +1330,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
for_each_kmem_cache_node(cachep, node, n) {
unsigned long total_slabs, free_slabs, free_objs;
spin_lock_irqsave(&n->list_lock, flags);
raw_spin_lock_irqsave(&n->list_lock, flags);
total_slabs = n->total_slabs;
free_slabs = n->free_slabs;
free_objs = n->free_objects;
spin_unlock_irqrestore(&n->list_lock, flags);
raw_spin_unlock_irqrestore(&n->list_lock, flags);
pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
node, total_slabs - free_slabs, total_slabs,
......@@ -1370,6 +1370,8 @@ static struct slab *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
account_slab(slab, cachep->gfporder, cachep, flags);
__folio_set_slab(folio);
/* Make the flag visible before any changes to folio->mapping */
smp_wmb();
/* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
if (sk_memalloc_socks() && page_is_pfmemalloc(folio_page(folio, 0)))
slab_set_pfmemalloc(slab);
......@@ -1387,9 +1389,11 @@ static void kmem_freepages(struct kmem_cache *cachep, struct slab *slab)
BUG_ON(!folio_test_slab(folio));
__slab_clear_pfmemalloc(slab);
__folio_clear_slab(folio);
page_mapcount_reset(folio_page(folio, 0));
folio->mapping = NULL;
/* Make the mapping reset visible before clearing the flag */
smp_wmb();
__folio_clear_slab(folio);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += 1 << order;
......@@ -2096,7 +2100,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
check_irq_off();
assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
assert_raw_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
#endif
}
......@@ -2104,7 +2108,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
check_irq_off();
assert_spin_locked(&get_node(cachep, node)->list_lock);
assert_raw_spin_locked(&get_node(cachep, node)->list_lock);
#endif
}
......@@ -2144,9 +2148,9 @@ static void do_drain(void *arg)
check_irq_off();
ac = cpu_cache_get(cachep);
n = get_node(cachep, node);
spin_lock(&n->list_lock);
raw_spin_lock(&n->list_lock);
free_block(cachep, ac->entry, ac->avail, node, &list);
spin_unlock(&n->list_lock);
raw_spin_unlock(&n->list_lock);
ac->avail = 0;
slabs_destroy(cachep, &list);
}
......@@ -2164,9 +2168,9 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
drain_alien_cache(cachep, n->alien);
for_each_kmem_cache_node(cachep, node, n) {
spin_lock_irq(&n->list_lock);
raw_spin_lock_irq(&n->list_lock);
drain_array_locked(cachep, n->shared, node, true, &list);
spin_unlock_irq(&n->list_lock);
raw_spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
}
......@@ -2188,10 +2192,10 @@ static int drain_freelist(struct kmem_cache *cache,
nr_freed = 0;
while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
spin_lock_irq(&n->list_lock);
raw_spin_lock_irq(&n->list_lock);
p = n->slabs_free.prev;
if (p == &n->slabs_free) {
spin_unlock_irq(&n->list_lock);
raw_spin_unlock_irq(&n->list_lock);
goto out;
}
......@@ -2204,7 +2208,7 @@ static int drain_freelist(struct kmem_cache *cache,
* to the cache.
*/
n->free_objects -= cache->num;
spin_unlock_irq(&n->list_lock);
raw_spin_unlock_irq(&n->list_lock);
slab_destroy(cache, slab);
nr_freed++;
}
......@@ -2629,7 +2633,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
INIT_LIST_HEAD(&slab->slab_list);
n = get_node(cachep, slab_nid(slab));
spin_lock(&n->list_lock);
raw_spin_lock(&n->list_lock);
n->total_slabs++;
if (!slab->active) {
list_add_tail(&slab->slab_list, &n->slabs_free);
......@@ -2639,7 +2643,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
STATS_INC_GROWN(cachep);
n->free_objects += cachep->num - slab->active;
spin_unlock(&n->list_lock);
raw_spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
}
......@@ -2805,7 +2809,7 @@ static struct slab *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
{
struct slab *slab;
assert_spin_locked(&n->list_lock);
assert_raw_spin_locked(&n->list_lock);
slab = list_first_entry_or_null(&n->slabs_partial, struct slab,
slab_list);
if (!slab) {
......@@ -2832,10 +2836,10 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
if (!gfp_pfmemalloc_allowed(flags))
return NULL;
spin_lock(&n->list_lock);
raw_spin_lock(&n->list_lock);
slab = get_first_slab(n, true);
if (!slab) {
spin_unlock(&n->list_lock);
raw_spin_unlock(&n->list_lock);
return NULL;
}
......@@ -2844,7 +2848,7 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
fixup_slab_list(cachep, n, slab, &list);
spin_unlock(&n->list_lock);
raw_spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
return obj;
......@@ -2903,7 +2907,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
if (!n->free_objects && (!shared || !shared->avail))
goto direct_grow;
spin_lock(&n->list_lock);
raw_spin_lock(&n->list_lock);
shared = READ_ONCE(n->shared);
/* See if we can refill from the shared array */
......@@ -2927,7 +2931,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
must_grow:
n->free_objects -= ac->avail;
alloc_done:
spin_unlock(&n->list_lock);
raw_spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
direct_grow:
......@@ -3147,7 +3151,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
BUG_ON(!n);
check_irq_off();
spin_lock(&n->list_lock);
raw_spin_lock(&n->list_lock);
slab = get_first_slab(n, false);
if (!slab)
goto must_grow;
......@@ -3165,12 +3169,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
fixup_slab_list(cachep, n, slab, &list);
spin_unlock(&n->list_lock);
raw_spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
return obj;
must_grow:
spin_unlock(&n->list_lock);
raw_spin_unlock(&n->list_lock);
slab = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
if (slab) {
/* This slab isn't counted yet so don't update free_objects */
......@@ -3254,7 +3258,8 @@ slab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
init = slab_want_init_on_alloc(flags, cachep);
out:
slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init);
slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init,
cachep->object_size);
return objp;
}
......@@ -3325,7 +3330,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
check_irq_off();
n = get_node(cachep, node);
spin_lock(&n->list_lock);
raw_spin_lock(&n->list_lock);
if (n->shared) {
struct array_cache *shared_array = n->shared;
int max = shared_array->limit - shared_array->avail;
......@@ -3354,7 +3359,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
STATS_SET_FREEABLE(cachep, i);
}
#endif
spin_unlock(&n->list_lock);
raw_spin_unlock(&n->list_lock);
ac->avail -= batchcount;
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
slabs_destroy(cachep, &list);
......@@ -3446,16 +3451,6 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
return ret;
}
/**
* kmem_cache_alloc - Allocate an object
* @cachep: The cache to allocate from.
* @flags: See kmalloc().
*
* Allocate an object from this cache. The flags are only relevant
* if the cache has no available objects.
*
* Return: pointer to the new object or %NULL in case of error
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
return __kmem_cache_alloc_lru(cachep, NULL, flags);
......@@ -3507,13 +3502,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
* Done outside of the IRQ disabled section.
*/
slab_post_alloc_hook(s, objcg, flags, size, p,
slab_want_init_on_alloc(flags, s));
slab_want_init_on_alloc(flags, s), s->object_size);
/* FIXME: Trace call missing. Christoph would like a bulk variant */
return size;
error:
local_irq_enable();
cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
slab_post_alloc_hook(s, objcg, flags, i, p, false);
slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
kmem_cache_free_bulk(s, i, p);
return 0;
}
......@@ -3721,9 +3716,9 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
node = cpu_to_mem(cpu);
n = get_node(cachep, node);
spin_lock_irq(&n->list_lock);
raw_spin_lock_irq(&n->list_lock);
free_block(cachep, ac->entry, ac->avail, node, &list);
spin_unlock_irq(&n->list_lock);
raw_spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
}
free_percpu(prev);
......@@ -3815,9 +3810,9 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
return;
}
spin_lock_irq(&n->list_lock);
raw_spin_lock_irq(&n->list_lock);
drain_array_locked(cachep, ac, node, false, &list);
spin_unlock_irq(&n->list_lock);
raw_spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
}
......@@ -3901,7 +3896,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
for_each_kmem_cache_node(cachep, node, n) {
check_irq_on();
spin_lock_irq(&n->list_lock);
raw_spin_lock_irq(&n->list_lock);
total_slabs += n->total_slabs;
free_slabs += n->free_slabs;
......@@ -3910,7 +3905,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
if (n->shared)
shared_avail += n->shared->avail;
spin_unlock_irq(&n->list_lock);
raw_spin_unlock_irq(&n->list_lock);
}
num_objs = total_slabs * cachep->num;
active_slabs = total_slabs - free_slabs;
......
......@@ -11,37 +11,43 @@ struct slab {
#if defined(CONFIG_SLAB)
struct kmem_cache *slab_cache;
union {
struct list_head slab_list;
struct {
struct list_head slab_list;
void *freelist; /* array of free object indexes */
void *s_mem; /* first object */
};
struct rcu_head rcu_head;
};
struct kmem_cache *slab_cache;
void *freelist; /* array of free object indexes */
void *s_mem; /* first object */
unsigned int active;
#elif defined(CONFIG_SLUB)
union {
struct list_head slab_list;
struct rcu_head rcu_head;
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct {
struct slab *next;
int slabs; /* Nr of slabs left */
};
#endif
};
struct kmem_cache *slab_cache;
/* Double-word boundary */
void *freelist; /* first free object */
union {
unsigned long counters;
struct {
unsigned inuse:16;
unsigned objects:15;
unsigned frozen:1;
union {
struct list_head slab_list;
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct {
struct slab *next;
int slabs; /* Nr of slabs left */
};
#endif
};
/* Double-word boundary */
void *freelist; /* first free object */
union {
unsigned long counters;
struct {
unsigned inuse:16;
unsigned objects:15;
unsigned frozen:1;
};
};
};
struct rcu_head rcu_head;
};
unsigned int __unused;
......@@ -66,9 +72,10 @@ struct slab {
#define SLAB_MATCH(pg, sl) \
static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(flags, __page_flags);
SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
#ifndef CONFIG_SLOB
SLAB_MATCH(rcu_head, rcu_head);
SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */
#else
SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
#endif
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
......@@ -76,6 +83,9 @@ SLAB_MATCH(memcg_data, memcg_data);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && defined(CONFIG_SLUB)
static_assert(IS_ALIGNED(offsetof(struct slab, freelist), 2*sizeof(void *)));
#endif
/**
* folio_slab - Converts from folio to slab.
......@@ -207,8 +217,6 @@ struct kmem_cache {
unsigned int size; /* The aligned/padded/added on size */
unsigned int align; /* Alignment as calculated */
slab_flags_t flags; /* Active flags on the slab */
unsigned int useroffset;/* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
const char *name; /* Slab name for sysfs */
int refcount; /* Use counter */
void (*ctor)(void *); /* Called on object slot creation */
......@@ -336,7 +344,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
SLAB_ACCOUNT)
#elif defined(CONFIG_SLUB)
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | SLAB_ACCOUNT | SLAB_NO_USER_FLAGS)
SLAB_TEMPORARY | SLAB_ACCOUNT | \
SLAB_NO_USER_FLAGS | SLAB_KMALLOC)
#else
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
#endif
......@@ -356,6 +365,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | \
SLAB_ACCOUNT | \
SLAB_KMALLOC | \
SLAB_NO_USER_FLAGS)
bool __kmem_cache_empty(struct kmem_cache *);
......@@ -720,12 +730,26 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
static inline void slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg, gfp_t flags,
size_t size, void **p, bool init)
size_t size, void **p, bool init,
unsigned int orig_size)
{
unsigned int zero_size = s->object_size;
size_t i;
flags &= gfp_allowed_mask;
/*
* For kmalloc object, the allocated memory size(object_size) is likely
* larger than the requested size(orig_size). If redzone check is
* enabled for the extra space, don't zero it, as it will be redzoned
* soon. The redzone operation for this extra space could be seen as a
* replacement of current poisoning under certain debug option, and
* won't break other sanity checks.
*/
if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
(s->flags & SLAB_KMALLOC))
zero_size = orig_size;
/*
* As memory initialization might be integrated into KASAN,
* kasan_slab_alloc and initialization memset must be
......@@ -736,7 +760,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
for (i = 0; i < size; i++) {
p[i] = kasan_slab_alloc(s, p[i], flags, init);
if (p[i] && init && !kasan_has_integrated_init())
memset(p[i], 0, s->object_size);
memset(p[i], 0, zero_size);
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, flags);
kmsan_slab_alloc(s, p[i], flags);
......@@ -750,9 +774,8 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
* The slab lists for all objects.
*/
struct kmem_cache_node {
spinlock_t list_lock;
#ifdef CONFIG_SLAB
raw_spinlock_t list_lock;
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
......@@ -768,6 +791,7 @@ struct kmem_cache_node {
#endif
#ifdef CONFIG_SLUB
spinlock_t list_lock;
unsigned long nr_partial;
struct list_head partial;
#ifdef CONFIG_SLUB_DEBUG
......@@ -871,4 +895,8 @@ void __check_heap_object(const void *ptr, unsigned long n,
}
#endif
#ifdef CONFIG_SLUB_DEBUG
void skip_orig_size_check(struct kmem_cache *s, const void *object);
#endif
#endif /* MM_SLAB_H */
......@@ -143,8 +143,10 @@ int slab_unmergeable(struct kmem_cache *s)
if (s->ctor)
return 1;
#ifdef CONFIG_HARDENED_USERCOPY
if (s->usersize)
return 1;
#endif
/*
* We may have set a slab to be unmergeable during bootstrap.
......@@ -223,8 +225,10 @@ static struct kmem_cache *create_cache(const char *name,
s->size = s->object_size = object_size;
s->align = align;
s->ctor = ctor;
#ifdef CONFIG_HARDENED_USERCOPY
s->useroffset = useroffset;
s->usersize = usersize;
#endif
err = __kmem_cache_create(s, flags);
if (err)
......@@ -317,7 +321,8 @@ kmem_cache_create_usercopy(const char *name,
flags &= CACHE_CREATE_MASK;
/* Fail closed on bad usersize of useroffset values. */
if (WARN_ON(!usersize && useroffset) ||
if (!IS_ENABLED(CONFIG_HARDENED_USERCOPY) ||
WARN_ON(!usersize && useroffset) ||
WARN_ON(size < usersize || size - usersize < useroffset))
usersize = useroffset = 0;
......@@ -595,8 +600,8 @@ void kmem_dump_obj(void *object)
ptroffset = ((char *)object - (char *)kp.kp_objp) - kp.kp_data_offset;
pr_cont(" pointer offset %lu", ptroffset);
}
if (kp.kp_slab_cache && kp.kp_slab_cache->usersize)
pr_cont(" size %u", kp.kp_slab_cache->usersize);
if (kp.kp_slab_cache && kp.kp_slab_cache->object_size)
pr_cont(" size %u", kp.kp_slab_cache->object_size);
if (kp.kp_ret)
pr_cont(" allocated at %pS\n", kp.kp_ret);
else
......@@ -640,8 +645,10 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
align = max(align, size);
s->align = calculate_alignment(flags, align, size);
#ifdef CONFIG_HARDENED_USERCOPY
s->useroffset = useroffset;
s->usersize = usersize;
#endif
err = __kmem_cache_create(s, flags);
......@@ -766,10 +773,16 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
#define KMALLOC_CGROUP_NAME(sz)
#endif
#ifndef CONFIG_SLUB_TINY
#define KMALLOC_RCL_NAME(sz) .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #sz,
#else
#define KMALLOC_RCL_NAME(sz)
#endif
#define INIT_KMALLOC_INFO(__size, __short_size) \
{ \
.name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \
.name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \
KMALLOC_RCL_NAME(__short_size) \
KMALLOC_CGROUP_NAME(__short_size) \
KMALLOC_DMA_NAME(__short_size) \
.size = __size, \
......@@ -855,7 +868,7 @@ void __init setup_kmalloc_cache_index_table(void)
static void __init
new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
{
if (type == KMALLOC_RECLAIM) {
if ((KMALLOC_RECLAIM != KMALLOC_NORMAL) && (type == KMALLOC_RECLAIM)) {
flags |= SLAB_RECLAIM_ACCOUNT;
} else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
if (mem_cgroup_kmem_disabled()) {
......@@ -1037,6 +1050,10 @@ size_t __ksize(const void *object)
return folio_size(folio);
}
#ifdef CONFIG_SLUB_DEBUG
skip_orig_size_check(folio_slab(folio)->slab_cache, object);
#endif
return slab_ksize(folio_slab(folio)->slab_cache);
}
......
......@@ -187,6 +187,12 @@ do { \
#define USE_LOCKLESS_FAST_PATH() (false)
#endif
#ifndef CONFIG_SLUB_TINY
#define __fastpath_inline __always_inline
#else
#define __fastpath_inline
#endif
#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG_ON
DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
......@@ -241,6 +247,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
/* Enable to log cmpxchg failures */
#undef SLUB_DEBUG_CMPXCHG
#ifndef CONFIG_SLUB_TINY
/*
* Minimum number of partial slabs. These will be left on the partial
* lists even if they are empty. kmem_cache_shrink may reclaim them.
......@@ -253,6 +260,10 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
* sort the partial list by the number of objects in use.
*/
#define MAX_PARTIAL 10
#else
#define MIN_PARTIAL 0
#define MAX_PARTIAL 0
#endif
#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
SLAB_POISON | SLAB_STORE_USER)
......@@ -298,7 +309,7 @@ struct track {
enum track_item { TRACK_ALLOC, TRACK_FREE };
#ifdef CONFIG_SYSFS
#ifdef SLAB_SUPPORTS_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
#else
......@@ -332,10 +343,12 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
*/
static nodemask_t slab_nodes;
#ifndef CONFIG_SLUB_TINY
/*
* Workqueue used for flush_cpu_slab().
*/
static struct workqueue_struct *flushwq;
#endif
/********************************************************************
* Core slab cache functions
......@@ -381,10 +394,12 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object)
return freelist_dereference(s, object + s->offset);
}
#ifndef CONFIG_SLUB_TINY
static void prefetch_freepointer(const struct kmem_cache *s, void *object)
{
prefetchw(object + s->offset);
}
#endif
/*
* When running under KMSAN, get_freepointer_safe() may return an uninitialized
......@@ -829,6 +844,17 @@ static inline void set_orig_size(struct kmem_cache *s,
if (!slub_debug_orig_size(s))
return;
#ifdef CONFIG_KASAN_GENERIC
/*
* KASAN could save its free meta data in object's data area at
* offset 0, if the size is larger than 'orig_size', it will
* overlap the data redzone in [orig_size+1, object_size], and
* the check should be skipped.
*/
if (kasan_metadata_size(s, true) > orig_size)
orig_size = s->object_size;
#endif
p += get_info_end(s);
p += sizeof(struct track) * 2;
......@@ -848,6 +874,11 @@ static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
return *(unsigned int *)p;
}
void skip_orig_size_check(struct kmem_cache *s, const void *object)
{
set_orig_size(s, (void *)object, s->object_size);
}
static void slab_bug(struct kmem_cache *s, char *fmt, ...)
{
struct va_format vaf;
......@@ -910,7 +941,7 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
if (slub_debug_orig_size(s))
off += sizeof(unsigned int);
off += kasan_metadata_size(s);
off += kasan_metadata_size(s, false);
if (off != size_from_object(s))
/* Beginning of the filler is the free pointer */
......@@ -966,17 +997,28 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct slab *slab,
static void init_object(struct kmem_cache *s, void *object, u8 val)
{
u8 *p = kasan_reset_tag(object);
unsigned int poison_size = s->object_size;
if (s->flags & SLAB_RED_ZONE)
if (s->flags & SLAB_RED_ZONE) {
memset(p - s->red_left_pad, val, s->red_left_pad);
if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
/*
* Redzone the extra allocated space by kmalloc than
* requested, and the poison size will be limited to
* the original request size accordingly.
*/
poison_size = get_orig_size(s, object);
}
}
if (s->flags & __OBJECT_POISON) {
memset(p, POISON_FREE, s->object_size - 1);
p[s->object_size - 1] = POISON_END;
memset(p, POISON_FREE, poison_size - 1);
p[poison_size - 1] = POISON_END;
}
if (s->flags & SLAB_RED_ZONE)
memset(p + s->object_size, val, s->inuse - s->object_size);
memset(p + poison_size, val, s->inuse - poison_size);
}
static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
......@@ -1070,7 +1112,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
off += sizeof(unsigned int);
}
off += kasan_metadata_size(s);
off += kasan_metadata_size(s, false);
if (size_from_object(s) == off)
return 1;
......@@ -1120,6 +1162,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
{
u8 *p = object;
u8 *endobject = object + s->object_size;
unsigned int orig_size;
if (s->flags & SLAB_RED_ZONE) {
if (!check_bytes_and_report(s, slab, object, "Left Redzone",
......@@ -1129,6 +1172,17 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
if (!check_bytes_and_report(s, slab, object, "Right Redzone",
endobject, val, s->inuse - s->object_size))
return 0;
if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
orig_size = get_orig_size(s, object);
if (s->object_size > orig_size &&
!check_bytes_and_report(s, slab, object,
"kmalloc Redzone", p + orig_size,
val, s->object_size - orig_size)) {
return 0;
}
}
} else {
if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
check_bytes_and_report(s, slab, p, "Alignment padding",
......@@ -1363,7 +1417,7 @@ static inline int alloc_consistency_checks(struct kmem_cache *s,
return 1;
}
static noinline int alloc_debug_processing(struct kmem_cache *s,
static noinline bool alloc_debug_processing(struct kmem_cache *s,
struct slab *slab, void *object, int orig_size)
{
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
......@@ -1375,7 +1429,7 @@ static noinline int alloc_debug_processing(struct kmem_cache *s,
trace(s, slab, object, 1);
set_orig_size(s, object, orig_size);
init_object(s, object, SLUB_RED_ACTIVE);
return 1;
return true;
bad:
if (folio_test_slab(slab_folio(slab))) {
......@@ -1388,7 +1442,7 @@ static noinline int alloc_debug_processing(struct kmem_cache *s,
slab->inuse = slab->objects;
slab->freelist = NULL;
}
return 0;
return false;
}
static inline int free_consistency_checks(struct kmem_cache *s,
......@@ -1641,17 +1695,17 @@ static inline void setup_object_debug(struct kmem_cache *s, void *object) {}
static inline
void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
static inline int alloc_debug_processing(struct kmem_cache *s,
struct slab *slab, void *object, int orig_size) { return 0; }
static inline bool alloc_debug_processing(struct kmem_cache *s,
struct slab *slab, void *object, int orig_size) { return true; }
static inline void free_debug_processing(
struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int bulk_cnt,
unsigned long addr) {}
static inline bool free_debug_processing(struct kmem_cache *s,
struct slab *slab, void *head, void *tail, int *bulk_cnt,
unsigned long addr, depot_stack_handle_t handle) { return true; }
static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
static inline int check_object(struct kmem_cache *s, struct slab *slab,
void *object, u8 val) { return 1; }
static inline depot_stack_handle_t set_track_prepare(void) { return 0; }
static inline void set_track(struct kmem_cache *s, void *object,
enum track_item alloc, unsigned long addr) {}
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
......@@ -1676,11 +1730,13 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
#ifndef CONFIG_SLUB_TINY
static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
void **freelist, void *nextfree)
{
return false;
}
#endif
#endif /* CONFIG_SLUB_DEBUG */
/*
......@@ -1800,6 +1856,8 @@ static inline struct slab *alloc_slab_page(gfp_t flags, int node,
slab = folio_slab(folio);
__folio_set_slab(folio);
/* Make the flag visible before any changes to folio->mapping */
smp_wmb();
if (page_is_pfmemalloc(folio_page(folio, 0)))
slab_set_pfmemalloc(slab);
......@@ -1999,17 +2057,11 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab)
int order = folio_order(folio);
int pages = 1 << order;
if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
void *p;
slab_pad_check(s, slab);
for_each_object(p, s, slab_address(slab), slab->objects)
check_object(s, slab, p, SLUB_RED_INACTIVE);
}
__slab_clear_pfmemalloc(slab);
__folio_clear_slab(folio);
folio->mapping = NULL;
/* Make the mapping reset visible before clearing the flag */
smp_wmb();
__folio_clear_slab(folio);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;
unaccount_slab(slab, order, s);
......@@ -2025,9 +2077,17 @@ static void rcu_free_slab(struct rcu_head *h)
static void free_slab(struct kmem_cache *s, struct slab *slab)
{
if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
void *p;
slab_pad_check(s, slab);
for_each_object(p, s, slab_address(slab), slab->objects)
check_object(s, slab, p, SLUB_RED_INACTIVE);
}
if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU))
call_rcu(&slab->rcu_head, rcu_free_slab);
} else
else
__free_slab(s, slab);
}
......@@ -2214,7 +2274,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
if (!pfmemalloc_match(slab, pc->flags))
continue;
if (kmem_cache_debug(s)) {
if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
object = alloc_single_from_partial(s, n, slab,
pc->orig_size);
if (object)
......@@ -2329,6 +2389,8 @@ static void *get_partial(struct kmem_cache *s, int node, struct partial_context
return get_any_partial(s, pc);
}
#ifndef CONFIG_SLUB_TINY
#ifdef CONFIG_PREEMPTION
/*
* Calculate the next globally unique transaction for disambiguation
......@@ -2342,7 +2404,7 @@ static void *get_partial(struct kmem_cache *s, int node, struct partial_context
* different cpus.
*/
#define TID_STEP 1
#endif
#endif /* CONFIG_PREEMPTION */
static inline unsigned long next_tid(unsigned long tid)
{
......@@ -2411,7 +2473,7 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
void *freelist)
{
enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST };
enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
struct kmem_cache_node *n = get_node(s, slab_nid(slab));
int free_delta = 0;
enum slab_modes mode = M_NONE;
......@@ -2487,14 +2549,6 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
* acquire_slab() will see a slab that is frozen
*/
spin_lock_irqsave(&n->list_lock, flags);
} else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
mode = M_FULL;
/*
* This also ensures that the scanning of full
* slabs from diagnostic functions will not see
* any frozen slabs.
*/
spin_lock_irqsave(&n->list_lock, flags);
} else {
mode = M_FULL_NOLIST;
}
......@@ -2504,7 +2558,7 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab")) {
if (mode == M_PARTIAL || mode == M_FULL)
if (mode == M_PARTIAL)
spin_unlock_irqrestore(&n->list_lock, flags);
goto redo;
}
......@@ -2518,10 +2572,6 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
stat(s, DEACTIVATE_EMPTY);
discard_slab(s, slab);
stat(s, FREE_SLAB);
} else if (mode == M_FULL) {
add_full(s, n, slab);
spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, DEACTIVATE_FULL);
} else if (mode == M_FULL_NOLIST) {
stat(s, DEACTIVATE_FULL);
}
......@@ -2803,6 +2853,13 @@ static int slub_cpu_dead(unsigned int cpu)
return 0;
}
#else /* CONFIG_SLUB_TINY */
static inline void flush_all_cpus_locked(struct kmem_cache *s) { }
static inline void flush_all(struct kmem_cache *s) { }
static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { }
static inline int slub_cpu_dead(unsigned int cpu) { return 0; }
#endif /* CONFIG_SLUB_TINY */
/*
* Check if the objects in a per cpu structure fit numa
* locality expectations.
......@@ -2828,38 +2885,28 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
}
/* Supports checking bulk free of a constructed freelist */
static noinline void free_debug_processing(
struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int bulk_cnt,
unsigned long addr)
static inline bool free_debug_processing(struct kmem_cache *s,
struct slab *slab, void *head, void *tail, int *bulk_cnt,
unsigned long addr, depot_stack_handle_t handle)
{
struct kmem_cache_node *n = get_node(s, slab_nid(slab));
struct slab *slab_free = NULL;
bool checks_ok = false;
void *object = head;
int cnt = 0;
unsigned long flags;
bool checks_ok = false;
depot_stack_handle_t handle = 0;
if (s->flags & SLAB_STORE_USER)
handle = set_track_prepare();
spin_lock_irqsave(&n->list_lock, flags);
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
if (!check_slab(s, slab))
goto out;
}
if (slab->inuse < bulk_cnt) {
if (slab->inuse < *bulk_cnt) {
slab_err(s, slab, "Slab has %d allocated objects but %d are to be freed\n",
slab->inuse, bulk_cnt);
slab->inuse, *bulk_cnt);
goto out;
}
next_object:
if (++cnt > bulk_cnt)
if (++cnt > *bulk_cnt)
goto out_cnt;
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
......@@ -2881,61 +2928,22 @@ static noinline void free_debug_processing(
checks_ok = true;
out_cnt:
if (cnt != bulk_cnt)
if (cnt != *bulk_cnt) {
slab_err(s, slab, "Bulk free expected %d objects but found %d\n",
bulk_cnt, cnt);
out:
if (checks_ok) {
void *prior = slab->freelist;
/* Perform the actual freeing while we still hold the locks */
slab->inuse -= cnt;
set_freepointer(s, tail, prior);
slab->freelist = head;
/*
* If the slab is empty, and node's partial list is full,
* it should be discarded anyway no matter it's on full or
* partial list.
*/
if (slab->inuse == 0 && n->nr_partial >= s->min_partial)
slab_free = slab;
if (!prior) {
/* was on full list */
remove_full(s, n, slab);
if (!slab_free) {
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
} else if (slab_free) {
remove_partial(n, slab);
stat(s, FREE_REMOVE_PARTIAL);
}
*bulk_cnt, cnt);
*bulk_cnt = cnt;
}
if (slab_free) {
/*
* Update the counters while still holding n->list_lock to
* prevent spurious validation warnings
*/
dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
}
spin_unlock_irqrestore(&n->list_lock, flags);
out:
if (!checks_ok)
slab_fix(s, "Object at 0x%p not freed", object);
if (slab_free) {
stat(s, FREE_SLAB);
free_slab(s, slab_free);
}
return checks_ok;
}
#endif /* CONFIG_SLUB_DEBUG */
#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
#if defined(CONFIG_SLUB_DEBUG) || defined(SLAB_SUPPORTS_SYSFS)
static unsigned long count_partial(struct kmem_cache_node *n,
int (*get_count)(struct slab *))
{
......@@ -2949,12 +2957,12 @@ static unsigned long count_partial(struct kmem_cache_node *n,
spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
#endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */
#ifdef CONFIG_SLUB_DEBUG
static noinline void
slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
{
#ifdef CONFIG_SLUB_DEBUG
static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
int node;
......@@ -2985,8 +2993,11 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
node, nr_slabs, nr_objs, nr_free);
}
#endif
}
#else /* CONFIG_SLUB_DEBUG */
static inline void
slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) { }
#endif
static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
{
......@@ -2996,6 +3007,7 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
return true;
}
#ifndef CONFIG_SLUB_TINY
/*
* Check the slab->freelist and either transfer the freelist to the
* per cpu freelist or deactivate the slab.
......@@ -3283,45 +3295,13 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
return p;
}
/*
* If the object has been wiped upon free, make sure it's fully initialized by
* zeroing out freelist pointer.
*/
static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
void *obj)
{
if (unlikely(slab_want_init_on_free(s)) && obj)
memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
0, sizeof(void *));
}
/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
* overhead for requests that can be satisfied on the fastpath.
*
* The fastpath works by first checking if the lockless freelist can be used.
* If not then __slab_alloc is called for slow processing.
*
* Otherwise we can simply pick the next object from the lockless free list.
*/
static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
static __always_inline void *__slab_alloc_node(struct kmem_cache *s,
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
void *object;
struct kmem_cache_cpu *c;
struct slab *slab;
unsigned long tid;
struct obj_cgroup *objcg = NULL;
bool init = false;
s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
if (!s)
return NULL;
object = kfence_alloc(s, orig_size, gfpflags);
if (unlikely(object))
goto out;
void *object;
redo:
/*
......@@ -3391,22 +3371,95 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_l
stat(s, ALLOC_FASTPATH);
}
return object;
}
#else /* CONFIG_SLUB_TINY */
static void *__slab_alloc_node(struct kmem_cache *s,
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
struct partial_context pc;
struct slab *slab;
void *object;
pc.flags = gfpflags;
pc.slab = &slab;
pc.orig_size = orig_size;
object = get_partial(s, node, &pc);
if (object)
return object;
slab = new_slab(s, gfpflags, node);
if (unlikely(!slab)) {
slab_out_of_memory(s, gfpflags, node);
return NULL;
}
object = alloc_single_from_new_slab(s, slab, orig_size);
return object;
}
#endif /* CONFIG_SLUB_TINY */
/*
* If the object has been wiped upon free, make sure it's fully initialized by
* zeroing out freelist pointer.
*/
static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
void *obj)
{
if (unlikely(slab_want_init_on_free(s)) && obj)
memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
0, sizeof(void *));
}
/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
* overhead for requests that can be satisfied on the fastpath.
*
* The fastpath works by first checking if the lockless freelist can be used.
* If not then __slab_alloc is called for slow processing.
*
* Otherwise we can simply pick the next object from the lockless free list.
*/
static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
void *object;
struct obj_cgroup *objcg = NULL;
bool init = false;
s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
if (!s)
return NULL;
object = kfence_alloc(s, orig_size, gfpflags);
if (unlikely(object))
goto out;
object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
maybe_wipe_obj_freeptr(s, object);
init = slab_want_init_on_alloc(gfpflags, s);
out:
slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
/*
* When init equals 'true', like for kzalloc() family, only
* @orig_size bytes might be zeroed instead of s->object_size
*/
slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
return object;
}
static __always_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
static __fastpath_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags, unsigned long addr, size_t orig_size)
{
return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size);
}
static __always_inline
static __fastpath_inline
void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags)
{
......@@ -3448,6 +3501,67 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
static noinline void free_to_partial_list(
struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int bulk_cnt,
unsigned long addr)
{
struct kmem_cache_node *n = get_node(s, slab_nid(slab));
struct slab *slab_free = NULL;
int cnt = bulk_cnt;
unsigned long flags;
depot_stack_handle_t handle = 0;
if (s->flags & SLAB_STORE_USER)
handle = set_track_prepare();
spin_lock_irqsave(&n->list_lock, flags);
if (free_debug_processing(s, slab, head, tail, &cnt, addr, handle)) {
void *prior = slab->freelist;
/* Perform the actual freeing while we still hold the locks */
slab->inuse -= cnt;
set_freepointer(s, tail, prior);
slab->freelist = head;
/*
* If the slab is empty, and node's partial list is full,
* it should be discarded anyway no matter it's on full or
* partial list.
*/
if (slab->inuse == 0 && n->nr_partial >= s->min_partial)
slab_free = slab;
if (!prior) {
/* was on full list */
remove_full(s, n, slab);
if (!slab_free) {
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
} else if (slab_free) {
remove_partial(n, slab);
stat(s, FREE_REMOVE_PARTIAL);
}
}
if (slab_free) {
/*
* Update the counters while still holding n->list_lock to
* prevent spurious validation warnings
*/
dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
}
spin_unlock_irqrestore(&n->list_lock, flags);
if (slab_free) {
stat(s, FREE_SLAB);
free_slab(s, slab_free);
}
}
/*
* Slow path handling. This may still be called frequently since objects
* have a longer lifetime than the cpu slabs in most processing loads.
......@@ -3473,8 +3587,8 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
if (kfence_free(head))
return;
if (kmem_cache_debug(s)) {
free_debug_processing(s, slab, head, tail, cnt, addr);
if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
free_to_partial_list(s, slab, head, tail, cnt, addr);
return;
}
......@@ -3574,6 +3688,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
discard_slab(s, slab);
}
#ifndef CONFIG_SLUB_TINY
/*
* Fastpath with forced inlining to produce a kfree and kmem_cache_free that
* can perform fastpath freeing without additional function calls.
......@@ -3648,8 +3763,18 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
}
stat(s, FREE_FASTPATH);
}
#else /* CONFIG_SLUB_TINY */
static void do_slab_free(struct kmem_cache *s,
struct slab *slab, void *head, void *tail,
int cnt, unsigned long addr)
{
void *tail_obj = tail ? : head;
__slab_free(s, slab, head, tail_obj, cnt, addr);
}
#endif /* CONFIG_SLUB_TINY */
static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab,
static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab,
void *head, void *tail, void **p, int cnt,
unsigned long addr)
{
......@@ -3782,18 +3907,13 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
/* Note that interrupts must be enabled when calling this function. */
int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
#ifndef CONFIG_SLUB_TINY
static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
size_t size, void **p, struct obj_cgroup *objcg)
{
struct kmem_cache_cpu *c;
int i;
struct obj_cgroup *objcg = NULL;
/* memcg and kmem_cache debug support */
s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
if (unlikely(!s))
return false;
/*
* Drain objects in the per cpu slab, while disabling local
* IRQs, which protects against PREEMPT and interrupts
......@@ -3847,19 +3967,72 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
local_unlock_irq(&s->cpu_slab->lock);
slub_put_cpu_ptr(s->cpu_slab);
/*
* memcg and kmem_cache debug support and memory initialization.
* Done outside of the IRQ disabled fastpath loop.
*/
slab_post_alloc_hook(s, objcg, flags, size, p,
slab_want_init_on_alloc(flags, s));
return i;
error:
slub_put_cpu_ptr(s->cpu_slab);
slab_post_alloc_hook(s, objcg, flags, i, p, false);
slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
kmem_cache_free_bulk(s, i, p);
return 0;
}
#else /* CONFIG_SLUB_TINY */
static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
size_t size, void **p, struct obj_cgroup *objcg)
{
int i;
for (i = 0; i < size; i++) {
void *object = kfence_alloc(s, s->object_size, flags);
if (unlikely(object)) {
p[i] = object;
continue;
}
p[i] = __slab_alloc_node(s, flags, NUMA_NO_NODE,
_RET_IP_, s->object_size);
if (unlikely(!p[i]))
goto error;
maybe_wipe_obj_freeptr(s, p[i]);
}
return i;
error:
slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
kmem_cache_free_bulk(s, i, p);
return 0;
}
#endif /* CONFIG_SLUB_TINY */
/* Note that interrupts must be enabled when calling this function. */
int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
{
int i;
struct obj_cgroup *objcg = NULL;
if (!size)
return 0;
/* memcg and kmem_cache debug support */
s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
if (unlikely(!s))
return 0;
i = __kmem_cache_alloc_bulk(s, flags, size, p, objcg);
/*
* memcg and kmem_cache debug support and memory initialization.
* Done outside of the IRQ disabled fastpath loop.
*/
if (i != 0)
slab_post_alloc_hook(s, objcg, flags, size, p,
slab_want_init_on_alloc(flags, s), s->object_size);
return i;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
......@@ -3883,7 +4056,8 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk);
* take the list_lock.
*/
static unsigned int slub_min_order;
static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
static unsigned int slub_max_order =
IS_ENABLED(CONFIG_SLUB_TINY) ? 1 : PAGE_ALLOC_COSTLY_ORDER;
static unsigned int slub_min_objects;
/*
......@@ -4014,10 +4188,12 @@ init_kmem_cache_node(struct kmem_cache_node *n)
#endif
}
#ifndef CONFIG_SLUB_TINY
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH *
sizeof(struct kmem_cache_cpu));
/*
* Must align to double word boundary for the double cmpxchg
......@@ -4033,6 +4209,12 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
return 1;
}
#else
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
return 1;
}
#endif /* CONFIG_SLUB_TINY */
static struct kmem_cache *kmem_cache_node;
......@@ -4095,7 +4277,9 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
void __kmem_cache_release(struct kmem_cache *s)
{
cache_random_seq_destroy(s);
#ifndef CONFIG_SLUB_TINY
free_percpu(s->cpu_slab);
#endif
free_kmem_cache_nodes(s);
}
......@@ -4202,7 +4386,8 @@ static int calculate_sizes(struct kmem_cache *s)
*/
s->inuse = size;
if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
if (slub_debug_orig_size(s) ||
(flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
s->ctor) {
/*
......@@ -4872,8 +5057,10 @@ void __init kmem_cache_init(void)
void __init kmem_cache_init_late(void)
{
#ifndef CONFIG_SLUB_TINY
flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0);
WARN_ON(!flushwq);
#endif
}
struct kmem_cache *
......@@ -4924,7 +5111,7 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
return 0;
}
#ifdef CONFIG_SYSFS
#ifdef SLAB_SUPPORTS_SYSFS
static int count_inuse(struct slab *slab)
{
return slab->inuse;
......@@ -5182,7 +5369,7 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s,
#endif /* CONFIG_DEBUG_FS */
#endif /* CONFIG_SLUB_DEBUG */
#ifdef CONFIG_SYSFS
#ifdef SLAB_SUPPORTS_SYSFS
enum slab_stat_type {
SL_ALL, /* All slabs */
SL_PARTIAL, /* Only partially allocated slabs */
......@@ -5502,11 +5689,13 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
SLAB_ATTR_RO(cache_dma);
#endif
#ifdef CONFIG_HARDENED_USERCOPY
static ssize_t usersize_show(struct kmem_cache *s, char *buf)
{
return sysfs_emit(buf, "%u\n", s->usersize);
}
SLAB_ATTR_RO(usersize);
#endif
static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
{
......@@ -5586,7 +5775,21 @@ static ssize_t failslab_show(struct kmem_cache *s, char *buf)
{
return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
}
SLAB_ATTR_RO(failslab);
static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
size_t length)
{
if (s->refcount > 1)
return -EINVAL;
if (buf[0] == '1')
WRITE_ONCE(s->flags, s->flags | SLAB_FAILSLAB);
else
WRITE_ONCE(s->flags, s->flags & ~SLAB_FAILSLAB);
return length;
}
SLAB_ATTR(failslab);
#endif
static ssize_t shrink_show(struct kmem_cache *s, char *buf)
......@@ -5803,7 +6006,9 @@ static struct attribute *slab_attrs[] = {
#ifdef CONFIG_FAILSLAB
&failslab_attr.attr,
#endif
#ifdef CONFIG_HARDENED_USERCOPY
&usersize_attr.attr,
#endif
#ifdef CONFIG_KFENCE
&skip_kfence_attr.attr,
#endif
......@@ -5920,11 +6125,6 @@ static int sysfs_slab_add(struct kmem_cache *s)
struct kset *kset = cache_kset(s);
int unmergeable = slab_unmergeable(s);
if (!kset) {
kobject_init(&s->kobj, &slab_ktype);
return 0;
}
if (!unmergeable && disable_higher_order_debug &&
(slub_debug & DEBUG_METADATA_FLAGS))
unmergeable = 1;
......@@ -6054,9 +6254,8 @@ static int __init slab_sysfs_init(void)
mutex_unlock(&slab_mutex);
return 0;
}
__initcall(slab_sysfs_init);
#endif /* CONFIG_SYSFS */
late_initcall(slab_sysfs_init);
#endif /* SLAB_SUPPORTS_SYSFS */
#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
static int slab_debugfs_show(struct seq_file *seq, void *v)
......
......@@ -157,9 +157,11 @@ static unsigned long read_obj(const char *name)
{
FILE *f = fopen(name, "r");
if (!f)
if (!f) {
buffer[0] = 0;
else {
if (errno == EACCES)
fatal("%s, Try using superuser\n", strerror(errno));
} else {
if (!fgets(buffer, sizeof(buffer), f))
buffer[0] = 0;
fclose(f);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment