Commit 893660b0 authored by Linus Torvalds

Merge tag 'slab-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

 - SLOB deprecation and SLUB_TINY

   The SLOB allocator adds maintenance burden and stands in the way of
   API improvements [1]. Deprecate it by renaming the config option (to
   make users notice) to CONFIG_SLOB_DEPRECATED with updated help text.
   SLUB should be used instead, as SLAB will be next on the removal
   list.

   Based on reports from a riscv k210 board with 8MB RAM, add a
   CONFIG_SLUB_TINY option to minimize SLUB's memory usage at the
   expense of scalability. This has resolved the k210 regression [2],
   so provided there are no other regressions (that could not be
   resolved by further tweaks to SLUB_TINY), the plan is to remove SLOB
   in a few cycles.

   Existing defconfigs with CONFIG_SLOB are converted to
   CONFIG_SLUB_TINY.

 - kmalloc() slub_debug redzone improvements

   A series from Feng Tang that builds on the tracking of requested size
   for kmalloc() allocations (for caches with debugging enabled) added
   in 6.1, to make redzone checks consider the requested size rather
   than the rounded-up one, in order to catch more subtle buffer
   overruns. Includes a new slub_kunit test; a brief sketch of the kind
   of overrun this catches follows this list.

 - struct slab fields reordering to accommodate larger rcu_head

   RCU folks would like to grow rcu_head with debugging options, which
   breaks current struct slab layout's assumptions, so reorganize it to
   make this possible.

 - Miscellaneous improvements/fixes:
     - __alloc_size checking compiler workaround (Kees Cook)
     - Optimize and cleanup SLUB's sysfs init (Rasmus Villemoes)
     - Make SLAB compatible with PROVE_RAW_LOCK_NESTING (Jiri Kosina)
     - Correct SLUB's percpu allocation estimates (Baoquan He)
     - Re-enable SLUB's run-time failslab sysfs control (Alexander Atanasov)
     - Make tools/vm/slabinfo more user-friendly when not run as root (Rong Tao)
     - Dead code removal in SLUB (Hyeonggon Yoo)
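
To illustrate the kmalloc() redzone change above, here is a minimal
sketch (not part of this series, and assuming a kernel booted with
redzoning and user tracking enabled, e.g. slub_debug=ZU) of the kind of
off-by-a-few overrun the new checks report. It mirrors the added
slub_kunit test, which requests 18 bytes from a 32-byte cache and then
writes just past the requested size:

    /*
     * Hypothetical demo, not from this series: an 18-byte request is
     * served from kmalloc-32, so offsets 18..31 used to be writable
     * without any debug report. With the requested size tracked and
     * the spare space redzoned, the writes below should be flagged as
     * redzone corruption when the object is checked (on kfree() or
     * when the cache is validated).
     */
    #include <linux/errno.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    static int redzone_overrun_demo(void)
    {
            u8 *p = kmalloc(18, GFP_KERNEL); /* rounded up to kmalloc-32 */

            if (!p)
                    return -ENOMEM;

            p[18] = 0xab;   /* past the requested size, inside the object */
            p[19] = 0xab;

            kfree(p);       /* debug free checks run here */
            return 0;
    }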

* tag 'slab-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: (31 commits)
  mm, slob: rename CONFIG_SLOB to CONFIG_SLOB_DEPRECATED
  mm, slub: don't aggressively inline with CONFIG_SLUB_TINY
  mm, slub: remove percpu slabs with CONFIG_SLUB_TINY
  mm, slub: split out allocations from pre/post hooks
  mm/slub, kunit: Add a test case for kmalloc redzone check
  mm/slub, kunit: add SLAB_SKIP_KFENCE flag for cache creation
  mm, slub: refactor free debug processing
  mm, slab: ignore SLAB_RECLAIM_ACCOUNT with CONFIG_SLUB_TINY
  mm, slub: don't create kmalloc-rcl caches with CONFIG_SLUB_TINY
  mm, slub: lower the default slub_max_order with CONFIG_SLUB_TINY
  mm, slub: retain no free slabs on partial list with CONFIG_SLUB_TINY
  mm, slub: disable SYSFS support with CONFIG_SLUB_TINY
  mm, slub: add CONFIG_SLUB_TINY
  mm, slab: ignore hardened usercopy parameters when disabled
  slab: Remove special-casing of const 0 size allocations
  slab: Clean up SLOB vs kmalloc() definition
  mm/sl[au]b: rearrange struct slab fields to allow larger rcu_head
  mm/migrate: make isolate_movable_page() skip slab pages
  mm/slab: move and adjust kernel-doc for kmem_cache_alloc
  mm/slub, percpu: correct the calculation of early percpu allocation size
  ...
parents 98d0052d dc19745a
......@@ -116,6 +116,8 @@ options from the ``slub_debug`` parameter translate to the following files::
T trace
A failslab
failslab file is writable, so writing 1 or 0 will enable or disable
the option at runtime. Write returns -EINVAL if cache is an alias.
Careful with tracing: It may spew out lots of information and never stop if
used on the wrong slab.
......
......@@ -14,7 +14,8 @@ CONFIG_ARCH_EDB7211=y
CONFIG_ARCH_P720T=y
CONFIG_AEABI=y
# CONFIG_COREDUMP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
......
......@@ -13,7 +13,8 @@ CONFIG_CMDLINE="noinitrd root=/dev/mtdblock2 rootfstype=jffs2 fbcon=rotate:1"
CONFIG_FPE_NWFPE=y
CONFIG_PM=y
# CONFIG_SWAP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
......
......@@ -25,7 +25,8 @@ CONFIG_ARM_CLPS711X_CPUIDLE=y
CONFIG_JUMP_LABEL=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_COREDUMP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
......
......@@ -42,7 +42,8 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_BINFMT_MISC=y
# CONFIG_SWAP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_NET=y
CONFIG_PACKET=y
......
......@@ -49,7 +49,8 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_LDM_PARTITION=y
CONFIG_CMDLINE_PARTITION=y
CONFIG_BINFMT_MISC=y
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_COMPACTION is not set
CONFIG_NET=y
CONFIG_PACKET=y
......
......@@ -19,7 +19,8 @@ CONFIG_FPE_NWFPE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_SWAP is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
......
......@@ -26,7 +26,8 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
# CONFIG_BLOCK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_COMPAT_BRK is not set
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_NET=y
......
......@@ -10,7 +10,8 @@ CONFIG_EXPERT=y
# CONFIG_AIO is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_MODULES=y
# CONFIG_BLOCK is not set
CONFIG_OPENRISC_BUILTIN_DTB="or1ksim"
......
......@@ -16,7 +16,8 @@ CONFIG_EXPERT=y
# CONFIG_AIO is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_MODULES=y
# CONFIG_BLOCK is not set
CONFIG_OPENRISC_BUILTIN_DTB="simple_smp"
......
......@@ -25,7 +25,8 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
CONFIG_NONPORTABLE=y
......
......@@ -17,7 +17,8 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
CONFIG_NONPORTABLE=y
......
......@@ -22,7 +22,8 @@ CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_MMU is not set
CONFIG_SOC_VIRT=y
CONFIG_NONPORTABLE=y
......
......@@ -10,7 +10,8 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_AIO is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
......
......@@ -11,7 +11,8 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
......
......@@ -21,7 +21,8 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_ELF_CORE is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
......
......@@ -9,7 +9,8 @@ CONFIG_LOG_BUF_SHIFT=14
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SHMEM is not set
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_CPU_SUBTYPE_SH7706=y
CONFIG_MEMORY_START=0x0c000000
......
......@@ -20,7 +20,8 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_KALLSYMS_ALL=y
CONFIG_SLOB=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
......
......@@ -302,7 +302,7 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
#ifdef CONFIG_KASAN_GENERIC
size_t kasan_metadata_size(struct kmem_cache *cache);
size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object);
slab_flags_t kasan_never_merge(void);
void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
slab_flags_t *flags);
......@@ -315,7 +315,8 @@ void kasan_record_aux_stack_noalloc(void *ptr);
#else /* CONFIG_KASAN_GENERIC */
/* Tag-based KASAN modes do not use per-object metadata. */
static inline size_t kasan_metadata_size(struct kmem_cache *cache)
static inline size_t kasan_metadata_size(struct kmem_cache *cache,
bool in_object)
{
return 0;
}
......
......@@ -42,7 +42,7 @@
* larger than PERCPU_DYNAMIC_EARLY_SIZE.
*/
#define PERCPU_DYNAMIC_EARLY_SLOTS 128
#define PERCPU_DYNAMIC_EARLY_SIZE (12 << 10)
#define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10)
/*
* PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
......
......@@ -140,7 +140,11 @@
/* The following flags affect the page allocator grouping pages by mobility */
/* Objects are reclaimable */
#ifndef CONFIG_SLUB_TINY
#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U)
#else
#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0)
#endif
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
/*
......@@ -347,12 +351,17 @@ enum kmalloc_cache_type {
#endif
#ifndef CONFIG_MEMCG_KMEM
KMALLOC_CGROUP = KMALLOC_NORMAL,
#else
KMALLOC_CGROUP,
#endif
#ifdef CONFIG_SLUB_TINY
KMALLOC_RECLAIM = KMALLOC_NORMAL,
#else
KMALLOC_RECLAIM,
#endif
#ifdef CONFIG_ZONE_DMA
KMALLOC_DMA,
#endif
#ifdef CONFIG_MEMCG_KMEM
KMALLOC_CGROUP,
#endif
NR_KMALLOC_TYPES
};
......@@ -452,7 +461,18 @@ static_assert(PAGE_SHIFT <= 20);
#endif /* !CONFIG_SLOB */
void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc;
/**
* kmem_cache_alloc - Allocate an object
* @cachep: The cache to allocate from.
* @flags: See kmalloc().
*
* Allocate an object from this cache.
* See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags.
*
* Return: pointer to the new object or %NULL in case of error
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc;
void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags) __assume_slab_alignment __malloc;
void kmem_cache_free(struct kmem_cache *s, void *objp);
......@@ -494,9 +514,9 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align
__alloc_size(1);
/**
* kmalloc - allocate memory
* kmalloc - allocate kernel memory
* @size: how many bytes of memory are required.
* @flags: the type of memory to allocate.
* @flags: describe the allocation context
*
* kmalloc is the normal method of allocating memory
* for objects smaller than page size in the kernel.
......@@ -523,12 +543,12 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align
* %GFP_ATOMIC
* Allocation will not sleep. May use emergency pools.
*
* %GFP_HIGHUSER
* Allocate memory from high memory on behalf of user.
*
* Also it is possible to set different flags by OR'ing
* in one or more of the following additional @flags:
*
* %__GFP_ZERO
* Zero the allocated memory before returning. Also see kzalloc().
*
* %__GFP_HIGH
* This allocation has high priority and may use emergency pools.
*
......@@ -547,42 +567,42 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align
* Try really hard to succeed the allocation but fail
* eventually.
*/
#ifndef CONFIG_SLOB
static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size)) {
#ifndef CONFIG_SLOB
if (__builtin_constant_p(size) && size) {
unsigned int index;
#endif
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags);
#ifndef CONFIG_SLOB
index = kmalloc_index(size);
if (!index)
return ZERO_SIZE_PTR;
index = kmalloc_index(size);
return kmalloc_trace(
kmalloc_caches[kmalloc_type(flags)][index],
flags, size);
#endif
}
return __kmalloc(size, flags);
}
#else
static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags);
return __kmalloc(size, flags);
}
#endif
#ifndef CONFIG_SLOB
static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
{
if (__builtin_constant_p(size)) {
if (__builtin_constant_p(size) && size) {
unsigned int index;
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large_node(size, flags, node);
index = kmalloc_index(size);
if (!index)
return ZERO_SIZE_PTR;
return kmalloc_node_trace(
kmalloc_caches[kmalloc_type(flags)][index],
flags, node, size);
......
......@@ -80,8 +80,10 @@ struct kmem_cache {
unsigned int *random_seq;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
......
......@@ -41,6 +41,7 @@ enum stat_item {
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
NR_SLUB_STAT_ITEMS };
#ifndef CONFIG_SLUB_TINY
/*
* When changing the layout, make sure freelist and tid are still compatible
* with this_cpu_cmpxchg_double() alignment requirements.
......@@ -57,6 +58,7 @@ struct kmem_cache_cpu {
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
};
#endif /* CONFIG_SLUB_TINY */
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c) ((c)->partial)
......@@ -88,7 +90,9 @@ struct kmem_cache_order_objects {
* Slab cache management.
*/
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
struct kmem_cache_cpu __percpu *cpu_slab;
#endif
/* Used for retrieving partial slabs, etc. */
slab_flags_t flags;
unsigned long min_partial;
......@@ -136,13 +140,15 @@ struct kmem_cache {
struct kasan_cache kasan_info;
#endif
#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
#ifdef CONFIG_SYSFS
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *);
void sysfs_slab_release(struct kmem_cache *);
......
......@@ -7,5 +7,6 @@ CONFIG_KERNEL_XZ=y
# CONFIG_KERNEL_LZO is not set
# CONFIG_KERNEL_LZ4 is not set
# CONFIG_SLAB is not set
# CONFIG_SLUB is not set
CONFIG_SLOB=y
# CONFIG_SLOB_DEPRECATED is not set
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
......@@ -37,7 +37,7 @@ menuconfig KASAN
(HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \
CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
HAVE_ARCH_KASAN_HW_TAGS
depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB)
select STACKDEPOT_ALWAYS_INIT
help
Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety
......
......@@ -9,10 +9,25 @@
static struct kunit_resource resource;
static int slab_errors;
/*
* Wrapper function for kmem_cache_create(), which reduces 2 parameters:
* 'align' and 'ctor', and sets SLAB_SKIP_KFENCE flag to avoid getting an
* object from kfence pool, where the operation could be caught by both
* our test and kfence sanity check.
*/
static struct kmem_cache *test_kmem_cache_create(const char *name,
unsigned int size, slab_flags_t flags)
{
struct kmem_cache *s = kmem_cache_create(name, size, 0,
(flags | SLAB_NO_USER_FLAGS), NULL);
s->flags |= SLAB_SKIP_KFENCE;
return s;
}
static void test_clobber_zone(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_alloc", 64,
SLAB_RED_ZONE);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kasan_disable_current();
......@@ -29,8 +44,8 @@ static void test_clobber_zone(struct kunit *test)
#ifndef CONFIG_KASAN
static void test_next_pointer(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
struct kmem_cache *s = test_kmem_cache_create("TestSlub_next_ptr_free",
64, SLAB_POISON);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
unsigned long tmp;
unsigned long *ptr_addr;
......@@ -74,8 +89,8 @@ static void test_next_pointer(struct kunit *test)
static void test_first_word(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
struct kmem_cache *s = test_kmem_cache_create("TestSlub_1th_word_free",
64, SLAB_POISON);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kmem_cache_free(s, p);
......@@ -89,8 +104,8 @@ static void test_first_word(struct kunit *test)
static void test_clobber_50th_byte(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
struct kmem_cache *s = test_kmem_cache_create("TestSlub_50th_word_free",
64, SLAB_POISON);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kmem_cache_free(s, p);
......@@ -105,8 +120,8 @@ static void test_clobber_50th_byte(struct kunit *test)
static void test_clobber_redzone_free(struct kunit *test)
{
struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_free", 64,
SLAB_RED_ZONE);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kasan_disable_current();
......@@ -120,6 +135,27 @@ static void test_clobber_redzone_free(struct kunit *test)
kmem_cache_destroy(s);
}
static void test_kmalloc_redzone_access(struct kunit *test)
{
struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32,
SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);
u8 *p = kmalloc_trace(s, GFP_KERNEL, 18);
kasan_disable_current();
/* Suppress the -Warray-bounds warning */
OPTIMIZER_HIDE_VAR(p);
p[18] = 0xab;
p[19] = 0xab;
validate_slab_cache(s);
KUNIT_EXPECT_EQ(test, 2, slab_errors);
kasan_enable_current();
kmem_cache_free(s, p);
kmem_cache_destroy(s);
}
static int test_init(struct kunit *test)
{
slab_errors = 0;
......@@ -139,6 +175,7 @@ static struct kunit_case test_cases[] = {
#endif
KUNIT_CASE(test_clobber_redzone_free),
KUNIT_CASE(test_kmalloc_redzone_access),
{}
};
......
......@@ -219,17 +219,43 @@ config SLUB
and has enhanced diagnostics. SLUB is the default choice for
a slab allocator.
config SLOB
config SLOB_DEPRECATED
depends on EXPERT
bool "SLOB (Simple Allocator)"
bool "SLOB (Simple Allocator - DEPRECATED)"
depends on !PREEMPT_RT
help
Deprecated and scheduled for removal in a few cycles. SLUB
recommended as replacement. CONFIG_SLUB_TINY can be considered
on systems with 16MB or less RAM.
If you need SLOB to stay, please contact linux-mm@kvack.org and
people listed in the SLAB ALLOCATOR section of MAINTAINERS file,
with your use case.
SLOB replaces the stock allocator with a drastically simpler
allocator. SLOB is generally more space efficient but
does not perform as well on large systems.
endchoice
config SLOB
bool
default y
depends on SLOB_DEPRECATED
config SLUB_TINY
bool "Configure SLUB for minimal memory footprint"
depends on SLUB && EXPERT
select SLAB_MERGE_DEFAULT
help
Configures the SLUB allocator in a way to achieve minimal memory
footprint, sacrificing scalability, debugging and other features.
This is intended only for the smallest system that had used the
SLOB allocator and is not recommended for systems with more than
16MB RAM.
If unsure, say N.
config SLAB_MERGE_DEFAULT
bool "Allow slab caches to be merged"
default y
......@@ -247,7 +273,7 @@ config SLAB_MERGE_DEFAULT
config SLAB_FREELIST_RANDOM
bool "Randomize slab freelist"
depends on SLAB || SLUB
depends on SLAB || (SLUB && !SLUB_TINY)
help
Randomizes the freelist order used on creating new pages. This
security feature reduces the predictability of the kernel slab
......@@ -255,7 +281,7 @@ config SLAB_FREELIST_RANDOM
config SLAB_FREELIST_HARDENED
bool "Harden slab freelist metadata"
depends on SLAB || SLUB
depends on SLAB || (SLUB && !SLUB_TINY)
help
Many kernel heap attacks try to target slab cache metadata and
other infrastructure. This options makes minor performance
......@@ -267,7 +293,7 @@ config SLAB_FREELIST_HARDENED
config SLUB_STATS
default n
bool "Enable SLUB performance statistics"
depends on SLUB && SYSFS
depends on SLUB && SYSFS && !SLUB_TINY
help
SLUB statistics are useful to debug SLUBs allocation behavior in
order find ways to optimize the allocator. This should never be
......@@ -279,7 +305,7 @@ config SLUB_STATS
config SLUB_CPU_PARTIAL
default y
depends on SLUB && SMP
depends on SLUB && SMP && !SLUB_TINY
bool "SLUB per cpu partial cache"
help
Per cpu partial caches accelerate objects allocation and freeing
......
......@@ -56,7 +56,7 @@ config DEBUG_SLAB
config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EXPERT
depends on SLUB && SYSFS
depends on SLUB && SYSFS && !SLUB_TINY
select STACKDEPOT if STACKTRACE_SUPPORT
help
SLUB has extensive debug support features. Disabling these can
......
......@@ -450,15 +450,22 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
__memset(alloc_meta, 0, sizeof(*alloc_meta));
}
size_t kasan_metadata_size(struct kmem_cache *cache)
size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object)
{
struct kasan_cache *info = &cache->kasan_info;
if (!kasan_requires_meta())
return 0;
return (cache->kasan_info.alloc_meta_offset ?
sizeof(struct kasan_alloc_meta) : 0) +
((cache->kasan_info.free_meta_offset &&
cache->kasan_info.free_meta_offset != KASAN_NO_FREE_META) ?
sizeof(struct kasan_free_meta) : 0);
if (in_object)
return (info->free_meta_offset ?
0 : sizeof(struct kasan_free_meta));
else
return (info->alloc_meta_offset ?
sizeof(struct kasan_alloc_meta) : 0) +
((info->free_meta_offset &&
info->free_meta_offset != KASAN_NO_FREE_META) ?
sizeof(struct kasan_free_meta) : 0);
}
static void __kasan_record_aux_stack(void *addr, bool can_alloc)
......
......@@ -74,13 +74,22 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode)
if (unlikely(!get_page_unless_zero(page)))
goto out;
if (unlikely(PageSlab(page)))
goto out_putpage;
/* Pairs with smp_wmb() in slab freeing, e.g. SLUB's __free_slab() */
smp_rmb();
/*
* Check PageMovable before holding a PG_lock because page's owner
* assumes anybody doesn't touch PG_lock of newly allocated page
* so unconditionally grabbing the lock ruins page's owner side.
* Check movable flag before taking the page lock because
* we use non-atomic bitops on newly allocated page flags so
* unconditionally grabbing the lock ruins page's owner side.
*/
if (unlikely(!__PageMovable(page)))
goto out_putpage;
/* Pairs with smp_wmb() in slab allocation, e.g. SLUB's alloc_slab_page() */
smp_rmb();
if (unlikely(PageSlab(page)))
goto out_putpage;
/*
* As movable pages are not isolated from LRU lists, concurrent
* compaction threads can race against page migration functions
......
......@@ -11,37 +11,43 @@ struct slab {
#if defined(CONFIG_SLAB)
struct kmem_cache *slab_cache;
union {
struct list_head slab_list;
struct {
struct list_head slab_list;
void *freelist; /* array of free object indexes */
void *s_mem; /* first object */
};
struct rcu_head rcu_head;
};
struct kmem_cache *slab_cache;
void *freelist; /* array of free object indexes */
void *s_mem; /* first object */
unsigned int active;
#elif defined(CONFIG_SLUB)
union {
struct list_head slab_list;
struct rcu_head rcu_head;
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct {
struct slab *next;
int slabs; /* Nr of slabs left */
};
#endif
};
struct kmem_cache *slab_cache;
/* Double-word boundary */
void *freelist; /* first free object */
union {
unsigned long counters;
struct {
unsigned inuse:16;
unsigned objects:15;
unsigned frozen:1;
union {
struct list_head slab_list;
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct {
struct slab *next;
int slabs; /* Nr of slabs left */
};
#endif
};
/* Double-word boundary */
void *freelist; /* first free object */
union {
unsigned long counters;
struct {
unsigned inuse:16;
unsigned objects:15;
unsigned frozen:1;
};
};
};
struct rcu_head rcu_head;
};
unsigned int __unused;
......@@ -66,9 +72,10 @@ struct slab {
#define SLAB_MATCH(pg, sl) \
static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(flags, __page_flags);
SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
#ifndef CONFIG_SLOB
SLAB_MATCH(rcu_head, rcu_head);
SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */
#else
SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
#endif
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
......@@ -76,6 +83,9 @@ SLAB_MATCH(memcg_data, memcg_data);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && defined(CONFIG_SLUB)
static_assert(IS_ALIGNED(offsetof(struct slab, freelist), 2*sizeof(void *)));
#endif
/**
* folio_slab - Converts from folio to slab.
......@@ -207,8 +217,6 @@ struct kmem_cache {
unsigned int size; /* The aligned/padded/added on size */
unsigned int align; /* Alignment as calculated */
slab_flags_t flags; /* Active flags on the slab */
unsigned int useroffset;/* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
const char *name; /* Slab name for sysfs */
int refcount; /* Use counter */
void (*ctor)(void *); /* Called on object slot creation */
......@@ -336,7 +344,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
SLAB_ACCOUNT)
#elif defined(CONFIG_SLUB)
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | SLAB_ACCOUNT | SLAB_NO_USER_FLAGS)
SLAB_TEMPORARY | SLAB_ACCOUNT | \
SLAB_NO_USER_FLAGS | SLAB_KMALLOC)
#else
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
#endif
......@@ -356,6 +365,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | \
SLAB_ACCOUNT | \
SLAB_KMALLOC | \
SLAB_NO_USER_FLAGS)
bool __kmem_cache_empty(struct kmem_cache *);
......@@ -720,12 +730,26 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
static inline void slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg, gfp_t flags,
size_t size, void **p, bool init)
size_t size, void **p, bool init,
unsigned int orig_size)
{
unsigned int zero_size = s->object_size;
size_t i;
flags &= gfp_allowed_mask;
/*
* For kmalloc object, the allocated memory size(object_size) is likely
* larger than the requested size(orig_size). If redzone check is
* enabled for the extra space, don't zero it, as it will be redzoned
* soon. The redzone operation for this extra space could be seen as a
* replacement of current poisoning under certain debug option, and
* won't break other sanity checks.
*/
if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
(s->flags & SLAB_KMALLOC))
zero_size = orig_size;
/*
* As memory initialization might be integrated into KASAN,
* kasan_slab_alloc and initialization memset must be
......@@ -736,7 +760,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
for (i = 0; i < size; i++) {
p[i] = kasan_slab_alloc(s, p[i], flags, init);
if (p[i] && init && !kasan_has_integrated_init())
memset(p[i], 0, s->object_size);
memset(p[i], 0, zero_size);
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, flags);
kmsan_slab_alloc(s, p[i], flags);
......@@ -750,9 +774,8 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
* The slab lists for all objects.
*/
struct kmem_cache_node {
spinlock_t list_lock;
#ifdef CONFIG_SLAB
raw_spinlock_t list_lock;
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
......@@ -768,6 +791,7 @@ struct kmem_cache_node {
#endif
#ifdef CONFIG_SLUB
spinlock_t list_lock;
unsigned long nr_partial;
struct list_head partial;
#ifdef CONFIG_SLUB_DEBUG
......@@ -871,4 +895,8 @@ void __check_heap_object(const void *ptr, unsigned long n,
}
#endif
#ifdef CONFIG_SLUB_DEBUG
void skip_orig_size_check(struct kmem_cache *s, const void *object);
#endif
#endif /* MM_SLAB_H */
......@@ -143,8 +143,10 @@ int slab_unmergeable(struct kmem_cache *s)
if (s->ctor)
return 1;
#ifdef CONFIG_HARDENED_USERCOPY
if (s->usersize)
return 1;
#endif
/*
* We may have set a slab to be unmergeable during bootstrap.
......@@ -223,8 +225,10 @@ static struct kmem_cache *create_cache(const char *name,
s->size = s->object_size = object_size;
s->align = align;
s->ctor = ctor;
#ifdef CONFIG_HARDENED_USERCOPY
s->useroffset = useroffset;
s->usersize = usersize;
#endif
err = __kmem_cache_create(s, flags);
if (err)
......@@ -317,7 +321,8 @@ kmem_cache_create_usercopy(const char *name,
flags &= CACHE_CREATE_MASK;
/* Fail closed on bad usersize of useroffset values. */
if (WARN_ON(!usersize && useroffset) ||
if (!IS_ENABLED(CONFIG_HARDENED_USERCOPY) ||
WARN_ON(!usersize && useroffset) ||
WARN_ON(size < usersize || size - usersize < useroffset))
usersize = useroffset = 0;
......@@ -595,8 +600,8 @@ void kmem_dump_obj(void *object)
ptroffset = ((char *)object - (char *)kp.kp_objp) - kp.kp_data_offset;
pr_cont(" pointer offset %lu", ptroffset);
}
if (kp.kp_slab_cache && kp.kp_slab_cache->usersize)
pr_cont(" size %u", kp.kp_slab_cache->usersize);
if (kp.kp_slab_cache && kp.kp_slab_cache->object_size)
pr_cont(" size %u", kp.kp_slab_cache->object_size);
if (kp.kp_ret)
pr_cont(" allocated at %pS\n", kp.kp_ret);
else
......@@ -640,8 +645,10 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
align = max(align, size);
s->align = calculate_alignment(flags, align, size);
#ifdef CONFIG_HARDENED_USERCOPY
s->useroffset = useroffset;
s->usersize = usersize;
#endif
err = __kmem_cache_create(s, flags);
......@@ -766,10 +773,16 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
#define KMALLOC_CGROUP_NAME(sz)
#endif
#ifndef CONFIG_SLUB_TINY
#define KMALLOC_RCL_NAME(sz) .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #sz,
#else
#define KMALLOC_RCL_NAME(sz)
#endif
#define INIT_KMALLOC_INFO(__size, __short_size) \
{ \
.name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \
.name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \
KMALLOC_RCL_NAME(__short_size) \
KMALLOC_CGROUP_NAME(__short_size) \
KMALLOC_DMA_NAME(__short_size) \
.size = __size, \
......@@ -855,7 +868,7 @@ void __init setup_kmalloc_cache_index_table(void)
static void __init
new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
{
if (type == KMALLOC_RECLAIM) {
if ((KMALLOC_RECLAIM != KMALLOC_NORMAL) && (type == KMALLOC_RECLAIM)) {
flags |= SLAB_RECLAIM_ACCOUNT;
} else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
if (mem_cgroup_kmem_disabled()) {
......@@ -1037,6 +1050,10 @@ size_t __ksize(const void *object)
return folio_size(folio);
}
#ifdef CONFIG_SLUB_DEBUG
skip_orig_size_check(folio_slab(folio)->slab_cache, object);
#endif
return slab_ksize(folio_slab(folio)->slab_cache);
}
......
......@@ -157,9 +157,11 @@ static unsigned long read_obj(const char *name)
{
FILE *f = fopen(name, "r");
if (!f)
if (!f) {
buffer[0] = 0;
else {
if (errno == EACCES)
fatal("%s, Try using superuser\n", strerror(errno));
} else {
if (!fgets(buffer, sizeof(buffer), f))
buffer[0] = 0;
fclose(f);
......