Commit bdf56c75 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'slab-for-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:
 "This time it's mostly refactoring and improving APIs for slab users in
  the kernel, along with some debugging improvements.

   - kmem_cache_create() refactoring (Christian Brauner)

     Over the years kmem_cache_create() has been growing new parameters,
     most of which are needed only for a small number of caches - most
     recently the rcu_freeptr_offset parameter.

     To avoid adding new parameters to kmem_cache_create() and adjusting
     all its callers, or creating new wrappers such as
     kmem_cache_create_rcu(), we can now pass extra parameters using the
     new struct kmem_cache_args. Not explicitly initialized fields
     default to values interpreted as unused.

     kmem_cache_create() is for now a wrapper that works both with the
     new form: kmem_cache_create(name, object_size, args, flags) and the
     legacy form: kmem_cache_create(name, object_size, align, flags,
     ctor)

   - kmem_cache_destroy() waits for kfree_rcu()'s in flight (Vlastimil
     Babka, Uladislau Rezki)

     Since SLOB removal, kfree() is allowed for freeing objects
     allocated by kmem_cache_create(). By extension kfree_rcu() is
     allowed as well, which can allow converting simple call_rcu()
     callbacks that only do kmem_cache_free(), as there was never a
     kmem_cache_free_rcu() variant. However, for caches that can be
     destroyed e.g. on module removal, the cache owners knew to issue
     rcu_barrier() first to wait for the pending call_rcu()'s, and this
     is not sufficient for pending kfree_rcu()'s due to its internal
     batching optimizations. Ulad has provided a new
     kvfree_rcu_barrier() and to make the usage less error-prone,
     kmem_cache_destroy() calls it. Additionally, destroying
     SLAB_TYPESAFE_BY_RCU caches now again issues rcu_barrier()
     synchronously instead of using an async work, because the past
     motivation for async work no longer applies. Users of custom
     call_rcu() callbacks should however keep calling rcu_barrier()
     before cache destruction.

   - Debugging use-after-free in SLAB_TYPESAFE_BY_RCU caches (Jann Horn)

     Currently, KASAN cannot catch UAFs in such caches as it is legal to
     access them within a grace period, and we only track the grace
     period when trying to free the underlying slab page. The new
     CONFIG_SLUB_RCU_DEBUG option changes the freeing of individual
     objects to be RCU-delayed, after which KASAN can poison them.

   - Delayed memcg charging (Shakeel Butt)

     In some cases, the memcg is unknown at allocation time, such as
     receiving network packets in softirq context. With
     kmem_cache_charge() these may now be charged later when the user
     and its memcg are known.

   - Misc fixes and improvements (Pedro Falcato, Axel Rasmussen,
     Christoph Lameter, Yan Zhen, Peng Fan, Xavier)"

* tag 'slab-for-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: (34 commits)
  mm, slab: restore kerneldoc for kmem_cache_create()
  io_uring: port to struct kmem_cache_args
  slab: make __kmem_cache_create() static inline
  slab: make kmem_cache_create_usercopy() static inline
  slab: remove kmem_cache_create_rcu()
  file: port to struct kmem_cache_args
  slab: create kmem_cache_create() compatibility layer
  slab: port KMEM_CACHE_USERCOPY() to struct kmem_cache_args
  slab: port KMEM_CACHE() to struct kmem_cache_args
  slab: remove rcu_freeptr_offset from struct kmem_cache
  slab: pass struct kmem_cache_args to do_kmem_cache_create()
  slab: pull kmem_cache_open() into do_kmem_cache_create()
  slab: pass struct kmem_cache_args to create_cache()
  slab: port kmem_cache_create_usercopy() to struct kmem_cache_args
  slab: port kmem_cache_create_rcu() to struct kmem_cache_args
  slab: port kmem_cache_create() to struct kmem_cache_args
  slab: add struct kmem_cache_args
  slab: s/__kmem_cache_create/do_kmem_cache_create/g
  memcg: add charging of already allocated slab objects
  mm/slab: Optimize the code logic in find_mergeable()
  ...
parents efdfcd40 ecc4d6af
......@@ -521,9 +521,14 @@ EXPORT_SYMBOL(__fput_sync);
void __init files_init(void)
{
filp_cachep = kmem_cache_create_rcu("filp", sizeof(struct file),
offsetof(struct file, f_freeptr),
SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
struct kmem_cache_args args = {
.use_freeptr_offset = true,
.freeptr_offset = offsetof(struct file, f_freeptr),
};
filp_cachep = kmem_cache_create("filp", sizeof(struct file), &args,
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU);
percpu_counter_init(&nr_files, 0, GFP_KERNEL);
}
......
......@@ -175,13 +175,59 @@ static __always_inline void * __must_check kasan_init_slab_obj(
return (void *)object;
}
bool __kasan_slab_free(struct kmem_cache *s, void *object,
unsigned long ip, bool init);
bool __kasan_slab_pre_free(struct kmem_cache *s, void *object,
unsigned long ip);
/**
 * kasan_slab_pre_free - Report whether a slab object must not be freed.
 * @s: Slab cache the object is being freed from.
 * @object: Object about to be freed.
 *
 * Gives KASAN a chance to look at @object before it is freed. The check may
 * detect and report double-free and invalid-free bugs.
 *
 * Only the slab allocator is supposed to call this.
 *
 * Return: true if freeing the object is unsafe; false otherwise (always
 * false when KASAN is not enabled).
 */
static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s,
						void *object)
{
	/* Nothing to check when KASAN is off. */
	if (!kasan_enabled())
		return false;

	return __kasan_slab_pre_free(s, object, _RET_IP_);
}
bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
bool still_accessible);
/**
* kasan_slab_free - Poison, initialize, and quarantine a slab object.
* @object: Object to be freed.
* @init: Whether to initialize the object.
* @still_accessible: Whether the object contents are still accessible.
*
* This function informs that a slab object has been freed and is not
* supposed to be accessed anymore, except when @still_accessible is set
* (indicating that the object is in a SLAB_TYPESAFE_BY_RCU cache and an RCU
* grace period might not have passed yet).
*
* For KASAN modes that have integrated memory initialization
* (kasan_has_integrated_init() == true), this function also initializes
* the object's memory. For other modes, the @init argument is ignored.
*
* This function might also take ownership of the object to quarantine it.
* When this happens, KASAN will defer freeing the object to a later
* stage and handle it internally until then. The return value indicates
* whether KASAN took ownership of the object.
*
* This function is intended only for use by the slab allocator.
*
* @Return true if KASAN took ownership of the object; false otherwise.
*/
static __always_inline bool kasan_slab_free(struct kmem_cache *s,
void *object, bool init)
void *object, bool init,
bool still_accessible)
{
if (kasan_enabled())
return __kasan_slab_free(s, object, _RET_IP_, init);
return __kasan_slab_free(s, object, init, still_accessible);
return false;
}
......@@ -371,7 +417,14 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache,
{
return (void *)object;
}
static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init)
/*
* Stub that never vetoes a free: it unconditionally returns false.
* NOTE(review): presumably the variant used when KASAN is compiled out -
* confirm against the enclosing #ifdef, which is not visible here.
*/
static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object)
{
return false;
}
/*
* Stub that never takes ownership of the object: it unconditionally returns
* false and ignores @init and @still_accessible.
* NOTE(review): presumably the variant used when KASAN is compiled out -
* confirm against the enclosing #ifdef, which is not visible here.
*/
static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
bool init, bool still_accessible)
{
return false;
}
......
......@@ -111,6 +111,11 @@ static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr)
kvfree(ptr);
}
/*
* Inline variant of kvfree_rcu_barrier(): it simply forwards to
* rcu_barrier().
*/
static inline void kvfree_rcu_barrier(void)
{
rcu_barrier();
}
#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
#else
......
......@@ -35,6 +35,7 @@ static inline void rcu_virt_note_context_switch(void)
void synchronize_rcu_expedited(void);
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
void kvfree_rcu_barrier(void);
void rcu_barrier(void);
void rcu_momentary_eqs(void);
......
......@@ -240,17 +240,173 @@ struct mem_cgroup;
*/
bool slab_is_available(void);
struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
unsigned int align, slab_flags_t flags,
void (*ctor)(void *));
struct kmem_cache *kmem_cache_create_usercopy(const char *name,
unsigned int size, unsigned int align,
slab_flags_t flags,
unsigned int useroffset, unsigned int usersize,
void (*ctor)(void *));
struct kmem_cache *kmem_cache_create_rcu(const char *name, unsigned int size,
unsigned int freeptr_offset,
slab_flags_t flags);
/**
* struct kmem_cache_args - Less common arguments for kmem_cache_create()
*
* Any uninitialized fields of the structure are interpreted as unused. The
* exception is @freeptr_offset where %0 is a valid value, so
* @use_freeptr_offset must also be set to %true in order to interpret the
* field as used. For @useroffset %0 is also valid, but only with non-%0
* @usersize.
*
* When %NULL args is passed to kmem_cache_create(), it is equivalent to all
* fields unused.
*/
struct kmem_cache_args {
/**
* @align: The required alignment for the objects.
*
* %0 means no specific alignment is requested.
*/
unsigned int align;
/**
* @useroffset: Usercopy region offset.
*
* %0 is a valid offset, when @usersize is non-%0.
*/
unsigned int useroffset;
/**
* @usersize: Usercopy region size.
*
* %0 means no usercopy region is specified.
*/
unsigned int usersize;
/**
* @freeptr_offset: Custom offset for the free pointer
* in &SLAB_TYPESAFE_BY_RCU caches
*
* By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
* outside of the object. This might cause the object to grow in size.
* Cache creators that have a reason to avoid this can specify a custom
* free pointer offset in their struct where the free pointer will be
* placed.
*
* Note that placing the free pointer inside the object requires the
* caller to ensure that no fields are invalidated that are required to
* guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
* details).
*
* Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
* is specified, @use_freeptr_offset must be set to %true.
*
* Note that @ctor currently isn't supported with custom free pointers
* as a @ctor requires an external free pointer.
*/
unsigned int freeptr_offset;
/**
* @use_freeptr_offset: Whether a @freeptr_offset is used.
*/
bool use_freeptr_offset;
/**
* @ctor: A constructor for the objects.
*
* The constructor is invoked for each object in a newly allocated slab
* page. It is the cache user's responsibility to free objects in the
* same state as after calling the constructor, or deal appropriately
* with any differences between a freshly constructed and a reallocated
* object.
*
* %NULL means no constructor.
*/
void (*ctor)(void *);
};
struct kmem_cache *__kmem_cache_create_args(const char *name,
unsigned int object_size,
struct kmem_cache_args *args,
slab_flags_t flags);
/*
 * Legacy five-argument form of cache creation: packs @align and @ctor into a
 * struct kmem_cache_args (all other fields left zeroed, i.e. unused) and
 * forwards to __kmem_cache_create_args().
 */
static inline struct kmem_cache *
__kmem_cache_create(const char *name, unsigned int size, unsigned int align,
		    slab_flags_t flags, void (*ctor)(void *))
{
	struct kmem_cache_args legacy_args = {};

	legacy_args.align = align;
	legacy_args.ctor = ctor;

	return __kmem_cache_create_args(name, size, &legacy_args, flags);
}
/**
 * kmem_cache_create_usercopy - Create a kmem cache with a region suitable
 * for copying to userspace.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @useroffset: Usercopy region offset
 * @usersize: Usercopy region size
 * @ctor: A constructor for the objects, or %NULL.
 *
 * Legacy wrapper that fills a &struct kmem_cache_args from its parameters
 * and hands it to __kmem_cache_create_args(). New code should prefer
 * KMEM_CACHE_USERCOPY() when whitelisting a single field is enough, or
 * kmem_cache_create() with the parameters passed through its args argument.
 *
 * Return: a pointer to the cache on success, NULL on failure.
 */
static inline struct kmem_cache *
kmem_cache_create_usercopy(const char *name, unsigned int size,
			   unsigned int align, slab_flags_t flags,
			   unsigned int useroffset, unsigned int usersize,
			   void (*ctor)(void *))
{
	struct kmem_cache_args usercopy_args = {};

	usercopy_args.align = align;
	usercopy_args.useroffset = useroffset;
	usercopy_args.usersize = usersize;
	usercopy_args.ctor = ctor;

	return __kmem_cache_create_args(name, size, &usercopy_args, flags);
}
/*
 * Variant picked by kmem_cache_create() when @args has type void *, i.e. a
 * plain NULL was passed: the cache is created with every optional argument
 * left at its default (zeroed) value.
 *
 * Returns ERR_PTR(-EINVAL), after a one-time warning, if @args is not NULL.
 */
static inline struct kmem_cache *
__kmem_cache_default_args(const char *name, unsigned int size,
			  struct kmem_cache_args *args,
			  slab_flags_t flags)
{
	struct kmem_cache_args defaults = {};

	/* Only NULL is acceptable here; anything else is garbage. */
	if (WARN_ON_ONCE(args))
		return ERR_PTR(-EINVAL);

	return __kmem_cache_create_args(name, size, &defaults, flags);
}
/**
* kmem_cache_create - Create a kmem cache.
* @__name: A string which is used in /proc/slabinfo to identify this cache.
* @__object_size: The size of objects to be created in this cache.
* @__args: Optional arguments, see &struct kmem_cache_args. Passing %NULL
* means defaults will be used for all the arguments.
* @...: The remaining arguments: flags for the new variant, or flags and
* ctor for the legacy variant (where @__args carries the alignment).
*
* This is currently implemented as a macro using ``_Generic()`` to call
* either the new variant of the function, or a legacy one. The selection is
* made on the type of @__args: a &struct kmem_cache_args pointer selects
* __kmem_cache_create_args(), a ``void *`` (plain %NULL) selects
* __kmem_cache_default_args(), and anything else selects the legacy
* __kmem_cache_create().
*
* The new variant has 4 parameters:
* ``kmem_cache_create(name, object_size, args, flags)``
*
* See __kmem_cache_create_args() which implements this.
*
* The legacy variant has 5 parameters:
* ``kmem_cache_create(name, object_size, align, flags, ctor)``
*
* The align and ctor parameters map to the respective fields of
* &struct kmem_cache_args
*
* Context: Cannot be called within an interrupt, but can be interrupted.
*
* Return: a pointer to the cache on success, NULL on failure. Note that
* __kmem_cache_default_args() returns ERR_PTR(-EINVAL) instead if it is
* handed a non-%NULL ``void *``.
*/
#define kmem_cache_create(__name, __object_size, __args, ...) \
_Generic((__args), \
struct kmem_cache_args *: __kmem_cache_create_args, \
void *: __kmem_cache_default_args, \
default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__)
void kmem_cache_destroy(struct kmem_cache *s);
int kmem_cache_shrink(struct kmem_cache *s);
......@@ -262,20 +418,23 @@ int kmem_cache_shrink(struct kmem_cache *s);
* f.e. add ____cacheline_aligned_in_smp to the struct declaration
* then the objects will be properly aligned in SMP configurations.
*/
#define KMEM_CACHE(__struct, __flags) \
kmem_cache_create(#__struct, sizeof(struct __struct), \
__alignof__(struct __struct), (__flags), NULL)
#define KMEM_CACHE(__struct, __flags) \
__kmem_cache_create_args(#__struct, sizeof(struct __struct), \
&(struct kmem_cache_args) { \
.align = __alignof__(struct __struct), \
}, (__flags))
/*
* To whitelist a single field for copying to/from userspace, use this
* macro instead of KMEM_CACHE() above.
*/
#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \
kmem_cache_create_usercopy(#__struct, \
sizeof(struct __struct), \
__alignof__(struct __struct), (__flags), \
offsetof(struct __struct, __field), \
sizeof_field(struct __struct, __field), NULL)
#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \
__kmem_cache_create_args(#__struct, sizeof(struct __struct), \
&(struct kmem_cache_args) { \
.align = __alignof__(struct __struct), \
.useroffset = offsetof(struct __struct, __field), \
.usersize = sizeof_field(struct __struct, __field), \
}, (__flags))
/*
* Common kmalloc functions provided by all allocators
......@@ -556,6 +715,35 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags) __assume_slab_alignment __malloc;
#define kmem_cache_alloc_lru(...) alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__))
/**
* kmem_cache_charge - memcg charge an already allocated slab object
* @objp: address of the slab object to memcg charge
* @gfpflags: describe the allocation context
*
* kmem_cache_charge allows charging a slab object to the current memcg,
* primarily in cases where charging at allocation time might not be possible
* because the target memcg is not known (e.g. softirq context)
*
* The objp should be a pointer returned by the slab allocator functions like
* kmalloc (with __GFP_ACCOUNT in flags) or kmem_cache_alloc. The memcg charge
* behavior can be controlled through the gfpflags parameter, which affects how
* the necessary internal metadata can be allocated. Including __GFP_NOFAIL
* denotes that overcharging is requested instead of failure, but is not
* applied for the internal metadata allocation.
*
* There are several cases where it will return true even if the charging was
* not done. More specifically:
*
* 1. For !CONFIG_MEMCG or cgroup_disable=memory systems.
* 2. Already charged slab objects.
* 3. For slab objects from KMALLOC_NORMAL caches - allocated by kmalloc()
* without __GFP_ACCOUNT
* 4. Allocating internal metadata has failed
*
* Return: true if charge was successful otherwise false.
*/
bool kmem_cache_charge(void *objp, gfp_t gfpflags);
void kmem_cache_free(struct kmem_cache *s, void *objp);
kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
......
......@@ -3755,6 +3755,11 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries,
static int __init io_uring_init(void)
{
struct kmem_cache_args kmem_args = {
.useroffset = offsetof(struct io_kiocb, cmd.data),
.usersize = sizeof_field(struct io_kiocb, cmd.data),
};
#define __BUILD_BUG_VERIFY_OFFSET_SIZE(stype, eoffset, esize, ename) do { \
BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \
BUILD_BUG_ON(sizeof_field(stype, ename) != esize); \
......@@ -3839,12 +3844,9 @@ static int __init io_uring_init(void)
* range, and HARDENED_USERCOPY will complain if we haven't
* correctly annotated this range.
*/
req_cachep = kmem_cache_create_usercopy("io_kiocb",
sizeof(struct io_kiocb), 0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU,
offsetof(struct io_kiocb, cmd.data),
sizeof_field(struct io_kiocb, cmd.data), NULL);
req_cachep = kmem_cache_create("io_kiocb", sizeof(struct io_kiocb), &kmem_args,
SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT |
SLAB_TYPESAFE_BY_RCU);
io_buf_cachep = KMEM_CACHE(io_buffer,
SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
......
......@@ -3564,18 +3564,15 @@ kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
}
/*
* This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
* Return: %true if a work is queued, %false otherwise.
*/
static void kfree_rcu_monitor(struct work_struct *work)
static bool
kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp)
{
struct kfree_rcu_cpu *krcp = container_of(work,
struct kfree_rcu_cpu, monitor_work.work);
unsigned long flags;
bool queued = false;
int i, j;
// Drain ready for reclaim.
kvfree_rcu_drain_ready(krcp);
raw_spin_lock_irqsave(&krcp->lock, flags);
// Attempt to start a new batch.
......@@ -3614,11 +3611,27 @@ static void kfree_rcu_monitor(struct work_struct *work)
// be that the work is in the pending state when
// channels have been detached following by each
// other.
queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
}
}
raw_spin_unlock_irqrestore(&krcp->lock, flags);
return queued;
}
/*
* This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
*/
static void kfree_rcu_monitor(struct work_struct *work)
{
struct kfree_rcu_cpu *krcp = container_of(work,
struct kfree_rcu_cpu, monitor_work.work);
// Drain ready for reclaim.
kvfree_rcu_drain_ready(krcp);
// Queue a batch for a rest.
kvfree_rcu_queue_batch(krcp);
// If there is nothing to detach, it means that our job is
// successfully done here. In case of having at least one
......@@ -3840,6 +3853,86 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
}
EXPORT_SYMBOL_GPL(kvfree_call_rcu);
/**
* kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete.
*
* Note that the single-argument form of kvfree_rcu() has a slow path that
* triggers synchronize_rcu() followed by freeing the pointer. It is done
* before the return from the function. Therefore for any single-argument
* call that will result in a kfree() to a cache that is to be destroyed
* during module exit, it is the developer's responsibility to ensure that
* all such calls have returned before the call to kmem_cache_destroy().
*/
void kvfree_rcu_barrier(void)
{
struct kfree_rcu_cpu_work *krwp;
struct kfree_rcu_cpu *krcp;
bool queued;
int i, cpu;
/*
* Firstly we detach objects and queue them over an RCU-batch
* for all CPUs. Finally queued works are flushed for each CPU.
*
* Please note: if there are outstanding batches for a particular
* CPU, those have to be finished first, followed by queuing a new one.
*/
for_each_possible_cpu(cpu) {
krcp = per_cpu_ptr(&krc, cpu);
/*
* Check if this CPU has any objects which have been queued for a
* new GP completion. If not (meaning nothing to detach), we are done
* with it. If any batch is pending/running for this "krcp", the
* per-cpu flush_rcu_work() below waits for its completion (see the
* last step).
*/
if (!need_offload_krc(krcp))
continue;
while (1) {
/*
* If we are not able to queue a new RCU work it means:
* - batches for this CPU are still in flight which should
* be flushed first and then repeat;
* - no objects to detach, because of concurrency.
*/
queued = kvfree_rcu_queue_batch(krcp);
/*
* Bail out, if there is no need to offload this "krcp"
* anymore. As noted earlier it can run concurrently.
*/
if (queued || !need_offload_krc(krcp))
break;
/* There are ongoing batches: wait for them, then retry. */
for (i = 0; i < KFREE_N_BATCHES; i++) {
krwp = &(krcp->krw_arr[i]);
flush_rcu_work(&krwp->rcu_work);
}
}
}
/*
* Now we guarantee that all objects are flushed.
*/
for_each_possible_cpu(cpu) {
krcp = per_cpu_ptr(&krc, cpu);
/*
* A monitor work can drain ready-to-reclaim objects
* directly. Wait for its completion if running or pending.
*/
cancel_delayed_work_sync(&krcp->monitor_work);
for (i = 0; i < KFREE_N_BATCHES; i++) {
krwp = &(krcp->krw_arr[i]);
flush_rcu_work(&krwp->rcu_work);
}
}
}
EXPORT_SYMBOL_GPL(kvfree_rcu_barrier);
static unsigned long
kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
......
......@@ -5,6 +5,7 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include "../mm/slab.h"
static struct kunit_resource resource;
......@@ -157,6 +158,34 @@ static void test_kmalloc_redzone_access(struct kunit *test)
kmem_cache_destroy(s);
}
/* Minimal test object: only the rcu_head that test_kfree_rcu() hands to kfree_rcu(). */
struct test_kfree_rcu_struct {
struct rcu_head rcu;
};
/*
 * Free an object through kfree_rcu() and destroy its cache right away; the
 * test expects that no slab error has been recorded afterwards.
 */
static void test_kfree_rcu(struct kunit *test)
{
	struct test_kfree_rcu_struct *obj;
	struct kmem_cache *cache;

	cache = test_kmem_cache_create("TestSlub_kfree_rcu",
				       sizeof(struct test_kfree_rcu_struct),
				       SLAB_NO_MERGE);
	obj = kmem_cache_alloc(cache, GFP_KERNEL);

	kfree_rcu(obj, rcu);
	kmem_cache_destroy(cache);

	KUNIT_EXPECT_EQ(test, 0, slab_errors);
}
/*
 * Destroy a cache that still has one live object and expect exactly one slab
 * error to be recorded for it.
 *
 * The allocation is leaked on purpose, so its return value is not kept. The
 * cache carries its own name (rather than the one used by test_kfree_rcu())
 * so that the resulting leak report points at this test.
 */
static void test_leak_destroy(struct kunit *test)
{
	struct kmem_cache *s = test_kmem_cache_create("TestSlub_leak_destroy",
							64, SLAB_NO_MERGE);

	kmem_cache_alloc(s, GFP_KERNEL);

	kmem_cache_destroy(s);

	KUNIT_EXPECT_EQ(test, 1, slab_errors);
}
static int test_init(struct kunit *test)
{
slab_errors = 0;
......@@ -177,6 +206,8 @@ static struct kunit_case test_cases[] = {
KUNIT_CASE(test_clobber_redzone_free),
KUNIT_CASE(test_kmalloc_redzone_access),
KUNIT_CASE(test_kfree_rcu),
KUNIT_CASE(test_leak_destroy),
{}
};
......
......@@ -70,6 +70,38 @@ config SLUB_DEBUG_ON
off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
"slab_debug=-".
config SLUB_RCU_DEBUG
bool "Enable UAF detection in TYPESAFE_BY_RCU caches (for KASAN)"
depends on SLUB_DEBUG
# SLUB_RCU_DEBUG should build fine without KASAN, but is currently useless
# without KASAN, so mark it as a dependency of KASAN for now.
depends on KASAN
default KASAN_GENERIC || KASAN_SW_TAGS
help
Make SLAB_TYPESAFE_BY_RCU caches behave approximately as if the cache
was not marked as SLAB_TYPESAFE_BY_RCU and every caller used
kfree_rcu() instead.
This is intended for use in combination with KASAN, to enable KASAN to
detect use-after-free accesses in such caches.
(KFENCE is able to do that independent of this flag.)
This might degrade performance.
Unfortunately this also prevents a very specific bug pattern from
triggering (insufficient checks against an object being recycled
within the RCU grace period); so this option can be turned off even on
KASAN builds, in case you want to test for such a bug.
If you're using this for testing bugs / fuzzing and care about
catching all the bugs WAY more than performance, you might want to
also turn on CONFIG_RCU_STRICT_GRACE_PERIOD.
WARNING:
This is designed as a debugging feature, not a security feature.
Objects are sometimes recycled without RCU delay under memory pressure.
If unsure, say N.
config PAGE_OWNER
bool "Track page owner"
depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
......
......@@ -208,15 +208,12 @@ void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
return (void *)object;
}
static inline bool poison_slab_object(struct kmem_cache *cache, void *object,
unsigned long ip, bool init)
/* Returns true when freeing the object is not safe. */
static bool check_slab_allocation(struct kmem_cache *cache, void *object,
unsigned long ip)
{
void *tagged_object;
if (!kasan_arch_is_ready())
return false;
void *tagged_object = object;
tagged_object = object;
object = kasan_reset_tag(object);
if (unlikely(nearest_obj(cache, virt_to_slab(object), object) != object)) {
......@@ -224,37 +221,47 @@ static inline bool poison_slab_object(struct kmem_cache *cache, void *object,
return true;
}
/* RCU slabs could be legally used after free within the RCU period. */
if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
return false;
if (!kasan_byte_accessible(tagged_object)) {
kasan_report_invalid_free(tagged_object, ip, KASAN_REPORT_DOUBLE_FREE);
return true;
}
return false;
}
/*
 * Poison a slab object that is being freed and, when stack collection is
 * enabled, save its free info.
 *
 * Nothing is poisoned or recorded when @still_accessible is set, because the
 * object may still be legally accessed within the RCU period.
 */
static inline void poison_slab_object(struct kmem_cache *cache, void *object,
				      bool init, bool still_accessible)
{
	void *tagged = object;
	void *untagged = kasan_reset_tag(object);

	/* RCU slabs could be legally used after free within the RCU period. */
	if (unlikely(still_accessible))
		return;

	kasan_poison(untagged,
		     round_up(cache->object_size, KASAN_GRANULE_SIZE),
		     KASAN_SLAB_FREE, init);

	/* The free info is saved against the original (tagged) pointer. */
	if (kasan_stack_collection_enabled())
		kasan_save_free_info(cache, tagged);
}
return false;
/*
 * Out-of-line part of kasan_slab_pre_free(). Returns false without checking
 * anything when the arch is not ready for KASAN or @object is a KFENCE
 * address; otherwise returns the verdict of check_slab_allocation().
 */
bool __kasan_slab_pre_free(struct kmem_cache *cache, void *object,
			   unsigned long ip)
{
	if (!kasan_arch_is_ready())
		return false;

	if (is_kfence_address(object))
		return false;

	return check_slab_allocation(cache, object, ip);
}
bool __kasan_slab_free(struct kmem_cache *cache, void *object,
unsigned long ip, bool init)
bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init,
bool still_accessible)
{
if (is_kfence_address(object))
if (!kasan_arch_is_ready() || is_kfence_address(object))
return false;
/*
* If the object is buggy, do not let slab put the object onto the
* freelist. The object will thus never be allocated again and its
* metadata will never get released.
*/
if (poison_slab_object(cache, object, ip, init))
return true;
poison_slab_object(cache, object, init, still_accessible);
/*
* If the object is put into quarantine, do not let slab put the object
......@@ -504,11 +511,16 @@ bool __kasan_mempool_poison_object(void *ptr, unsigned long ip)
return true;
}
if (is_kfence_address(ptr))
return false;
if (is_kfence_address(ptr) || !kasan_arch_is_ready())
return true;
slab = folio_slab(folio);
return !poison_slab_object(slab->slab_cache, ptr, ip, false);
if (check_slab_allocation(slab->slab_cache, ptr, ip))
return false;
poison_slab_object(slab->slab_cache, ptr, false, false);
return true;
}
void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip)
......
......@@ -996,6 +996,51 @@ static void kmem_cache_invalid_free(struct kunit *test)
kmem_cache_destroy(cache);
}
/*
* Check that KASAN reports a use-after-free on an object from a
* SLAB_TYPESAFE_BY_RCU cache once the RCU callback scheduled by
* kmem_cache_free() has run, while an access made inside the RCU read-side
* critical section that spans the free is still allowed.
*
* Requires CONFIG_SLUB_RCU_DEBUG.
*/
static void kmem_cache_rcu_uaf(struct kunit *test)
{
char *p;
size_t size = 200;
struct kmem_cache *cache;
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB_RCU_DEBUG);
cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU,
NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
p = kmem_cache_alloc(cache, GFP_KERNEL);
if (!p) {
/* Allocation failure is logged; the cache is cleaned up before returning. */
kunit_err(test, "Allocation failed: %s\n", __func__);
kmem_cache_destroy(cache);
return;
}
*p = 1;
rcu_read_lock();
/* Free the object - this will internally schedule an RCU callback. */
kmem_cache_free(cache, p);
/*
* We should still be allowed to access the object at this point because
* the cache is SLAB_TYPESAFE_BY_RCU and we've been in an RCU read-side
* critical section since before the kmem_cache_free().
*/
READ_ONCE(*p);
rcu_read_unlock();
/*
* Wait for the RCU callback to execute; after this, the object should
* have actually been freed from KASAN's perspective.
*/
rcu_barrier();
KUNIT_EXPECT_KASAN_FAIL(test, READ_ONCE(*p));
kmem_cache_destroy(cache);
}
static void empty_cache_ctor(void *object) { }
static void kmem_cache_double_destroy(struct kunit *test)
......@@ -1937,6 +1982,7 @@ static struct kunit_case kasan_kunit_test_cases[] = {
KUNIT_CASE(kmem_cache_oob),
KUNIT_CASE(kmem_cache_double_free),
KUNIT_CASE(kmem_cache_invalid_free),
KUNIT_CASE(kmem_cache_rcu_uaf),
KUNIT_CASE(kmem_cache_double_destroy),
KUNIT_CASE(kmem_cache_accounted),
KUNIT_CASE(kmem_cache_bulk),
......
......@@ -261,8 +261,6 @@ struct kmem_cache {
unsigned int object_size; /* Object size without metadata */
struct reciprocal_value reciprocal_size;
unsigned int offset; /* Free pointer offset */
/* Specific free pointer requested (if not UINT_MAX) */
unsigned int rcu_freeptr_offset;
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
......@@ -424,7 +422,9 @@ kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller)
gfp_t kmalloc_fix_flags(gfp_t flags);
/* Functions provided by the slab allocators */
int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
int do_kmem_cache_create(struct kmem_cache *s, const char *name,
unsigned int size, struct kmem_cache_args *args,
slab_flags_t flags);
void __init kmem_cache_init(void);
extern void create_boot_cache(struct kmem_cache *, const char *name,
......@@ -445,6 +445,13 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
return (s->flags & SLAB_KMALLOC);
}
/*
 * Return true if @s is a kmalloc cache that has none of the SLAB_CACHE_DMA,
 * SLAB_ACCOUNT and SLAB_RECLAIM_ACCOUNT flags set; false otherwise.
 */
static inline bool is_kmalloc_normal(struct kmem_cache *s)
{
	return is_kmalloc_cache(s) &&
	       !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT));
}
/* Legal flag mask for kmem_cache_create(), for various configurations */
#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
SLAB_CACHE_DMA32 | SLAB_PANIC | \
......
This diff is collapsed.
This diff is collapsed.
......@@ -714,6 +714,7 @@ struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg)
out:
release_sock(sk);
if (newsk && mem_cgroup_sockets_enabled) {
gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
int amt = 0;
/* atomically get the memory usage, set and charge the
......@@ -731,8 +732,8 @@ struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg)
}
if (amt)
mem_cgroup_charge_skmem(newsk->sk_memcg, amt,
GFP_KERNEL | __GFP_NOFAIL);
mem_cgroup_charge_skmem(newsk->sk_memcg, amt, gfp);
kmem_cache_charge(newsk, gfp);
release_sock(newsk);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment