Commit cabdf74e authored by Peng Zhang, committed by Andrew Morton

mm: kfence: allocate kfence_metadata at runtime

kfence_metadata is currently a static array.  In preparation for allocating
a scalable __kfence_pool, first change the metadata to be allocated at
runtime.  Since each kfence_metadata object is 1160 bytes, this saves at
least 72 pages (with the default 256 objects) when kfence is not enabled.
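
As a quick sanity check of that figure, here is a hypothetical standalone C
sketch of the arithmetic (it assumes 4 KiB pages; the 1160-byte object size
and 256-object default are taken from the paragraph above):

    #include <stdio.h>

    int main(void)
    {
            const unsigned long meta_size = 1160;  /* sizeof(struct kfence_metadata) */
            const unsigned long nr_objects = 256;  /* default CONFIG_KFENCE_NUM_OBJECTS */
            const unsigned long page_size = 4096;
            unsigned long bytes = meta_size * nr_objects;               /* 296960 bytes */
            unsigned long pages = (bytes + page_size - 1) / page_size;  /* 73 pages */

            printf("%lu bytes -> %lu pages\n", bytes, pages);
            return 0;
    }

Rounded up, the static array pinned 73 pages of .bss whether or not kfence
was in use; with runtime allocation those pages are only consumed once
kfence is actually enabled.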

[akpm@linux-foundation.org: restore newline, per Marco]
Link: https://lkml.kernel.org/r/20230718073019.52513-1-zhangpeng.00@bytedance.com
Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 8d3a7d79
--- a/include/linux/kfence.h
+++ b/include/linux/kfence.h
@@ -59,15 +59,16 @@ static __always_inline bool is_kfence_address(const void *addr)
 }
 
 /**
- * kfence_alloc_pool() - allocate the KFENCE pool via memblock
+ * kfence_alloc_pool_and_metadata() - allocate the KFENCE pool and KFENCE
+ * metadata via memblock
  */
-void __init kfence_alloc_pool(void);
+void __init kfence_alloc_pool_and_metadata(void);
 
 /**
  * kfence_init() - perform KFENCE initialization at boot time
  *
- * Requires that kfence_alloc_pool() was called before. This sets up the
- * allocation gate timer, and requires that workqueues are available.
+ * Requires that kfence_alloc_pool_and_metadata() was called before. This sets
+ * up the allocation gate timer, and requires that workqueues are available.
  */
 void __init kfence_init(void);
 
@@ -223,7 +224,7 @@ bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
 #else /* CONFIG_KFENCE */
 
 static inline bool is_kfence_address(const void *addr) { return false; }
-static inline void kfence_alloc_pool(void) { }
+static inline void kfence_alloc_pool_and_metadata(void) { }
 static inline void kfence_init(void) { }
 static inline void kfence_shutdown_cache(struct kmem_cache *s) { }
 static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; }
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -116,7 +116,15 @@ EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */
  * backing pages (in __kfence_pool).
  */
 static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0);
-struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
+struct kfence_metadata *kfence_metadata __read_mostly;
+
+/*
+ * If kfence_metadata is not NULL, it may be accessed by kfence_shutdown_cache().
+ * So introduce kfence_metadata_init to initialize metadata, and then make
+ * kfence_metadata visible after initialization is successful. This prevents
+ * potential UAF or access to uninitialized metadata.
+ */
+static struct kfence_metadata *kfence_metadata_init __read_mostly;
 
 /* Freelist with available objects. */
 static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
 
@@ -591,7 +599,7 @@ static unsigned long kfence_init_pool(void)
 		__folio_set_slab(slab_folio(slab));
 #ifdef CONFIG_MEMCG
-		slab->memcg_data = (unsigned long)&kfence_metadata[i / 2 - 1].objcg |
+		slab->memcg_data = (unsigned long)&kfence_metadata_init[i / 2 - 1].objcg |
 				   MEMCG_DATA_OBJCGS;
 #endif
 	}
 
@@ -610,7 +618,7 @@ static unsigned long kfence_init_pool(void)
 	}
 
 	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
-		struct kfence_metadata *meta = &kfence_metadata[i];
+		struct kfence_metadata *meta = &kfence_metadata_init[i];
 
 		/* Initialize metadata. */
 		INIT_LIST_HEAD(&meta->list);
 
@@ -626,6 +634,12 @@ static unsigned long kfence_init_pool(void)
 		addr += 2 * PAGE_SIZE;
 	}
 
+	/*
+	 * Make kfence_metadata visible only when initialization is successful.
+	 * Otherwise, if the initialization fails and kfence_metadata is freed,
+	 * it may cause UAF in kfence_shutdown_cache().
+	 */
+	smp_store_release(&kfence_metadata, kfence_metadata_init);
 	return 0;
 
 reset_slab:
@@ -672,26 +686,10 @@ static bool __init kfence_init_pool_early(void)
 	 */
 	memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
 	__kfence_pool = NULL;
-	return false;
-}
-
-static bool kfence_init_pool_late(void)
-{
-	unsigned long addr, free_size;
-
-	addr = kfence_init_pool();
-
-	if (!addr)
-		return true;
 
-	/* Same as above. */
-	free_size = KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool);
-#ifdef CONFIG_CONTIG_ALLOC
-	free_contig_range(page_to_pfn(virt_to_page((void *)addr)), free_size / PAGE_SIZE);
-#else
-	free_pages_exact((void *)addr, free_size);
-#endif
-	__kfence_pool = NULL;
+	memblock_free_late(__pa(kfence_metadata_init), KFENCE_METADATA_SIZE);
+	kfence_metadata_init = NULL;
 
 	return false;
 }
@@ -841,19 +839,30 @@ static void toggle_allocation_gate(struct work_struct *work)
 
 /* === Public interface ===================================================== */
 
-void __init kfence_alloc_pool(void)
+void __init kfence_alloc_pool_and_metadata(void)
 {
 	if (!kfence_sample_interval)
 		return;
 
-	/* if the pool has already been initialized by arch, skip the below. */
-	if (__kfence_pool)
-		return;
-
-	__kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
-
+	/*
+	 * If the pool has already been initialized by arch, there is no need to
+	 * re-allocate the memory pool.
+	 */
 	if (!__kfence_pool)
+		__kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+
+	if (!__kfence_pool) {
 		pr_err("failed to allocate pool\n");
+		return;
+	}
+
+	/* The memory allocated by memblock has been zeroed out. */
+	kfence_metadata_init = memblock_alloc(KFENCE_METADATA_SIZE, PAGE_SIZE);
+	if (!kfence_metadata_init) {
+		pr_err("failed to allocate metadata\n");
+		memblock_free(__kfence_pool, KFENCE_POOL_SIZE);
+		__kfence_pool = NULL;
+	}
 }
 
 static void kfence_init_enable(void)
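
The error handling in the hunk above follows a simple allocate-then-unwind
order: the pool may already have been set up by the architecture, and a
failed metadata allocation must roll the pool back. A minimal userspace
analogue of that flow (hypothetical sketch; calloc() stands in for
memblock_alloc(), which likewise returns zeroed memory):

    #include <stdlib.h>

    static char *pool;
    static char *metadata;

    static void alloc_pool_and_metadata(size_t pool_size, size_t meta_size)
    {
            if (!pool)      /* may already exist, e.g. arch-initialized */
                    pool = calloc(1, pool_size);
            if (!pool)
                    return;

            metadata = calloc(1, meta_size);
            if (!metadata) {
                    free(pool);     /* a pool without metadata is unusable */
                    pool = NULL;
            }
    }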
@@ -895,33 +904,69 @@ void __init kfence_init(void)
 
 static int kfence_init_late(void)
 {
-	const unsigned long nr_pages = KFENCE_POOL_SIZE / PAGE_SIZE;
+	const unsigned long nr_pages_pool = KFENCE_POOL_SIZE / PAGE_SIZE;
+	const unsigned long nr_pages_meta = KFENCE_METADATA_SIZE / PAGE_SIZE;
+	unsigned long addr = (unsigned long)__kfence_pool;
+	unsigned long free_size = KFENCE_POOL_SIZE;
+	int err = -ENOMEM;
+
 #ifdef CONFIG_CONTIG_ALLOC
 	struct page *pages;
 
-	pages = alloc_contig_pages(nr_pages, GFP_KERNEL, first_online_node, NULL);
+	pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL, first_online_node,
+				   NULL);
 	if (!pages)
 		return -ENOMEM;
+
 	__kfence_pool = page_to_virt(pages);
+	pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL, first_online_node,
+				   NULL);
+	if (pages)
+		kfence_metadata_init = page_to_virt(pages);
 #else
-	if (nr_pages > MAX_ORDER_NR_PAGES) {
+	if (nr_pages_pool > MAX_ORDER_NR_PAGES ||
+	    nr_pages_meta > MAX_ORDER_NR_PAGES) {
 		pr_warn("KFENCE_NUM_OBJECTS too large for buddy allocator\n");
 		return -EINVAL;
 	}
+
 	__kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL);
 	if (!__kfence_pool)
 		return -ENOMEM;
+
+	kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, GFP_KERNEL);
 #endif
 
-	if (!kfence_init_pool_late()) {
-		pr_err("%s failed\n", __func__);
-		return -EBUSY;
+	if (!kfence_metadata_init)
+		goto free_pool;
+
+	memzero_explicit(kfence_metadata_init, KFENCE_METADATA_SIZE);
+	addr = kfence_init_pool();
+	if (!addr) {
+		kfence_init_enable();
+		kfence_debugfs_init();
+		return 0;
 	}
 
-	kfence_init_enable();
-	kfence_debugfs_init();
+	pr_err("%s failed\n", __func__);
+	free_size = KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool);
+	err = -EBUSY;
 
-	return 0;
+#ifdef CONFIG_CONTIG_ALLOC
+	free_contig_range(page_to_pfn(virt_to_page((void *)kfence_metadata_init)),
+			  nr_pages_meta);
+free_pool:
+	free_contig_range(page_to_pfn(virt_to_page((void *)addr)),
+			  free_size / PAGE_SIZE);
+#else
+	free_pages_exact((void *)kfence_metadata_init, KFENCE_METADATA_SIZE);
+free_pool:
+	free_pages_exact((void *)addr, free_size);
+#endif
+
+	kfence_metadata_init = NULL;
+	__kfence_pool = NULL;
+	return err;
 }
 
 static int kfence_enable_late(void)
@@ -941,6 +986,10 @@ void kfence_shutdown_cache(struct kmem_cache *s)
 	struct kfence_metadata *meta;
 	int i;
 
+	/* Pairs with release in kfence_init_pool(). */
+	if (!smp_load_acquire(&kfence_metadata))
+		return;
+
 	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
 		bool in_use;
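
The smp_load_acquire() above pairs with the smp_store_release() in
kfence_init_pool(): the pointer is published only after every metadata entry
is initialized, so any reader that observes a non-NULL pointer is guaranteed
to see fully initialized contents. For illustration, a minimal C11 sketch of
the same publication pattern (hypothetical userspace code;
memory_order_release/acquire correspond to the kernel primitives used here):

    #include <stdatomic.h>
    #include <stdlib.h>

    struct metadata { int ready; };

    static _Atomic(struct metadata *) published;

    static void publish(void)
    {
            struct metadata *m = malloc(sizeof(*m));

            if (!m)
                    return;
            m->ready = 1;   /* all initialization precedes the release store */
            atomic_store_explicit(&published, m, memory_order_release);
    }

    static int consume(void)
    {
            struct metadata *m = atomic_load_explicit(&published, memory_order_acquire);

            if (!m)         /* not yet published; bail out like kfence_shutdown_cache() */
                    return 0;
            return m->ready;        /* guaranteed to read 1, never garbage */
    }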
--- a/mm/kfence/kfence.h
+++ b/mm/kfence/kfence.h
@@ -102,7 +102,10 @@ struct kfence_metadata {
 #endif
 };
 
-extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
+#define KFENCE_METADATA_SIZE PAGE_ALIGN(sizeof(struct kfence_metadata) * \
+					CONFIG_KFENCE_NUM_OBJECTS)
+
+extern struct kfence_metadata *kfence_metadata;
 
 static inline struct kfence_metadata *addr_to_metadata(unsigned long addr)
 {
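
KFENCE_METADATA_SIZE rounds the metadata array up to whole pages, since both
memblock and the page allocator hand out page-granular memory. A hedged
userspace equivalent of that rounding (assuming 4 KiB pages and the
1160-byte object size quoted in the commit message):

    #include <assert.h>

    #define PAGE_SIZE       4096UL
    #define PAGE_ALIGN(x)   (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    int main(void)
    {
            /* 1160 * 256 = 296960 bytes, aligned up to 73 full pages. */
            assert(PAGE_ALIGN(1160UL * 256) == 73 * PAGE_SIZE);
            return 0;
    }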
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2767,7 +2767,7 @@ void __init mm_core_init(void)
 	 */
 	page_ext_init_flatmem();
 	mem_debugging_and_hardening_init();
-	kfence_alloc_pool();
+	kfence_alloc_pool_and_metadata();
 	report_meminit();
 	kmsan_init_shadow();
 	stack_depot_early_init();