Commit d6a71648 authored by Hyeonggon Yoo, committed by Vlastimil Babka

mm/slab: kmalloc: pass requests larger than order-1 page to page allocator

There is not much benefit in serving large objects from kmalloc() caches.
Let's pass large requests to the page allocator, as SLUB already does, for
better maintenance of the common code.
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
parent c4cab557
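
[Editor's note] In practical terms, KMALLOC_SHIFT_HIGH drops to PAGE_SHIFT + 1 for SLAB as
well, so the largest kmalloc cache covers an order-1 page (two pages, i.e. 8 KiB with 4 KiB
pages) and anything bigger is handed to the page allocator. The standalone snippet below only
illustrates that cutoff arithmetic, assuming PAGE_SHIFT = 12; the macro names mirror the
kernel's but the program is not kernel code.

/* Illustration only: the size cutoff after this patch, assuming 4 KiB pages
 * (PAGE_SHIFT = 12). Macro names mirror the kernel's, values are local. */
#include <stdio.h>

#define PAGE_SHIFT		12
#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)	/* 13 -> 8 KiB */
#define KMALLOC_MAX_CACHE_SIZE	(1UL << KMALLOC_SHIFT_HIGH)

int main(void)
{
	unsigned long sizes[] = { 64, 4096, 8192, 8193, 4UL << 20 };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		if (sizes[i] > KMALLOC_MAX_CACHE_SIZE)
			printf("%8lu bytes -> page allocator (large kmalloc)\n", sizes[i]);
		else
			printf("%8lu bytes -> kmalloc slab cache\n", sizes[i]);
	}
	return 0;
}
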
diff --git a/include/linux/slab.h b/include/linux/slab.h
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -243,27 +243,17 @@ static inline unsigned int arch_slab_minalign(void)
 
 #ifdef CONFIG_SLAB
 /*
- * The largest kmalloc size supported by the SLAB allocators is
- * 32 megabyte (2^25) or the maximum allocatable page order if that is
- * less than 32 MB.
- *
- * WARNING: Its not easy to increase this value since the allocators have
- * to do various tricks to work around compiler limitations in order to
- * ensure proper constant folding.
+ * SLAB and SLUB directly allocates requests fitting in to an order-1 page
+ * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
  */
-#define KMALLOC_SHIFT_HIGH	((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
-				(MAX_ORDER + PAGE_SHIFT - 1) : 25)
-#define KMALLOC_SHIFT_MAX	KMALLOC_SHIFT_HIGH
+#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
+#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW	5
 #endif
 #endif
 
 #ifdef CONFIG_SLUB
-/*
- * SLUB directly allocates requests fitting in to an order-1 page
- * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
- */
 #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
 #define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
@@ -415,10 +405,6 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
 	if (size <= 512 * 1024) return 19;
 	if (size <= 1024 * 1024) return 20;
 	if (size <=  2 * 1024 * 1024) return 21;
-	if (size <=  4 * 1024 * 1024) return 22;
-	if (size <=  8 * 1024 * 1024) return 23;
-	if (size <= 16 * 1024 * 1024) return 24;
-	if (size <= 32 * 1024 * 1024) return 25;
 
 	if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
 		BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
@@ -428,6 +414,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
 	/* Will never be reached. Needed because the compiler may complain */
 	return -1;
 }
+static_assert(PAGE_SHIFT <= 20);
 #define kmalloc_index(s) __kmalloc_index(s, true)
 #endif /* !CONFIG_SLOB */
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3585,11 +3585,19 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
 	struct kmem_cache *cachep;
 	void *ret;
 
-	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
-		return NULL;
+	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
+		ret = kmalloc_large_node_notrace(size, flags, node);
+
+		trace_kmalloc_node(caller, ret, NULL, size,
+				   PAGE_SIZE << get_order(size),
+				   flags, node);
+		return ret;
+	}
+
 	cachep = kmalloc_slab(size, flags);
 	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
 		return cachep;
 	ret = kmem_cache_alloc_node_trace(cachep, flags, node, size);
 	ret = kasan_kmalloc(cachep, ret, size, flags);
@@ -3664,17 +3672,27 @@ EXPORT_SYMBOL(kmem_cache_free);
 
 void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
 {
-	struct kmem_cache *s;
-	size_t i;
-
 	local_irq_disable();
-	for (i = 0; i < size; i++) {
+	for (int i = 0; i < size; i++) {
 		void *objp = p[i];
+		struct kmem_cache *s;
 
-		if (!orig_s) /* called via kfree_bulk */
-			s = virt_to_cache(objp);
-		else
+		if (!orig_s) {
+			struct folio *folio = virt_to_folio(objp);
+
+			/* called via kfree_bulk */
+			if (!folio_test_slab(folio)) {
+				local_irq_enable();
+				free_large_kmalloc(folio, objp);
+				local_irq_disable();
+				continue;
+			}
+			s = folio_slab(folio)->slab_cache;
+		} else {
 			s = cache_from_obj(orig_s, objp);
+		}
+
 		if (!s)
 			continue;
@@ -3703,20 +3721,24 @@ void kfree(const void *objp)
 {
 	struct kmem_cache *c;
 	unsigned long flags;
+	struct folio *folio;
 
 	trace_kfree(_RET_IP_, objp);
 
 	if (unlikely(ZERO_OR_NULL_PTR(objp)))
 		return;
-	local_irq_save(flags);
-	kfree_debugcheck(objp);
-	c = virt_to_cache(objp);
-	if (!c) {
-		local_irq_restore(flags);
+
+	folio = virt_to_folio(objp);
+	if (!folio_test_slab(folio)) {
+		free_large_kmalloc(folio, (void *)objp);
 		return;
 	}
-	debug_check_no_locks_freed(objp, c->object_size);
 
+	c = folio_slab(folio)->slab_cache;
+
+	local_irq_save(flags);
+	kfree_debugcheck(objp);
+	debug_check_no_locks_freed(objp, c->object_size);
 	debug_check_no_obj_freed(objp, c->object_size);
 	__cache_free(c, (void *)objp, _RET_IP_);
 	local_irq_restore(flags);
@@ -4138,15 +4160,17 @@ void __check_heap_object(const void *ptr, unsigned long n,
 size_t __ksize(const void *objp)
 {
 	struct kmem_cache *c;
-	size_t size;
+	struct folio *folio;
 
 	BUG_ON(!objp);
 	if (unlikely(objp == ZERO_SIZE_PTR))
 		return 0;
 
-	c = virt_to_cache(objp);
-	size = c ? c->object_size : 0;
+	folio = virt_to_folio(objp);
+	if (!folio_test_slab(folio))
+		return folio_size(folio);
 
-	return size;
+	c = folio_slab(folio)->slab_cache;
+	return c->object_size;
 }
 EXPORT_SYMBOL(__ksize);
diff --git a/mm/slab.h b/mm/slab.h
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -660,6 +660,9 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
 	print_tracking(cachep, x);
 	return cachep;
 }
+
+void free_large_kmalloc(struct folio *folio, void *object);
+
 #endif /* CONFIG_SLOB */
 
 static inline size_t slab_ksize(const struct kmem_cache *s)
diff --git a/mm/slab_common.c b/mm/slab_common.c
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -744,8 +744,8 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
 
 /*
  * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
- * kmalloc_index() supports up to 2^25=32MB, so the final entry of the table is
- * kmalloc-32M.
+ * kmalloc_index() supports up to 2^21=2MB, so the final entry of the table is
+ * kmalloc-2M.
  */
 const struct kmalloc_info_struct kmalloc_info[] __initconst = {
 	INIT_KMALLOC_INFO(0, 0),
@@ -769,11 +769,7 @@ const struct kmalloc_info_struct kmalloc_info[] __initconst = {
 	INIT_KMALLOC_INFO(262144, 256k),
 	INIT_KMALLOC_INFO(524288, 512k),
 	INIT_KMALLOC_INFO(1048576, 1M),
-	INIT_KMALLOC_INFO(2097152, 2M),
-	INIT_KMALLOC_INFO(4194304, 4M),
-	INIT_KMALLOC_INFO(8388608, 8M),
-	INIT_KMALLOC_INFO(16777216, 16M),
-	INIT_KMALLOC_INFO(33554432, 32M)
+	INIT_KMALLOC_INFO(2097152, 2M)
 };
 
 /*
@@ -886,6 +882,21 @@ void __init create_kmalloc_caches(slab_flags_t flags)
 	/* Kmalloc array is now usable */
 	slab_state = UP;
 }
+
+void free_large_kmalloc(struct folio *folio, void *object)
+{
+	unsigned int order = folio_order(folio);
+
+	if (WARN_ON_ONCE(order == 0))
+		pr_warn_once("object pointer: 0x%p\n", object);
+
+	kmemleak_free(object);
+	kasan_kfree_large(object);
+
+	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
+			      -(PAGE_SIZE << order));
+	__free_pages(folio_page(folio, 0), order);
+}
 #endif /* !CONFIG_SLOB */
 
 gfp_t kmalloc_fix_flags(gfp_t flags)
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1704,12 +1704,6 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
  * Hooks for other subsystems that check memory allocations. In a typical
  * production configuration these hooks all should produce no code at all.
  */
-static __always_inline void kfree_hook(void *x)
-{
-	kmemleak_free(x);
-	kasan_kfree_large(x);
-}
-
 static __always_inline bool slab_free_hook(struct kmem_cache *s,
 						void *x, bool init)
 {
@@ -3550,19 +3544,6 @@ struct detached_freelist {
 	struct kmem_cache *s;
 };
 
-static inline void free_large_kmalloc(struct folio *folio, void *object)
-{
-	unsigned int order = folio_order(folio);
-
-	if (WARN_ON_ONCE(order == 0))
-		pr_warn_once("object pointer: 0x%p\n", object);
-
-	kfree_hook(object);
-	mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
-			      -(PAGE_SIZE << order));
-	__free_pages(folio_page(folio, 0), order);
-}
-
 /*
  * This function progressively scans the array with free objects (with
  * a limited look ahead) and extract objects belonging to the same
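
[Editor's note] With free_large_kmalloc() moved to common code, kfree(), kfree_bulk() and
__ksize() in SLAB now share the folio-based dispatch that SLUB already uses: look up the
folio, and a non-slab folio means the object came straight from the page allocator. The
condensed sketch below is an illustration only; example_free() is a made-up name, the
helpers (virt_to_folio, folio_test_slab, folio_slab, free_large_kmalloc, __cache_free) are
the ones appearing in the hunks above, and the real functions additionally handle IRQ state
and the debug checks shown in the diff.

/* Condensed illustration of the shared free path (not a drop-in function):
 * a non-slab folio means the object was handed out by the page allocator. */
static void example_free(void *objp)
{
	struct folio *folio = virt_to_folio(objp);

	if (!folio_test_slab(folio)) {
		/* order >= 1 pages allocated by the large-kmalloc path */
		free_large_kmalloc(folio, objp);
		return;
	}

	/* ordinary slab object: free it through its owning kmem_cache */
	__cache_free(folio_slab(folio)->slab_cache, objp, _RET_IP_);
}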