Commit 121e6f32 authored by Nicholas Piggin's avatar Nicholas Piggin Committed by Linus Torvalds

mm/vmalloc: hugepage vmalloc mappings

Support huge page vmalloc mappings.  Config option HAVE_ARCH_HUGE_VMALLOC
enables support on architectures that define HAVE_ARCH_HUGE_VMAP and
supports PMD sized vmap mappings.

vmalloc will attempt to allocate PMD-sized pages if allocating PMD size or
larger, and fall back to small pages if that was unsuccessful.

Architectures must ensure that any arch specific vmalloc allocations that
require PAGE_SIZE mappings (e.g., module allocations vs strict module rwx)
use the VM_NOHUGE flag to inhibit larger mappings.

This can result in more internal fragmentation and memory overhead for a
given allocation, an option nohugevmalloc is added to disable at boot.

[colin.king@canonical.com: fix read of uninitialized pointer area]
  Link: https://lkml.kernel.org/r/20210318155955.18220-1-colin.king@canonical.com

Link: https://lkml.kernel.org/r/20210317062402.533919-14-npiggin@gmail.comSigned-off-by: default avatarNicholas Piggin <npiggin@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ding Tianhong <dingtianhong@huawei.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 5d87510d
......@@ -829,6 +829,17 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
config HAVE_ARCH_HUGE_VMAP
bool
#
# Archs that select this would be capable of PMD-sized vmaps (i.e.,
# arch_vmap_pmd_supported() returns true), and they must make no assumptions
# that vmalloc memory is mapped with PAGE_SIZE ptes. The VM_NO_HUGE_VMAP flag
# can be used to prohibit arch-specific allocations from using hugepages to
# help with this (e.g., modules may require it).
#
config HAVE_ARCH_HUGE_VMALLOC
depends on HAVE_ARCH_HUGE_VMAP
bool
config ARCH_WANT_HUGE_PMD_SHARE
bool
......
......@@ -26,6 +26,7 @@ struct notifier_block; /* in notifier.h */
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
#define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */
#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */
#define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */
/*
* VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC.
......@@ -54,6 +55,9 @@ struct vm_struct {
unsigned long size;
unsigned long flags;
struct page **pages;
#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
unsigned int page_order;
#endif
unsigned int nr_pages;
phys_addr_t phys_addr;
const void *caller;
......@@ -188,6 +192,22 @@ void free_vm_area(struct vm_struct *area);
extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
static inline bool is_vm_area_hugepages(const void *addr)
{
/*
* This may not 100% tell if the area is mapped with > PAGE_SIZE
* page table entries, if for some reason the architecture indicates
* larger sizes are available but decides not to use them, nothing
* prevents that. This only indicates the size of the physical page
* allocated in the vmalloc layer.
*/
#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
return find_vm_area(addr)->page_order > 0;
#else
return false;
#endif
}
#ifdef CONFIG_MMU
int vmap_range(unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
......@@ -205,6 +225,7 @@ static inline void set_vm_flush_reset_perms(void *addr)
if (vm)
vm->flags |= VM_FLUSH_RESET_PERMS;
}
#else
static inline int
map_kernel_range_noflush(unsigned long start, unsigned long size,
......
......@@ -72,6 +72,7 @@
#include <linux/padata.h>
#include <linux/khugepaged.h>
#include <linux/buffer_head.h>
#include <linux/vmalloc.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
......@@ -8222,6 +8223,7 @@ void *__init alloc_large_system_hash(const char *tablename,
void *table = NULL;
gfp_t gfp_flags;
bool virt;
bool huge;
/* allow the kernel cmdline to have a say */
if (!numentries) {
......@@ -8289,6 +8291,7 @@ void *__init alloc_large_system_hash(const char *tablename,
} else if (get_order(size) >= MAX_ORDER || hashdist) {
table = __vmalloc(size, gfp_flags);
virt = true;
huge = is_vm_area_hugepages(table);
} else {
/*
* If bucketsize is not a power-of-two, we may free
......@@ -8305,7 +8308,7 @@ void *__init alloc_large_system_hash(const char *tablename,
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
virt ? "vmalloc" : "linear");
virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
*_hash_shift = log2qty;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment