Commit 121e6f32 authored by Nicholas Piggin, committed by Linus Torvalds

mm/vmalloc: hugepage vmalloc mappings

Support huge page vmalloc mappings.  The config option HAVE_ARCH_HUGE_VMALLOC
enables support on architectures that define HAVE_ARCH_HUGE_VMAP and
support PMD-sized vmap mappings.

vmalloc will attempt to allocate PMD-sized pages when the allocation is PMD
size or larger, and falls back to small pages if that is unsuccessful.
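
In outline, the allocation path tries the larger size first and retries with
order-0 pages.  The following is a minimal sketch of that policy, not the
literal mm/vmalloc.c code; alloc_and_map() is a hypothetical stand-in for the
real allocate-and-map steps:

	/* Illustrative sketch of the huge-then-fallback policy. */
	void *vmalloc_sketch(unsigned long size)
	{
		void *p;

		if (size >= PMD_SIZE) {
			/* Try to back the whole area with PMD-sized pages. */
			p = alloc_and_map(size, PMD_SHIFT);	/* hypothetical */
			if (p)
				return p;
		}

		/* Fall back to ordinary PAGE_SIZE pages. */
		return alloc_and_map(size, PAGE_SHIFT);		/* hypothetical */
	}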

Architectures must ensure that any arch-specific vmalloc allocations that
require PAGE_SIZE mappings (e.g., module allocations when strict module rwx
is enabled) use the VM_NO_HUGE_VMAP flag to inhibit larger mappings.
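
For example, an architecture whose module_alloc() relies on PAGE_SIZE ptes
(so strict module rwx can change permissions at page granularity) would pass
the flag through __vmalloc_node_range().  A sketch, with the bounds and
protection left generic:

	void *module_alloc(unsigned long size)
	{
		/* VM_NO_HUGE_VMAP forces PAGE_SIZE pte mappings for this area. */
		return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
					    GFP_KERNEL, PAGE_KERNEL_EXEC,
					    VM_NO_HUGE_VMAP, NUMA_NO_NODE,
					    __builtin_return_address(0));
	}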

This can result in more internal fragmentation and memory overhead for a
given allocation, so a boot option, nohugevmalloc, is added to disable the
behaviour at boot.
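
For example, to turn the behaviour off on an affected machine, add the option
to the kernel command line:

	nohugevmalloc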

[colin.king@canonical.com: fix read of uninitialized pointer area]
  Link: https://lkml.kernel.org/r/20210318155955.18220-1-colin.king@canonical.com

Link: https://lkml.kernel.org/r/20210317062402.533919-14-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ding Tianhong <dingtianhong@huawei.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 5d87510d
@@ -829,6 +829,17 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 config HAVE_ARCH_HUGE_VMAP
 	bool
 
+#
+# Archs that select this would be capable of PMD-sized vmaps (i.e.,
+# arch_vmap_pmd_supported() returns true), and they must make no assumptions
+# that vmalloc memory is mapped with PAGE_SIZE ptes. The VM_NO_HUGE_VMAP flag
+# can be used to prohibit arch-specific allocations from using hugepages to
+# help with this (e.g., modules may require it).
+#
+config HAVE_ARCH_HUGE_VMALLOC
+	depends on HAVE_ARCH_HUGE_VMAP
+	bool
+
 config ARCH_WANT_HUGE_PMD_SHARE
 	bool
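
An architecture opts in from its own Kconfig by selecting the new symbol.  As
an illustration only (not part of this patch), such a select would look like:

	config PPC
		select HAVE_ARCH_HUGE_VMALLOC		if HAVE_ARCH_HUGE_VMAP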
@@ -26,6 +26,7 @@ struct notifier_block;		/* in notifier.h */
 #define VM_KASAN		0x00000080	/* has allocated kasan shadow memory */
 #define VM_FLUSH_RESET_PERMS	0x00000100	/* reset direct map and flush TLB on unmap, can't be freed in atomic context */
 #define VM_MAP_PUT_PAGES	0x00000200	/* put pages and free array in vfree */
+#define VM_NO_HUGE_VMAP		0x00000400	/* force PAGE_SIZE pte mapping */
 
 /*
  * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC.
@@ -54,6 +55,9 @@ struct vm_struct {
 	unsigned long		size;
 	unsigned long		flags;
 	struct page		**pages;
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
+	unsigned int		page_order;
+#endif
 	unsigned int		nr_pages;
 	phys_addr_t		phys_addr;
 	const void		*caller;
@@ -188,6 +192,22 @@ void free_vm_area(struct vm_struct *area);
 extern struct vm_struct *remove_vm_area(const void *addr);
 extern struct vm_struct *find_vm_area(const void *addr);
 
+static inline bool is_vm_area_hugepages(const void *addr)
+{
+	/*
+	 * This may not 100% tell if the area is mapped with > PAGE_SIZE
+	 * page table entries, if for some reason the architecture indicates
+	 * larger sizes are available but decides not to use them, nothing
+	 * prevents that. This only indicates the size of the physical page
+	 * allocated in the vmalloc layer.
+	 */
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
+	return find_vm_area(addr)->page_order > 0;
+#else
+	return false;
+#endif
+}
+
 #ifdef CONFIG_MMU
 int vmap_range(unsigned long addr, unsigned long end,
 		phys_addr_t phys_addr, pgprot_t prot,
@@ -205,6 +225,7 @@ static inline void set_vm_flush_reset_perms(void *addr)
 	if (vm)
 		vm->flags |= VM_FLUSH_RESET_PERMS;
 }
+
 #else
 static inline int
 map_kernel_range_noflush(unsigned long start, unsigned long size,
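
The new is_vm_area_hugepages() helper lets callers adapt to the backing
granularity.  A minimal usage sketch; report_backing() is a hypothetical
caller:

	#include <linux/printk.h>
	#include <linux/vmalloc.h>

	static void report_backing(const void *p)
	{
		/* True only when vmalloc backed the area with > order-0 pages. */
		if (is_vm_area_hugepages(p))
			pr_info("%px: hugepage-backed vmalloc area\n", p);
		else
			pr_info("%px: PAGE_SIZE-backed vmalloc area\n", p);
	}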
@@ -72,6 +72,7 @@
 #include <linux/padata.h>
 #include <linux/khugepaged.h>
 #include <linux/buffer_head.h>
+#include <linux/vmalloc.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -8222,6 +8223,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 	void *table = NULL;
 	gfp_t gfp_flags;
 	bool virt;
+	bool huge;
 
 	/* allow the kernel cmdline to have a say */
 	if (!numentries) {
@@ -8289,6 +8291,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 	} else if (get_order(size) >= MAX_ORDER || hashdist) {
 		table = __vmalloc(size, gfp_flags);
 		virt = true;
+		huge = is_vm_area_hugepages(table);
 	} else {
 		/*
 		 * If bucketsize is not a power-of-two, we may free
@@ -8305,7 +8308,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 
 	pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
 		tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
-		virt ? "vmalloc" : "linear");
+		virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
 
 	if (_hash_shift)
 		*_hash_shift = log2qty;
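
With the updated pr_info(), a hash table that ends up hugepage-backed is
reported as such.  A hypothetical boot log line (values invented for
illustration):

	Dentry cache hash table entries: 8388608 (order: 14, 67108864 bytes, vmalloc hugepage)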