Commit a664b2d8, authored by Andrea Arcangeli, committed by Linus Torvalds

thp: madvise(MADV_NOHUGEPAGE)

Add madvise MADV_NOHUGEPAGE to mark regions that are not important to be
hugepage backed.  Return -EINVAL if the vma is not of an anonymous type,
or the feature isn't built into the kernel.  Never silently return
success.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 1ddd6db4
...@@ -52,10 +52,12 @@ extern pmd_t *page_check_address_pmd(struct page *page, ...@@ -52,10 +52,12 @@ extern pmd_t *page_check_address_pmd(struct page *page,
#define HPAGE_PMD_SIZE HPAGE_SIZE #define HPAGE_PMD_SIZE HPAGE_SIZE
#define transparent_hugepage_enabled(__vma) \ #define transparent_hugepage_enabled(__vma) \
(transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_FLAG) || \ ((transparent_hugepage_flags & \
(transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_FLAG) || \
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \ (transparent_hugepage_flags & \
(__vma)->vm_flags & VM_HUGEPAGE)) (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \
((__vma)->vm_flags & VM_HUGEPAGE))) && \
!((__vma)->vm_flags & VM_NOHUGEPAGE))
#define transparent_hugepage_defrag(__vma) \ #define transparent_hugepage_defrag(__vma) \
((transparent_hugepage_flags & \ ((transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) || \ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) || \
...@@ -103,7 +105,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd); ...@@ -103,7 +105,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
#if HPAGE_PMD_ORDER > MAX_ORDER #if HPAGE_PMD_ORDER > MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator" #error "hugepages can't be allocated by the buddy allocator"
#endif #endif
extern int hugepage_madvise(unsigned long *vm_flags); extern int hugepage_madvise(unsigned long *vm_flags, int advice);
extern void __vma_adjust_trans_huge(struct vm_area_struct *vma, extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start, unsigned long start,
unsigned long end, unsigned long end,
...@@ -141,7 +143,7 @@ static inline int split_huge_page(struct page *page) ...@@ -141,7 +143,7 @@ static inline int split_huge_page(struct page *page)
do { } while (0) do { } while (0)
#define wait_split_huge_page(__anon_vma, __pmd) \ #define wait_split_huge_page(__anon_vma, __pmd) \
do { } while (0) do { } while (0)
static inline int hugepage_madvise(unsigned long *vm_flags) static inline int hugepage_madvise(unsigned long *vm_flags, int advice)
{ {
BUG(); BUG();
return 0; return 0;
......
...@@ -38,9 +38,10 @@ static inline void khugepaged_exit(struct mm_struct *mm) ...@@ -38,9 +38,10 @@ static inline void khugepaged_exit(struct mm_struct *mm)
static inline int khugepaged_enter(struct vm_area_struct *vma) static inline int khugepaged_enter(struct vm_area_struct *vma)
{ {
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags)) if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
if (khugepaged_always() || if ((khugepaged_always() ||
(khugepaged_req_madv() && (khugepaged_req_madv() &&
vma->vm_flags & VM_HUGEPAGE)) vma->vm_flags & VM_HUGEPAGE)) &&
!(vma->vm_flags & VM_NOHUGEPAGE))
if (__khugepaged_enter(vma->vm_mm)) if (__khugepaged_enter(vma->vm_mm))
return -ENOMEM; return -ENOMEM;
return 0; return 0;
......
...@@ -83,6 +83,7 @@ extern unsigned int kobjsize(const void *objp); ...@@ -83,6 +83,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_GROWSUP 0x00000200 #define VM_GROWSUP 0x00000200
#else #else
#define VM_GROWSUP 0x00000000 #define VM_GROWSUP 0x00000000
#define VM_NOHUGEPAGE 0x00000200 /* MADV_NOHUGEPAGE marked this vma */
#endif #endif
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/khugepaged.h> #include <linux/khugepaged.h>
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/mman.h>
#include <asm/tlb.h> #include <asm/tlb.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include "internal.h" #include "internal.h"
...@@ -1388,18 +1389,36 @@ int split_huge_page(struct page *page) ...@@ -1388,18 +1389,36 @@ int split_huge_page(struct page *page)
return ret; return ret;
} }
int hugepage_madvise(unsigned long *vm_flags) int hugepage_madvise(unsigned long *vm_flags, int advice)
{ {
/* switch (advice) {
* Be somewhat over-protective like KSM for now! case MADV_HUGEPAGE:
*/ /*
if (*vm_flags & (VM_HUGEPAGE | VM_SHARED | VM_MAYSHARE | * Be somewhat over-protective like KSM for now!
VM_PFNMAP | VM_IO | VM_DONTEXPAND | */
VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | if (*vm_flags & (VM_HUGEPAGE |
VM_MIXEDMAP | VM_SAO)) VM_SHARED | VM_MAYSHARE |
return -EINVAL; VM_PFNMAP | VM_IO | VM_DONTEXPAND |
VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
*vm_flags |= VM_HUGEPAGE; VM_MIXEDMAP | VM_SAO))
return -EINVAL;
*vm_flags &= ~VM_NOHUGEPAGE;
*vm_flags |= VM_HUGEPAGE;
break;
case MADV_NOHUGEPAGE:
/*
* Be somewhat over-protective like KSM for now!
*/
if (*vm_flags & (VM_NOHUGEPAGE |
VM_SHARED | VM_MAYSHARE |
VM_PFNMAP | VM_IO | VM_DONTEXPAND |
VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
VM_MIXEDMAP | VM_SAO))
return -EINVAL;
*vm_flags &= ~VM_HUGEPAGE;
*vm_flags |= VM_NOHUGEPAGE;
break;
}
return 0; return 0;
} }
......
...@@ -72,7 +72,8 @@ static long madvise_behavior(struct vm_area_struct * vma, ...@@ -72,7 +72,8 @@ static long madvise_behavior(struct vm_area_struct * vma,
goto out; goto out;
break; break;
case MADV_HUGEPAGE: case MADV_HUGEPAGE:
error = hugepage_madvise(&new_flags); case MADV_NOHUGEPAGE:
error = hugepage_madvise(&new_flags, behavior);
if (error) if (error)
goto out; goto out;
break; break;
...@@ -290,6 +291,7 @@ madvise_behavior_valid(int behavior) ...@@ -290,6 +291,7 @@ madvise_behavior_valid(int behavior)
#endif #endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
case MADV_HUGEPAGE: case MADV_HUGEPAGE:
case MADV_NOHUGEPAGE:
#endif #endif
return 1; return 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment