Commit 3b363692 authored by Michal Hocko, committed by Linus Torvalds

mm, memcg: sync allocation and memcg charge gfp flags for THP

memcg currently uses hardcoded GFP_TRANSHUGE gfp flags for all THP
charges.  THP allocations, however, might be using different flags
depending on /sys/kernel/mm/transparent_hugepage/{,khugepaged/}defrag and
the current allocation context.

The primary difference is that defrag configured to the "madvise" value will
clear the __GFP_WAIT flag from the core gfp mask to make the allocation
lighter for all mappings which are not backed by VM_HUGEPAGE vmas.  If the
memcg charge path ignores this fact we will get a light allocation, but a
potential memcg reclaim would kill the whole point of the configuration.

Fix the mismatch by providing the same gfp mask used for the allocation to
the charge functions.  This is quite easy for all paths except for the
khugepaged kernel thread with !CONFIG_NUMA, which is doing a pre-allocation
long before the allocated page is used in collapse_huge_page via
khugepaged_alloc_page.  To avoid cluttering the whole code path
from khugepaged_do_scan we simply return the current flags as per
khugepaged_defrag() value which might have changed since the
preallocation.  If somebody changed the value of the knob we would charge
differently but this shouldn't happen often and it is definitely not
critical because it would only lead to a reduced success rate of one-off
THP promotion.

[akpm@linux-foundation.org: fix weird code layout while we're there]
[rientjes@google.com: clean up around alloc_hugepage_gfpmask()]
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent cc5993bd
...@@ -708,7 +708,7 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot) ...@@ -708,7 +708,7 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *vma,
unsigned long haddr, pmd_t *pmd, unsigned long haddr, pmd_t *pmd,
struct page *page) struct page *page, gfp_t gfp)
{ {
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
pgtable_t pgtable; pgtable_t pgtable;
...@@ -716,7 +716,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, ...@@ -716,7 +716,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
VM_BUG_ON_PAGE(!PageCompound(page), page); VM_BUG_ON_PAGE(!PageCompound(page), page);
if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg)) if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
return VM_FAULT_OOM; return VM_FAULT_OOM;
pgtable = pte_alloc_one(mm, haddr); pgtable = pte_alloc_one(mm, haddr);
...@@ -822,7 +822,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -822,7 +822,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(THP_FAULT_FALLBACK); count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
} }
if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) {
put_page(page); put_page(page);
count_vm_event(THP_FAULT_FALLBACK); count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
...@@ -1080,6 +1080,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1080,6 +1080,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long haddr; unsigned long haddr;
unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */
gfp_t huge_gfp; /* for allocation and charge */
ptl = pmd_lockptr(mm, pmd); ptl = pmd_lockptr(mm, pmd);
VM_BUG_ON_VMA(!vma->anon_vma, vma); VM_BUG_ON_VMA(!vma->anon_vma, vma);
...@@ -1106,10 +1107,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1106,10 +1107,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
alloc: alloc:
if (transparent_hugepage_enabled(vma) && if (transparent_hugepage_enabled(vma) &&
!transparent_hugepage_debug_cow()) { !transparent_hugepage_debug_cow()) {
gfp_t gfp; huge_gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
new_page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
} else } else
new_page = NULL; new_page = NULL;
...@@ -1130,8 +1129,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1130,8 +1129,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out; goto out;
} }
if (unlikely(mem_cgroup_try_charge(new_page, mm, if (unlikely(mem_cgroup_try_charge(new_page, mm, huge_gfp, &memcg))) {
GFP_TRANSHUGE, &memcg))) {
put_page(new_page); put_page(new_page);
if (page) { if (page) {
split_huge_page(page); split_huge_page(page);
...@@ -2323,19 +2321,13 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait) ...@@ -2323,19 +2321,13 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
return true; return true;
} }
static struct page static struct page *
*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm, khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address, struct vm_area_struct *vma, unsigned long address,
int node) int node)
{ {
gfp_t flags;
VM_BUG_ON_PAGE(*hpage, *hpage); VM_BUG_ON_PAGE(*hpage, *hpage);
/* Only allocate from the target node */
flags = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
__GFP_THISNODE;
/* /*
* Before allocating the hugepage, release the mmap_sem read lock. * Before allocating the hugepage, release the mmap_sem read lock.
* The allocation can take potentially a long time if it involves * The allocation can take potentially a long time if it involves
...@@ -2344,7 +2336,7 @@ static struct page ...@@ -2344,7 +2336,7 @@ static struct page
*/ */
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
*hpage = alloc_pages_exact_node(node, flags, HPAGE_PMD_ORDER); *hpage = alloc_pages_exact_node(node, gfp, HPAGE_PMD_ORDER);
if (unlikely(!*hpage)) { if (unlikely(!*hpage)) {
count_vm_event(THP_COLLAPSE_ALLOC_FAILED); count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
*hpage = ERR_PTR(-ENOMEM); *hpage = ERR_PTR(-ENOMEM);
...@@ -2397,13 +2389,14 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait) ...@@ -2397,13 +2389,14 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
return true; return true;
} }
static struct page static struct page *
*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm, khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address, struct vm_area_struct *vma, unsigned long address,
int node) int node)
{ {
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
VM_BUG_ON(!*hpage); VM_BUG_ON(!*hpage);
return *hpage; return *hpage;
} }
#endif #endif
...@@ -2438,16 +2431,21 @@ static void collapse_huge_page(struct mm_struct *mm, ...@@ -2438,16 +2431,21 @@ static void collapse_huge_page(struct mm_struct *mm,
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */
gfp_t gfp;
VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(address & ~HPAGE_PMD_MASK);
/* Only allocate from the target node */
gfp = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
__GFP_THISNODE;
/* release the mmap_sem read lock. */ /* release the mmap_sem read lock. */
new_page = khugepaged_alloc_page(hpage, mm, vma, address, node); new_page = khugepaged_alloc_page(hpage, gfp, mm, vma, address, node);
if (!new_page) if (!new_page)
return; return;
if (unlikely(mem_cgroup_try_charge(new_page, mm, if (unlikely(mem_cgroup_try_charge(new_page, mm,
GFP_TRANSHUGE, &memcg))) gfp, &memcg)))
return; return;
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment