Commit c164e038 authored by Kirill A. Shutemov, committed by Linus Torvalds

mm: fix huge zero page accounting in smaps report

Like the small zero page, the huge zero page should not be accounted in the
smaps report as a normal page.

For small pages we rely on vm_normal_page() to filter out the zero page, but
vm_normal_page() is not designed to handle pmds.  We only get here due to the
hackish cast of pmd to pte in smaps_pte_range() -- the pte and pmd formats are
not necessarily compatible on each and every architecture.

Let's add a separate codepath to handle pmds.  follow_trans_huge_pmd() will
detect the huge zero page for us.

We need a pmd_dirty() helper to do this properly.  The patch adds it
to THP-enabled architectures which don't yet have one.

[akpm@linux-foundation.org: use do_div to fix 32-bit build]
Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Tested-by: Fengwei Yin <yfw.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 2314b42d
...@@ -279,6 +279,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, ...@@ -279,6 +279,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ #endif /* CONFIG_HAVE_RCU_TABLE_FREE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/* pmd_dirty(): convert the pmd with pmd_pte() and evaluate pte_dirty() on the result. */
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) #define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd)))
......
...@@ -467,6 +467,7 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd) ...@@ -467,6 +467,7 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
} }
#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) #define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
/* pmd_dirty(): convert the pmd with pmd_pte() and evaluate pte_dirty() on the result. */
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
......
...@@ -667,6 +667,13 @@ static inline unsigned long pmd_pfn(pmd_t pmd) ...@@ -667,6 +667,13 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
} }
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * pmd_dirty() - dirty test for a pmd entry.
 *
 * Rebuilds a pte_t from the raw pmd value (pmd_val() -> __pte()) and
 * returns whatever pte_dirty() reports for it.
 */
static inline unsigned long pmd_dirty(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));
return pte_dirty(pte);
}
static inline unsigned long pmd_young(pmd_t pmd) static inline unsigned long pmd_young(pmd_t pmd)
{ {
pte_t pte = __pte(pmd_val(pmd)); pte_t pte = __pte(pmd_val(pmd));
......
...@@ -99,6 +99,11 @@ static inline int pte_young(pte_t pte) ...@@ -99,6 +99,11 @@ static inline int pte_young(pte_t pte)
return pte_flags(pte) & _PAGE_ACCESSED; return pte_flags(pte) & _PAGE_ACCESSED;
} }
/*
 * pmd_dirty() - dirty test for a pmd entry.
 *
 * Returns pmd_flags(pmd) masked with _PAGE_DIRTY: nonzero when that flag
 * is set, zero otherwise (not normalized to 0/1).
 */
static inline int pmd_dirty(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_DIRTY;
}
static inline int pmd_young(pmd_t pmd) static inline int pmd_young(pmd_t pmd)
{ {
return pmd_flags(pmd) & _PAGE_ACCESSED; return pmd_flags(pmd) & _PAGE_ACCESSED;
......
...@@ -447,58 +447,91 @@ struct mem_size_stats { ...@@ -447,58 +447,91 @@ struct mem_size_stats {
u64 pss; u64 pss;
}; };
static void smaps_account(struct mem_size_stats *mss, struct page *page,
unsigned long size, bool young, bool dirty)
{
int mapcount;
if (PageAnon(page))
mss->anonymous += size;
static void smaps_pte_entry(pte_t ptent, unsigned long addr, mss->resident += size;
unsigned long ptent_size, struct mm_walk *walk) /* Accumulate the size in pages that have been accessed. */
if (young || PageReferenced(page))
mss->referenced += size;
mapcount = page_mapcount(page);
if (mapcount >= 2) {
u64 pss_delta;
if (dirty || PageDirty(page))
mss->shared_dirty += size;
else
mss->shared_clean += size;
pss_delta = (u64)size << PSS_SHIFT;
do_div(pss_delta, mapcount);
mss->pss += pss_delta;
} else {
if (dirty || PageDirty(page))
mss->private_dirty += size;
else
mss->private_clean += size;
mss->pss += (u64)size << PSS_SHIFT;
}
}
static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct mm_walk *walk)
{ {
struct mem_size_stats *mss = walk->private; struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = mss->vma; struct vm_area_struct *vma = mss->vma;
pgoff_t pgoff = linear_page_index(vma, addr); pgoff_t pgoff = linear_page_index(vma, addr);
struct page *page = NULL; struct page *page = NULL;
int mapcount;
if (pte_present(ptent)) { if (pte_present(*pte)) {
page = vm_normal_page(vma, addr, ptent); page = vm_normal_page(vma, addr, *pte);
} else if (is_swap_pte(ptent)) { } else if (is_swap_pte(*pte)) {
swp_entry_t swpent = pte_to_swp_entry(ptent); swp_entry_t swpent = pte_to_swp_entry(*pte);
if (!non_swap_entry(swpent)) if (!non_swap_entry(swpent))
mss->swap += ptent_size; mss->swap += PAGE_SIZE;
else if (is_migration_entry(swpent)) else if (is_migration_entry(swpent))
page = migration_entry_to_page(swpent); page = migration_entry_to_page(swpent);
} else if (pte_file(ptent)) { } else if (pte_file(*pte)) {
if (pte_to_pgoff(ptent) != pgoff) if (pte_to_pgoff(*pte) != pgoff)
mss->nonlinear += ptent_size; mss->nonlinear += PAGE_SIZE;
} }
if (!page) if (!page)
return; return;
if (PageAnon(page))
mss->anonymous += ptent_size;
if (page->index != pgoff) if (page->index != pgoff)
mss->nonlinear += ptent_size; mss->nonlinear += PAGE_SIZE;
mss->resident += ptent_size; smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
/* Accumulate the size in pages that have been accessed. */ }
if (pte_young(ptent) || PageReferenced(page))
mss->referenced += ptent_size; #ifdef CONFIG_TRANSPARENT_HUGEPAGE
mapcount = page_mapcount(page); static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
if (mapcount >= 2) { struct mm_walk *walk)
if (pte_dirty(ptent) || PageDirty(page)) {
mss->shared_dirty += ptent_size; struct mem_size_stats *mss = walk->private;
else struct vm_area_struct *vma = mss->vma;
mss->shared_clean += ptent_size; struct page *page;
mss->pss += (ptent_size << PSS_SHIFT) / mapcount;
} else { /* FOLL_DUMP will return -EFAULT on huge zero page */
if (pte_dirty(ptent) || PageDirty(page)) page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
mss->private_dirty += ptent_size; if (IS_ERR_OR_NULL(page))
else return;
mss->private_clean += ptent_size; mss->anonymous_thp += HPAGE_PMD_SIZE;
mss->pss += (ptent_size << PSS_SHIFT); smaps_account(mss, page, HPAGE_PMD_SIZE,
} pmd_young(*pmd), pmd_dirty(*pmd));
} }
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
struct mm_walk *walk)
{
}
#endif
static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
struct mm_walk *walk) struct mm_walk *walk)
...@@ -509,9 +542,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ...@@ -509,9 +542,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
spinlock_t *ptl; spinlock_t *ptl;
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl); spin_unlock(ptl);
mss->anonymous_thp += HPAGE_PMD_SIZE;
return 0; return 0;
} }
...@@ -524,7 +556,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ...@@ -524,7 +556,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
*/ */
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) for (; addr != end; pte++, addr += PAGE_SIZE)
smaps_pte_entry(*pte, addr, PAGE_SIZE, walk); smaps_pte_entry(pte, addr, walk);
pte_unmap_unlock(pte - 1, ptl); pte_unmap_unlock(pte - 1, ptl);
cond_resched(); cond_resched();
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment