Commit cdd9a571, authored by David Hildenbrand, committed by Andrew Morton

fs/proc: move page_mapcount() to fs/proc/internal.h

...  and rename it to folio_precise_page_mapcount().  fs/proc is the last
remaining user, and that should stay that way.

While at it, clean up kpagecount_read() a bit: there are still some legacy
leftovers -- when the interface was introduced it returned the page
refcount, but it was changed shortly afterwards to return the page mapcount.
Further, do some simple folio conversion.

Once we stop using the per-page mapcounts of large folios, all
folio_precise_page_mapcount() users will have to implement an alternative
way to achieve what they are trying to achieve, possibly in a less precise
way.

[dan.carpenter@linaro.org: fix uninitialized variable in pagemap_pmd_range()]
  Link: https://lkml.kernel.org/r/9d6eaba7-92f8-4a70-8765-38a519680a87@moroto.mountain
Link: https://lkml.kernel.org/r/20240607122357.115423-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Oscar Salvador <osalvador@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 3689c3eb
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/binfmts.h> #include <linux/binfmts.h>
#include <linux/sched/coredump.h> #include <linux/sched/coredump.h>
#include <linux/sched/task.h> #include <linux/sched/task.h>
#include <linux/mm.h>
struct ctl_table_header; struct ctl_table_header;
struct mempolicy; struct mempolicy;
...@@ -142,6 +143,38 @@ unsigned name_to_int(const struct qstr *qstr); ...@@ -142,6 +143,38 @@ unsigned name_to_int(const struct qstr *qstr);
/* Worst case buffer size needed for holding an integer. */ /* Worst case buffer size needed for holding an integer. */
#define PROC_NUMBUF 13 #define PROC_NUMBUF 13
/**
 * folio_precise_page_mapcount() - Number of mappings of this folio page.
 * @folio: The folio.
 * @page: The page.
 *
 * Counts the present user page table entries that reference @page, as
 * tracked via the RMAP. A reference may be direct (PTE) or come from a
 * larger mapping that covers this page (e.g., PMD).
 *
 * This helper exists only to compute already-existing statistics
 * (USS, PSS, mapcount_max) and for debugging (/proc/kpagecount).
 *
 * Do not add new users.
 *
 * Returns: The number of mappings of this folio page. 0 for
 * folios that are not mapped to user space or are not tracked via the RMAP
 * (e.g., shared zeropage).
 */
static inline int folio_precise_page_mapcount(struct folio *folio,
		struct page *page)
{
	/* _mapcount is stored off by one, so add 1 to get the count. */
	int nr_mappings = atomic_read(&page->_mapcount) + 1;

	/* page_has_type() pages: report them as not mapped. */
	if (nr_mappings < PAGE_MAPCOUNT_RESERVE + 1)
		nr_mappings = 0;

	/* Small folios have no entire-folio mappings to account for. */
	if (!folio_test_large(folio))
		return nr_mappings;

	/* Mappings of the entire large folio also cover this page. */
	return nr_mappings + folio_entire_mapcount(folio);
}
/* /*
* array.c * array.c
*/ */
......
...@@ -37,21 +37,19 @@ static inline unsigned long get_max_dump_pfn(void) ...@@ -37,21 +37,19 @@ static inline unsigned long get_max_dump_pfn(void)
#endif #endif
} }
/* /proc/kpagecount - an array exposing page counts /* /proc/kpagecount - an array exposing page mapcounts
* *
* Each entry is a u64 representing the corresponding * Each entry is a u64 representing the corresponding
* physical page count. * physical page mapcount.
*/ */
static ssize_t kpagecount_read(struct file *file, char __user *buf, static ssize_t kpagecount_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos) size_t count, loff_t *ppos)
{ {
const unsigned long max_dump_pfn = get_max_dump_pfn(); const unsigned long max_dump_pfn = get_max_dump_pfn();
u64 __user *out = (u64 __user *)buf; u64 __user *out = (u64 __user *)buf;
struct page *ppage;
unsigned long src = *ppos; unsigned long src = *ppos;
unsigned long pfn; unsigned long pfn;
ssize_t ret = 0; ssize_t ret = 0;
u64 pcount;
pfn = src / KPMSIZE; pfn = src / KPMSIZE;
if (src & KPMMASK || count & KPMMASK) if (src & KPMMASK || count & KPMMASK)
...@@ -61,18 +59,19 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, ...@@ -61,18 +59,19 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
while (count > 0) { while (count > 0) {
struct page *page;
u64 mapcount = 0;
/* /*
* TODO: ZONE_DEVICE support requires to identify * TODO: ZONE_DEVICE support requires to identify
* memmaps that were actually initialized. * memmaps that were actually initialized.
*/ */
ppage = pfn_to_online_page(pfn); page = pfn_to_online_page(pfn);
if (page)
if (!ppage) mapcount = folio_precise_page_mapcount(page_folio(page),
pcount = 0; page);
else
pcount = page_mapcount(ppage);
if (put_user(pcount, out)) { if (put_user(mapcount, out)) {
ret = -EFAULT; ret = -EFAULT;
break; break;
} }
......
...@@ -488,12 +488,12 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, ...@@ -488,12 +488,12 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
return; return;
} }
/* /*
* The page_mapcount() is called to get a snapshot of the mapcount. * We obtain a snapshot of the mapcount. Without holding the folio lock
* Without holding the folio lock this snapshot can be slightly wrong as * this snapshot can be slightly wrong as we cannot always read the
* we cannot always read the mapcount atomically. * mapcount atomically.
*/ */
for (i = 0; i < nr; i++, page++) { for (i = 0; i < nr; i++, page++) {
int mapcount = page_mapcount(page); int mapcount = folio_precise_page_mapcount(folio, page);
unsigned long pss = PAGE_SIZE << PSS_SHIFT; unsigned long pss = PAGE_SIZE << PSS_SHIFT;
if (mapcount >= 2) if (mapcount >= 2)
pss /= mapcount; pss /= mapcount;
...@@ -1427,6 +1427,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, ...@@ -1427,6 +1427,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
{ {
u64 frame = 0, flags = 0; u64 frame = 0, flags = 0;
struct page *page = NULL; struct page *page = NULL;
struct folio *folio;
if (pte_present(pte)) { if (pte_present(pte)) {
if (pm->show_pfn) if (pm->show_pfn)
...@@ -1464,10 +1465,14 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, ...@@ -1464,10 +1465,14 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_UFFD_WP; flags |= PM_UFFD_WP;
} }
if (page && !PageAnon(page)) if (page) {
folio = page_folio(page);
if (!folio_test_anon(folio))
flags |= PM_FILE; flags |= PM_FILE;
if (page && (flags & PM_PRESENT) && page_mapcount(page) == 1) if ((flags & PM_PRESENT) &&
folio_precise_page_mapcount(folio, page) == 1)
flags |= PM_MMAP_EXCLUSIVE; flags |= PM_MMAP_EXCLUSIVE;
}
if (vma->vm_flags & VM_SOFTDIRTY) if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY; flags |= PM_SOFT_DIRTY;
...@@ -1490,6 +1495,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, ...@@ -1490,6 +1495,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
u64 flags = 0, frame = 0; u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp; pmd_t pmd = *pmdp;
struct page *page = NULL; struct page *page = NULL;
struct folio *folio = NULL;
if (vma->vm_flags & VM_SOFTDIRTY) if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY; flags |= PM_SOFT_DIRTY;
...@@ -1528,15 +1534,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, ...@@ -1528,15 +1534,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
} }
#endif #endif
if (page && !PageAnon(page)) if (page) {
folio = page_folio(page);
if (!folio_test_anon(folio))
flags |= PM_FILE; flags |= PM_FILE;
}
for (; addr != end; addr += PAGE_SIZE, idx++) { for (; addr != end; addr += PAGE_SIZE, idx++) {
unsigned long cur_flags = flags; unsigned long cur_flags = flags;
pagemap_entry_t pme; pagemap_entry_t pme;
if (page && (flags & PM_PRESENT) && if (folio && (flags & PM_PRESENT) &&
page_mapcount(page + idx) == 1) folio_precise_page_mapcount(folio, page + idx) == 1)
cur_flags |= PM_MMAP_EXCLUSIVE; cur_flags |= PM_MMAP_EXCLUSIVE;
pme = make_pme(frame, cur_flags); pme = make_pme(frame, cur_flags);
...@@ -2575,7 +2584,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, ...@@ -2575,7 +2584,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
unsigned long nr_pages) unsigned long nr_pages)
{ {
struct folio *folio = page_folio(page); struct folio *folio = page_folio(page);
int count = page_mapcount(page); int count = folio_precise_page_mapcount(folio, page);
md->pages += nr_pages; md->pages += nr_pages;
if (pte_dirty || folio_test_dirty(folio)) if (pte_dirty || folio_test_dirty(folio))
......
...@@ -1202,8 +1202,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) ...@@ -1202,8 +1202,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
/* /*
* How many times the entire folio is mapped as a single unit (eg by a * How many times the entire folio is mapped as a single unit (eg by a
* PMD or PUD entry). This is probably not what you want, except for * PMD or PUD entry). This is probably not what you want, except for
* debugging purposes - it does not include PTE-mapped sub-pages; look * debugging purposes or implementation of other core folio_*() primitives.
* at folio_mapcount() or page_mapcount() instead.
*/ */
static inline int folio_entire_mapcount(const struct folio *folio) static inline int folio_entire_mapcount(const struct folio *folio)
{ {
...@@ -1221,30 +1220,6 @@ static inline void page_mapcount_reset(struct page *page) ...@@ -1221,30 +1220,6 @@ static inline void page_mapcount_reset(struct page *page)
atomic_set(&(page)->_mapcount, -1); atomic_set(&(page)->_mapcount, -1);
} }
/**
 * page_mapcount() - Number of times this precise page is mapped.
 * @page: The page.
 *
 * Returns how often this page is mapped. For a page that belongs to a
 * large folio, mappings of the folio as a whole are counted as well.
 *
 * Pages which cannot be mapped into userspace (eg slab, page tables
 * and similar) are reported as 0.
 */
static inline int page_mapcount(struct page *page)
{
	/* _mapcount is stored off by one, so add 1 to get the count. */
	int nr_mappings = atomic_read(&page->_mapcount) + 1;

	/* page_has_type() pages: report them as not mapped. */
	if (nr_mappings < PAGE_MAPCOUNT_RESERVE + 1)
		nr_mappings = 0;

	/* Compound pages additionally count mappings of the entire folio. */
	if (unlikely(PageCompound(page)))
		return nr_mappings + folio_entire_mapcount(page_folio(page));

	return nr_mappings;
}
static inline int folio_large_mapcount(const struct folio *folio) static inline int folio_large_mapcount(const struct folio *folio)
{ {
VM_WARN_ON_FOLIO(!folio_test_large(folio), folio); VM_WARN_ON_FOLIO(!folio_test_large(folio), folio);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment