Commit 15bde4ab authored by Barry Song, committed by Andrew Morton

mm: extend rmap flags arguments for folio_add_new_anon_rmap

Patch series "mm: clarify folio_add_new_anon_rmap() and
__folio_add_anon_rmap()", v2.

This patchset is preparatory work for mTHP swapin.

folio_add_new_anon_rmap() assumes that new anon rmaps are always
exclusive.  However, this assumption doesn’t hold true for cases like
do_swap_page(), where a new anon folio might be added to the swapcache
and is not necessarily exclusive.

The patchset extends the rmap flags to allow folio_add_new_anon_rmap() to
handle both exclusive and non-exclusive new anon folios.  The
do_swap_page() function is updated to use this extended API with rmap
flags.  Consequently, all new anon folios now consistently use
folio_add_new_anon_rmap().  The special case for !folio_test_anon() in
__folio_add_anon_rmap() can be safely removed.

In conclusion, new anon folios always use folio_add_new_anon_rmap(),
regardless of exclusivity.  Old anon folios continue to use
__folio_add_anon_rmap() via folio_add_anon_rmap_pmd() and
folio_add_anon_rmap_ptes().


This patch (of 3):

In the case of a swap-in, a new anonymous folio is not necessarily
exclusive.  This patch updates the rmap flags to allow a new anonymous
folio to be treated as either exclusive or non-exclusive.  To maintain the
existing behavior, we always use EXCLUSIVE as the default setting.

[akpm@linux-foundation.org: cleanup and constifications per David and akpm]
[v-songbaohua@oppo.com: fix missing doc for flags of folio_add_new_anon_rmap()]
  Link: https://lkml.kernel.org/r/20240619210641.62542-1-21cnbao@gmail.com
[v-songbaohua@oppo.com: enhance doc for extend rmap flags arguments for folio_add_new_anon_rmap]
  Link: https://lkml.kernel.org/r/20240622030256.43775-1-21cnbao@gmail.com
Link: https://lkml.kernel.org/r/20240617231137.80726-1-21cnbao@gmail.com
Link: https://lkml.kernel.org/r/20240617231137.80726-2-21cnbao@gmail.com
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Tested-by: Shuai Yuan <yuanshuai@oppo.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 55ccad6f
...@@ -244,7 +244,7 @@ void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages, ...@@ -244,7 +244,7 @@ void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages,
void folio_add_anon_rmap_pmd(struct folio *, struct page *, void folio_add_anon_rmap_pmd(struct folio *, struct page *,
struct vm_area_struct *, unsigned long address, rmap_t flags); struct vm_area_struct *, unsigned long address, rmap_t flags);
void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *, void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
unsigned long address); unsigned long address, rmap_t flags);
void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages, void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages,
struct vm_area_struct *); struct vm_area_struct *);
#define folio_add_file_rmap_pte(folio, page, vma) \ #define folio_add_file_rmap_pte(folio, page, vma) \
......
...@@ -181,7 +181,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, ...@@ -181,7 +181,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
if (new_page) { if (new_page) {
folio_get(new_folio); folio_get(new_folio);
folio_add_new_anon_rmap(new_folio, vma, addr); folio_add_new_anon_rmap(new_folio, vma, addr, RMAP_EXCLUSIVE);
folio_add_lru_vma(new_folio, vma); folio_add_lru_vma(new_folio, vma);
} else } else
/* no new page, just dec_mm_counter for old_page */ /* no new page, just dec_mm_counter for old_page */
......
...@@ -974,7 +974,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, ...@@ -974,7 +974,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
entry = mk_huge_pmd(page, vma->vm_page_prot); entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
folio_add_new_anon_rmap(folio, vma, haddr); folio_add_new_anon_rmap(folio, vma, haddr, RMAP_EXCLUSIVE);
folio_add_lru_vma(folio, vma); folio_add_lru_vma(folio, vma);
pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
......
...@@ -1210,7 +1210,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, ...@@ -1210,7 +1210,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
spin_lock(pmd_ptl); spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd)); BUG_ON(!pmd_none(*pmd));
folio_add_new_anon_rmap(folio, vma, address); folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE);
folio_add_lru_vma(folio, vma); folio_add_lru_vma(folio, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable); pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, address, pmd, _pmd); set_pmd_at(mm, address, pmd, _pmd);
......
...@@ -930,7 +930,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma ...@@ -930,7 +930,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
*prealloc = NULL; *prealloc = NULL;
copy_user_highpage(&new_folio->page, page, addr, src_vma); copy_user_highpage(&new_folio->page, page, addr, src_vma);
__folio_mark_uptodate(new_folio); __folio_mark_uptodate(new_folio);
folio_add_new_anon_rmap(new_folio, dst_vma, addr); folio_add_new_anon_rmap(new_folio, dst_vma, addr, RMAP_EXCLUSIVE);
folio_add_lru_vma(new_folio, dst_vma); folio_add_lru_vma(new_folio, dst_vma);
rss[MM_ANONPAGES]++; rss[MM_ANONPAGES]++;
...@@ -3402,7 +3402,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) ...@@ -3402,7 +3402,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
* some TLBs while the old PTE remains in others. * some TLBs while the old PTE remains in others.
*/ */
ptep_clear_flush(vma, vmf->address, vmf->pte); ptep_clear_flush(vma, vmf->address, vmf->pte);
folio_add_new_anon_rmap(new_folio, vma, vmf->address); folio_add_new_anon_rmap(new_folio, vma, vmf->address, RMAP_EXCLUSIVE);
folio_add_lru_vma(new_folio, vma); folio_add_lru_vma(new_folio, vma);
BUG_ON(unshare && pte_write(entry)); BUG_ON(unshare && pte_write(entry));
set_pte_at(mm, vmf->address, vmf->pte, entry); set_pte_at(mm, vmf->address, vmf->pte, entry);
...@@ -4339,7 +4339,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) ...@@ -4339,7 +4339,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
/* ksm created a completely new copy */ /* ksm created a completely new copy */
if (unlikely(folio != swapcache && swapcache)) { if (unlikely(folio != swapcache && swapcache)) {
folio_add_new_anon_rmap(folio, vma, address); folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE);
folio_add_lru_vma(folio, vma); folio_add_lru_vma(folio, vma);
} else { } else {
folio_add_anon_rmap_ptes(folio, page, nr_pages, vma, address, folio_add_anon_rmap_ptes(folio, page, nr_pages, vma, address,
...@@ -4594,7 +4594,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) ...@@ -4594,7 +4594,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_FAULT_ALLOC); count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_FAULT_ALLOC);
#endif #endif
folio_add_new_anon_rmap(folio, vma, addr); folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
folio_add_lru_vma(folio, vma); folio_add_lru_vma(folio, vma);
setpte: setpte:
if (vmf_orig_pte_uffd_wp(vmf)) if (vmf_orig_pte_uffd_wp(vmf))
...@@ -4792,7 +4792,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio, ...@@ -4792,7 +4792,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
/* copy-on-write page */ /* copy-on-write page */
if (write && !(vma->vm_flags & VM_SHARED)) { if (write && !(vma->vm_flags & VM_SHARED)) {
VM_BUG_ON_FOLIO(nr != 1, folio); VM_BUG_ON_FOLIO(nr != 1, folio);
folio_add_new_anon_rmap(folio, vma, addr); folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
folio_add_lru_vma(folio, vma); folio_add_lru_vma(folio, vma);
} else { } else {
folio_add_file_rmap_ptes(folio, page, nr, vma); folio_add_file_rmap_ptes(folio, page, nr, vma);
......
...@@ -658,7 +658,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, ...@@ -658,7 +658,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
goto unlock_abort; goto unlock_abort;
inc_mm_counter(mm, MM_ANONPAGES); inc_mm_counter(mm, MM_ANONPAGES);
folio_add_new_anon_rmap(folio, vma, addr); folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
if (!folio_is_zone_device(folio)) if (!folio_is_zone_device(folio))
folio_add_lru_vma(folio, vma); folio_add_lru_vma(folio, vma);
folio_get(folio); folio_get(folio);
......
...@@ -1401,29 +1401,34 @@ void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page, ...@@ -1401,29 +1401,34 @@ void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page,
* @folio: The folio to add the mapping to. * @folio: The folio to add the mapping to.
* @vma: the vm area in which the mapping is added * @vma: the vm area in which the mapping is added
* @address: the user virtual address mapped * @address: the user virtual address mapped
* @flags: The rmap flags
* *
* Like folio_add_anon_rmap_*() but must only be called on *new* folios. * Like folio_add_anon_rmap_*() but must only be called on *new* folios.
* This means the inc-and-test can be bypassed. * This means the inc-and-test can be bypassed.
* The folio does not have to be locked. * The folio doesn't necessarily need to be locked while it's exclusive
* unless two threads map it concurrently. However, the folio must be
* locked if it's shared.
* *
* If the folio is pmd-mappable, it is accounted as a THP. As the folio * If the folio is pmd-mappable, it is accounted as a THP.
* is new, it's assumed to be mapped exclusively by a single process.
*/ */
void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
unsigned long address) unsigned long address, rmap_t flags)
{ {
int nr = folio_nr_pages(folio); const int nr = folio_nr_pages(folio);
const bool exclusive = flags & RMAP_EXCLUSIVE;
int nr_pmdmapped = 0; int nr_pmdmapped = 0;
VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
VM_WARN_ON_FOLIO(!exclusive && !folio_test_locked(folio), folio);
VM_BUG_ON_VMA(address < vma->vm_start || VM_BUG_ON_VMA(address < vma->vm_start ||
address + (nr << PAGE_SHIFT) > vma->vm_end, vma); address + (nr << PAGE_SHIFT) > vma->vm_end, vma);
__folio_set_swapbacked(folio); __folio_set_swapbacked(folio);
__folio_set_anon(folio, vma, address, true); __folio_set_anon(folio, vma, address, exclusive);
if (likely(!folio_test_large(folio))) { if (likely(!folio_test_large(folio))) {
/* increment count (starts at -1) */ /* increment count (starts at -1) */
atomic_set(&folio->_mapcount, 0); atomic_set(&folio->_mapcount, 0);
if (exclusive)
SetPageAnonExclusive(&folio->page); SetPageAnonExclusive(&folio->page);
} else if (!folio_test_pmd_mappable(folio)) { } else if (!folio_test_pmd_mappable(folio)) {
int i; int i;
...@@ -1433,6 +1438,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, ...@@ -1433,6 +1438,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
/* increment count (starts at -1) */ /* increment count (starts at -1) */
atomic_set(&page->_mapcount, 0); atomic_set(&page->_mapcount, 0);
if (exclusive)
SetPageAnonExclusive(page); SetPageAnonExclusive(page);
} }
...@@ -1445,6 +1451,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, ...@@ -1445,6 +1451,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
/* increment count (starts at -1) */ /* increment count (starts at -1) */
atomic_set(&folio->_large_mapcount, 0); atomic_set(&folio->_large_mapcount, 0);
atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED); atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
if (exclusive)
SetPageAnonExclusive(&folio->page); SetPageAnonExclusive(&folio->page);
nr_pmdmapped = nr; nr_pmdmapped = nr;
} }
......
...@@ -1911,7 +1911,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -1911,7 +1911,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
folio_add_anon_rmap_pte(folio, page, vma, addr, rmap_flags); folio_add_anon_rmap_pte(folio, page, vma, addr, rmap_flags);
} else { /* ksm created a completely new copy */ } else { /* ksm created a completely new copy */
folio_add_new_anon_rmap(folio, vma, addr); folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
folio_add_lru_vma(folio, vma); folio_add_lru_vma(folio, vma);
} }
new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot)); new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot));
......
...@@ -216,7 +216,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd, ...@@ -216,7 +216,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
folio_add_lru(folio); folio_add_lru(folio);
folio_add_file_rmap_pte(folio, page, dst_vma); folio_add_file_rmap_pte(folio, page, dst_vma);
} else { } else {
folio_add_new_anon_rmap(folio, dst_vma, dst_addr); folio_add_new_anon_rmap(folio, dst_vma, dst_addr, RMAP_EXCLUSIVE);
folio_add_lru_vma(folio, dst_vma); folio_add_lru_vma(folio, dst_vma);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment