Commit e5d6df73 authored by Shiyang Ruan, committed by akpm

fsdax: replace mmap entry in case of CoW

Replace the existing entry with the newly allocated one in the case of CoW.
Also, mark the entry with PAGECACHE_TAG_TOWRITE so that writeback marks this
entry as write-protected.  This helps with snapshots: new write page faults
after a snapshot trigger a CoW.
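
For illustration only (not part of this patch): a minimal sketch of how a
writeback walk could visit the entries tagged PAGECACHE_TAG_TOWRITE in the
mapping's XArray.  The helper name and loop body are hypothetical; only the
XArray iteration API and the tag itself are existing kernel interfaces.

    #include <linux/fs.h>
    #include <linux/xarray.h>

    /* Hypothetical helper: visit every entry tagged for writeback. */
    static void visit_towrite_entries(struct address_space *mapping,
                                      pgoff_t start, pgoff_t end)
    {
            XA_STATE(xas, &mapping->i_pages, start);
            void *entry;

            xas_lock_irq(&xas);
            xas_for_each_marked(&xas, entry, end, PAGECACHE_TAG_TOWRITE) {
                    /* Real writeback would flush the entry before clearing. */
                    xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
            }
            xas_unlock_irq(&xas);
    }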

Link: https://lkml.kernel.org/r/20220603053738.1218681-11-ruansy.fnst@fujitsu.com
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Goldwyn Rodrigues <rgoldwyn@suse.de>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent ff17b8df
@@ -829,6 +829,23 @@ static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter)
 	return 0;
 }
 
+/*
+ * MAP_SYNC on a dax mapping guarantees dirty metadata is
+ * flushed on write-faults (non-cow), but not read-faults.
+ */
+static bool dax_fault_is_synchronous(const struct iomap_iter *iter,
+		struct vm_area_struct *vma)
+{
+	return (iter->flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC) &&
+		(iter->iomap.flags & IOMAP_F_DIRTY);
+}
+
+static bool dax_fault_is_cow(const struct iomap_iter *iter)
+{
+	return (iter->flags & IOMAP_WRITE) &&
+		(iter->iomap.flags & IOMAP_F_SHARED);
+}
+
 /*
  * By this point grab_mapping_entry() has ensured that we have a locked entry
  * of the appropriate size so we don't have to worry about downgrading PMDs to
@@ -836,16 +853,19 @@ static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter)
  * already in the tree, we will skip the insertion and just dirty the PMD as
  * appropriate.
  */
-static void *dax_insert_entry(struct xa_state *xas,
-		struct address_space *mapping, struct vm_fault *vmf,
-		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
+static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
+		const struct iomap_iter *iter, void *entry, pfn_t pfn,
+		unsigned long flags)
 {
+	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	void *new_entry = dax_make_entry(pfn, flags);
+	bool dirty = !dax_fault_is_synchronous(iter, vmf->vma);
+	bool cow = dax_fault_is_cow(iter);
 
 	if (dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 
-	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
+	if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
 		unsigned long index = xas->xa_index;
 		/* we are replacing a zero page with block mapping */
 		if (dax_is_pmd_entry(entry))
@@ -857,12 +877,12 @@ static void *dax_insert_entry(struct xa_state *xas,
 
 	xas_reset(xas);
 	xas_lock_irq(xas);
-	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+	if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
 		void *old;
 
 		dax_disassociate_entry(entry, mapping, false);
 		dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address,
-				false);
+				cow);
 		/*
 		 * Only swap our new entry into the page cache if the current
 		 * entry is a zero page or an empty entry.  If a normal PTE or
@@ -882,6 +902,9 @@ static void *dax_insert_entry(struct xa_state *xas,
 	if (dirty)
 		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
 
+	if (cow)
+		xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
+
 	xas_unlock_irq(xas);
 	return entry;
 }
@@ -1123,17 +1146,15 @@ static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size,
  * If this page is ever written to we will re-fault and change the mapping to
  * point to real DAX storage instead.
  */
-static vm_fault_t dax_load_hole(struct xa_state *xas,
-		struct address_space *mapping, void **entry,
-		struct vm_fault *vmf)
+static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf,
+		const struct iomap_iter *iter, void **entry)
 {
-	struct inode *inode = mapping->host;
+	struct inode *inode = iter->inode;
 	unsigned long vaddr = vmf->address;
 	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
 	vm_fault_t ret;
 
-	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
-			DAX_ZERO_PAGE, false);
+	*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE);
 
 	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
 	trace_dax_load_hole(inode, vmf, ret);
@@ -1142,7 +1163,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
 
 #ifdef CONFIG_FS_DAX_PMD
 static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
-		const struct iomap *iomap, void **entry)
+		const struct iomap_iter *iter, void **entry)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	unsigned long pmd_addr = vmf->address & PMD_MASK;
@@ -1160,8 +1181,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 		goto fallback;
 
 	pfn = page_to_pfn_t(zero_page);
-	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
-			DAX_PMD | DAX_ZERO_PAGE, false);
+	*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn,
+				  DAX_PMD | DAX_ZERO_PAGE);
 
 	if (arch_needs_pgtable_deposit()) {
 		pgtable = pte_alloc_one(vma->vm_mm);
@@ -1194,7 +1215,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 }
 #else
 static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
-		const struct iomap *iomap, void **entry)
+		const struct iomap_iter *iter, void **entry)
 {
 	return VM_FAULT_FALLBACK;
 }
@@ -1439,17 +1460,6 @@ static vm_fault_t dax_fault_return(int error)
 	return vmf_error(error);
 }
 
-/*
- * MAP_SYNC on a dax mapping guarantees dirty metadata is
- * flushed on write-faults (non-cow), but not read-faults.
- */
-static bool dax_fault_is_synchronous(unsigned long flags,
-		struct vm_area_struct *vma, const struct iomap *iomap)
-{
-	return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
-		&& (iomap->flags & IOMAP_F_DIRTY);
-}
-
 /*
  * When handling a synchronous page fault and the inode need a fsync, we can
  * insert the PTE/PMD into page tables only after that fsync happened. Skip
@@ -1507,13 +1517,11 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
 		const struct iomap_iter *iter, pfn_t *pfnp,
 		struct xa_state *xas, void **entry, bool pmd)
 {
-	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	const struct iomap *iomap = &iter->iomap;
 	const struct iomap *srcmap = &iter->srcmap;
 	size_t size = pmd ? PMD_SIZE : PAGE_SIZE;
 	loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
-	bool write = vmf->flags & FAULT_FLAG_WRITE;
-	bool sync = dax_fault_is_synchronous(iter->flags, vmf->vma, iomap);
+	bool write = iter->flags & IOMAP_WRITE;
 	unsigned long entry_flags = pmd ? DAX_PMD : 0;
 	int err = 0;
 	pfn_t pfn;
@@ -1526,8 +1534,8 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
 	if (!write &&
 	    (iomap->type == IOMAP_UNWRITTEN || iomap->type == IOMAP_HOLE)) {
 		if (!pmd)
-			return dax_load_hole(xas, mapping, entry, vmf);
-		return dax_pmd_load_hole(xas, vmf, iomap, entry);
+			return dax_load_hole(xas, vmf, iter, entry);
+		return dax_pmd_load_hole(xas, vmf, iter, entry);
 	}
 
 	if (iomap->type != IOMAP_MAPPED && !(iomap->flags & IOMAP_F_SHARED)) {
@@ -1539,8 +1547,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
 	if (err)
 		return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err);
 
-	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, entry_flags,
-				  write && !sync);
+	*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, entry_flags);
 
 	if (write &&
 	    srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) {
@@ -1549,7 +1556,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
 			return dax_fault_return(err);
 	}
 
-	if (sync)
+	if (dax_fault_is_synchronous(iter, vmf->vma))
 		return dax_fault_synchronous_pfnp(pfnp, pfn);
 
 	/* insert PMD pfn */
...