Commit cfc93c6c authored by Matthew Wilcox

dax: Convert dax_insert_pfn_mkwrite to XArray

Add some XArray-based helper functions to replace the radix tree based
metaphors currently in use.  The biggest change is that converted code
doesn't see its own lock bit; get_unlocked_entry() always returns an
entry with the lock bit clear.  So we don't have to mess around loading
the current entry and clearing the lock bit; we can just store the
unlocked entry that we already have.
Signed-off-by: Matthew Wilcox <willy@infradead.org>
parent ec4907ff
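
The ownership model the commit message describes is worth spelling out before the diff. What follows is an editorial sketch, not part of the commit: a tiny runnable userspace analogue in which `slot` stands in for one XArray slot and `lock_entry()`/`unlock_entry()` stand in for the dax_lock_entry()/dax_unlock_entry() helpers added below. It shows why, once get_unlocked_entry() hands back a value with the lock bit clear, unlocking is a plain store of that same value rather than a load, mask, store sequence.

/* toy_dax_lock.c - editorial sketch, not part of this commit.
 * All names are illustrative stand-ins for the kernel helpers below.
 */
#include <assert.h>
#include <stdio.h>

#define DAX_LOCKED 1UL		/* lock bit kept inside the stored value */

static unsigned long slot;	/* one slot; assume payload bit 0 is free */

/* Like dax_lock_entry(): store the locked form in the slot, but the
 * caller keeps the unlocked value it already loaded. */
static unsigned long lock_entry(unsigned long entry)
{
	slot = entry | DAX_LOCKED;
	return entry;		/* caller's copy never has the lock bit */
}

/* Like dax_unlock_entry(): no need to re-load the slot and clear the
 * bit - just store the unlocked value the caller still holds. */
static void unlock_entry(unsigned long entry)
{
	assert(slot & DAX_LOCKED);	/* mirrors BUG_ON(!dax_is_locked(old)) */
	slot = entry;
}

int main(void)
{
	unsigned long entry = lock_entry(0x1000);	/* some payload */

	printf("slot while locked: %#lx\n", slot);	/* 0x1001 */
	unlock_entry(entry);
	printf("slot after unlock: %#lx\n", slot);	/* 0x1000 */
	return 0;
}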
@@ -38,6 +38,17 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/fs_dax.h>
 
+static inline unsigned int pe_order(enum page_entry_size pe_size)
+{
+	if (pe_size == PE_SIZE_PTE)
+		return PAGE_SHIFT - PAGE_SHIFT;
+	if (pe_size == PE_SIZE_PMD)
+		return PMD_SHIFT - PAGE_SHIFT;
+	if (pe_size == PE_SIZE_PUD)
+		return PUD_SHIFT - PAGE_SHIFT;
+	return ~0;
+}
+
 /* We choose 4096 entries - same as per-zone page wait tables */
 #define DAX_WAIT_TABLE_BITS 12
 #define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
@@ -46,6 +57,9 @@
 #define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)
 #define PG_PMD_NR	(PMD_SIZE >> PAGE_SHIFT)
 
+/* The order of a PMD entry */
+#define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)
+
 static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];
 
 static int __init init_dax_wait_table(void)
@@ -85,10 +99,15 @@ static void *dax_make_locked(unsigned long pfn, unsigned long flags)
 			DAX_LOCKED);
 }
 
+static bool dax_is_locked(void *entry)
+{
+	return xa_to_value(entry) & DAX_LOCKED;
+}
+
 static unsigned int dax_entry_order(void *entry)
 {
 	if (xa_to_value(entry) & DAX_PMD)
-		return PMD_SHIFT - PAGE_SHIFT;
+		return PMD_ORDER;
 	return 0;
 }
@@ -181,6 +200,81 @@ static void dax_wake_mapping_entry_waiter(struct xarray *xa,
 	__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 }
 
+static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
+{
+	return dax_wake_mapping_entry_waiter(xas->xa, xas->xa_index, entry,
+								wake_all);
+}
+
+/*
+ * Look up entry in page cache, wait for it to become unlocked if it
+ * is a DAX entry and return it.  The caller must subsequently call
+ * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
+ * if it did.
+ *
+ * Must be called with the i_pages lock held.
+ */
+static void *get_unlocked_entry(struct xa_state *xas)
+{
+	void *entry;
+	struct wait_exceptional_entry_queue ewait;
+	wait_queue_head_t *wq;
+
+	init_wait(&ewait.wait);
+	ewait.wait.func = wake_exceptional_entry_func;
+
+	for (;;) {
+		entry = xas_load(xas);
+		if (!entry || xa_is_internal(entry) ||
+				WARN_ON_ONCE(!xa_is_value(entry)) ||
+				!dax_is_locked(entry))
+			return entry;
+
+		wq = dax_entry_waitqueue(xas->xa, xas->xa_index, entry,
+				&ewait.key);
+		prepare_to_wait_exclusive(wq, &ewait.wait,
+					  TASK_UNINTERRUPTIBLE);
+		xas_unlock_irq(xas);
+		xas_reset(xas);
+		schedule();
+		finish_wait(wq, &ewait.wait);
+		xas_lock_irq(xas);
+	}
+}
+
+static void put_unlocked_entry(struct xa_state *xas, void *entry)
+{
+	/* If we were the only waiter woken, wake the next one */
+	if (entry)
+		dax_wake_entry(xas, entry, false);
+}
+
+/*
+ * We used the xa_state to get the entry, but then we locked the entry and
+ * dropped the xa_lock, so we know the xa_state is stale and must be reset
+ * before use.
+ */
+static void dax_unlock_entry(struct xa_state *xas, void *entry)
+{
+	void *old;
+
+	xas_reset(xas);
+	xas_lock_irq(xas);
+	old = xas_store(xas, entry);
+	xas_unlock_irq(xas);
+	BUG_ON(!dax_is_locked(old));
+	dax_wake_entry(xas, entry, false);
+}
+
+/*
+ * Return: The entry stored at this location before it was locked.
+ */
+static void *dax_lock_entry(struct xa_state *xas, void *entry)
+{
+	unsigned long v = xa_to_value(entry);
+	return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
+}
+
 /*
  * Check whether the given slot is locked.  Must be called with the i_pages
  * lock held.
@@ -1728,50 +1822,46 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
 /*
  * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
  * @vmf: The description of the fault
- * @pe_size: Size of entry to be inserted
  * @pfn: PFN to insert
+ * @order: Order of entry to insert.
  *
  * This function inserts a writeable PTE or PMD entry into the page tables
  * for an mmaped DAX file.  It also marks the page cache entry as dirty.
  */
-static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf,
-					  enum page_entry_size pe_size,
-					  pfn_t pfn)
+static vm_fault_t
+dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
-	void *entry, **slot;
-	pgoff_t index = vmf->pgoff;
+	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
+	void *entry;
 	vm_fault_t ret;
 
-	xa_lock_irq(&mapping->i_pages);
-	entry = get_unlocked_mapping_entry(mapping, index, &slot);
+	xas_lock_irq(&xas);
+	entry = get_unlocked_entry(&xas);
 	/* Did we race with someone splitting entry or so? */
-	if (!entry ||
-	    (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
-	    (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
-		put_unlocked_mapping_entry(mapping, index, entry);
-		xa_unlock_irq(&mapping->i_pages);
+	if (!entry || (order == 0 && !dax_is_pte_entry(entry)) ||
+	    (order == PMD_ORDER && (xa_is_internal(entry) ||
+				    !dax_is_pmd_entry(entry)))) {
+		put_unlocked_entry(&xas, entry);
+		xas_unlock_irq(&xas);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
 						      VM_FAULT_NOPAGE);
 		return VM_FAULT_NOPAGE;
 	}
-	radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY);
-	entry = lock_slot(mapping, slot);
-	xa_unlock_irq(&mapping->i_pages);
+	xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
+	dax_lock_entry(&xas, entry);
+	xas_unlock_irq(&xas);
-	switch (pe_size) {
-	case PE_SIZE_PTE:
+	if (order == 0)
 		ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
-		break;
 #ifdef CONFIG_FS_DAX_PMD
-	case PE_SIZE_PMD:
+	else if (order == PMD_ORDER)
 		ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
 			pfn, true);
-		break;
 #endif
-	default:
+	else
 		ret = VM_FAULT_FALLBACK;
-	}
-	put_locked_mapping_entry(mapping, index);
+	dax_unlock_entry(&xas, entry);
 	trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
 	return ret;
 }
@@ -1791,17 +1881,12 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
 {
 	int err;
 	loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
-	size_t len = 0;
+	unsigned int order = pe_order(pe_size);
+	size_t len = PAGE_SIZE << order;
 
-	if (pe_size == PE_SIZE_PTE)
-		len = PAGE_SIZE;
-	else if (pe_size == PE_SIZE_PMD)
-		len = PMD_SIZE;
-	else
-		WARN_ON_ONCE(1);
 	err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
 	if (err)
 		return VM_FAULT_SIGBUS;
-	return dax_insert_pfn_mkwrite(vmf, pe_size, pfn);
+	return dax_insert_pfn_mkwrite(vmf, pfn, order);
 }
 EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
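
Taken together, the new helpers give converted call sites a uniform shape. The outline below is an editorial sketch of that protocol as dax_insert_pfn_mkwrite() uses it in the hunk above; `usable()` is a placeholder for whatever validity check a caller needs, and the fragment is a pattern, not compilable on its own.

	/* Editorial sketch of the caller-side protocol. */
	XA_STATE(xas, &mapping->i_pages, index);
	void *entry;

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas);	/* lock bit always clear here */
	if (!usable(entry)) {			/* usable() is a placeholder */
		put_unlocked_entry(&xas, entry);	/* wake the next waiter */
		xas_unlock_irq(&xas);
		return VM_FAULT_NOPAGE;
	}
	dax_lock_entry(&xas, entry);	/* slot now holds entry | DAX_LOCKED */
	xas_unlock_irq(&xas);

	/* ... operate on the entry without holding the i_pages lock ... */

	dax_unlock_entry(&xas, entry);	/* resets the stale xas, stores the
					   unlocked entry, wakes one waiter */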