Commit a2f6d9c4 authored by Theodore Ts'o

Merge branch 'dax-4.10-iomap-pmd' into origin

parents bc33b0ca 9484ab1b
@@ -55,7 +55,6 @@ config FS_DAX_PMD
 	depends on FS_DAX
 	depends on ZONE_DEVICE
 	depends on TRANSPARENT_HUGEPAGE
-	depends on BROKEN
 endif # BLOCK
......
This diff is collapsed.
@@ -38,7 +38,7 @@ static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		return 0; /* skip atime */
 	inode_lock_shared(inode);
-	ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops);
+	ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops);
 	inode_unlock_shared(inode);
 	file_accessed(iocb->ki_filp);
@@ -62,7 +62,7 @@ static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (ret)
 		goto out_unlock;
-	ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops);
+	ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops);
 	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
 		i_size_write(inode, iocb->ki_pos);
 		mark_inode_dirty(inode);
@@ -99,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 	down_read(&ei->dax_sem);
-	ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops);
+	ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops);
 	up_read(&ei->dax_sem);
 	if (vmf->flags & FAULT_FLAG_WRITE)
@@ -107,27 +107,6 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	return ret;
 }
-static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
-		pmd_t *pmd, unsigned int flags)
-{
-	struct inode *inode = file_inode(vma->vm_file);
-	struct ext2_inode_info *ei = EXT2_I(inode);
-	int ret;
-	if (flags & FAULT_FLAG_WRITE) {
-		sb_start_pagefault(inode->i_sb);
-		file_update_time(vma->vm_file);
-	}
-	down_read(&ei->dax_sem);
-	ret = dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block);
-	up_read(&ei->dax_sem);
-	if (flags & FAULT_FLAG_WRITE)
-		sb_end_pagefault(inode->i_sb);
-	return ret;
-}
 static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 		struct vm_fault *vmf)
 {
@@ -154,7 +133,11 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 static const struct vm_operations_struct ext2_dax_vm_ops = {
 	.fault		= ext2_dax_fault,
-	.pmd_fault	= ext2_dax_pmd_fault,
+	/*
+	 * .pmd_fault is not supported for DAX because allocation in ext2
+	 * cannot be reliably aligned to huge page sizes and so pmd faults
+	 * will always fail and fail back to regular faults.
+	 */
 	.page_mkwrite	= ext2_dax_fault,
 	.pfn_mkwrite	= ext2_dax_pfn_mkwrite,
 };
@@ -166,7 +149,7 @@ static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
 	file_accessed(file);
 	vma->vm_ops = &ext2_dax_vm_ops;
-	vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+	vma->vm_flags |= VM_MIXEDMAP;
 	return 0;
 }
 #else
......
@@ -767,6 +767,9 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 		ext4_update_bh_state(bh, map.m_flags);
 		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
 		ret = 0;
+	} else if (ret == 0) {
+		/* hole case, need to fill in bh->b_size */
+		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
 	}
 	return ret;
 }
......
@@ -467,8 +467,9 @@ int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	offset = page_offset(page);
 	while (length > 0) {
-		ret = iomap_apply(inode, offset, length, IOMAP_WRITE,
-				ops, page, iomap_page_mkwrite_actor);
+		ret = iomap_apply(inode, offset, length,
+				IOMAP_WRITE | IOMAP_FAULT, ops, page,
+				iomap_page_mkwrite_actor);
 		if (unlikely(ret <= 0))
 			goto out_unlock;
 		offset += ret;
......
@@ -1298,8 +1298,7 @@ __xfs_get_blocks(
 	sector_t		iblock,
 	struct buffer_head	*bh_result,
 	int			create,
-	bool			direct,
-	bool			dax_fault)
+	bool			direct)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1420,13 +1419,8 @@ __xfs_get_blocks(
 		if (ISUNWRITTEN(&imap))
 			set_buffer_unwritten(bh_result);
 		/* direct IO needs special help */
-		if (create) {
-			if (dax_fault)
-				ASSERT(!ISUNWRITTEN(&imap));
-			else
-				xfs_map_direct(inode, bh_result, &imap, offset,
-						is_cow);
-		}
+		if (create)
+			xfs_map_direct(inode, bh_result, &imap, offset, is_cow);
 	}

 	/*
@@ -1466,7 +1460,7 @@ xfs_get_blocks(
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __xfs_get_blocks(inode, iblock, bh_result, create, false, false);
+	return __xfs_get_blocks(inode, iblock, bh_result, create, false);
 }

 int
@@ -1476,17 +1470,7 @@ xfs_get_blocks_direct(
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __xfs_get_blocks(inode, iblock, bh_result, create, true, false);
-}
-
-int
-xfs_get_blocks_dax_fault(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
-{
-	return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
+	return __xfs_get_blocks(inode, iblock, bh_result, create, true);
 }

 /*
......
@@ -59,9 +59,6 @@ int xfs_get_blocks(struct inode *inode, sector_t offset,
 		struct buffer_head *map_bh, int create);
 int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
 		struct buffer_head *map_bh, int create);
-int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
-		struct buffer_head *map_bh, int create);
 int xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
 		ssize_t size, void *private);
 int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
......
@@ -318,7 +318,7 @@ xfs_file_dax_read(
 		return 0; /* skip atime */
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
+	ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
 	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 	file_accessed(iocb->ki_filp);
@@ -653,7 +653,7 @@ xfs_file_dax_write(
 	trace_xfs_file_dax_write(ip, count, pos);
-	ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
+	ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
 	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
 		i_size_write(inode, iocb->ki_pos);
 		error = xfs_setfilesize(ip, pos, ret);
@@ -1474,7 +1474,7 @@ xfs_filemap_page_mkwrite(
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 	if (IS_DAX(inode)) {
-		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
+		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
 	} else {
 		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
@@ -1508,7 +1508,7 @@ xfs_filemap_fault(
 		 * changes to xfs_get_blocks_direct() to map unwritten extent
 		 * ioend for conversion on read-only mappings.
 		 */
-		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
+		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
 	} else
 		ret = filemap_fault(vma, vmf);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
@@ -1545,7 +1545,7 @@ xfs_filemap_pmd_fault(
 	}
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-	ret = dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault);
+	ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 	if (flags & FAULT_FLAG_WRITE)
......
@@ -8,21 +8,46 @@
 struct iomap_ops;

-/* We use lowest available exceptional entry bit for locking */
+/*
+ * We use lowest available bit in exceptional entry for locking, one bit for
+ * the entry size (PMD) and two more to tell us if the entry is a huge zero
+ * page (HZP) or an empty entry that is just used for locking. In total four
+ * special bits.
+ *
+ * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the HZP and
+ * EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
+ * block allocation.
+ */
+#define RADIX_DAX_SHIFT	(RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
 #define RADIX_DAX_ENTRY_LOCK	(1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
+#define RADIX_DAX_PMD	(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
+#define RADIX_DAX_HZP	(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
+#define RADIX_DAX_EMPTY	(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))

-ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+static inline unsigned long dax_radix_sector(void *entry)
+{
+	return (unsigned long)entry >> RADIX_DAX_SHIFT;
+}
+
+static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
+{
+	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
+			((unsigned long)sector << RADIX_DAX_SHIFT) |
+			RADIX_DAX_ENTRY_LOCK);
+}
+
+ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		struct iomap_ops *ops);
 ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
 		get_block_t, dio_iodone_t, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
-int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		struct iomap_ops *ops);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
-		pgoff_t index, bool wake_all);
+		pgoff_t index, void *entry, bool wake_all);

 #ifdef CONFIG_FS_DAX
 struct page *read_dax_sector(struct block_device *bdev, sector_t n);
@@ -48,15 +73,32 @@ static inline int __dax_zero_page_range(struct block_device *bdev,
 }
 #endif

-#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
-int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
-		unsigned int flags, get_block_t);
-#else
 static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 		pmd_t *pmd, unsigned int flags, get_block_t gb)
 {
 	return VM_FAULT_FALLBACK;
 }
+
+#ifdef CONFIG_FS_DAX_PMD
+static inline unsigned int dax_radix_order(void *entry)
+{
+	if ((unsigned long)entry & RADIX_DAX_PMD)
+		return PMD_SHIFT - PAGE_SHIFT;
+	return 0;
+}
+int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
+		pmd_t *pmd, unsigned int flags, struct iomap_ops *ops);
+#else
+static inline unsigned int dax_radix_order(void *entry)
+{
+	return 0;
+}
+static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmd, unsigned int flags,
+		struct iomap_ops *ops)
+{
+	return VM_FAULT_FALLBACK;
+}
 #endif
 int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
 #define dax_mkwrite(vma, vmf, gb)	dax_fault(vma, vmf, gb)
......
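The DAX header hunk above widens the exceptional radix-tree entry format from a single lock bit to four special bits (lock, PMD, HZP, EMPTY) with the block sector stored in the remaining high bits; dax_radix_locked_entry() packs an entry and dax_radix_sector() unpacks it. The stand-alone C sketch below mirrors that encoding in user space purely to illustrate how the bits pack and unpack. It is not kernel code: the RADIX_TREE_EXCEPTIONAL_ENTRY/RADIX_TREE_EXCEPTIONAL_SHIFT values, the local sector_t typedef, and the example sector number are assumptions made for the sketch, not part of this commit.

/* Illustrative user-space sketch of the DAX radix-tree entry encoding. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed values mirroring the usual radix-tree definitions: exceptional
 * entries carry tag value 2, and user bits start at bit 2. */
#define RADIX_TREE_EXCEPTIONAL_ENTRY	2UL
#define RADIX_TREE_EXCEPTIONAL_SHIFT	2

/* Copied from the header hunk above. */
#define RADIX_DAX_SHIFT		(RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
#define RADIX_DAX_ENTRY_LOCK	(1UL << RADIX_TREE_EXCEPTIONAL_SHIFT)
#define RADIX_DAX_PMD		(1UL << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
#define RADIX_DAX_HZP		(1UL << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
#define RADIX_DAX_EMPTY		(1UL << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))

typedef uint64_t sector_t;	/* local stand-in for the kernel type */

/* Same packing as dax_radix_locked_entry(): exceptional tag, flag bits,
 * the sector shifted into the high bits, and the lock bit set. */
static void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
{
	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
			((unsigned long)sector << RADIX_DAX_SHIFT) |
			RADIX_DAX_ENTRY_LOCK);
}

/* Same unpacking as dax_radix_sector(): shift away the tag/flag/lock bits. */
static unsigned long dax_radix_sector(void *entry)
{
	return (unsigned long)entry >> RADIX_DAX_SHIFT;
}

int main(void)
{
	/* A locked PMD entry for an arbitrary example sector. */
	void *entry = dax_radix_locked_entry(0x1234, RADIX_DAX_PMD);

	assert(dax_radix_sector(entry) == 0x1234);
	assert((unsigned long)entry & RADIX_DAX_PMD);
	assert(!((unsigned long)entry & (RADIX_DAX_HZP | RADIX_DAX_EMPTY)));

	/* The locked empty entry that page_cache_tree_insert() checks for. */
	void *empty = dax_radix_locked_entry(0, RADIX_DAX_EMPTY);

	printf("pmd entry = %#lx, empty entry = %#lx\n",
	       (unsigned long)entry, (unsigned long)empty);
	return 0;
}

With the assumed shift values, the locked PMD entry for sector 0x1234 packs to 0x48d0e and the locked empty entry to 0x26; the latter corresponds to dax_radix_locked_entry(0, RADIX_DAX_EMPTY), which is exactly what the mm/filemap.c hunk further down now compares against in place of the old open-coded expression.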
@@ -49,6 +49,7 @@ struct iomap {
 #define IOMAP_WRITE		(1 << 0) /* writing, must allocate blocks */
 #define IOMAP_ZERO		(1 << 1) /* zeroing operation, may skip holes */
 #define IOMAP_REPORT		(1 << 2) /* report extent status, e.g. FIEMAP */
+#define IOMAP_FAULT		(1 << 3) /* mapping for page fault */

 struct iomap_ops {
 	/*
......
@@ -137,13 +137,12 @@ static int page_cache_tree_insert(struct address_space *mapping,
 		} else {
 			/* DAX can replace empty locked entry with a hole */
 			WARN_ON_ONCE(p !=
-				(void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
-					 RADIX_DAX_ENTRY_LOCK));
+				dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
 			/* DAX accounts exceptional entries as normal pages */
 			if (node)
 				workingset_node_pages_dec(node);
 			/* Wakeup waiters for exceptional entry lock */
-			dax_wake_mapping_entry_waiter(mapping, page->index,
+			dax_wake_mapping_entry_waiter(mapping, page->index, p,
 						      false);
 		}
 	}
......