Commit 8d0d47ea authored by Theodore Ts'o's avatar Theodore Ts'o

Merge branch 'mb/dio' into master

parents a6d40408 378f32ba
...@@ -1090,7 +1090,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range); ...@@ -1090,7 +1090,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
static loff_t static loff_t
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
struct iomap *iomap) struct iomap *iomap, struct iomap *srcmap)
{ {
struct block_device *bdev = iomap->bdev; struct block_device *bdev = iomap->bdev;
struct dax_device *dax_dev = iomap->dax_dev; struct dax_device *dax_dev = iomap->dax_dev;
...@@ -1247,7 +1247,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, ...@@ -1247,7 +1247,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
unsigned long vaddr = vmf->address; unsigned long vaddr = vmf->address;
loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
struct iomap iomap = { 0 }; struct iomap iomap = { .type = IOMAP_HOLE };
struct iomap srcmap = { .type = IOMAP_HOLE };
unsigned flags = IOMAP_FAULT; unsigned flags = IOMAP_FAULT;
int error, major = 0; int error, major = 0;
bool write = vmf->flags & FAULT_FLAG_WRITE; bool write = vmf->flags & FAULT_FLAG_WRITE;
...@@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, ...@@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
* the file system block size to be equal the page size, which means * the file system block size to be equal the page size, which means
* that we never have to deal with more than a single extent here. * that we never have to deal with more than a single extent here.
*/ */
error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap); error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);
if (iomap_errp) if (iomap_errp)
*iomap_errp = error; *iomap_errp = error;
if (error) { if (error) {
...@@ -1471,7 +1472,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, ...@@ -1471,7 +1472,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
vm_fault_t result = VM_FAULT_FALLBACK; vm_fault_t result = VM_FAULT_FALLBACK;
struct iomap iomap = { 0 }; struct iomap iomap = { .type = IOMAP_HOLE };
struct iomap srcmap = { .type = IOMAP_HOLE };
pgoff_t max_pgoff; pgoff_t max_pgoff;
void *entry; void *entry;
loff_t pos; loff_t pos;
...@@ -1546,7 +1548,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, ...@@ -1546,7 +1548,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
* to look up our filesystem block. * to look up our filesystem block.
*/ */
pos = (loff_t)xas.xa_index << PAGE_SHIFT; pos = (loff_t)xas.xa_index << PAGE_SHIFT;
error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap); error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap,
&srcmap);
if (error) if (error)
goto unlock_entry; goto unlock_entry;
......
...@@ -801,7 +801,7 @@ int ext2_get_block(struct inode *inode, sector_t iblock, ...@@ -801,7 +801,7 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
#ifdef CONFIG_FS_DAX #ifdef CONFIG_FS_DAX
static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned flags, struct iomap *iomap) unsigned flags, struct iomap *iomap, struct iomap *srcmap)
{ {
unsigned int blkbits = inode->i_blkbits; unsigned int blkbits = inode->i_blkbits;
unsigned long first_block = offset >> blkbits; unsigned long first_block = offset >> blkbits;
......
...@@ -1584,7 +1584,6 @@ enum { ...@@ -1584,7 +1584,6 @@ enum {
EXT4_STATE_NO_EXPAND, /* No space for expansion */ EXT4_STATE_NO_EXPAND, /* No space for expansion */
EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */
EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
EXT4_STATE_NEWENTRY, /* File just added to dir */ EXT4_STATE_NEWENTRY, /* File just added to dir */
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
...@@ -2565,8 +2564,6 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, ...@@ -2565,8 +2564,6 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create); struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock, int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create); struct buffer_head *bh_result, int create);
int ext4_dio_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create); struct buffer_head *bh, int create);
int ext4_walk_page_buffers(handle_t *handle, int ext4_walk_page_buffers(handle_t *handle,
...@@ -3391,6 +3388,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) ...@@ -3391,6 +3388,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
} }
extern const struct iomap_ops ext4_iomap_ops; extern const struct iomap_ops ext4_iomap_ops;
extern const struct iomap_ops ext4_iomap_report_ops;
static inline int ext4_buffer_uptodate(struct buffer_head *bh) static inline int ext4_buffer_uptodate(struct buffer_head *bh)
{ {
......
...@@ -1765,16 +1765,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, ...@@ -1765,16 +1765,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
*/ */
if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
return 0; return 0;
/*
* The check for IO to unwritten extent is somewhat racy as we
* increment i_unwritten / set EXT4_STATE_DIO_UNWRITTEN only after
* dropping i_data_sem. But reserved blocks should save us in that
* case.
*/
if (ext4_ext_is_unwritten(ex1) && if (ext4_ext_is_unwritten(ex1) &&
(ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) || ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
atomic_read(&EXT4_I(inode)->i_unwritten) ||
(ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)))
return 0; return 0;
#ifdef AGGRESSIVE_TEST #ifdef AGGRESSIVE_TEST
if (ext1_ee_len >= 4) if (ext1_ee_len >= 4)
......
This diff is collapsed.
...@@ -80,6 +80,43 @@ static int ext4_sync_parent(struct inode *inode) ...@@ -80,6 +80,43 @@ static int ext4_sync_parent(struct inode *inode)
return ret; return ret;
} }
static int ext4_fsync_nojournal(struct inode *inode, bool datasync,
bool *needs_barrier)
{
int ret, err;
ret = sync_mapping_buffers(inode->i_mapping);
if (!(inode->i_state & I_DIRTY_ALL))
return ret;
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
return ret;
err = sync_inode_metadata(inode, 1);
if (!ret)
ret = err;
if (!ret)
ret = ext4_sync_parent(inode);
if (test_opt(inode->i_sb, BARRIER))
*needs_barrier = true;
return ret;
}
static int ext4_fsync_journal(struct inode *inode, bool datasync,
bool *needs_barrier)
{
struct ext4_inode_info *ei = EXT4_I(inode);
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
if (journal->j_flags & JBD2_BARRIER &&
!jbd2_trans_will_send_data_barrier(journal, commit_tid))
*needs_barrier = true;
return jbd2_complete_transaction(journal, commit_tid);
}
/* /*
* akpm: A new design for ext4_sync_file(). * akpm: A new design for ext4_sync_file().
* *
...@@ -91,17 +128,14 @@ static int ext4_sync_parent(struct inode *inode) ...@@ -91,17 +128,14 @@ static int ext4_sync_parent(struct inode *inode)
* What we do is just kick off a commit and wait on it. This will snapshot the * What we do is just kick off a commit and wait on it. This will snapshot the
* inode to disk. * inode to disk.
*/ */
int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{ {
struct inode *inode = file->f_mapping->host;
struct ext4_inode_info *ei = EXT4_I(inode);
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
int ret = 0, err; int ret = 0, err;
tid_t commit_tid;
bool needs_barrier = false; bool needs_barrier = false;
struct inode *inode = file->f_mapping->host;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) if (unlikely(ext4_forced_shutdown(sbi)))
return -EIO; return -EIO;
J_ASSERT(ext4_journal_current_handle() == NULL); J_ASSERT(ext4_journal_current_handle() == NULL);
...@@ -111,23 +145,15 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ...@@ -111,23 +145,15 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (sb_rdonly(inode->i_sb)) { if (sb_rdonly(inode->i_sb)) {
/* Make sure that we read updated s_mount_flags value */ /* Make sure that we read updated s_mount_flags value */
smp_rmb(); smp_rmb();
if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED) if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
ret = -EROFS; ret = -EROFS;
goto out; goto out;
} }
if (!journal) {
ret = __generic_file_fsync(file, start, end, datasync);
if (!ret)
ret = ext4_sync_parent(inode);
if (test_opt(inode->i_sb, BARRIER))
goto issue_flush;
goto out;
}
ret = file_write_and_wait_range(file, start, end); ret = file_write_and_wait_range(file, start, end);
if (ret) if (ret)
return ret; return ret;
/* /*
* data=writeback,ordered: * data=writeback,ordered:
* The caller's filemap_fdatawrite()/wait will sync the data. * The caller's filemap_fdatawrite()/wait will sync the data.
...@@ -142,18 +168,14 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ...@@ -142,18 +168,14 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* (they were dirtied by commit). But that's OK - the blocks are * (they were dirtied by commit). But that's OK - the blocks are
* safe in-journal, which is all fsync() needs to ensure. * safe in-journal, which is all fsync() needs to ensure.
*/ */
if (ext4_should_journal_data(inode)) { if (!sbi->s_journal)
ret = ext4_fsync_nojournal(inode, datasync, &needs_barrier);
else if (ext4_should_journal_data(inode))
ret = ext4_force_commit(inode->i_sb); ret = ext4_force_commit(inode->i_sb);
goto out; else
} ret = ext4_fsync_journal(inode, datasync, &needs_barrier);
commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
if (journal->j_flags & JBD2_BARRIER &&
!jbd2_trans_will_send_data_barrier(journal, commit_tid))
needs_barrier = true;
ret = jbd2_complete_transaction(journal, commit_tid);
if (needs_barrier) { if (needs_barrier) {
issue_flush:
err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
if (!ret) if (!ret)
ret = err; ret = err;
......
This diff is collapsed.
...@@ -1149,7 +1149,8 @@ static inline bool gfs2_iomap_need_write_lock(unsigned flags) ...@@ -1149,7 +1149,8 @@ static inline bool gfs2_iomap_need_write_lock(unsigned flags)
} }
static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, struct iomap *iomap) unsigned flags, struct iomap *iomap,
struct iomap *srcmap)
{ {
struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_inode *ip = GFS2_I(inode);
struct metapath mp = { .mp_aheight = 1, }; struct metapath mp = { .mp_aheight = 1, };
......
...@@ -732,7 +732,8 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to) ...@@ -732,7 +732,8 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to)
if (ret) if (ret)
goto out_uninit; goto out_uninit;
ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL); ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
is_sync_kiocb(iocb));
gfs2_glock_dq(&gh); gfs2_glock_dq(&gh);
out_uninit: out_uninit:
...@@ -767,7 +768,8 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -767,7 +768,8 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
if (offset + len > i_size_read(&ip->i_inode)) if (offset + len > i_size_read(&ip->i_inode))
goto out; goto out;
ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL); ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
is_sync_kiocb(iocb));
out: out:
gfs2_glock_dq(&gh); gfs2_glock_dq(&gh);
......
...@@ -3,13 +3,15 @@ ...@@ -3,13 +3,15 @@
# Copyright (c) 2019 Oracle. # Copyright (c) 2019 Oracle.
# All Rights Reserved. # All Rights Reserved.
# #
ccflags-y += -I $(srctree)/$(src) # needed for trace events
obj-$(CONFIG_FS_IOMAP) += iomap.o obj-$(CONFIG_FS_IOMAP) += iomap.o
iomap-y += \ iomap-y += trace.o \
apply.o \ apply.o \
buffered-io.o \ buffered-io.o \
direct-io.o \ direct-io.o \
fiemap.o \ fiemap.o \
seek.o seek.o
iomap-$(CONFIG_SWAP) += swapfile.o iomap-$(CONFIG_SWAP) += swapfile.o
...@@ -23,8 +23,10 @@ loff_t ...@@ -23,8 +23,10 @@ loff_t
iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
const struct iomap_ops *ops, void *data, iomap_actor_t actor) const struct iomap_ops *ops, void *data, iomap_actor_t actor)
{ {
struct iomap iomap = { 0 }; struct iomap iomap = { .type = IOMAP_HOLE };
struct iomap srcmap = { .type = IOMAP_HOLE };
loff_t written = 0, ret; loff_t written = 0, ret;
u64 end;
/* /*
* Need to map a range from start position for length bytes. This can * Need to map a range from start position for length bytes. This can
...@@ -38,7 +40,7 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, ...@@ -38,7 +40,7 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
* expose transient stale data. If the reserve fails, we can safely * expose transient stale data. If the reserve fails, we can safely
* back out at this point as there is nothing to undo. * back out at this point as there is nothing to undo.
*/ */
ret = ops->iomap_begin(inode, pos, length, flags, &iomap); ret = ops->iomap_begin(inode, pos, length, flags, &iomap, &srcmap);
if (ret) if (ret)
return ret; return ret;
if (WARN_ON(iomap.offset > pos)) if (WARN_ON(iomap.offset > pos))
...@@ -50,15 +52,26 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, ...@@ -50,15 +52,26 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
* Cut down the length to the one actually provided by the filesystem, * Cut down the length to the one actually provided by the filesystem,
* as it might not be able to give us the whole size that we requested. * as it might not be able to give us the whole size that we requested.
*/ */
if (iomap.offset + iomap.length < pos + length) end = iomap.offset + iomap.length;
length = iomap.offset + iomap.length - pos; if (srcmap.type != IOMAP_HOLE)
end = min(end, srcmap.offset + srcmap.length);
if (pos + length > end)
length = end - pos;
/* /*
* Now that we have guaranteed that the space allocation will succeed. * Now that we have guaranteed that the space allocation will succeed,
* we can do the copy-in page by page without having to worry about * we can do the copy-in page by page without having to worry about
* failures exposing transient data. * failures exposing transient data.
*
* To support COW operations, we read in data for partially blocks from
* the srcmap if the file system filled it in. In that case we the
* length needs to be limited to the earlier of the ends of the iomaps.
* If the file system did not provide a srcmap we pass in the normal
* iomap into the actors so that they don't need to have special
* handling for the two cases.
*/ */
written = actor(inode, pos, length, data, &iomap); written = actor(inode, pos, length, data, &iomap,
srcmap.type != IOMAP_HOLE ? &srcmap : &iomap);
/* /*
* Now the data has been copied, commit the range we've copied. This * Now the data has been copied, commit the range we've copied. This
......
This diff is collapsed.
...@@ -358,7 +358,7 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length, ...@@ -358,7 +358,7 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
static loff_t static loff_t
iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
void *data, struct iomap *iomap) void *data, struct iomap *iomap, struct iomap *srcmap)
{ {
struct iomap_dio *dio = data; struct iomap_dio *dio = data;
...@@ -392,7 +392,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, ...@@ -392,7 +392,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
*/ */
ssize_t ssize_t
iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops) const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
bool wait_for_completion)
{ {
struct address_space *mapping = iocb->ki_filp->f_mapping; struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = file_inode(iocb->ki_filp); struct inode *inode = file_inode(iocb->ki_filp);
...@@ -400,7 +401,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ...@@ -400,7 +401,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
loff_t pos = iocb->ki_pos, start = pos; loff_t pos = iocb->ki_pos, start = pos;
loff_t end = iocb->ki_pos + count - 1, ret = 0; loff_t end = iocb->ki_pos + count - 1, ret = 0;
unsigned int flags = IOMAP_DIRECT; unsigned int flags = IOMAP_DIRECT;
bool wait_for_completion = is_sync_kiocb(iocb);
struct blk_plug plug; struct blk_plug plug;
struct iomap_dio *dio; struct iomap_dio *dio;
...@@ -409,6 +409,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ...@@ -409,6 +409,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (!count) if (!count)
return 0; return 0;
if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion))
return -EIO;
dio = kmalloc(sizeof(*dio), GFP_KERNEL); dio = kmalloc(sizeof(*dio), GFP_KERNEL);
if (!dio) if (!dio)
return -ENOMEM; return -ENOMEM;
...@@ -430,7 +433,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ...@@ -430,7 +433,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (pos >= dio->i_size) if (pos >= dio->i_size)
goto out_free_dio; goto out_free_dio;
if (iter_is_iovec(iter) && iov_iter_rw(iter) == READ) if (iter_is_iovec(iter))
dio->flags |= IOMAP_DIO_DIRTY; dio->flags |= IOMAP_DIO_DIRTY;
} else { } else {
flags |= IOMAP_WRITE; flags |= IOMAP_WRITE;
......
...@@ -44,7 +44,7 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi, ...@@ -44,7 +44,7 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
static loff_t static loff_t
iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
struct iomap *iomap) struct iomap *iomap, struct iomap *srcmap)
{ {
struct fiemap_ctx *ctx = data; struct fiemap_ctx *ctx = data;
loff_t ret = length; loff_t ret = length;
...@@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(iomap_fiemap); ...@@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(iomap_fiemap);
static loff_t static loff_t
iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length, iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
void *data, struct iomap *iomap) void *data, struct iomap *iomap, struct iomap *srcmap)
{ {
sector_t *bno = data, addr; sector_t *bno = data, addr;
......
...@@ -119,7 +119,7 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length, ...@@ -119,7 +119,7 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
static loff_t static loff_t
iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length, iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
void *data, struct iomap *iomap) void *data, struct iomap *iomap, struct iomap *srcmap)
{ {
switch (iomap->type) { switch (iomap->type) {
case IOMAP_UNWRITTEN: case IOMAP_UNWRITTEN:
...@@ -165,7 +165,7 @@ EXPORT_SYMBOL_GPL(iomap_seek_hole); ...@@ -165,7 +165,7 @@ EXPORT_SYMBOL_GPL(iomap_seek_hole);
static loff_t static loff_t
iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length, iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
void *data, struct iomap *iomap) void *data, struct iomap *iomap, struct iomap *srcmap)
{ {
switch (iomap->type) { switch (iomap->type) {
case IOMAP_HOLE: case IOMAP_HOLE:
......
...@@ -76,7 +76,8 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi) ...@@ -76,7 +76,8 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
* distinction between written and unwritten extents. * distinction between written and unwritten extents.
*/ */
static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos, static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
loff_t count, void *data, struct iomap *iomap) loff_t count, void *data, struct iomap *iomap,
struct iomap *srcmap)
{ {
struct iomap_swapfile_info *isi = data; struct iomap_swapfile_info *isi = data;
int error; int error;
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2019 Christoph Hellwig
*/
#include <linux/iomap.h>
/*
* We include this last to have the helpers above available for the trace
* event implementations.
*/
#define CREATE_TRACE_POINTS
#include "trace.h"
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2009-2019 Christoph Hellwig
*
* NOTE: none of these tracepoints shall be consider a stable kernel ABI
* as they can change at any time.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM iomap
#if !defined(_IOMAP_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _IOMAP_TRACE_H
#include <linux/tracepoint.h>
struct inode;
DECLARE_EVENT_CLASS(iomap_readpage_class,
TP_PROTO(struct inode *inode, int nr_pages),
TP_ARGS(inode, nr_pages),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, ino)
__field(int, nr_pages)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->nr_pages = nr_pages;
),
TP_printk("dev %d:%d ino 0x%llx nr_pages %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->nr_pages)
)
#define DEFINE_READPAGE_EVENT(name) \
DEFINE_EVENT(iomap_readpage_class, name, \
TP_PROTO(struct inode *inode, int nr_pages), \
TP_ARGS(inode, nr_pages))
DEFINE_READPAGE_EVENT(iomap_readpage);
DEFINE_READPAGE_EVENT(iomap_readpages);
DECLARE_EVENT_CLASS(iomap_page_class,
TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
unsigned int len),
TP_ARGS(inode, page, off, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, ino)
__field(pgoff_t, pgoff)
__field(loff_t, size)
__field(unsigned long, offset)
__field(unsigned int, length)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pgoff = page_offset(page);
__entry->size = i_size_read(inode);
__entry->offset = off;
__entry->length = len;
),
TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
"length %x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->pgoff,
__entry->size,
__entry->offset,
__entry->length)
)
#define DEFINE_PAGE_EVENT(name) \
DEFINE_EVENT(iomap_page_class, name, \
TP_PROTO(struct inode *inode, struct page *page, unsigned long off, \
unsigned int len), \
TP_ARGS(inode, page, off, len))
DEFINE_PAGE_EVENT(iomap_writepage);
DEFINE_PAGE_EVENT(iomap_releasepage);
DEFINE_PAGE_EVENT(iomap_invalidatepage);
#endif /* _IOMAP_TRACE_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace
#include <trace/define_trace.h>
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include "xfs_ag_resv.h" #include "xfs_ag_resv.h"
#include "xfs_refcount.h" #include "xfs_refcount.h"
#include "xfs_icache.h" #include "xfs_icache.h"
#include "xfs_iomap.h"
kmem_zone_t *xfs_bmap_free_item_zone; kmem_zone_t *xfs_bmap_free_item_zone;
...@@ -4456,16 +4457,21 @@ int ...@@ -4456,16 +4457,21 @@ int
xfs_bmapi_convert_delalloc( xfs_bmapi_convert_delalloc(
struct xfs_inode *ip, struct xfs_inode *ip,
int whichfork, int whichfork,
xfs_fileoff_t offset_fsb, xfs_off_t offset,
struct xfs_bmbt_irec *imap, struct iomap *iomap,
unsigned int *seq) unsigned int *seq)
{ {
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
struct xfs_bmalloca bma = { NULL }; struct xfs_bmalloca bma = { NULL };
u16 flags = 0;
struct xfs_trans *tp; struct xfs_trans *tp;
int error; int error;
if (whichfork == XFS_COW_FORK)
flags |= IOMAP_F_SHARED;
/* /*
* Space for the extent and indirect blocks was reserved when the * Space for the extent and indirect blocks was reserved when the
* delalloc extent was created so there's no need to do so here. * delalloc extent was created so there's no need to do so here.
...@@ -4495,7 +4501,7 @@ xfs_bmapi_convert_delalloc( ...@@ -4495,7 +4501,7 @@ xfs_bmapi_convert_delalloc(
* the extent. Just return the real extent at this offset. * the extent. Just return the real extent at this offset.
*/ */
if (!isnullstartblock(bma.got.br_startblock)) { if (!isnullstartblock(bma.got.br_startblock)) {
*imap = bma.got; xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
*seq = READ_ONCE(ifp->if_seq); *seq = READ_ONCE(ifp->if_seq);
goto out_trans_cancel; goto out_trans_cancel;
} }
...@@ -4528,7 +4534,7 @@ xfs_bmapi_convert_delalloc( ...@@ -4528,7 +4534,7 @@ xfs_bmapi_convert_delalloc(
XFS_STATS_INC(mp, xs_xstrat_quick); XFS_STATS_INC(mp, xs_xstrat_quick);
ASSERT(!isnullstartblock(bma.got.br_startblock)); ASSERT(!isnullstartblock(bma.got.br_startblock));
*imap = bma.got; xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
*seq = READ_ONCE(ifp->if_seq); *seq = READ_ONCE(ifp->if_seq);
if (whichfork == XFS_COW_FORK) if (whichfork == XFS_COW_FORK)
......
...@@ -228,8 +228,7 @@ int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, ...@@ -228,8 +228,7 @@ int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
int eof); int eof);
int xfs_bmapi_convert_delalloc(struct xfs_inode *ip, int whichfork, int xfs_bmapi_convert_delalloc(struct xfs_inode *ip, int whichfork,
xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap, xfs_off_t offset, struct iomap *iomap, unsigned int *seq);
unsigned int *seq);
int xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp, int xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp,
struct xfs_inode *ip, int whichfork, struct xfs_inode *ip, int whichfork,
struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp, struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp,
......
This diff is collapsed.
...@@ -6,23 +6,6 @@ ...@@ -6,23 +6,6 @@
#ifndef __XFS_AOPS_H__ #ifndef __XFS_AOPS_H__
#define __XFS_AOPS_H__ #define __XFS_AOPS_H__
extern struct bio_set xfs_ioend_bioset;
/*
* Structure for buffered I/O completions.
*/
struct xfs_ioend {
struct list_head io_list; /* next ioend in chain */
int io_fork; /* inode fork written back */
xfs_exntst_t io_state; /* extent state */
struct inode *io_inode; /* file being written to */
size_t io_size; /* size of the extent */
xfs_off_t io_offset; /* offset in the file */
struct xfs_trans *io_append_trans;/* xact. for size update */
struct bio *io_bio; /* bio being built */
struct bio io_inline_bio; /* MUST BE LAST! */
};
extern const struct address_space_operations xfs_address_space_operations; extern const struct address_space_operations xfs_address_space_operations;
extern const struct address_space_operations xfs_dax_aops; extern const struct address_space_operations xfs_dax_aops;
......
...@@ -188,7 +188,7 @@ xfs_file_dio_aio_read( ...@@ -188,7 +188,7 @@ xfs_file_dio_aio_read(
file_accessed(iocb->ki_filp); file_accessed(iocb->ki_filp);
xfs_ilock(ip, XFS_IOLOCK_SHARED); xfs_ilock(ip, XFS_IOLOCK_SHARED);
ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL); ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL, is_sync_kiocb(iocb));
xfs_iunlock(ip, XFS_IOLOCK_SHARED); xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return ret; return ret;
...@@ -547,15 +547,12 @@ xfs_file_dio_aio_write( ...@@ -547,15 +547,12 @@ xfs_file_dio_aio_write(
} }
trace_xfs_file_direct_write(ip, count, iocb->ki_pos); trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops);
/* /*
* If unaligned, this is the only IO in-flight. If it has not yet * If unaligned, this is the only IO in-flight. Wait on it before we
* completed, wait on it before we release the iolock to prevent * release the iolock to prevent subsequent overlapping IO.
* subsequent overlapping IO.
*/ */
if (ret == -EIOCBQUEUED && unaligned_io) ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops,
inode_dio_wait(inode); is_sync_kiocb(iocb) || unaligned_io);
out: out:
xfs_iunlock(ip, iolock); xfs_iunlock(ip, iolock);
......
...@@ -54,7 +54,7 @@ xfs_bmbt_to_iomap( ...@@ -54,7 +54,7 @@ xfs_bmbt_to_iomap(
struct xfs_inode *ip, struct xfs_inode *ip,
struct iomap *iomap, struct iomap *iomap,
struct xfs_bmbt_irec *imap, struct xfs_bmbt_irec *imap,
bool shared) u16 flags)
{ {
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
...@@ -79,12 +79,11 @@ xfs_bmbt_to_iomap( ...@@ -79,12 +79,11 @@ xfs_bmbt_to_iomap(
iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount); iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip)); iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip)); iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip));
iomap->flags = flags;
if (xfs_ipincount(ip) && if (xfs_ipincount(ip) &&
(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
iomap->flags |= IOMAP_F_DIRTY; iomap->flags |= IOMAP_F_DIRTY;
if (shared)
iomap->flags |= IOMAP_F_SHARED;
return 0; return 0;
} }
...@@ -540,6 +539,7 @@ xfs_file_iomap_begin_delay( ...@@ -540,6 +539,7 @@ xfs_file_iomap_begin_delay(
struct xfs_iext_cursor icur, ccur; struct xfs_iext_cursor icur, ccur;
xfs_fsblock_t prealloc_blocks = 0; xfs_fsblock_t prealloc_blocks = 0;
bool eof = false, cow_eof = false, shared = false; bool eof = false, cow_eof = false, shared = false;
u16 iomap_flags = 0;
int whichfork = XFS_DATA_FORK; int whichfork = XFS_DATA_FORK;
int error = 0; int error = 0;
...@@ -707,22 +707,28 @@ xfs_file_iomap_begin_delay( ...@@ -707,22 +707,28 @@ xfs_file_iomap_begin_delay(
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
* them out if the write happens to fail. * them out if the write happens to fail.
*/ */
iomap->flags |= IOMAP_F_NEW; if (whichfork == XFS_DATA_FORK) {
trace_xfs_iomap_alloc(ip, offset, count, whichfork, iomap_flags |= IOMAP_F_NEW;
whichfork == XFS_DATA_FORK ? &imap : &cmap); trace_xfs_iomap_alloc(ip, offset, count, whichfork, &imap);
} else {
trace_xfs_iomap_alloc(ip, offset, count, whichfork, &cmap);
}
done: done:
if (whichfork == XFS_COW_FORK) { if (whichfork == XFS_COW_FORK) {
if (imap.br_startoff > offset_fsb) { if (imap.br_startoff > offset_fsb) {
xfs_trim_extent(&cmap, offset_fsb, xfs_trim_extent(&cmap, offset_fsb,
imap.br_startoff - offset_fsb); imap.br_startoff - offset_fsb);
error = xfs_bmbt_to_iomap(ip, iomap, &cmap, true); error = xfs_bmbt_to_iomap(ip, iomap, &cmap,
IOMAP_F_SHARED);
goto out_unlock; goto out_unlock;
} }
/* ensure we only report blocks we have a reservation for */ /* ensure we only report blocks we have a reservation for */
xfs_trim_extent(&imap, cmap.br_startoff, cmap.br_blockcount); xfs_trim_extent(&imap, cmap.br_startoff, cmap.br_blockcount);
shared = true; shared = true;
} }
error = xfs_bmbt_to_iomap(ip, iomap, &imap, shared); if (shared)
iomap_flags |= IOMAP_F_SHARED;
error = xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
out_unlock: out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error; return error;
...@@ -922,7 +928,8 @@ xfs_file_iomap_begin( ...@@ -922,7 +928,8 @@ xfs_file_iomap_begin(
loff_t offset, loff_t offset,
loff_t length, loff_t length,
unsigned flags, unsigned flags,
struct iomap *iomap) struct iomap *iomap,
struct iomap *srcmap)
{ {
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
...@@ -930,6 +937,7 @@ xfs_file_iomap_begin( ...@@ -930,6 +937,7 @@ xfs_file_iomap_begin(
xfs_fileoff_t offset_fsb, end_fsb; xfs_fileoff_t offset_fsb, end_fsb;
int nimaps = 1, error = 0; int nimaps = 1, error = 0;
bool shared = false; bool shared = false;
u16 iomap_flags = 0;
unsigned lockmode; unsigned lockmode;
if (XFS_FORCED_SHUTDOWN(mp)) if (XFS_FORCED_SHUTDOWN(mp))
...@@ -1045,11 +1053,20 @@ xfs_file_iomap_begin( ...@@ -1045,11 +1053,20 @@ xfs_file_iomap_begin(
if (error) if (error)
return error; return error;
iomap->flags |= IOMAP_F_NEW; iomap_flags |= IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap); trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
out_finish: out_finish:
return xfs_bmbt_to_iomap(ip, iomap, &imap, shared); /*
* Writes that span EOF might trigger an IO size update on completion,
* so consider them to be dirty for the purposes of O_DSYNC even if
* there is no other metadata changes pending or have been made here.
*/
if ((flags & IOMAP_WRITE) && offset + length > i_size_read(inode))
iomap_flags |= IOMAP_F_DIRTY;
if (shared)
iomap_flags |= IOMAP_F_SHARED;
return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
out_found: out_found:
ASSERT(nimaps); ASSERT(nimaps);
...@@ -1145,7 +1162,8 @@ xfs_seek_iomap_begin( ...@@ -1145,7 +1162,8 @@ xfs_seek_iomap_begin(
loff_t offset, loff_t offset,
loff_t length, loff_t length,
unsigned flags, unsigned flags,
struct iomap *iomap) struct iomap *iomap,
struct iomap *srcmap)
{ {
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
...@@ -1193,7 +1211,7 @@ xfs_seek_iomap_begin( ...@@ -1193,7 +1211,7 @@ xfs_seek_iomap_begin(
if (data_fsb < cow_fsb + cmap.br_blockcount) if (data_fsb < cow_fsb + cmap.br_blockcount)
end_fsb = min(end_fsb, data_fsb); end_fsb = min(end_fsb, data_fsb);
xfs_trim_extent(&cmap, offset_fsb, end_fsb); xfs_trim_extent(&cmap, offset_fsb, end_fsb);
error = xfs_bmbt_to_iomap(ip, iomap, &cmap, true); error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
/* /*
* This is a COW extent, so we must probe the page cache * This is a COW extent, so we must probe the page cache
* because there could be dirty page cache being backed * because there could be dirty page cache being backed
...@@ -1215,7 +1233,7 @@ xfs_seek_iomap_begin( ...@@ -1215,7 +1233,7 @@ xfs_seek_iomap_begin(
imap.br_state = XFS_EXT_NORM; imap.br_state = XFS_EXT_NORM;
done: done:
xfs_trim_extent(&imap, offset_fsb, end_fsb); xfs_trim_extent(&imap, offset_fsb, end_fsb);
error = xfs_bmbt_to_iomap(ip, iomap, &imap, false); error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
out_unlock: out_unlock:
xfs_iunlock(ip, lockmode); xfs_iunlock(ip, lockmode);
return error; return error;
...@@ -1231,7 +1249,8 @@ xfs_xattr_iomap_begin( ...@@ -1231,7 +1249,8 @@ xfs_xattr_iomap_begin(
loff_t offset, loff_t offset,
loff_t length, loff_t length,
unsigned flags, unsigned flags,
struct iomap *iomap) struct iomap *iomap,
struct iomap *srcmap)
{ {
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
...@@ -1261,7 +1280,7 @@ xfs_xattr_iomap_begin( ...@@ -1261,7 +1280,7 @@ xfs_xattr_iomap_begin(
if (error) if (error)
return error; return error;
ASSERT(nimaps); ASSERT(nimaps);
return xfs_bmbt_to_iomap(ip, iomap, &imap, false); return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
} }
const struct iomap_ops xfs_xattr_iomap_ops = { const struct iomap_ops xfs_xattr_iomap_ops = {
......
...@@ -16,7 +16,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, ...@@ -16,7 +16,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
int xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, int xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
struct xfs_bmbt_irec *, bool shared); struct xfs_bmbt_irec *, u16);
xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize); xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize);
static inline xfs_filblks_t static inline xfs_filblks_t
......
...@@ -178,7 +178,7 @@ xfs_fs_map_blocks( ...@@ -178,7 +178,7 @@ xfs_fs_map_blocks(
} }
xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL);
error = xfs_bmbt_to_iomap(ip, iomap, &imap, false); error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
*device_generation = mp->m_generation; *device_generation = mp->m_generation;
return error; return error;
out_unlock: out_unlock:
......
...@@ -1442,7 +1442,7 @@ xfs_reflink_dirty_extents( ...@@ -1442,7 +1442,7 @@ xfs_reflink_dirty_extents(
flen = XFS_FSB_TO_B(mp, rlen); flen = XFS_FSB_TO_B(mp, rlen);
if (fpos + flen > isize) if (fpos + flen > isize)
flen = isize - fpos; flen = isize - fpos;
error = iomap_file_dirty(VFS_I(ip), fpos, flen, error = iomap_file_unshare(VFS_I(ip), fpos, flen,
&xfs_iomap_ops); &xfs_iomap_ops);
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
if (error) if (error)
......
...@@ -40,7 +40,6 @@ ...@@ -40,7 +40,6 @@
#include <linux/parser.h> #include <linux/parser.h>
static const struct super_operations xfs_super_operations; static const struct super_operations xfs_super_operations;
struct bio_set xfs_ioend_bioset;
static struct kset *xfs_kset; /* top-level xfs sysfs dir */ static struct kset *xfs_kset; /* top-level xfs sysfs dir */
#ifdef DEBUG #ifdef DEBUG
...@@ -1853,15 +1852,10 @@ MODULE_ALIAS_FS("xfs"); ...@@ -1853,15 +1852,10 @@ MODULE_ALIAS_FS("xfs");
STATIC int __init STATIC int __init
xfs_init_zones(void) xfs_init_zones(void)
{ {
if (bioset_init(&xfs_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
offsetof(struct xfs_ioend, io_inline_bio),
BIOSET_NEED_BVECS))
goto out;
xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
"xfs_log_ticket"); "xfs_log_ticket");
if (!xfs_log_ticket_zone) if (!xfs_log_ticket_zone)
goto out_free_ioend_bioset; goto out;
xfs_bmap_free_item_zone = kmem_zone_init( xfs_bmap_free_item_zone = kmem_zone_init(
sizeof(struct xfs_extent_free_item), sizeof(struct xfs_extent_free_item),
...@@ -1996,8 +1990,6 @@ xfs_init_zones(void) ...@@ -1996,8 +1990,6 @@ xfs_init_zones(void)
kmem_zone_destroy(xfs_bmap_free_item_zone); kmem_zone_destroy(xfs_bmap_free_item_zone);
out_destroy_log_ticket_zone: out_destroy_log_ticket_zone:
kmem_zone_destroy(xfs_log_ticket_zone); kmem_zone_destroy(xfs_log_ticket_zone);
out_free_ioend_bioset:
bioset_exit(&xfs_ioend_bioset);
out: out:
return -ENOMEM; return -ENOMEM;
} }
...@@ -2028,7 +2020,6 @@ xfs_destroy_zones(void) ...@@ -2028,7 +2020,6 @@ xfs_destroy_zones(void)
kmem_zone_destroy(xfs_btree_cur_zone); kmem_zone_destroy(xfs_btree_cur_zone);
kmem_zone_destroy(xfs_bmap_free_item_zone); kmem_zone_destroy(xfs_bmap_free_item_zone);
kmem_zone_destroy(xfs_log_ticket_zone); kmem_zone_destroy(xfs_log_ticket_zone);
bioset_exit(&xfs_ioend_bioset);
} }
STATIC int __init STATIC int __init
......
...@@ -1158,71 +1158,6 @@ DEFINE_RW_EVENT(xfs_file_buffered_write); ...@@ -1158,71 +1158,6 @@ DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write); DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_dax_write); DEFINE_RW_EVENT(xfs_file_dax_write);
DECLARE_EVENT_CLASS(xfs_page_class,
TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
unsigned int len),
TP_ARGS(inode, page, off, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(pgoff_t, pgoff)
__field(loff_t, size)
__field(unsigned long, offset)
__field(unsigned int, length)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = XFS_I(inode)->i_ino;
__entry->pgoff = page_offset(page);
__entry->size = i_size_read(inode);
__entry->offset = off;
__entry->length = len;
),
TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
"length %x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->pgoff,
__entry->size,
__entry->offset,
__entry->length)
)
#define DEFINE_PAGE_EVENT(name) \
DEFINE_EVENT(xfs_page_class, name, \
TP_PROTO(struct inode *inode, struct page *page, unsigned long off, \
unsigned int len), \
TP_ARGS(inode, page, off, len))
DEFINE_PAGE_EVENT(xfs_writepage);
DEFINE_PAGE_EVENT(xfs_releasepage);
DEFINE_PAGE_EVENT(xfs_invalidatepage);
DECLARE_EVENT_CLASS(xfs_readpage_class,
TP_PROTO(struct inode *inode, int nr_pages),
TP_ARGS(inode, nr_pages),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(int, nr_pages)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->nr_pages = nr_pages;
),
TP_printk("dev %d:%d ino 0x%llx nr_pages %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->nr_pages)
)
#define DEFINE_READPAGE_EVENT(name) \
DEFINE_EVENT(xfs_readpage_class, name, \
TP_PROTO(struct inode *inode, int nr_pages), \
TP_ARGS(inode, nr_pages))
DEFINE_READPAGE_EVENT(xfs_vm_readpage);
DEFINE_READPAGE_EVENT(xfs_vm_readpages);
DECLARE_EVENT_CLASS(xfs_imap_class, DECLARE_EVENT_CLASS(xfs_imap_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
int whichfork, struct xfs_bmbt_irec *irec), int whichfork, struct xfs_bmbt_irec *irec),
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/bitmap.h> #include <linux/bitmap.h>
#include <linux/blk_types.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/mm_types.h> #include <linux/mm_types.h>
...@@ -12,6 +13,7 @@ ...@@ -12,6 +13,7 @@
struct address_space; struct address_space;
struct fiemap_extent_info; struct fiemap_extent_info;
struct inode; struct inode;
struct iomap_writepage_ctx;
struct iov_iter; struct iov_iter;
struct kiocb; struct kiocb;
struct page; struct page;
...@@ -21,28 +23,45 @@ struct vm_fault; ...@@ -21,28 +23,45 @@ struct vm_fault;
/* /*
* Types of block ranges for iomap mappings: * Types of block ranges for iomap mappings:
*/ */
#define IOMAP_HOLE 0x01 /* no blocks allocated, need allocation */ #define IOMAP_HOLE 0 /* no blocks allocated, need allocation */
#define IOMAP_DELALLOC 0x02 /* delayed allocation blocks */ #define IOMAP_DELALLOC 1 /* delayed allocation blocks */
#define IOMAP_MAPPED 0x03 /* blocks allocated at @addr */ #define IOMAP_MAPPED 2 /* blocks allocated at @addr */
#define IOMAP_UNWRITTEN 0x04 /* blocks allocated at @addr in unwritten state */ #define IOMAP_UNWRITTEN 3 /* blocks allocated at @addr in unwritten state */
#define IOMAP_INLINE 0x05 /* data inline in the inode */ #define IOMAP_INLINE 4 /* data inline in the inode */
/* /*
* Flags for all iomap mappings: * Flags reported by the file system from iomap_begin:
*
* IOMAP_F_NEW indicates that the blocks have been newly allocated and need
* zeroing for areas that no data is copied to.
* *
* IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access
* written data and requires fdatasync to commit them to persistent storage. * written data and requires fdatasync to commit them to persistent storage.
* This needs to take into account metadata changes that *may* be made at IO
* completion, such as file size updates from direct IO.
*
* IOMAP_F_SHARED indicates that the blocks are shared, and will need to be
* unshared as part a write.
*
* IOMAP_F_MERGED indicates that the iomap contains the merge of multiple block
* mappings.
*
* IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of
* buffer heads for this mapping.
*/ */
#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ #define IOMAP_F_NEW 0x01
#define IOMAP_F_DIRTY 0x02 /* uncommitted metadata */ #define IOMAP_F_DIRTY 0x02
#define IOMAP_F_BUFFER_HEAD 0x04 /* file system requires buffer heads */ #define IOMAP_F_SHARED 0x04
#define IOMAP_F_SIZE_CHANGED 0x08 /* file size has changed */ #define IOMAP_F_MERGED 0x08
#define IOMAP_F_BUFFER_HEAD 0x10
/* /*
* Flags that only need to be reported for IOMAP_REPORT requests: * Flags set by the core iomap code during operations:
*
* IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size
* has changed as the result of this write operation.
*/ */
#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */ #define IOMAP_F_SIZE_CHANGED 0x100
#define IOMAP_F_SHARED 0x20 /* block shared with another file */
/* /*
* Flags from 0x1000 up are for file system specific usage: * Flags from 0x1000 up are for file system specific usage:
...@@ -110,7 +129,8 @@ struct iomap_ops { ...@@ -110,7 +129,8 @@ struct iomap_ops {
* The actual length is returned in iomap->length. * The actual length is returned in iomap->length.
*/ */
int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length, int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, struct iomap *iomap); unsigned flags, struct iomap *iomap,
struct iomap *srcmap);
/* /*
* Commit and/or unreserve space previous allocated using iomap_begin. * Commit and/or unreserve space previous allocated using iomap_begin.
...@@ -126,29 +146,12 @@ struct iomap_ops { ...@@ -126,29 +146,12 @@ struct iomap_ops {
* Main iomap iterator function. * Main iomap iterator function.
*/ */
typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len, typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
void *data, struct iomap *iomap); void *data, struct iomap *iomap, struct iomap *srcmap);
loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length, loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, const struct iomap_ops *ops, void *data, unsigned flags, const struct iomap_ops *ops, void *data,
iomap_actor_t actor); iomap_actor_t actor);
/*
* Structure allocate for each page when block size < PAGE_SIZE to track
* sub-page uptodate status and I/O completions.
*/
struct iomap_page {
atomic_t read_count;
atomic_t write_count;
DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
};
static inline struct iomap_page *to_iomap_page(struct page *page)
{
if (page_has_private(page))
return (struct iomap_page *)page_private(page);
return NULL;
}
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops); const struct iomap_ops *ops);
int iomap_readpage(struct page *page, const struct iomap_ops *ops); int iomap_readpage(struct page *page, const struct iomap_ops *ops);
...@@ -166,7 +169,7 @@ int iomap_migrate_page(struct address_space *mapping, struct page *newpage, ...@@ -166,7 +169,7 @@ int iomap_migrate_page(struct address_space *mapping, struct page *newpage,
#else #else
#define iomap_migrate_page NULL #define iomap_migrate_page NULL
#endif #endif
int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
const struct iomap_ops *ops); const struct iomap_ops *ops);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
bool *did_zero, const struct iomap_ops *ops); bool *did_zero, const struct iomap_ops *ops);
...@@ -183,6 +186,63 @@ loff_t iomap_seek_data(struct inode *inode, loff_t offset, ...@@ -183,6 +186,63 @@ loff_t iomap_seek_data(struct inode *inode, loff_t offset,
sector_t iomap_bmap(struct address_space *mapping, sector_t bno, sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
const struct iomap_ops *ops); const struct iomap_ops *ops);
/*
* Structure for writeback I/O completions.
*/
struct iomap_ioend {
struct list_head io_list; /* next ioend in chain */
u16 io_type;
u16 io_flags; /* IOMAP_F_* */
struct inode *io_inode; /* file being written to */
size_t io_size; /* size of the extent */
loff_t io_offset; /* offset in the file */
void *io_private; /* file system private data */
struct bio *io_bio; /* bio being built */
struct bio io_inline_bio; /* MUST BE LAST! */
};
struct iomap_writeback_ops {
/*
* Required, maps the blocks so that writeback can be performed on
* the range starting at offset.
*/
int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
loff_t offset);
/*
* Optional, allows the file systems to perform actions just before
* submitting the bio and/or override the bio end_io handler for complex
* operations like copy on write extent manipulation or unwritten extent
* conversions.
*/
int (*prepare_ioend)(struct iomap_ioend *ioend, int status);
/*
* Optional, allows the file system to discard state on a page where
* we failed to submit any I/O.
*/
void (*discard_page)(struct page *page);
};
struct iomap_writepage_ctx {
struct iomap iomap;
struct iomap_ioend *ioend;
const struct iomap_writeback_ops *ops;
};
void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
void iomap_ioend_try_merge(struct iomap_ioend *ioend,
struct list_head *more_ioends,
void (*merge_private)(struct iomap_ioend *ioend,
struct iomap_ioend *next));
void iomap_sort_ioends(struct list_head *ioend_list);
int iomap_writepage(struct page *page, struct writeback_control *wbc,
struct iomap_writepage_ctx *wpc,
const struct iomap_writeback_ops *ops);
int iomap_writepages(struct address_space *mapping,
struct writeback_control *wbc, struct iomap_writepage_ctx *wpc,
const struct iomap_writeback_ops *ops);
/* /*
* Flags for direct I/O ->end_io: * Flags for direct I/O ->end_io:
*/ */
...@@ -195,7 +255,8 @@ struct iomap_dio_ops { ...@@ -195,7 +255,8 @@ struct iomap_dio_ops {
}; };
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops); const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
bool wait_for_completion);
int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
#ifdef CONFIG_SWAP #ifdef CONFIG_SWAP
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment