Commit a1e09b03 authored by Eric Biggers, committed by Jaegeuk Kim

f2fs: use iomap for direct I/O

Make f2fs_file_read_iter() and f2fs_file_write_iter() use the iomap
direct I/O implementation instead of the fs/direct-io.c one.

The iomap implementation is more efficient, and it also avoids the need
to add new features and optimizations to the old implementation.

This new implementation also eliminates the need for f2fs to hook bio
submission and completion and to allocate memory per-bio.  This is
because it's possible to correctly update f2fs's in-flight DIO counters
using __iomap_dio_rw() in combination with an implementation of
iomap_dio_ops::end_io() (as suggested by Christoph Hellwig).
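
As a concrete illustration of the mechanism, here is a minimal sketch of a
read-side end_io hook. The names f2fs_dio_read_end_io and
f2fs_iomap_dio_read_ops are illustrative rather than quoted from the patch,
but the helpers it calls (dec_page_count(), F2FS_DIO_READ,
f2fs_update_iostat(), APP_DIRECT_READ_IO) are the ones visible in the diff
below, and the callback prototype is the iomap_dio_ops::end_io() one:

static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
				unsigned int flags)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));

	/* The direct read is no longer in flight. */
	dec_page_count(sbi, F2FS_DIO_READ);

	if (error)
		return error;

	/* May run from bio completion (softirq) context, which is why
	 * the iostat locking below moves to the _bh variants. */
	f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, size);
	return 0;
}

static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
	.end_io = f2fs_dio_read_end_io,
};

A write-side hook can do the same with F2FS_DIO_WRITE and APP_DIRECT_IO,
with the read/write iter paths passing these ops to __iomap_dio_rw().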

When possible, this new implementation preserves existing f2fs behavior
such as the conditions for falling back to buffered I/O.
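
To sketch what "falling back" looks like in the new paths (a hedged example:
the helper name f2fs_should_use_dio() and its exact checks are assumptions,
not shown in this excerpt), the read/write iter routines can make the
buffered-vs-direct decision up front, reusing the f2fs_force_buffered_io()
test that the removed f2fs_direct_IO() below relies on:

static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
				struct iov_iter *iter)
{
	/* Not an O_DIRECT request at all. */
	if (!(iocb->ki_flags & IOCB_DIRECT))
		return false;

	/* The existing conditions (e.g. encrypted or compressed inodes)
	 * keep forcing the buffered path; the alignment handling from
	 * check_direct_IO() is left out of this sketch. */
	if (f2fs_force_buffered_io(inode, iocb, iter))
		return false;

	return true;
}

When this returns false, the request simply continues down the normal
buffered path, instead of ->direct_IO returning 0 as the old code did.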

This patch has been tested with xfstests by running 'gce-xfstests -c
f2fs -g auto -X generic/017' with and without this patch; no regressions
were seen.  (Some tests fail both before and after.  generic/017 hangs
both before and after, so it had to be excluded.)
Signed-off-by: Eric Biggers <ebiggers@google.com>
[Jaegeuk Kim: use spin_lock_bh for f2fs_update_iostat in softirq]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
parent 1517c1a7
@@ -1377,11 +1377,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
 		f2fs_invalidate_compress_page(sbi, old_blkaddr);
 	}
 	f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
-
-	/*
-	 * i_size will be updated by direct_IO. Otherwise, we'll get stale
-	 * data from unwritten block via dio_read.
-	 */
 	return 0;
 }
@@ -1743,50 +1738,6 @@ static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
 	return (blks << inode->i_blkbits);
 }
 
-static int __get_data_block(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh, int create, int flag,
-			pgoff_t *next_pgofs, int seg_type, bool may_write)
-{
-	struct f2fs_map_blocks map;
-	int err;
-
-	map.m_lblk = iblock;
-	map.m_len = bytes_to_blks(inode, bh->b_size);
-	map.m_next_pgofs = next_pgofs;
-	map.m_next_extent = NULL;
-	map.m_seg_type = seg_type;
-	map.m_may_create = may_write;
-
-	err = f2fs_map_blocks(inode, &map, create, flag);
-	if (!err) {
-		map_bh(bh, inode->i_sb, map.m_pblk);
-		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
-		bh->b_size = blks_to_bytes(inode, map.m_len);
-		if (map.m_multidev_dio)
-			bh->b_bdev = map.m_bdev;
-	}
-	return err;
-}
-
-static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh_result, int create)
-{
-	return __get_data_block(inode, iblock, bh_result, create,
-				F2FS_GET_BLOCK_DIO, NULL,
-				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
-				true);
-}
-
-static int get_data_block_dio(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh_result, int create)
-{
-	return __get_data_block(inode, iblock, bh_result, create,
-				F2FS_GET_BLOCK_DIO, NULL,
-				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
-				false);
-}
-
 static int f2fs_xattr_fiemap(struct inode *inode,
 				struct fiemap_extent_info *fieinfo)
 {
@@ -3263,7 +3214,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 					FS_CP_DATA_IO : FS_DATA_IO);
 }
 
-static void f2fs_write_failed(struct inode *inode, loff_t to)
+void f2fs_write_failed(struct inode *inode, loff_t to)
 {
 	loff_t i_size = i_size_read(inode);
@@ -3551,158 +3502,6 @@ static int f2fs_write_end(struct file *file,
 	return copied;
 }
 
-static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
-			   loff_t offset)
-{
-	unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
-	unsigned blkbits = i_blkbits;
-	unsigned blocksize_mask = (1 << blkbits) - 1;
-	unsigned long align = offset | iov_iter_alignment(iter);
-	struct block_device *bdev = inode->i_sb->s_bdev;
-
-	if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
-		return 1;
-
-	if (align & blocksize_mask) {
-		if (bdev)
-			blkbits = blksize_bits(bdev_logical_block_size(bdev));
-		blocksize_mask = (1 << blkbits) - 1;
-		if (align & blocksize_mask)
-			return -EINVAL;
-		return 1;
-	}
-	return 0;
-}
-
-static void f2fs_dio_end_io(struct bio *bio)
-{
-	struct f2fs_private_dio *dio = bio->bi_private;
-
-	dec_page_count(F2FS_I_SB(dio->inode),
-			dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
-	bio->bi_private = dio->orig_private;
-	bio->bi_end_io = dio->orig_end_io;
-
-	kfree(dio);
-
-	bio_endio(bio);
-}
-
-static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
-						loff_t file_offset)
-{
-	struct f2fs_private_dio *dio;
-	bool write = (bio_op(bio) == REQ_OP_WRITE);
-
-	dio = f2fs_kzalloc(F2FS_I_SB(inode),
-			sizeof(struct f2fs_private_dio), GFP_NOFS);
-	if (!dio)
-		goto out;
-
-	dio->inode = inode;
-	dio->orig_end_io = bio->bi_end_io;
-	dio->orig_private = bio->bi_private;
-	dio->write = write;
-
-	bio->bi_end_io = f2fs_dio_end_io;
-	bio->bi_private = dio;
-
-	inc_page_count(F2FS_I_SB(inode),
-			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
-	submit_bio(bio);
-	return;
-out:
-	bio->bi_status = BLK_STS_IOERR;
-	bio_endio(bio);
-}
-
-static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
-	struct address_space *mapping = iocb->ki_filp->f_mapping;
-	struct inode *inode = mapping->host;
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct f2fs_inode_info *fi = F2FS_I(inode);
-	size_t count = iov_iter_count(iter);
-	loff_t offset = iocb->ki_pos;
-	int rw = iov_iter_rw(iter);
-	int err;
-	enum rw_hint hint = iocb->ki_hint;
-	int whint_mode = F2FS_OPTION(sbi).whint_mode;
-	bool do_opu;
-
-	err = check_direct_IO(inode, iter, offset);
-	if (err)
-		return err < 0 ? err : 0;
-
-	if (f2fs_force_buffered_io(inode, iocb, iter))
-		return 0;
-
-	do_opu = rw == WRITE && f2fs_lfs_mode(sbi);
-
-	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
-
-	if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
-		iocb->ki_hint = WRITE_LIFE_NOT_SET;
-
-	if (iocb->ki_flags & IOCB_NOWAIT) {
-		if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
-			iocb->ki_hint = hint;
-			err = -EAGAIN;
-			goto out;
-		}
-		if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
-			up_read(&fi->i_gc_rwsem[rw]);
-			iocb->ki_hint = hint;
-			err = -EAGAIN;
-			goto out;
-		}
-	} else {
-		down_read(&fi->i_gc_rwsem[rw]);
-		if (do_opu)
-			down_read(&fi->i_gc_rwsem[READ]);
-	}
-
-	err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
-			iter, rw == WRITE ? get_data_block_dio_write :
-			get_data_block_dio, NULL, f2fs_dio_submit_bio,
-			rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
-			DIO_SKIP_HOLES);
-
-	if (do_opu)
-		up_read(&fi->i_gc_rwsem[READ]);
-
-	up_read(&fi->i_gc_rwsem[rw]);
-
-	if (rw == WRITE) {
-		if (whint_mode == WHINT_MODE_OFF)
-			iocb->ki_hint = hint;
-		if (err > 0) {
-			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
-							err);
-			if (!do_opu)
-				set_inode_flag(inode, FI_UPDATE_WRITE);
-		} else if (err == -EIOCBQUEUED) {
-			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
-						count - iov_iter_count(iter));
-		} else if (err < 0) {
-			f2fs_write_failed(inode, offset + count);
-		}
-	} else {
-		if (err > 0)
-			f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
-		else if (err == -EIOCBQUEUED)
-			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
-						count - iov_iter_count(iter));
-	}
-
-out:
-	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
-	return err;
-}
-
 void f2fs_invalidate_page(struct page *page, unsigned int offset,
 							unsigned int length)
 {
@@ -4158,7 +3957,7 @@ const struct address_space_operations f2fs_dblock_aops = {
 	.set_page_dirty = f2fs_set_data_page_dirty,
 	.invalidatepage = f2fs_invalidate_page,
 	.releasepage = f2fs_release_page,
-	.direct_IO = f2fs_direct_IO,
+	.direct_IO = noop_direct_IO,
 	.bmap = f2fs_bmap,
 	.swap_activate = f2fs_swap_activate,
 	.swap_deactivate = f2fs_swap_deactivate,
@@ -1807,13 +1807,6 @@ struct f2fs_sb_info {
 #endif
 };
 
-struct f2fs_private_dio {
-	struct inode *inode;
-	void *orig_private;
-	bio_end_io_t *orig_end_io;
-	bool write;
-};
-
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 #define f2fs_show_injection_info(sbi, type) \
 	printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \
@@ -3642,6 +3635,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
 				struct writeback_control *wbc,
 				enum iostat_type io_type,
 				int compr_blocks, bool allow_balance);
+void f2fs_write_failed(struct inode *inode, loff_t to);
 void f2fs_invalidate_page(struct page *page, unsigned int offset,
 			unsigned int length);
 int f2fs_release_page(struct page *page, gfp_t wait);
This diff is collapsed.
@@ -92,7 +92,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
 	struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
 	struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
 
-	spin_lock_irq(&sbi->iostat_lat_lock);
+	spin_lock_bh(&sbi->iostat_lat_lock);
 	for (idx = 0; idx < MAX_IO_TYPE; idx++) {
 		for (io = 0; io < NR_PAGE_TYPE; io++) {
 			cnt = io_lat->bio_cnt[idx][io];
@@ -106,7 +106,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
 			io_lat->bio_cnt[idx][io] = 0;
 		}
 	}
-	spin_unlock_irq(&sbi->iostat_lat_lock);
+	spin_unlock_bh(&sbi->iostat_lat_lock);
 
 	trace_f2fs_iostat_latency(sbi, iostat_lat);
 }
@@ -120,9 +120,9 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
 		return;
 
 	/* Need double check under the lock */
-	spin_lock(&sbi->iostat_lock);
+	spin_lock_bh(&sbi->iostat_lock);
 	if (time_is_after_jiffies(sbi->iostat_next_period)) {
-		spin_unlock(&sbi->iostat_lock);
+		spin_unlock_bh(&sbi->iostat_lock);
 		return;
 	}
 
 	sbi->iostat_next_period = jiffies +
@@ -133,7 +133,7 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
 					sbi->prev_rw_iostat[i];
 		sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
 	}
-	spin_unlock(&sbi->iostat_lock);
+	spin_unlock_bh(&sbi->iostat_lock);
 
 	trace_f2fs_iostat(sbi, iostat_diff);
@@ -145,16 +145,16 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
 	struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
 	int i;
 
-	spin_lock(&sbi->iostat_lock);
+	spin_lock_bh(&sbi->iostat_lock);
 	for (i = 0; i < NR_IO_TYPE; i++) {
 		sbi->rw_iostat[i] = 0;
 		sbi->prev_rw_iostat[i] = 0;
 	}
-	spin_unlock(&sbi->iostat_lock);
+	spin_unlock_bh(&sbi->iostat_lock);
 
-	spin_lock_irq(&sbi->iostat_lat_lock);
+	spin_lock_bh(&sbi->iostat_lat_lock);
 	memset(io_lat, 0, sizeof(struct iostat_lat_info));
-	spin_unlock_irq(&sbi->iostat_lat_lock);
+	spin_unlock_bh(&sbi->iostat_lat_lock);
 }
 
 void f2fs_update_iostat(struct f2fs_sb_info *sbi,
@@ -163,19 +163,16 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi,
 	if (!sbi->iostat_enable)
 		return;
 
-	spin_lock(&sbi->iostat_lock);
+	spin_lock_bh(&sbi->iostat_lock);
 	sbi->rw_iostat[type] += io_bytes;
 
-	if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
-		sbi->rw_iostat[APP_BUFFERED_IO] =
-			sbi->rw_iostat[APP_WRITE_IO] -
-			sbi->rw_iostat[APP_DIRECT_IO];
-
-	if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
-		sbi->rw_iostat[APP_BUFFERED_READ_IO] =
-			sbi->rw_iostat[APP_READ_IO] -
-			sbi->rw_iostat[APP_DIRECT_READ_IO];
-
-	spin_unlock(&sbi->iostat_lock);
+	if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO)
+		sbi->rw_iostat[APP_WRITE_IO] += io_bytes;
+
+	if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO)
+		sbi->rw_iostat[APP_READ_IO] += io_bytes;
+
+	spin_unlock_bh(&sbi->iostat_lock);
 
 	f2fs_record_iostat(sbi);
 }
@@ -185,7 +182,6 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
 {
 	unsigned long ts_diff;
 	unsigned int iotype = iostat_ctx->type;
-	unsigned long flags;
 	struct f2fs_sb_info *sbi = iostat_ctx->sbi;
 	struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
 	int idx;
@@ -206,12 +202,12 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
 		idx = WRITE_ASYNC_IO;
 	}
 
-	spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
+	spin_lock_bh(&sbi->iostat_lat_lock);
 	io_lat->sum_lat[idx][iotype] += ts_diff;
 	io_lat->bio_cnt[idx][iotype]++;
 	if (ts_diff > io_lat->peak_lat[idx][iotype])
 		io_lat->peak_lat[idx][iotype] = ts_diff;
-	spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
+	spin_unlock_bh(&sbi->iostat_lat_lock);
 }
 
 void iostat_update_and_unbind_ctx(struct bio *bio, int rw)