Commit 71f2c820 authored by Chao Yu, committed by Jaegeuk Kim

f2fs: multidevice: support direct IO

Commit 3c62be17 ("f2fs: support multiple devices") did not add
direct IO support for the multiple-device feature; this patch
adds that missing part of the multi-device feature.

In addition, for a multiple-device image, we should be aware of
any issued direct write IO rather than just buffered write IO,
so that fsync and syncfs can issue a preflush command to the
device where the direct write IO goes, persisting user data for
POSIX compliance.
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
parent 6691d940
...@@ -1465,10 +1465,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, ...@@ -1465,10 +1465,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
struct extent_info ei = {0, }; struct extent_info ei = {0, };
block_t blkaddr; block_t blkaddr;
unsigned int start_pgofs; unsigned int start_pgofs;
int bidx = 0;
if (!maxblocks) if (!maxblocks)
return 0; return 0;
map->m_bdev = inode->i_sb->s_bdev;
map->m_multidev_dio =
f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
map->m_len = 0; map->m_len = 0;
map->m_flags = 0; map->m_flags = 0;
...@@ -1491,6 +1496,21 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, ...@@ -1491,6 +1496,21 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
if (flag == F2FS_GET_BLOCK_DIO) if (flag == F2FS_GET_BLOCK_DIO)
f2fs_wait_on_block_writeback_range(inode, f2fs_wait_on_block_writeback_range(inode,
map->m_pblk, map->m_len); map->m_pblk, map->m_len);
if (map->m_multidev_dio) {
block_t blk_addr = map->m_pblk;
bidx = f2fs_target_device_index(sbi, map->m_pblk);
map->m_bdev = FDEV(bidx).bdev;
map->m_pblk -= FDEV(bidx).start_blk;
map->m_len = min(map->m_len,
FDEV(bidx).end_blk + 1 - map->m_pblk);
if (map->m_may_create)
f2fs_update_device_state(sbi, inode->i_ino,
blk_addr, map->m_len);
}
goto out; goto out;
} }
...@@ -1609,6 +1629,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, ...@@ -1609,6 +1629,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
if (flag == F2FS_GET_BLOCK_PRE_AIO) if (flag == F2FS_GET_BLOCK_PRE_AIO)
goto skip; goto skip;
if (map->m_multidev_dio)
bidx = f2fs_target_device_index(sbi, blkaddr);
if (map->m_len == 0) { if (map->m_len == 0) {
/* preallocated unwritten block should be mapped for fiemap. */ /* preallocated unwritten block should be mapped for fiemap. */
if (blkaddr == NEW_ADDR) if (blkaddr == NEW_ADDR)
...@@ -1617,10 +1640,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, ...@@ -1617,10 +1640,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
map->m_pblk = blkaddr; map->m_pblk = blkaddr;
map->m_len = 1; map->m_len = 1;
if (map->m_multidev_dio)
map->m_bdev = FDEV(bidx).bdev;
} else if ((map->m_pblk != NEW_ADDR && } else if ((map->m_pblk != NEW_ADDR &&
blkaddr == (map->m_pblk + ofs)) || blkaddr == (map->m_pblk + ofs)) ||
(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
flag == F2FS_GET_BLOCK_PRE_DIO) { flag == F2FS_GET_BLOCK_PRE_DIO) {
if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
goto sync_out;
ofs++; ofs++;
map->m_len++; map->m_len++;
} else { } else {
...@@ -1673,11 +1701,31 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, ...@@ -1673,11 +1701,31 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
sync_out: sync_out:
/* for hardware encryption, but to avoid potential issue in future */ if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) /*
* for hardware encryption, but to avoid potential issue
* in future
*/
f2fs_wait_on_block_writeback_range(inode, f2fs_wait_on_block_writeback_range(inode,
map->m_pblk, map->m_len); map->m_pblk, map->m_len);
if (map->m_multidev_dio) {
block_t blk_addr = map->m_pblk;
bidx = f2fs_target_device_index(sbi, map->m_pblk);
map->m_bdev = FDEV(bidx).bdev;
map->m_pblk -= FDEV(bidx).start_blk;
if (map->m_may_create)
f2fs_update_device_state(sbi, inode->i_ino,
blk_addr, map->m_len);
f2fs_bug_on(sbi, blk_addr + map->m_len >
FDEV(bidx).end_blk + 1);
}
}
if (flag == F2FS_GET_BLOCK_PRECACHE) { if (flag == F2FS_GET_BLOCK_PRECACHE) {
if (map->m_flags & F2FS_MAP_MAPPED) { if (map->m_flags & F2FS_MAP_MAPPED) {
unsigned int ofs = start_pgofs - map->m_lblk; unsigned int ofs = start_pgofs - map->m_lblk;
...@@ -1696,7 +1744,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, ...@@ -1696,7 +1744,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
f2fs_balance_fs(sbi, dn.node_changed); f2fs_balance_fs(sbi, dn.node_changed);
} }
out: out:
trace_f2fs_map_blocks(inode, map, err); trace_f2fs_map_blocks(inode, map, create, flag, err);
return err; return err;
} }
...@@ -1755,6 +1803,9 @@ static int __get_data_block(struct inode *inode, sector_t iblock, ...@@ -1755,6 +1803,9 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
map_bh(bh, inode->i_sb, map.m_pblk); map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
bh->b_size = blks_to_bytes(inode, map.m_len); bh->b_size = blks_to_bytes(inode, map.m_len);
if (map.m_multidev_dio)
bh->b_bdev = map.m_bdev;
} }
return err; return err;
} }
......
...@@ -620,6 +620,7 @@ struct extent_tree { ...@@ -620,6 +620,7 @@ struct extent_tree {
F2FS_MAP_UNWRITTEN) F2FS_MAP_UNWRITTEN)
struct f2fs_map_blocks { struct f2fs_map_blocks {
struct block_device *m_bdev; /* for multi-device dio */
block_t m_pblk; block_t m_pblk;
block_t m_lblk; block_t m_lblk;
unsigned int m_len; unsigned int m_len;
...@@ -628,6 +629,7 @@ struct f2fs_map_blocks { ...@@ -628,6 +629,7 @@ struct f2fs_map_blocks {
pgoff_t *m_next_extent; /* point to next possible extent */ pgoff_t *m_next_extent; /* point to next possible extent */
int m_seg_type; int m_seg_type;
bool m_may_create; /* indicate it is from write path */ bool m_may_create; /* indicate it is from write path */
bool m_multidev_dio; /* indicate it allows multi-device dio */
}; };
/* for flag in get_data_block */ /* for flag in get_data_block */
...@@ -1733,12 +1735,15 @@ struct f2fs_sb_info { ...@@ -1733,12 +1735,15 @@ struct f2fs_sb_info {
/* For shrinker support */ /* For shrinker support */
struct list_head s_list; struct list_head s_list;
struct mutex umount_mutex;
unsigned int shrinker_run_no;
/* For multi devices */
int s_ndevs; /* number of devices */ int s_ndevs; /* number of devices */
struct f2fs_dev_info *devs; /* for device list */ struct f2fs_dev_info *devs; /* for device list */
unsigned int dirty_device; /* for checkpoint data flush */ unsigned int dirty_device; /* for checkpoint data flush */
spinlock_t dev_lock; /* protect dirty_device */ spinlock_t dev_lock; /* protect dirty_device */
struct mutex umount_mutex; bool aligned_blksize; /* all devices has the same logical blksize */
unsigned int shrinker_run_no;
/* For write statistics */ /* For write statistics */
u64 sectors_written_start; u64 sectors_written_start;
...@@ -3500,6 +3505,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, ...@@ -3500,6 +3505,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr, block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type, struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio); struct f2fs_io_info *fio);
void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
block_t blkaddr, unsigned int blkcnt);
void f2fs_wait_on_page_writeback(struct page *page, void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type, bool ordered, bool locked); enum page_type type, bool ordered, bool locked);
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
...@@ -4320,6 +4327,16 @@ static inline int block_unaligned_IO(struct inode *inode, ...@@ -4320,6 +4327,16 @@ static inline int block_unaligned_IO(struct inode *inode,
return align & blocksize_mask; return align & blocksize_mask;
} }
/*
 * Decide whether a block-mapping request may be handled as multi-device
 * direct IO.
 *
 * @sbi:  f2fs superblock info to check.
 * @flag: mapping mode of the request (compared against F2FS_GET_BLOCK_DIO).
 *
 * Returns false when f2fs_is_multi_device(sbi) is false or when @flag is
 * not F2FS_GET_BLOCK_DIO; otherwise returns sbi->aligned_blksize.
 */
static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi,
int flag)
{
/* nothing to redirect unless f2fs_is_multi_device() reports true */
if (!f2fs_is_multi_device(sbi))
return false;
/* only the direct IO mapping mode is eligible */
if (flag != F2FS_GET_BLOCK_DIO)
return false;
/*
 * aligned_blksize is cleared by f2fs_scan_devices() when any device's
 * logical block size differs from that of sb->s_bdev.
 */
return sbi->aligned_blksize;
}
static inline bool f2fs_force_buffered_io(struct inode *inode, static inline bool f2fs_force_buffered_io(struct inode *inode,
struct kiocb *iocb, struct iov_iter *iter) struct kiocb *iocb, struct iov_iter *iter)
{ {
...@@ -4328,7 +4345,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, ...@@ -4328,7 +4345,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
if (f2fs_post_read_required(inode)) if (f2fs_post_read_required(inode))
return true; return true;
if (f2fs_is_multi_device(sbi))
/* disallow direct IO if any of devices has unaligned blksize */
if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
return true; return true;
/* /*
* for blkzoned device, fallback direct IO to buffered IO, so * for blkzoned device, fallback direct IO to buffered IO, so
......
...@@ -3520,24 +3520,30 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, ...@@ -3520,24 +3520,30 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
up_read(&SM_I(sbi)->curseg_lock); up_read(&SM_I(sbi)->curseg_lock);
} }
static void update_device_state(struct f2fs_io_info *fio) void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
block_t blkaddr, unsigned int blkcnt)
{ {
struct f2fs_sb_info *sbi = fio->sbi;
unsigned int devidx;
if (!f2fs_is_multi_device(sbi)) if (!f2fs_is_multi_device(sbi))
return; return;
devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); while (1) {
unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
/* update device state for fsync */ /* update device state for fsync */
f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
/* update device state for checkpoint */ /* update device state for checkpoint */
if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
spin_lock(&sbi->dev_lock); spin_lock(&sbi->dev_lock);
f2fs_set_bit(devidx, (char *)&sbi->dirty_device); f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
spin_unlock(&sbi->dev_lock); spin_unlock(&sbi->dev_lock);
}
if (blkcnt <= blks)
break;
blkcnt -= blks;
blkaddr += blks;
} }
} }
...@@ -3564,7 +3570,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) ...@@ -3564,7 +3570,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
goto reallocate; goto reallocate;
} }
update_device_state(fio); f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
if (keep_order) if (keep_order)
up_read(&fio->sbi->io_order_lock); up_read(&fio->sbi->io_order_lock);
...@@ -3653,7 +3659,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) ...@@ -3653,7 +3659,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
else else
err = f2fs_submit_page_bio(fio); err = f2fs_submit_page_bio(fio);
if (!err) { if (!err) {
update_device_state(fio); f2fs_update_device_state(fio->sbi, fio->ino,
fio->new_blkaddr, 1);
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
} }
......
...@@ -3757,6 +3757,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) ...@@ -3757,6 +3757,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{ {
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
unsigned int max_devices = MAX_DEVICES; unsigned int max_devices = MAX_DEVICES;
unsigned int logical_blksize;
int i; int i;
/* Initialize single device information */ /* Initialize single device information */
...@@ -3777,6 +3778,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) ...@@ -3777,6 +3778,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
if (!sbi->devs) if (!sbi->devs)
return -ENOMEM; return -ENOMEM;
logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
sbi->aligned_blksize = true;
for (i = 0; i < max_devices; i++) { for (i = 0; i < max_devices; i++) {
if (i > 0 && !RDEV(i).path[0]) if (i > 0 && !RDEV(i).path[0])
...@@ -3813,6 +3817,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) ...@@ -3813,6 +3817,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
/* to release errored devices */ /* to release errored devices */
sbi->s_ndevs = i + 1; sbi->s_ndevs = i + 1;
if (logical_blksize != bdev_logical_block_size(FDEV(i).bdev))
sbi->aligned_blksize = false;
#ifdef CONFIG_BLK_DEV_ZONED #ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
!f2fs_sb_has_blkzoned(sbi)) { !f2fs_sb_has_blkzoned(sbi)) {
......
...@@ -570,9 +570,10 @@ TRACE_EVENT(f2fs_file_write_iter, ...@@ -570,9 +570,10 @@ TRACE_EVENT(f2fs_file_write_iter,
); );
TRACE_EVENT(f2fs_map_blocks, TRACE_EVENT(f2fs_map_blocks,
TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int ret), TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map,
int create, int flag, int ret),
TP_ARGS(inode, map, ret), TP_ARGS(inode, map, create, flag, ret),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev) __field(dev_t, dev)
...@@ -583,11 +584,14 @@ TRACE_EVENT(f2fs_map_blocks, ...@@ -583,11 +584,14 @@ TRACE_EVENT(f2fs_map_blocks,
__field(unsigned int, m_flags) __field(unsigned int, m_flags)
__field(int, m_seg_type) __field(int, m_seg_type)
__field(bool, m_may_create) __field(bool, m_may_create)
__field(bool, m_multidev_dio)
__field(int, create)
__field(int, flag)
__field(int, ret) __field(int, ret)
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = inode->i_sb->s_dev; __entry->dev = map->m_bdev->bd_dev;
__entry->ino = inode->i_ino; __entry->ino = inode->i_ino;
__entry->m_lblk = map->m_lblk; __entry->m_lblk = map->m_lblk;
__entry->m_pblk = map->m_pblk; __entry->m_pblk = map->m_pblk;
...@@ -595,12 +599,16 @@ TRACE_EVENT(f2fs_map_blocks, ...@@ -595,12 +599,16 @@ TRACE_EVENT(f2fs_map_blocks,
__entry->m_flags = map->m_flags; __entry->m_flags = map->m_flags;
__entry->m_seg_type = map->m_seg_type; __entry->m_seg_type = map->m_seg_type;
__entry->m_may_create = map->m_may_create; __entry->m_may_create = map->m_may_create;
__entry->m_multidev_dio = map->m_multidev_dio;
__entry->create = create;
__entry->flag = flag;
__entry->ret = ret; __entry->ret = ret;
), ),
TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, "
"start blkaddr = 0x%llx, len = 0x%llx, flags = %u," "start blkaddr = 0x%llx, len = 0x%llx, flags = %u, "
"seg_type = %d, may_create = %d, err = %d", "seg_type = %d, may_create = %d, multidevice = %d, "
"create = %d, flag = %d, err = %d",
show_dev_ino(__entry), show_dev_ino(__entry),
(unsigned long long)__entry->m_lblk, (unsigned long long)__entry->m_lblk,
(unsigned long long)__entry->m_pblk, (unsigned long long)__entry->m_pblk,
...@@ -608,6 +616,9 @@ TRACE_EVENT(f2fs_map_blocks, ...@@ -608,6 +616,9 @@ TRACE_EVENT(f2fs_map_blocks,
__entry->m_flags, __entry->m_flags,
__entry->m_seg_type, __entry->m_seg_type,
__entry->m_may_create, __entry->m_may_create,
__entry->m_multidev_dio,
__entry->create,
__entry->flag,
__entry->ret) __entry->ret)
); );
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment