Commit 35ec7d57 authored by Chao Yu, committed by Jaegeuk Kim

f2fs: split discard command in prior to block layer

Some devices have a small max_{hw,}discard_sectors, so that in
__blkdev_issue_discard(), one large discard bio can be split
into multiple small discard bios, resulting in heavy load on the IO
scheduler and device, which can stall other sync IO for a long time.

Now, f2fs is trying to control discard commands more elaborately,
in order to reduce conflicts between discard IO and user IO and
thereby enhance application performance. So in this patch, we
split the discard bio in f2fs, before it reaches the block layer,
to avoid issuing multiple discard bios in a short time.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
parent a690efff
...@@ -178,7 +178,6 @@ enum { ...@@ -178,7 +178,6 @@ enum {
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
#define DEF_MAX_DISCARD_LEN 512 /* Max. 2MB per discard */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ #define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */ #define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ #define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
...@@ -250,9 +249,10 @@ struct discard_entry { ...@@ -250,9 +249,10 @@ struct discard_entry {
(MAX_PLIST_NUM - 1) : (blk_num - 1)) (MAX_PLIST_NUM - 1) : (blk_num - 1))
enum { enum {
D_PREP, D_PREP, /* initial */
D_SUBMIT, D_PARTIAL, /* partially submitted */
D_DONE, D_SUBMIT, /* all submitted */
D_DONE, /* finished */
}; };
struct discard_info { struct discard_info {
...@@ -277,7 +277,10 @@ struct discard_cmd { ...@@ -277,7 +277,10 @@ struct discard_cmd {
struct block_device *bdev; /* bdev */ struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */ unsigned short ref; /* reference count */
unsigned char state; /* state */ unsigned char state; /* state */
unsigned char issuing; /* issuing discard */
int error; /* bio error */ int error; /* bio error */
spinlock_t lock; /* for state/bio_ref updating */
unsigned short bio_ref; /* bio reference count */
}; };
enum { enum {
...@@ -710,22 +713,22 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, ...@@ -710,22 +713,22 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
} }
static inline bool __is_discard_mergeable(struct discard_info *back, static inline bool __is_discard_mergeable(struct discard_info *back,
struct discard_info *front) struct discard_info *front, unsigned int max_len)
{ {
return (back->lstart + back->len == front->lstart) && return (back->lstart + back->len == front->lstart) &&
(back->len + front->len < DEF_MAX_DISCARD_LEN); (back->len + front->len <= max_len);
} }
static inline bool __is_discard_back_mergeable(struct discard_info *cur, static inline bool __is_discard_back_mergeable(struct discard_info *cur,
struct discard_info *back) struct discard_info *back, unsigned int max_len)
{ {
return __is_discard_mergeable(back, cur); return __is_discard_mergeable(back, cur, max_len);
} }
static inline bool __is_discard_front_mergeable(struct discard_info *cur, static inline bool __is_discard_front_mergeable(struct discard_info *cur,
struct discard_info *front) struct discard_info *front, unsigned int max_len)
{ {
return __is_discard_mergeable(cur, front); return __is_discard_mergeable(cur, front, max_len);
} }
static inline bool __is_extent_mergeable(struct extent_info *back, static inline bool __is_extent_mergeable(struct extent_info *back,
......
...@@ -839,9 +839,12 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, ...@@ -839,9 +839,12 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
dc->len = len; dc->len = len;
dc->ref = 0; dc->ref = 0;
dc->state = D_PREP; dc->state = D_PREP;
dc->issuing = 0;
dc->error = 0; dc->error = 0;
init_completion(&dc->wait); init_completion(&dc->wait);
list_add_tail(&dc->list, pend_list); list_add_tail(&dc->list, pend_list);
spin_lock_init(&dc->lock);
dc->bio_ref = 0;
atomic_inc(&dcc->discard_cmd_cnt); atomic_inc(&dcc->discard_cmd_cnt);
dcc->undiscard_blks += len; dcc->undiscard_blks += len;
...@@ -868,7 +871,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc, ...@@ -868,7 +871,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc,
struct discard_cmd *dc) struct discard_cmd *dc)
{ {
if (dc->state == D_DONE) if (dc->state == D_DONE)
atomic_dec(&dcc->issing_discard); atomic_sub(dc->issuing, &dcc->issing_discard);
list_del(&dc->list); list_del(&dc->list);
rb_erase(&dc->rb_node, &dcc->root); rb_erase(&dc->rb_node, &dcc->root);
...@@ -883,9 +886,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, ...@@ -883,9 +886,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_cmd *dc) struct discard_cmd *dc)
{ {
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
unsigned long flags;
trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len); trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
spin_lock_irqsave(&dc->lock, flags);
if (dc->bio_ref) {
spin_unlock_irqrestore(&dc->lock, flags);
return;
}
spin_unlock_irqrestore(&dc->lock, flags);
f2fs_bug_on(sbi, dc->ref); f2fs_bug_on(sbi, dc->ref);
if (dc->error == -EOPNOTSUPP) if (dc->error == -EOPNOTSUPP)
...@@ -901,10 +912,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, ...@@ -901,10 +912,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
static void f2fs_submit_discard_endio(struct bio *bio) static void f2fs_submit_discard_endio(struct bio *bio)
{ {
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
unsigned long flags;
dc->error = blk_status_to_errno(bio->bi_status); dc->error = blk_status_to_errno(bio->bi_status);
spin_lock_irqsave(&dc->lock, flags);
dc->bio_ref--;
if (!dc->bio_ref && dc->state == D_SUBMIT) {
dc->state = D_DONE; dc->state = D_DONE;
complete_all(&dc->wait); complete_all(&dc->wait);
}
spin_unlock_irqrestore(&dc->lock, flags);
bio_put(bio); bio_put(bio);
} }
...@@ -972,17 +990,25 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, ...@@ -972,17 +990,25 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
} }
} }
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len);
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static void __submit_discard_cmd(struct f2fs_sb_info *sbi, static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy, struct discard_policy *dpolicy,
struct discard_cmd *dc) struct discard_cmd *dc,
unsigned int *issued)
{ {
struct block_device *bdev = dc->bdev;
struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_discard_blocks =
SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
&(dcc->fstrim_list) : &(dcc->wait_list); &(dcc->fstrim_list) : &(dcc->wait_list);
struct bio *bio = NULL;
int flag = dpolicy->sync ? REQ_SYNC : 0; int flag = dpolicy->sync ? REQ_SYNC : 0;
block_t lstart, start, len, total_len;
int err = 0;
if (dc->state != D_PREP) if (dc->state != D_PREP)
return; return;
...@@ -990,30 +1016,81 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, ...@@ -990,30 +1016,81 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
return; return;
trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len); trace_f2fs_issue_discard(bdev, dc->start, dc->len);
dc->error = __blkdev_issue_discard(dc->bdev, lstart = dc->lstart;
SECTOR_FROM_BLOCK(dc->start), start = dc->start;
SECTOR_FROM_BLOCK(dc->len), len = dc->len;
total_len = len;
dc->len = 0;
while (total_len && *issued < dpolicy->max_requests && !err) {
struct bio *bio = NULL;
unsigned long flags;
bool last = true;
if (len > max_discard_blocks) {
len = max_discard_blocks;
last = false;
}
(*issued)++;
if (*issued == dpolicy->max_requests)
last = true;
dc->len += len;
err = __blkdev_issue_discard(bdev,
SECTOR_FROM_BLOCK(start),
SECTOR_FROM_BLOCK(len),
GFP_NOFS, 0, &bio); GFP_NOFS, 0, &bio);
if (!dc->error) { if (!err && bio) {
/* should keep before submission to avoid D_DONE right away */ /*
* should keep before submission to avoid D_DONE
* right away
*/
spin_lock_irqsave(&dc->lock, flags);
if (last)
dc->state = D_SUBMIT; dc->state = D_SUBMIT;
atomic_inc(&dcc->issued_discard); else
dc->state = D_PARTIAL;
dc->bio_ref++;
spin_unlock_irqrestore(&dc->lock, flags);
atomic_inc(&dcc->issing_discard); atomic_inc(&dcc->issing_discard);
if (bio) { dc->issuing++;
list_move_tail(&dc->list, wait_list);
/* sanity check on discard range */
__check_sit_bitmap(sbi, start, start + len);
bio->bi_private = dc; bio->bi_private = dc;
bio->bi_end_io = f2fs_submit_discard_endio; bio->bi_end_io = f2fs_submit_discard_endio;
bio->bi_opf |= flag; bio->bi_opf |= flag;
submit_bio(bio); submit_bio(bio);
list_move_tail(&dc->list, wait_list);
__check_sit_bitmap(sbi, dc->start, dc->start + dc->len); atomic_inc(&dcc->issued_discard);
f2fs_update_iostat(sbi, FS_DISCARD, 1); f2fs_update_iostat(sbi, FS_DISCARD, 1);
}
} else { } else {
spin_lock_irqsave(&dc->lock, flags);
if (dc->state == D_PARTIAL)
dc->state = D_SUBMIT;
spin_unlock_irqrestore(&dc->lock, flags);
__remove_discard_cmd(sbi, dc); __remove_discard_cmd(sbi, dc);
err = -EIO;
} }
lstart += len;
start += len;
total_len -= len;
len = total_len;
}
if (len)
__update_discard_tree_range(sbi, bdev, lstart, start, len);
} }
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
...@@ -1094,10 +1171,11 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, ...@@ -1094,10 +1171,11 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc; struct discard_cmd *dc;
struct discard_info di = {0}; struct discard_info di = {0};
struct rb_node **insert_p = NULL, *insert_parent = NULL; struct rb_node **insert_p = NULL, *insert_parent = NULL;
struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_discard_blocks =
SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
block_t end = lstart + len; block_t end = lstart + len;
mutex_lock(&dcc->cmd_lock);
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, lstart, NULL, lstart,
(struct rb_entry **)&prev_dc, (struct rb_entry **)&prev_dc,
...@@ -1137,7 +1215,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, ...@@ -1137,7 +1215,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
if (prev_dc && prev_dc->state == D_PREP && if (prev_dc && prev_dc->state == D_PREP &&
prev_dc->bdev == bdev && prev_dc->bdev == bdev &&
__is_discard_back_mergeable(&di, &prev_dc->di)) { __is_discard_back_mergeable(&di, &prev_dc->di,
max_discard_blocks)) {
prev_dc->di.len += di.len; prev_dc->di.len += di.len;
dcc->undiscard_blks += di.len; dcc->undiscard_blks += di.len;
__relocate_discard_cmd(dcc, prev_dc); __relocate_discard_cmd(dcc, prev_dc);
...@@ -1148,7 +1227,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, ...@@ -1148,7 +1227,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
if (next_dc && next_dc->state == D_PREP && if (next_dc && next_dc->state == D_PREP &&
next_dc->bdev == bdev && next_dc->bdev == bdev &&
__is_discard_front_mergeable(&di, &next_dc->di)) { __is_discard_front_mergeable(&di, &next_dc->di,
max_discard_blocks)) {
next_dc->di.lstart = di.lstart; next_dc->di.lstart = di.lstart;
next_dc->di.len += di.len; next_dc->di.len += di.len;
next_dc->di.start = di.start; next_dc->di.start = di.start;
...@@ -1171,8 +1251,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, ...@@ -1171,8 +1251,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
node = rb_next(&prev_dc->rb_node); node = rb_next(&prev_dc->rb_node);
next_dc = rb_entry_safe(node, struct discard_cmd, rb_node); next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
} }
mutex_unlock(&dcc->cmd_lock);
} }
static int __queue_discard_cmd(struct f2fs_sb_info *sbi, static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
...@@ -1187,7 +1265,9 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, ...@@ -1187,7 +1265,9 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
blkstart -= FDEV(devi).start_blk; blkstart -= FDEV(devi).start_blk;
} }
mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
return 0; return 0;
} }
...@@ -1226,9 +1306,9 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, ...@@ -1226,9 +1306,9 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
} }
dcc->next_pos = dc->lstart + dc->len; dcc->next_pos = dc->lstart + dc->len;
__submit_discard_cmd(sbi, dpolicy, dc); __submit_discard_cmd(sbi, dpolicy, dc, &issued);
if (++issued >= dpolicy->max_requests) if (issued >= dpolicy->max_requests)
break; break;
next: next:
node = rb_next(&dc->rb_node); node = rb_next(&dc->rb_node);
...@@ -1283,9 +1363,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, ...@@ -1283,9 +1363,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
break; break;
} }
__submit_discard_cmd(sbi, dpolicy, dc); __submit_discard_cmd(sbi, dpolicy, dc, &issued);
if (++issued >= dpolicy->max_requests) if (issued >= dpolicy->max_requests)
break; break;
} }
blk_finish_plug(&plug); blk_finish_plug(&plug);
...@@ -2492,9 +2572,9 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, ...@@ -2492,9 +2572,9 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
goto skip; goto skip;
} }
__submit_discard_cmd(sbi, dpolicy, dc); __submit_discard_cmd(sbi, dpolicy, dc, &issued);
if (++issued >= dpolicy->max_requests) { if (issued >= dpolicy->max_requests) {
start = dc->lstart + dc->len; start = dc->lstart + dc->len;
blk_finish_plug(&plug); blk_finish_plug(&plug);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment