Commit 1df3e199 authored by Kent Overstreet

bcachefs: BCH_WRITE_SYNC

This adds a new flag for the write path, BCH_WRITE_SYNC, and switches
the O_DIRECT write path to use it when we're not running asynchronously.

With the flag set, the btree update that follows the data write runs in the
submitting thread's context instead of being punted to a kworker, cutting
the number of context switches in half.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent a1019576
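In outline, the completion logic this patch introduces dispatches on the new
flag as follows. This is a distilled sketch of bch2_write_done() from the diff
below, simplified rather than verbatim:

/* Sketch: how completion dispatches on BCH_WRITE_SYNC (simplified). */
static void write_done_sketch(struct bch_write_op *op)
{
	if (!(op->flags & BCH_WRITE_FLUSH) || op->error) {
		/* No journal flush required: complete immediately. */
		__bch2_write_done(&op->cl);
	} else if (op->flags & BCH_WRITE_SYNC) {
		/* Block in the submitting thread: no workqueue hop. */
		bch2_journal_flush_seq(&op->c->journal, op->journal_seq);
		__bch2_write_done(&op->cl);
	} else {
		/* Async: flush in the background, finish from a kworker. */
		bch2_journal_flush_seq_async(&op->c->journal,
					     op->journal_seq, &op->cl);
		continue_at(&op->cl, __bch2_write_done, index_update_wq(op));
	}
}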
@@ -2156,6 +2156,8 @@ static long bch2_dio_write_loop(struct dio_write *dio)
 	dio->op.subvol		= inode->ei_subvol;
 	dio->op.pos		= POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
 
+	if (sync)
+		dio->op.flags |= BCH_WRITE_SYNC;
 	if ((req->ki_flags & IOCB_DSYNC) &&
 	    !c->opts.journal_flush_disabled)
 		dio->op.flags |= BCH_WRITE_FLUSH;
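The sync flag tested above is computed earlier in the dio path and is not
shown in this hunk. A plausible sketch of where it comes from, assuming the
standard VFS helper is_sync_kiocb() (an assumption, not quoted from the
patch):

/*
 * Assumed origin of `sync`: a dio write is synchronous unless the kiocb
 * carries a completion callback, i.e. it was issued via AIO/io_uring.
 */
bool sync = is_sync_kiocb(req);		/* true when req->ki_complete is NULL */

if (sync)
	dio->op.flags |= BCH_WRITE_SYNC;	/* finish the op in this thread */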
...
@@ -596,7 +596,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
 
 static void __bch2_write(struct bch_write_op *);
 
-static void bch2_write_done(struct closure *cl)
+static void __bch2_write_done(struct closure *cl)
 {
 	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
 	struct bch_fs *c = op->c;
@@ -612,7 +612,23 @@ static void bch2_write_done(struct closure *cl)
 
 	EBUG_ON(cl->parent);
 	closure_debug_destroy(cl);
-	op->end_io(op);
+	if (op->end_io)
+		op->end_io(op);
+}
+
+static __always_inline void bch2_write_done(struct bch_write_op *op)
+{
+	if (likely(!(op->flags & BCH_WRITE_FLUSH) || op->error)) {
+		__bch2_write_done(&op->cl);
+	} else if (!(op->flags & BCH_WRITE_SYNC)) {
+		bch2_journal_flush_seq_async(&op->c->journal,
+					     op->journal_seq,
+					     &op->cl);
+		continue_at(&op->cl, __bch2_write_done, index_update_wq(op));
+	} else {
+		bch2_journal_flush_seq(&op->c->journal, op->journal_seq);
+		__bch2_write_done(&op->cl);
+	}
 }
 
 static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
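One consequence of the split above: end_io is now optional, since a caller
that sets BCH_WRITE_SYNC gets a fully synchronous bch2_write() and can simply
check op->error once it returns. A hypothetical synchronous caller
(illustrative only; the setup details are not from this patch):

/* Hypothetical sync caller: no completion callback needed. */
op->flags |= BCH_WRITE_SYNC;
op->end_io = NULL;		/* allowed: __bch2_write_done() checks for NULL */
closure_call(&op->cl, bch2_write, NULL, NULL);
/* The data write, btree update, and any journal flush are complete here. */
if (op->error)
	pr_err("write failed: %i", op->error);	/* example error handling */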
@@ -699,6 +715,7 @@ static void __bch2_write_index(struct bch_write_op *op)
 err:
 	keys->top = keys->keys;
 	op->error = ret;
+	op->flags |= BCH_WRITE_DONE;
 	goto out;
 }
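Marking the op done on this error path matters because __bch2_write() keeps
resubmitting until the op is flagged done; without it, a failed btree update
could send the submit loop back around. The loop-exit check, from the
__bch2_write() hunk further down:

	if (!(op->flags & BCH_WRITE_DONE))
		goto again;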
@@ -778,9 +795,9 @@ void bch2_write_point_do_index_updates(struct work_struct *work)
 			bch2_journal_flush_seq_async(&op->c->journal,
 						     op->journal_seq,
 						     &op->cl);
-			continue_at(&op->cl, bch2_write_done, index_update_wq(op));
+			continue_at(&op->cl, __bch2_write_done, index_update_wq(op));
 		} else {
-			bch2_write_done(&op->cl);
+			__bch2_write_done(&op->cl);
 		}
 	}
 }
@@ -1271,10 +1288,10 @@ static void __bch2_write(struct bch_write_op *op)
 				      ? NULL : &op->cl,
 				      &wp);
 
 		if (unlikely(ret)) {
-			if (unlikely(ret != -EAGAIN))
-				goto err;
-			break;
+			if (ret == -EAGAIN)
+				break;
+			goto err;
 		}
 
 		EBUG_ON(!wp);
@@ -1283,13 +1300,25 @@ static void __bch2_write(struct bch_write_op *op)
 		ret = bch2_write_extent(op, wp, &bio);
 
 		bch2_alloc_sectors_done(c, wp);
+err:
+		if (ret <= 0) {
+			if (!(op->flags & BCH_WRITE_SYNC)) {
+				spin_lock(&wp->writes_lock);
+				op->wp = wp;
+				list_add_tail(&op->wp_list, &wp->writes);
+				if (wp->state == WRITE_POINT_stopped)
+					__wp_update_state(wp, WRITE_POINT_waiting_io);
+				spin_unlock(&wp->writes_lock);
+			}
 
-		if (ret < 0)
-			goto err;
+			op->flags |= BCH_WRITE_DONE;
 
-		if (!ret)
-			op->flags |= BCH_WRITE_DONE;
+			if (ret < 0) {
+				op->error = ret;
+				break;
+			}
+		}
 
 		bio->bi_end_io	= bch2_write_endio;
 		bio->bi_private	= &op->cl;
 		bio->bi_opf |= REQ_OP_WRITE;
@@ -1302,36 +1331,28 @@ static void __bch2_write(struct bch_write_op *op)
 		bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user,
 					  key_to_write);
 	} while (ret);
-out:
+
 	/*
-	 * If the write can't all be submitted at once, we generally want to
-	 * block synchronously as that signals backpressure to the caller.
+	 * Sync or no?
+	 *
+	 * If we're running asynchronously, we may still want to block
+	 * synchronously here if we weren't able to submit all of the IO at
+	 * once, as that signals backpressure to the caller.
 	 */
-	if (!(op->flags & BCH_WRITE_DONE) &&
-	    !(op->flags & BCH_WRITE_IN_WORKER)) {
+	if ((op->flags & BCH_WRITE_SYNC) ||
+	    (!(op->flags & BCH_WRITE_DONE) &&
+	     !(op->flags & BCH_WRITE_IN_WORKER))) {
 		closure_sync(&op->cl);
 		__bch2_write_index(op);
 
 		if (!(op->flags & BCH_WRITE_DONE))
 			goto again;
-		bch2_write_done(&op->cl);
+		bch2_write_done(op);
 	} else {
-		spin_lock(&wp->writes_lock);
-		op->wp = wp;
-		list_add_tail(&op->wp_list, &wp->writes);
-		if (wp->state == WRITE_POINT_stopped)
-			__wp_update_state(wp, WRITE_POINT_waiting_io);
-		spin_unlock(&wp->writes_lock);
 		continue_at(&op->cl, bch2_write_index, NULL);
 	}
 
 	memalloc_nofs_restore(nofs_flags);
-	return;
-err:
-	op->error = ret;
-	op->flags |= BCH_WRITE_DONE;
-	goto out;
 }
 
 static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
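Seen end to end, the rewritten tail of __bch2_write() reduces to one decision:
sync ops always finish inline, while async ops block only when the IO could
not all be submitted at once, which is how backpressure reaches the caller.
A condensed sketch with explanatory comments added (not verbatim):

if ((op->flags & BCH_WRITE_SYNC) ||
    (!(op->flags & BCH_WRITE_DONE) &&
     !(op->flags & BCH_WRITE_IN_WORKER))) {
	closure_sync(&op->cl);		/* wait for the bios just submitted */
	__bch2_write_index(op);		/* btree update in this thread */

	if (!(op->flags & BCH_WRITE_DONE))
		goto again;		/* more IO still to submit */
	bch2_write_done(op);
} else {
	continue_at(&op->cl, bch2_write_index, NULL);	/* punt to a worker */
}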
@@ -1374,7 +1395,7 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
 
 	__bch2_write_index(op);
 err:
-	bch2_write_done(&op->cl);
+	bch2_write_done(op);
 }
 
 /**
...
@@ -39,6 +39,7 @@ enum bch_write_flags {
 	__BCH_WRITE_WROTE_DATA_INLINE,
 	__BCH_WRITE_FROM_INTERNAL,
 	__BCH_WRITE_CHECK_ENOSPC,
+	__BCH_WRITE_SYNC,
 	__BCH_WRITE_MOVE,
 	__BCH_WRITE_IN_WORKER,
 	__BCH_WRITE_DONE,
@@ -55,6 +56,7 @@ enum bch_write_flags {
 #define BCH_WRITE_WROTE_DATA_INLINE	(1U << __BCH_WRITE_WROTE_DATA_INLINE)
 #define BCH_WRITE_FROM_INTERNAL		(1U << __BCH_WRITE_FROM_INTERNAL)
 #define BCH_WRITE_CHECK_ENOSPC		(1U << __BCH_WRITE_CHECK_ENOSPC)
+#define BCH_WRITE_SYNC			(1U << __BCH_WRITE_SYNC)
 #define BCH_WRITE_MOVE			(1U << __BCH_WRITE_MOVE)
 
 /* Internal: */
...
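The new flag follows the header's existing two-part pattern: the enum entry
supplies a bit index and the #define supplies the mask that callers test.
Hypothetical usage (illustrative only, not from the patch):

/* Request synchronous completion (illustrative caller). */
op->flags |= BCH_WRITE_SYNC;

/* ...and in the write path, test the mask: */
if (op->flags & BCH_WRITE_SYNC) {
	/* run the btree update and journal flush in the submitting thread */
}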