Commit 93723095 authored by Qu Wenruo's avatar Qu Wenruo Committed by David Sterba

btrfs: raid56: switch write path to rmw_rbio()

This includes the following changes:

- Implement new raid_unplug() functions
  Now we don't need a workqueue to run the plug, as all our
  work is just queue rmw_rbio_work() call, which can be executed
  without sleep.

- Implement a rmw_rbio_work_locked() helper
  This is for unlock_stripe(), which is already holding the full stripe
  lock.

- Remove all the old functions
  This should already shows how complex the old functions are, as we
  ended up removing the following functions:

  * rmw_work()
  * validate_rbio_for_rmw()
  * raid56_rmw_end_io_work()
  * raid56_rmw_stripe()
  * full_stripe_write()
  * partial_stripe_write()
  * __raid56_parity_write()
  * run_plug()
  * unplug_work()
  * btrfs_raid_unplug()
  * rmw_work()
  * __raid56_parity_recover()
  * raid_recover_end_io_work()

- Unexport rmw_rbio()
Signed-off-by: default avatarQu Wenruo <wqu@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 5eb30ee2
......@@ -64,9 +64,9 @@ struct sector_ptr {
unsigned int uptodate:8;
};
static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
static void rmw_work(struct work_struct *work);
static void rmw_rbio_work(struct work_struct *work);
static void rmw_rbio_work_locked(struct work_struct *work);
static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
static void index_rbio_pages(struct btrfs_raid_bio *rbio);
......@@ -816,7 +816,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
start_async_work(next, recover_rbio_work_locked);
} else if (next->operation == BTRFS_RBIO_WRITE) {
steal_rbio(rbio, next);
start_async_work(next, rmw_work);
start_async_work(next, rmw_rbio_work_locked);
} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
steal_rbio(rbio, next);
start_async_work(next, scrub_parity_work);
......@@ -1108,23 +1108,6 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
return 0;
}
/*
* while we're doing the read/modify/write cycle, we could
* have errors in reading pages off the disk. This checks
* for errors and if we're not able to read the page it'll
* trigger parity reconstruction. The rmw will be finished
* after we've reconstructed the failed stripes
*/
static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
{
if (rbio->faila >= 0 || rbio->failb >= 0) {
BUG_ON(rbio->faila == rbio->real_stripes - 1);
__raid56_parity_recover(rbio);
} else {
finish_rmw(rbio);
}
}
static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
{
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
......@@ -1601,31 +1584,6 @@ static void raid56_bio_end_io(struct bio *bio)
&rbio->end_io_work);
}
/*
* End io handler for the read phase of the RMW cycle. All the bios here are
* physical stripe bios we've read from the disk so we can recalculate the
* parity of the stripe.
*
* This will usually kick off finish_rmw once all the bios are read in, but it
* may trigger parity reconstruction if we had any errors along the way
*/
static void raid56_rmw_end_io_work(struct work_struct *work)
{
struct btrfs_raid_bio *rbio =
container_of(work, struct btrfs_raid_bio, end_io_work);
if (atomic_read(&rbio->error) > rbio->bioc->max_errors) {
rbio_orig_end_io(rbio, BLK_STS_IOERR);
return;
}
/*
* This will normally call finish_rmw to start our write but if there
* are any failed stripes we'll reconstruct from parity first.
*/
validate_rbio_for_rmw(rbio);
}
static int rmw_assemble_read_bios(struct btrfs_raid_bio *rbio,
struct bio_list *bio_list)
{
......@@ -1686,122 +1644,6 @@ static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
return 0;
}
/*
* the stripe must be locked by the caller. It will
* unlock after all the writes are done
*/
static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
{
int bios_to_read = 0;
struct bio_list bio_list;
int ret;
struct bio *bio;
bio_list_init(&bio_list);
ret = alloc_rbio_pages(rbio);
if (ret)
goto cleanup;
index_rbio_pages(rbio);
atomic_set(&rbio->error, 0);
ret = rmw_assemble_read_bios(rbio, &bio_list);
if (ret < 0)
goto cleanup;
bios_to_read = bio_list_size(&bio_list);
if (!bios_to_read) {
/*
* this can happen if others have merged with
* us, it means there is nothing left to read.
* But if there are missing devices it may not be
* safe to do the full stripe write yet.
*/
goto finish;
}
/*
* The bioc may be freed once we submit the last bio. Make sure not to
* touch it after that.
*/
atomic_set(&rbio->stripes_pending, bios_to_read);
INIT_WORK(&rbio->end_io_work, raid56_rmw_end_io_work);
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_end_io = raid56_bio_end_io;
if (trace_raid56_read_partial_enabled()) {
struct raid56_bio_trace_info trace_info = { 0 };
bio_get_trace_info(rbio, bio, &trace_info);
trace_raid56_read_partial(rbio, bio, &trace_info);
}
submit_bio(bio);
}
/* the actual write will happen once the reads are done */
return 0;
cleanup:
rbio_orig_end_io(rbio, BLK_STS_IOERR);
while ((bio = bio_list_pop(&bio_list)))
bio_put(bio);
return -EIO;
finish:
validate_rbio_for_rmw(rbio);
return 0;
}
/*
* if the upper layers pass in a full stripe, we thank them by only allocating
* enough pages to hold the parity, and sending it all down quickly.
*/
static int full_stripe_write(struct btrfs_raid_bio *rbio)
{
int ret;
ret = alloc_rbio_parity_pages(rbio);
if (ret)
return ret;
ret = lock_stripe_add(rbio);
if (ret == 0)
finish_rmw(rbio);
return 0;
}
/*
* partial stripe writes get handed over to async helpers.
* We're really hoping to merge a few more writes into this
* rbio before calculating new parity
*/
static int partial_stripe_write(struct btrfs_raid_bio *rbio)
{
int ret;
ret = lock_stripe_add(rbio);
if (ret == 0)
start_async_work(rbio, rmw_work);
return 0;
}
/*
* sometimes while we were reading from the drive to
* recalculate parity, enough new bios come into create
* a full stripe. So we do a check here to see if we can
* go directly to finish_rmw
*/
static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
{
/* head off into rmw land if we don't have a full stripe */
if (!rbio_is_full(rbio))
return partial_stripe_write(rbio);
return full_stripe_write(rbio);
}
/*
* We use plugging call backs to collect full stripes.
* Any time we get a partial stripe write while plugged
......@@ -1836,28 +1678,22 @@ static int plug_cmp(void *priv, const struct list_head *a,
return 0;
}
static void run_plug(struct btrfs_plug_cb *plug)
static void raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
struct btrfs_plug_cb *plug = container_of(cb, struct btrfs_plug_cb, cb);
struct btrfs_raid_bio *cur;
struct btrfs_raid_bio *last = NULL;
/*
* sort our plug list then try to merge
* everything we can in hopes of creating full
* stripes.
*/
list_sort(NULL, &plug->rbio_list, plug_cmp);
while (!list_empty(&plug->rbio_list)) {
cur = list_entry(plug->rbio_list.next,
struct btrfs_raid_bio, plug_list);
list_del_init(&cur->plug_list);
if (rbio_is_full(cur)) {
int ret;
/* we have a full stripe, send it down */
ret = full_stripe_write(cur);
BUG_ON(ret);
/* We have a full stripe, queue it down. */
start_async_work(cur, rmw_rbio_work);
continue;
}
if (last) {
......@@ -1865,42 +1701,16 @@ static void run_plug(struct btrfs_plug_cb *plug)
merge_rbio(last, cur);
free_raid_bio(cur);
continue;
}
__raid56_parity_write(last);
start_async_work(last, rmw_rbio_work);
}
last = cur;
}
if (last) {
__raid56_parity_write(last);
}
if (last)
start_async_work(last, rmw_rbio_work);
kfree(plug);
}
/*
* if the unplug comes from schedule, we have to push the
* work off to a helper thread
*/
static void unplug_work(struct work_struct *work)
{
struct btrfs_plug_cb *plug;
plug = container_of(work, struct btrfs_plug_cb, work);
run_plug(plug);
}
static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
struct btrfs_plug_cb *plug;
plug = container_of(cb, struct btrfs_plug_cb, cb);
if (from_schedule) {
INIT_WORK(&plug->work, unplug_work);
queue_work(plug->info->rmw_workers, &plug->work);
return;
}
run_plug(plug);
}
/* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
{
......@@ -1948,19 +1758,13 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
rbio_add_bio(rbio, bio);
/*
* don't plug on full rbios, just get them out the door
* Don't plug on full rbios, just get them out the door
* as quickly as we can
*/
if (rbio_is_full(rbio)) {
ret = full_stripe_write(rbio);
if (ret) {
free_raid_bio(rbio);
goto fail;
}
return;
}
if (rbio_is_full(rbio))
goto queue_rbio;
cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
cb = blk_check_plugged(raid_unplug, fs_info, sizeof(*plug));
if (cb) {
plug = container_of(cb, struct btrfs_plug_cb, cb);
if (!plug->info) {
......@@ -1968,13 +1772,14 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
INIT_LIST_HEAD(&plug->rbio_list);
}
list_add_tail(&rbio->plug_list, &plug->rbio_list);
} else {
ret = __raid56_parity_write(rbio);
if (ret) {
free_raid_bio(rbio);
goto fail;
}
return;
}
queue_rbio:
/*
* Either we don't have any existing plug, or we're doing a full stripe,
* can queue the rmw work now.
*/
start_async_work(rbio, rmw_rbio_work);
return;
......@@ -2217,21 +2022,6 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
}
}
/*
* This is called only for stripes we've read from disk to reconstruct the
* parity.
*/
static void raid_recover_end_io_work(struct work_struct *work)
{
struct btrfs_raid_bio *rbio =
container_of(work, struct btrfs_raid_bio, end_io_work);
if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
rbio_orig_end_io(rbio, BLK_STS_IOERR);
else
__raid_recover_end_io(rbio);
}
static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,
struct bio_list *bio_list)
{
......@@ -2348,79 +2138,6 @@ static void recover_rbio_work_locked(struct work_struct *work)
rbio_orig_end_io(rbio, errno_to_blk_status(ret));
}
/*
* reads everything we need off the disk to reconstruct
* the parity. endio handlers trigger final reconstruction
* when the IO is done.
*
* This is used both for reads from the higher layers and for
* parity construction required to finish a rmw cycle.
*/
static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
{
int bios_to_read = 0;
struct bio_list bio_list;
int ret;
struct bio *bio;
bio_list_init(&bio_list);
ret = alloc_rbio_pages(rbio);
if (ret)
goto cleanup;
atomic_set(&rbio->error, 0);
ret = recover_assemble_read_bios(rbio, &bio_list);
if (ret < 0)
goto cleanup;
bios_to_read = bio_list_size(&bio_list);
if (!bios_to_read) {
/*
* we might have no bios to read just because the pages
* were up to date, or we might have no bios to read because
* the devices were gone.
*/
if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) {
__raid_recover_end_io(rbio);
return 0;
} else {
goto cleanup;
}
}
/*
* The bioc may be freed once we submit the last bio. Make sure not to
* touch it after that.
*/
atomic_set(&rbio->stripes_pending, bios_to_read);
INIT_WORK(&rbio->end_io_work, raid_recover_end_io_work);
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_end_io = raid56_bio_end_io;
if (trace_raid56_scrub_read_recover_enabled()) {
struct raid56_bio_trace_info trace_info = { 0 };
bio_get_trace_info(rbio, bio, &trace_info);
trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
}
submit_bio(bio);
}
return 0;
cleanup:
if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
rbio_orig_end_io(rbio, BLK_STS_IOERR);
while ((bio = bio_list_pop(&bio_list)))
bio_put(bio);
return -EIO;
}
/*
* the main entry point for reads from the higher layers. This
* is really only called when the normal read path had a failure,
......@@ -2529,7 +2246,7 @@ static void submit_write_bios(struct btrfs_raid_bio *rbio,
}
}
int rmw_rbio(struct btrfs_raid_bio *rbio)
static int rmw_rbio(struct btrfs_raid_bio *rbio)
{
struct bio_list bio_list;
int sectornr;
......@@ -2615,12 +2332,29 @@ int rmw_rbio(struct btrfs_raid_bio *rbio)
return ret;
}
static void rmw_work(struct work_struct *work)
static void rmw_rbio_work(struct work_struct *work)
{
struct btrfs_raid_bio *rbio;
int ret;
rbio = container_of(work, struct btrfs_raid_bio, work);
raid56_rmw_stripe(rbio);
ret = lock_stripe_add(rbio);
if (ret == 0) {
ret = rmw_rbio(rbio);
rbio_orig_end_io(rbio, errno_to_blk_status(ret));
}
}
static void rmw_rbio_work_locked(struct work_struct *work)
{
struct btrfs_raid_bio *rbio;
int ret;
rbio = container_of(work, struct btrfs_raid_bio, work);
ret = rmw_rbio(rbio);
rbio_orig_end_io(rbio, errno_to_blk_status(ret));
}
/*
......
......@@ -185,9 +185,4 @@ void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
/*
* Placeholder definition to avoid warning, will be removed when
* the full write path is migrated.
*/
int rmw_rbio(struct btrfs_raid_bio *rbio);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment