Commit 5eb30ee2 authored by Qu Wenruo, committed by David Sterba

btrfs: raid56: introduce the main entrance for RMW path

The new entrance will be called rmw_rbio(). It will have a streamlined
workflow using the submit-and-wait method.

This removes the weird jumps between a large number of functions, making
the code much more reader friendly, and it will make later expansion
easier, as the workflow is now linear and the timing much clearer.

Unfortunately we cannot yet migrate the RMW path to use this new
entrance, as extra work is still needed to address the plug and the
unlock_stripe() function.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 6486d21c
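
The heart of the submit-and-wait method is a pending counter paired with
a wait queue: the submitter sets the counter to the number of bios, each
completion handler decrements it, and the last completion wakes the
waiter. Below is a minimal userspace analogue of that pattern, sketched
with C11 atomics and pthreads; all names in it are illustrative, and the
kernel code in the diff uses atomic_t, wait_event() and wake_up()
instead.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_BIOS 4

static atomic_int pending;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t all_done = PTHREAD_COND_INITIALIZER;

/* Plays the role of raid_wait_write_end_io(): the last completion wakes the waiter. */
static void *fake_end_io(void *arg)
{
	(void)arg;
	/* ... the actual I/O would happen before this point ... */
	if (atomic_fetch_sub(&pending, 1) == 1) {
		pthread_mutex_lock(&lock);
		pthread_cond_signal(&all_done);	/* like wake_up(&rbio->io_wait) */
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t workers[NR_BIOS];

	/* Like submit_write_bios(): set the counter first, then submit everything. */
	atomic_store(&pending, NR_BIOS);
	for (int i = 0; i < NR_BIOS; i++)
		pthread_create(&workers[i], NULL, fake_end_io, NULL);

	/* Like wait_event(rbio->io_wait, stripes_pending == 0). */
	pthread_mutex_lock(&lock);
	while (atomic_load(&pending) != 0)
		pthread_cond_wait(&all_done, &lock);
	pthread_mutex_unlock(&lock);

	for (int i = 0; i < NR_BIOS; i++)
		pthread_join(workers[i], NULL);
	printf("all %d bios completed\n", NR_BIOS);
	return 0;
}

With this shape, the whole RMW pass reads top to bottom in a single
function instead of being scattered across end_io handlers and work
items, which is exactly the readability gain the commit message
describes.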
@@ -1252,6 +1252,14 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
/* We should have at least one data sector. */
ASSERT(bitmap_weight(&rbio->dbitmap, rbio->stripe_nsectors));
/*
* Reset errors, as we may have errors inherited from a degraded
* write.
*/
atomic_set(&rbio->error, 0);
rbio->faila = -1;
rbio->failb = -1;
/*
* Start assembly. Make bios for everything from the higher layers (the
* bio_list in our rbio) and our P/Q. Ignore everything else.
@@ -1665,6 +1673,19 @@ static int rmw_assemble_read_bios(struct btrfs_raid_bio *rbio,
return ret;
}
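/*
 * Allocate the pages covering all data stripes and re-index the sector
 * array so it points into the newly allocated pages.
 */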
static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
{
const int data_pages = rbio->nr_data * rbio->stripe_npages;
int ret;
ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages);
if (ret < 0)
return ret;
index_stripe_sectors(rbio);
return 0;
}
/*
* the stripe must be locked by the caller. It will
* unlock after all the writes are done
@@ -2454,6 +2475,146 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
start_async_work(rbio, recover_rbio_work);
}
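/*
 * Assemble and submit the read bios needed for the RMW, then sleep
 * until they all complete; on assembly failure, put any bios that were
 * already built.
 */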
static int rmw_read_and_wait(struct btrfs_raid_bio *rbio)
{
struct bio_list bio_list;
struct bio *bio;
int ret;
bio_list_init(&bio_list);
atomic_set(&rbio->error, 0);
ret = rmw_assemble_read_bios(rbio, &bio_list);
if (ret < 0)
goto out;
submit_read_bios(rbio, &bio_list);
wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
return ret;
out:
while ((bio = bio_list_pop(&bio_list)))
bio_put(bio);
return ret;
}
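/*
 * Write completion handler: record a failed stripe on error and wake
 * the waiter once the last pending bio finishes.
 */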
static void raid_wait_write_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
blk_status_t err = bio->bi_status;
if (err)
fail_bio_stripe(rbio, bio);
bio_put(bio);
if (atomic_dec_and_test(&rbio->stripes_pending))
wake_up(&rbio->io_wait);
}
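/*
 * Set stripes_pending to the full bio count before submitting anything,
 * so an early completion cannot drop the counter to zero prematurely.
 */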
static void submit_write_bios(struct btrfs_raid_bio *rbio,
struct bio_list *bio_list)
{
struct bio *bio;
atomic_set(&rbio->stripes_pending, bio_list_size(bio_list));
while ((bio = bio_list_pop(bio_list))) {
bio->bi_end_io = raid_wait_write_end_io;
if (trace_raid56_write_stripe_enabled()) {
struct raid56_bio_trace_info trace_info = { 0 };
bio_get_trace_info(rbio, bio, &trace_info);
trace_raid56_write_stripe(rbio, bio, &trace_info);
}
submit_bio(bio);
}
}
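/*
 * The main entrance of the RMW path, as one linear workflow: allocate
 * parity (and, for sub-stripe writes, data) pages, read in the sectors
 * not covered by the bio list and wait, recover from tolerable read
 * errors, then generate P/Q and submit all write bios, again waiting
 * for completion.
 */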
int rmw_rbio(struct btrfs_raid_bio *rbio)
{
struct bio_list bio_list;
int sectornr;
int ret = 0;
/*
* Allocate the pages for parity first, as P/Q pages will always be
* needed for both full-stripe and sub-stripe writes.
*/
ret = alloc_rbio_parity_pages(rbio);
if (ret < 0)
return ret;
/* Full stripe write, can write the full stripe right now. */
if (rbio_is_full(rbio))
goto write;
/*
* Now we're doing sub-stripe write, also need all data stripes to do
* the full RMW.
*/
ret = alloc_rbio_data_pages(rbio);
if (ret < 0)
return ret;
atomic_set(&rbio->error, 0);
index_rbio_pages(rbio);
ret = rmw_read_and_wait(rbio);
if (ret < 0)
return ret;
/* Too many read errors, beyond our tolerance. */
if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
return ret;
/* Have read failures but under tolerance, needs recovery. */
if (rbio->faila >= 0 || rbio->failb >= 0) {
ret = recover_rbio(rbio);
if (ret < 0)
return ret;
}
write:
/*
* At this stage we're not allowed to add any new bios to the
* bio list any more, anyone else that wants to change this stripe
* needs to do their own rmw.
*/
spin_lock_irq(&rbio->bio_list_lock);
set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
spin_unlock_irq(&rbio->bio_list_lock);
atomic_set(&rbio->error, 0);
index_rbio_pages(rbio);
/*
* We don't cache full rbios because we're assuming
* the higher layers are unlikely to use this area of
* the disk again soon. If they do use it again,
* hopefully they will send another full bio.
*/
if (!rbio_is_full(rbio))
cache_rbio_pages(rbio);
else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++)
generate_pq_vertical(rbio, sectornr);
bio_list_init(&bio_list);
ret = rmw_assemble_write_bios(rbio, &bio_list);
if (ret < 0)
return ret;
/* We should have at least one bio assembled. */
ASSERT(bio_list_size(&bio_list));
submit_write_bios(rbio, &bio_list);
wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
/* We have more errors than our tolerance during the write. */
if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
ret = -EIO;
return ret;
}
static void rmw_work(struct work_struct *work)
{
struct btrfs_raid_bio *rbio;
...
@@ -185,4 +185,9 @@ void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
/*
* Placeholder definition to avoid warning, will be removed when
* the full write path is migrated.
*/
int rmw_rbio(struct btrfs_raid_bio *rbio);
#endif