Commit e33129d8 authored by Dan Williams

md: handle_stripe5 - add request/completion logic for async write ops

After handle_stripe5 decides whether it wants to perform a
read-modify-write, or a reconstruct write it calls
handle_write_operations5.  A read-modify-write operation will perform an
xor subtraction of the blocks marked with the R5_Wantprexor flag, copy the
new data into the stripe (biodrain) and perform a postxor operation across
all up-to-date blocks to generate the new parity.  A reconstruct write is run
when all blocks are already up-to-date in the cache so all that is needed
is a biodrain and postxor.

On the completion path STRIPE_OP_PREXOR will be set if the operation was a
read-modify-write.  The STRIPE_OP_BIODRAIN flag is used in the completion
path to differentiate write-initiated postxor operations versus
expansion-initiated postxor operations.  Completion of a write triggers i/o
to the drives.

Changelog:
* make the 'rcw' parameter to handle_write_operations5 a simple flag, Neil Brown
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
parent d84e0f10
...@@ -1822,7 +1822,79 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) ...@@ -1822,7 +1822,79 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
} }
} }
/*
 * handle_write_operations5 - request the operations that make up a write
 * @sh:     stripe being written
 * @rcw:    flag; non-zero selects the reconstruct-write path, zero selects
 *          the read-modify-write path
 * @expand: non-zero when the postxor is requested without new bio data, in
 *          which case no biodrain is requested on the reconstruct-write path
 *
 * Marks the required STRIPE_OP_* bits in sh->ops.pending, bumps
 * sh->ops.count once per requested operation, and locks every device that
 * takes part in the write together with the parity device.
 *
 * Returns the number of devices this call locked (parity included).
 */
static int
handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
{
	int disks = sh->disks;
	int pd_idx = sh->pd_idx;
	struct r5dev *parity = &sh->dev[pd_idx];
	int nr_locked = 0;
	int d;

	if (!rcw) {
		/* read-modify-write: the old parity must be available, either
		 * already up to date or scheduled to be computed
		 */
		BUG_ON(!test_bit(R5_UPTODATE, &parity->flags) &&
		       !test_bit(R5_Wantcompute, &parity->flags));

		set_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
		set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
		set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
		sh->ops.count += 3;

		d = disks;
		while (d--) {
			struct r5dev *cur = &sh->dev[d];

			/* parity is handled after the loop */
			if (d == pd_idx)
				continue;

			/* Some blocks may still be locked for reading while
			 * others are ready to be written; only the latter get
			 * R5_Wantprexor so the two groups can be told apart.
			 */
			if (!cur->towrite)
				continue;
			if (!test_bit(R5_UPTODATE, &cur->flags) &&
			    !test_bit(R5_Wantcompute, &cur->flags))
				continue;

			set_bit(R5_Wantprexor, &cur->flags);
			set_bit(R5_LOCKED, &cur->flags);
			clear_bit(R5_UPTODATE, &cur->flags);
			nr_locked++;
		}
	} else {
		/* reconstruct-write: outside of an expand this is a proper
		 * write request, so there are bios whose new data has to be
		 * drained into the stripe cache
		 */
		if (!expand) {
			set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
			sh->ops.count++;
		}

		set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
		sh->ops.count++;

		d = disks;
		while (d--) {
			struct r5dev *cur = &sh->dev[d];

			if (!cur->towrite)
				continue;

			set_bit(R5_LOCKED, &cur->flags);
			if (!expand)
				clear_bit(R5_UPTODATE, &cur->flags);
			nr_locked++;
		}
	}

	/* the parity block stays locked for as long as the asynchronous
	 * operations are in flight
	 */
	set_bit(R5_LOCKED, &parity->flags);
	clear_bit(R5_UPTODATE, &parity->flags);
	nr_locked++;

	pr_debug("%s: stripe %llu locked: %d pending: %lx\n",
		__FUNCTION__, (unsigned long long)sh->sector,
		nr_locked, sh->ops.pending);

	return nr_locked;
}
/* /*
* Each stripe/dev can have one or more bion attached. * Each stripe/dev can have one or more bion attached.
...@@ -2217,27 +2289,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf, ...@@ -2217,27 +2289,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
* we can start a write request * we can start a write request
*/ */
if (s->locked == 0 && (rcw == 0 || rmw == 0) && if (s->locked == 0 && (rcw == 0 || rmw == 0) &&
!test_bit(STRIPE_BIT_DELAY, &sh->state)) { !test_bit(STRIPE_BIT_DELAY, &sh->state))
pr_debug("Computing parity...\n"); s->locked += handle_write_operations5(sh, rcw == 0, 0);
compute_parity5(sh, rcw == 0 ?
RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
/* now every locked buffer is ready to be written */
for (i = disks; i--; )
if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
pr_debug("Writing block %d\n", i);
s->locked++;
set_bit(R5_Wantwrite, &sh->dev[i].flags);
if (!test_bit(R5_Insync, &sh->dev[i].flags)
|| (i == sh->pd_idx && s->failed == 0))
set_bit(STRIPE_INSYNC, &sh->state);
}
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
atomic_dec(&conf->preread_active_stripes);
if (atomic_read(&conf->preread_active_stripes) <
IO_THRESHOLD)
md_wakeup_thread(conf->mddev->thread);
}
}
} }
static void handle_issuing_new_write_requests6(raid5_conf_t *conf, static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
...@@ -2656,8 +2709,70 @@ static void handle_stripe5(struct stripe_head *sh) ...@@ -2656,8 +2709,70 @@ static void handle_stripe5(struct stripe_head *sh)
(s.syncing && (s.uptodate < disks)) || s.expanding) (s.syncing && (s.uptodate < disks)) || s.expanding)
handle_issuing_new_read_requests5(sh, &s, disks); handle_issuing_new_read_requests5(sh, &s, disks);
/* now to consider writing and what else, if anything should be read */ /* Now we check to see if any write operations have recently
if (s.to_write) * completed
*/
/* leave prexor set until postxor is done, allows us to distinguish
* a rmw from a rcw during biodrain
*/
if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
for (i = disks; i--; )
clear_bit(R5_Wantprexor, &sh->dev[i].flags);
}
/* if only POSTXOR is set then this is an 'expand' postxor */
if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
/* All the 'written' buffers and the parity block are ready to
* be written back to disk
*/
BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
for (i = disks; i--; ) {
dev = &sh->dev[i];
if (test_bit(R5_LOCKED, &dev->flags) &&
(i == sh->pd_idx || dev->written)) {
pr_debug("Writing block %d\n", i);
set_bit(R5_Wantwrite, &dev->flags);
if (!test_and_set_bit(
STRIPE_OP_IO, &sh->ops.pending))
sh->ops.count++;
if (!test_bit(R5_Insync, &dev->flags) ||
(i == sh->pd_idx && s.failed == 0))
set_bit(STRIPE_INSYNC, &sh->state);
}
}
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
atomic_dec(&conf->preread_active_stripes);
if (atomic_read(&conf->preread_active_stripes) <
IO_THRESHOLD)
md_wakeup_thread(conf->mddev->thread);
}
}
/* Now to consider new write requests and what else, if anything
* should be read. We do not handle new writes when:
* 1/ A 'write' operation (copy+xor) is already in flight.
* 2/ A 'check' operation is in flight, as it may clobber the parity
* block.
*/
if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
!test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
handle_issuing_new_write_requests5(conf, sh, &s, disks); handle_issuing_new_write_requests5(conf, sh, &s, disks);
/* maybe we need to check and possibly fix the parity for this stripe /* maybe we need to check and possibly fix the parity for this stripe
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment