Commit 6c0069c0 authored by Yuri Tikhonov, committed by Dan Williams

md/raid6: asynchronous handle_stripe6

1/ Use STRIPE_OP_BIOFILL to offload completion of read requests to
   raid_run_ops
2/ Implement a handler for sh->reconstruct_state similar to the raid5 case
   (adds handling of Q parity)
3/ Prevent handle_parity_checks6 from running concurrently with 'compute'
   operations
4/ Hook up raid_run_ops
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent d82dfee0
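
With this change, handle_stripe6 no longer copies data or computes syndromes inline. The analysis pass only records which operations are needed in the s.ops_request bit mask, and raid_run_ops is invoked once at the end of the routine to execute them asynchronously. Below is a minimal, self-contained sketch of that accumulate-then-dispatch pattern; the types and the run_ops helper are simplified stand-ins for illustration, not the kernel's definitions.

    #include <stdio.h>

    enum {
            STRIPE_OP_BIOFILL = 0,          /* copy completed read data to bios */
            STRIPE_OP_RECONSTRUCT = 1,      /* generate P/Q and drain writes */
    };

    struct stripe_ops_state {
            unsigned long ops_request;      /* ops accumulated during analysis */
    };

    /* stand-in for raid_run_ops(): dispatch every requested operation */
    static void run_ops(unsigned long ops_request)
    {
            if (ops_request & (1UL << STRIPE_OP_BIOFILL))
                    printf("run biofill (async copy to read bios)\n");
            if (ops_request & (1UL << STRIPE_OP_RECONSTRUCT))
                    printf("run reconstruct (async gen_syndrome)\n");
    }

    int main(void)
    {
            struct stripe_ops_state s = { .ops_request = 0 };

            /* analysis phase: decide what is needed, do no work inline */
            s.ops_request |= 1UL << STRIPE_OP_BIOFILL;

            /* single dispatch point, as handle_stripe6 now does */
            if (s.ops_request)
                    run_ops(s.ops_request);
            return 0;
    }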
@@ -3424,9 +3424,10 @@ static bool handle_stripe6(struct stripe_head *sh)
 	mdk_rdev_t *blocked_rdev = NULL;
 
 	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
-		"pd_idx=%d, qd_idx=%d\n",
+		"pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
 	       (unsigned long long)sh->sector, sh->state,
-	       atomic_read(&sh->count), pd_idx, qd_idx);
+	       atomic_read(&sh->count), pd_idx, qd_idx,
+	       sh->check_state, sh->reconstruct_state);
 	memset(&s, 0, sizeof(s));
 
 	spin_lock(&sh->lock);
@@ -3446,35 +3447,24 @@ static bool handle_stripe6(struct stripe_head *sh)
 
 		pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
 			i, dev->flags, dev->toread, dev->towrite, dev->written);
-		/* maybe we can reply to a read */
-		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
-			struct bio *rbi, *rbi2;
-			pr_debug("Return read for disc %d\n", i);
-			spin_lock_irq(&conf->device_lock);
-			rbi = dev->toread;
-			dev->toread = NULL;
-			if (test_and_clear_bit(R5_Overlap, &dev->flags))
-				wake_up(&conf->wait_for_overlap);
-			spin_unlock_irq(&conf->device_lock);
-			while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
-				copy_data(0, rbi, dev->page, dev->sector);
-				rbi2 = r5_next_bio(rbi, dev->sector);
-				spin_lock_irq(&conf->device_lock);
-				if (!raid5_dec_bi_phys_segments(rbi)) {
-					rbi->bi_next = return_bi;
-					return_bi = rbi;
-				}
-				spin_unlock_irq(&conf->device_lock);
-				rbi = rbi2;
-			}
-		}
+		/* maybe we can reply to a read
+		 *
+		 * new wantfill requests are only permitted while
+		 * ops_complete_biofill is guaranteed to be inactive
+		 */
+		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
+		    !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
+			set_bit(R5_Wantfill, &dev->flags);
 
 		/* now count some things */
 		if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
 		if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
+		if (test_bit(R5_Wantcompute, &dev->flags))
+			BUG_ON(++s.compute > 2);
 
-		if (dev->toread)
+		if (test_bit(R5_Wantfill, &dev->flags)) {
+			s.to_fill++;
+		} else if (dev->toread)
 			s.to_read++;
 		if (dev->towrite) {
 			s.to_write++;
@@ -3515,6 +3505,11 @@ static bool handle_stripe6(struct stripe_head *sh)
 		blocked_rdev = NULL;
 	}
 
+	if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
+		set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
+		set_bit(STRIPE_BIOFILL_RUN, &sh->state);
+	}
+
 	pr_debug("locked=%d uptodate=%d to_read=%d"
 	       " to_write=%d failed=%d failed_num=%d,%d\n",
 	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
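
Together with the Wantfill hunk above, the scheduling block just added forms a one-shot latch: R5_Wantfill may only be set while no biofill is in flight, and STRIPE_BIOFILL_RUN stays set until the completion callback (ops_complete_biofill) clears it, so the operation is requested at most once per round trip through handle_stripe6. A hedged sketch of the latch, using plain booleans as stand-ins for the kernel's atomic bitops:

    #include <stdbool.h>

    struct sh_flags { bool biofill_run; };  /* stands in for STRIPE_BIOFILL_RUN */
    struct ops_req  { bool biofill; };      /* stands in for STRIPE_OP_BIOFILL */

    /* scheduling side: request the copy-back once; the latch blocks re-entry */
    static void maybe_schedule_biofill(struct sh_flags *sh, struct ops_req *req,
                                       int to_fill)
    {
            if (to_fill && !sh->biofill_run) {
                    req->biofill = true;
                    sh->biofill_run = true;
            }
    }

    /* completion side (cf. ops_complete_biofill): unlatch so a later
     * handle_stripe pass may schedule a fresh biofill
     */
    static void complete_biofill(struct sh_flags *sh)
    {
            sh->biofill_run = false;
    }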
@@ -3555,9 +3550,43 @@ static bool handle_stripe6(struct stripe_head *sh)
 	 * or to load a block that is being partially written.
 	 */
 	if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
-	    (s.syncing && (s.uptodate < disks)) || s.expanding)
+	    (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
 		handle_stripe_fill6(sh, &s, &r6s, disks);
 
+	/* Now we check to see if any write operations have recently
+	 * completed
+	 */
+	if (sh->reconstruct_state == reconstruct_state_drain_result) {
+		int qd_idx = sh->qd_idx;
+
+		sh->reconstruct_state = reconstruct_state_idle;
+		/* All the 'written' buffers and the parity blocks are ready to
+		 * be written back to disk
+		 */
+		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
+		for (i = disks; i--; ) {
+			dev = &sh->dev[i];
+			if (test_bit(R5_LOCKED, &dev->flags) &&
+			    (i == sh->pd_idx || i == qd_idx ||
+			     dev->written)) {
+				pr_debug("Writing block %d\n", i);
+				BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
+				set_bit(R5_Wantwrite, &dev->flags);
+				if (!test_bit(R5_Insync, &dev->flags) ||
+				    ((i == sh->pd_idx || i == qd_idx) &&
+				      s.failed == 0))
+					set_bit(STRIPE_INSYNC, &sh->state);
+			}
+		}
+		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+			atomic_dec(&conf->preread_active_stripes);
+			if (atomic_read(&conf->preread_active_stripes) <
+				IO_THRESHOLD)
+				md_wakeup_thread(conf->mddev->thread);
+		}
+	}
+
 	/* Now to consider new write requests and what else, if anything
 	 * should be read.  We do not handle new writes when:
 	 * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
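
The sh->reconstruct_state field consulted above is a small stripe-wide state machine shared with the raid5 path. For orientation it looks approximately like the sketch below; the authoritative enum lives in drivers/md/raid5.h, so treat the comments here as paraphrase rather than quotation.

    enum reconstruct_states {
            reconstruct_state_idle = 0,
            reconstruct_state_prexor_drain_run,     /* prexor: compute new parity */
            reconstruct_state_drain_run,            /* ordinary write */
            reconstruct_state_run,                  /* expand */
            reconstruct_state_prexor_drain_result,
            reconstruct_state_drain_result,         /* writes drained, parity ready */
            reconstruct_state_result,               /* expansion reconstruct done */
    };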
@@ -3569,9 +3598,13 @@ static bool handle_stripe6(struct stripe_head *sh)
 
 	/* maybe we need to check and possibly fix the parity for this stripe
 	 * Any reads will already have been scheduled, so we just see if enough
-	 * data is available
+	 * data is available.  The parity check is held off while parity
+	 * dependent operations are in flight.
 	 */
-	if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
+	if (sh->check_state ||
+	    (s.syncing && s.locked == 0 &&
+	     !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
+	     !test_bit(STRIPE_INSYNC, &sh->state)))
 		handle_parity_checks6(conf, sh, &s, &r6s, disks);
 
 	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
@@ -3593,15 +3626,29 @@ static bool handle_stripe6(struct stripe_head *sh)
 				set_bit(R5_Wantwrite, &dev->flags);
 				set_bit(R5_ReWrite, &dev->flags);
 				set_bit(R5_LOCKED, &dev->flags);
+				s.locked++;
 			} else {
 				/* let's read it back */
 				set_bit(R5_Wantread, &dev->flags);
 				set_bit(R5_LOCKED, &dev->flags);
+				s.locked++;
 			}
 		}
 	}
 
-	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+	/* Finish reconstruct operations initiated by the expansion process */
+	if (sh->reconstruct_state == reconstruct_state_result) {
+		sh->reconstruct_state = reconstruct_state_idle;
+		clear_bit(STRIPE_EXPANDING, &sh->state);
+		for (i = conf->raid_disks; i--; ) {
+			set_bit(R5_Wantwrite, &sh->dev[i].flags);
+			set_bit(R5_LOCKED, &sh->dev[i].flags);
+			s.locked++;
+		}
+	}
+
+	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
+	    !sh->reconstruct_state) {
 		struct stripe_head *sh2
 			= get_active_stripe(conf, sh->sector, 1, 1);
 		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
@@ -3622,14 +3669,8 @@ static bool handle_stripe6(struct stripe_head *sh)
 		/* Need to write out all blocks after computing P&Q */
 		sh->disks = conf->raid_disks;
 		stripe_set_idx(sh->sector, conf, 0, sh);
-		compute_parity6(sh, RECONSTRUCT_WRITE);
-		for (i = conf->raid_disks ; i-- ;  ) {
-			set_bit(R5_LOCKED, &sh->dev[i].flags);
-			s.locked++;
-			set_bit(R5_Wantwrite, &sh->dev[i].flags);
-		}
-		clear_bit(STRIPE_EXPANDING, &sh->state);
-	} else if (s.expanded) {
+		schedule_reconstruction(sh, &s, 1, 1);
+	} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
 		atomic_dec(&conf->reshape_stripes);
 		wake_up(&conf->wait_for_overlap);
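
The expansion path shows the conversion in miniature: the synchronous compute_parity6(sh, RECONSTRUCT_WRITE) call and its inline write-out loop are replaced by schedule_reconstruction(sh, &s, 1, 1), and the write-out now happens once the state machine reaches reconstruct_state_result (the handler added in the previous hunk). A rough two-phase sketch of that split, with illustrative names rather than the kernel's:

    enum recon { RECON_IDLE, RECON_RUN, RECON_RESULT };

    struct stripe_sketch {
            enum recon reconstruct_state;
            unsigned long ops_request;
    };

    /* phase 1: request P/Q generation instead of computing it inline */
    static void schedule_recon_sketch(struct stripe_sketch *sh)
    {
            sh->reconstruct_state = RECON_RUN;      /* work is now in flight */
            sh->ops_request |= 1UL;                 /* ask the async engine to run */
    }

    /* phase 2: a later handle_stripe pass sees the result and writes out */
    static void finish_recon_sketch(struct stripe_sketch *sh)
    {
            if (sh->reconstruct_state != RECON_RESULT)
                    return;
            sh->reconstruct_state = RECON_IDLE;
            /* ...set R5_Wantwrite/R5_LOCKED on every block here... */
    }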
@@ -3647,6 +3688,9 @@ static bool handle_stripe6(struct stripe_head *sh)
 	if (unlikely(blocked_rdev))
 		md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
 
+	if (s.ops_request)
+		raid_run_ops(sh, s.ops_request);
+
 	ops_run_io(sh, &s);
 
 	return_io(return_bi);
...