Commit 82ea4be6 authored by Linus Torvalds

Merge tag 'md-3.10-fixes' of git://neil.brown.name/md

Pull md bugfixes from Neil Brown:
 "A few bugfixes for md

  Some tagged for -stable"

* tag 'md-3.10-fixes' of git://neil.brown.name/md:
  md/raid1,5,10: Disable WRITE SAME until a recovery strategy is in place
  md/raid1,raid10: use freeze_array in place of raise_barrier in various places.
  md/raid1: consider WRITE as successful only if at least one non-Faulty and non-rebuilding drive completed it.
  md: md_stop_writes() should always freeze recovery.
parents b844db31 5026d7a9
...@@ -5268,8 +5268,8 @@ static void md_clean(struct mddev *mddev) ...@@ -5268,8 +5268,8 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev) static void __md_stop_writes(struct mddev *mddev)
{ {
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (mddev->sync_thread) { if (mddev->sync_thread) {
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev); md_reap_sync_thread(mddev);
} }
......
...@@ -417,7 +417,17 @@ static void raid1_end_write_request(struct bio *bio, int error) ...@@ -417,7 +417,17 @@ static void raid1_end_write_request(struct bio *bio, int error)
r1_bio->bios[mirror] = NULL; r1_bio->bios[mirror] = NULL;
to_put = bio; to_put = bio;
set_bit(R1BIO_Uptodate, &r1_bio->state); /*
* Do not set R1BIO_Uptodate if the current device is
* rebuilding or Faulty. This is because we cannot use
* such device for properly reading the data back (we could
* potentially use it if the current write had fallen
* before rdev->recovery_offset, but for simplicity we don't
* check this here).
*/
if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
!test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
set_bit(R1BIO_Uptodate, &r1_bio->state);
/* Maybe we can clear some bad blocks. */ /* Maybe we can clear some bad blocks. */
if (is_badblock(conf->mirrors[mirror].rdev, if (is_badblock(conf->mirrors[mirror].rdev,
...@@ -870,17 +880,17 @@ static void allow_barrier(struct r1conf *conf) ...@@ -870,17 +880,17 @@ static void allow_barrier(struct r1conf *conf)
wake_up(&conf->wait_barrier); wake_up(&conf->wait_barrier);
} }
static void freeze_array(struct r1conf *conf) static void freeze_array(struct r1conf *conf, int extra)
{ {
/* stop syncio and normal IO and wait for everything to /* stop syncio and normal IO and wait for everything to
* go quiet. * go quiet.
* We increment barrier and nr_waiting, and then * We increment barrier and nr_waiting, and then
* wait until nr_pending match nr_queued+1 * wait until nr_pending match nr_queued+extra
* This is called in the context of one normal IO request * This is called in the context of one normal IO request
* that has failed. Thus any sync request that might be pending * that has failed. Thus any sync request that might be pending
* will be blocked by nr_pending, and we need to wait for * will be blocked by nr_pending, and we need to wait for
* pending IO requests to complete or be queued for re-try. * pending IO requests to complete or be queued for re-try.
* Thus the number queued (nr_queued) plus this request (1) * Thus the number queued (nr_queued) plus this request (extra)
* must match the number of pending IOs (nr_pending) before * must match the number of pending IOs (nr_pending) before
* we continue. * we continue.
*/ */
...@@ -888,7 +898,7 @@ static void freeze_array(struct r1conf *conf) ...@@ -888,7 +898,7 @@ static void freeze_array(struct r1conf *conf)
conf->barrier++; conf->barrier++;
conf->nr_waiting++; conf->nr_waiting++;
wait_event_lock_irq_cmd(conf->wait_barrier, wait_event_lock_irq_cmd(conf->wait_barrier,
conf->nr_pending == conf->nr_queued+1, conf->nr_pending == conf->nr_queued+extra,
conf->resync_lock, conf->resync_lock,
flush_pending_writes(conf)); flush_pending_writes(conf));
spin_unlock_irq(&conf->resync_lock); spin_unlock_irq(&conf->resync_lock);
...@@ -1544,8 +1554,8 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1544,8 +1554,8 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
* we wait for all outstanding requests to complete. * we wait for all outstanding requests to complete.
*/ */
synchronize_sched(); synchronize_sched();
raise_barrier(conf); freeze_array(conf, 0);
lower_barrier(conf); unfreeze_array(conf);
clear_bit(Unmerged, &rdev->flags); clear_bit(Unmerged, &rdev->flags);
} }
md_integrity_add_rdev(rdev, mddev); md_integrity_add_rdev(rdev, mddev);
...@@ -1595,11 +1605,11 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1595,11 +1605,11 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
*/ */
struct md_rdev *repl = struct md_rdev *repl =
conf->mirrors[conf->raid_disks + number].rdev; conf->mirrors[conf->raid_disks + number].rdev;
raise_barrier(conf); freeze_array(conf, 0);
clear_bit(Replacement, &repl->flags); clear_bit(Replacement, &repl->flags);
p->rdev = repl; p->rdev = repl;
conf->mirrors[conf->raid_disks + number].rdev = NULL; conf->mirrors[conf->raid_disks + number].rdev = NULL;
lower_barrier(conf); unfreeze_array(conf);
clear_bit(WantReplacement, &rdev->flags); clear_bit(WantReplacement, &rdev->flags);
} else } else
clear_bit(WantReplacement, &rdev->flags); clear_bit(WantReplacement, &rdev->flags);
...@@ -2195,7 +2205,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) ...@@ -2195,7 +2205,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
* frozen * frozen
*/ */
if (mddev->ro == 0) { if (mddev->ro == 0) {
freeze_array(conf); freeze_array(conf, 1);
fix_read_error(conf, r1_bio->read_disk, fix_read_error(conf, r1_bio->read_disk,
r1_bio->sector, r1_bio->sectors); r1_bio->sector, r1_bio->sectors);
unfreeze_array(conf); unfreeze_array(conf);
...@@ -2780,8 +2790,8 @@ static int run(struct mddev *mddev) ...@@ -2780,8 +2790,8 @@ static int run(struct mddev *mddev)
return PTR_ERR(conf); return PTR_ERR(conf);
if (mddev->queue) if (mddev->queue)
blk_queue_max_write_same_sectors(mddev->queue, blk_queue_max_write_same_sectors(mddev->queue, 0);
mddev->chunk_sectors);
rdev_for_each(rdev, mddev) { rdev_for_each(rdev, mddev) {
if (!mddev->gendisk) if (!mddev->gendisk)
continue; continue;
...@@ -2963,7 +2973,7 @@ static int raid1_reshape(struct mddev *mddev) ...@@ -2963,7 +2973,7 @@ static int raid1_reshape(struct mddev *mddev)
return -ENOMEM; return -ENOMEM;
} }
raise_barrier(conf); freeze_array(conf, 0);
/* ok, everything is stopped */ /* ok, everything is stopped */
oldpool = conf->r1bio_pool; oldpool = conf->r1bio_pool;
...@@ -2994,7 +3004,7 @@ static int raid1_reshape(struct mddev *mddev) ...@@ -2994,7 +3004,7 @@ static int raid1_reshape(struct mddev *mddev)
conf->raid_disks = mddev->raid_disks = raid_disks; conf->raid_disks = mddev->raid_disks = raid_disks;
mddev->delta_disks = 0; mddev->delta_disks = 0;
lower_barrier(conf); unfreeze_array(conf);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
......
...@@ -490,7 +490,17 @@ static void raid10_end_write_request(struct bio *bio, int error) ...@@ -490,7 +490,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
sector_t first_bad; sector_t first_bad;
int bad_sectors; int bad_sectors;
set_bit(R10BIO_Uptodate, &r10_bio->state); /*
* Do not set R10BIO_Uptodate if the current device is
* rebuilding or Faulty. This is because we cannot use
* such device for properly reading the data back (we could
* potentially use it if the current write had fallen
* before rdev->recovery_offset, but for simplicity we don't
* check this here).
*/
if (test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags))
set_bit(R10BIO_Uptodate, &r10_bio->state);
/* Maybe we can clear some bad blocks. */ /* Maybe we can clear some bad blocks. */
if (is_badblock(rdev, if (is_badblock(rdev,
...@@ -1055,17 +1065,17 @@ static void allow_barrier(struct r10conf *conf) ...@@ -1055,17 +1065,17 @@ static void allow_barrier(struct r10conf *conf)
wake_up(&conf->wait_barrier); wake_up(&conf->wait_barrier);
} }
static void freeze_array(struct r10conf *conf) static void freeze_array(struct r10conf *conf, int extra)
{ {
/* stop syncio and normal IO and wait for everything to /* stop syncio and normal IO and wait for everything to
* go quiet. * go quiet.
* We increment barrier and nr_waiting, and then * We increment barrier and nr_waiting, and then
* wait until nr_pending match nr_queued+1 * wait until nr_pending match nr_queued+extra
* This is called in the context of one normal IO request * This is called in the context of one normal IO request
* that has failed. Thus any sync request that might be pending * that has failed. Thus any sync request that might be pending
* will be blocked by nr_pending, and we need to wait for * will be blocked by nr_pending, and we need to wait for
* pending IO requests to complete or be queued for re-try. * pending IO requests to complete or be queued for re-try.
* Thus the number queued (nr_queued) plus this request (1) * Thus the number queued (nr_queued) plus this request (extra)
* must match the number of pending IOs (nr_pending) before * must match the number of pending IOs (nr_pending) before
* we continue. * we continue.
*/ */
...@@ -1073,7 +1083,7 @@ static void freeze_array(struct r10conf *conf) ...@@ -1073,7 +1083,7 @@ static void freeze_array(struct r10conf *conf)
conf->barrier++; conf->barrier++;
conf->nr_waiting++; conf->nr_waiting++;
wait_event_lock_irq_cmd(conf->wait_barrier, wait_event_lock_irq_cmd(conf->wait_barrier,
conf->nr_pending == conf->nr_queued+1, conf->nr_pending == conf->nr_queued+extra,
conf->resync_lock, conf->resync_lock,
flush_pending_writes(conf)); flush_pending_writes(conf));
...@@ -1837,8 +1847,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1837,8 +1847,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
* we wait for all outstanding requests to complete. * we wait for all outstanding requests to complete.
*/ */
synchronize_sched(); synchronize_sched();
raise_barrier(conf, 0); freeze_array(conf, 0);
lower_barrier(conf); unfreeze_array(conf);
clear_bit(Unmerged, &rdev->flags); clear_bit(Unmerged, &rdev->flags);
} }
md_integrity_add_rdev(rdev, mddev); md_integrity_add_rdev(rdev, mddev);
...@@ -2612,7 +2622,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -2612,7 +2622,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
r10_bio->devs[slot].bio = NULL; r10_bio->devs[slot].bio = NULL;
if (mddev->ro == 0) { if (mddev->ro == 0) {
freeze_array(conf); freeze_array(conf, 1);
fix_read_error(conf, mddev, r10_bio); fix_read_error(conf, mddev, r10_bio);
unfreeze_array(conf); unfreeze_array(conf);
} else } else
...@@ -3609,8 +3619,7 @@ static int run(struct mddev *mddev) ...@@ -3609,8 +3619,7 @@ static int run(struct mddev *mddev)
if (mddev->queue) { if (mddev->queue) {
blk_queue_max_discard_sectors(mddev->queue, blk_queue_max_discard_sectors(mddev->queue,
mddev->chunk_sectors); mddev->chunk_sectors);
blk_queue_max_write_same_sectors(mddev->queue, blk_queue_max_write_same_sectors(mddev->queue, 0);
mddev->chunk_sectors);
blk_queue_io_min(mddev->queue, chunk_size); blk_queue_io_min(mddev->queue, chunk_size);
if (conf->geo.raid_disks % conf->geo.near_copies) if (conf->geo.raid_disks % conf->geo.near_copies)
blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
......
...@@ -5466,7 +5466,7 @@ static int run(struct mddev *mddev) ...@@ -5466,7 +5466,7 @@ static int run(struct mddev *mddev)
if (mddev->major_version == 0 && if (mddev->major_version == 0 &&
mddev->minor_version > 90) mddev->minor_version > 90)
rdev->recovery_offset = reshape_offset; rdev->recovery_offset = reshape_offset;
if (rdev->recovery_offset < reshape_offset) { if (rdev->recovery_offset < reshape_offset) {
/* We need to check old and new layout */ /* We need to check old and new layout */
if (!only_parity(rdev->raid_disk, if (!only_parity(rdev->raid_disk,
...@@ -5589,6 +5589,8 @@ static int run(struct mddev *mddev) ...@@ -5589,6 +5589,8 @@ static int run(struct mddev *mddev)
*/ */
mddev->queue->limits.discard_zeroes_data = 0; mddev->queue->limits.discard_zeroes_data = 0;
blk_queue_max_write_same_sectors(mddev->queue, 0);
rdev_for_each(rdev, mddev) { rdev_for_each(rdev, mddev) {
disk_stack_limits(mddev->gendisk, rdev->bdev, disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9); rdev->data_offset << 9);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment