Commit f788893d authored by Jens Axboe

Merge tag 'md-next-20231208' of...

Merge tag 'md-next-20231208' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.8/block

Pull MD updates from Song:

"1. Fix/Cleanup RCU usage from conf->disks[i].rdev, by Yu Kuai;
 2. Fix raid5 hang issue, by Junxiao Bi;
 3. Add Yu Kuai as Reviewer of the md subsystem."

* tag 'md-next-20231208' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md: synchronize flush io with array reconfiguration
  MAINTAINERS: SOFTWARE RAID: Add Yu Kuai as Reviewer
  md/md-multipath: remove rcu protection to access rdev from conf
  md/raid5: remove rcu protection to access rdev from conf
  md/raid1: remove rcu protection to access rdev from conf
  md/raid10: remove rcu protection to access rdev from conf
  md: remove flag RemoveSynchronized
  Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d"
  md: bypass block throttle for superblock update
parents 1b151e24 fa2bbff7
...@@ -20106,6 +20106,7 @@ F: include/linux/property.h ...@@ -20106,6 +20106,7 @@ F: include/linux/property.h
SOFTWARE RAID (Multiple Disks) SUPPORT SOFTWARE RAID (Multiple Disks) SUPPORT
M: Song Liu <song@kernel.org> M: Song Liu <song@kernel.org>
R: Yu Kuai <yukuai3@huawei.com>
L: linux-raid@vger.kernel.org L: linux-raid@vger.kernel.org
S: Supported S: Supported
Q: https://patchwork.kernel.org/project/linux-raid/list/ Q: https://patchwork.kernel.org/project/linux-raid/list/
......
...@@ -32,17 +32,15 @@ static int multipath_map (struct mpconf *conf) ...@@ -32,17 +32,15 @@ static int multipath_map (struct mpconf *conf)
* now we use the first available disk. * now we use the first available disk.
*/ */
rcu_read_lock();
for (i = 0; i < disks; i++) { for (i = 0; i < disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev); struct md_rdev *rdev = conf->multipaths[i].rdev;
if (rdev && test_bit(In_sync, &rdev->flags) && if (rdev && test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags)) { !test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
return i; return i;
} }
} }
rcu_read_unlock();
pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n"); pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n");
return (-1); return (-1);
...@@ -137,14 +135,16 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev) ...@@ -137,14 +135,16 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
struct mpconf *conf = mddev->private; struct mpconf *conf = mddev->private;
int i; int i;
lockdep_assert_held(&mddev->lock);
seq_printf (seq, " [%d/%d] [", conf->raid_disks, seq_printf (seq, " [%d/%d] [", conf->raid_disks,
conf->raid_disks - mddev->degraded); conf->raid_disks - mddev->degraded);
rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev); struct md_rdev *rdev = READ_ONCE(conf->multipaths[i].rdev);
seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
seq_printf(seq, "%s",
rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
} }
rcu_read_unlock();
seq_putc(seq, ']'); seq_putc(seq, ']');
} }
...@@ -182,7 +182,7 @@ static void multipath_error (struct mddev *mddev, struct md_rdev *rdev) ...@@ -182,7 +182,7 @@ static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
conf->raid_disks - mddev->degraded); conf->raid_disks - mddev->degraded);
} }
static void print_multipath_conf (struct mpconf *conf) static void print_multipath_conf(struct mpconf *conf)
{ {
int i; int i;
struct multipath_info *tmp; struct multipath_info *tmp;
...@@ -195,6 +195,7 @@ static void print_multipath_conf (struct mpconf *conf) ...@@ -195,6 +195,7 @@ static void print_multipath_conf (struct mpconf *conf)
pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
conf->raid_disks); conf->raid_disks);
lockdep_assert_held(&conf->mddev->reconfig_mutex);
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
tmp = conf->multipaths + i; tmp = conf->multipaths + i;
if (tmp->rdev) if (tmp->rdev)
...@@ -231,7 +232,7 @@ static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -231,7 +232,7 @@ static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
rdev->raid_disk = path; rdev->raid_disk = path;
set_bit(In_sync, &rdev->flags); set_bit(In_sync, &rdev->flags);
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
rcu_assign_pointer(p->rdev, rdev); WRITE_ONCE(p->rdev, rdev);
err = 0; err = 0;
break; break;
} }
...@@ -257,16 +258,7 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -257,16 +258,7 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
err = -EBUSY; err = -EBUSY;
goto abort; goto abort;
} }
p->rdev = NULL; WRITE_ONCE(p->rdev, NULL);
if (!test_bit(RemoveSynchronized, &rdev->flags)) {
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
/* lost the race, try later */
err = -EBUSY;
p->rdev = rdev;
goto abort;
}
}
err = md_integrity_register(mddev); err = md_integrity_register(mddev);
} }
abort: abort:
......
...@@ -529,6 +529,9 @@ static void md_end_flush(struct bio *bio) ...@@ -529,6 +529,9 @@ static void md_end_flush(struct bio *bio)
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
if (atomic_dec_and_test(&mddev->flush_pending)) { if (atomic_dec_and_test(&mddev->flush_pending)) {
/* The pair is percpu_ref_get() from md_flush_request() */
percpu_ref_put(&mddev->active_io);
/* The pre-request flush has finished */ /* The pre-request flush has finished */
queue_work(md_wq, &mddev->flush_work); queue_work(md_wq, &mddev->flush_work);
} }
...@@ -548,12 +551,8 @@ static void submit_flushes(struct work_struct *ws) ...@@ -548,12 +551,8 @@ static void submit_flushes(struct work_struct *ws)
rdev_for_each_rcu(rdev, mddev) rdev_for_each_rcu(rdev, mddev)
if (rdev->raid_disk >= 0 && if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags)) { !test_bit(Faulty, &rdev->flags)) {
/* Take two references, one is dropped
* when request finishes, one after
* we reclaim rcu_read_lock
*/
struct bio *bi; struct bio *bi;
atomic_inc(&rdev->nr_pending);
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock(); rcu_read_unlock();
bi = bio_alloc_bioset(rdev->bdev, 0, bi = bio_alloc_bioset(rdev->bdev, 0,
...@@ -564,7 +563,6 @@ static void submit_flushes(struct work_struct *ws) ...@@ -564,7 +563,6 @@ static void submit_flushes(struct work_struct *ws)
atomic_inc(&mddev->flush_pending); atomic_inc(&mddev->flush_pending);
submit_bio(bi); submit_bio(bi);
rcu_read_lock(); rcu_read_lock();
rdev_dec_pending(rdev, mddev);
} }
rcu_read_unlock(); rcu_read_unlock();
if (atomic_dec_and_test(&mddev->flush_pending)) if (atomic_dec_and_test(&mddev->flush_pending))
...@@ -617,6 +615,18 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio) ...@@ -617,6 +615,18 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio)
/* new request after previous flush is completed */ /* new request after previous flush is completed */
if (ktime_after(req_start, mddev->prev_flush_start)) { if (ktime_after(req_start, mddev->prev_flush_start)) {
WARN_ON(mddev->flush_bio); WARN_ON(mddev->flush_bio);
/*
* Grab a reference to make sure mddev_suspend() will wait for
* this flush to be done.
*
* md_flush_request() is called under md_handle_request() and
* 'active_io' is already grabbed, hence percpu_ref_is_zero()
* won't pass. percpu_ref_tryget_live() can't be used because
* percpu_ref_kill() can be called by mddev_suspend()
* concurrently.
*/
WARN_ON(percpu_ref_is_zero(&mddev->active_io));
percpu_ref_get(&mddev->active_io);
mddev->flush_bio = bio; mddev->flush_bio = bio;
bio = NULL; bio = NULL;
} }
...@@ -1014,7 +1024,8 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, ...@@ -1014,7 +1024,8 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev, bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev,
1, 1,
REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE | REQ_META
| REQ_PREFLUSH | REQ_FUA,
GFP_NOIO, &mddev->sync_set); GFP_NOIO, &mddev->sync_set);
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
...@@ -9243,45 +9254,20 @@ static int remove_and_add_spares(struct mddev *mddev, ...@@ -9243,45 +9254,20 @@ static int remove_and_add_spares(struct mddev *mddev,
struct md_rdev *rdev; struct md_rdev *rdev;
int spares = 0; int spares = 0;
int removed = 0; int removed = 0;
bool remove_some = false;
if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
/* Mustn't remove devices when resync thread is running */ /* Mustn't remove devices when resync thread is running */
return 0; return 0;
rdev_for_each(rdev, mddev) { rdev_for_each(rdev, mddev) {
if ((this == NULL || rdev == this) && if ((this == NULL || rdev == this) && rdev_removeable(rdev) &&
rdev->raid_disk >= 0 && !mddev->pers->hot_remove_disk(mddev, rdev)) {
!test_bit(Blocked, &rdev->flags) &&
test_bit(Faulty, &rdev->flags) &&
atomic_read(&rdev->nr_pending)==0) {
/* Faulty non-Blocked devices with nr_pending == 0
* never get nr_pending incremented,
* never get Faulty cleared, and never get Blocked set.
* So we can synchronize_rcu now rather than once per device
*/
remove_some = true;
set_bit(RemoveSynchronized, &rdev->flags);
}
}
if (remove_some)
synchronize_rcu();
rdev_for_each(rdev, mddev) {
if ((this == NULL || rdev == this) &&
(test_bit(RemoveSynchronized, &rdev->flags) ||
rdev_removeable(rdev))) {
if (mddev->pers->hot_remove_disk(
mddev, rdev) == 0) {
sysfs_unlink_rdev(mddev, rdev); sysfs_unlink_rdev(mddev, rdev);
rdev->saved_raid_disk = rdev->raid_disk; rdev->saved_raid_disk = rdev->raid_disk;
rdev->raid_disk = -1; rdev->raid_disk = -1;
removed++; removed++;
} }
} }
if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
clear_bit(RemoveSynchronized, &rdev->flags);
}
if (removed && mddev->kobj.sd) if (removed && mddev->kobj.sd)
sysfs_notify_dirent_safe(mddev->sysfs_degraded); sysfs_notify_dirent_safe(mddev->sysfs_degraded);
......
...@@ -190,11 +190,6 @@ enum flag_bits { ...@@ -190,11 +190,6 @@ enum flag_bits {
* than other devices in the array * than other devices in the array
*/ */
ClusterRemove, ClusterRemove,
RemoveSynchronized, /* synchronize_rcu() was called after
* this device was known to be faulty,
* so it is safe to remove without
* another synchronize_rcu() call.
*/
ExternalBbl, /* External metadata provides bad ExternalBbl, /* External metadata provides bad
* block management for a disk * block management for a disk
*/ */
......
...@@ -609,7 +609,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect ...@@ -609,7 +609,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
int choose_first; int choose_first;
int choose_next_idle; int choose_next_idle;
rcu_read_lock();
/* /*
* Check if we can balance. We can balance on the whole * Check if we can balance. We can balance on the whole
* device if no resync is going on, or below the resync window. * device if no resync is going on, or below the resync window.
...@@ -642,7 +641,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect ...@@ -642,7 +641,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
unsigned int pending; unsigned int pending;
bool nonrot; bool nonrot;
rdev = rcu_dereference(conf->mirrors[disk].rdev); rdev = conf->mirrors[disk].rdev;
if (r1_bio->bios[disk] == IO_BLOCKED if (r1_bio->bios[disk] == IO_BLOCKED
|| rdev == NULL || rdev == NULL
|| test_bit(Faulty, &rdev->flags)) || test_bit(Faulty, &rdev->flags))
...@@ -773,7 +772,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect ...@@ -773,7 +772,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
} }
if (best_disk >= 0) { if (best_disk >= 0) {
rdev = rcu_dereference(conf->mirrors[best_disk].rdev); rdev = conf->mirrors[best_disk].rdev;
if (!rdev) if (!rdev)
goto retry; goto retry;
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
...@@ -784,7 +783,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect ...@@ -784,7 +783,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
conf->mirrors[best_disk].next_seq_sect = this_sector + sectors; conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
} }
rcu_read_unlock();
*max_sectors = sectors; *max_sectors = sectors;
return best_disk; return best_disk;
...@@ -1235,14 +1233,12 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, ...@@ -1235,14 +1233,12 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
if (r1bio_existed) { if (r1bio_existed) {
/* Need to get the block device name carefully */ /* Need to get the block device name carefully */
struct md_rdev *rdev; struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
rcu_read_lock();
rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev);
if (rdev) if (rdev)
snprintf(b, sizeof(b), "%pg", rdev->bdev); snprintf(b, sizeof(b), "%pg", rdev->bdev);
else else
strcpy(b, "???"); strcpy(b, "???");
rcu_read_unlock();
} }
/* /*
...@@ -1396,10 +1392,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1396,10 +1392,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
disks = conf->raid_disks * 2; disks = conf->raid_disks * 2;
blocked_rdev = NULL; blocked_rdev = NULL;
rcu_read_lock();
max_sectors = r1_bio->sectors; max_sectors = r1_bio->sectors;
for (i = 0; i < disks; i++) { for (i = 0; i < disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev = conf->mirrors[i].rdev;
/* /*
* The write-behind io is only attempted on drives marked as * The write-behind io is only attempted on drives marked as
...@@ -1465,7 +1460,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1465,7 +1460,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
} }
r1_bio->bios[i] = bio; r1_bio->bios[i] = bio;
} }
rcu_read_unlock();
if (unlikely(blocked_rdev)) { if (unlikely(blocked_rdev)) {
/* Wait for this device to become unblocked */ /* Wait for this device to become unblocked */
...@@ -1617,15 +1611,16 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev) ...@@ -1617,15 +1611,16 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
struct r1conf *conf = mddev->private; struct r1conf *conf = mddev->private;
int i; int i;
lockdep_assert_held(&mddev->lock);
seq_printf(seq, " [%d/%d] [", conf->raid_disks, seq_printf(seq, " [%d/%d] [", conf->raid_disks,
conf->raid_disks - mddev->degraded); conf->raid_disks - mddev->degraded);
rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev = READ_ONCE(conf->mirrors[i].rdev);
seq_printf(seq, "%s", seq_printf(seq, "%s",
rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
} }
rcu_read_unlock();
seq_printf(seq, "]"); seq_printf(seq, "]");
} }
...@@ -1691,16 +1686,15 @@ static void print_conf(struct r1conf *conf) ...@@ -1691,16 +1686,15 @@ static void print_conf(struct r1conf *conf)
pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
conf->raid_disks); conf->raid_disks);
rcu_read_lock(); lockdep_assert_held(&conf->mddev->reconfig_mutex);
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev = conf->mirrors[i].rdev;
if (rdev) if (rdev)
pr_debug(" disk %d, wo:%d, o:%d, dev:%pg\n", pr_debug(" disk %d, wo:%d, o:%d, dev:%pg\n",
i, !test_bit(In_sync, &rdev->flags), i, !test_bit(In_sync, &rdev->flags),
!test_bit(Faulty, &rdev->flags), !test_bit(Faulty, &rdev->flags),
rdev->bdev); rdev->bdev);
} }
rcu_read_unlock();
} }
static void close_sync(struct r1conf *conf) static void close_sync(struct r1conf *conf)
...@@ -1810,7 +1804,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1810,7 +1804,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
*/ */
if (rdev->saved_raid_disk < 0) if (rdev->saved_raid_disk < 0)
conf->fullsync = 1; conf->fullsync = 1;
rcu_assign_pointer(p->rdev, rdev); WRITE_ONCE(p->rdev, rdev);
break; break;
} }
if (test_bit(WantReplacement, &p->rdev->flags) && if (test_bit(WantReplacement, &p->rdev->flags) &&
...@@ -1826,7 +1820,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1826,7 +1820,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
rdev->raid_disk = repl_slot; rdev->raid_disk = repl_slot;
err = 0; err = 0;
conf->fullsync = 1; conf->fullsync = 1;
rcu_assign_pointer(p[conf->raid_disks].rdev, rdev); WRITE_ONCE(p[conf->raid_disks].rdev, rdev);
} }
print_conf(conf); print_conf(conf);
...@@ -1862,16 +1856,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1862,16 +1856,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
err = -EBUSY; err = -EBUSY;
goto abort; goto abort;
} }
p->rdev = NULL; WRITE_ONCE(p->rdev, NULL);
if (!test_bit(RemoveSynchronized, &rdev->flags)) {
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
/* lost the race, try later */
err = -EBUSY;
p->rdev = rdev;
goto abort;
}
}
if (conf->mirrors[conf->raid_disks + number].rdev) { if (conf->mirrors[conf->raid_disks + number].rdev) {
/* We just removed a device that is being replaced. /* We just removed a device that is being replaced.
* Move down the replacement. We drain all IO before * Move down the replacement. We drain all IO before
...@@ -1892,7 +1877,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1892,7 +1877,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
goto abort; goto abort;
} }
clear_bit(Replacement, &repl->flags); clear_bit(Replacement, &repl->flags);
p->rdev = repl; WRITE_ONCE(p->rdev, repl);
conf->mirrors[conf->raid_disks + number].rdev = NULL; conf->mirrors[conf->raid_disks + number].rdev = NULL;
unfreeze_array(conf); unfreeze_array(conf);
} }
...@@ -2290,8 +2275,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk, ...@@ -2290,8 +2275,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
sector_t first_bad; sector_t first_bad;
int bad_sectors; int bad_sectors;
rcu_read_lock(); rdev = conf->mirrors[d].rdev;
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev && if (rdev &&
(test_bit(In_sync, &rdev->flags) || (test_bit(In_sync, &rdev->flags) ||
(!test_bit(Faulty, &rdev->flags) && (!test_bit(Faulty, &rdev->flags) &&
...@@ -2299,15 +2283,14 @@ static void fix_read_error(struct r1conf *conf, int read_disk, ...@@ -2299,15 +2283,14 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
is_badblock(rdev, sect, s, is_badblock(rdev, sect, s,
&first_bad, &bad_sectors) == 0) { &first_bad, &bad_sectors) == 0) {
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
if (sync_page_io(rdev, sect, s<<9, if (sync_page_io(rdev, sect, s<<9,
conf->tmppage, REQ_OP_READ, false)) conf->tmppage, REQ_OP_READ, false))
success = 1; success = 1;
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
if (success) if (success)
break; break;
} else }
rcu_read_unlock();
d++; d++;
if (d == conf->raid_disks * 2) if (d == conf->raid_disks * 2)
d = 0; d = 0;
...@@ -2326,29 +2309,24 @@ static void fix_read_error(struct r1conf *conf, int read_disk, ...@@ -2326,29 +2309,24 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
if (d==0) if (d==0)
d = conf->raid_disks * 2; d = conf->raid_disks * 2;
d--; d--;
rcu_read_lock(); rdev = conf->mirrors[d].rdev;
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev && if (rdev &&
!test_bit(Faulty, &rdev->flags)) { !test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
r1_sync_page_io(rdev, sect, s, r1_sync_page_io(rdev, sect, s,
conf->tmppage, WRITE); conf->tmppage, WRITE);
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
} else }
rcu_read_unlock();
} }
d = start; d = start;
while (d != read_disk) { while (d != read_disk) {
if (d==0) if (d==0)
d = conf->raid_disks * 2; d = conf->raid_disks * 2;
d--; d--;
rcu_read_lock(); rdev = conf->mirrors[d].rdev;
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev && if (rdev &&
!test_bit(Faulty, &rdev->flags)) { !test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
if (r1_sync_page_io(rdev, sect, s, if (r1_sync_page_io(rdev, sect, s,
conf->tmppage, READ)) { conf->tmppage, READ)) {
atomic_add(s, &rdev->corrected_errors); atomic_add(s, &rdev->corrected_errors);
...@@ -2359,8 +2337,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk, ...@@ -2359,8 +2337,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
rdev->bdev); rdev->bdev);
} }
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
} else }
rcu_read_unlock();
} }
sectors -= s; sectors -= s;
sect += s; sect += s;
...@@ -2741,7 +2718,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -2741,7 +2718,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
r1_bio = raid1_alloc_init_r1buf(conf); r1_bio = raid1_alloc_init_r1buf(conf);
rcu_read_lock();
/* /*
* If we get a correctably read error during resync or recovery, * If we get a correctably read error during resync or recovery,
* we might want to read from a different device. So we * we might want to read from a different device. So we
...@@ -2762,7 +2738,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -2762,7 +2738,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
struct md_rdev *rdev; struct md_rdev *rdev;
bio = r1_bio->bios[i]; bio = r1_bio->bios[i];
rdev = rcu_dereference(conf->mirrors[i].rdev); rdev = conf->mirrors[i].rdev;
if (rdev == NULL || if (rdev == NULL ||
test_bit(Faulty, &rdev->flags)) { test_bit(Faulty, &rdev->flags)) {
if (i < conf->raid_disks) if (i < conf->raid_disks)
...@@ -2820,7 +2796,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -2820,7 +2796,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_opf |= MD_FAILFAST; bio->bi_opf |= MD_FAILFAST;
} }
} }
rcu_read_unlock();
if (disk < 0) if (disk < 0)
disk = wonly; disk = wonly;
r1_bio->read_disk = disk; r1_bio->read_disk = disk;
......
...@@ -743,7 +743,6 @@ static struct md_rdev *read_balance(struct r10conf *conf, ...@@ -743,7 +743,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
struct geom *geo = &conf->geo; struct geom *geo = &conf->geo;
raid10_find_phys(conf, r10_bio); raid10_find_phys(conf, r10_bio);
rcu_read_lock();
best_dist_slot = -1; best_dist_slot = -1;
min_pending = UINT_MAX; min_pending = UINT_MAX;
best_dist_rdev = NULL; best_dist_rdev = NULL;
...@@ -775,18 +774,11 @@ static struct md_rdev *read_balance(struct r10conf *conf, ...@@ -775,18 +774,11 @@ static struct md_rdev *read_balance(struct r10conf *conf,
if (r10_bio->devs[slot].bio == IO_BLOCKED) if (r10_bio->devs[slot].bio == IO_BLOCKED)
continue; continue;
disk = r10_bio->devs[slot].devnum; disk = r10_bio->devs[slot].devnum;
rdev = rcu_dereference(conf->mirrors[disk].replacement); rdev = conf->mirrors[disk].replacement;
if (rdev == NULL || test_bit(Faulty, &rdev->flags) || if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
r10_bio->devs[slot].addr + sectors > r10_bio->devs[slot].addr + sectors >
rdev->recovery_offset) { rdev->recovery_offset)
/* rdev = conf->mirrors[disk].rdev;
* Read replacement first to prevent reading both rdev
* and replacement as NULL during replacement replace
* rdev.
*/
smp_mb();
rdev = rcu_dereference(conf->mirrors[disk].rdev);
}
if (rdev == NULL || if (rdev == NULL ||
test_bit(Faulty, &rdev->flags)) test_bit(Faulty, &rdev->flags))
continue; continue;
...@@ -876,7 +868,6 @@ static struct md_rdev *read_balance(struct r10conf *conf, ...@@ -876,7 +868,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
r10_bio->read_slot = slot; r10_bio->read_slot = slot;
} else } else
rdev = NULL; rdev = NULL;
rcu_read_unlock();
*max_sectors = best_good_sectors; *max_sectors = best_good_sectors;
return rdev; return rdev;
...@@ -1198,9 +1189,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, ...@@ -1198,9 +1189,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
*/ */
gfp = GFP_NOIO | __GFP_HIGH; gfp = GFP_NOIO | __GFP_HIGH;
rcu_read_lock();
disk = r10_bio->devs[slot].devnum; disk = r10_bio->devs[slot].devnum;
err_rdev = rcu_dereference(conf->mirrors[disk].rdev); err_rdev = conf->mirrors[disk].rdev;
if (err_rdev) if (err_rdev)
snprintf(b, sizeof(b), "%pg", err_rdev->bdev); snprintf(b, sizeof(b), "%pg", err_rdev->bdev);
else { else {
...@@ -1208,7 +1198,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, ...@@ -1208,7 +1198,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
/* This never gets dereferenced */ /* This never gets dereferenced */
err_rdev = r10_bio->devs[slot].rdev; err_rdev = r10_bio->devs[slot].rdev;
} }
rcu_read_unlock();
} }
if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors))
...@@ -1279,15 +1268,8 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, ...@@ -1279,15 +1268,8 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
int devnum = r10_bio->devs[n_copy].devnum; int devnum = r10_bio->devs[n_copy].devnum;
struct bio *mbio; struct bio *mbio;
if (replacement) { rdev = replacement ? conf->mirrors[devnum].replacement :
rdev = conf->mirrors[devnum].replacement; conf->mirrors[devnum].rdev;
if (rdev == NULL) {
/* Replacement just got moved to main 'rdev' */
smp_mb();
rdev = conf->mirrors[devnum].rdev;
}
} else
rdev = conf->mirrors[devnum].rdev;
mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set); mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set);
if (replacement) if (replacement)
...@@ -1321,25 +1303,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, ...@@ -1321,25 +1303,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
} }
} }
static struct md_rdev *dereference_rdev_and_rrdev(struct raid10_info *mirror,
struct md_rdev **prrdev)
{
struct md_rdev *rdev, *rrdev;
rrdev = rcu_dereference(mirror->replacement);
/*
* Read replacement first to prevent reading both rdev and
* replacement as NULL during replacement replace rdev.
*/
smp_mb();
rdev = rcu_dereference(mirror->rdev);
if (rdev == rrdev)
rrdev = NULL;
*prrdev = rrdev;
return rdev;
}
static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
{ {
int i; int i;
...@@ -1348,11 +1311,11 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -1348,11 +1311,11 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
retry_wait: retry_wait:
blocked_rdev = NULL; blocked_rdev = NULL;
rcu_read_lock();
for (i = 0; i < conf->copies; i++) { for (i = 0; i < conf->copies; i++) {
struct md_rdev *rdev, *rrdev; struct md_rdev *rdev, *rrdev;
rdev = dereference_rdev_and_rrdev(&conf->mirrors[i], &rrdev); rdev = conf->mirrors[i].rdev;
rrdev = conf->mirrors[i].replacement;
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev; blocked_rdev = rdev;
...@@ -1391,7 +1354,6 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -1391,7 +1354,6 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
} }
} }
} }
rcu_read_unlock();
if (unlikely(blocked_rdev)) { if (unlikely(blocked_rdev)) {
/* Have to wait for this device to get unblocked, then retry */ /* Have to wait for this device to get unblocked, then retry */
...@@ -1474,14 +1436,14 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1474,14 +1436,14 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
wait_blocked_dev(mddev, r10_bio); wait_blocked_dev(mddev, r10_bio);
rcu_read_lock();
max_sectors = r10_bio->sectors; max_sectors = r10_bio->sectors;
for (i = 0; i < conf->copies; i++) { for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum; int d = r10_bio->devs[i].devnum;
struct md_rdev *rdev, *rrdev; struct md_rdev *rdev, *rrdev;
rdev = dereference_rdev_and_rrdev(&conf->mirrors[d], &rrdev); rdev = conf->mirrors[d].rdev;
rrdev = conf->mirrors[d].replacement;
if (rdev && (test_bit(Faulty, &rdev->flags))) if (rdev && (test_bit(Faulty, &rdev->flags)))
rdev = NULL; rdev = NULL;
if (rrdev && (test_bit(Faulty, &rrdev->flags))) if (rrdev && (test_bit(Faulty, &rrdev->flags)))
...@@ -1535,7 +1497,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1535,7 +1497,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
atomic_inc(&rrdev->nr_pending); atomic_inc(&rrdev->nr_pending);
} }
} }
rcu_read_unlock();
if (max_sectors < r10_bio->sectors) if (max_sectors < r10_bio->sectors)
r10_bio->sectors = max_sectors; r10_bio->sectors = max_sectors;
...@@ -1625,17 +1586,8 @@ static void raid10_end_discard_request(struct bio *bio) ...@@ -1625,17 +1586,8 @@ static void raid10_end_discard_request(struct bio *bio)
set_bit(R10BIO_Uptodate, &r10_bio->state); set_bit(R10BIO_Uptodate, &r10_bio->state);
dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
if (repl) rdev = repl ? conf->mirrors[dev].replacement :
rdev = conf->mirrors[dev].replacement; conf->mirrors[dev].rdev;
if (!rdev) {
/*
* raid10_remove_disk uses smp_mb to make sure rdev is set to
* replacement before setting replacement to NULL. It can read
* rdev first without barrier protect even replacement is NULL
*/
smp_rmb();
rdev = conf->mirrors[dev].rdev;
}
raid_end_discard_bio(r10_bio); raid_end_discard_bio(r10_bio);
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
...@@ -1785,11 +1737,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) ...@@ -1785,11 +1737,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
* inc refcount on their rdev. Record them by setting * inc refcount on their rdev. Record them by setting
* bios[x] to bio * bios[x] to bio
*/ */
rcu_read_lock();
for (disk = 0; disk < geo->raid_disks; disk++) { for (disk = 0; disk < geo->raid_disks; disk++) {
struct md_rdev *rdev, *rrdev; struct md_rdev *rdev, *rrdev;
rdev = dereference_rdev_and_rrdev(&conf->mirrors[disk], &rrdev); rdev = conf->mirrors[disk].rdev;
rrdev = conf->mirrors[disk].replacement;
r10_bio->devs[disk].bio = NULL; r10_bio->devs[disk].bio = NULL;
r10_bio->devs[disk].repl_bio = NULL; r10_bio->devs[disk].repl_bio = NULL;
...@@ -1809,7 +1761,6 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) ...@@ -1809,7 +1761,6 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
atomic_inc(&rrdev->nr_pending); atomic_inc(&rrdev->nr_pending);
} }
} }
rcu_read_unlock();
atomic_set(&r10_bio->remaining, 1); atomic_set(&r10_bio->remaining, 1);
for (disk = 0; disk < geo->raid_disks; disk++) { for (disk = 0; disk < geo->raid_disks; disk++) {
...@@ -1939,6 +1890,8 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev) ...@@ -1939,6 +1890,8 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev)
struct r10conf *conf = mddev->private; struct r10conf *conf = mddev->private;
int i; int i;
lockdep_assert_held(&mddev->lock);
if (conf->geo.near_copies < conf->geo.raid_disks) if (conf->geo.near_copies < conf->geo.raid_disks)
seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2); seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2);
if (conf->geo.near_copies > 1) if (conf->geo.near_copies > 1)
...@@ -1953,12 +1906,11 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev) ...@@ -1953,12 +1906,11 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev)
} }
seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks, seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
conf->geo.raid_disks - mddev->degraded); conf->geo.raid_disks - mddev->degraded);
rcu_read_lock();
for (i = 0; i < conf->geo.raid_disks; i++) { for (i = 0; i < conf->geo.raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev = READ_ONCE(conf->mirrors[i].rdev);
seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
} }
rcu_read_unlock();
seq_printf(seq, "]"); seq_printf(seq, "]");
} }
...@@ -1980,7 +1932,6 @@ static int _enough(struct r10conf *conf, int previous, int ignore) ...@@ -1980,7 +1932,6 @@ static int _enough(struct r10conf *conf, int previous, int ignore)
ncopies = conf->geo.near_copies; ncopies = conf->geo.near_copies;
} }
rcu_read_lock();
do { do {
int n = conf->copies; int n = conf->copies;
int cnt = 0; int cnt = 0;
...@@ -1988,7 +1939,7 @@ static int _enough(struct r10conf *conf, int previous, int ignore) ...@@ -1988,7 +1939,7 @@ static int _enough(struct r10conf *conf, int previous, int ignore)
while (n--) { while (n--) {
struct md_rdev *rdev; struct md_rdev *rdev;
if (this != ignore && if (this != ignore &&
(rdev = rcu_dereference(conf->mirrors[this].rdev)) && (rdev = conf->mirrors[this].rdev) &&
test_bit(In_sync, &rdev->flags)) test_bit(In_sync, &rdev->flags))
cnt++; cnt++;
this = (this+1) % disks; this = (this+1) % disks;
...@@ -1999,7 +1950,6 @@ static int _enough(struct r10conf *conf, int previous, int ignore) ...@@ -1999,7 +1950,6 @@ static int _enough(struct r10conf *conf, int previous, int ignore)
} while (first != 0); } while (first != 0);
has_enough = 1; has_enough = 1;
out: out:
rcu_read_unlock();
return has_enough; return has_enough;
} }
...@@ -2072,8 +2022,7 @@ static void print_conf(struct r10conf *conf) ...@@ -2072,8 +2022,7 @@ static void print_conf(struct r10conf *conf)
pr_debug(" --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded, pr_debug(" --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded,
conf->geo.raid_disks); conf->geo.raid_disks);
/* This is only called with ->reconfix_mutex held, so lockdep_assert_held(&conf->mddev->reconfig_mutex);
* rcu protection of rdev is not needed */
for (i = 0; i < conf->geo.raid_disks; i++) { for (i = 0; i < conf->geo.raid_disks; i++) {
rdev = conf->mirrors[i].rdev; rdev = conf->mirrors[i].rdev;
if (rdev) if (rdev)
...@@ -2190,7 +2139,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -2190,7 +2139,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
err = 0; err = 0;
if (rdev->saved_raid_disk != mirror) if (rdev->saved_raid_disk != mirror)
conf->fullsync = 1; conf->fullsync = 1;
rcu_assign_pointer(p->rdev, rdev); WRITE_ONCE(p->rdev, rdev);
break; break;
} }
...@@ -2204,7 +2153,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -2204,7 +2153,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
disk_stack_limits(mddev->gendisk, rdev->bdev, disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9); rdev->data_offset << 9);
conf->fullsync = 1; conf->fullsync = 1;
rcu_assign_pointer(p->replacement, rdev); WRITE_ONCE(p->replacement, rdev);
} }
print_conf(conf); print_conf(conf);
...@@ -2246,24 +2195,12 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -2246,24 +2195,12 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
err = -EBUSY; err = -EBUSY;
goto abort; goto abort;
} }
*rdevp = NULL; WRITE_ONCE(*rdevp, NULL);
if (!test_bit(RemoveSynchronized, &rdev->flags)) {
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
/* lost the race, try later */
err = -EBUSY;
*rdevp = rdev;
goto abort;
}
}
if (p->replacement) { if (p->replacement) {
/* We must have just cleared 'rdev' */ /* We must have just cleared 'rdev' */
p->rdev = p->replacement; WRITE_ONCE(p->rdev, p->replacement);
clear_bit(Replacement, &p->replacement->flags); clear_bit(Replacement, &p->replacement->flags);
smp_mb(); /* Make sure other CPUs may see both as identical WRITE_ONCE(p->replacement, NULL);
* but will never see neither -- if they are careful.
*/
p->replacement = NULL;
} }
clear_bit(WantReplacement, &rdev->flags); clear_bit(WantReplacement, &rdev->flags);
...@@ -2763,20 +2700,18 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2763,20 +2700,18 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
if (s > (PAGE_SIZE>>9)) if (s > (PAGE_SIZE>>9))
s = PAGE_SIZE >> 9; s = PAGE_SIZE >> 9;
rcu_read_lock();
do { do {
sector_t first_bad; sector_t first_bad;
int bad_sectors; int bad_sectors;
d = r10_bio->devs[sl].devnum; d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev); rdev = conf->mirrors[d].rdev;
if (rdev && if (rdev &&
test_bit(In_sync, &rdev->flags) && test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags) && !test_bit(Faulty, &rdev->flags) &&
is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
&first_bad, &bad_sectors) == 0) { &first_bad, &bad_sectors) == 0) {
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
success = sync_page_io(rdev, success = sync_page_io(rdev,
r10_bio->devs[sl].addr + r10_bio->devs[sl].addr +
sect, sect,
...@@ -2784,7 +2719,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2784,7 +2719,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
conf->tmppage, conf->tmppage,
REQ_OP_READ, false); REQ_OP_READ, false);
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
rcu_read_lock();
if (success) if (success)
break; break;
} }
...@@ -2792,7 +2726,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2792,7 +2726,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
if (sl == conf->copies) if (sl == conf->copies)
sl = 0; sl = 0;
} while (sl != slot); } while (sl != slot);
rcu_read_unlock();
if (!success) { if (!success) {
/* Cannot read from anywhere, just mark the block /* Cannot read from anywhere, just mark the block
...@@ -2816,20 +2749,18 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2816,20 +2749,18 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
start = sl; start = sl;
/* write it back and re-read */ /* write it back and re-read */
rcu_read_lock();
while (sl != slot) { while (sl != slot) {
if (sl==0) if (sl==0)
sl = conf->copies; sl = conf->copies;
sl--; sl--;
d = r10_bio->devs[sl].devnum; d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev); rdev = conf->mirrors[d].rdev;
if (!rdev || if (!rdev ||
test_bit(Faulty, &rdev->flags) || test_bit(Faulty, &rdev->flags) ||
!test_bit(In_sync, &rdev->flags)) !test_bit(In_sync, &rdev->flags))
continue; continue;
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
if (r10_sync_page_io(rdev, if (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr + r10_bio->devs[sl].addr +
sect, sect,
...@@ -2848,7 +2779,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2848,7 +2779,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
rdev->bdev); rdev->bdev);
} }
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
rcu_read_lock();
} }
sl = start; sl = start;
while (sl != slot) { while (sl != slot) {
...@@ -2856,14 +2786,13 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2856,14 +2786,13 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
sl = conf->copies; sl = conf->copies;
sl--; sl--;
d = r10_bio->devs[sl].devnum; d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev); rdev = conf->mirrors[d].rdev;
if (!rdev || if (!rdev ||
test_bit(Faulty, &rdev->flags) || test_bit(Faulty, &rdev->flags) ||
!test_bit(In_sync, &rdev->flags)) !test_bit(In_sync, &rdev->flags))
continue; continue;
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
switch (r10_sync_page_io(rdev, switch (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr + r10_bio->devs[sl].addr +
sect, sect,
...@@ -2891,9 +2820,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2891,9 +2820,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
} }
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
rcu_read_lock();
} }
rcu_read_unlock();
sectors -= s; sectors -= s;
sect += s; sect += s;
...@@ -3367,14 +3294,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3367,14 +3294,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
/* Completed a full sync so the replacements /* Completed a full sync so the replacements
* are now fully recovered. * are now fully recovered.
*/ */
rcu_read_lock();
for (i = 0; i < conf->geo.raid_disks; i++) { for (i = 0; i < conf->geo.raid_disks; i++) {
struct md_rdev *rdev = struct md_rdev *rdev =
rcu_dereference(conf->mirrors[i].replacement); conf->mirrors[i].replacement;
if (rdev) if (rdev)
rdev->recovery_offset = MaxSector; rdev->recovery_offset = MaxSector;
} }
rcu_read_unlock();
} }
conf->fullsync = 0; conf->fullsync = 0;
} }
...@@ -3455,9 +3381,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3455,9 +3381,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
struct raid10_info *mirror = &conf->mirrors[i]; struct raid10_info *mirror = &conf->mirrors[i];
struct md_rdev *mrdev, *mreplace; struct md_rdev *mrdev, *mreplace;
rcu_read_lock(); mrdev = mirror->rdev;
mrdev = rcu_dereference(mirror->rdev); mreplace = mirror->replacement;
mreplace = rcu_dereference(mirror->replacement);
if (mrdev && (test_bit(Faulty, &mrdev->flags) || if (mrdev && (test_bit(Faulty, &mrdev->flags) ||
test_bit(In_sync, &mrdev->flags))) test_bit(In_sync, &mrdev->flags)))
...@@ -3465,22 +3390,18 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3465,22 +3390,18 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (mreplace && test_bit(Faulty, &mreplace->flags)) if (mreplace && test_bit(Faulty, &mreplace->flags))
mreplace = NULL; mreplace = NULL;
if (!mrdev && !mreplace) { if (!mrdev && !mreplace)
rcu_read_unlock();
continue; continue;
}
still_degraded = 0; still_degraded = 0;
/* want to reconstruct this device */ /* want to reconstruct this device */
rb2 = r10_bio; rb2 = r10_bio;
sect = raid10_find_virt(conf, sector_nr, i); sect = raid10_find_virt(conf, sector_nr, i);
if (sect >= mddev->resync_max_sectors) { if (sect >= mddev->resync_max_sectors)
/* last stripe is not complete - don't /* last stripe is not complete - don't
* try to recover this sector. * try to recover this sector.
*/ */
rcu_read_unlock();
continue; continue;
}
/* Unless we are doing a full sync, or a replacement /* Unless we are doing a full sync, or a replacement
* we only need to recover the block if it is set in * we only need to recover the block if it is set in
* the bitmap * the bitmap
...@@ -3496,14 +3417,12 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3496,14 +3417,12 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
* that there will never be anything to do here * that there will never be anything to do here
*/ */
chunks_skipped = -1; chunks_skipped = -1;
rcu_read_unlock();
continue; continue;
} }
if (mrdev) if (mrdev)
atomic_inc(&mrdev->nr_pending); atomic_inc(&mrdev->nr_pending);
if (mreplace) if (mreplace)
atomic_inc(&mreplace->nr_pending); atomic_inc(&mreplace->nr_pending);
rcu_read_unlock();
r10_bio = raid10_alloc_init_r10buf(conf); r10_bio = raid10_alloc_init_r10buf(conf);
r10_bio->state = 0; r10_bio->state = 0;
...@@ -3522,10 +3441,9 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3522,10 +3441,9 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
/* Need to check if the array will still be /* Need to check if the array will still be
* degraded * degraded
*/ */
rcu_read_lock();
for (j = 0; j < conf->geo.raid_disks; j++) { for (j = 0; j < conf->geo.raid_disks; j++) {
struct md_rdev *rdev = rcu_dereference( struct md_rdev *rdev = conf->mirrors[j].rdev;
conf->mirrors[j].rdev);
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
still_degraded = 1; still_degraded = 1;
break; break;
...@@ -3540,8 +3458,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3540,8 +3458,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
int k; int k;
int d = r10_bio->devs[j].devnum; int d = r10_bio->devs[j].devnum;
sector_t from_addr, to_addr; sector_t from_addr, to_addr;
struct md_rdev *rdev = struct md_rdev *rdev = conf->mirrors[d].rdev;
rcu_dereference(conf->mirrors[d].rdev);
sector_t sector, first_bad; sector_t sector, first_bad;
int bad_sectors; int bad_sectors;
if (!rdev || if (!rdev ||
...@@ -3620,7 +3537,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3620,7 +3537,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
atomic_inc(&r10_bio->remaining); atomic_inc(&r10_bio->remaining);
break; break;
} }
rcu_read_unlock();
if (j == conf->copies) { if (j == conf->copies) {
/* Cannot recover, so abort the recovery or /* Cannot recover, so abort the recovery or
* record a bad block */ * record a bad block */
...@@ -3747,12 +3663,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3747,12 +3663,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio = r10_bio->devs[i].bio; bio = r10_bio->devs[i].bio;
bio->bi_status = BLK_STS_IOERR; bio->bi_status = BLK_STS_IOERR;
rcu_read_lock(); rdev = conf->mirrors[d].rdev;
rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev == NULL || test_bit(Faulty, &rdev->flags))
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
rcu_read_unlock();
continue; continue;
}
sector = r10_bio->devs[i].addr; sector = r10_bio->devs[i].addr;
if (is_badblock(rdev, sector, max_sync, if (is_badblock(rdev, sector, max_sync,
&first_bad, &bad_sectors)) { &first_bad, &bad_sectors)) {
...@@ -3762,7 +3676,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3762,7 +3676,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bad_sectors -= (sector - first_bad); bad_sectors -= (sector - first_bad);
if (max_sync > bad_sectors) if (max_sync > bad_sectors)
max_sync = bad_sectors; max_sync = bad_sectors;
rcu_read_unlock();
continue; continue;
} }
} }
...@@ -3778,11 +3691,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3778,11 +3691,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio_set_dev(bio, rdev->bdev); bio_set_dev(bio, rdev->bdev);
count++; count++;
rdev = rcu_dereference(conf->mirrors[d].replacement); rdev = conf->mirrors[d].replacement;
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { if (rdev == NULL || test_bit(Faulty, &rdev->flags))
rcu_read_unlock();
continue; continue;
}
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
/* Need to set up for writing to the replacement */ /* Need to set up for writing to the replacement */
...@@ -3799,7 +3711,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3799,7 +3711,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_iter.bi_sector = sector + rdev->data_offset; bio->bi_iter.bi_sector = sector + rdev->data_offset;
bio_set_dev(bio, rdev->bdev); bio_set_dev(bio, rdev->bdev);
count++; count++;
rcu_read_unlock();
} }
if (count < 2) { if (count < 2) {
...@@ -4509,11 +4420,11 @@ static int calc_degraded(struct r10conf *conf) ...@@ -4509,11 +4420,11 @@ static int calc_degraded(struct r10conf *conf)
int degraded, degraded2; int degraded, degraded2;
int i; int i;
rcu_read_lock();
degraded = 0; degraded = 0;
/* 'prev' section first */ /* 'prev' section first */
for (i = 0; i < conf->prev.raid_disks; i++) { for (i = 0; i < conf->prev.raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev = conf->mirrors[i].rdev;
if (!rdev || test_bit(Faulty, &rdev->flags)) if (!rdev || test_bit(Faulty, &rdev->flags))
degraded++; degraded++;
else if (!test_bit(In_sync, &rdev->flags)) else if (!test_bit(In_sync, &rdev->flags))
...@@ -4523,13 +4434,12 @@ static int calc_degraded(struct r10conf *conf) ...@@ -4523,13 +4434,12 @@ static int calc_degraded(struct r10conf *conf)
*/ */
degraded++; degraded++;
} }
rcu_read_unlock();
if (conf->geo.raid_disks == conf->prev.raid_disks) if (conf->geo.raid_disks == conf->prev.raid_disks)
return degraded; return degraded;
rcu_read_lock();
degraded2 = 0; degraded2 = 0;
for (i = 0; i < conf->geo.raid_disks; i++) { for (i = 0; i < conf->geo.raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev = conf->mirrors[i].rdev;
if (!rdev || test_bit(Faulty, &rdev->flags)) if (!rdev || test_bit(Faulty, &rdev->flags))
degraded2++; degraded2++;
else if (!test_bit(In_sync, &rdev->flags)) { else if (!test_bit(In_sync, &rdev->flags)) {
...@@ -4542,7 +4452,6 @@ static int calc_degraded(struct r10conf *conf) ...@@ -4542,7 +4452,6 @@ static int calc_degraded(struct r10conf *conf)
degraded2++; degraded2++;
} }
} }
rcu_read_unlock();
if (degraded2 > degraded) if (degraded2 > degraded)
return degraded2; return degraded2;
return degraded; return degraded;
...@@ -4974,16 +4883,15 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, ...@@ -4974,16 +4883,15 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
blist = read_bio; blist = read_bio;
read_bio->bi_next = NULL; read_bio->bi_next = NULL;
rcu_read_lock();
for (s = 0; s < conf->copies*2; s++) { for (s = 0; s < conf->copies*2; s++) {
struct bio *b; struct bio *b;
int d = r10_bio->devs[s/2].devnum; int d = r10_bio->devs[s/2].devnum;
struct md_rdev *rdev2; struct md_rdev *rdev2;
if (s&1) { if (s&1) {
rdev2 = rcu_dereference(conf->mirrors[d].replacement); rdev2 = conf->mirrors[d].replacement;
b = r10_bio->devs[s/2].repl_bio; b = r10_bio->devs[s/2].repl_bio;
} else { } else {
rdev2 = rcu_dereference(conf->mirrors[d].rdev); rdev2 = conf->mirrors[d].rdev;
b = r10_bio->devs[s/2].bio; b = r10_bio->devs[s/2].bio;
} }
if (!rdev2 || test_bit(Faulty, &rdev2->flags)) if (!rdev2 || test_bit(Faulty, &rdev2->flags))
...@@ -5017,7 +4925,6 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, ...@@ -5017,7 +4925,6 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
sector_nr += len >> 9; sector_nr += len >> 9;
nr_sectors += len >> 9; nr_sectors += len >> 9;
} }
rcu_read_unlock();
r10_bio->sectors = nr_sectors; r10_bio->sectors = nr_sectors;
/* Now submit the read */ /* Now submit the read */
...@@ -5070,20 +4977,17 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -5070,20 +4977,17 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
struct bio *b; struct bio *b;
int d = r10_bio->devs[s/2].devnum; int d = r10_bio->devs[s/2].devnum;
struct md_rdev *rdev; struct md_rdev *rdev;
rcu_read_lock();
if (s&1) { if (s&1) {
rdev = rcu_dereference(conf->mirrors[d].replacement); rdev = conf->mirrors[d].replacement;
b = r10_bio->devs[s/2].repl_bio; b = r10_bio->devs[s/2].repl_bio;
} else { } else {
rdev = rcu_dereference(conf->mirrors[d].rdev); rdev = conf->mirrors[d].rdev;
b = r10_bio->devs[s/2].bio; b = r10_bio->devs[s/2].bio;
} }
if (!rdev || test_bit(Faulty, &rdev->flags)) { if (!rdev || test_bit(Faulty, &rdev->flags))
rcu_read_unlock();
continue; continue;
}
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
md_sync_acct_bio(b, r10_bio->sectors); md_sync_acct_bio(b, r10_bio->sectors);
atomic_inc(&r10_bio->remaining); atomic_inc(&r10_bio->remaining);
b->bi_next = NULL; b->bi_next = NULL;
...@@ -5154,10 +5058,9 @@ static int handle_reshape_read_error(struct mddev *mddev, ...@@ -5154,10 +5058,9 @@ static int handle_reshape_read_error(struct mddev *mddev,
if (s > (PAGE_SIZE >> 9)) if (s > (PAGE_SIZE >> 9))
s = PAGE_SIZE >> 9; s = PAGE_SIZE >> 9;
rcu_read_lock();
while (!success) { while (!success) {
int d = r10b->devs[slot].devnum; int d = r10b->devs[slot].devnum;
struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); struct md_rdev *rdev = conf->mirrors[d].rdev;
sector_t addr; sector_t addr;
if (rdev == NULL || if (rdev == NULL ||
test_bit(Faulty, &rdev->flags) || test_bit(Faulty, &rdev->flags) ||
...@@ -5166,14 +5069,12 @@ static int handle_reshape_read_error(struct mddev *mddev, ...@@ -5166,14 +5069,12 @@ static int handle_reshape_read_error(struct mddev *mddev,
addr = r10b->devs[slot].addr + idx * PAGE_SIZE; addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
success = sync_page_io(rdev, success = sync_page_io(rdev,
addr, addr,
s << 9, s << 9,
pages[idx], pages[idx],
REQ_OP_READ, false); REQ_OP_READ, false);
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
rcu_read_lock();
if (success) if (success)
break; break;
failed: failed:
...@@ -5183,7 +5084,6 @@ static int handle_reshape_read_error(struct mddev *mddev, ...@@ -5183,7 +5084,6 @@ static int handle_reshape_read_error(struct mddev *mddev,
if (slot == first_slot) if (slot == first_slot)
break; break;
} }
rcu_read_unlock();
if (!success) { if (!success) {
/* couldn't read this block, must give up */ /* couldn't read this block, must give up */
set_bit(MD_RECOVERY_INTR, set_bit(MD_RECOVERY_INTR,
...@@ -5209,12 +5109,8 @@ static void end_reshape_write(struct bio *bio) ...@@ -5209,12 +5109,8 @@ static void end_reshape_write(struct bio *bio)
struct md_rdev *rdev = NULL; struct md_rdev *rdev = NULL;
d = find_bio_disk(conf, r10_bio, bio, &slot, &repl); d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
if (repl) rdev = repl ? conf->mirrors[d].replacement :
rdev = conf->mirrors[d].replacement; conf->mirrors[d].rdev;
if (!rdev) {
smp_mb();
rdev = conf->mirrors[d].rdev;
}
if (bio->bi_status) { if (bio->bi_status) {
/* FIXME should record badblock */ /* FIXME should record badblock */
...@@ -5249,18 +5145,16 @@ static void raid10_finish_reshape(struct mddev *mddev) ...@@ -5249,18 +5145,16 @@ static void raid10_finish_reshape(struct mddev *mddev)
mddev->resync_max_sectors = mddev->array_sectors; mddev->resync_max_sectors = mddev->array_sectors;
} else { } else {
int d; int d;
rcu_read_lock();
for (d = conf->geo.raid_disks ; for (d = conf->geo.raid_disks ;
d < conf->geo.raid_disks - mddev->delta_disks; d < conf->geo.raid_disks - mddev->delta_disks;
d++) { d++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); struct md_rdev *rdev = conf->mirrors[d].rdev;
if (rdev) if (rdev)
clear_bit(In_sync, &rdev->flags); clear_bit(In_sync, &rdev->flags);
rdev = rcu_dereference(conf->mirrors[d].replacement); rdev = conf->mirrors[d].replacement;
if (rdev) if (rdev)
clear_bit(In_sync, &rdev->flags); clear_bit(In_sync, &rdev->flags);
} }
rcu_read_unlock();
} }
mddev->layout = mddev->new_layout; mddev->layout = mddev->new_layout;
mddev->chunk_sectors = 1 << conf->geo.chunk_shift; mddev->chunk_sectors = 1 << conf->geo.chunk_shift;
......
...@@ -1890,28 +1890,22 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf, ...@@ -1890,28 +1890,22 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
continue; continue;
/* in case device is broken */ /* in case device is broken */
rcu_read_lock(); rdev = conf->disks[disk_index].rdev;
rdev = rcu_dereference(conf->disks[disk_index].rdev);
if (rdev) { if (rdev) {
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
sync_page_io(rdev, sh->sector, PAGE_SIZE, sync_page_io(rdev, sh->sector, PAGE_SIZE,
sh->dev[disk_index].page, REQ_OP_WRITE, sh->dev[disk_index].page, REQ_OP_WRITE,
false); false);
rdev_dec_pending(rdev, rdev->mddev); rdev_dec_pending(rdev, rdev->mddev);
rcu_read_lock();
} }
rrdev = rcu_dereference(conf->disks[disk_index].replacement); rrdev = conf->disks[disk_index].replacement;
if (rrdev) { if (rrdev) {
atomic_inc(&rrdev->nr_pending); atomic_inc(&rrdev->nr_pending);
rcu_read_unlock();
sync_page_io(rrdev, sh->sector, PAGE_SIZE, sync_page_io(rrdev, sh->sector, PAGE_SIZE,
sh->dev[disk_index].page, REQ_OP_WRITE, sh->dev[disk_index].page, REQ_OP_WRITE,
false); false);
rdev_dec_pending(rrdev, rrdev->mddev); rdev_dec_pending(rrdev, rrdev->mddev);
rcu_read_lock();
} }
rcu_read_unlock();
} }
ctx->data_parity_stripes++; ctx->data_parity_stripes++;
out: out:
...@@ -2948,7 +2942,6 @@ bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect) ...@@ -2948,7 +2942,6 @@ bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect)
if (!log) if (!log)
return false; return false;
WARN_ON_ONCE(!rcu_read_lock_held());
tree_index = r5c_tree_index(conf, sect); tree_index = r5c_tree_index(conf, sect);
slot = radix_tree_lookup(&log->big_stripe_tree, tree_index); slot = radix_tree_lookup(&log->big_stripe_tree, tree_index);
return slot != NULL; return slot != NULL;
......
...@@ -620,11 +620,9 @@ static void ppl_do_flush(struct ppl_io_unit *io) ...@@ -620,11 +620,9 @@ static void ppl_do_flush(struct ppl_io_unit *io)
struct md_rdev *rdev; struct md_rdev *rdev;
struct block_device *bdev = NULL; struct block_device *bdev = NULL;
rcu_read_lock(); rdev = conf->disks[i].rdev;
rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags)) if (rdev && !test_bit(Faulty, &rdev->flags))
bdev = rdev->bdev; bdev = rdev->bdev;
rcu_read_unlock();
if (bdev) { if (bdev) {
struct bio *bio; struct bio *bio;
...@@ -882,9 +880,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e, ...@@ -882,9 +880,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
(unsigned long long)r_sector, dd_idx, (unsigned long long)r_sector, dd_idx,
(unsigned long long)sector); (unsigned long long)sector);
/* Array has not started so rcu dereference is safe */ rdev = conf->disks[dd_idx].rdev;
rdev = rcu_dereference_protected(
conf->disks[dd_idx].rdev, 1);
if (!rdev || (!test_bit(In_sync, &rdev->flags) && if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
sector >= rdev->recovery_offset)) { sector >= rdev->recovery_offset)) {
pr_debug("%s:%*s data member disk %d missing\n", pr_debug("%s:%*s data member disk %d missing\n",
...@@ -936,9 +932,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e, ...@@ -936,9 +932,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
0, &disk, &sh); 0, &disk, &sh);
BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk)); BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
/* Array has not started so rcu dereference is safe */ parity_rdev = conf->disks[sh.pd_idx].rdev;
parity_rdev = rcu_dereference_protected(
conf->disks[sh.pd_idx].rdev, 1);
BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev); BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
pr_debug("%s:%*s write parity at sector %llu, disk %pg\n", pr_debug("%s:%*s write parity at sector %llu, disk %pg\n",
...@@ -1404,9 +1398,7 @@ int ppl_init_log(struct r5conf *conf) ...@@ -1404,9 +1398,7 @@ int ppl_init_log(struct r5conf *conf)
for (i = 0; i < ppl_conf->count; i++) { for (i = 0; i < ppl_conf->count; i++) {
struct ppl_log *log = &ppl_conf->child_logs[i]; struct ppl_log *log = &ppl_conf->child_logs[i];
/* Array has not started so rcu dereference is safe */ struct md_rdev *rdev = conf->disks[i].rdev;
struct md_rdev *rdev =
rcu_dereference_protected(conf->disks[i].rdev, 1);
mutex_init(&log->io_mutex); mutex_init(&log->io_mutex);
spin_lock_init(&log->io_list_lock); spin_lock_init(&log->io_list_lock);
......
...@@ -36,7 +36,6 @@ ...@@ -36,7 +36,6 @@
*/ */
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/raid/pq.h> #include <linux/raid/pq.h>
#include <linux/async_tx.h> #include <linux/async_tx.h>
...@@ -694,12 +693,12 @@ int raid5_calc_degraded(struct r5conf *conf) ...@@ -694,12 +693,12 @@ int raid5_calc_degraded(struct r5conf *conf)
int degraded, degraded2; int degraded, degraded2;
int i; int i;
rcu_read_lock();
degraded = 0; degraded = 0;
for (i = 0; i < conf->previous_raid_disks; i++) { for (i = 0; i < conf->previous_raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
if (rdev && test_bit(Faulty, &rdev->flags)) if (rdev && test_bit(Faulty, &rdev->flags))
rdev = rcu_dereference(conf->disks[i].replacement); rdev = READ_ONCE(conf->disks[i].replacement);
if (!rdev || test_bit(Faulty, &rdev->flags)) if (!rdev || test_bit(Faulty, &rdev->flags))
degraded++; degraded++;
else if (test_bit(In_sync, &rdev->flags)) else if (test_bit(In_sync, &rdev->flags))
...@@ -717,15 +716,14 @@ int raid5_calc_degraded(struct r5conf *conf) ...@@ -717,15 +716,14 @@ int raid5_calc_degraded(struct r5conf *conf)
if (conf->raid_disks >= conf->previous_raid_disks) if (conf->raid_disks >= conf->previous_raid_disks)
degraded++; degraded++;
} }
rcu_read_unlock();
if (conf->raid_disks == conf->previous_raid_disks) if (conf->raid_disks == conf->previous_raid_disks)
return degraded; return degraded;
rcu_read_lock();
degraded2 = 0; degraded2 = 0;
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
if (rdev && test_bit(Faulty, &rdev->flags)) if (rdev && test_bit(Faulty, &rdev->flags))
rdev = rcu_dereference(conf->disks[i].replacement); rdev = READ_ONCE(conf->disks[i].replacement);
if (!rdev || test_bit(Faulty, &rdev->flags)) if (!rdev || test_bit(Faulty, &rdev->flags))
degraded2++; degraded2++;
else if (test_bit(In_sync, &rdev->flags)) else if (test_bit(In_sync, &rdev->flags))
...@@ -739,7 +737,6 @@ int raid5_calc_degraded(struct r5conf *conf) ...@@ -739,7 +737,6 @@ int raid5_calc_degraded(struct r5conf *conf)
if (conf->raid_disks <= conf->previous_raid_disks) if (conf->raid_disks <= conf->previous_raid_disks)
degraded2++; degraded2++;
} }
rcu_read_unlock();
if (degraded2 > degraded) if (degraded2 > degraded)
return degraded2; return degraded2;
return degraded; return degraded;
...@@ -1184,14 +1181,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -1184,14 +1181,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
bi = &dev->req; bi = &dev->req;
rbi = &dev->rreq; /* For writing to replacement */ rbi = &dev->rreq; /* For writing to replacement */
rcu_read_lock(); rdev = conf->disks[i].rdev;
rrdev = rcu_dereference(conf->disks[i].replacement); rrdev = conf->disks[i].replacement;
smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */
rdev = rcu_dereference(conf->disks[i].rdev);
if (!rdev) {
rdev = rrdev;
rrdev = NULL;
}
if (op_is_write(op)) { if (op_is_write(op)) {
if (replace_only) if (replace_only)
rdev = NULL; rdev = NULL;
...@@ -1212,7 +1203,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -1212,7 +1203,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
rrdev = NULL; rrdev = NULL;
if (rrdev) if (rrdev)
atomic_inc(&rrdev->nr_pending); atomic_inc(&rrdev->nr_pending);
rcu_read_unlock();
/* We have already checked bad blocks for reads. Now /* We have already checked bad blocks for reads. Now
* need to check for writes. We never accept write errors * need to check for writes. We never accept write errors
...@@ -2731,28 +2721,6 @@ static void shrink_stripes(struct r5conf *conf) ...@@ -2731,28 +2721,6 @@ static void shrink_stripes(struct r5conf *conf)
conf->slab_cache = NULL; conf->slab_cache = NULL;
} }
/*
* This helper wraps rcu_dereference_protected() and can be used when
* it is known that the nr_pending of the rdev is elevated.
*/
static struct md_rdev *rdev_pend_deref(struct md_rdev __rcu *rdev)
{
return rcu_dereference_protected(rdev,
atomic_read(&rcu_access_pointer(rdev)->nr_pending));
}
/*
* This helper wraps rcu_dereference_protected() and should be used
* when it is known that the mddev_lock() is held. This is safe
* seeing raid5_remove_disk() has the same lock held.
*/
static struct md_rdev *rdev_mdlock_deref(struct mddev *mddev,
struct md_rdev __rcu *rdev)
{
return rcu_dereference_protected(rdev,
lockdep_is_held(&mddev->reconfig_mutex));
}
static void raid5_end_read_request(struct bio * bi) static void raid5_end_read_request(struct bio * bi)
{ {
struct stripe_head *sh = bi->bi_private; struct stripe_head *sh = bi->bi_private;
...@@ -2778,9 +2746,9 @@ static void raid5_end_read_request(struct bio * bi) ...@@ -2778,9 +2746,9 @@ static void raid5_end_read_request(struct bio * bi)
* In that case it moved down to 'rdev'. * In that case it moved down to 'rdev'.
* rdev is not removed until all requests are finished. * rdev is not removed until all requests are finished.
*/ */
rdev = rdev_pend_deref(conf->disks[i].replacement); rdev = conf->disks[i].replacement;
if (!rdev) if (!rdev)
rdev = rdev_pend_deref(conf->disks[i].rdev); rdev = conf->disks[i].rdev;
if (use_new_offset(conf, sh)) if (use_new_offset(conf, sh))
s = sh->sector + rdev->new_data_offset; s = sh->sector + rdev->new_data_offset;
...@@ -2893,11 +2861,11 @@ static void raid5_end_write_request(struct bio *bi) ...@@ -2893,11 +2861,11 @@ static void raid5_end_write_request(struct bio *bi)
for (i = 0 ; i < disks; i++) { for (i = 0 ; i < disks; i++) {
if (bi == &sh->dev[i].req) { if (bi == &sh->dev[i].req) {
rdev = rdev_pend_deref(conf->disks[i].rdev); rdev = conf->disks[i].rdev;
break; break;
} }
if (bi == &sh->dev[i].rreq) { if (bi == &sh->dev[i].rreq) {
rdev = rdev_pend_deref(conf->disks[i].replacement); rdev = conf->disks[i].replacement;
if (rdev) if (rdev)
replacement = 1; replacement = 1;
else else
...@@ -2905,7 +2873,7 @@ static void raid5_end_write_request(struct bio *bi) ...@@ -2905,7 +2873,7 @@ static void raid5_end_write_request(struct bio *bi)
* replaced it. rdev is not removed * replaced it. rdev is not removed
* until all requests are finished. * until all requests are finished.
*/ */
rdev = rdev_pend_deref(conf->disks[i].rdev); rdev = conf->disks[i].rdev;
break; break;
} }
} }
...@@ -3667,15 +3635,13 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, ...@@ -3667,15 +3635,13 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
int bitmap_end = 0; int bitmap_end = 0;
if (test_bit(R5_ReadError, &sh->dev[i].flags)) { if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
struct md_rdev *rdev; struct md_rdev *rdev = conf->disks[i].rdev;
rcu_read_lock();
rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && test_bit(In_sync, &rdev->flags) && if (rdev && test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags)) !test_bit(Faulty, &rdev->flags))
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
else else
rdev = NULL; rdev = NULL;
rcu_read_unlock();
if (rdev) { if (rdev) {
if (!rdev_set_badblocks( if (!rdev_set_badblocks(
rdev, rdev,
...@@ -3793,16 +3759,17 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, ...@@ -3793,16 +3759,17 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
/* During recovery devices cannot be removed, so /* During recovery devices cannot be removed, so
* locking and refcounting of rdevs is not needed * locking and refcounting of rdevs is not needed
*/ */
rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); struct md_rdev *rdev = conf->disks[i].rdev;
if (rdev if (rdev
&& !test_bit(Faulty, &rdev->flags) && !test_bit(Faulty, &rdev->flags)
&& !test_bit(In_sync, &rdev->flags) && !test_bit(In_sync, &rdev->flags)
&& !rdev_set_badblocks(rdev, sh->sector, && !rdev_set_badblocks(rdev, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0)) RAID5_STRIPE_SECTORS(conf), 0))
abort = 1; abort = 1;
rdev = rcu_dereference(conf->disks[i].replacement); rdev = conf->disks[i].replacement;
if (rdev if (rdev
&& !test_bit(Faulty, &rdev->flags) && !test_bit(Faulty, &rdev->flags)
&& !test_bit(In_sync, &rdev->flags) && !test_bit(In_sync, &rdev->flags)
...@@ -3810,7 +3777,6 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, ...@@ -3810,7 +3777,6 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
RAID5_STRIPE_SECTORS(conf), 0)) RAID5_STRIPE_SECTORS(conf), 0))
abort = 1; abort = 1;
} }
rcu_read_unlock();
if (abort) if (abort)
conf->recovery_disabled = conf->recovery_disabled =
conf->mddev->recovery_disabled; conf->mddev->recovery_disabled;
...@@ -3823,15 +3789,13 @@ static int want_replace(struct stripe_head *sh, int disk_idx) ...@@ -3823,15 +3789,13 @@ static int want_replace(struct stripe_head *sh, int disk_idx)
struct md_rdev *rdev; struct md_rdev *rdev;
int rv = 0; int rv = 0;
rcu_read_lock(); rdev = sh->raid_conf->disks[disk_idx].replacement;
rdev = rcu_dereference(sh->raid_conf->disks[disk_idx].replacement);
if (rdev if (rdev
&& !test_bit(Faulty, &rdev->flags) && !test_bit(Faulty, &rdev->flags)
&& !test_bit(In_sync, &rdev->flags) && !test_bit(In_sync, &rdev->flags)
&& (rdev->recovery_offset <= sh->sector && (rdev->recovery_offset <= sh->sector
|| rdev->mddev->recovery_cp <= sh->sector)) || rdev->mddev->recovery_cp <= sh->sector))
rv = 1; rv = 1;
rcu_read_unlock();
return rv; return rv;
} }
...@@ -4708,7 +4672,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -4708,7 +4672,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
s->log_failed = r5l_log_disk_error(conf); s->log_failed = r5l_log_disk_error(conf);
/* Now to look around and see what can be done */ /* Now to look around and see what can be done */
rcu_read_lock();
for (i=disks; i--; ) { for (i=disks; i--; ) {
struct md_rdev *rdev; struct md_rdev *rdev;
sector_t first_bad; sector_t first_bad;
...@@ -4753,7 +4716,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -4753,7 +4716,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
/* Prefer to use the replacement for reads, but only /* Prefer to use the replacement for reads, but only
* if it is recovered enough and has no bad blocks. * if it is recovered enough and has no bad blocks.
*/ */
rdev = rcu_dereference(conf->disks[i].replacement); rdev = conf->disks[i].replacement;
if (rdev && !test_bit(Faulty, &rdev->flags) && if (rdev && !test_bit(Faulty, &rdev->flags) &&
rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) && rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) &&
!is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
...@@ -4764,7 +4727,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -4764,7 +4727,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
set_bit(R5_NeedReplace, &dev->flags); set_bit(R5_NeedReplace, &dev->flags);
else else
clear_bit(R5_NeedReplace, &dev->flags); clear_bit(R5_NeedReplace, &dev->flags);
rdev = rcu_dereference(conf->disks[i].rdev); rdev = conf->disks[i].rdev;
clear_bit(R5_ReadRepl, &dev->flags); clear_bit(R5_ReadRepl, &dev->flags);
} }
if (rdev && test_bit(Faulty, &rdev->flags)) if (rdev && test_bit(Faulty, &rdev->flags))
...@@ -4811,8 +4774,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -4811,8 +4774,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
if (test_bit(R5_WriteError, &dev->flags)) { if (test_bit(R5_WriteError, &dev->flags)) {
/* This flag does not apply to '.replacement' /* This flag does not apply to '.replacement'
* only to .rdev, so make sure to check that*/ * only to .rdev, so make sure to check that*/
struct md_rdev *rdev2 = rcu_dereference( struct md_rdev *rdev2 = conf->disks[i].rdev;
conf->disks[i].rdev);
if (rdev2 == rdev) if (rdev2 == rdev)
clear_bit(R5_Insync, &dev->flags); clear_bit(R5_Insync, &dev->flags);
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
...@@ -4824,8 +4787,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -4824,8 +4787,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
if (test_bit(R5_MadeGood, &dev->flags)) { if (test_bit(R5_MadeGood, &dev->flags)) {
/* This flag does not apply to '.replacement' /* This flag does not apply to '.replacement'
* only to .rdev, so make sure to check that*/ * only to .rdev, so make sure to check that*/
struct md_rdev *rdev2 = rcu_dereference( struct md_rdev *rdev2 = conf->disks[i].rdev;
conf->disks[i].rdev);
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
s->handle_bad_blocks = 1; s->handle_bad_blocks = 1;
atomic_inc(&rdev2->nr_pending); atomic_inc(&rdev2->nr_pending);
...@@ -4833,8 +4796,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -4833,8 +4796,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
clear_bit(R5_MadeGood, &dev->flags); clear_bit(R5_MadeGood, &dev->flags);
} }
if (test_bit(R5_MadeGoodRepl, &dev->flags)) { if (test_bit(R5_MadeGoodRepl, &dev->flags)) {
struct md_rdev *rdev2 = rcu_dereference( struct md_rdev *rdev2 = conf->disks[i].replacement;
conf->disks[i].replacement);
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
s->handle_bad_blocks = 1; s->handle_bad_blocks = 1;
atomic_inc(&rdev2->nr_pending); atomic_inc(&rdev2->nr_pending);
...@@ -4855,8 +4818,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -4855,8 +4818,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
if (rdev && !test_bit(Faulty, &rdev->flags)) if (rdev && !test_bit(Faulty, &rdev->flags))
do_recovery = 1; do_recovery = 1;
else if (!rdev) { else if (!rdev) {
rdev = rcu_dereference( rdev = conf->disks[i].replacement;
conf->disks[i].replacement);
if (rdev && !test_bit(Faulty, &rdev->flags)) if (rdev && !test_bit(Faulty, &rdev->flags))
do_recovery = 1; do_recovery = 1;
} }
...@@ -4883,7 +4845,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -4883,7 +4845,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
else else
s->replacing = 1; s->replacing = 1;
} }
rcu_read_unlock();
} }
/* /*
...@@ -5340,23 +5301,23 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -5340,23 +5301,23 @@ static void handle_stripe(struct stripe_head *sh)
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
if (test_and_clear_bit(R5_WriteError, &dev->flags)) { if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
/* We own a safe reference to the rdev */ /* We own a safe reference to the rdev */
rdev = rdev_pend_deref(conf->disks[i].rdev); rdev = conf->disks[i].rdev;
if (!rdev_set_badblocks(rdev, sh->sector, if (!rdev_set_badblocks(rdev, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0)) RAID5_STRIPE_SECTORS(conf), 0))
md_error(conf->mddev, rdev); md_error(conf->mddev, rdev);
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
} }
if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
rdev = rdev_pend_deref(conf->disks[i].rdev); rdev = conf->disks[i].rdev;
rdev_clear_badblocks(rdev, sh->sector, rdev_clear_badblocks(rdev, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0); RAID5_STRIPE_SECTORS(conf), 0);
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
} }
if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
rdev = rdev_pend_deref(conf->disks[i].replacement); rdev = conf->disks[i].replacement;
if (!rdev) if (!rdev)
/* rdev have been moved down */ /* rdev have been moved down */
rdev = rdev_pend_deref(conf->disks[i].rdev); rdev = conf->disks[i].rdev;
rdev_clear_badblocks(rdev, sh->sector, rdev_clear_badblocks(rdev, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0); RAID5_STRIPE_SECTORS(conf), 0);
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
...@@ -5515,24 +5476,22 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) ...@@ -5515,24 +5476,22 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
&dd_idx, NULL); &dd_idx, NULL);
end_sector = sector + bio_sectors(raid_bio); end_sector = sector + bio_sectors(raid_bio);
rcu_read_lock();
if (r5c_big_stripe_cached(conf, sector)) if (r5c_big_stripe_cached(conf, sector))
goto out_rcu_unlock; return 0;
rdev = rcu_dereference(conf->disks[dd_idx].replacement); rdev = conf->disks[dd_idx].replacement;
if (!rdev || test_bit(Faulty, &rdev->flags) || if (!rdev || test_bit(Faulty, &rdev->flags) ||
rdev->recovery_offset < end_sector) { rdev->recovery_offset < end_sector) {
rdev = rcu_dereference(conf->disks[dd_idx].rdev); rdev = conf->disks[dd_idx].rdev;
if (!rdev) if (!rdev)
goto out_rcu_unlock; return 0;
if (test_bit(Faulty, &rdev->flags) || if (test_bit(Faulty, &rdev->flags) ||
!(test_bit(In_sync, &rdev->flags) || !(test_bit(In_sync, &rdev->flags) ||
rdev->recovery_offset >= end_sector)) rdev->recovery_offset >= end_sector))
goto out_rcu_unlock; return 0;
} }
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad, if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
&bad_sectors)) { &bad_sectors)) {
...@@ -5576,10 +5535,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) ...@@ -5576,10 +5535,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
raid_bio->bi_iter.bi_sector); raid_bio->bi_iter.bi_sector);
submit_bio_noacct(align_bio); submit_bio_noacct(align_bio);
return 1; return 1;
out_rcu_unlock:
rcu_read_unlock();
return 0;
} }
static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio) static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
...@@ -6582,14 +6537,12 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n ...@@ -6582,14 +6537,12 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
* Note in case of > 1 drive failures it's possible we're rebuilding * Note in case of > 1 drive failures it's possible we're rebuilding
* one drive while leaving another faulty drive in array. * one drive while leaving another faulty drive in array.
*/ */
rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); struct md_rdev *rdev = conf->disks[i].rdev;
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) if (rdev == NULL || test_bit(Faulty, &rdev->flags))
still_degraded = 1; still_degraded = 1;
} }
rcu_read_unlock();
md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
...@@ -6820,18 +6773,7 @@ static void raid5d(struct md_thread *thread) ...@@ -6820,18 +6773,7 @@ static void raid5d(struct md_thread *thread)
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
md_check_recovery(mddev); md_check_recovery(mddev);
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
/*
* Waiting on MD_SB_CHANGE_PENDING below may deadlock
* seeing md_check_recovery() is needed to clear
* the flag when using mdmon.
*/
continue;
} }
wait_event_lock_irq(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
conf->device_lock);
} }
pr_debug("%d stripes handled\n", handled); pr_debug("%d stripes handled\n", handled);
...@@ -7911,18 +7853,10 @@ static int raid5_run(struct mddev *mddev) ...@@ -7911,18 +7853,10 @@ static int raid5_run(struct mddev *mddev)
for (i = 0; i < conf->raid_disks && conf->previous_raid_disks; for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
i++) { i++) {
rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev); rdev = conf->disks[i].rdev;
if (!rdev && conf->disks[i].replacement) {
/* The replacement is all we have yet */
rdev = rdev_mdlock_deref(mddev,
conf->disks[i].replacement);
conf->disks[i].replacement = NULL;
clear_bit(Replacement, &rdev->flags);
rcu_assign_pointer(conf->disks[i].rdev, rdev);
}
if (!rdev) if (!rdev)
continue; continue;
if (rcu_access_pointer(conf->disks[i].replacement) && if (conf->disks[i].replacement &&
conf->reshape_progress != MaxSector) { conf->reshape_progress != MaxSector) {
/* replacements and reshape simply do not mix. */ /* replacements and reshape simply do not mix. */
pr_warn("md: cannot handle concurrent replacement and reshape.\n"); pr_warn("md: cannot handle concurrent replacement and reshape.\n");
...@@ -8106,15 +8040,16 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev) ...@@ -8106,15 +8040,16 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
struct r5conf *conf = mddev->private; struct r5conf *conf = mddev->private;
int i; int i;
lockdep_assert_held(&mddev->lock);
seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level, seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
conf->chunk_sectors / 2, mddev->layout); conf->chunk_sectors / 2, mddev->layout);
seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
} }
rcu_read_unlock();
seq_printf (seq, "]"); seq_printf (seq, "]");
} }
...@@ -8152,9 +8087,8 @@ static int raid5_spare_active(struct mddev *mddev) ...@@ -8152,9 +8087,8 @@ static int raid5_spare_active(struct mddev *mddev)
unsigned long flags; unsigned long flags;
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev); rdev = conf->disks[i].rdev;
replacement = rdev_mdlock_deref(mddev, replacement = conf->disks[i].replacement;
conf->disks[i].replacement);
if (replacement if (replacement
&& replacement->recovery_offset == MaxSector && replacement->recovery_offset == MaxSector
&& !test_bit(Faulty, &replacement->flags) && !test_bit(Faulty, &replacement->flags)
...@@ -8193,7 +8127,7 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -8193,7 +8127,7 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
struct r5conf *conf = mddev->private; struct r5conf *conf = mddev->private;
int err = 0; int err = 0;
int number = rdev->raid_disk; int number = rdev->raid_disk;
struct md_rdev __rcu **rdevp; struct md_rdev **rdevp;
struct disk_info *p; struct disk_info *p;
struct md_rdev *tmp; struct md_rdev *tmp;
...@@ -8216,9 +8150,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -8216,9 +8150,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
if (unlikely(number >= conf->pool_size)) if (unlikely(number >= conf->pool_size))
return 0; return 0;
p = conf->disks + number; p = conf->disks + number;
if (rdev == rcu_access_pointer(p->rdev)) if (rdev == p->rdev)
rdevp = &p->rdev; rdevp = &p->rdev;
else if (rdev == rcu_access_pointer(p->replacement)) else if (rdev == p->replacement)
rdevp = &p->replacement; rdevp = &p->replacement;
else else
return 0; return 0;
...@@ -8238,37 +8172,24 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -8238,37 +8172,24 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
if (!test_bit(Faulty, &rdev->flags) && if (!test_bit(Faulty, &rdev->flags) &&
mddev->recovery_disabled != conf->recovery_disabled && mddev->recovery_disabled != conf->recovery_disabled &&
!has_failed(conf) && !has_failed(conf) &&
(!rcu_access_pointer(p->replacement) || (!p->replacement || p->replacement == rdev) &&
rcu_access_pointer(p->replacement) == rdev) &&
number < conf->raid_disks) { number < conf->raid_disks) {
err = -EBUSY; err = -EBUSY;
goto abort; goto abort;
} }
*rdevp = NULL; WRITE_ONCE(*rdevp, NULL);
if (!test_bit(RemoveSynchronized, &rdev->flags)) {
lockdep_assert_held(&mddev->reconfig_mutex);
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
/* lost the race, try later */
err = -EBUSY;
rcu_assign_pointer(*rdevp, rdev);
}
}
if (!err) { if (!err) {
err = log_modify(conf, rdev, false); err = log_modify(conf, rdev, false);
if (err) if (err)
goto abort; goto abort;
} }
tmp = rcu_access_pointer(p->replacement); tmp = p->replacement;
if (tmp) { if (tmp) {
/* We must have just cleared 'rdev' */ /* We must have just cleared 'rdev' */
rcu_assign_pointer(p->rdev, tmp); WRITE_ONCE(p->rdev, tmp);
clear_bit(Replacement, &tmp->flags); clear_bit(Replacement, &tmp->flags);
smp_mb(); /* Make sure other CPUs may see both as identical WRITE_ONCE(p->replacement, NULL);
* but will never see neither - if they are careful
*/
rcu_assign_pointer(p->replacement, NULL);
if (!err) if (!err)
err = log_modify(conf, tmp, true); err = log_modify(conf, tmp, true);
...@@ -8336,7 +8257,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -8336,7 +8257,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
rdev->raid_disk = disk; rdev->raid_disk = disk;
if (rdev->saved_raid_disk != disk) if (rdev->saved_raid_disk != disk)
conf->fullsync = 1; conf->fullsync = 1;
rcu_assign_pointer(p->rdev, rdev); WRITE_ONCE(p->rdev, rdev);
err = log_modify(conf, rdev, true); err = log_modify(conf, rdev, true);
...@@ -8345,7 +8266,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -8345,7 +8266,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
} }
for (disk = first; disk <= last; disk++) { for (disk = first; disk <= last; disk++) {
p = conf->disks + disk; p = conf->disks + disk;
tmp = rdev_mdlock_deref(mddev, p->rdev); tmp = p->rdev;
if (test_bit(WantReplacement, &tmp->flags) && if (test_bit(WantReplacement, &tmp->flags) &&
mddev->reshape_position == MaxSector && mddev->reshape_position == MaxSector &&
p->replacement == NULL) { p->replacement == NULL) {
...@@ -8354,7 +8275,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -8354,7 +8275,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
rdev->raid_disk = disk; rdev->raid_disk = disk;
err = 0; err = 0;
conf->fullsync = 1; conf->fullsync = 1;
rcu_assign_pointer(p->replacement, rdev); WRITE_ONCE(p->replacement, rdev);
break; break;
} }
} }
...@@ -8487,7 +8408,7 @@ static int raid5_start_reshape(struct mddev *mddev) ...@@ -8487,7 +8408,7 @@ static int raid5_start_reshape(struct mddev *mddev)
if (mddev->recovery_cp < MaxSector) if (mddev->recovery_cp < MaxSector)
return -EBUSY; return -EBUSY;
for (i = 0; i < conf->raid_disks; i++) for (i = 0; i < conf->raid_disks; i++)
if (rdev_mdlock_deref(mddev, conf->disks[i].replacement)) if (conf->disks[i].replacement)
return -EBUSY; return -EBUSY;
rdev_for_each(rdev, mddev) { rdev_for_each(rdev, mddev) {
...@@ -8658,12 +8579,10 @@ static void raid5_finish_reshape(struct mddev *mddev) ...@@ -8658,12 +8579,10 @@ static void raid5_finish_reshape(struct mddev *mddev)
for (d = conf->raid_disks ; for (d = conf->raid_disks ;
d < conf->raid_disks - mddev->delta_disks; d < conf->raid_disks - mddev->delta_disks;
d++) { d++) {
rdev = rdev_mdlock_deref(mddev, rdev = conf->disks[d].rdev;
conf->disks[d].rdev);
if (rdev) if (rdev)
clear_bit(In_sync, &rdev->flags); clear_bit(In_sync, &rdev->flags);
rdev = rdev_mdlock_deref(mddev, rdev = conf->disks[d].replacement;
conf->disks[d].replacement);
if (rdev) if (rdev)
clear_bit(In_sync, &rdev->flags); clear_bit(In_sync, &rdev->flags);
} }
......
...@@ -473,8 +473,8 @@ enum { ...@@ -473,8 +473,8 @@ enum {
*/ */
struct disk_info { struct disk_info {
struct md_rdev __rcu *rdev; struct md_rdev *rdev;
struct md_rdev __rcu *replacement; struct md_rdev *replacement;
struct page *extra_page; /* extra page to use in prexor */ struct page *extra_page; /* extra page to use in prexor */
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment