Commit f01e49fb authored by Jens Axboe

Merge branch 'md-next' of...

Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.19/drivers

Pull MD updates from Song:

"1. Improve annotation in raid5 code, by Logan Gunthorpe.
 2. Support MD_BROKEN flag in raid-1/5/10, by Mariusz Tkaczyk.
 3. Other small fixes/cleanups."

* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md: Replace role magic numbers with defined constants
  md/raid0: Ignore RAID0 layout if the second zone has only one device
  md/raid5: Annotate functions that hold device_lock with __must_hold
  md/raid5-ppl: Annotate with rcu_dereference_protected()
  md/raid5: Annotate rdev/replacement access when mddev_lock is held
  md/raid5: Annotate rdev/replacement accesses when nr_pending is elevated
  md/raid5: Add __rcu annotation to struct disk_info
  md/raid5: Un-nest struct raid5_percpu definition
  md/raid5: Cleanup setup_conf() error returns
  md: replace deprecated strlcpy & remove duplicated line
  md/bitmap: don't set sb values if can't pass sanity check
  md: fix an incorrect NULL check in md_reload_sb
  md: fix an incorrect NULL check in does_sb_need_changing
  raid5: introduce MD_BROKEN
  md: Set MD_BROKEN for RAID1 and RAID10
parents 8ba816b2 9151ad5d
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -639,14 +639,6 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
 	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
 	write_behind = le32_to_cpu(sb->write_behind);
 	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
-	/* Setup nodes/clustername only if bitmap version is
-	 * cluster-compatible
-	 */
-	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
-		nodes = le32_to_cpu(sb->nodes);
-		strlcpy(bitmap->mddev->bitmap_info.cluster_name,
-			sb->cluster_name, 64);
-	}
 
 	/* verify that the bitmap-specific fields are valid */
 	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -668,6 +660,16 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
 		goto out;
 	}
 
+	/*
+	 * Setup nodes/clustername only if bitmap version is
+	 * cluster-compatible
+	 */
+	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
+		nodes = le32_to_cpu(sb->nodes);
+		strscpy(bitmap->mddev->bitmap_info.cluster_name,
+			sb->cluster_name, 64);
+	}
+
 	/* keep the array size field of the bitmap superblock up to date */
 	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
@@ -695,14 +697,13 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
 	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
 		set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
 	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
-	strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
 	err = 0;
 
 out:
 	kunmap_atomic(sb);
-	/* Assigning chunksize is required for "re_read" */
-	bitmap->mddev->bitmap_info.chunksize = chunksize;
 	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
+		/* Assigning chunksize is required for "re_read" */
+		bitmap->mddev->bitmap_info.chunksize = chunksize;
 		err = md_setup_cluster(bitmap->mddev, nodes);
 		if (err) {
 			pr_warn("%s: Could not setup cluster service (%d)\n",
@@ -713,18 +714,18 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
 		goto re_read;
 	}
 
 out_no_sb:
-	if (test_bit(BITMAP_STALE, &bitmap->flags))
-		bitmap->events_cleared = bitmap->mddev->events;
-	bitmap->mddev->bitmap_info.chunksize = chunksize;
-	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
-	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
-	bitmap->mddev->bitmap_info.nodes = nodes;
-	if (bitmap->mddev->bitmap_info.space == 0 ||
-	    bitmap->mddev->bitmap_info.space > sectors_reserved)
-		bitmap->mddev->bitmap_info.space = sectors_reserved;
-	if (err) {
+	if (err == 0) {
+		if (test_bit(BITMAP_STALE, &bitmap->flags))
+			bitmap->events_cleared = bitmap->mddev->events;
+		bitmap->mddev->bitmap_info.chunksize = chunksize;
+		bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
+		bitmap->mddev->bitmap_info.max_write_behind = write_behind;
+		bitmap->mddev->bitmap_info.nodes = nodes;
+		if (bitmap->mddev->bitmap_info.space == 0 ||
+		    bitmap->mddev->bitmap_info.space > sectors_reserved)
+			bitmap->mddev->bitmap_info.space = sectors_reserved;
+	} else {
 		md_bitmap_print_sb(bitmap);
 		if (bitmap->cluster_slot < 0)
 			md_cluster_stop(bitmap->mddev);
......
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -201,7 +201,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
 		pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
 		goto out_err;
 	}
-	strlcpy(res->name, name, namelen + 1);
+	strscpy(res->name, name, namelen + 1);
 	if (with_lvb) {
 		res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
 		if (!res->lksb.sb_lvbptr) {
......
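The strlcpy() -> strscpy() conversions in this series (md-bitmap.c, md-cluster.c, md.c) swap in the kernel's preferred bounded copy: strscpy() always NUL-terminates, avoids strlcpy()'s need to scan the entire source string just to compute its return value, and reports truncation as -E2BIG instead of returning the source length. Below is a minimal userspace sketch of that calling convention; my_strscpy() is a simplified stand-in for the kernel helper, not the kernel implementation (which returns -E2BIG rather than -1 on truncation).

#include <stdio.h>
#include <string.h>

/* Simplified stand-in for the kernel's strscpy(): copies at most size-1
 * bytes, always NUL-terminates, and signals truncation via the return
 * value instead of returning strlen(src) the way strlcpy() does.
 */
static long my_strscpy(char *dst, const char *src, size_t size)
{
	size_t i;

	if (size == 0)
		return -1;
	for (i = 0; i < size - 1 && src[i] != '\0'; i++)
		dst[i] = src[i];
	dst[i] = '\0';
	return (src[i] == '\0') ? (long)i : -1;	/* -1 here, -E2BIG in the kernel */
}

int main(void)
{
	char buf[8];

	/* strlcpy() would return strlen("hello world") == 11 and force the
	 * caller to compare against sizeof(buf) to notice the truncation;
	 * the strscpy() convention reports it directly.
	 */
	long ret = my_strscpy(buf, "hello world", sizeof(buf));

	printf("copied=\"%s\" ret=%ld\n", buf, ret);	/* copied="hello w" ret=-1 */
	return 0;
}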
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2627,14 +2627,16 @@ static void sync_sbs(struct mddev *mddev, int nospares)
 
 static bool does_sb_need_changing(struct mddev *mddev)
 {
-	struct md_rdev *rdev;
+	struct md_rdev *rdev = NULL, *iter;
 	struct mdp_superblock_1 *sb;
 	int role;
 
 	/* Find a good rdev */
-	rdev_for_each(rdev, mddev)
-		if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
+	rdev_for_each(iter, mddev)
+		if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
+			rdev = iter;
 			break;
+		}
 
 	/* No good device found. */
 	if (!rdev)
@@ -2645,11 +2647,11 @@ static bool does_sb_need_changing(struct mddev *mddev)
 	rdev_for_each(rdev, mddev) {
 		role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
 		/* Device activated? */
-		if (role == 0xffff && rdev->raid_disk >=0 &&
+		if (role == MD_DISK_ROLE_SPARE && rdev->raid_disk >= 0 &&
 		    !test_bit(Faulty, &rdev->flags))
 			return true;
 		/* Device turned faulty? */
-		if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
+		if (test_bit(Faulty, &rdev->flags) && (role < MD_DISK_ROLE_MAX))
 			return true;
 	}
@@ -2984,10 +2986,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 	if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
 		md_error(rdev->mddev, rdev);
-		if (test_bit(Faulty, &rdev->flags))
-			err = 0;
-		else
+
+		if (test_bit(MD_BROKEN, &rdev->mddev->flags))
 			err = -EBUSY;
+		else
+			err = 0;
 	} else if (cmd_match(buf, "remove")) {
 		if (rdev->mddev->pers) {
 			clear_bit(Blocked, &rdev->flags);
@@ -4028,7 +4031,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 	oldpriv = mddev->private;
 	mddev->pers = pers;
 	mddev->private = priv;
-	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+	strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 	mddev->level = mddev->new_level;
 	mddev->layout = mddev->new_layout;
 	mddev->chunk_sectors = mddev->new_chunk_sectors;
@@ -4353,10 +4356,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
  *     like active, but no writes have been seen for a while (100msec).
  *
  * broken
- *     RAID0/LINEAR-only: same as clean, but array is missing a member.
- *     It's useful because RAID0/LINEAR mounted-arrays aren't stopped
- *     when a member is gone, so this state will at least alert the
- *     user that something is wrong.
+ *     Array is failed. It's useful because mounted-arrays aren't stopped
+ *     when array is failed, so this state will at least alert the user that
+ *     something is wrong.
  */
 enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
 		   write_pending, active_idle, broken, bad_word};
@@ -5763,7 +5765,7 @@ static int add_named_array(const char *val, const struct kernel_param *kp)
 		len--;
 	if (len >= DISK_NAME_LEN)
 		return -E2BIG;
-	strlcpy(buf, val, len+1);
+	strscpy(buf, val, len+1);
 	if (strncmp(buf, "md_", 3) == 0)
 		return md_alloc(0, buf);
 	if (strncmp(buf, "md", 2) == 0 &&
@@ -5896,7 +5898,7 @@ int md_run(struct mddev *mddev)
 		mddev->level = pers->level;
 		mddev->new_level = pers->level;
 	}
-	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+	strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 
 	if (mddev->reshape_position != MaxSector &&
 	    pers->start_reshape == NULL) {
@@ -7443,7 +7445,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
 		err = -ENODEV;
 	else {
 		md_error(mddev, rdev);
-		if (!test_bit(Faulty, &rdev->flags))
+		if (test_bit(MD_BROKEN, &mddev->flags))
 			err = -EBUSY;
 	}
 	rcu_read_unlock();
@@ -7984,13 +7986,16 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
 	if (!mddev->pers || !mddev->pers->error_handler)
 		return;
-	mddev->pers->error_handler(mddev,rdev);
-	if (mddev->degraded)
+	mddev->pers->error_handler(mddev, rdev);
+
+	if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
 		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
 	sysfs_notify_dirent_safe(rdev->sysfs_state);
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-	md_wakeup_thread(mddev->thread);
+	if (!test_bit(MD_BROKEN, &mddev->flags)) {
+		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+		md_wakeup_thread(mddev->thread);
+	}
 	if (mddev->event_work.func)
 		queue_work(md_misc_wq, &mddev->event_work);
 	md_new_event();
@@ -9670,7 +9675,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
 		role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
 
 		if (test_bit(Candidate, &rdev2->flags)) {
-			if (role == 0xfffe) {
+			if (role == MD_DISK_ROLE_FAULTY) {
 				pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
 				md_kick_rdev_from_array(rdev2);
 				continue;
@@ -9683,7 +9688,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
 			/*
 			 * got activated except reshape is happening.
 			 */
-			if (rdev2->raid_disk == -1 && role != 0xffff &&
+			if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
 			    !(le32_to_cpu(sb->feature_map) &
 			      MD_FEATURE_RESHAPE_ACTIVE)) {
 				rdev2->saved_raid_disk = role;
@@ -9700,7 +9705,8 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
 			 * as faulty. The recovery is performed by the
 			 * one who initiated the error.
 			 */
-			if ((role == 0xfffe) || (role == 0xfffd)) {
+			if (role == MD_DISK_ROLE_FAULTY ||
+			    role == MD_DISK_ROLE_JOURNAL) {
 				md_error(mddev, rdev2);
 				clear_bit(Blocked, &rdev2->flags);
 			}
@@ -9790,16 +9796,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
 
 void md_reload_sb(struct mddev *mddev, int nr)
 {
-	struct md_rdev *rdev;
+	struct md_rdev *rdev = NULL, *iter;
 	int err;
 
 	/* Find the rdev */
-	rdev_for_each_rcu(rdev, mddev) {
-		if (rdev->desc_nr == nr)
+	rdev_for_each_rcu(iter, mddev) {
+		if (iter->desc_nr == nr) {
+			rdev = iter;
 			break;
+		}
 	}
 
-	if (!rdev || rdev->desc_nr != nr) {
+	if (!rdev) {
 		pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
 		return;
 	}
......
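The md.c hunks above replace the raw 0xffff/0xfffe/0xfffd role values with the named constants from include/uapi/linux/raid/md_p.h. For reference, here is a small self-contained sketch of how those on-disk role values are interpreted; the constant values are copied from that UAPI header, so verify them against the kernel tree you build for.

#include <stdint.h>
#include <stdio.h>

/* Role constants as defined in include/uapi/linux/raid/md_p.h */
#define MD_DISK_ROLE_SPARE	0xffff
#define MD_DISK_ROLE_FAULTY	0xfffe
#define MD_DISK_ROLE_JOURNAL	0xfffd
#define MD_DISK_ROLE_MAX	0xff00	/* highest value of a regular data role */

/* Illustrative decoder for sb->dev_roles[] entries, mirroring the checks
 * that does_sb_need_changing()/check_sb_changes() now spell out by name.
 */
static const char *role_name(uint16_t role)
{
	switch (role) {
	case MD_DISK_ROLE_SPARE:   return "spare";
	case MD_DISK_ROLE_FAULTY:  return "faulty";
	case MD_DISK_ROLE_JOURNAL: return "journal";
	default:
		return role < MD_DISK_ROLE_MAX ? "active data disk" : "reserved";
	}
}

int main(void)
{
	uint16_t samples[] = { 0, 3, 0xfffd, 0xfffe, 0xffff };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("role 0x%04x -> %s\n", samples[i], role_name(samples[i]));
	return 0;
}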
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 			      int is_new);
 struct md_cluster_info;
 
-/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
+/**
+ * enum mddev_flags - md device flags.
+ * @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
+ * @MD_CLOSING: If set, we are closing the array, do not open it then.
+ * @MD_JOURNAL_CLEAN: A raid with journal is already clean.
+ * @MD_HAS_JOURNAL: The raid array has journal feature set.
+ * @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means node already took
+ *			      resync lock, need to release the lock.
+ * @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
+ *			   calls to md_error() will never cause the array to
+ *			   become failed.
+ * @MD_HAS_PPL: The raid array has PPL feature set.
+ * @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
+ * @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
+ *			without taking reconfig_mutex.
+ * @MD_UPDATING_SB: md_check_recovery is updating the metadata without
+ *		    explicitly holding reconfig_mutex.
+ * @MD_NOT_READY: do_md_run() is active, so 'array_state' must not report that
+ *		  array is ready yet.
+ * @MD_BROKEN: This is used to stop writes and mark array as failed.
+ *
+ * change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
+ */
 enum mddev_flags {
-	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
-	MD_CLOSING,		/* If set, we are closing the array, do not open
-				 * it then */
-	MD_JOURNAL_CLEAN,	/* A raid with journal is already clean */
-	MD_HAS_JOURNAL,		/* The raid array has journal feature set */
-	MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
-				   * already took resync lock, need to
-				   * release the lock */
-	MD_FAILFAST_SUPPORTED,	/* Using MD_FAILFAST on metadata writes is
-				 * supported as calls to md_error() will
-				 * never cause the array to become failed.
-				 */
-	MD_HAS_PPL,		/* The raid array has PPL feature set */
-	MD_HAS_MULTIPLE_PPLS,	/* The raid array has multiple PPLs feature set */
-	MD_ALLOW_SB_UPDATE,	/* md_check_recovery is allowed to update
-				 * the metadata without taking reconfig_mutex.
-				 */
-	MD_UPDATING_SB,		/* md_check_recovery is updating the metadata
-				 * without explicitly holding reconfig_mutex.
-				 */
-	MD_NOT_READY,		/* do_md_run() is active, so 'array_state'
-				 * must not report that array is ready yet
-				 */
-	MD_BROKEN,		/* This is used in RAID-0/LINEAR only, to stop
-				 * I/O in case an array member is gone/failed.
-				 */
+	MD_ARRAY_FIRST_USE,
+	MD_CLOSING,
+	MD_JOURNAL_CLEAN,
+	MD_HAS_JOURNAL,
+	MD_CLUSTER_RESYNC_LOCKED,
+	MD_FAILFAST_SUPPORTED,
+	MD_HAS_PPL,
+	MD_HAS_MULTIPLE_PPLS,
+	MD_ALLOW_SB_UPDATE,
+	MD_UPDATING_SB,
+	MD_NOT_READY,
+	MD_BROKEN,
 };
 
 enum mddev_sb_flags {
......
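The &MD_BROKEN documentation above pairs with the reworded 'broken' entry of the array_state sysfs attribute in the md.c hunk: once a personality marks the array broken, the state becomes observable from userspace. A small sketch of polling it follows; the md0 device name and the message printed are illustrative, only the sysfs path layout is part of the md interface.

#include <stdio.h>
#include <string.h>

/* Read /sys/block/<dev>/md/array_state and report whether the array is
 * in the "broken" state documented above. Adjust the device name for
 * your system.
 */
int main(void)
{
	const char *path = "/sys/block/md0/md/array_state";
	char state[32] = "";
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(state, sizeof(state), f))
		state[strcspn(state, "\n")] = '\0';
	fclose(f);

	printf("md0 array_state: %s\n", state);
	if (strcmp(state, "broken") == 0)
		printf("array has failed; new writes will error until it is stopped\n");
	return 0;
}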
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -128,21 +128,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 	pr_debug("md/raid0:%s: FINAL %d zones\n",
 		 mdname(mddev), conf->nr_strip_zones);
 
-	if (conf->nr_strip_zones == 1) {
-		conf->layout = RAID0_ORIG_LAYOUT;
-	} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
-		   mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
-		conf->layout = mddev->layout;
-	} else if (default_layout == RAID0_ORIG_LAYOUT ||
-		   default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
-		conf->layout = default_layout;
-	} else {
-		pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
-		       mdname(mddev));
-		pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
-		err = -ENOTSUPP;
-		goto abort;
-	}
 	/*
 	 * now since we have the hard sector sizes, we can make sure
 	 * chunk size is a multiple of that sector size
@@ -273,6 +258,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 			 (unsigned long long)smallest->sectors);
 	}
 
+	if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
+		conf->layout = RAID0_ORIG_LAYOUT;
+	} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
+		   mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+		conf->layout = mddev->layout;
+	} else if (default_layout == RAID0_ORIG_LAYOUT ||
+		   default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+		conf->layout = default_layout;
+	} else {
+		pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
+		       mdname(mddev));
+		pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
+		err = -EOPNOTSUPP;
+		goto abort;
+	}
+
 	pr_debug("md/raid0:%s: done.\n", mdname(mddev));
 	*private_conf = conf;
......
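The raid0.c change moves the layout decision after the strip zones are built so it can inspect conf->strip_zone[1].nb_dev: when the second zone is served by a single device, chunks in that zone can only land on that one device, so RAID0_ORIG_LAYOUT and RAID0_ALT_MULTIZONE_LAYOUT produce the same mapping and no default_layout choice is needed. A toy calculation with hypothetical member sizes, illustrating why only the second zone's device count matters for a two-member array:

#include <stdio.h>

int main(void)
{
	/* hypothetical two-member RAID0, sizes in 512-byte sectors */
	unsigned long long small = 2000000, large = 5000000;

	/* zone 0 stripes across both members up to the smaller size */
	unsigned long long zone0_per_dev = small;
	/* zone 1 is whatever remains on the larger member only */
	unsigned long long zone1_per_dev = large - small;

	printf("zone 0: %llu sectors on each of 2 devices\n", zone0_per_dev);
	printf("zone 1: %llu sectors on 1 device\n", zone1_per_dev);
	printf("zone 1 has a single device, so both multi-zone layouts map chunks identically\n");
	return 0;
}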
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1641,30 +1641,39 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
 	seq_printf(seq, "]");
 }
 
+/**
+ * raid1_error() - RAID1 error handler.
+ * @mddev: affected md device.
+ * @rdev: member device to fail.
+ *
+ * The routine acknowledges &rdev failure and determines new @mddev state.
+ * If it failed, then:
+ *	- &MD_BROKEN flag is set in &mddev->flags.
+ *	- recovery is disabled.
+ * Otherwise, it must be degraded:
+ *	- recovery is interrupted.
+ *	- &mddev->degraded is bumped.
+ *
+ * @rdev is marked as &Faulty excluding case when array is failed and
+ * &mddev->fail_last_dev is off.
+ */
 static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char b[BDEVNAME_SIZE];
 	struct r1conf *conf = mddev->private;
 	unsigned long flags;
 
-	/*
-	 * If it is not operational, then we have already marked it as dead
-	 * else if it is the last working disks with "fail_last_dev == false",
-	 * ignore the error, let the next level up know.
-	 * else mark the drive as failed
-	 */
 	spin_lock_irqsave(&conf->device_lock, flags);
-	if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
-	    && (conf->raid_disks - mddev->degraded) == 1) {
-		/*
-		 * Don't fail the drive, act as though we were just a
-		 * normal single drive.
-		 * However don't try a recovery from this drive as
-		 * it is very likely to fail.
-		 */
-		conf->recovery_disabled = mddev->recovery_disabled;
-		spin_unlock_irqrestore(&conf->device_lock, flags);
-		return;
+
+	if (test_bit(In_sync, &rdev->flags) &&
+	    (conf->raid_disks - mddev->degraded) == 1) {
+		set_bit(MD_BROKEN, &mddev->flags);
+
+		if (!mddev->fail_last_dev) {
+			conf->recovery_disabled = mddev->recovery_disabled;
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+			return;
+		}
 	}
 	set_bit(Blocked, &rdev->flags);
 	if (test_and_clear_bit(In_sync, &rdev->flags))
......
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1970,32 +1970,40 @@ static int enough(struct r10conf *conf, int ignore)
 		_enough(conf, 1, ignore);
 }
 
+/**
+ * raid10_error() - RAID10 error handler.
+ * @mddev: affected md device.
+ * @rdev: member device to fail.
+ *
+ * The routine acknowledges &rdev failure and determines new @mddev state.
+ * If it failed, then:
+ *	- &MD_BROKEN flag is set in &mddev->flags.
+ * Otherwise, it must be degraded:
+ *	- recovery is interrupted.
+ *	- &mddev->degraded is bumped.
+ *
+ * @rdev is marked as &Faulty excluding case when array is failed and
+ * &mddev->fail_last_dev is off.
+ */
 static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char b[BDEVNAME_SIZE];
 	struct r10conf *conf = mddev->private;
 	unsigned long flags;
 
-	/*
-	 * If it is not operational, then we have already marked it as dead
-	 * else if it is the last working disks with "fail_last_dev == false",
-	 * ignore the error, let the next level up know.
-	 * else mark the drive as failed
-	 */
 	spin_lock_irqsave(&conf->device_lock, flags);
-	if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
-	    && !enough(conf, rdev->raid_disk)) {
-		/*
-		 * Don't fail the drive, just return an IO error.
-		 */
-		spin_unlock_irqrestore(&conf->device_lock, flags);
-		return;
+
+	if (test_bit(In_sync, &rdev->flags) && !enough(conf, rdev->raid_disk)) {
+		set_bit(MD_BROKEN, &mddev->flags);
+
+		if (!mddev->fail_last_dev) {
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+			return;
+		}
 	}
 	if (test_and_clear_bit(In_sync, &rdev->flags))
 		mddev->degraded++;
-	/*
-	 * If recovery is running, make sure it aborts.
-	 */
+
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
......
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -883,7 +883,9 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
 			 (unsigned long long)r_sector, dd_idx,
 			 (unsigned long long)sector);
 
-		rdev = conf->disks[dd_idx].rdev;
+		/* Array has not started so rcu dereference is safe */
+		rdev = rcu_dereference_protected(
+				conf->disks[dd_idx].rdev, 1);
 		if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
 			      sector >= rdev->recovery_offset)) {
 			pr_debug("%s:%*s data member disk %d missing\n",
@@ -934,7 +936,10 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
 		parity_sector = raid5_compute_sector(conf, r_sector_first + i,
 				0, &disk, &sh);
 		BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
-		parity_rdev = conf->disks[sh.pd_idx].rdev;
+
+		/* Array has not started so rcu dereference is safe */
+		parity_rdev = rcu_dereference_protected(
+					conf->disks[sh.pd_idx].rdev, 1);
 
 		BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
 		pr_debug("%s:%*s write parity at sector %llu, disk %s\n",
@@ -1404,7 +1409,9 @@ int ppl_init_log(struct r5conf *conf)
 	for (i = 0; i < ppl_conf->count; i++) {
 		struct ppl_log *log = &ppl_conf->child_logs[i];
-		struct md_rdev *rdev = conf->disks[i].rdev;
+		/* Array has not started so rcu dereference is safe */
+		struct md_rdev *rdev =
+			rcu_dereference_protected(conf->disks[i].rdev, 1);
 
 		mutex_init(&log->io_mutex);
 		spin_lock_init(&log->io_list_lock);
......
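The PPL paths above run before the array is started, so no concurrent updater of the __rcu rdev pointers can exist; rcu_dereference_protected(ptr, 1) documents exactly that (and keeps sparse quiet), whereas ordinary readers would use rcu_dereference() under rcu_read_lock(). Below is a toy userspace analogue of the publish/read pattern built on C11 atomics; it is not the kernel's RCU implementation, just an illustration of why the lock-free read side needs the annotation while the setup path does not.

#include <stdatomic.h>
#include <stdio.h>

/* Toy analogue (NOT real RCU): a release store plays the role of
 * rcu_assign_pointer(), an acquire load plays rcu_dereference(), and a
 * plain load stands in for rcu_dereference_protected(), which is legal
 * only when the caller can prove there are no concurrent updates, e.g.
 * because the array has not been started yet.
 */
struct md_rdev_stub { int desc_nr; };

static _Atomic(struct md_rdev_stub *) rdev_slot;

static void publish(struct md_rdev_stub *rdev)
{
	/* like rcu_assign_pointer(): release makes initialization visible first */
	atomic_store_explicit(&rdev_slot, rdev, memory_order_release);
}

static struct md_rdev_stub *read_side(void)
{
	/* like rcu_dereference() under rcu_read_lock() */
	return atomic_load_explicit(&rdev_slot, memory_order_acquire);
}

static struct md_rdev_stub *setup_side(void)
{
	/* like rcu_dereference_protected(ptr, 1): no concurrent updaters */
	return atomic_load_explicit(&rdev_slot, memory_order_relaxed);
}

int main(void)
{
	static struct md_rdev_stub stub = { .desc_nr = 3 };

	publish(&stub);
	printf("reader sees desc_nr=%d, setup path sees desc_nr=%d\n",
	       read_side()->desc_nr, setup_side()->desc_nr);
	return 0;
}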
This diff is collapsed (the drivers/md/raid5.c changes are not shown here).
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -473,7 +473,8 @@ enum {
  */
 
 struct disk_info {
-	struct md_rdev	*rdev, *replacement;
+	struct md_rdev __rcu *rdev;
+	struct md_rdev __rcu *replacement;
 	struct page	*extra_page; /* extra page to use in prexor */
 };
 
@@ -560,6 +561,16 @@ struct r5pending_data {
 	struct bio_list bios;
 };
 
+struct raid5_percpu {
+	struct page	*spare_page; /* Used when checking P/Q in raid6 */
+	void		*scribble;   /* space for constructing buffer
+				      * lists and performing address
+				      * conversions
+				      */
+	int		scribble_obj_size;
+	local_lock_t	lock;
+};
+
 struct r5conf {
 	struct hlist_head	*stripe_hashtbl;
 	/* only protect corresponding hash list and inactive_list */
@@ -635,15 +646,7 @@ struct r5conf {
 					    */
 	int			recovery_disabled;
 	/* per cpu variables */
-	struct raid5_percpu {
-		struct page	*spare_page; /* Used when checking P/Q in raid6 */
-		void		*scribble;   /* space for constructing buffer
-					      * lists and performing address
-					      * conversions
-					      */
-		int		scribble_obj_size;
-		local_lock_t	lock;
-	} __percpu *percpu;
+	struct raid5_percpu	__percpu *percpu;
 	int scribble_disks;
 	int scribble_sectors;
 	struct hlist_node node;
......