Commit 9a567843 authored by Guoqing Jiang, committed by Song Liu

md: allow last device to be forcibly removed from RAID1/RAID10.

When the 'last' device in a RAID1 or RAID10 reports an error,
we do not mark it as failed.  This would serve little purpose
as there is no risk of losing data beyond that which is obviously
lost (as there is with RAID5), and there could be other sectors
on the device which are readable, and only readable from this device.
In general this maximises access to data.

However, the current implementation also stops an admin from removing
the last device by direct action.  This is rarely useful, but in many
cases it is not harmful and can make automation easier by removing
special cases.

Also, if an attempt to write metadata fails the device must be marked
as faulty, else an infinite loop will result, attempting to update
the metadata on all non-faulty devices.

So add a 'fail_last_dev' member to 'struct mddev' so that we can bypass
the 'last disk' checks for RAID1 and RAID10, and control the behavior
per array by changing the sysfs node.
Signed-off-by: NeilBrown <neilb@suse.de>
[add sysfs node for fail_last_dev by Guoqing]
Signed-off-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
parent cf891607
...@@ -5178,6 +5178,34 @@ static struct md_sysfs_entry md_consistency_policy = ...@@ -5178,6 +5178,34 @@ static struct md_sysfs_entry md_consistency_policy =
__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show, __ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
consistency_policy_store); consistency_policy_store);
/*
 * sysfs read handler for the fail_last_dev attribute.
 *
 * Formats the current value of mddev->fail_last_dev into @page as a
 * decimal integer followed by a newline and returns whatever sprintf()
 * returns (the number of characters written).
 */
static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
{
	int enabled = mddev->fail_last_dev;

	return sprintf(page, "%d\n", enabled);
}
/*
 * sysfs write handler for the fail_last_dev attribute.
 *
 * Setting fail_last_dev to true allows the last device to be forcibly
 * removed from RAID1/RAID10.
 *
 * @buf is parsed with kstrtobool(); if parsing fails, its non-zero
 * return code is handed back to the caller and the flag is left alone.
 * On success the whole write is reported as consumed by returning @len.
 */
static ssize_t
fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
{
	bool enable;
	int err = kstrtobool(buf, &enable);

	if (err)
		return err;

	/* Only store to the flag when the requested state actually differs. */
	if (mddev->fail_last_dev != enable)
		mddev->fail_last_dev = enable;

	return len;
}
/*
 * sysfs attribute "fail_last_dev": mode S_IRUGO | S_IWUSR (readable by
 * user/group/other, writable by the owner).  Reads are served by
 * fail_last_dev_show() and writes by fail_last_dev_store().
 */
static struct md_sysfs_entry md_fail_last_dev =
__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
fail_last_dev_store);
static struct attribute *md_default_attrs[] = { static struct attribute *md_default_attrs[] = {
&md_level.attr, &md_level.attr,
&md_layout.attr, &md_layout.attr,
...@@ -5194,6 +5222,7 @@ static struct attribute *md_default_attrs[] = { ...@@ -5194,6 +5222,7 @@ static struct attribute *md_default_attrs[] = {
&md_array_size.attr, &md_array_size.attr,
&max_corr_read_errors.attr, &max_corr_read_errors.attr,
&md_consistency_policy.attr, &md_consistency_policy.attr,
&md_fail_last_dev.attr,
NULL, NULL,
}; };
......
...@@ -487,6 +487,7 @@ struct mddev { ...@@ -487,6 +487,7 @@ struct mddev {
unsigned int good_device_nr; /* good device num within cluster raid */ unsigned int good_device_nr; /* good device num within cluster raid */
bool has_superblocks:1; bool has_superblocks:1;
bool fail_last_dev:1;
}; };
enum recovery_flags { enum recovery_flags {
......
...@@ -1617,12 +1617,12 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1617,12 +1617,12 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
/* /*
* If it is not operational, then we have already marked it as dead * If it is not operational, then we have already marked it as dead
* else if it is the last working disks, ignore the error, let the * else if it is the last working disks with "fail_last_dev == false",
* next level up know. * ignore the error, let the next level up know.
* else mark the drive as failed * else mark the drive as failed
*/ */
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
if (test_bit(In_sync, &rdev->flags) if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
&& (conf->raid_disks - mddev->degraded) == 1) { && (conf->raid_disks - mddev->degraded) == 1) {
/* /*
* Don't fail the drive, act as though we were just a * Don't fail the drive, act as though we were just a
......
...@@ -1640,12 +1640,12 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1640,12 +1640,12 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
/* /*
* If it is not operational, then we have already marked it as dead * If it is not operational, then we have already marked it as dead
* else if it is the last working disks, ignore the error, let the * else if it is the last working disks with "fail_last_dev == false",
* next level up know. * ignore the error, let the next level up know.
* else mark the drive as failed * else mark the drive as failed
*/ */
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
if (test_bit(In_sync, &rdev->flags) if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
&& !enough(conf, rdev->raid_disk)) { && !enough(conf, rdev->raid_disk)) {
/* /*
* Don't fail the drive, just return an IO error. * Don't fail the drive, just return an IO error.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment