Commit 697a067f authored by Linus Torvalds

Merge tag 'md-3.11' of git://neil.brown.name/md

Pull md updates from NeilBrown:
 "Mostly fixes, with a few minor features (eg 'last_sync_action' sysfs
  file)

  A couple marked for -stable including one recent bug which causes a
  RAID10 reshape to complete without moving any data :-(

  A couple more bugfixes (at least) to come, but haven't confirmed the
  right solution yet."

* tag 'md-3.11' of git://neil.brown.name/md:
  md/raid10: fix bug which causes all RAID10 reshapes to move no data.
  md/raid5: allow 5-device RAID6 to be reshaped to 4-device.
  md/raid10: fix two bugs affecting RAID10 reshape.
  md: remove doubled description for sync_max, merging it within sync_min/sync_max
  MD: Remember the last sync operation that was performed
  md: fix buglet in RAID5 -> RAID0 conversion.
  md/raid10: check In_sync flag in 'enough()'.
  md/raid10: locking changes for 'enough()'.
  md: replace strict_strto*() with kstrto*()
  md: Wait for md_check_recovery before attempting device removal.
  dm-raid: silence compiler warning on rebuilds_per_group.
  DM RAID: Fix raid_resume not reviving failed devices in all cases
  DM RAID: Break-up untidy function
  DM RAID: Add ability to restore transiently failed devices on resume
parents e61aca51 13765120
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -222,3 +222,5 @@ Version History
 1.4.2   Add RAID10 "far" and "offset" algorithm support.
 1.5.0   Add message interface to allow manipulation of the sync_action.
         New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
+1.5.1   Add ability to restore transiently failed devices on resume.
+1.5.2   'mismatch_cnt' is zero unless [last_]sync_action is "check".
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -566,13 +566,6 @@ also have
      when it reaches the current sync_max (below) and possibly at
      other times.

-  sync_max
-     This is a number of sectors at which point a resync/recovery
-     process will pause.  When a resync is active, the value can
-     only ever be increased, never decreased.  The value of 'max'
-     effectively disables the limit.
-
   sync_speed
      This shows the current actual speed, in K/sec, of the current
      sync_action.  It is averaged over the last 30 seconds.
@@ -593,6 +586,12 @@ also have
      that number to reach sync_max.  Then you can either increase
      "sync_max", or can write 'idle' to "sync_action".

+     The value of 'max' for "sync_max" effectively disables the limit.
+     When a resync is active, the value can only ever be increased,
+     never decreased.
+     The value of '0' is the minimum for "sync_min".
+
 Each active md device may also have attributes specific to the
 personality module that manages it.
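The sync_min/sync_max documentation above can be made concrete with a
minimal userspace sketch (illustration only, not part of this commit; the
/sys/block/md0 path and the 1 GiB window are assumptions):

#include <stdio.h>
#include <stdlib.h>

/* Write a value to one of md0's sysfs attributes. */
static void write_attr(const char *attr, const char *val)
{
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path), "/sys/block/md0/md/%s", attr);
        f = fopen(path, "w");
        if (!f) {
                perror(path);
                exit(1);
        }
        fprintf(f, "%s\n", val);
        fclose(f);
}

int main(void)
{
        write_attr("sync_min", "0");            /* '0' is the minimum */
        write_attr("sync_max", "2097152");      /* pause after 1 GiB of 512-byte sectors */
        write_attr("sync_action", "check");     /* the check pauses when it reaches sync_max */
        return 0;
}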
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -2002,9 +2002,9 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
         } else {
                 int rv;
                 if (buf[0] == '+')
-                        rv = strict_strtoll(buf+1, 10, &offset);
+                        rv = kstrtoll(buf+1, 10, &offset);
                 else
-                        rv = strict_strtoll(buf, 10, &offset);
+                        rv = kstrtoll(buf, 10, &offset);
                 if (rv)
                         return rv;
                 if (offset == 0)
@@ -2139,7 +2139,7 @@ static ssize_t
 backlog_store(struct mddev *mddev, const char *buf, size_t len)
 {
         unsigned long backlog;
-        int rv = strict_strtoul(buf, 10, &backlog);
+        int rv = kstrtoul(buf, 10, &backlog);
         if (rv)
                 return rv;
         if (backlog > COUNTER_MAX)
@@ -2165,7 +2165,7 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len)
         unsigned long csize;
         if (mddev->bitmap)
                 return -EBUSY;
-        rv = strict_strtoul(buf, 10, &csize);
+        rv = kstrtoul(buf, 10, &csize);
         if (rv)
                 return rv;
         if (csize < 512 ||
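The strict_strto*() to kstrto*() conversions above (and in the files below)
are mechanical: both families return 0 on success or a negative errno, so
every call site keeps its "if (rv) return rv;" shape. As a rough userspace
illustration of the contract kstrtoul() enforces (whole-string parse, one
optional trailing newline, overflow reported) — a sketch, not the kernel
implementation:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int kstrtoul_like(const char *s, unsigned int base, unsigned long *res)
{
        char *end;
        unsigned long val;

        errno = 0;
        val = strtoul(s, &end, base);
        if (errno == ERANGE)
                return -ERANGE;         /* overflow */
        if (end == s)
                return -EINVAL;         /* no digits at all */
        if (*end == '\n')
                end++;                  /* sysfs writes often end in '\n' */
        if (*end != '\0')
                return -EINVAL;         /* trailing garbage */
        *res = val;
        return 0;
}

int main(void)
{
        unsigned long v;

        printf("\"512\\n\" -> %d\n", kstrtoul_like("512\n", 10, &v)); /* 0, v == 512 */
        printf("\"512k\"  -> %d\n", kstrtoul_like("512k", 10, &v));   /* -EINVAL */
        return 0;
}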
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -380,7 +380,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
 static int validate_raid_redundancy(struct raid_set *rs)
 {
         unsigned i, rebuild_cnt = 0;
-        unsigned rebuilds_per_group, copies, d;
+        unsigned rebuilds_per_group = 0, copies, d;
         unsigned group_size, last_group_start;

         for (i = 0; i < rs->md.raid_disks; i++)
@@ -504,7 +504,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
          * First, parse the in-order required arguments
          * "chunk_size" is the only argument of this type.
          */
-        if ((strict_strtoul(argv[0], 10, &value) < 0)) {
+        if ((kstrtoul(argv[0], 10, &value) < 0)) {
                 rs->ti->error = "Bad chunk size";
                 return -EINVAL;
         } else if (rs->raid_type->level == 1) {
@@ -585,7 +585,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
                         continue;
                 }

-                if (strict_strtoul(argv[i], 10, &value) < 0) {
+                if (kstrtoul(argv[i], 10, &value) < 0) {
                         rs->ti->error = "Bad numerical argument given in raid params";
                         return -EINVAL;
                 }
@@ -1181,7 +1181,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
         argv++;

         /* number of RAID parameters */
-        if (strict_strtoul(argv[0], 10, &num_raid_params) < 0) {
+        if (kstrtoul(argv[0], 10, &num_raid_params) < 0) {
                 ti->error = "Cannot understand number of RAID parameters";
                 return -EINVAL;
         }
@@ -1194,7 +1194,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
                 return -EINVAL;
         }

-        if ((strict_strtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) ||
+        if ((kstrtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) ||
             (num_raid_devs >= INT_MAX)) {
                 ti->error = "Cannot understand number of raid devices";
                 return -EINVAL;
@@ -1388,6 +1388,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
                  * performing a "check" of the array.
                  */
                 DMEMIT(" %llu",
+                       (strcmp(rs->md.last_sync_action, "check")) ? 0 :
                        (unsigned long long)
                        atomic64_read(&rs->md.resync_mismatches));
                 break;
@@ -1572,6 +1573,62 @@ static void raid_postsuspend(struct dm_target *ti)
         mddev_suspend(&rs->md);
 }

+static void attempt_restore_of_faulty_devices(struct raid_set *rs)
+{
+        int i;
+        uint64_t failed_devices, cleared_failed_devices = 0;
+        unsigned long flags;
+        struct dm_raid_superblock *sb;
+        struct md_rdev *r;
+
+        for (i = 0; i < rs->md.raid_disks; i++) {
+                r = &rs->dev[i].rdev;
+                if (test_bit(Faulty, &r->flags) && r->sb_page &&
+                    sync_page_io(r, 0, r->sb_size, r->sb_page, READ, 1)) {
+                        DMINFO("Faulty %s device #%d has readable super block."
+                               "  Attempting to revive it.",
+                               rs->raid_type->name, i);
+                        /*
+                         * Faulty bit may be set, but sometimes the array can
+                         * be suspended before the personalities can respond
+                         * by removing the device from the array (i.e. calling
+                         * 'hot_remove_disk').  If they haven't yet removed
+                         * the failed device, its 'raid_disk' number will be
+                         * '>= 0' - meaning we must call this function
+                         * ourselves.
+                         */
+                        if ((r->raid_disk >= 0) &&
+                            (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0))
+                                /* Failed to revive this device, try next */
+                                continue;
+
+                        r->raid_disk = i;
+                        r->saved_raid_disk = i;
+                        flags = r->flags;
+                        clear_bit(Faulty, &r->flags);
+                        clear_bit(WriteErrorSeen, &r->flags);
+                        clear_bit(In_sync, &r->flags);
+                        if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
+                                r->raid_disk = -1;
+                                r->saved_raid_disk = -1;
+                                r->flags = flags;
+                        } else {
+                                r->recovery_offset = 0;
+                                cleared_failed_devices |= 1 << i;
+                        }
+                }
+        }
+        if (cleared_failed_devices) {
+                rdev_for_each(r, &rs->md) {
+                        sb = page_address(r->sb_page);
+                        failed_devices = le64_to_cpu(sb->failed_devices);
+                        failed_devices &= ~cleared_failed_devices;
+                        sb->failed_devices = cpu_to_le64(failed_devices);
+                }
+        }
+}
+
 static void raid_resume(struct dm_target *ti)
 {
         struct raid_set *rs = ti->private;
@@ -1580,6 +1637,13 @@ static void raid_resume(struct dm_target *ti)
         if (!rs->bitmap_loaded) {
                 bitmap_load(&rs->md);
                 rs->bitmap_loaded = 1;
+        } else {
+                /*
+                 * A secondary resume while the device is active.
+                 * Take this opportunity to check whether any failed
+                 * devices are reachable again.
+                 */
+                attempt_restore_of_faulty_devices(rs);
         }

         clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
@@ -1588,7 +1652,7 @@ static void raid_resume(struct dm_target *ti)
 static struct target_type raid_target = {
         .name = "raid",
-        .version = {1, 5, 0},
+        .version = {1, 5, 2},
         .module = THIS_MODULE,
         .ctr = raid_ctr,
         .dtr = raid_dtr,
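A standalone sketch of the bitmask bookkeeping in
attempt_restore_of_faulty_devices() above: revived slots are collected into
a mask, which is then cleared from every member's failed_devices field.
(Illustration only; the sample slot numbers are invented, and the sketch
shifts a 64-bit constant where the kernel code shifts a plain int.)

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t failed_devices = 0x16;         /* slots 1, 2 and 4 marked failed */
        uint64_t cleared_failed_devices = 0;
        int revived[] = { 1, 4 };               /* slots whose superblock read back OK */

        /* Collect each successfully revived slot into a mask. */
        for (unsigned i = 0; i < sizeof(revived) / sizeof(revived[0]); i++)
                cleared_failed_devices |= UINT64_C(1) << revived[i];

        /* Drop the revived slots from the on-disk failure record. */
        failed_devices &= ~cleared_failed_devices;
        printf("failed_devices now: %#llx\n",
               (unsigned long long)failed_devices); /* prints 0x4: only slot 2 left */
        return 0;
}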
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -521,6 +521,7 @@ void mddev_init(struct mddev *mddev)
         init_waitqueue_head(&mddev->recovery_wait);
         mddev->reshape_position = MaxSector;
         mddev->reshape_backwards = 0;
+        mddev->last_sync_action = "none";
         mddev->resync_min = 0;
         mddev->resync_max = MaxSector;
         mddev->level = LEVEL_NONE;
@@ -2867,7 +2868,7 @@ static ssize_t
 offset_store(struct md_rdev *rdev, const char *buf, size_t len)
 {
         unsigned long long offset;
-        if (strict_strtoull(buf, 10, &offset) < 0)
+        if (kstrtoull(buf, 10, &offset) < 0)
                 return -EINVAL;
         if (rdev->mddev->pers && rdev->raid_disk >= 0)
                 return -EBUSY;
@@ -2895,7 +2896,7 @@ static ssize_t new_offset_store(struct md_rdev *rdev,
         unsigned long long new_offset;
         struct mddev *mddev = rdev->mddev;

-        if (strict_strtoull(buf, 10, &new_offset) < 0)
+        if (kstrtoull(buf, 10, &new_offset) < 0)
                 return -EINVAL;
         if (mddev->sync_thread)
@@ -2961,7 +2962,7 @@ static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
         unsigned long long blocks;
         sector_t new;

-        if (strict_strtoull(buf, 10, &blocks) < 0)
+        if (kstrtoull(buf, 10, &blocks) < 0)
                 return -EINVAL;
         if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
@@ -3069,7 +3070,7 @@ static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_
         if (cmd_match(buf, "none"))
                 recovery_start = MaxSector;
-        else if (strict_strtoull(buf, 10, &recovery_start))
+        else if (kstrtoull(buf, 10, &recovery_start))
                 return -EINVAL;
         if (rdev->mddev->pers &&
@@ -3497,7 +3498,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
         if (clevel[len-1] == '\n')
                 len--;
         clevel[len] = 0;
-        if (strict_strtol(clevel, 10, &level))
+        if (kstrtol(clevel, 10, &level))
                 level = LEVEL_NONE;

         if (request_module("md-%s", clevel) != 0)
@@ -4272,6 +4273,17 @@ action_store(struct mddev *mddev, const char *page, size_t len)
         return len;
 }

+static struct md_sysfs_entry md_scan_mode =
+__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
+
+static ssize_t
+last_sync_action_show(struct mddev *mddev, char *page)
+{
+        return sprintf(page, "%s\n", mddev->last_sync_action);
+}
+
+static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
+
 static ssize_t
 mismatch_cnt_show(struct mddev *mddev, char *page)
 {
@@ -4280,10 +4292,6 @@ mismatch_cnt_show(struct mddev *mddev, char *page)
                        atomic64_read(&mddev->resync_mismatches));
 }

-static struct md_sysfs_entry md_scan_mode =
-__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
-
 static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);

 static ssize_t
@@ -4356,7 +4364,7 @@ sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
 {
         long n;

-        if (strict_strtol(buf, 10, &n))
+        if (kstrtol(buf, 10, &n))
                 return -EINVAL;

         if (n != 0 && n != 1)
@@ -4424,7 +4432,7 @@ static ssize_t
 min_sync_store(struct mddev *mddev, const char *buf, size_t len)
 {
         unsigned long long min;
-        if (strict_strtoull(buf, 10, &min))
+        if (kstrtoull(buf, 10, &min))
                 return -EINVAL;
         if (min > mddev->resync_max)
                 return -EINVAL;
@@ -4461,7 +4469,7 @@ max_sync_store(struct mddev *mddev, const char *buf, size_t len)
                 mddev->resync_max = MaxSector;
         else {
                 unsigned long long max;
-                if (strict_strtoull(buf, 10, &max))
+                if (kstrtoull(buf, 10, &max))
                         return -EINVAL;
                 if (max < mddev->resync_min)
                         return -EINVAL;
@@ -4686,6 +4694,7 @@ static struct attribute *md_default_attrs[] = {
 static struct attribute *md_redundancy_attrs[] = {
         &md_scan_mode.attr,
+        &md_last_scan_mode.attr,
         &md_mismatches.attr,
         &md_sync_min.attr,
         &md_sync_max.attr,
@@ -6405,6 +6414,12 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
                 /* need to ensure md_delayed_delete() has completed */
                 flush_workqueue(md_misc_wq);

+        if (cmd == HOT_REMOVE_DISK)
+                /* need to ensure recovery thread has run */
+                wait_event_interruptible_timeout(mddev->sb_wait,
+                                                 !test_bit(MD_RECOVERY_NEEDED,
+                                                           &mddev->flags),
+                                                 msecs_to_jiffies(5000));
+
         err = mddev_lock(mddev);
         if (err) {
                 printk(KERN_INFO
@@ -7323,7 +7338,7 @@ void md_do_sync(struct md_thread *thread)
         sector_t last_check;
         int skipped = 0;
         struct md_rdev *rdev;
-        char *desc;
+        char *desc, *action = NULL;
         struct blk_plug plug;

         /* just incase thread restarts... */
@@ -7333,17 +7348,21 @@ void md_do_sync(struct md_thread *thread)
                 return;

         if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
-                if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+                if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
                         desc = "data-check";
-                else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+                        action = "check";
+                } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
                         desc = "requested-resync";
-                else
+                        action = "repair";
+                } else
                         desc = "resync";
         } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
                 desc = "reshape";
         else
                 desc = "recovery";

+        mddev->last_sync_action = action ?: desc;
+
         /* we overload curr_resync somewhat here.
          * 0 == not engaged in resync at all
          * 2 == checking that there is no conflict with another sync
@@ -7892,6 +7911,8 @@ void md_check_recovery(struct mddev *mddev)
                         md_new_event(mddev);
                 }
         unlock:
+                wake_up(&mddev->sb_wait);
+
                 if (!mddev->sync_thread) {
                         clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
                         if (test_and_clear_bit(MD_RECOVERY_RECOVER,
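The md_do_sync() change above is where the value exposed through the new
'last_sync_action' file is chosen. The following userspace sketch
(illustration only) mirrors that flag-to-name mapping: the sysfs names
"check" and "repair" are reported rather than the log strings "data-check"
and "requested-resync", so the file always matches what was written to
'sync_action':

#include <stdbool.h>
#include <stdio.h>

static const char *last_sync_action(bool sync, bool check, bool requested,
                                    bool reshape)
{
        const char *desc, *action = NULL;

        if (sync) {
                if (check) {
                        desc = "data-check";
                        action = "check";
                } else if (requested) {
                        desc = "requested-resync";
                        action = "repair";
                } else
                        desc = "resync";
        } else if (reshape)
                desc = "reshape";
        else
                desc = "recovery";

        return action ? action : desc;  /* the kernel's "action ?: desc" */
}

int main(void)
{
        /* a "check" sets both the CHECK and REQUESTED bits; CHECK wins */
        printf("%s\n", last_sync_action(true, true, true, false));   /* check */
        printf("%s\n", last_sync_action(true, false, true, false));  /* repair */
        printf("%s\n", last_sync_action(false, false, false, false)); /* recovery */
        return 0;
}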
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -268,6 +268,14 @@ struct mddev {
         struct md_thread                *thread;        /* management thread */
         struct md_thread                *sync_thread;   /* doing resync or reconstruct */

+        /* 'last_sync_action' is initialized to "none".  It is set when a
+         * sync operation (i.e "data-check", "requested-resync", "resync",
+         * "recovery", or "reshape") is started.  It holds this value even
+         * when the sync thread is "frozen" (interrupted) or "idle" (stopped
+         * or finished).  It is overwritten when a new sync operation is begun.
+         */
+        char                            *last_sync_action;
         sector_t                        curr_resync;    /* last block scheduled */

         /* As resync requests can complete out of order, we cannot easily track
          * how much resync has been completed.  So we occasionally pause until
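Reading the new attribute from userspace is a one-liner; a sketch (the
/sys/block/md0 path is an assumption for illustration — the file reads
"none" until the first sync operation starts):

#include <stdio.h>

int main(void)
{
        char buf[32];
        FILE *f = fopen("/sys/block/md0/md/last_sync_action", "r");

        if (!f) {
                perror("last_sync_action");
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("last sync action: %s", buf);
        fclose(f);
        return 0;
}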
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -597,6 +597,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
                                mdname(mddev));
                         return ERR_PTR(-EINVAL);
                 }
+                rdev->sectors = mddev->dev_sectors;
         }

         /* Set new parameters */
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1519,8 +1519,9 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                 p = conf->mirrors+mirror;
                 if (!p->rdev) {
-                        disk_stack_limits(mddev->gendisk, rdev->bdev,
-                                          rdev->data_offset << 9);
+                        if (mddev->gendisk)
+                                disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                                  rdev->data_offset << 9);
                         p->head_position = 0;
                         rdev->raid_disk = mirror;
@@ -1559,7 +1560,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                 clear_bit(Unmerged, &rdev->flags);
         }
         md_integrity_add_rdev(rdev, mddev);
-        if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+        if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
                 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
         print_conf(conf);
         return err;
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -97,7 +97,7 @@ static int max_queued_requests = 1024;
 static void allow_barrier(struct r10conf *conf);
 static void lower_barrier(struct r10conf *conf);
-static int enough(struct r10conf *conf, int ignore);
+static int _enough(struct r10conf *conf, int previous, int ignore);
 static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
                                 int *skipped);
 static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
@@ -392,11 +392,9 @@ static void raid10_end_read_request(struct bio *bio, int error)
                  * than fail the last device.  Here we redefine
                  * "uptodate" to mean "Don't want to retry"
                  */
-                unsigned long flags;
-                spin_lock_irqsave(&conf->device_lock, flags);
-                if (!enough(conf, rdev->raid_disk))
+                if (!_enough(conf, test_bit(R10BIO_Previous, &r10_bio->state),
+                             rdev->raid_disk))
                         uptodate = 1;
-                spin_unlock_irqrestore(&conf->device_lock, flags);
         }
         if (uptodate) {
                 raid_end_bio_io(r10_bio);
@@ -1632,37 +1630,58 @@ static void status(struct seq_file *seq, struct mddev *mddev)
  * Don't consider the device numbered 'ignore'
  * as we might be about to remove it.
  */
-static int _enough(struct r10conf *conf, struct geom *geo, int ignore)
+static int _enough(struct r10conf *conf, int previous, int ignore)
 {
         int first = 0;
+        int has_enough = 0;
+        int disks, ncopies;
+        if (previous) {
+                disks = conf->prev.raid_disks;
+                ncopies = conf->prev.near_copies;
+        } else {
+                disks = conf->geo.raid_disks;
+                ncopies = conf->geo.near_copies;
+        }

+        rcu_read_lock();
         do {
                 int n = conf->copies;
                 int cnt = 0;
                 int this = first;
                 while (n--) {
-                        if (conf->mirrors[this].rdev &&
-                            this != ignore)
+                        struct md_rdev *rdev;
+                        if (this != ignore &&
+                            (rdev = rcu_dereference(conf->mirrors[this].rdev)) &&
+                            test_bit(In_sync, &rdev->flags))
                                 cnt++;
-                        this = (this+1) % geo->raid_disks;
+                        this = (this+1) % disks;
                 }
                 if (cnt == 0)
-                        return 0;
-                first = (first + geo->near_copies) % geo->raid_disks;
+                        goto out;
+                first = (first + ncopies) % disks;
         } while (first != 0);
-        return 1;
+        has_enough = 1;
+out:
+        rcu_read_unlock();
+        return has_enough;
 }

 static int enough(struct r10conf *conf, int ignore)
 {
-        return _enough(conf, &conf->geo, ignore) &&
-                _enough(conf, &conf->prev, ignore);
+        /* when calling 'enough', both 'prev' and 'geo' must
+         * be stable.
+         * This is ensured if ->reconfig_mutex or ->device_lock
+         * is held.
+         */
+        return _enough(conf, 0, ignore) &&
+               _enough(conf, 1, ignore);
 }

 static void error(struct mddev *mddev, struct md_rdev *rdev)
 {
         char b[BDEVNAME_SIZE];
         struct r10conf *conf = mddev->private;
+        unsigned long flags;

         /*
          * If it is not operational, then we have already marked it as dead
@@ -1670,18 +1689,18 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
          * next level up know.
          * else mark the drive as failed
          */
+        spin_lock_irqsave(&conf->device_lock, flags);
         if (test_bit(In_sync, &rdev->flags)
-            && !enough(conf, rdev->raid_disk))
+            && !enough(conf, rdev->raid_disk)) {
                 /*
                  * Don't fail the drive, just return an IO error.
                  */
+                spin_unlock_irqrestore(&conf->device_lock, flags);
                 return;
+        }
         if (test_and_clear_bit(In_sync, &rdev->flags)) {
-                unsigned long flags;
-                spin_lock_irqsave(&conf->device_lock, flags);
                 mddev->degraded++;
-                spin_unlock_irqrestore(&conf->device_lock, flags);
                 /*
                  * if recovery is running, make sure it aborts.
                  */
                 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -1689,6 +1708,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
         set_bit(Blocked, &rdev->flags);
         set_bit(Faulty, &rdev->flags);
         set_bit(MD_CHANGE_DEVS, &mddev->flags);
+        spin_unlock_irqrestore(&conf->device_lock, flags);
         printk(KERN_ALERT
                "md/raid10:%s: Disk failure on %s, disabling device.\n"
                "md/raid10:%s: Operation continuing on %d devices.\n",
@@ -1791,7 +1811,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                  * very different from resync
                  */
                 return -EBUSY;
-        if (rdev->saved_raid_disk < 0 && !_enough(conf, &conf->prev, -1))
+        if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
                 return -EINVAL;

         if (rdev->raid_disk >= 0)
@@ -1819,15 +1839,17 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                         set_bit(Replacement, &rdev->flags);
                         rdev->raid_disk = mirror;
                         err = 0;
-                        disk_stack_limits(mddev->gendisk, rdev->bdev,
-                                          rdev->data_offset << 9);
+                        if (mddev->gendisk)
+                                disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                                  rdev->data_offset << 9);
                         conf->fullsync = 1;
                         rcu_assign_pointer(p->replacement, rdev);
                         break;
                 }

-                disk_stack_limits(mddev->gendisk, rdev->bdev,
-                                  rdev->data_offset << 9);
+                if (mddev->gendisk)
+                        disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                          rdev->data_offset << 9);

                 p->head_position = 0;
                 p->recovery_disabled = mddev->recovery_disabled - 1;
@@ -2909,14 +2931,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
          */
         if (mddev->bitmap == NULL &&
             mddev->recovery_cp == MaxSector &&
+            mddev->reshape_position == MaxSector &&
+            !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
             !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
+            !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
             conf->fullsync == 0) {
                 *skipped = 1;
-                max_sector = mddev->dev_sectors;
-                if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
-                    test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-                        max_sector = mddev->resync_max_sectors;
-                return max_sector - sector_nr;
+                return mddev->dev_sectors - sector_nr;
         }

 skipped:
@@ -3532,7 +3553,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
         /* FIXME calc properly */
         conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks +
-                                                            max(0,mddev->delta_disks)),
+                                                            max(0,-mddev->delta_disks)),
                                 GFP_KERNEL);
         if (!conf->mirrors)
                 goto out;
@@ -3691,7 +3712,7 @@ static int run(struct mddev *mddev)
                     conf->geo.far_offset == 0)
                         goto out_free_conf;
                 if (conf->prev.far_copies != 1 &&
-                    conf->geo.far_offset == 0)
+                    conf->prev.far_offset == 0)
                         goto out_free_conf;
         }
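The reworked _enough() above implements the same survivability rule as
before, now under rcu_read_lock() and counting only In_sync devices. A
standalone sketch of its counting loop for a simple "near" layout
(illustration only): the array survives only if every group of
'near_copies' consecutive slots still has at least one usable member.

#include <stdbool.h>
#include <stdio.h>

static bool enough(const bool *in_sync, int disks, int ncopies,
                   int copies, int ignore)
{
        int first = 0;

        do {
                int n = copies, cnt = 0, this = first;

                /* count usable members holding copies of this chunk set */
                while (n--) {
                        if (this != ignore && in_sync[this])
                                cnt++;
                        this = (this + 1) % disks;
                }
                if (cnt == 0)
                        return false;   /* a whole copy-set is gone */
                first = (first + ncopies) % disks;
        } while (first != 0);
        return true;
}

int main(void)
{
        /* 4 disks, 2 near copies: each chunk lives on disks {0,1} or {2,3} */
        bool in_sync[4] = { true, false, true, true };

        printf("%d\n", enough(in_sync, 4, 2, 2, -1)); /* 1: disk 0 covers disk 1 */
        in_sync[0] = false;
        printf("%d\n", enough(in_sync, 4, 2, 2, -1)); /* 0: copy-set {0,1} lost */
        return 0;
}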
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4924,7 +4924,7 @@ raid5_store_stripe_cache_size(struct mddev *mddev, const char *page, size_t len)
         if (!conf)
                 return -ENODEV;

-        if (strict_strtoul(page, 10, &new))
+        if (kstrtoul(page, 10, &new))
                 return -EINVAL;
         err = raid5_set_cache_size(mddev, new);
         if (err)
@@ -4957,7 +4957,7 @@ raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len)
         if (!conf)
                 return -ENODEV;

-        if (strict_strtoul(page, 10, &new))
+        if (kstrtoul(page, 10, &new))
                 return -EINVAL;
         if (new > conf->max_nr_stripes)
                 return -EINVAL;
@@ -5914,7 +5914,7 @@ static int check_reshape(struct mddev *mddev)
                 return 0; /* nothing to do */
         if (has_failed(conf))
                 return -EINVAL;
-        if (mddev->delta_disks < 0) {
+        if (mddev->delta_disks < 0 && mddev->reshape_position == MaxSector) {
                 /* We might be able to shrink, but the devices must
                  * be made bigger first.
                  * For raid6, 4 is the minimum size.