Commit 41158c7e authored by NeilBrown, committed by Linus Torvalds

[PATCH] md: optimise reconstruction when re-adding a recently failed drive.

When an array is degraded, bits in the intent-bitmap are never cleared.  So if
a recently failed drive is re-added, we only need to reconstruct the blocks
that are still reflected in the bitmap.

This patch adds support for this re-adding.
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 289e99e8
...@@ -577,6 +577,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -577,6 +577,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
mdp_disk_t *desc; mdp_disk_t *desc;
mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
rdev->raid_disk = -1;
rdev->in_sync = 0;
if (mddev->raid_disks == 0) { if (mddev->raid_disks == 0) {
mddev->major_version = 0; mddev->major_version = 0;
mddev->minor_version = sb->minor_version; mddev->minor_version = sb->minor_version;
...@@ -607,16 +609,24 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -607,16 +609,24 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
memcpy(mddev->uuid+12,&sb->set_uuid3, 4); memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
mddev->max_disks = MD_SB_DISKS; mddev->max_disks = MD_SB_DISKS;
} else { } else if (mddev->pers == NULL) {
__u64 ev1; /* Insist on good event counter while assembling */
ev1 = md_event(sb); __u64 ev1 = md_event(sb);
++ev1; ++ev1;
if (ev1 < mddev->events) if (ev1 < mddev->events)
return -EINVAL; return -EINVAL;
} } else if (mddev->bitmap) {
/* if adding to array with a bitmap, then we can accept an
* older device ... but not too old.
*/
__u64 ev1 = md_event(sb);
if (ev1 < mddev->bitmap->events_cleared)
return 0;
} else /* just a hot-add of a new device, leave raid_disk at -1 */
return 0;
if (mddev->level != LEVEL_MULTIPATH) { if (mddev->level != LEVEL_MULTIPATH) {
rdev->raid_disk = -1; rdev->faulty = 0;
rdev->in_sync = rdev->faulty = 0;
desc = sb->disks + rdev->desc_nr; desc = sb->disks + rdev->desc_nr;
if (desc->state & (1<<MD_DISK_FAULTY)) if (desc->state & (1<<MD_DISK_FAULTY))
...@@ -626,7 +636,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -626,7 +636,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
rdev->in_sync = 1; rdev->in_sync = 1;
rdev->raid_disk = desc->raid_disk; rdev->raid_disk = desc->raid_disk;
} }
} } else /* MULTIPATH are always insync */
rdev->in_sync = 1;
return 0; return 0;
} }
...@@ -868,6 +879,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -868,6 +879,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
{ {
struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
rdev->raid_disk = -1;
rdev->in_sync = 0;
if (mddev->raid_disks == 0) { if (mddev->raid_disks == 0) {
mddev->major_version = 1; mddev->major_version = 1;
mddev->patch_version = 0; mddev->patch_version = 0;
...@@ -885,13 +898,21 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -885,13 +898,21 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
memcpy(mddev->uuid, sb->set_uuid, 16); memcpy(mddev->uuid, sb->set_uuid, 16);
mddev->max_disks = (4096-256)/2; mddev->max_disks = (4096-256)/2;
} else { } else if (mddev->pers == NULL) {
__u64 ev1; /* Insist on good event counter while assembling */
ev1 = le64_to_cpu(sb->events); __u64 ev1 = le64_to_cpu(sb->events);
++ev1; ++ev1;
if (ev1 < mddev->events) if (ev1 < mddev->events)
return -EINVAL; return -EINVAL;
} } else if (mddev->bitmap) {
/* If adding to array with a bitmap, then we can accept an
* older device, but not too old.
*/
__u64 ev1 = le64_to_cpu(sb->events);
if (ev1 < mddev->bitmap->events_cleared)
return 0;
} else /* just a hot-add of a new device, leave raid_disk at -1 */
return 0;
if (mddev->level != LEVEL_MULTIPATH) { if (mddev->level != LEVEL_MULTIPATH) {
int role; int role;
...@@ -899,14 +920,10 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -899,14 +920,10 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
switch(role) { switch(role) {
case 0xffff: /* spare */ case 0xffff: /* spare */
rdev->in_sync = 0;
rdev->faulty = 0; rdev->faulty = 0;
rdev->raid_disk = -1;
break; break;
case 0xfffe: /* faulty */ case 0xfffe: /* faulty */
rdev->in_sync = 0;
rdev->faulty = 1; rdev->faulty = 1;
rdev->raid_disk = -1;
break; break;
default: default:
rdev->in_sync = 1; rdev->in_sync = 1;
...@@ -914,7 +931,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -914,7 +931,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
rdev->raid_disk = role; rdev->raid_disk = role;
break; break;
} }
} } else /* MULTIPATH are always insync */
rdev->in_sync = 1;
return 0; return 0;
} }
...@@ -2155,6 +2174,18 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) ...@@ -2155,6 +2174,18 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
PTR_ERR(rdev)); PTR_ERR(rdev));
return PTR_ERR(rdev); return PTR_ERR(rdev);
} }
/* set saved_raid_disk if appropriate */
if (!mddev->persistent) {
if (info->state & (1<<MD_DISK_SYNC) &&
info->raid_disk < mddev->raid_disks)
rdev->raid_disk = info->raid_disk;
else
rdev->raid_disk = -1;
} else
super_types[mddev->major_version].
validate_super(mddev, rdev);
rdev->saved_raid_disk = rdev->raid_disk;
rdev->in_sync = 0; /* just to be sure */ rdev->in_sync = 0; /* just to be sure */
rdev->raid_disk = -1; rdev->raid_disk = -1;
err = bind_rdev_to_array(rdev, mddev); err = bind_rdev_to_array(rdev, mddev);
...@@ -3706,6 +3737,14 @@ void md_check_recovery(mddev_t *mddev) ...@@ -3706,6 +3737,14 @@ void md_check_recovery(mddev_t *mddev)
mddev->pers->spare_active(mddev); mddev->pers->spare_active(mddev);
} }
md_update_sb(mddev); md_update_sb(mddev);
/* if array is no-longer degraded, then any saved_raid_disk
* information must be scrapped
*/
if (!mddev->degraded)
ITERATE_RDEV(mddev,rdev,rtmp)
rdev->saved_raid_disk = -1;
mddev->recovery = 0; mddev->recovery = 0;
/* flag recovery needed just to double check */ /* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
......
...@@ -811,9 +811,12 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -811,9 +811,12 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
{ {
conf_t *conf = mddev->private; conf_t *conf = mddev->private;
int found = 0; int found = 0;
int mirror; int mirror = 0;
mirror_info_t *p; mirror_info_t *p;
if (rdev->saved_raid_disk >= 0 &&
conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
mirror = rdev->saved_raid_disk;
for (mirror=0; mirror < mddev->raid_disks; mirror++) for (mirror=0; mirror < mddev->raid_disks; mirror++)
if ( !(p=conf->mirrors+mirror)->rdev) { if ( !(p=conf->mirrors+mirror)->rdev) {
...@@ -830,6 +833,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -830,6 +833,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
p->head_position = 0; p->head_position = 0;
rdev->raid_disk = mirror; rdev->raid_disk = mirror;
found = 1; found = 1;
if (rdev->saved_raid_disk != mirror)
conf->fullsync = 1;
p->rdev = rdev; p->rdev = rdev;
break; break;
} }
......
...@@ -183,6 +183,10 @@ struct mdk_rdev_s ...@@ -183,6 +183,10 @@ struct mdk_rdev_s
int desc_nr; /* descriptor index in the superblock */ int desc_nr; /* descriptor index in the superblock */
int raid_disk; /* role of device in array */ int raid_disk; /* role of device in array */
int saved_raid_disk; /* role that device used to have in the
* array and could again if we did a partial
* resync from the bitmap
*/
atomic_t nr_pending; /* number of pending requests. atomic_t nr_pending; /* number of pending requests.
* only maintained for arrays that * only maintained for arrays that
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment