Commit 97f6cd39 authored by Goldwyn Rodrigues, committed by NeilBrown

md-cluster: re-add capabilities

When "re-add" is written to /sys/block/mdXX/md/dev-YYY/state,
the clustered md:

1. Sends RE_ADD message with the desc_nr. Nodes receiving the message
   clear the Faulty bit in their respective rdev->flags.
2. The node initiating the re-add gathers the bitmaps of all nodes
   and copies them into the local bitmap. It does not clear the bitmap
   from which it is copying.
3. The initiating node schedules an md recovery to sync the devices.
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: NeilBrown <neilb@suse.de>
parent a6da4ef8
...@@ -1851,7 +1851,7 @@ EXPORT_SYMBOL_GPL(bitmap_load); ...@@ -1851,7 +1851,7 @@ EXPORT_SYMBOL_GPL(bitmap_load);
* to our bitmap * to our bitmap
*/ */
int bitmap_copy_from_slot(struct mddev *mddev, int slot, int bitmap_copy_from_slot(struct mddev *mddev, int slot,
sector_t *low, sector_t *high) sector_t *low, sector_t *high, bool clear_bits)
{ {
int rv = 0, i, j; int rv = 0, i, j;
sector_t block, lo = 0, hi = 0; sector_t block, lo = 0, hi = 0;
...@@ -1882,14 +1882,16 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot, ...@@ -1882,14 +1882,16 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot,
} }
} }
bitmap_update_sb(bitmap); if (clear_bits) {
/* Setting this for the ev_page should be enough. bitmap_update_sb(bitmap);
* And we do not require both write_all and PAGE_DIRT either /* Setting this for the ev_page should be enough.
*/ * And we do not require both write_all and PAGE_DIRT either
for (i = 0; i < bitmap->storage.file_pages; i++) */
set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); for (i = 0; i < bitmap->storage.file_pages; i++)
bitmap_write_all(bitmap); set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
bitmap_unplug(bitmap); bitmap_write_all(bitmap);
bitmap_unplug(bitmap);
}
*low = lo; *low = lo;
*high = hi; *high = hi;
err: err:
......
...@@ -263,7 +263,7 @@ void bitmap_daemon_work(struct mddev *mddev); ...@@ -263,7 +263,7 @@ void bitmap_daemon_work(struct mddev *mddev);
int bitmap_resize(struct bitmap *bitmap, sector_t blocks, int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
int chunksize, int init); int chunksize, int init);
int bitmap_copy_from_slot(struct mddev *mddev, int slot, int bitmap_copy_from_slot(struct mddev *mddev, int slot,
sector_t *lo, sector_t *hi); sector_t *lo, sector_t *hi, bool clear_bits);
#endif #endif
#endif #endif
...@@ -73,6 +73,7 @@ enum msg_type { ...@@ -73,6 +73,7 @@ enum msg_type {
RESYNCING, RESYNCING,
NEWDISK, NEWDISK,
REMOVE, REMOVE,
RE_ADD,
}; };
struct cluster_msg { struct cluster_msg {
...@@ -253,7 +254,7 @@ static void recover_bitmaps(struct md_thread *thread) ...@@ -253,7 +254,7 @@ static void recover_bitmaps(struct md_thread *thread)
str, ret); str, ret);
goto clear_bit; goto clear_bit;
} }
ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi); ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
if (ret) { if (ret) {
pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
goto dlm_unlock; goto dlm_unlock;
...@@ -412,6 +413,16 @@ static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg) ...@@ -412,6 +413,16 @@ static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
pr_warn("%s: %d Could not find disk(%d) to REMOVE\n", __func__, __LINE__, msg->raid_slot); pr_warn("%s: %d Could not find disk(%d) to REMOVE\n", __func__, __LINE__, msg->raid_slot);
} }
/*
 * Handle a RE_ADD message received from another node.
 *
 * Looks up the rdev whose number matches msg->raid_slot and clears its
 * Faulty flag.  A warning is logged when no such rdev is found or when
 * the rdev found is not currently marked Faulty.
 */
static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
{
	struct md_rdev *rdev;

	/*
	 * The lookup helper is the _rcu variant, so the lookup and every
	 * use of the returned rdev stay inside one RCU read-side section.
	 * NOTE(review): confirm no caller already holds the RCU read lock
	 * in a way that makes this redundant (nesting is harmless).
	 */
	rcu_read_lock();
	rdev = md_find_rdev_nr_rcu(mddev, msg->raid_slot);
	if (rdev && test_bit(Faulty, &rdev->flags))
		clear_bit(Faulty, &rdev->flags);
	else
		pr_warn("%s: %d Could not find disk(%d) which is faulty\n",
			__func__, __LINE__, msg->raid_slot);
	rcu_read_unlock();
}
static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
{ {
switch (msg->type) { switch (msg->type) {
...@@ -436,6 +447,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) ...@@ -436,6 +447,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
__func__, __LINE__, msg->slot); __func__, __LINE__, msg->slot);
process_remove_disk(mddev, msg); process_remove_disk(mddev, msg);
break; break;
case RE_ADD:
pr_info("%s: %d Received RE_ADD from %d\n",
__func__, __LINE__, msg->slot);
process_readd_disk(mddev, msg);
break;
default: default:
pr_warn("%s:%d Received unknown message from %d\n", pr_warn("%s:%d Received unknown message from %d\n",
__func__, __LINE__, msg->slot); __func__, __LINE__, msg->slot);
...@@ -883,6 +899,35 @@ static int remove_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -883,6 +899,35 @@ static int remove_disk(struct mddev *mddev, struct md_rdev *rdev)
return __sendmsg(cinfo, &cmsg); return __sendmsg(cinfo, &cmsg);
} }
static int gather_bitmaps(struct md_rdev *rdev)
{
int sn, err;
sector_t lo, hi;
struct cluster_msg cmsg;
struct mddev *mddev = rdev->mddev;
struct md_cluster_info *cinfo = mddev->cluster_info;
cmsg.type = RE_ADD;
cmsg.raid_slot = rdev->desc_nr;
err = sendmsg(cinfo, &cmsg);
if (err)
goto out;
for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) {
if (sn == (cinfo->slot_number - 1))
continue;
err = bitmap_copy_from_slot(mddev, sn, &lo, &hi, false);
if (err) {
pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn);
goto out;
}
if ((hi > 0) && (lo < mddev->recovery_cp))
mddev->recovery_cp = lo;
}
out:
return err;
}
static struct md_cluster_operations cluster_ops = { static struct md_cluster_operations cluster_ops = {
.join = join, .join = join,
.leave = leave, .leave = leave,
...@@ -898,6 +943,7 @@ static struct md_cluster_operations cluster_ops = { ...@@ -898,6 +943,7 @@ static struct md_cluster_operations cluster_ops = {
.add_new_disk_finish = add_new_disk_finish, .add_new_disk_finish = add_new_disk_finish,
.new_disk_ack = new_disk_ack, .new_disk_ack = new_disk_ack,
.remove_disk = remove_disk, .remove_disk = remove_disk,
.gather_bitmaps = gather_bitmaps,
}; };
static int __init cluster_init(void) static int __init cluster_init(void)
......
...@@ -23,6 +23,7 @@ struct md_cluster_operations { ...@@ -23,6 +23,7 @@ struct md_cluster_operations {
int (*add_new_disk_finish)(struct mddev *mddev); int (*add_new_disk_finish)(struct mddev *mddev);
int (*new_disk_ack)(struct mddev *mddev, bool ack); int (*new_disk_ack)(struct mddev *mddev, bool ack);
int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev); int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
int (*gather_bitmaps)(struct md_rdev *rdev);
}; };
#endif /* _MD_CLUSTER_H */ #endif /* _MD_CLUSTER_H */
...@@ -2596,8 +2596,17 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) ...@@ -2596,8 +2596,17 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
} }
} else if (cmd_match(buf, "re-add")) { } else if (cmd_match(buf, "re-add")) {
if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) { if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
clear_bit(Faulty, &rdev->flags); /* clear_bit is performed _after_ all the devices
err = add_bound_rdev(rdev); * have their local Faulty bit cleared. If any writes
* happen in the meantime in the local node, they
* will land in the local bitmap, which will be synced
* by this node eventually
*/
if (!mddev_is_clustered(rdev->mddev) ||
(err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
clear_bit(Faulty, &rdev->flags);
err = add_bound_rdev(rdev);
}
} else } else
err = -EBUSY; err = -EBUSY;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment