Commit c40f341f authored by Goldwyn Rodrigues

md-cluster: Use a small window for resync

Suspending the entire device for resync could take too long. Resync
in small chunks.

The cluster's resync window (32M) is maintained in r1conf as
cluster_sync_low and cluster_sync_high, and is processed in
raid1's sync_request(). If the current resync is outside the cluster
resync window:

1. Set cluster_sync_low to curr_resync_completed.
2. Check whether the sync will fit in the new window; if not, issue a
   wait_barrier() and set cluster_sync_low to sector_nr.
3. Set cluster_sync_high to cluster_sync_low + resync_window.
4. Send a message to all nodes so they can add the window to their
   suspension list.

bitmap_cond_end_sync() is modified to allow forcing a sync in order
to bring curr_resync_completed up to date with the sector passed.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: NeilBrown <neilb@suse.de>
parent 3c462c88
...@@ -1570,7 +1570,7 @@ void bitmap_close_sync(struct bitmap *bitmap) ...@@ -1570,7 +1570,7 @@ void bitmap_close_sync(struct bitmap *bitmap)
} }
EXPORT_SYMBOL(bitmap_close_sync); EXPORT_SYMBOL(bitmap_close_sync);
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
{ {
sector_t s = 0; sector_t s = 0;
sector_t blocks; sector_t blocks;
...@@ -1581,7 +1581,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) ...@@ -1581,7 +1581,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
bitmap->last_end_sync = jiffies; bitmap->last_end_sync = jiffies;
return; return;
} }
if (time_before(jiffies, (bitmap->last_end_sync if (!force && time_before(jiffies, (bitmap->last_end_sync
+ bitmap->mddev->bitmap_info.daemon_sleep))) + bitmap->mddev->bitmap_info.daemon_sleep)))
return; return;
wait_event(bitmap->mddev->recovery_wait, wait_event(bitmap->mddev->recovery_wait,
......
...@@ -257,7 +257,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, ...@@ -257,7 +257,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded); int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted); void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
void bitmap_close_sync(struct bitmap *bitmap); void bitmap_close_sync(struct bitmap *bitmap);
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector); void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
void bitmap_unplug(struct bitmap *bitmap); void bitmap_unplug(struct bitmap *bitmap);
void bitmap_daemon_work(struct mddev *mddev); void bitmap_daemon_work(struct mddev *mddev);
......
...@@ -802,15 +802,6 @@ static int slot_number(struct mddev *mddev) ...@@ -802,15 +802,6 @@ static int slot_number(struct mddev *mddev)
return cinfo->slot_number - 1; return cinfo->slot_number - 1;
} }
static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
/* Re-acquire the lock to refresh LVB */
dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
}
static int metadata_update_start(struct mddev *mddev) static int metadata_update_start(struct mddev *mddev)
{ {
return lock_comm(mddev->cluster_info); return lock_comm(mddev->cluster_info);
...@@ -836,45 +827,25 @@ static int metadata_update_cancel(struct mddev *mddev) ...@@ -836,45 +827,25 @@ static int metadata_update_cancel(struct mddev *mddev)
return dlm_unlock_sync(cinfo->token_lockres); return dlm_unlock_sync(cinfo->token_lockres);
} }
static int resync_send(struct mddev *mddev, enum msg_type type, static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
sector_t lo, sector_t hi)
{ {
struct md_cluster_info *cinfo = mddev->cluster_info; struct md_cluster_info *cinfo = mddev->cluster_info;
struct cluster_msg cmsg; struct cluster_msg cmsg;
int slot = cinfo->slot_number - 1; int slot = cinfo->slot_number - 1;
add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
/* Re-acquire the lock to refresh LVB */
dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__, pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
(unsigned long long)lo, (unsigned long long)lo,
(unsigned long long)hi); (unsigned long long)hi);
resync_info_update(mddev, lo, hi); cmsg.type = cpu_to_le32(RESYNCING);
cmsg.type = cpu_to_le32(type);
cmsg.slot = cpu_to_le32(slot); cmsg.slot = cpu_to_le32(slot);
cmsg.low = cpu_to_le64(lo); cmsg.low = cpu_to_le64(lo);
cmsg.high = cpu_to_le64(hi); cmsg.high = cpu_to_le64(hi);
return sendmsg(cinfo, &cmsg); return sendmsg(cinfo, &cmsg);
} }
static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi)
{
pr_info("%s:%d\n", __func__, __LINE__);
return resync_send(mddev, RESYNCING, lo, hi);
}
static void resync_finish(struct mddev *mddev)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
struct cluster_msg cmsg;
int slot = cinfo->slot_number - 1;
pr_info("%s:%d\n", __func__, __LINE__);
resync_send(mddev, RESYNCING, 0, 0);
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC);
cmsg.slot = cpu_to_le32(slot);
sendmsg(cinfo, &cmsg);
}
}
static int area_resyncing(struct mddev *mddev, int direction, static int area_resyncing(struct mddev *mddev, int direction,
sector_t lo, sector_t hi) sector_t lo, sector_t hi)
{ {
...@@ -997,8 +968,6 @@ static struct md_cluster_operations cluster_ops = { ...@@ -997,8 +968,6 @@ static struct md_cluster_operations cluster_ops = {
.leave = leave, .leave = leave,
.slot_number = slot_number, .slot_number = slot_number,
.resync_info_update = resync_info_update, .resync_info_update = resync_info_update,
.resync_start = resync_start,
.resync_finish = resync_finish,
.metadata_update_start = metadata_update_start, .metadata_update_start = metadata_update_start,
.metadata_update_finish = metadata_update_finish, .metadata_update_finish = metadata_update_finish,
.metadata_update_cancel = metadata_update_cancel, .metadata_update_cancel = metadata_update_cancel,
......
...@@ -12,9 +12,7 @@ struct md_cluster_operations { ...@@ -12,9 +12,7 @@ struct md_cluster_operations {
int (*join)(struct mddev *mddev, int nodes); int (*join)(struct mddev *mddev, int nodes);
int (*leave)(struct mddev *mddev); int (*leave)(struct mddev *mddev);
int (*slot_number)(struct mddev *mddev); int (*slot_number)(struct mddev *mddev);
void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi); int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
int (*resync_start)(struct mddev *mddev, sector_t lo, sector_t hi);
void (*resync_finish)(struct mddev *mddev);
int (*metadata_update_start)(struct mddev *mddev); int (*metadata_update_start)(struct mddev *mddev);
int (*metadata_update_finish)(struct mddev *mddev); int (*metadata_update_finish)(struct mddev *mddev);
int (*metadata_update_cancel)(struct mddev *mddev); int (*metadata_update_cancel)(struct mddev *mddev);
......
...@@ -7805,9 +7805,6 @@ void md_do_sync(struct md_thread *thread) ...@@ -7805,9 +7805,6 @@ void md_do_sync(struct md_thread *thread)
md_new_event(mddev); md_new_event(mddev);
update_time = jiffies; update_time = jiffies;
if (mddev_is_clustered(mddev))
md_cluster_ops->resync_start(mddev, j, max_sectors);
blk_start_plug(&plug); blk_start_plug(&plug);
while (j < max_sectors) { while (j < max_sectors) {
sector_t sectors; sector_t sectors;
...@@ -7871,8 +7868,6 @@ void md_do_sync(struct md_thread *thread) ...@@ -7871,8 +7868,6 @@ void md_do_sync(struct md_thread *thread)
j = max_sectors; j = max_sectors;
if (j > 2) if (j > 2)
mddev->curr_resync = j; mddev->curr_resync = j;
if (mddev_is_clustered(mddev))
md_cluster_ops->resync_info_update(mddev, j, max_sectors);
mddev->curr_mark_cnt = io_sectors; mddev->curr_mark_cnt = io_sectors;
if (last_check == 0) if (last_check == 0)
/* this is the earliest that rebuild will be /* this is the earliest that rebuild will be
...@@ -7979,9 +7974,6 @@ void md_do_sync(struct md_thread *thread) ...@@ -7979,9 +7974,6 @@ void md_do_sync(struct md_thread *thread)
} }
} }
skip: skip:
if (mddev_is_clustered(mddev))
md_cluster_ops->resync_finish(mddev);
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
spin_lock(&mddev->lock); spin_lock(&mddev->lock);
......
...@@ -90,6 +90,8 @@ static void r1bio_pool_free(void *r1_bio, void *data) ...@@ -90,6 +90,8 @@ static void r1bio_pool_free(void *r1_bio, void *data)
#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
#define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH) #define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9) #define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS) #define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
...@@ -2488,6 +2490,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp ...@@ -2488,6 +2490,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
bitmap_close_sync(mddev->bitmap); bitmap_close_sync(mddev->bitmap);
close_sync(conf); close_sync(conf);
if (mddev_is_clustered(mddev)) {
conf->cluster_sync_low = 0;
conf->cluster_sync_high = 0;
/* Send zeros to mark end of resync */
md_cluster_ops->resync_info_update(mddev, 0, 0);
}
return 0; return 0;
} }
...@@ -2508,7 +2517,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp ...@@ -2508,7 +2517,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
return sync_blocks; return sync_blocks;
} }
bitmap_cond_end_sync(mddev->bitmap, sector_nr); /* we are incrementing sector_nr below. To be safe, we check against
* sector_nr + two times RESYNC_SECTORS
*/
bitmap_cond_end_sync(mddev->bitmap, sector_nr,
mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
raise_barrier(conf, sector_nr); raise_barrier(conf, sector_nr);
...@@ -2699,6 +2713,16 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp ...@@ -2699,6 +2713,16 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
bio_full: bio_full:
r1_bio->sectors = nr_sectors; r1_bio->sectors = nr_sectors;
if (mddev_is_clustered(mddev) &&
conf->cluster_sync_high < sector_nr + nr_sectors) {
conf->cluster_sync_low = mddev->curr_resync_completed;
conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS;
/* Send resync message */
md_cluster_ops->resync_info_update(mddev,
conf->cluster_sync_low,
conf->cluster_sync_high);
}
/* For a user-requested sync, we read all readable devices and do a /* For a user-requested sync, we read all readable devices and do a
* compare * compare
*/ */
......
...@@ -111,6 +111,13 @@ struct r1conf { ...@@ -111,6 +111,13 @@ struct r1conf {
* the new thread here until we fully activate the array. * the new thread here until we fully activate the array.
*/ */
struct md_thread *thread; struct md_thread *thread;
/* Keep track of cluster resync window to send to other
* nodes.
*/
sector_t cluster_sync_low;
sector_t cluster_sync_high;
}; };
/* /*
......
...@@ -3137,7 +3137,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, ...@@ -3137,7 +3137,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
/* resync. Schedule a read for every block at this virt offset */ /* resync. Schedule a read for every block at this virt offset */
int count = 0; int count = 0;
bitmap_cond_end_sync(mddev->bitmap, sector_nr); bitmap_cond_end_sync(mddev->bitmap, sector_nr, 0);
if (!bitmap_start_sync(mddev->bitmap, sector_nr, if (!bitmap_start_sync(mddev->bitmap, sector_nr,
&sync_blocks, mddev->degraded) && &sync_blocks, mddev->degraded) &&
......
...@@ -5613,7 +5613,7 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int ...@@ -5613,7 +5613,7 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */ return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
} }
bitmap_cond_end_sync(mddev->bitmap, sector_nr); bitmap_cond_end_sync(mddev->bitmap, sector_nr, false);
sh = get_active_stripe(conf, sector_nr, 0, 1, 0); sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
if (sh == NULL) { if (sh == NULL) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment