Commit 76073054 authored by NeilBrown

md/raid1: clean up read_balance.

read_balance has two loops which both look for a 'best'
device based on slightly different criteria.
This is clumsy and makes it hard to add extra criteria.

So replace it all with a single loop that combines everything.
Signed-off-by: NeilBrown <neilb@suse.de>
parent 56d99121
...@@ -411,10 +411,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) ...@@ -411,10 +411,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
{ {
const sector_t this_sector = r1_bio->sector; const sector_t this_sector = r1_bio->sector;
const int sectors = r1_bio->sectors; const int sectors = r1_bio->sectors;
int new_disk = -1;
int start_disk; int start_disk;
int best_disk;
int i; int i;
sector_t new_distance, current_distance; sector_t best_dist;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
int choose_first; int choose_first;
...@@ -425,6 +425,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) ...@@ -425,6 +425,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
* We take the first readable disk when above the resync window. * We take the first readable disk when above the resync window.
*/ */
retry: retry:
best_disk = -1;
best_dist = MaxSector;
if (conf->mddev->recovery_cp < MaxSector && if (conf->mddev->recovery_cp < MaxSector &&
(this_sector + sectors >= conf->next_resync)) { (this_sector + sectors >= conf->next_resync)) {
choose_first = 1; choose_first = 1;
...@@ -434,8 +436,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) ...@@ -434,8 +436,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
start_disk = conf->last_used; start_disk = conf->last_used;
} }
/* make sure the disk is operational */
for (i = 0 ; i < conf->raid_disks ; i++) { for (i = 0 ; i < conf->raid_disks ; i++) {
sector_t dist;
int disk = start_disk + i; int disk = start_disk + i;
if (disk >= conf->raid_disks) if (disk >= conf->raid_disks)
disk -= conf->raid_disks; disk -= conf->raid_disks;
...@@ -443,60 +445,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) ...@@ -443,60 +445,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
rdev = rcu_dereference(conf->mirrors[disk].rdev); rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (r1_bio->bios[disk] == IO_BLOCKED if (r1_bio->bios[disk] == IO_BLOCKED
|| rdev == NULL || rdev == NULL
|| !test_bit(In_sync, &rdev->flags)) || test_bit(Faulty, &rdev->flags))
continue;
if (!test_bit(In_sync, &rdev->flags) &&
rdev->recovery_offset < this_sector + sectors)
continue;
if (test_bit(WriteMostly, &rdev->flags)) {
/* Don't balance among write-mostly, just
* use the first as a last resort */
if (best_disk < 0)
best_disk = disk;
continue; continue;
new_disk = disk;
if (!test_bit(WriteMostly, &rdev->flags))
break;
} }
/* This is a reasonable device to use. It might
if (new_disk < 0 || choose_first) * even be best.
goto rb_out;
/*
* Don't change to another disk for sequential reads:
*/ */
if (conf->next_seq_sect == this_sector) dist = abs(this_sector - conf->mirrors[disk].head_position);
goto rb_out; if (choose_first
if (this_sector == conf->mirrors[new_disk].head_position) /* Don't change to another disk for sequential reads */
goto rb_out; || conf->next_seq_sect == this_sector
|| dist == 0
current_distance = abs(this_sector /* If device is idle, use it */
- conf->mirrors[new_disk].head_position); || atomic_read(&rdev->nr_pending) == 0) {
best_disk = disk;
/* look for a better disk - i.e. head is closer */
start_disk = new_disk;
for (i = 1; i < conf->raid_disks; i++) {
int disk = start_disk + 1;
if (disk >= conf->raid_disks)
disk -= conf->raid_disks;
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (r1_bio->bios[disk] == IO_BLOCKED
|| rdev == NULL
|| !test_bit(In_sync, &rdev->flags)
|| test_bit(WriteMostly, &rdev->flags))
continue;
if (!atomic_read(&rdev->nr_pending)) {
new_disk = disk;
break; break;
} }
new_distance = abs(this_sector - conf->mirrors[disk].head_position); if (dist < best_dist) {
if (new_distance < current_distance) { best_dist = dist;
current_distance = new_distance; best_disk = disk;
new_disk = disk;
} }
} }
rb_out: if (best_disk >= 0) {
if (new_disk >= 0) { rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
if (!rdev) if (!rdev)
goto retry; goto retry;
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
if (!test_bit(In_sync, &rdev->flags)) { if (test_bit(Faulty, &rdev->flags)) {
/* cannot risk returning a device that failed /* cannot risk returning a device that failed
* before we inc'ed nr_pending * before we inc'ed nr_pending
*/ */
...@@ -504,11 +489,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) ...@@ -504,11 +489,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
goto retry; goto retry;
} }
conf->next_seq_sect = this_sector + sectors; conf->next_seq_sect = this_sector + sectors;
conf->last_used = new_disk; conf->last_used = best_disk;
} }
rcu_read_unlock(); rcu_read_unlock();
return new_disk; return best_disk;
} }
static int raid1_congested(void *data, int bits) static int raid1_congested(void *data, int bits)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment