Commit c916ca35 authored by Yu Kuai's avatar Yu Kuai Committed by Song Liu

md/raid1: Fix data corruption for degraded array with slow disk

read_balance() will avoid reading from slow disks as much as possible,
however, if valid data only lands in slow disks, and a new normal disk
is still in recovery, unrecovered data can be read:

raid1_read_request
 read_balance
  raid1_should_read_first
  -> return false
  choose_best_rdev
  -> normal disk is not recovered, return -1
  choose_bb_rdev
  -> missing the checking of recovery, return the normal disk
 -> read unrecovered data

Root cause is that the checking of recovery is missing in
choose_bb_rdev(). Hence add such checking to fix the problem.

Also fix similar problem in choose_slow_rdev().

Cc: stable@vger.kernel.org
Fixes: 9f3ced79 ("md/raid1: factor out choose_bb_rdev() from read_balance()")
Fixes: dfa8ecd1 ("md/raid1: factor out choose_slow_rdev() from read_balance()")
Reported-and-tested-by: default avatarMateusz Jończyk <mat.jonczyk@o2.pl>
Closes: https://lore.kernel.org/all/9952f532-2554-44bf-b906-4880b2e88e3a@o2.pl/Signed-off-by: default avatarYu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20240803091137.3197008-1-yukuai1@huaweicloud.comSigned-off-by: default avatarSong Liu <song@kernel.org>
parent 7db40423
...@@ -617,6 +617,12 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio, ...@@ -617,6 +617,12 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio,
return -1; return -1;
} }
static bool rdev_in_recovery(struct md_rdev *rdev, struct r1bio *r1_bio)
{
return !test_bit(In_sync, &rdev->flags) &&
rdev->recovery_offset < r1_bio->sector + r1_bio->sectors;
}
static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio,
int *max_sectors) int *max_sectors)
{ {
...@@ -635,6 +641,7 @@ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, ...@@ -635,6 +641,7 @@ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio,
rdev = conf->mirrors[disk].rdev; rdev = conf->mirrors[disk].rdev;
if (!rdev || test_bit(Faulty, &rdev->flags) || if (!rdev || test_bit(Faulty, &rdev->flags) ||
rdev_in_recovery(rdev, r1_bio) ||
test_bit(WriteMostly, &rdev->flags)) test_bit(WriteMostly, &rdev->flags))
continue; continue;
...@@ -673,7 +680,8 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio, ...@@ -673,7 +680,8 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
rdev = conf->mirrors[disk].rdev; rdev = conf->mirrors[disk].rdev;
if (!rdev || test_bit(Faulty, &rdev->flags) || if (!rdev || test_bit(Faulty, &rdev->flags) ||
!test_bit(WriteMostly, &rdev->flags)) !test_bit(WriteMostly, &rdev->flags) ||
rdev_in_recovery(rdev, r1_bio))
continue; continue;
/* there are no bad blocks, we can use this disk */ /* there are no bad blocks, we can use this disk */
...@@ -733,9 +741,7 @@ static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio) ...@@ -733,9 +741,7 @@ static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio)
if (!rdev || test_bit(Faulty, &rdev->flags)) if (!rdev || test_bit(Faulty, &rdev->flags))
return false; return false;
/* still in recovery */ if (rdev_in_recovery(rdev, r1_bio))
if (!test_bit(In_sync, &rdev->flags) &&
rdev->recovery_offset < r1_bio->sector + r1_bio->sectors)
return false; return false;
/* don't read from slow disk unless have to */ /* don't read from slow disk unless have to */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment