Commit 86b1e613 authored by Jens Axboe

Merge tag 'md-6.9-20240301' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.9/block

Pull MD updates from Song:

"The major changes are:

 1. Refactor raid1 read_balance, by Yu Kuai and Paul Luse.
 2. Clean up and fix for md_ioctl, by Li Nan.
 3. Other small fixes, by Gui-Dong Han and Heming Zhao."

* tag 'md-6.9-20240301' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md: (22 commits)
  md/raid1: factor out helpers to choose the best rdev from read_balance()
  md/raid1: factor out the code to manage sequential IO
  md/raid1: factor out choose_bb_rdev() from read_balance()
  md/raid1: factor out choose_slow_rdev() from read_balance()
  md/raid1: factor out read_first_rdev() from read_balance()
  md/raid1-10: factor out a new helper raid1_should_read_first()
  md/raid1-10: add a helper raid1_check_read_range()
  md/raid1: fix choose next idle in read_balance()
  md/raid1: record nonrot rdevs while adding/removing rdevs to conf
  md/raid1: factor out helpers to add rdev to conf
  md: add a new helper rdev_has_badblock()
  md/raid5: fix atomicity violation in raid5_cache_count
  md/md-bitmap: fix incorrect usage for sb_index
  md: check mddev->pers before calling md_set_readonly()
  md: clean up openers check in do_md_stop() and md_set_readonly()
  md: sync blockdev before stopping raid or setting readonly
  md: factor out a helper to sync mddev
  md: Don't clear MD_CLOSING when the raid is about to stop
  md: return directly before setting did_set_md_closing
  md: clean up invalid BUG_ON in md_ioctl
  ...
parents 13fe8e68 e81faa91
drivers/md/md-bitmap.c:

@@ -234,7 +234,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
     sector_t doff;

     bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;

-    if (pg_index == store->file_pages - 1) {
+    /* we compare length (page numbers), not page offset. */
+    if ((pg_index - store->sb_index) == store->file_pages - 1) {
         unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);

         if (last_page_size == 0)
@@ -438,8 +439,8 @@ static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index,
     struct page *page = store->filemap[pg_index];

     if (mddev_is_clustered(bitmap->mddev)) {
-        pg_index += bitmap->cluster_slot *
-            DIV_ROUND_UP(store->bytes, PAGE_SIZE);
+        /* go to node bitmap area starting point */
+        pg_index += store->sb_index;
     }

     if (store->file)
@@ -952,6 +953,7 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
     unsigned long index = file_page_index(store, chunk);
     unsigned long node_offset = 0;

+    index += store->sb_index;
     if (mddev_is_clustered(bitmap->mddev))
         node_offset = bitmap->cluster_slot * store->file_pages;
@@ -982,6 +984,7 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
     unsigned long index = file_page_index(store, chunk);
     unsigned long node_offset = 0;

+    index += store->sb_index;
     if (mddev_is_clustered(bitmap->mddev))
         node_offset = bitmap->cluster_slot * store->file_pages;
drivers/md/md.c:

@@ -529,6 +529,24 @@ void mddev_resume(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(mddev_resume);

+/* sync bdev before setting device to readonly or stopping raid*/
+static int mddev_set_closing_and_sync_blockdev(struct mddev *mddev, int opener_num)
+{
+    mutex_lock(&mddev->open_mutex);
+    if (mddev->pers && atomic_read(&mddev->openers) > opener_num) {
+        mutex_unlock(&mddev->open_mutex);
+        return -EBUSY;
+    }
+    if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
+        mutex_unlock(&mddev->open_mutex);
+        return -EBUSY;
+    }
+    mutex_unlock(&mddev->open_mutex);
+
+    sync_blockdev(mddev->gendisk->part0);
+    return 0;
+}
+
 /*
  * Generic flush handling for md
  */
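(Editor's note, not part of the commit: the opener_num argument encodes how the caller reached the device. A sketch of the two call sites introduced later in this diff:)

    /* sysfs store: the writer holds no open fd on the array */
    err = mddev_set_closing_and_sync_blockdev(mddev, 0);

    /* ioctl: the caller itself accounts for one opener */
    err = mddev_set_closing_and_sync_blockdev(mddev, 1);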
@@ -4464,8 +4482,8 @@ array_state_show(struct mddev *mddev, char *page)
     return sprintf(page, "%s\n", array_states[st]);
 }

-static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
-static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
+static int do_md_stop(struct mddev *mddev, int ro);
+static int md_set_readonly(struct mddev *mddev);
 static int restart_array(struct mddev *mddev);

 static ssize_t
@@ -4482,6 +4500,17 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
     case broken:        /* cannot be set */
     case bad_word:
         return -EINVAL;
+    case clear:
+    case readonly:
+    case inactive:
+    case read_auto:
+        if (!mddev->pers || !md_is_rdwr(mddev))
+            break;
+        /* write sysfs will not open mddev and opener should be 0 */
+        err = mddev_set_closing_and_sync_blockdev(mddev, 0);
+        if (err)
+            return err;
+        break;
     default:
         break;
     }
@@ -4515,14 +4544,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
     case inactive:
         /* stop an active array, return 0 otherwise */
         if (mddev->pers)
-            err = do_md_stop(mddev, 2, NULL);
+            err = do_md_stop(mddev, 2);
         break;
     case clear:
-        err = do_md_stop(mddev, 0, NULL);
+        err = do_md_stop(mddev, 0);
         break;
     case readonly:
         if (mddev->pers)
-            err = md_set_readonly(mddev, NULL);
+            err = md_set_readonly(mddev);
         else {
             mddev->ro = MD_RDONLY;
             set_disk_ro(mddev->gendisk, 1);
@@ -4532,7 +4561,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
     case read_auto:
         if (mddev->pers) {
             if (md_is_rdwr(mddev))
-                err = md_set_readonly(mddev, NULL);
+                err = md_set_readonly(mddev);
             else if (mddev->ro == MD_RDONLY)
                 err = restart_array(mddev);
             if (err == 0) {
@@ -4581,6 +4610,11 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
         sysfs_notify_dirent_safe(mddev->sysfs_state);
     }
     mddev_unlock(mddev);
+
+    if (st == readonly || st == read_auto || st == inactive ||
+        (err && st == clear))
+        clear_bit(MD_CLOSING, &mddev->flags);
+
     return err ?: len;
 }

 static struct md_sysfs_entry md_array_state =
@@ -6265,7 +6299,15 @@ static void md_clean(struct mddev *mddev)
     mddev->persistent = 0;
     mddev->level = LEVEL_NONE;
     mddev->clevel[0] = 0;
-    mddev->flags = 0;
+    /*
+     * Don't clear MD_CLOSING, or mddev can be opened again.
+     * 'hold_active != 0' means mddev is still in the creation
+     * process and will be used later.
+     */
+    if (mddev->hold_active)
+        mddev->flags = 0;
+    else
+        mddev->flags &= BIT_ULL_MASK(MD_CLOSING);
     mddev->sb_flags = 0;
     mddev->ro = MD_RDWR;
     mddev->metadata_type[0] = 0;
@@ -6378,7 +6420,8 @@ void md_stop(struct mddev *mddev)
 EXPORT_SYMBOL_GPL(md_stop);

-static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
+/* ensure 'mddev->pers' exist before calling md_set_readonly() */
+static int md_set_readonly(struct mddev *mddev)
 {
     int err = 0;
     int did_freeze = 0;
@@ -6396,15 +6439,12 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
            !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
     mddev_lock_nointr(mddev);

-    mutex_lock(&mddev->open_mutex);
-    if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
-        test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+    if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
         pr_warn("md: %s still in use.\n",mdname(mddev));
         err = -EBUSY;
         goto out;
     }

-    if (mddev->pers) {
-        __md_stop_writes(mddev);
+    __md_stop_writes(mddev);

-        if (mddev->ro == MD_RDONLY) {
+    if (mddev->ro == MD_RDONLY) {
@@ -6414,16 +6454,14 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
     mddev->ro = MD_RDONLY;
     set_disk_ro(mddev->gendisk, 1);
-    }

 out:
-    if ((mddev->pers && !err) || did_freeze) {
+    if (!err || did_freeze) {
         clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
         set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
         sysfs_notify_dirent_safe(mddev->sysfs_state);
     }
-    mutex_unlock(&mddev->open_mutex);
     return err;
 }
@@ -6431,8 +6469,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
  *   0 - completely stop and dis-assemble array
  *   2 - stop but do not disassemble array
  */
-static int do_md_stop(struct mddev *mddev, int mode,
-                      struct block_device *bdev)
+static int do_md_stop(struct mddev *mddev, int mode)
 {
     struct gendisk *disk = mddev->gendisk;
     struct md_rdev *rdev;
@@ -6445,12 +6482,9 @@ static int do_md_stop(struct mddev *mddev, int mode,
     stop_sync_thread(mddev, true, false);

-    mutex_lock(&mddev->open_mutex);
-    if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
-        mddev->sysfs_active ||
+    if (mddev->sysfs_active ||
         test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
         pr_warn("md: %s still in use.\n",mdname(mddev));
-        mutex_unlock(&mddev->open_mutex);
         if (did_freeze) {
             clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
             set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -6472,13 +6506,11 @@ static int do_md_stop(struct mddev *mddev, int mode,
                 sysfs_unlink_rdev(mddev, rdev);

         set_capacity_and_notify(disk, 0);
-        mutex_unlock(&mddev->open_mutex);
         mddev->changed = 1;

         if (!md_is_rdwr(mddev))
             mddev->ro = MD_RDWR;
-    } else
-        mutex_unlock(&mddev->open_mutex);
+    }

     /*
      * Free resources if final stop
      */
@@ -6524,7 +6556,7 @@ static void autorun_array(struct mddev *mddev)
     err = do_md_run(mddev);
     if (err) {
         pr_warn("md: do_md_run() returned %d\n", err);
-        do_md_stop(mddev, 0, NULL);
+        do_md_stop(mddev, 0);
     }
 }
@@ -7522,16 +7554,17 @@ static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
     return 0;
 }

-static inline bool md_ioctl_valid(unsigned int cmd)
+static inline int md_ioctl_valid(unsigned int cmd)
 {
     switch (cmd) {
-    case ADD_NEW_DISK:
     case GET_ARRAY_INFO:
-    case GET_BITMAP_FILE:
     case GET_DISK_INFO:
+    case RAID_VERSION:
+        return 0;
+    case ADD_NEW_DISK:
+    case GET_BITMAP_FILE:
     case HOT_ADD_DISK:
     case HOT_REMOVE_DISK:
-    case RAID_VERSION:
     case RESTART_ARRAY_RW:
     case RUN_ARRAY:
     case SET_ARRAY_INFO:
@@ -7540,9 +7573,11 @@ static inline bool md_ioctl_valid(unsigned int cmd)
     case STOP_ARRAY:
     case STOP_ARRAY_RO:
     case CLUSTERED_DISK_NACK:
-        return true;
+        if (!capable(CAP_SYS_ADMIN))
+            return -EACCES;
+        return 0;
     default:
-        return false;
+        return -ENOTTY;
     }
 }
@@ -7600,31 +7635,17 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
     int err = 0;
     void __user *argp = (void __user *)arg;
     struct mddev *mddev = NULL;
-    bool did_set_md_closing = false;

-    if (!md_ioctl_valid(cmd))
-        return -ENOTTY;
-
-    switch (cmd) {
-    case RAID_VERSION:
-    case GET_ARRAY_INFO:
-    case GET_DISK_INFO:
-        break;
-    default:
-        if (!capable(CAP_SYS_ADMIN))
-            return -EACCES;
-    }
+    err = md_ioctl_valid(cmd);
+    if (err)
+        return err;

     /*
      * Commands dealing with the RAID driver but not any
      * particular array:
      */
-    switch (cmd) {
-    case RAID_VERSION:
-        err = get_version(argp);
-        goto out;
-    default:;
-    }
+    if (cmd == RAID_VERSION)
+        return get_version(argp);

     /*
      * Commands creating/starting a new array:
@@ -7632,35 +7653,23 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
     mddev = bdev->bd_disk->private_data;

-    if (!mddev) {
-        BUG();
-        goto out;
-    }
-
     /* Some actions do not requires the mutex */
     switch (cmd) {
     case GET_ARRAY_INFO:
         if (!mddev->raid_disks && !mddev->external)
-            err = -ENODEV;
-        else
-            err = get_array_info(mddev, argp);
-        goto out;
+            return -ENODEV;
+        return get_array_info(mddev, argp);

     case GET_DISK_INFO:
         if (!mddev->raid_disks && !mddev->external)
-            err = -ENODEV;
-        else
-            err = get_disk_info(mddev, argp);
-        goto out;
+            return -ENODEV;
+        return get_disk_info(mddev, argp);

     case SET_DISK_FAULTY:
-        err = set_disk_faulty(mddev, new_decode_dev(arg));
-        goto out;
+        return set_disk_faulty(mddev, new_decode_dev(arg));

     case GET_BITMAP_FILE:
-        err = get_bitmap_file(mddev, argp);
-        goto out;
+        return get_bitmap_file(mddev, argp);
     }

     if (cmd == HOT_REMOVE_DISK)
@@ -7673,20 +7682,9 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
         /* Need to flush page cache, and ensure no-one else opens
          * and writes
          */
-        mutex_lock(&mddev->open_mutex);
-        if (mddev->pers && atomic_read(&mddev->openers) > 1) {
-            mutex_unlock(&mddev->open_mutex);
-            err = -EBUSY;
-            goto out;
-        }
-        if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
-            mutex_unlock(&mddev->open_mutex);
-            err = -EBUSY;
-            goto out;
-        }
-        did_set_md_closing = true;
-        mutex_unlock(&mddev->open_mutex);
-        sync_blockdev(bdev);
+        err = mddev_set_closing_and_sync_blockdev(mddev, 1);
+        if (err)
+            return err;
     }

     if (!md_is_rdwr(mddev))
@@ -7727,11 +7725,12 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
         goto unlock;

     case STOP_ARRAY:
-        err = do_md_stop(mddev, 0, bdev);
+        err = do_md_stop(mddev, 0);
         goto unlock;

     case STOP_ARRAY_RO:
-        err = md_set_readonly(mddev, bdev);
+        if (mddev->pers)
+            err = md_set_readonly(mddev);
         goto unlock;

     case HOT_REMOVE_DISK:
@@ -7826,7 +7825,7 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
     mddev_unlock(mddev);

 out:
-    if(did_set_md_closing)
+    if (cmd == STOP_ARRAY_RO || (err && cmd == STOP_ARRAY))
         clear_bit(MD_CLOSING, &mddev->flags);
     return err;
 }
drivers/md/md.h:

@@ -207,6 +207,7 @@ enum flag_bits {
                  * check if there is collision between raid1
                  * serial bios.
                  */
+    Nonrot,          /* non-rotational device (SSD) */
 };

 static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
@@ -222,6 +223,16 @@ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
     }
     return 0;
 }

+static inline int rdev_has_badblock(struct md_rdev *rdev, sector_t s,
+                                    int sectors)
+{
+    sector_t first_bad;
+    int bad_sectors;
+
+    return is_badblock(rdev, s, sectors, &first_bad, &bad_sectors);
+}
+
 extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                               int is_new);
 extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
drivers/md/raid1-10.c:

@@ -227,3 +227,72 @@ static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
     return false;
 }
+
+/**
+ * raid1_check_read_range() - check a given read range for bad blocks,
+ * available read length is returned;
+ * @rdev: the rdev to read;
+ * @this_sector: read position;
+ * @len: read length;
+ *
+ * helper function for read_balance()
+ *
+ * 1) If there are no bad blocks in the range, @len is returned;
+ * 2) If the range are all bad blocks, 0 is returned;
+ * 3) If there are partial bad blocks:
+ *  - If the bad block range starts after @this_sector, the length of first
+ *  good region is returned;
+ *  - If the bad block range starts before @this_sector, 0 is returned and
+ *  the @len is updated to the offset into the region before we get to the
+ *  good blocks;
+ */
+static inline int raid1_check_read_range(struct md_rdev *rdev,
+                                         sector_t this_sector, int *len)
+{
+    sector_t first_bad;
+    int bad_sectors;
+
+    /* no bad block overlap */
+    if (!is_badblock(rdev, this_sector, *len, &first_bad, &bad_sectors))
+        return *len;
+
+    /*
+     * bad block range starts offset into our range so we can return the
+     * number of sectors before the bad blocks start.
+     */
+    if (first_bad > this_sector)
+        return first_bad - this_sector;
+
+    /* read range is fully consumed by bad blocks. */
+    if (this_sector + *len <= first_bad + bad_sectors)
+        return 0;
+
+    /*
+     * final case, bad block range starts before or at the start of our
+     * range but does not cover our entire range so we still return 0 but
+     * update the length with the number of sectors before we get to the
+     * good ones.
+     */
+    *len = first_bad + bad_sectors - this_sector;
+    return 0;
+}
+
+/*
+ * Check if read should choose the first rdev.
+ *
+ * Balance on the whole device if no resync is going on (recovery is ok) or
+ * below the resync window. Otherwise, take the first readable disk.
+ */
+static inline bool raid1_should_read_first(struct mddev *mddev,
+                                           sector_t this_sector, int len)
+{
+    if ((mddev->recovery_cp < this_sector + len))
+        return true;
+
+    if (mddev_is_clustered(mddev) &&
+        md_cluster_ops->area_resyncing(mddev, READ, this_sector,
+                                       this_sector + len))
+        return true;
+
+    return false;
+}
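(Editor's note, not part of the commit: a worked example of the three cases in the raid1_check_read_range() comment above, using hypothetical numbers. Assume rdev carries one bad range covering sectors 100..119, i.e. first_bad = 100 and bad_sectors = 20:)

    int len;

    len = 64;                           /* read [80, 144) */
    raid1_check_read_range(rdev, 80, &len);
    /* returns 100 - 80 = 20: the good region ahead of the bad range */

    len = 8;                            /* read [104, 112), fully bad */
    raid1_check_read_range(rdev, 104, &len);
    /* returns 0; len is left untouched */

    len = 64;                           /* read [110, 174), bad at the head */
    raid1_check_read_range(rdev, 110, &len);
    /* returns 0 and sets len = 100 + 20 - 110 = 10: the bad sectors to
     * skip before good data resumes at sector 120 */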
[Diff collapsed in the original view: the drivers/md/raid1.c changes (the read_balance() refactoring) are not shown here.]
drivers/md/raid1.h:

@@ -71,6 +71,7 @@ struct r1conf {
                      * allow for replacements.
                      */
     int         raid_disks;
+    int         nonrot_disks;

     spinlock_t  device_lock;
drivers/md/raid10.c:

@@ -518,11 +518,7 @@ static void raid10_end_write_request(struct bio *bio)
          * The 'master' represents the composite IO operation to
          * user-side. So if something waits for IO, then it will
          * wait for the 'master' bio.
-         */
-        sector_t first_bad;
-        int bad_sectors;
-
-        /*
+         *
          * Do not set R10BIO_Uptodate if the current device is
          * rebuilding or Faulty. This is because we cannot use
          * such device for properly reading the data back (we could
@@ -535,10 +531,9 @@ static void raid10_end_write_request(struct bio *bio)
             set_bit(R10BIO_Uptodate, &r10_bio->state);

         /* Maybe we can clear some bad blocks. */
-        if (is_badblock(rdev,
-                        r10_bio->devs[slot].addr,
-                        r10_bio->sectors,
-                        &first_bad, &bad_sectors) && !discard_error) {
+        if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr,
+                              r10_bio->sectors) &&
+            !discard_error) {
             bio_put(bio);
             if (repl)
                 r10_bio->devs[slot].repl_bio = IO_MADE_GOOD;
@@ -753,17 +748,8 @@ static struct md_rdev *read_balance(struct r10conf *conf,
     best_good_sectors = 0;
     do_balance = 1;
     clear_bit(R10BIO_FailFast, &r10_bio->state);
-    /*
-     * Check if we can balance. We can balance on the whole
-     * device if no resync is going on (recovery is ok), or below
-     * the resync window. We take the first readable disk when
-     * above the resync window.
-     */
-    if ((conf->mddev->recovery_cp < MaxSector
-         && (this_sector + sectors >= conf->next_resync)) ||
-        (mddev_is_clustered(conf->mddev) &&
-         md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
-                                        this_sector + sectors)))
+
+    if (raid1_should_read_first(conf->mddev, this_sector, sectors))
         do_balance = 0;

     for (slot = 0; slot < conf->copies ; slot++) {
@@ -1330,10 +1316,7 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
         }

         if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
-            sector_t first_bad;
             sector_t dev_sector = r10_bio->devs[i].addr;
-            int bad_sectors;
-            int is_bad;

             /*
              * Discard request doesn't care the write result
@@ -1342,9 +1325,8 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
             if (!r10_bio->sectors)
                 continue;

-            is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors,
-                                 &first_bad, &bad_sectors);
-            if (is_bad < 0) {
+            if (rdev_has_badblock(rdev, dev_sector,
+                                  r10_bio->sectors) < 0) {
                 /*
                  * Mustn't write here until the bad block
                  * is acknowledged
@@ -2290,8 +2272,6 @@ static void end_sync_write(struct bio *bio)
     struct mddev *mddev = r10_bio->mddev;
     struct r10conf *conf = mddev->private;
     int d;
-    sector_t first_bad;
-    int bad_sectors;
     int slot;
     int repl;
     struct md_rdev *rdev = NULL;
@@ -2312,11 +2292,10 @@ static void end_sync_write(struct bio *bio)
                     &rdev->mddev->recovery);
             set_bit(R10BIO_WriteError, &r10_bio->state);
         }
-    } else if (is_badblock(rdev,
-                           r10_bio->devs[slot].addr,
-                           r10_bio->sectors,
-                           &first_bad, &bad_sectors))
+    } else if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr,
+                                 r10_bio->sectors)) {
         set_bit(R10BIO_MadeGood, &r10_bio->state);
+    }

     rdev_dec_pending(rdev, mddev);
@@ -2597,11 +2576,8 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
                             int sectors, struct page *page, enum req_op op)
 {
-    sector_t first_bad;
-    int bad_sectors;
-
-    if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
-        && (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags)))
+    if (rdev_has_badblock(rdev, sector, sectors) &&
+        (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags)))
         return -1;
     if (sync_page_io(rdev, sector, sectors << 9, page, op, false))
         /* success */
@@ -2658,16 +2634,14 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
         s = PAGE_SIZE >> 9;

         do {
-            sector_t first_bad;
-            int bad_sectors;
-
             d = r10_bio->devs[sl].devnum;
             rdev = conf->mirrors[d].rdev;
             if (rdev &&
                 test_bit(In_sync, &rdev->flags) &&
                 !test_bit(Faulty, &rdev->flags) &&
-                is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
-                            &first_bad, &bad_sectors) == 0) {
+                rdev_has_badblock(rdev,
+                                  r10_bio->devs[sl].addr + sect,
+                                  s) == 0) {
                 atomic_inc(&rdev->nr_pending);
                 success = sync_page_io(rdev,
                                        r10_bio->devs[sl].addr +
drivers/md/raid5.c:

@@ -1210,10 +1210,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
          */
         while (op_is_write(op) && rdev &&
                test_bit(WriteErrorSeen, &rdev->flags)) {
-            sector_t first_bad;
-            int bad_sectors;
-            int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
-                                  &first_bad, &bad_sectors);
+            int bad = rdev_has_badblock(rdev, sh->sector,
+                                        RAID5_STRIPE_SECTORS(conf));
             if (!bad)
                 break;
@@ -2412,7 +2410,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
     atomic_inc(&conf->active_stripes);

     raid5_release_stripe(sh);
-    conf->max_nr_stripes++;
+    WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes + 1);
     return 1;
 }
@@ -2707,7 +2705,7 @@ static int drop_one_stripe(struct r5conf *conf)
     shrink_buffers(sh);
     free_stripe(conf->slab_cache, sh);
     atomic_dec(&conf->active_stripes);
-    conf->max_nr_stripes--;
+    WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes - 1);
     return 1;
 }
@@ -2855,8 +2853,6 @@ static void raid5_end_write_request(struct bio *bi)
     struct r5conf *conf = sh->raid_conf;
     int disks = sh->disks, i;
     struct md_rdev *rdev;
-    sector_t first_bad;
-    int bad_sectors;
     int replacement = 0;

     for (i = 0 ; i < disks; i++) {
@@ -2888,9 +2884,8 @@ static void raid5_end_write_request(struct bio *bi)
     if (replacement) {
         if (bi->bi_status)
             md_error(conf->mddev, rdev);
-        else if (is_badblock(rdev, sh->sector,
-                             RAID5_STRIPE_SECTORS(conf),
-                             &first_bad, &bad_sectors))
+        else if (rdev_has_badblock(rdev, sh->sector,
+                                   RAID5_STRIPE_SECTORS(conf)))
             set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
     } else {
         if (bi->bi_status) {
@@ -2900,9 +2895,8 @@ static void raid5_end_write_request(struct bio *bi)
             if (!test_and_set_bit(WantReplacement, &rdev->flags))
                 set_bit(MD_RECOVERY_NEEDED,
                         &rdev->mddev->recovery);
-        } else if (is_badblock(rdev, sh->sector,
-                               RAID5_STRIPE_SECTORS(conf),
-                               &first_bad, &bad_sectors)) {
+        } else if (rdev_has_badblock(rdev, sh->sector,
+                                     RAID5_STRIPE_SECTORS(conf))) {
             set_bit(R5_MadeGood, &sh->dev[i].flags);
             if (test_bit(R5_ReadError, &sh->dev[i].flags))
                 /* That was a successful write so make
@@ -4674,8 +4668,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
     /* Now to look around and see what can be done */
     for (i=disks; i--; ) {
         struct md_rdev *rdev;
-        sector_t first_bad;
-        int bad_sectors;
         int is_bad = 0;

         dev = &sh->dev[i];
@@ -4719,8 +4711,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
         rdev = conf->disks[i].replacement;
         if (rdev && !test_bit(Faulty, &rdev->flags) &&
             rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) &&
-            !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
-                         &first_bad, &bad_sectors))
+            !rdev_has_badblock(rdev, sh->sector,
+                               RAID5_STRIPE_SECTORS(conf)))
             set_bit(R5_ReadRepl, &dev->flags);
         else {
             if (rdev && !test_bit(Faulty, &rdev->flags))
@@ -4733,8 +4725,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
         if (rdev && test_bit(Faulty, &rdev->flags))
             rdev = NULL;
         if (rdev) {
-            is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
-                                 &first_bad, &bad_sectors);
+            is_bad = rdev_has_badblock(rdev, sh->sector,
+                                       RAID5_STRIPE_SECTORS(conf));
             if (s->blocked_rdev == NULL
                 && (test_bit(Blocked, &rdev->flags)
                     || is_bad < 0)) {
@@ -5463,8 +5455,8 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
     struct r5conf *conf = mddev->private;
     struct bio *align_bio;
     struct md_rdev *rdev;
-    sector_t sector, end_sector, first_bad;
-    int bad_sectors, dd_idx;
+    sector_t sector, end_sector;
+    int dd_idx;
     bool did_inc;

     if (!in_chunk_boundary(mddev, raid_bio)) {
@@ -5493,8 +5485,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
     atomic_inc(&rdev->nr_pending);

-    if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
-                    &bad_sectors)) {
+    if (rdev_has_badblock(rdev, sector, bio_sectors(raid_bio))) {
         rdev_dec_pending(rdev, mddev);
         return 0;
     }
@@ -6820,7 +6811,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
     if (size <= 16 || size > 32768)
         return -EINVAL;

-    conf->min_nr_stripes = size;
+    WRITE_ONCE(conf->min_nr_stripes, size);
     mutex_lock(&conf->cache_size_mutex);
     while (size < conf->max_nr_stripes &&
            drop_one_stripe(conf))
@@ -6832,7 +6823,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
     mutex_lock(&conf->cache_size_mutex);
     while (size > conf->max_nr_stripes)
         if (!grow_one_stripe(conf, GFP_KERNEL)) {
-            conf->min_nr_stripes = conf->max_nr_stripes;
+            WRITE_ONCE(conf->min_nr_stripes, conf->max_nr_stripes);
             result = -ENOMEM;
             break;
         }
@@ -7388,11 +7379,13 @@ static unsigned long raid5_cache_count(struct shrinker *shrink,
                                        struct shrink_control *sc)
 {
     struct r5conf *conf = shrink->private_data;
+    int max_stripes = READ_ONCE(conf->max_nr_stripes);
+    int min_stripes = READ_ONCE(conf->min_nr_stripes);

-    if (conf->max_nr_stripes < conf->min_nr_stripes)
+    if (max_stripes < min_stripes)
         /* unlikely, but not impossible */
         return 0;
-    return conf->max_nr_stripes - conf->min_nr_stripes;
+    return max_stripes - min_stripes;
 }

 static struct r5conf *setup_conf(struct mddev *mddev)
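(Editor's note on the raid5_cache_count() fix, explanatory and not part of the commit: the shrinker callback runs without conf->cache_size_mutex, so it races with grow_one_stripe()/drop_one_stripe(). Before the change, each counter was read twice, allowing an interleaving such as:)

    /*
     *   shrinker (unlocked)                writer (under mutex)
     *   -------------------                --------------------
     *   guard: max (256) < min (128)? no
     *                                      WRITE_ONCE(min_nr_stripes, 512)
     *   return max (256) - min (512)   ->  bogus huge count
     *
     * Sampling each counter exactly once with READ_ONCE(), paired with
     * WRITE_ONCE() on the locked writers, makes the guard and the
     * subtraction see the same snapshot and stops the compiler from
     * tearing, fusing, or repeating the loads.
     */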