Commit d276bb29 authored by Jens Axboe's avatar Jens Axboe

Merge tag 'md-next-20230729' of...

Merge tag 'md-next-20230729' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.6/block

Pull MD updates from Song:

"1. Deprecate bitmap file support, by Christoph Hellwig;
 2. Fix deadlock with md sync thread, by Yu Kuai;
 3. Refactor md io accounting, by Yu Kuai;
 4. Various non-urgent fixes by Li Nan, Yu Kuai, and Jack Wang."

* tag 'md-next-20230729' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md: (36 commits)
  md/md-bitmap: hold 'reconfig_mutex' in backlog_store()
  md/md-bitmap: remove unnecessary local variable in backlog_store()
  md/raid10: use dereference_rdev_and_rrdev() to get devices
  md/raid10: factor out dereference_rdev_and_rrdev()
  md/raid10: check replacement and rdev to prevent submit the same io twice
  md/raid1: Avoid lock contention from wake_up()
  md: restore 'noio_flag' for the last mddev_resume()
  md: don't quiesce in mddev_suspend()
  md: remove redundant check in fix_read_error()
  md/raid10: optimize fix_read_error
  md/raid1: prioritize adding disk to 'removed' mirror
  md/md-faulty: enable io accounting
  md/md-linear: enable io accounting
  md/md-multipath: enable io accounting
  md/raid10: switch to use md_account_bio() for io accounting
  md/raid1: switch to use md_account_bio() for io accounting
  raid5: fix missing io accounting in raid5_align_endio()
  md: also clone new io if io accounting is disabled
  md: move initialization and destruction of 'io_acct_set' to md.c
  md: deprecate bitmap file support
  ...
parents 51d74ec9 44abfa6a
...@@ -50,6 +50,16 @@ config MD_AUTODETECT ...@@ -50,6 +50,16 @@ config MD_AUTODETECT
If unsure, say Y. If unsure, say Y.
config MD_BITMAP_FILE
bool "MD bitmap file support (deprecated)"
default y
help
If you say Y here, support for write intent bitmaps in files on an
external file system is enabled. This is an alternative to the internal
bitmaps near the MD superblock, and very problematic code that abuses
various kernel APIs and can only work with files on a file system not
actually sitting on the MD device.
config MD_LINEAR config MD_LINEAR
tristate "Linear (append) mode (deprecated)" tristate "Linear (append) mode (deprecated)"
depends on BLK_DEV_MD depends on BLK_DEV_MD
......
...@@ -3725,7 +3725,6 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, ...@@ -3725,7 +3725,6 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) { if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
if (mddev->sync_thread) { if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_unregister_thread(&mddev->sync_thread);
md_reap_sync_thread(mddev); md_reap_sync_thread(mddev);
} }
} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle) } else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
......
This diff is collapsed.
...@@ -201,6 +201,7 @@ struct bitmap { ...@@ -201,6 +201,7 @@ struct bitmap {
struct file *file; /* backing disk file */ struct file *file; /* backing disk file */
struct page *sb_page; /* cached copy of the bitmap struct page *sb_page; /* cached copy of the bitmap
* file superblock */ * file superblock */
unsigned long sb_index;
struct page **filemap; /* list of cache pages for struct page **filemap; /* list of cache pages for
* the file */ * the file */
unsigned long *filemap_attr; /* attributes associated unsigned long *filemap_attr; /* attributes associated
......
...@@ -204,6 +204,8 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio) ...@@ -204,6 +204,8 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
failit = 1; failit = 1;
} }
} }
md_account_bio(mddev, &bio);
if (failit) { if (failit) {
struct bio *b = bio_alloc_clone(conf->rdev->bdev, bio, GFP_NOIO, struct bio *b = bio_alloc_clone(conf->rdev->bdev, bio, GFP_NOIO,
&mddev->bio_set); &mddev->bio_set);
......
...@@ -238,6 +238,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio) ...@@ -238,6 +238,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
bio = split; bio = split;
} }
md_account_bio(mddev, &bio);
bio_set_dev(bio, tmp_dev->rdev->bdev); bio_set_dev(bio, tmp_dev->rdev->bdev);
bio->bi_iter.bi_sector = bio->bi_iter.bi_sector - bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
start_sector + data_offset; start_sector + data_offset;
......
...@@ -107,6 +107,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio) ...@@ -107,6 +107,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
&& md_flush_request(mddev, bio)) && md_flush_request(mddev, bio))
return true; return true;
md_account_bio(mddev, &bio);
mp_bh = mempool_alloc(&conf->pool, GFP_NOIO); mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
mp_bh->master_bio = bio; mp_bh->master_bio = bio;
......
This diff is collapsed.
...@@ -510,7 +510,7 @@ struct mddev { ...@@ -510,7 +510,7 @@ struct mddev {
struct bio_set sync_set; /* for sync operations like struct bio_set sync_set; /* for sync operations like
* metadata and bitmap writes * metadata and bitmap writes
*/ */
struct bio_set io_acct_set; /* for raid0 and raid5 io accounting */ struct bio_set io_clone_set;
/* Generic flush handling. /* Generic flush handling.
* The last to finish preflush schedules a worker to submit * The last to finish preflush schedules a worker to submit
...@@ -535,6 +535,11 @@ struct mddev { ...@@ -535,6 +535,11 @@ struct mddev {
*/ */
struct list_head deleting; struct list_head deleting;
/* Used to synchronize idle and frozen for action_store() */
struct mutex sync_mutex;
/* The sequence number for sync thread */
atomic_t sync_seq;
bool has_superblocks:1; bool has_superblocks:1;
bool fail_last_dev:1; bool fail_last_dev:1;
bool serialize_policy:1; bool serialize_policy:1;
...@@ -731,7 +736,7 @@ struct md_thread { ...@@ -731,7 +736,7 @@ struct md_thread {
void *private; void *private;
}; };
struct md_io_acct { struct md_io_clone {
struct mddev *mddev; struct mddev *mddev;
struct bio *orig_bio; struct bio *orig_bio;
unsigned long start_time; unsigned long start_time;
...@@ -769,8 +774,6 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev); ...@@ -769,8 +774,6 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev); extern void md_finish_reshape(struct mddev *mddev);
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
struct bio *bio, sector_t start, sector_t size); struct bio *bio, sector_t start, sector_t size);
int acct_bioset_init(struct mddev *mddev);
void acct_bioset_exit(struct mddev *mddev);
void md_account_bio(struct mddev *mddev, struct bio **bio); void md_account_bio(struct mddev *mddev, struct bio **bio);
extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio); extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
......
...@@ -377,7 +377,6 @@ static void raid0_free(struct mddev *mddev, void *priv) ...@@ -377,7 +377,6 @@ static void raid0_free(struct mddev *mddev, void *priv)
struct r0conf *conf = priv; struct r0conf *conf = priv;
free_conf(mddev, conf); free_conf(mddev, conf);
acct_bioset_exit(mddev);
} }
static int raid0_run(struct mddev *mddev) static int raid0_run(struct mddev *mddev)
...@@ -392,16 +391,11 @@ static int raid0_run(struct mddev *mddev) ...@@ -392,16 +391,11 @@ static int raid0_run(struct mddev *mddev)
if (md_check_no_bitmap(mddev)) if (md_check_no_bitmap(mddev))
return -EINVAL; return -EINVAL;
if (acct_bioset_init(mddev)) {
pr_err("md/raid0:%s: alloc acct bioset failed.\n", mdname(mddev));
return -ENOMEM;
}
/* if private is not null, we are here after takeover */ /* if private is not null, we are here after takeover */
if (mddev->private == NULL) { if (mddev->private == NULL) {
ret = create_strip_zones(mddev, &conf); ret = create_strip_zones(mddev, &conf);
if (ret < 0) if (ret < 0)
goto exit_acct_set; return ret;
mddev->private = conf; mddev->private = conf;
} }
conf = mddev->private; conf = mddev->private;
...@@ -432,14 +426,8 @@ static int raid0_run(struct mddev *mddev) ...@@ -432,14 +426,8 @@ static int raid0_run(struct mddev *mddev)
ret = md_integrity_register(mddev); ret = md_integrity_register(mddev);
if (ret) if (ret)
goto free;
return ret;
free:
free_conf(mddev, conf); free_conf(mddev, conf);
exit_acct_set:
acct_bioset_exit(mddev);
return ret; return ret;
} }
......
...@@ -304,8 +304,6 @@ static void call_bio_endio(struct r1bio *r1_bio) ...@@ -304,8 +304,6 @@ static void call_bio_endio(struct r1bio *r1_bio)
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
bio->bi_status = BLK_STS_IOERR; bio->bi_status = BLK_STS_IOERR;
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
bio_end_io_acct(bio, r1_bio->start_time);
bio_endio(bio); bio_endio(bio);
} }
...@@ -791,11 +789,17 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect ...@@ -791,11 +789,17 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
return best_disk; return best_disk;
} }
/*
 * Wake tasks sleeping on conf->wait_barrier, but only when the wait queue
 * actually has a sleeper. With an empty queue the wake_up() call is skipped
 * entirely (presumably to avoid contending on the wait-queue lock in the
 * common no-waiter case -- confirm against wq_has_sleeper() documentation).
 */
static void wake_up_barrier(struct r1conf *conf)
{
	if (!wq_has_sleeper(&conf->wait_barrier))
		return;

	wake_up(&conf->wait_barrier);
}
static void flush_bio_list(struct r1conf *conf, struct bio *bio) static void flush_bio_list(struct r1conf *conf, struct bio *bio)
{ {
/* flush any pending bitmap writes to disk before proceeding w/ I/O */ /* flush any pending bitmap writes to disk before proceeding w/ I/O */
raid1_prepare_flush_writes(conf->mddev->bitmap); raid1_prepare_flush_writes(conf->mddev->bitmap);
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
while (bio) { /* submit pending writes */ while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next; struct bio *next = bio->bi_next;
...@@ -972,7 +976,7 @@ static bool _wait_barrier(struct r1conf *conf, int idx, bool nowait) ...@@ -972,7 +976,7 @@ static bool _wait_barrier(struct r1conf *conf, int idx, bool nowait)
* In case freeze_array() is waiting for * In case freeze_array() is waiting for
* get_unqueued_pending() == extra * get_unqueued_pending() == extra
*/ */
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
/* Wait for the barrier in same barrier unit bucket to drop. */ /* Wait for the barrier in same barrier unit bucket to drop. */
/* Return false when nowait flag is set */ /* Return false when nowait flag is set */
...@@ -1015,7 +1019,7 @@ static bool wait_read_barrier(struct r1conf *conf, sector_t sector_nr, bool nowa ...@@ -1015,7 +1019,7 @@ static bool wait_read_barrier(struct r1conf *conf, sector_t sector_nr, bool nowa
* In case freeze_array() is waiting for * In case freeze_array() is waiting for
* get_unqueued_pending() == extra * get_unqueued_pending() == extra
*/ */
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
/* Wait for array to be unfrozen */ /* Wait for array to be unfrozen */
/* Return false when nowait flag is set */ /* Return false when nowait flag is set */
...@@ -1044,7 +1048,7 @@ static bool wait_barrier(struct r1conf *conf, sector_t sector_nr, bool nowait) ...@@ -1044,7 +1048,7 @@ static bool wait_barrier(struct r1conf *conf, sector_t sector_nr, bool nowait)
static void _allow_barrier(struct r1conf *conf, int idx) static void _allow_barrier(struct r1conf *conf, int idx)
{ {
atomic_dec(&conf->nr_pending[idx]); atomic_dec(&conf->nr_pending[idx]);
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
} }
static void allow_barrier(struct r1conf *conf, sector_t sector_nr) static void allow_barrier(struct r1conf *conf, sector_t sector_nr)
...@@ -1173,7 +1177,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) ...@@ -1173,7 +1177,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->pending_bio_list, &plug->pending); bio_list_merge(&conf->pending_bio_list, &plug->pending);
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
kfree(plug); kfree(plug);
return; return;
...@@ -1303,10 +1307,10 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, ...@@ -1303,10 +1307,10 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
} }
r1_bio->read_disk = rdisk; r1_bio->read_disk = rdisk;
if (!r1bio_existed) {
if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) md_account_bio(mddev, &bio);
r1_bio->start_time = bio_start_io_acct(bio); r1_bio->master_bio = bio;
}
read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp, read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp,
&mddev->bio_set); &mddev->bio_set);
...@@ -1500,8 +1504,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1500,8 +1504,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
r1_bio->sectors = max_sectors; r1_bio->sectors = max_sectors;
} }
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) md_account_bio(mddev, &bio);
r1_bio->start_time = bio_start_io_acct(bio); r1_bio->master_bio = bio;
atomic_set(&r1_bio->remaining, 1); atomic_set(&r1_bio->remaining, 1);
atomic_set(&r1_bio->behind_remaining, 0); atomic_set(&r1_bio->behind_remaining, 0);
...@@ -1576,7 +1580,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1576,7 +1580,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
r1_bio_write_done(r1_bio); r1_bio_write_done(r1_bio);
/* In case raid1d snuck in to freeze_array */ /* In case raid1d snuck in to freeze_array */
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
} }
static bool raid1_make_request(struct mddev *mddev, struct bio *bio) static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
...@@ -1766,7 +1770,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1766,7 +1770,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
{ {
struct r1conf *conf = mddev->private; struct r1conf *conf = mddev->private;
int err = -EEXIST; int err = -EEXIST;
int mirror = 0; int mirror = 0, repl_slot = -1;
struct raid1_info *p; struct raid1_info *p;
int first = 0; int first = 0;
int last = conf->raid_disks - 1; int last = conf->raid_disks - 1;
...@@ -1809,17 +1813,21 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1809,17 +1813,21 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
break; break;
} }
if (test_bit(WantReplacement, &p->rdev->flags) && if (test_bit(WantReplacement, &p->rdev->flags) &&
p[conf->raid_disks].rdev == NULL) { p[conf->raid_disks].rdev == NULL && repl_slot < 0)
repl_slot = mirror;
}
if (err && repl_slot >= 0) {
/* Add this device as a replacement */ /* Add this device as a replacement */
p = conf->mirrors + repl_slot;
clear_bit(In_sync, &rdev->flags); clear_bit(In_sync, &rdev->flags);
set_bit(Replacement, &rdev->flags); set_bit(Replacement, &rdev->flags);
rdev->raid_disk = mirror; rdev->raid_disk = repl_slot;
err = 0; err = 0;
conf->fullsync = 1; conf->fullsync = 1;
rcu_assign_pointer(p[conf->raid_disks].rdev, rdev); rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
break;
}
} }
print_conf(conf); print_conf(conf);
return err; return err;
} }
...@@ -2299,7 +2307,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk, ...@@ -2299,7 +2307,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
d++; d++;
if (d == conf->raid_disks * 2) if (d == conf->raid_disks * 2)
d = 0; d = 0;
} while (!success && d != read_disk); } while (d != read_disk);
if (!success) { if (!success) {
/* Cannot read from anywhere - mark it bad */ /* Cannot read from anywhere - mark it bad */
......
...@@ -157,7 +157,6 @@ struct r1bio { ...@@ -157,7 +157,6 @@ struct r1bio {
sector_t sector; sector_t sector;
int sectors; int sectors;
unsigned long state; unsigned long state;
unsigned long start_time;
struct mddev *mddev; struct mddev *mddev;
/* /*
* original bio going to /dev/mdx * original bio going to /dev/mdx
......
...@@ -325,8 +325,6 @@ static void raid_end_bio_io(struct r10bio *r10_bio) ...@@ -325,8 +325,6 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
bio->bi_status = BLK_STS_IOERR; bio->bi_status = BLK_STS_IOERR;
if (r10_bio->start_time)
bio_end_io_acct(bio, r10_bio->start_time);
bio_endio(bio); bio_endio(bio);
/* /*
* Wake up any possible resync thread that waits for the device * Wake up any possible resync thread that waits for the device
...@@ -1172,7 +1170,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf, ...@@ -1172,7 +1170,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
} }
static void raid10_read_request(struct mddev *mddev, struct bio *bio, static void raid10_read_request(struct mddev *mddev, struct bio *bio,
struct r10bio *r10_bio) struct r10bio *r10_bio, bool io_accounting)
{ {
struct r10conf *conf = mddev->private; struct r10conf *conf = mddev->private;
struct bio *read_bio; struct bio *read_bio;
...@@ -1243,9 +1241,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, ...@@ -1243,9 +1241,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
} }
slot = r10_bio->read_slot; slot = r10_bio->read_slot;
if (!r10_bio->start_time && if (io_accounting) {
blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) md_account_bio(mddev, &bio);
r10_bio->start_time = bio_start_io_acct(bio); r10_bio->master_bio = bio;
}
read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set); read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
r10_bio->devs[slot].bio = read_bio; r10_bio->devs[slot].bio = read_bio;
...@@ -1322,6 +1321,25 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, ...@@ -1322,6 +1321,25 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
} }
} }
/*
 * Fetch both the rdev and the replacement pointer of one raid10 mirror slot.
 *
 * @mirror: the mirror slot whose ->rdev and ->replacement are read.
 * @prrdev: output; receives the replacement pointer, or NULL when the
 *          replacement is the very same device as the returned rdev.
 *
 * Returns the value loaded from mirror->rdev.
 *
 * Both pointers are loaded with rcu_dereference(); the callers visible in
 * this file invoke the helper between rcu_read_lock()/rcu_read_unlock() and
 * NULL-check the results before use.
 */
static struct md_rdev *dereference_rdev_and_rrdev(struct raid10_info *mirror,
struct md_rdev **prrdev)
{
struct md_rdev *rdev, *rrdev;
rrdev = rcu_dereference(mirror->replacement);
/*
* Load the replacement before rdev. While a replacement is being
* promoted to take the place of rdev, loading them in the opposite
* order could observe both pointers as NULL; the full barrier keeps
* the two loads in this order.
*/
smp_mb();
rdev = rcu_dereference(mirror->rdev);
/* Same device seen through both pointers: report it only once, as rdev. */
if (rdev == rrdev)
rrdev = NULL;
*prrdev = rrdev;
return rdev;
}
static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
{ {
int i; int i;
...@@ -1332,11 +1350,9 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -1332,11 +1350,9 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
blocked_rdev = NULL; blocked_rdev = NULL;
rcu_read_lock(); rcu_read_lock();
for (i = 0; i < conf->copies; i++) { for (i = 0; i < conf->copies; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev, *rrdev;
struct md_rdev *rrdev = rcu_dereference(
conf->mirrors[i].replacement); rdev = dereference_rdev_and_rrdev(&conf->mirrors[i], &rrdev);
if (rdev == rrdev)
rrdev = NULL;
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev; blocked_rdev = rdev;
...@@ -1465,15 +1481,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1465,15 +1481,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
int d = r10_bio->devs[i].devnum; int d = r10_bio->devs[i].devnum;
struct md_rdev *rdev, *rrdev; struct md_rdev *rdev, *rrdev;
rrdev = rcu_dereference(conf->mirrors[d].replacement); rdev = dereference_rdev_and_rrdev(&conf->mirrors[d], &rrdev);
/*
* Read replacement first to prevent reading both rdev and
* replacement as NULL during replacement replace rdev.
*/
smp_mb();
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev == rrdev)
rrdev = NULL;
if (rdev && (test_bit(Faulty, &rdev->flags))) if (rdev && (test_bit(Faulty, &rdev->flags)))
rdev = NULL; rdev = NULL;
if (rrdev && (test_bit(Faulty, &rrdev->flags))) if (rrdev && (test_bit(Faulty, &rrdev->flags)))
...@@ -1543,8 +1551,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1543,8 +1551,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
r10_bio->master_bio = bio; r10_bio->master_bio = bio;
} }
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) md_account_bio(mddev, &bio);
r10_bio->start_time = bio_start_io_acct(bio); r10_bio->master_bio = bio;
atomic_set(&r10_bio->remaining, 1); atomic_set(&r10_bio->remaining, 1);
md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
...@@ -1571,12 +1579,11 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) ...@@ -1571,12 +1579,11 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
r10_bio->sector = bio->bi_iter.bi_sector; r10_bio->sector = bio->bi_iter.bi_sector;
r10_bio->state = 0; r10_bio->state = 0;
r10_bio->read_slot = -1; r10_bio->read_slot = -1;
r10_bio->start_time = 0;
memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
conf->geo.raid_disks); conf->geo.raid_disks);
if (bio_data_dir(bio) == READ) if (bio_data_dir(bio) == READ)
raid10_read_request(mddev, bio, r10_bio); raid10_read_request(mddev, bio, r10_bio, true);
else else
raid10_write_request(mddev, bio, r10_bio); raid10_write_request(mddev, bio, r10_bio);
} }
...@@ -1780,10 +1787,9 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) ...@@ -1780,10 +1787,9 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
*/ */
rcu_read_lock(); rcu_read_lock();
for (disk = 0; disk < geo->raid_disks; disk++) { for (disk = 0; disk < geo->raid_disks; disk++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev); struct md_rdev *rdev, *rrdev;
struct md_rdev *rrdev = rcu_dereference(
conf->mirrors[disk].replacement);
rdev = dereference_rdev_and_rrdev(&conf->mirrors[disk], &rrdev);
r10_bio->devs[disk].bio = NULL; r10_bio->devs[disk].bio = NULL;
r10_bio->devs[disk].repl_bio = NULL; r10_bio->devs[disk].repl_bio = NULL;
...@@ -2720,10 +2726,10 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, ...@@ -2720,10 +2726,10 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio) static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
{ {
int sect = 0; /* Offset from r10_bio->sector */ int sect = 0; /* Offset from r10_bio->sector */
int sectors = r10_bio->sectors; int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
struct md_rdev *rdev; struct md_rdev *rdev;
int max_read_errors = atomic_read(&mddev->max_corr_read_errors); int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[r10_bio->read_slot].devnum; int d = r10_bio->devs[slot].devnum;
/* still own a reference to this rdev, so it cannot /* still own a reference to this rdev, so it cannot
* have been cleared recently. * have been cleared recently.
...@@ -2744,13 +2750,13 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2744,13 +2750,13 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
pr_notice("md/raid10:%s: %pg: Failing raid device\n", pr_notice("md/raid10:%s: %pg: Failing raid device\n",
mdname(mddev), rdev->bdev); mdname(mddev), rdev->bdev);
md_error(mddev, rdev); md_error(mddev, rdev);
r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED; r10_bio->devs[slot].bio = IO_BLOCKED;
return; return;
} }
while(sectors) { while(sectors) {
int s = sectors; int s = sectors;
int sl = r10_bio->read_slot; int sl = slot;
int success = 0; int success = 0;
int start; int start;
...@@ -2785,7 +2791,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2785,7 +2791,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
sl++; sl++;
if (sl == conf->copies) if (sl == conf->copies)
sl = 0; sl = 0;
} while (!success && sl != r10_bio->read_slot); } while (sl != slot);
rcu_read_unlock(); rcu_read_unlock();
if (!success) { if (!success) {
...@@ -2793,16 +2799,16 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2793,16 +2799,16 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
* as bad on the first device to discourage future * as bad on the first device to discourage future
* reads. * reads.
*/ */
int dn = r10_bio->devs[r10_bio->read_slot].devnum; int dn = r10_bio->devs[slot].devnum;
rdev = conf->mirrors[dn].rdev; rdev = conf->mirrors[dn].rdev;
if (!rdev_set_badblocks( if (!rdev_set_badblocks(
rdev, rdev,
r10_bio->devs[r10_bio->read_slot].addr r10_bio->devs[slot].addr
+ sect, + sect,
s, 0)) { s, 0)) {
md_error(mddev, rdev); md_error(mddev, rdev);
r10_bio->devs[r10_bio->read_slot].bio r10_bio->devs[slot].bio
= IO_BLOCKED; = IO_BLOCKED;
} }
break; break;
...@@ -2811,7 +2817,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2811,7 +2817,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
start = sl; start = sl;
/* write it back and re-read */ /* write it back and re-read */
rcu_read_lock(); rcu_read_lock();
while (sl != r10_bio->read_slot) { while (sl != slot) {
if (sl==0) if (sl==0)
sl = conf->copies; sl = conf->copies;
sl--; sl--;
...@@ -2845,7 +2851,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2845,7 +2851,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
rcu_read_lock(); rcu_read_lock();
} }
sl = start; sl = start;
while (sl != r10_bio->read_slot) { while (sl != slot) {
if (sl==0) if (sl==0)
sl = conf->copies; sl = conf->copies;
sl--; sl--;
...@@ -2985,7 +2991,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -2985,7 +2991,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
r10_bio->state = 0; r10_bio->state = 0;
raid10_read_request(mddev, r10_bio->master_bio, r10_bio); raid10_read_request(mddev, r10_bio->master_bio, r10_bio, false);
/* /*
* allow_barrier after re-submit to ensure no sync io * allow_barrier after re-submit to ensure no sync io
* can be issued while regular io pending. * can be issued while regular io pending.
......
...@@ -123,7 +123,6 @@ struct r10bio { ...@@ -123,7 +123,6 @@ struct r10bio {
sector_t sector; /* virtual sector number */ sector_t sector; /* virtual sector number */
int sectors; int sectors;
unsigned long state; unsigned long state;
unsigned long start_time;
struct mddev *mddev; struct mddev *mddev;
/* /*
* original bio going to /dev/mdx * original bio going to /dev/mdx
......
...@@ -5468,26 +5468,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf, ...@@ -5468,26 +5468,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
*/ */
static void raid5_align_endio(struct bio *bi) static void raid5_align_endio(struct bio *bi)
{ {
struct md_io_acct *md_io_acct = bi->bi_private; struct bio *raid_bi = bi->bi_private;
struct bio *raid_bi = md_io_acct->orig_bio; struct md_rdev *rdev = (void *)raid_bi->bi_next;
struct mddev *mddev; struct mddev *mddev = rdev->mddev;
struct r5conf *conf; struct r5conf *conf = mddev->private;
struct md_rdev *rdev;
blk_status_t error = bi->bi_status; blk_status_t error = bi->bi_status;
unsigned long start_time = md_io_acct->start_time;
bio_put(bi); bio_put(bi);
rdev = (void*)raid_bi->bi_next;
raid_bi->bi_next = NULL; raid_bi->bi_next = NULL;
mddev = rdev->mddev;
conf = mddev->private;
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
if (!error) { if (!error) {
if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
bio_end_io_acct(raid_bi, start_time);
bio_endio(raid_bi); bio_endio(raid_bi);
if (atomic_dec_and_test(&conf->active_aligned_reads)) if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_quiescent); wake_up(&conf->wait_for_quiescent);
...@@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) ...@@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
struct md_rdev *rdev; struct md_rdev *rdev;
sector_t sector, end_sector, first_bad; sector_t sector, end_sector, first_bad;
int bad_sectors, dd_idx; int bad_sectors, dd_idx;
struct md_io_acct *md_io_acct;
bool did_inc; bool did_inc;
if (!in_chunk_boundary(mddev, raid_bio)) { if (!in_chunk_boundary(mddev, raid_bio)) {
...@@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) ...@@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
return 0; return 0;
} }
align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO, md_account_bio(mddev, &raid_bio);
&mddev->io_acct_set);
md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
raid_bio->bi_next = (void *)rdev; raid_bio->bi_next = (void *)rdev;
if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
md_io_acct->start_time = bio_start_io_acct(raid_bio);
md_io_acct->orig_bio = raid_bio;
align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
&mddev->bio_set);
align_bio->bi_end_io = raid5_align_endio; align_bio->bi_end_io = raid5_align_endio;
align_bio->bi_private = md_io_acct; align_bio->bi_private = raid_bio;
align_bio->bi_iter.bi_sector = sector; align_bio->bi_iter.bi_sector = sector;
/* No reshape active, so we can trust rdev->data_offset */ /* No reshape active, so we can trust rdev->data_offset */
...@@ -7787,19 +7774,12 @@ static int raid5_run(struct mddev *mddev) ...@@ -7787,19 +7774,12 @@ static int raid5_run(struct mddev *mddev)
struct md_rdev *rdev; struct md_rdev *rdev;
struct md_rdev *journal_dev = NULL; struct md_rdev *journal_dev = NULL;
sector_t reshape_offset = 0; sector_t reshape_offset = 0;
int i, ret = 0; int i;
long long min_offset_diff = 0; long long min_offset_diff = 0;
int first = 1; int first = 1;
if (acct_bioset_init(mddev)) { if (mddev_init_writes_pending(mddev) < 0)
pr_err("md/raid456:%s: alloc acct bioset failed.\n", mdname(mddev));
return -ENOMEM; return -ENOMEM;
}
if (mddev_init_writes_pending(mddev) < 0) {
ret = -ENOMEM;
goto exit_acct_set;
}
if (mddev->recovery_cp != MaxSector) if (mddev->recovery_cp != MaxSector)
pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
...@@ -7830,8 +7810,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -7830,8 +7810,7 @@ static int raid5_run(struct mddev *mddev)
(mddev->bitmap_info.offset || mddev->bitmap_info.file)) { (mddev->bitmap_info.offset || mddev->bitmap_info.file)) {
pr_notice("md/raid:%s: array cannot have both journal and bitmap\n", pr_notice("md/raid:%s: array cannot have both journal and bitmap\n",
mdname(mddev)); mdname(mddev));
ret = -EINVAL; return -EINVAL;
goto exit_acct_set;
} }
if (mddev->reshape_position != MaxSector) { if (mddev->reshape_position != MaxSector) {
...@@ -7856,15 +7835,13 @@ static int raid5_run(struct mddev *mddev) ...@@ -7856,15 +7835,13 @@ static int raid5_run(struct mddev *mddev)
if (journal_dev) { if (journal_dev) {
pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n", pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n",
mdname(mddev)); mdname(mddev));
ret = -EINVAL; return -EINVAL;
goto exit_acct_set;
} }
if (mddev->new_level != mddev->level) { if (mddev->new_level != mddev->level) {
pr_warn("md/raid:%s: unsupported reshape required - aborting.\n", pr_warn("md/raid:%s: unsupported reshape required - aborting.\n",
mdname(mddev)); mdname(mddev));
ret = -EINVAL; return -EINVAL;
goto exit_acct_set;
} }
old_disks = mddev->raid_disks - mddev->delta_disks; old_disks = mddev->raid_disks - mddev->delta_disks;
/* reshape_position must be on a new-stripe boundary, and one /* reshape_position must be on a new-stripe boundary, and one
...@@ -7880,8 +7857,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -7880,8 +7857,7 @@ static int raid5_run(struct mddev *mddev)
if (sector_div(here_new, chunk_sectors * new_data_disks)) { if (sector_div(here_new, chunk_sectors * new_data_disks)) {
pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n", pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n",
mdname(mddev)); mdname(mddev));
ret = -EINVAL; return -EINVAL;
goto exit_acct_set;
} }
reshape_offset = here_new * chunk_sectors; reshape_offset = here_new * chunk_sectors;
/* here_new is the stripe we will write to */ /* here_new is the stripe we will write to */
...@@ -7903,8 +7879,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -7903,8 +7879,7 @@ static int raid5_run(struct mddev *mddev)
else if (mddev->ro == 0) { else if (mddev->ro == 0) {
pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n", pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n",
mdname(mddev)); mdname(mddev));
ret = -EINVAL; return -EINVAL;
goto exit_acct_set;
} }
} else if (mddev->reshape_backwards } else if (mddev->reshape_backwards
? (here_new * chunk_sectors + min_offset_diff <= ? (here_new * chunk_sectors + min_offset_diff <=
...@@ -7914,8 +7889,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -7914,8 +7889,7 @@ static int raid5_run(struct mddev *mddev)
/* Reading from the same stripe as writing to - bad */ /* Reading from the same stripe as writing to - bad */
pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n", pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n",
mdname(mddev)); mdname(mddev));
ret = -EINVAL; return -EINVAL;
goto exit_acct_set;
} }
pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev)); pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev));
/* OK, we should be able to continue; */ /* OK, we should be able to continue; */
...@@ -7939,10 +7913,8 @@ static int raid5_run(struct mddev *mddev) ...@@ -7939,10 +7913,8 @@ static int raid5_run(struct mddev *mddev)
else else
conf = mddev->private; conf = mddev->private;
if (IS_ERR(conf)) { if (IS_ERR(conf))
ret = PTR_ERR(conf); return PTR_ERR(conf);
goto exit_acct_set;
}
if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
if (!journal_dev) { if (!journal_dev) {
...@@ -8140,10 +8112,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -8140,10 +8112,7 @@ static int raid5_run(struct mddev *mddev)
free_conf(conf); free_conf(conf);
mddev->private = NULL; mddev->private = NULL;
pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev)); pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
ret = -EIO; return -EIO;
exit_acct_set:
acct_bioset_exit(mddev);
return ret;
} }
static void raid5_free(struct mddev *mddev, void *priv) static void raid5_free(struct mddev *mddev, void *priv)
...@@ -8151,7 +8120,6 @@ static void raid5_free(struct mddev *mddev, void *priv) ...@@ -8151,7 +8120,6 @@ static void raid5_free(struct mddev *mddev, void *priv)
struct r5conf *conf = priv; struct r5conf *conf = priv;
free_conf(conf); free_conf(conf);
acct_bioset_exit(mddev);
mddev->to_remove = &raid5_attrs_group; mddev->to_remove = &raid5_attrs_group;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment