Commit e0d245e2 authored by Jens Axboe


Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.14/drivers

Pull MD changes from Song:

"1) iostats rewrite by Guoqing Jiang;
 2) raid5 lock contention optimization by Gal Ofri."

* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md/raid5: avoid device_lock in read_one_chunk()
  md: add comments in md_integrity_register
  md: check level before create and exit io_acct_set
  md: Constify attribute_group structs
  md: mark some personalities as deprecated
  md/raid10: enable io accounting
  md/raid1: enable io accounting
  md/raid1: rename print_msg with r1bio_existed
  md/raid5: avoid redundant bio clone in raid5_read_one_chunk
  md/raid5: move checking badblock before clone bio in raid5_read_one_chunk
  md: add io accounting for raid0 and raid5
  md: revert io stats accounting
parents 491e5b17 97ae2725
@@ -47,7 +47,7 @@ config MD_AUTODETECT
 	  If unsure, say Y.
 
 config MD_LINEAR
-	tristate "Linear (append) mode"
+	tristate "Linear (append) mode (deprecated)"
 	depends on BLK_DEV_MD
 	help
 	  If you say Y here, then your multiple devices driver will be able to
@@ -158,7 +158,7 @@ config MD_RAID456
 	  If unsure, say Y.
 
 config MD_MULTIPATH
-	tristate "Multipath I/O support"
+	tristate "Multipath I/O support (deprecated)"
 	depends on BLK_DEV_MD
 	help
 	  MD_MULTIPATH provides a simple multi-path personality for use
@@ -169,7 +169,7 @@ config MD_MULTIPATH
 	  If unsure, say N.
 
 config MD_FAULTY
-	tristate "Faulty test module for MD"
+	tristate "Faulty test module for MD (deprecated)"
 	depends on BLK_DEV_MD
 	help
 	  The "faulty" module allows for a block device that occasionally returns
......
@@ -2616,7 +2616,7 @@ static struct attribute *md_bitmap_attrs[] = {
 	&max_backlog_used.attr,
 	NULL
 };
-struct attribute_group md_bitmap_group = {
+const struct attribute_group md_bitmap_group = {
 	.name = "bitmap",
 	.attrs = md_bitmap_attrs,
 };
@@ -357,7 +357,7 @@ static void raid_exit(void)
 module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Fault injection personality for MD");
+MODULE_DESCRIPTION("Fault injection personality for MD (deprecated)");
 MODULE_ALIAS("md-personality-10"); /* faulty */
 MODULE_ALIAS("md-faulty");
 MODULE_ALIAS("md-level--5");
@@ -312,7 +312,7 @@ static void linear_exit (void)
 module_init(linear_init);
 module_exit(linear_exit);
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Linear device concatenation personality for MD");
+MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)");
 MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
 MODULE_ALIAS("md-linear");
 MODULE_ALIAS("md-level--1");
@@ -471,7 +471,7 @@ static void __exit multipath_exit (void)
 module_init(multipath_init);
 module_exit(multipath_exit);
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("simple multi-path personality for MD");
+MODULE_DESCRIPTION("simple multi-path personality for MD (deprecated)");
 MODULE_ALIAS("md-personality-7"); /* MULTIPATH */
 MODULE_ALIAS("md-multipath");
 MODULE_ALIAS("md-level--4");
@@ -441,30 +441,6 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
 }
 EXPORT_SYMBOL(md_handle_request);
 
-struct md_io {
-	struct mddev *mddev;
-	bio_end_io_t *orig_bi_end_io;
-	void *orig_bi_private;
-	struct block_device *orig_bi_bdev;
-	unsigned long start_time;
-};
-
-static void md_end_io(struct bio *bio)
-{
-	struct md_io *md_io = bio->bi_private;
-	struct mddev *mddev = md_io->mddev;
-
-	bio_end_io_acct_remapped(bio, md_io->start_time, md_io->orig_bi_bdev);
-
-	bio->bi_end_io = md_io->orig_bi_end_io;
-	bio->bi_private = md_io->orig_bi_private;
-	mempool_free(md_io, &mddev->md_io_pool);
-
-	if (bio->bi_end_io)
-		bio->bi_end_io(bio);
-}
-
 static blk_qc_t md_submit_bio(struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
@@ -489,21 +465,6 @@ static blk_qc_t md_submit_bio(struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
-	if (bio->bi_end_io != md_end_io) {
-		struct md_io *md_io;
-
-		md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO);
-		md_io->mddev = mddev;
-		md_io->orig_bi_end_io = bio->bi_end_io;
-		md_io->orig_bi_private = bio->bi_private;
-		md_io->orig_bi_bdev = bio->bi_bdev;
-
-		bio->bi_end_io = md_end_io;
-		bio->bi_private = md_io;
-		md_io->start_time = bio_start_io_acct(bio);
-	}
-
 	/* bio could be mergeable after passing to underlayer */
 	bio->bi_opf &= ~REQ_NOMERGE;
@@ -824,7 +785,7 @@ static struct mddev *mddev_alloc(dev_t unit)
 	return ERR_PTR(error);
 }
 
-static struct attribute_group md_redundancy_group;
+static const struct attribute_group md_redundancy_group;
 
 void mddev_unlock(struct mddev *mddev)
 {
@@ -841,7 +802,7 @@ void mddev_unlock(struct mddev *mddev)
 		 * test it under the same mutex to ensure its correct value
 		 * is seen.
 		 */
-		struct attribute_group *to_remove = mddev->to_remove;
+		const struct attribute_group *to_remove = mddev->to_remove;
 
 		mddev->to_remove = NULL;
 		mddev->sysfs_active = 1;
 		mutex_unlock(&mddev->reconfig_mutex);
@@ -2379,7 +2340,15 @@ int md_integrity_register(struct mddev *mddev)
 			       bdev_get_integrity(reference->bdev));
 
 	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
-	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
+	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
+	    (mddev->level != 1 && mddev->level != 10 &&
+	     bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
+		/*
+		 * No need to handle the failure of bioset_integrity_create,
+		 * because the function is called by md_run() -> pers->run(),
+		 * md_run calls bioset_exit -> bioset_integrity_free in case
+		 * of failure case.
+		 */
 		pr_err("md: failed to create integrity pool for %s\n",
 		       mdname(mddev));
 		return -EINVAL;
@@ -5538,7 +5507,7 @@ static struct attribute *md_redundancy_attrs[] = {
 	&md_degraded.attr,
 	NULL,
 };
-static struct attribute_group md_redundancy_group = {
+static const struct attribute_group md_redundancy_group = {
 	.name = NULL,
 	.attrs = md_redundancy_attrs,
 };
@@ -5608,7 +5577,8 @@ static void md_free(struct kobject *ko)
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
-	mempool_exit(&mddev->md_io_pool);
+	if (mddev->level != 1 && mddev->level != 10)
+		bioset_exit(&mddev->io_acct_set);
 	kfree(mddev);
 }
@@ -5705,11 +5675,6 @@ static int md_alloc(dev_t dev, char *name)
 	 */
 	mddev->hold_active = UNTIL_STOP;
 
-	error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE,
-					  sizeof(struct md_io));
-	if (error)
-		goto abort;
-
 	error = -ENOMEM;
 	mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
 	if (!mddev->queue)
@@ -5907,7 +5872,14 @@ int md_run(struct mddev *mddev)
 	if (!bioset_initialized(&mddev->sync_set)) {
 		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 		if (err)
-			return err;
+			goto exit_bio_set;
+	}
+	if (mddev->level != 1 && mddev->level != 10 &&
+	    !bioset_initialized(&mddev->io_acct_set)) {
+		err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
+				  offsetof(struct md_io_acct, bio_clone), 0);
+		if (err)
+			goto exit_sync_set;
 	}
 
 	spin_lock(&pers_lock);
@@ -6035,6 +6007,7 @@ int md_run(struct mddev *mddev)
 			blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
 		else
 			blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
+		blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
 	}
 	if (pers->sync_request) {
 		if (mddev->kobj.sd &&
@@ -6084,8 +6057,12 @@ int md_run(struct mddev *mddev)
 	module_put(pers->owner);
 	md_bitmap_destroy(mddev);
 abort:
-	bioset_exit(&mddev->bio_set);
+	if (mddev->level != 1 && mddev->level != 10)
+		bioset_exit(&mddev->io_acct_set);
+exit_sync_set:
 	bioset_exit(&mddev->sync_set);
+exit_bio_set:
+	bioset_exit(&mddev->bio_set);
 	return err;
 }
 EXPORT_SYMBOL_GPL(md_run);
@@ -6309,6 +6286,8 @@ void md_stop(struct mddev *mddev)
 	__md_stop(mddev);
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
+	if (mddev->level != 1 && mddev->level != 10)
+		bioset_exit(&mddev->io_acct_set);
 }
 EXPORT_SYMBOL_GPL(md_stop);
@@ -8613,6 +8592,41 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 }
 EXPORT_SYMBOL_GPL(md_submit_discard_bio);
 
+static void md_end_io_acct(struct bio *bio)
+{
+	struct md_io_acct *md_io_acct = bio->bi_private;
+	struct bio *orig_bio = md_io_acct->orig_bio;
+
+	orig_bio->bi_status = bio->bi_status;
+
+	bio_end_io_acct(orig_bio, md_io_acct->start_time);
+	bio_put(bio);
+	bio_endio(orig_bio);
+}
+
+/*
+ * Used by personalities that don't already clone the bio and thus can't
+ * easily add the timestamp to their extended bio structure.
+ */
+void md_account_bio(struct mddev *mddev, struct bio **bio)
+{
+	struct md_io_acct *md_io_acct;
+	struct bio *clone;
+
+	if (!blk_queue_io_stat((*bio)->bi_bdev->bd_disk->queue))
+		return;
+
+	clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set);
+	md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
+	md_io_acct->orig_bio = *bio;
+	md_io_acct->start_time = bio_start_io_acct(*bio);
+
+	clone->bi_end_io = md_end_io_acct;
+	clone->bi_private = md_io_acct;
+	*bio = clone;
+}
+EXPORT_SYMBOL_GPL(md_account_bio);
+
 /* md_allow_write(mddev)
  * Calling this ensures that the array is marked 'active' so that writes
  * may proceed without blocking. It is important to call this before
......
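The two helpers above are the heart of the rewritten accounting: md_account_bio() swaps the caller's bio for a clone drawn from the per-array io_acct_set, and the clone's end_io handler (md_end_io_acct) closes the bio_start_io_acct()/bio_end_io_acct() pair before completing the original bio. A minimal sketch of how a personality opts in; example_make_request() is hypothetical, the real callers being raid0_make_request() and raid5_make_request() below:

/* Sketch only, not part of the patch. */
static bool example_make_request(struct mddev *mddev, struct bio *bio)
{
	/*
	 * Replaces the local bio pointer with the accounting clone; a
	 * no-op when io stats are disabled on the queue.
	 */
	md_account_bio(mddev, &bio);

	/* ... personality-specific mapping ... */
	submit_bio_noacct(bio);
	return true;
}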
@@ -481,13 +481,13 @@ struct mddev {
 	atomic_t			max_corr_read_errors; /* max read retries */
 	struct list_head		all_mddevs;
 
-	struct attribute_group		*to_remove;
+	const struct attribute_group	*to_remove;
 
 	struct bio_set			bio_set;
 	struct bio_set			sync_set; /* for sync operations like
 						   * metadata and bitmap writes
 						   */
-	mempool_t			md_io_pool;
+	struct bio_set			io_acct_set; /* for raid0 and raid5 io accounting */
 
 	/* Generic flush handling.
 	 * The last to finish preflush schedules a worker to submit
@@ -613,7 +613,7 @@ struct md_sysfs_entry {
 	ssize_t (*show)(struct mddev *, char *);
 	ssize_t (*store)(struct mddev *, const char *, size_t);
 };
-extern struct attribute_group md_bitmap_group;
+extern const struct attribute_group md_bitmap_group;
 
 static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
 {
@@ -684,6 +684,12 @@ struct md_thread {
 	void			*private;
 };
 
+struct md_io_acct {
+	struct bio *orig_bio;
+	unsigned long start_time;
+	struct bio bio_clone;
+};
+
 #define THREAD_WAKEUP 0
 
 static inline void safe_put_page(struct page *p)
@@ -715,6 +721,7 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
 extern void md_finish_reshape(struct mddev *mddev);
 void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 			struct bio *bio, sector_t start, sector_t size);
+void md_account_bio(struct mddev *mddev, struct bio **bio);
 
 extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
 extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
......
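The container_of() calls in md.c work because md_run() initializes io_acct_set with a front_pad of offsetof(struct md_io_acct, bio_clone): every bio allocated from that set is embedded at the tail of a struct md_io_acct, with orig_bio and start_time living in the reserved pad. A sketch of the recovery step (this helper is hypothetical; the patch open-codes the container_of() at each call site):

/* Valid only for bios allocated from mddev->io_acct_set. */
static inline struct md_io_acct *bio_to_md_io_acct(struct bio *clone)
{
	return container_of(clone, struct md_io_acct, bio_clone);
}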
@@ -546,6 +546,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
 		bio = split;
 	}
 
+	if (bio->bi_pool != &mddev->bio_set)
+		md_account_bio(mddev, &bio);
+
 	orig_sector = sector;
 	zone = find_zone(mddev->private, &sector);
 	switch (conf->layout) {
......
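The bi_pool guard keeps accounting to one clone per request: fragments that raid0 carves off with bio_split() come from mddev->bio_set and are skipped, while the original bio, resubmitted via submit_bio_noacct() until it no longer needs splitting, is wrapped by md_account_bio() exactly once on its final pass.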
@@ -300,6 +300,8 @@ static void call_bio_endio(struct r1bio *r1_bio)
 	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
 		bio->bi_status = BLK_STS_IOERR;
 
+	if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+		bio_end_io_acct(bio, r1_bio->start_time);
 	bio_endio(bio);
 }
 
@@ -1210,7 +1212,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	int max_sectors;
 	int rdisk;
-	bool print_msg = !!r1_bio;
+	bool r1bio_existed = !!r1_bio;
 	char b[BDEVNAME_SIZE];
 
 	/*
@@ -1220,7 +1222,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	 */
 	gfp_t gfp = r1_bio ? (GFP_NOIO | __GFP_HIGH) : GFP_NOIO;
 
-	if (print_msg) {
+	if (r1bio_existed) {
 		/* Need to get the block device name carefully */
 		struct md_rdev *rdev;
 		rcu_read_lock();
@@ -1252,7 +1254,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
 	if (rdisk < 0) {
 		/* couldn't find anywhere to read from */
-		if (print_msg) {
+		if (r1bio_existed) {
 			pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n",
 					    mdname(mddev),
 					    b,
@@ -1263,7 +1265,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	}
 	mirror = conf->mirrors + rdisk;
 
-	if (print_msg)
+	if (r1bio_existed)
 		pr_info_ratelimited("md/raid1:%s: redirecting sector %llu to other mirror: %s\n",
 				    mdname(mddev),
 				    (unsigned long long)r1_bio->sector,
@@ -1292,6 +1294,9 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
 	r1_bio->read_disk = rdisk;
 
+	if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+		r1_bio->start_time = bio_start_io_acct(bio);
+
 	read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
 
 	r1_bio->bios[rdisk] = read_bio;
@@ -1461,6 +1466,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		r1_bio->sectors = max_sectors;
 	}
 
+	if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+		r1_bio->start_time = bio_start_io_acct(bio);
 	atomic_set(&r1_bio->remaining, 1);
 	atomic_set(&r1_bio->behind_remaining, 0);
......
@@ -158,6 +158,7 @@ struct r1bio {
 	sector_t		sector;
 	int			sectors;
 	unsigned long		state;
+	unsigned long		start_time;
 	struct mddev		*mddev;
 	/*
 	 * original bio going to /dev/mdx
 	 */
......
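raid1 here (and raid10 below) take the other route mentioned in the md_account_bio() comment: they already clone the bio into their own r1bio/r10bio, so instead of an io_acct_set clone they stash the bio_start_io_acct() timestamp in the new start_time field and call bio_end_io_acct() when the original bio completes.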
@@ -297,6 +297,8 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
 	if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
 		bio->bi_status = BLK_STS_IOERR;
 
+	if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+		bio_end_io_acct(bio, r10_bio->start_time);
 	bio_endio(bio);
 	/*
 	 * Wake up any possible resync thread that waits for the device
@@ -1184,6 +1186,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	}
 	slot = r10_bio->read_slot;
 
+	if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+		r10_bio->start_time = bio_start_io_acct(bio);
 	read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
 
 	r10_bio->devs[slot].bio = read_bio;
@@ -1483,6 +1487,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 		r10_bio->master_bio = bio;
 	}
 
+	if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+		r10_bio->start_time = bio_start_io_acct(bio);
 	atomic_set(&r10_bio->remaining, 1);
 	md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
......
@@ -124,6 +124,7 @@ struct r10bio {
 	sector_t		sector;	/* virtual sector number */
 	int			sectors;
 	unsigned long		state;
+	unsigned long		start_time;
 	struct mddev		*mddev;
 	/*
 	 * original bio going to /dev/mdx
 	 */
......
@@ -5364,11 +5364,13 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
  */
 static void raid5_align_endio(struct bio *bi)
 {
-	struct bio* raid_bi = bi->bi_private;
+	struct md_io_acct *md_io_acct = bi->bi_private;
+	struct bio *raid_bi = md_io_acct->orig_bio;
 	struct mddev *mddev;
 	struct r5conf *conf;
 	struct md_rdev *rdev;
 	blk_status_t error = bi->bi_status;
+	unsigned long start_time = md_io_acct->start_time;
 
 	bio_put(bi);
@@ -5380,6 +5382,8 @@ static void raid5_align_endio(struct bio *bi)
 	rdev_dec_pending(rdev, conf->mddev);
 
 	if (!error) {
+		if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
+			bio_end_io_acct(raid_bi, start_time);
 		bio_endio(raid_bi);
 		if (atomic_dec_and_test(&conf->active_aligned_reads))
 			wake_up(&conf->wait_for_quiescent);
@@ -5398,6 +5402,8 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 	struct md_rdev *rdev;
 	sector_t sector, end_sector, first_bad;
 	int bad_sectors, dd_idx;
+	struct md_io_acct *md_io_acct;
+	bool did_inc;
 
 	if (!in_chunk_boundary(mddev, raid_bio)) {
 		pr_debug("%s: non aligned\n", __func__);
@@ -5427,29 +5433,46 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 	atomic_inc(&rdev->nr_pending);
 	rcu_read_unlock();
 
-	align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set);
-	bio_set_dev(align_bio, rdev->bdev);
-	align_bio->bi_end_io = raid5_align_endio;
-	align_bio->bi_private = raid_bio;
-	align_bio->bi_iter.bi_sector = sector;
-
-	raid_bio->bi_next = (void *)rdev;
-
-	if (is_badblock(rdev, sector, bio_sectors(align_bio), &first_bad,
+	if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
 			&bad_sectors)) {
-		bio_put(align_bio);
+		bio_put(raid_bio);
 		rdev_dec_pending(rdev, mddev);
 		return 0;
 	}
 
+	align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_acct_set);
+	md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
+	raid_bio->bi_next = (void *)rdev;
+	if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
+		md_io_acct->start_time = bio_start_io_acct(raid_bio);
+	md_io_acct->orig_bio = raid_bio;
+
+	bio_set_dev(align_bio, rdev->bdev);
+	align_bio->bi_end_io = raid5_align_endio;
+	align_bio->bi_private = md_io_acct;
+	align_bio->bi_iter.bi_sector = sector;
+
 	/* No reshape active, so we can trust rdev->data_offset */
 	align_bio->bi_iter.bi_sector += rdev->data_offset;
 
-	spin_lock_irq(&conf->device_lock);
-	wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0,
-			    conf->device_lock);
-	atomic_inc(&conf->active_aligned_reads);
-	spin_unlock_irq(&conf->device_lock);
+	did_inc = false;
+	if (conf->quiesce == 0) {
+		atomic_inc(&conf->active_aligned_reads);
+		did_inc = true;
+	}
+	/* need a memory barrier to detect the race with raid5_quiesce() */
+	if (!did_inc || smp_load_acquire(&conf->quiesce) != 0) {
+		/* quiesce is in progress, so we need to undo io activation and wait
+		 * for it to finish
+		 */
+		if (did_inc && atomic_dec_and_test(&conf->active_aligned_reads))
+			wake_up(&conf->wait_for_quiescent);
+		spin_lock_irq(&conf->device_lock);
+		wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0,
+				    conf->device_lock);
+		atomic_inc(&conf->active_aligned_reads);
+		spin_unlock_irq(&conf->device_lock);
+	}
 
 	if (mddev->gendisk)
 		trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk),
@@ -5798,6 +5821,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	last_sector = bio_end_sector(bi);
 	bi->bi_next = NULL;
 
+	md_account_bio(mddev, &bi);
 	prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 	for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
 		int previous;
@@ -6930,7 +6954,7 @@ static struct attribute *raid5_attrs[] = {
 	&ppl_write_hint.attr,
 	NULL,
 };
-static struct attribute_group raid5_attrs_group = {
+static const struct attribute_group raid5_attrs_group = {
 	.name = NULL,
 	.attrs = raid5_attrs,
 };
@@ -8336,7 +8360,10 @@ static void raid5_quiesce(struct mddev *mddev, int quiesce)
 		 * active stripes can drain
 		 */
 		r5c_flush_cache(conf, INT_MAX);
-		conf->quiesce = 2;
+		/* need a memory barrier to make sure read_one_chunk() sees
+		 * quiesce started and reverts to slow (locked) path.
+		 */
+		smp_store_release(&conf->quiesce, 2);
 		wait_event_cmd(conf->wait_for_quiescent,
 			       atomic_read(&conf->active_stripes) == 0 &&
 			       atomic_read(&conf->active_aligned_reads) == 0,
......
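The smp_store_release() above pairs with the smp_load_acquire() added to raid5_read_one_chunk(), which is what lets the aligned-read fast path skip device_lock. Roughly, per the commit's comments:

/*
 * read_one_chunk()                      raid5_quiesce()
 *   atomic_inc(&active_aligned_reads);    smp_store_release(&quiesce, 2);
 *   if (smp_load_acquire(&quiesce))       wait_event(...,
 *           undo inc, take locked path;       active_aligned_reads == 0);
 *
 * Either the reader sees quiesce != 0 and backs out onto the locked slow
 * path, or its increment propagates before it re-checks quiesce, so the
 * quiescing side observes the elevated counter and waits for it to drain.
 */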