Commit a167f663 authored by NeilBrown

md: use separate bio pool for each md device.

bio_clone and bio_alloc allocate from a common bio pool.
If an md device is stacked with other devices that use this pool, or under
something like swap which uses the pool, then the multiple calls on
the pool can cause deadlocks.

So allocate a local bio pool for each md array and use that rather
than the common pool.

This pool is used both for regular IO and metadata updates.
Signed-off-by: NeilBrown <neilb@suse.de>
parent 2b193363
...@@ -210,7 +210,7 @@ static int make_request(mddev_t *mddev, struct bio *bio) ...@@ -210,7 +210,7 @@ static int make_request(mddev_t *mddev, struct bio *bio)
} }
} }
if (failit) { if (failit) {
struct bio *b = bio_clone(bio, GFP_NOIO); struct bio *b = bio_clone_mddev(bio, GFP_NOIO, mddev);
b->bi_bdev = conf->rdev->bdev; b->bi_bdev = conf->rdev->bdev;
b->bi_private = bio; b->bi_private = bio;
b->bi_end_io = faulty_fail; b->bi_end_io = faulty_fail;
......
...@@ -149,6 +149,72 @@ static const struct block_device_operations md_fops; ...@@ -149,6 +149,72 @@ static const struct block_device_operations md_fops;
static int start_readonly; static int start_readonly;
/* bio_clone_mddev
* like bio_clone, but with a local bio set
*/
static void mddev_bio_destructor(struct bio *bio)
{
mddev_t *mddev, **mddevp;
mddevp = (void*)bio;
mddev = mddevp[-1];
bio_free(bio, mddev->bio_set);
}
/*
 * bio_alloc_mddev - allocate a bio, preferring the array's own bio_set
 * @gfp_mask:  allocation flags passed through to the allocator
 * @nr_iovecs: number of io vectors requested
 * @mddev:     array whose bio_set should be used; may be NULL
 *
 * When @mddev is NULL or has no bio_set, this is just bio_alloc().
 * Otherwise the bio comes from mddev->bio_set, the mddev pointer is
 * stashed directly in front of the bio, and mddev_bio_destructor is
 * installed so the bio is later freed back to the same bio_set.
 *
 * Returns the new bio, or NULL if the bio_set allocation failed.
 */
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
			    mddev_t *mddev)
{
	struct bio *new_bio;

	if (mddev == NULL || mddev->bio_set == NULL)
		return bio_alloc(gfp_mask, nr_iovecs);

	new_bio = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
	if (new_bio != NULL) {
		/* Record the owner just ahead of the bio for the destructor. */
		((mddev_t **)new_bio)[-1] = mddev;
		new_bio->bi_destructor = mddev_bio_destructor;
	}
	return new_bio;
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);
/*
 * bio_clone_mddev - clone a bio, preferring the array's own bio_set
 * @bio:      bio to copy
 * @gfp_mask: allocation flags passed through to the allocator
 * @mddev:    array whose bio_set should be used; may be NULL
 *
 * When @mddev is NULL or has no bio_set, this is just bio_clone().
 * Otherwise a bio with room for bio->bi_max_vecs vectors is taken from
 * mddev->bio_set, tagged with its owning mddev (stored directly in
 * front of the bio), given mddev_bio_destructor, and filled in with
 * __bio_clone().  If @bio has integrity data, that is cloned as well.
 *
 * Returns the clone, or NULL if the allocation or the integrity clone
 * failed (in the latter case the partial clone is dropped with bio_put).
 */
struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
			    mddev_t *mddev)
{
	struct bio *copy;

	if (mddev == NULL || mddev->bio_set == NULL)
		return bio_clone(bio, gfp_mask);

	copy = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, mddev->bio_set);
	if (copy == NULL)
		return NULL;

	/* Record the owner just ahead of the bio for the destructor. */
	((mddev_t **)copy)[-1] = mddev;
	copy->bi_destructor = mddev_bio_destructor;
	__bio_clone(copy, bio);

	if (!bio_integrity(bio))
		return copy;

	if (bio_integrity_clone(copy, bio, gfp_mask, mddev->bio_set) < 0) {
		bio_put(copy);
		return NULL;
	}
	return copy;
}
EXPORT_SYMBOL_GPL(bio_clone_mddev);
/* /*
* We have a system wide 'event count' that is incremented * We have a system wide 'event count' that is incremented
* on any 'interesting' event, and readers of /proc/mdstat * on any 'interesting' event, and readers of /proc/mdstat
...@@ -321,7 +387,7 @@ static void submit_flushes(mddev_t *mddev) ...@@ -321,7 +387,7 @@ static void submit_flushes(mddev_t *mddev)
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
rcu_read_unlock(); rcu_read_unlock();
bi = bio_alloc(GFP_KERNEL, 0); bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev);
bi->bi_end_io = md_end_flush; bi->bi_end_io = md_end_flush;
bi->bi_private = rdev; bi->bi_private = rdev;
bi->bi_bdev = rdev->bdev; bi->bi_bdev = rdev->bdev;
...@@ -428,6 +494,8 @@ static void mddev_delayed_delete(struct work_struct *ws); ...@@ -428,6 +494,8 @@ static void mddev_delayed_delete(struct work_struct *ws);
static void mddev_put(mddev_t *mddev) static void mddev_put(mddev_t *mddev)
{ {
struct bio_set *bs = NULL;
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
return; return;
if (!mddev->raid_disks && list_empty(&mddev->disks) && if (!mddev->raid_disks && list_empty(&mddev->disks) &&
...@@ -435,6 +503,8 @@ static void mddev_put(mddev_t *mddev) ...@@ -435,6 +503,8 @@ static void mddev_put(mddev_t *mddev)
/* Array is not configured at all, and not held active, /* Array is not configured at all, and not held active,
* so destroy it */ * so destroy it */
list_del(&mddev->all_mddevs); list_del(&mddev->all_mddevs);
bs = mddev->bio_set;
mddev->bio_set = NULL;
if (mddev->gendisk) { if (mddev->gendisk) {
/* We did a probe so need to clean up. Call /* We did a probe so need to clean up. Call
* queue_work inside the spinlock so that * queue_work inside the spinlock so that
...@@ -447,6 +517,8 @@ static void mddev_put(mddev_t *mddev) ...@@ -447,6 +517,8 @@ static void mddev_put(mddev_t *mddev)
kfree(mddev); kfree(mddev);
} }
spin_unlock(&all_mddevs_lock); spin_unlock(&all_mddevs_lock);
if (bs)
bioset_free(bs);
} }
void mddev_init(mddev_t *mddev) void mddev_init(mddev_t *mddev)
...@@ -690,7 +762,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, ...@@ -690,7 +762,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
* if zero is reached. * if zero is reached.
* If an error occurred, call md_error * If an error occurred, call md_error
*/ */
struct bio *bio = bio_alloc(GFP_NOIO, 1); struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
bio->bi_bdev = rdev->bdev; bio->bi_bdev = rdev->bdev;
bio->bi_sector = sector; bio->bi_sector = sector;
...@@ -724,7 +796,7 @@ static void bi_complete(struct bio *bio, int error) ...@@ -724,7 +796,7 @@ static void bi_complete(struct bio *bio, int error)
int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size, int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
struct page *page, int rw) struct page *page, int rw)
{ {
struct bio *bio = bio_alloc(GFP_NOIO, 1); struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
struct completion event; struct completion event;
int ret; int ret;
...@@ -4379,6 +4451,9 @@ int md_run(mddev_t *mddev) ...@@ -4379,6 +4451,9 @@ int md_run(mddev_t *mddev)
sysfs_notify_dirent_safe(rdev->sysfs_state); sysfs_notify_dirent_safe(rdev->sysfs_state);
} }
if (mddev->bio_set == NULL)
mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev));
spin_lock(&pers_lock); spin_lock(&pers_lock);
pers = find_pers(mddev->level, mddev->clevel); pers = find_pers(mddev->level, mddev->clevel);
if (!pers || !try_module_get(pers->owner)) { if (!pers || !try_module_get(pers->owner)) {
......
...@@ -331,6 +331,8 @@ struct mddev_s ...@@ -331,6 +331,8 @@ struct mddev_s
struct attribute_group *to_remove; struct attribute_group *to_remove;
struct plug_handle *plug; /* if used by personality */ struct plug_handle *plug; /* if used by personality */
struct bio_set *bio_set;
/* Generic flush handling. /* Generic flush handling.
* The last to finish preflush schedules a worker to submit * The last to finish preflush schedules a worker to submit
* the rest of the request (without the REQ_FLUSH flag). * the rest of the request (without the REQ_FLUSH flag).
...@@ -517,4 +519,8 @@ extern void md_rdev_init(mdk_rdev_t *rdev); ...@@ -517,4 +519,8 @@ extern void md_rdev_init(mdk_rdev_t *rdev);
extern void mddev_suspend(mddev_t *mddev); extern void mddev_suspend(mddev_t *mddev);
extern void mddev_resume(mddev_t *mddev); extern void mddev_resume(mddev_t *mddev);
extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
mddev_t *mddev);
extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
mddev_t *mddev);
#endif /* _MD_MD_H */ #endif /* _MD_MD_H */
...@@ -861,7 +861,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) ...@@ -861,7 +861,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
} }
r1_bio->read_disk = rdisk; r1_bio->read_disk = rdisk;
read_bio = bio_clone(bio, GFP_NOIO); read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
r1_bio->bios[rdisk] = read_bio; r1_bio->bios[rdisk] = read_bio;
...@@ -950,7 +950,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) ...@@ -950,7 +950,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
if (!r1_bio->bios[i]) if (!r1_bio->bios[i])
continue; continue;
mbio = bio_clone(bio, GFP_NOIO); mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
r1_bio->bios[i] = mbio; r1_bio->bios[i] = mbio;
mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
...@@ -1640,7 +1640,8 @@ static void raid1d(mddev_t *mddev) ...@@ -1640,7 +1640,8 @@ static void raid1d(mddev_t *mddev)
mddev->ro ? IO_BLOCKED : NULL; mddev->ro ? IO_BLOCKED : NULL;
r1_bio->read_disk = disk; r1_bio->read_disk = disk;
bio_put(bio); bio_put(bio);
bio = bio_clone(r1_bio->master_bio, GFP_NOIO); bio = bio_clone_mddev(r1_bio->master_bio,
GFP_NOIO, mddev);
r1_bio->bios[r1_bio->read_disk] = bio; r1_bio->bios[r1_bio->read_disk] = bio;
rdev = conf->mirrors[disk].rdev; rdev = conf->mirrors[disk].rdev;
if (printk_ratelimit()) if (printk_ratelimit())
......
...@@ -889,7 +889,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) ...@@ -889,7 +889,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
} }
mirror = conf->mirrors + disk; mirror = conf->mirrors + disk;
read_bio = bio_clone(bio, GFP_NOIO); read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
r10_bio->devs[slot].bio = read_bio; r10_bio->devs[slot].bio = read_bio;
...@@ -958,7 +958,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) ...@@ -958,7 +958,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
if (!r10_bio->devs[i].bio) if (!r10_bio->devs[i].bio)
continue; continue;
mbio = bio_clone(bio, GFP_NOIO); mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
r10_bio->devs[i].bio = mbio; r10_bio->devs[i].bio = mbio;
mbio->bi_sector = r10_bio->devs[i].addr+ mbio->bi_sector = r10_bio->devs[i].addr+
...@@ -1746,7 +1746,8 @@ static void raid10d(mddev_t *mddev) ...@@ -1746,7 +1746,8 @@ static void raid10d(mddev_t *mddev)
mdname(mddev), mdname(mddev),
bdevname(rdev->bdev,b), bdevname(rdev->bdev,b),
(unsigned long long)r10_bio->sector); (unsigned long long)r10_bio->sector);
bio = bio_clone(r10_bio->master_bio, GFP_NOIO); bio = bio_clone_mddev(r10_bio->master_bio,
GFP_NOIO, mddev);
r10_bio->devs[r10_bio->read_slot].bio = bio; r10_bio->devs[r10_bio->read_slot].bio = bio;
bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
+ rdev->data_offset; + rdev->data_offset;
......
...@@ -3876,9 +3876,9 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio) ...@@ -3876,9 +3876,9 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio)
return 0; return 0;
} }
/* /*
* use bio_clone to make a copy of the bio * use bio_clone_mddev to make a copy of the bio
*/ */
align_bi = bio_clone(raid_bio, GFP_NOIO); align_bi = bio_clone_mddev(raid_bio, GFP_NOIO, mddev);
if (!align_bi) if (!align_bi)
return 0; return 0;
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment