Commit bd415ca2 authored by Neil Brown's avatar Neil Brown Committed by Christoph Hellwig

[PATCH] Change MD Superblock IO to go straight to submit_bio

The current code hits the page cache for the block device,
which requires memory allocation, which can sometimes cause
a deadlock (if it blocks the raid5d thread).

This code takes the page that holds the superblock, and
passes it to submit_bio in a suitable bio wrapper.
parent b51682c0
...@@ -436,14 +436,15 @@ static int alloc_array_sb(mddev_t * mddev) ...@@ -436,14 +436,15 @@ static int alloc_array_sb(mddev_t * mddev)
static int alloc_disk_sb(mdk_rdev_t * rdev) static int alloc_disk_sb(mdk_rdev_t * rdev)
{ {
if (rdev->sb) if (rdev->sb_page)
MD_BUG(); MD_BUG();
rdev->sb = (mdp_super_t *) __get_free_page(GFP_KERNEL); rdev->sb_page = alloc_page(GFP_KERNEL);
if (!rdev->sb) { if (!rdev->sb_page) {
printk(OUT_OF_MEM); printk(OUT_OF_MEM);
return -EINVAL; return -EINVAL;
} }
rdev->sb = (mdp_super_t *) page_address(rdev->sb_page);
clear_page(rdev->sb); clear_page(rdev->sb);
return 0; return 0;
...@@ -451,9 +452,10 @@ static int alloc_disk_sb(mdk_rdev_t * rdev) ...@@ -451,9 +452,10 @@ static int alloc_disk_sb(mdk_rdev_t * rdev)
static void free_disk_sb(mdk_rdev_t * rdev) static void free_disk_sb(mdk_rdev_t * rdev)
{ {
if (rdev->sb) { if (rdev->sb_page) {
free_page((unsigned long) rdev->sb); page_cache_release(rdev->sb_page);
rdev->sb = NULL; rdev->sb = NULL;
rdev->sb_page = NULL;
rdev->sb_offset = 0; rdev->sb_offset = 0;
rdev->size = 0; rdev->size = 0;
} else { } else {
...@@ -462,13 +464,42 @@ static void free_disk_sb(mdk_rdev_t * rdev) ...@@ -462,13 +464,42 @@ static void free_disk_sb(mdk_rdev_t * rdev)
} }
} }
/*
 * End-of-I/O callback for a bio: signals the completion object whose
 * address was stored in the bio's bi_private field.
 */
static void bi_complete(struct bio *bio)
{
	struct completion *done = (struct completion *)bio->bi_private;

	complete(done);
}
/*
 * Synchronously transfer 'size' bytes between 'page' (starting at offset 0
 * within the page) and block device 'bdev' at 'sector', using a
 * single-segment bio built on the stack.
 *
 * 'rw' is handed unchanged to submit_bio(); the callers visible in this
 * change pass READ or WRITE.
 *
 * The function blocks until bi_complete() signals the on-stack completion.
 *
 * Returns the result of testing BIO_UPTODATE in the bio's flags: non-zero
 * when the bit is set, zero otherwise (callers treat zero as failure).
 */
static int sync_page_io(struct block_device *bdev, sector_t sector, int size,
struct page *page, int rw)
{
struct bio bio;
struct bio_vec vec;
struct completion event;
bio_init(&bio);
/* One bio_vec covering the first 'size' bytes of 'page'. */
bio.bi_io_vec = &vec;
vec.bv_page = page;
vec.bv_len = size;
vec.bv_offset = 0;
bio.bi_vcnt = 1;
bio.bi_idx = 0;
bio.bi_size = size;
/* Target device and starting sector for the transfer. */
bio.bi_bdev = bdev;
bio.bi_sector = sector;
/* bi_complete() will signal 'event' through bi_private when the bio ends. */
init_completion(&event);
bio.bi_private = &event;
bio.bi_end_io = bi_complete;
submit_bio(rw, &bio);
/* NOTE(review): presumably kicks the disk task queue so the request is started promptly — confirm. */
run_task_queue(&tq_disk);
/* 'bio', 'vec' and 'event' live on this stack frame, so wait before returning. */
wait_for_completion(&event);
return test_bit(BIO_UPTODATE, &bio.bi_flags);
}
static int read_disk_sb(mdk_rdev_t * rdev) static int read_disk_sb(mdk_rdev_t * rdev)
{ {
struct address_space *mapping = rdev->bdev->bd_inode->i_mapping;
struct page *page;
char *p;
unsigned long sb_offset; unsigned long sb_offset;
int n = PAGE_CACHE_SIZE / BLOCK_SIZE;
if (!rdev->sb) { if (!rdev->sb) {
MD_BUG(); MD_BUG();
...@@ -483,24 +514,14 @@ static int read_disk_sb(mdk_rdev_t * rdev) ...@@ -483,24 +514,14 @@ static int read_disk_sb(mdk_rdev_t * rdev)
*/ */
sb_offset = calc_dev_sboffset(rdev->dev, rdev->mddev, 1); sb_offset = calc_dev_sboffset(rdev->dev, rdev->mddev, 1);
rdev->sb_offset = sb_offset; rdev->sb_offset = sb_offset;
page = read_cache_page(mapping, sb_offset/n,
(filler_t *)mapping->a_ops->readpage, NULL); if (!sync_page_io(rdev->bdev, sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ))
if (IS_ERR(page))
goto out;
wait_on_page_locked(page);
if (!PageUptodate(page))
goto fail;
if (PageError(page))
goto fail; goto fail;
p = (char *)page_address(page) + BLOCK_SIZE * (sb_offset % n);
memcpy((char*)rdev->sb, p, MD_SB_BYTES);
page_cache_release(page);
printk(KERN_INFO " [events: %08lx]\n", (unsigned long)rdev->sb->events_lo); printk(KERN_INFO " [events: %08lx]\n", (unsigned long)rdev->sb->events_lo);
return 0; return 0;
fail: fail:
page_cache_release(page);
out:
printk(NO_SB,partition_name(rdev->dev)); printk(NO_SB,partition_name(rdev->dev));
return -EINVAL; return -EINVAL;
} }
...@@ -893,11 +914,6 @@ static mdk_rdev_t * find_rdev_all(kdev_t dev) ...@@ -893,11 +914,6 @@ static mdk_rdev_t * find_rdev_all(kdev_t dev)
static int write_disk_sb(mdk_rdev_t * rdev) static int write_disk_sb(mdk_rdev_t * rdev)
{ {
struct block_device *bdev = rdev->bdev;
struct address_space *mapping = bdev->bd_inode->i_mapping;
struct page *page;
unsigned offs;
int error;
kdev_t dev = rdev->dev; kdev_t dev = rdev->dev;
unsigned long sb_offset, size; unsigned long sb_offset, size;
...@@ -933,29 +949,11 @@ static int write_disk_sb(mdk_rdev_t * rdev) ...@@ -933,29 +949,11 @@ static int write_disk_sb(mdk_rdev_t * rdev)
} }
printk(KERN_INFO "(write) %s's sb offset: %ld\n", partition_name(dev), sb_offset); printk(KERN_INFO "(write) %s's sb offset: %ld\n", partition_name(dev), sb_offset);
fsync_bdev(bdev);
page = grab_cache_page(mapping, sb_offset/(PAGE_CACHE_SIZE/BLOCK_SIZE)); if (!sync_page_io(rdev->bdev, sb_offset<<1, MD_SB_BYTES, rdev->sb_page, WRITE))
offs = sb_offset % (PAGE_CACHE_SIZE/BLOCK_SIZE);
if (!page)
goto fail; goto fail;
error = mapping->a_ops->prepare_write(NULL, page, offs,
offs + MD_SB_BYTES);
if (error)
goto unlock;
memcpy((char *)page_address(page) + offs, rdev->sb, MD_SB_BYTES);
error = mapping->a_ops->commit_write(NULL, page, offs,
offs + MD_SB_BYTES);
if (error)
goto unlock;
unlock_page(page);
wait_on_page_locked(page);
page_cache_release(page);
fsync_bdev(bdev);
skip: skip:
return 0; return 0;
unlock:
unlock_page(page);
page_cache_release(page);
fail: fail:
printk("md: write_disk_sb failed for device %s\n", partition_name(dev)); printk("md: write_disk_sb failed for device %s\n", partition_name(dev));
return 1; return 1;
......
...@@ -169,6 +169,7 @@ struct mdk_rdev_s ...@@ -169,6 +169,7 @@ struct mdk_rdev_s
struct block_device *bdev; /* block device handle */ struct block_device *bdev; /* block device handle */
struct page *sb_page;
mdp_super_t *sb; mdp_super_t *sb;
unsigned long sb_offset; unsigned long sb_offset;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment