Commit eb28be2b authored by Andi Kleen, committed by root

direct-io: separate fields only used in the submission path from struct dio

This large, but largely mechanical, patch moves all fields in struct dio
that are only used in the submission path into a separate on-stack
data structure. This has the advantage that the memory is very likely
cache-hot, which is not guaranteed for memory fresh out of kmalloc.

This also gives gcc more optimization potential because it can more
easily determine that there are no external aliases for these variables.

The sdio initialization is now an initialization instead of a memset.
This allows gcc to break sdio into individual fields and optimize
away unnecessary zeroing (after all the functions are inlined).
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent 62a3ddef
...@@ -55,13 +55,10 @@ ...@@ -55,13 +55,10 @@
* blocksize. * blocksize.
*/ */
struct dio { /* dio_state only used in the submission path */
/* BIO submission state */
struct dio_submit {
struct bio *bio; /* bio under assembly */ struct bio *bio; /* bio under assembly */
struct inode *inode;
int rw;
loff_t i_size; /* i_size when submitted */
int flags; /* doesn't change */
unsigned blkbits; /* doesn't change */ unsigned blkbits; /* doesn't change */
unsigned blkfactor; /* When we're using an alignment which unsigned blkfactor; /* When we're using an alignment which
is finer than the filesystem's soft is finer than the filesystem's soft
...@@ -81,13 +78,12 @@ struct dio { ...@@ -81,13 +78,12 @@ struct dio {
int boundary; /* prev block is at a boundary */ int boundary; /* prev block is at a boundary */
int reap_counter; /* rate limit reaping */ int reap_counter; /* rate limit reaping */
get_block_t *get_block; /* block mapping function */ get_block_t *get_block; /* block mapping function */
dio_iodone_t *end_io; /* IO completion function */
dio_submit_t *submit_io; /* IO submition function */ dio_submit_t *submit_io; /* IO submition function */
loff_t logical_offset_in_bio; /* current first logical block in bio */ loff_t logical_offset_in_bio; /* current first logical block in bio */
sector_t final_block_in_bio; /* current final block in bio + 1 */ sector_t final_block_in_bio; /* current final block in bio + 1 */
sector_t next_block_for_io; /* next block to be put under IO, sector_t next_block_for_io; /* next block to be put under IO,
in dio_blocks units */ in dio_blocks units */
struct buffer_head map_bh; /* last get_block() result */
/* /*
* Deferred addition of a page to the dio. These variables are * Deferred addition of a page to the dio. These variables are
...@@ -100,18 +96,6 @@ struct dio { ...@@ -100,18 +96,6 @@ struct dio {
sector_t cur_page_block; /* Where it starts */ sector_t cur_page_block; /* Where it starts */
loff_t cur_page_fs_offset; /* Offset in file */ loff_t cur_page_fs_offset; /* Offset in file */
/* BIO completion state */
spinlock_t bio_lock; /* protects BIO fields below */
unsigned long refcount; /* direct_io_worker() and bios */
struct bio *bio_list; /* singly linked via bi_private */
struct task_struct *waiter; /* waiting task (NULL if none) */
/* AIO related stuff */
struct kiocb *iocb; /* kiocb */
int is_async; /* is IO async ? */
int io_error; /* IO error in completion path */
ssize_t result; /* IO result */
/* /*
* Page fetching state. These variables belong to dio_refill_pages(). * Page fetching state. These variables belong to dio_refill_pages().
*/ */
...@@ -125,6 +109,30 @@ struct dio { ...@@ -125,6 +109,30 @@ struct dio {
*/ */
unsigned head; /* next page to process */ unsigned head; /* next page to process */
unsigned tail; /* last valid page + 1 */ unsigned tail; /* last valid page + 1 */
};
/* dio_state communicated between submission path and end_io */
struct dio {
int flags; /* doesn't change */
struct inode *inode;
int rw;
loff_t i_size; /* i_size when submitted */
dio_iodone_t *end_io; /* IO completion function */
struct buffer_head map_bh; /* last get_block() result */
/* BIO completion state */
spinlock_t bio_lock; /* protects BIO fields below */
unsigned long refcount; /* direct_io_worker() and bios */
struct bio *bio_list; /* singly linked via bi_private */
struct task_struct *waiter; /* waiting task (NULL if none) */
/* AIO related stuff */
struct kiocb *iocb; /* kiocb */
int is_async; /* is IO async ? */
int io_error; /* IO error in completion path */
ssize_t result; /* IO result */
int page_errors; /* errno from get_user_pages() */ int page_errors; /* errno from get_user_pages() */
/* /*
...@@ -182,27 +190,27 @@ EXPORT_SYMBOL_GPL(inode_dio_done); ...@@ -182,27 +190,27 @@ EXPORT_SYMBOL_GPL(inode_dio_done);
/* /*
* How many pages are in the queue? * How many pages are in the queue?
*/ */
static inline unsigned dio_pages_present(struct dio *dio) static inline unsigned dio_pages_present(struct dio_submit *sdio)
{ {
return dio->tail - dio->head; return sdio->tail - sdio->head;
} }
/* /*
* Go grab and pin some userspace pages. Typically we'll get 64 at a time. * Go grab and pin some userspace pages. Typically we'll get 64 at a time.
*/ */
static int dio_refill_pages(struct dio *dio) static int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
{ {
int ret; int ret;
int nr_pages; int nr_pages;
nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES); nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES);
ret = get_user_pages_fast( ret = get_user_pages_fast(
dio->curr_user_address, /* Where from? */ sdio->curr_user_address, /* Where from? */
nr_pages, /* How many pages? */ nr_pages, /* How many pages? */
dio->rw == READ, /* Write to memory? */ dio->rw == READ, /* Write to memory? */
&dio->pages[0]); /* Put results here */ &dio->pages[0]); /* Put results here */
if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) { if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
struct page *page = ZERO_PAGE(0); struct page *page = ZERO_PAGE(0);
/* /*
* A memory fault, but the filesystem has some outstanding * A memory fault, but the filesystem has some outstanding
...@@ -213,17 +221,17 @@ static int dio_refill_pages(struct dio *dio) ...@@ -213,17 +221,17 @@ static int dio_refill_pages(struct dio *dio)
dio->page_errors = ret; dio->page_errors = ret;
page_cache_get(page); page_cache_get(page);
dio->pages[0] = page; dio->pages[0] = page;
dio->head = 0; sdio->head = 0;
dio->tail = 1; sdio->tail = 1;
ret = 0; ret = 0;
goto out; goto out;
} }
if (ret >= 0) { if (ret >= 0) {
dio->curr_user_address += ret * PAGE_SIZE; sdio->curr_user_address += ret * PAGE_SIZE;
dio->curr_page += ret; sdio->curr_page += ret;
dio->head = 0; sdio->head = 0;
dio->tail = ret; sdio->tail = ret;
ret = 0; ret = 0;
} }
out: out:
...@@ -236,17 +244,17 @@ static int dio_refill_pages(struct dio *dio) ...@@ -236,17 +244,17 @@ static int dio_refill_pages(struct dio *dio)
* decent number of pages, less frequently. To provide nicer use of the * decent number of pages, less frequently. To provide nicer use of the
* L1 cache. * L1 cache.
*/ */
static struct page *dio_get_page(struct dio *dio) static struct page *dio_get_page(struct dio *dio, struct dio_submit *sdio)
{ {
if (dio_pages_present(dio) == 0) { if (dio_pages_present(sdio) == 0) {
int ret; int ret;
ret = dio_refill_pages(dio); ret = dio_refill_pages(dio, sdio);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
BUG_ON(dio_pages_present(dio) == 0); BUG_ON(dio_pages_present(sdio) == 0);
} }
return dio->pages[dio->head++]; return dio->pages[sdio->head++];
} }
/** /**
...@@ -368,8 +376,9 @@ void dio_end_io(struct bio *bio, int error) ...@@ -368,8 +376,9 @@ void dio_end_io(struct bio *bio, int error)
EXPORT_SYMBOL_GPL(dio_end_io); EXPORT_SYMBOL_GPL(dio_end_io);
static void static void
dio_bio_alloc(struct dio *dio, struct block_device *bdev, dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
sector_t first_sector, int nr_vecs) struct block_device *bdev,
sector_t first_sector, int nr_vecs)
{ {
struct bio *bio; struct bio *bio;
...@@ -386,8 +395,8 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, ...@@ -386,8 +395,8 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
else else
bio->bi_end_io = dio_bio_end_io; bio->bi_end_io = dio_bio_end_io;
dio->bio = bio; sdio->bio = bio;
dio->logical_offset_in_bio = dio->cur_page_fs_offset; sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
} }
/* /*
...@@ -397,9 +406,9 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, ...@@ -397,9 +406,9 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
* *
* bios hold a dio reference between submit_bio and ->end_io. * bios hold a dio reference between submit_bio and ->end_io.
*/ */
static void dio_bio_submit(struct dio *dio) static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
{ {
struct bio *bio = dio->bio; struct bio *bio = sdio->bio;
unsigned long flags; unsigned long flags;
bio->bi_private = dio; bio->bi_private = dio;
...@@ -411,24 +420,24 @@ static void dio_bio_submit(struct dio *dio) ...@@ -411,24 +420,24 @@ static void dio_bio_submit(struct dio *dio)
if (dio->is_async && dio->rw == READ) if (dio->is_async && dio->rw == READ)
bio_set_pages_dirty(bio); bio_set_pages_dirty(bio);
if (dio->submit_io) if (sdio->submit_io)
dio->submit_io(dio->rw, bio, dio->inode, sdio->submit_io(dio->rw, bio, dio->inode,
dio->logical_offset_in_bio); sdio->logical_offset_in_bio);
else else
submit_bio(dio->rw, bio); submit_bio(dio->rw, bio);
dio->bio = NULL; sdio->bio = NULL;
dio->boundary = 0; sdio->boundary = 0;
dio->logical_offset_in_bio = 0; sdio->logical_offset_in_bio = 0;
} }
/* /*
* Release any resources in case of a failure * Release any resources in case of a failure
*/ */
static void dio_cleanup(struct dio *dio) static void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
{ {
while (dio_pages_present(dio)) while (dio_pages_present(sdio))
page_cache_release(dio_get_page(dio)); page_cache_release(dio_get_page(dio, sdio));
} }
/* /*
...@@ -518,11 +527,11 @@ static void dio_await_completion(struct dio *dio) ...@@ -518,11 +527,11 @@ static void dio_await_completion(struct dio *dio)
* *
* This also helps to limit the peak amount of pinned userspace memory. * This also helps to limit the peak amount of pinned userspace memory.
*/ */
static int dio_bio_reap(struct dio *dio) static int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
{ {
int ret = 0; int ret = 0;
if (dio->reap_counter++ >= 64) { if (sdio->reap_counter++ >= 64) {
while (dio->bio_list) { while (dio->bio_list) {
unsigned long flags; unsigned long flags;
struct bio *bio; struct bio *bio;
...@@ -536,14 +545,14 @@ static int dio_bio_reap(struct dio *dio) ...@@ -536,14 +545,14 @@ static int dio_bio_reap(struct dio *dio)
if (ret == 0) if (ret == 0)
ret = ret2; ret = ret2;
} }
dio->reap_counter = 0; sdio->reap_counter = 0;
} }
return ret; return ret;
} }
/* /*
* Call into the fs to map some more disk blocks. We record the current number * Call into the fs to map some more disk blocks. We record the current number
* of available blocks at dio->blocks_available. These are in units of the * of available blocks at sdio->blocks_available. These are in units of the
* fs blocksize, (1 << inode->i_blkbits). * fs blocksize, (1 << inode->i_blkbits).
* *
* The fs is allowed to map lots of blocks at once. If it wants to do that, * The fs is allowed to map lots of blocks at once. If it wants to do that,
...@@ -564,7 +573,7 @@ static int dio_bio_reap(struct dio *dio) ...@@ -564,7 +573,7 @@ static int dio_bio_reap(struct dio *dio)
* buffer_mapped(). However the direct-io code will only process holes one * buffer_mapped(). However the direct-io code will only process holes one
* block at a time - it will repeatedly call get_block() as it walks the hole. * block at a time - it will repeatedly call get_block() as it walks the hole.
*/ */
static int get_more_blocks(struct dio *dio) static int get_more_blocks(struct dio *dio, struct dio_submit *sdio)
{ {
int ret; int ret;
struct buffer_head *map_bh = &dio->map_bh; struct buffer_head *map_bh = &dio->map_bh;
...@@ -580,11 +589,11 @@ static int get_more_blocks(struct dio *dio) ...@@ -580,11 +589,11 @@ static int get_more_blocks(struct dio *dio)
*/ */
ret = dio->page_errors; ret = dio->page_errors;
if (ret == 0) { if (ret == 0) {
BUG_ON(dio->block_in_file >= dio->final_block_in_request); BUG_ON(sdio->block_in_file >= sdio->final_block_in_request);
fs_startblk = dio->block_in_file >> dio->blkfactor; fs_startblk = sdio->block_in_file >> sdio->blkfactor;
dio_count = dio->final_block_in_request - dio->block_in_file; dio_count = sdio->final_block_in_request - sdio->block_in_file;
fs_count = dio_count >> dio->blkfactor; fs_count = dio_count >> sdio->blkfactor;
blkmask = (1 << dio->blkfactor) - 1; blkmask = (1 << sdio->blkfactor) - 1;
if (dio_count & blkmask) if (dio_count & blkmask)
fs_count++; fs_count++;
...@@ -604,12 +613,12 @@ static int get_more_blocks(struct dio *dio) ...@@ -604,12 +613,12 @@ static int get_more_blocks(struct dio *dio)
*/ */
create = dio->rw & WRITE; create = dio->rw & WRITE;
if (dio->flags & DIO_SKIP_HOLES) { if (dio->flags & DIO_SKIP_HOLES) {
if (dio->block_in_file < (i_size_read(dio->inode) >> if (sdio->block_in_file < (i_size_read(dio->inode) >>
dio->blkbits)) sdio->blkbits))
create = 0; create = 0;
} }
ret = (*dio->get_block)(dio->inode, fs_startblk, ret = (*sdio->get_block)(dio->inode, fs_startblk,
map_bh, create); map_bh, create);
} }
return ret; return ret;
...@@ -618,20 +627,21 @@ static int get_more_blocks(struct dio *dio) ...@@ -618,20 +627,21 @@ static int get_more_blocks(struct dio *dio)
/* /*
* There is no bio. Make one now. * There is no bio. Make one now.
*/ */
static int dio_new_bio(struct dio *dio, sector_t start_sector) static int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
sector_t start_sector)
{ {
sector_t sector; sector_t sector;
int ret, nr_pages; int ret, nr_pages;
ret = dio_bio_reap(dio); ret = dio_bio_reap(dio, sdio);
if (ret) if (ret)
goto out; goto out;
sector = start_sector << (dio->blkbits - 9); sector = start_sector << (sdio->blkbits - 9);
nr_pages = min(dio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev)); nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev));
nr_pages = min(nr_pages, BIO_MAX_PAGES); nr_pages = min(nr_pages, BIO_MAX_PAGES);
BUG_ON(nr_pages <= 0); BUG_ON(nr_pages <= 0);
dio_bio_alloc(dio, dio->map_bh.b_bdev, sector, nr_pages); dio_bio_alloc(dio, sdio, dio->map_bh.b_bdev, sector, nr_pages);
dio->boundary = 0; sdio->boundary = 0;
out: out:
return ret; return ret;
} }
...@@ -643,21 +653,21 @@ static int dio_new_bio(struct dio *dio, sector_t start_sector) ...@@ -643,21 +653,21 @@ static int dio_new_bio(struct dio *dio, sector_t start_sector)
* *
* Return zero on success. Non-zero means the caller needs to start a new BIO. * Return zero on success. Non-zero means the caller needs to start a new BIO.
*/ */
static int dio_bio_add_page(struct dio *dio) static int dio_bio_add_page(struct dio_submit *sdio)
{ {
int ret; int ret;
ret = bio_add_page(dio->bio, dio->cur_page, ret = bio_add_page(sdio->bio, sdio->cur_page,
dio->cur_page_len, dio->cur_page_offset); sdio->cur_page_len, sdio->cur_page_offset);
if (ret == dio->cur_page_len) { if (ret == sdio->cur_page_len) {
/* /*
* Decrement count only, if we are done with this page * Decrement count only, if we are done with this page
*/ */
if ((dio->cur_page_len + dio->cur_page_offset) == PAGE_SIZE) if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
dio->pages_in_io--; sdio->pages_in_io--;
page_cache_get(dio->cur_page); page_cache_get(sdio->cur_page);
dio->final_block_in_bio = dio->cur_page_block + sdio->final_block_in_bio = sdio->cur_page_block +
(dio->cur_page_len >> dio->blkbits); (sdio->cur_page_len >> sdio->blkbits);
ret = 0; ret = 0;
} else { } else {
ret = 1; ret = 1;
...@@ -675,14 +685,14 @@ static int dio_bio_add_page(struct dio *dio) ...@@ -675,14 +685,14 @@ static int dio_bio_add_page(struct dio *dio)
* The caller of this function is responsible for removing cur_page from the * The caller of this function is responsible for removing cur_page from the
* dio, and for dropping the refcount which came from that presence. * dio, and for dropping the refcount which came from that presence.
*/ */
static int dio_send_cur_page(struct dio *dio) static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio)
{ {
int ret = 0; int ret = 0;
if (dio->bio) { if (sdio->bio) {
loff_t cur_offset = dio->cur_page_fs_offset; loff_t cur_offset = sdio->cur_page_fs_offset;
loff_t bio_next_offset = dio->logical_offset_in_bio + loff_t bio_next_offset = sdio->logical_offset_in_bio +
dio->bio->bi_size; sdio->bio->bi_size;
/* /*
* See whether this new request is contiguous with the old. * See whether this new request is contiguous with the old.
...@@ -698,28 +708,28 @@ static int dio_send_cur_page(struct dio *dio) ...@@ -698,28 +708,28 @@ static int dio_send_cur_page(struct dio *dio)
* be the next logical offset in the bio, submit the bio we * be the next logical offset in the bio, submit the bio we
* have. * have.
*/ */
if (dio->final_block_in_bio != dio->cur_page_block || if (sdio->final_block_in_bio != sdio->cur_page_block ||
cur_offset != bio_next_offset) cur_offset != bio_next_offset)
dio_bio_submit(dio); dio_bio_submit(dio, sdio);
/* /*
* Submit now if the underlying fs is about to perform a * Submit now if the underlying fs is about to perform a
* metadata read * metadata read
*/ */
else if (dio->boundary) else if (sdio->boundary)
dio_bio_submit(dio); dio_bio_submit(dio, sdio);
} }
if (dio->bio == NULL) { if (sdio->bio == NULL) {
ret = dio_new_bio(dio, dio->cur_page_block); ret = dio_new_bio(dio, sdio, sdio->cur_page_block);
if (ret) if (ret)
goto out; goto out;
} }
if (dio_bio_add_page(dio) != 0) { if (dio_bio_add_page(sdio) != 0) {
dio_bio_submit(dio); dio_bio_submit(dio, sdio);
ret = dio_new_bio(dio, dio->cur_page_block); ret = dio_new_bio(dio, sdio, sdio->cur_page_block);
if (ret == 0) { if (ret == 0) {
ret = dio_bio_add_page(dio); ret = dio_bio_add_page(sdio);
BUG_ON(ret != 0); BUG_ON(ret != 0);
} }
} }
...@@ -745,7 +755,7 @@ static int dio_send_cur_page(struct dio *dio) ...@@ -745,7 +755,7 @@ static int dio_send_cur_page(struct dio *dio)
* page to the dio instead. * page to the dio instead.
*/ */
static int static int
submit_page_section(struct dio *dio, struct page *page, submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
unsigned offset, unsigned len, sector_t blocknr) unsigned offset, unsigned len, sector_t blocknr)
{ {
int ret = 0; int ret = 0;
...@@ -760,20 +770,20 @@ submit_page_section(struct dio *dio, struct page *page, ...@@ -760,20 +770,20 @@ submit_page_section(struct dio *dio, struct page *page,
/* /*
* Can we just grow the current page's presence in the dio? * Can we just grow the current page's presence in the dio?
*/ */
if ( (dio->cur_page == page) && if (sdio->cur_page == page &&
(dio->cur_page_offset + dio->cur_page_len == offset) && sdio->cur_page_offset + sdio->cur_page_len == offset &&
(dio->cur_page_block + sdio->cur_page_block +
(dio->cur_page_len >> dio->blkbits) == blocknr)) { (sdio->cur_page_len >> sdio->blkbits) == blocknr) {
dio->cur_page_len += len; sdio->cur_page_len += len;
/* /*
* If dio->boundary then we want to schedule the IO now to * If sdio->boundary then we want to schedule the IO now to
* avoid metadata seeks. * avoid metadata seeks.
*/ */
if (dio->boundary) { if (sdio->boundary) {
ret = dio_send_cur_page(dio); ret = dio_send_cur_page(dio, sdio);
page_cache_release(dio->cur_page); page_cache_release(sdio->cur_page);
dio->cur_page = NULL; sdio->cur_page = NULL;
} }
goto out; goto out;
} }
...@@ -781,20 +791,20 @@ submit_page_section(struct dio *dio, struct page *page, ...@@ -781,20 +791,20 @@ submit_page_section(struct dio *dio, struct page *page,
/* /*
* If there's a deferred page already there then send it. * If there's a deferred page already there then send it.
*/ */
if (dio->cur_page) { if (sdio->cur_page) {
ret = dio_send_cur_page(dio); ret = dio_send_cur_page(dio, sdio);
page_cache_release(dio->cur_page); page_cache_release(sdio->cur_page);
dio->cur_page = NULL; sdio->cur_page = NULL;
if (ret) if (ret)
goto out; goto out;
} }
page_cache_get(page); /* It is in dio */ page_cache_get(page); /* It is in dio */
dio->cur_page = page; sdio->cur_page = page;
dio->cur_page_offset = offset; sdio->cur_page_offset = offset;
dio->cur_page_len = len; sdio->cur_page_len = len;
dio->cur_page_block = blocknr; sdio->cur_page_block = blocknr;
dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits; sdio->cur_page_fs_offset = sdio->block_in_file << sdio->blkbits;
out: out:
return ret; return ret;
} }
...@@ -826,19 +836,19 @@ static void clean_blockdev_aliases(struct dio *dio) ...@@ -826,19 +836,19 @@ static void clean_blockdev_aliases(struct dio *dio)
* `end' is zero if we're doing the start of the IO, 1 at the end of the * `end' is zero if we're doing the start of the IO, 1 at the end of the
* IO. * IO.
*/ */
static void dio_zero_block(struct dio *dio, int end) static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)
{ {
unsigned dio_blocks_per_fs_block; unsigned dio_blocks_per_fs_block;
unsigned this_chunk_blocks; /* In dio_blocks */ unsigned this_chunk_blocks; /* In dio_blocks */
unsigned this_chunk_bytes; unsigned this_chunk_bytes;
struct page *page; struct page *page;
dio->start_zero_done = 1; sdio->start_zero_done = 1;
if (!dio->blkfactor || !buffer_new(&dio->map_bh)) if (!sdio->blkfactor || !buffer_new(&dio->map_bh))
return; return;
dio_blocks_per_fs_block = 1 << dio->blkfactor; dio_blocks_per_fs_block = 1 << sdio->blkfactor;
this_chunk_blocks = dio->block_in_file & (dio_blocks_per_fs_block - 1); this_chunk_blocks = sdio->block_in_file & (dio_blocks_per_fs_block - 1);
if (!this_chunk_blocks) if (!this_chunk_blocks)
return; return;
...@@ -850,14 +860,14 @@ static void dio_zero_block(struct dio *dio, int end) ...@@ -850,14 +860,14 @@ static void dio_zero_block(struct dio *dio, int end)
if (end) if (end)
this_chunk_blocks = dio_blocks_per_fs_block - this_chunk_blocks; this_chunk_blocks = dio_blocks_per_fs_block - this_chunk_blocks;
this_chunk_bytes = this_chunk_blocks << dio->blkbits; this_chunk_bytes = this_chunk_blocks << sdio->blkbits;
page = ZERO_PAGE(0); page = ZERO_PAGE(0);
if (submit_page_section(dio, page, 0, this_chunk_bytes, if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
dio->next_block_for_io)) sdio->next_block_for_io))
return; return;
dio->next_block_for_io += this_chunk_blocks; sdio->next_block_for_io += this_chunk_blocks;
} }
/* /*
...@@ -876,9 +886,9 @@ static void dio_zero_block(struct dio *dio, int end) ...@@ -876,9 +886,9 @@ static void dio_zero_block(struct dio *dio, int end)
* it should set b_size to PAGE_SIZE or more inside get_block(). This gives * it should set b_size to PAGE_SIZE or more inside get_block(). This gives
* fine alignment but still allows this function to work in PAGE_SIZE units. * fine alignment but still allows this function to work in PAGE_SIZE units.
*/ */
static int do_direct_IO(struct dio *dio) static int do_direct_IO(struct dio *dio, struct dio_submit *sdio)
{ {
const unsigned blkbits = dio->blkbits; const unsigned blkbits = sdio->blkbits;
const unsigned blocks_per_page = PAGE_SIZE >> blkbits; const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
struct page *page; struct page *page;
unsigned block_in_page; unsigned block_in_page;
...@@ -886,10 +896,10 @@ static int do_direct_IO(struct dio *dio) ...@@ -886,10 +896,10 @@ static int do_direct_IO(struct dio *dio)
int ret = 0; int ret = 0;
/* The I/O can start at any block offset within the first page */ /* The I/O can start at any block offset within the first page */
block_in_page = dio->first_block_in_page; block_in_page = sdio->first_block_in_page;
while (dio->block_in_file < dio->final_block_in_request) { while (sdio->block_in_file < sdio->final_block_in_request) {
page = dio_get_page(dio); page = dio_get_page(dio, sdio);
if (IS_ERR(page)) { if (IS_ERR(page)) {
ret = PTR_ERR(page); ret = PTR_ERR(page);
goto out; goto out;
...@@ -901,14 +911,14 @@ static int do_direct_IO(struct dio *dio) ...@@ -901,14 +911,14 @@ static int do_direct_IO(struct dio *dio)
unsigned this_chunk_blocks; /* # of blocks */ unsigned this_chunk_blocks; /* # of blocks */
unsigned u; unsigned u;
if (dio->blocks_available == 0) { if (sdio->blocks_available == 0) {
/* /*
* Need to go and map some more disk * Need to go and map some more disk
*/ */
unsigned long blkmask; unsigned long blkmask;
unsigned long dio_remainder; unsigned long dio_remainder;
ret = get_more_blocks(dio); ret = get_more_blocks(dio, sdio);
if (ret) { if (ret) {
page_cache_release(page); page_cache_release(page);
goto out; goto out;
...@@ -916,18 +926,18 @@ static int do_direct_IO(struct dio *dio) ...@@ -916,18 +926,18 @@ static int do_direct_IO(struct dio *dio)
if (!buffer_mapped(map_bh)) if (!buffer_mapped(map_bh))
goto do_holes; goto do_holes;
dio->blocks_available = sdio->blocks_available =
map_bh->b_size >> dio->blkbits; map_bh->b_size >> sdio->blkbits;
dio->next_block_for_io = sdio->next_block_for_io =
map_bh->b_blocknr << dio->blkfactor; map_bh->b_blocknr << sdio->blkfactor;
if (buffer_new(map_bh)) if (buffer_new(map_bh))
clean_blockdev_aliases(dio); clean_blockdev_aliases(dio);
if (!dio->blkfactor) if (!sdio->blkfactor)
goto do_holes; goto do_holes;
blkmask = (1 << dio->blkfactor) - 1; blkmask = (1 << sdio->blkfactor) - 1;
dio_remainder = (dio->block_in_file & blkmask); dio_remainder = (sdio->block_in_file & blkmask);
/* /*
* If we are at the start of IO and that IO * If we are at the start of IO and that IO
...@@ -941,8 +951,8 @@ static int do_direct_IO(struct dio *dio) ...@@ -941,8 +951,8 @@ static int do_direct_IO(struct dio *dio)
* on-disk * on-disk
*/ */
if (!buffer_new(map_bh)) if (!buffer_new(map_bh))
dio->next_block_for_io += dio_remainder; sdio->next_block_for_io += dio_remainder;
dio->blocks_available -= dio_remainder; sdio->blocks_available -= dio_remainder;
} }
do_holes: do_holes:
/* Handle holes */ /* Handle holes */
...@@ -961,7 +971,7 @@ static int do_direct_IO(struct dio *dio) ...@@ -961,7 +971,7 @@ static int do_direct_IO(struct dio *dio)
*/ */
i_size_aligned = ALIGN(i_size_read(dio->inode), i_size_aligned = ALIGN(i_size_read(dio->inode),
1 << blkbits); 1 << blkbits);
if (dio->block_in_file >= if (sdio->block_in_file >=
i_size_aligned >> blkbits) { i_size_aligned >> blkbits) {
/* We hit eof */ /* We hit eof */
page_cache_release(page); page_cache_release(page);
...@@ -969,7 +979,7 @@ static int do_direct_IO(struct dio *dio) ...@@ -969,7 +979,7 @@ static int do_direct_IO(struct dio *dio)
} }
zero_user(page, block_in_page << blkbits, zero_user(page, block_in_page << blkbits,
1 << blkbits); 1 << blkbits);
dio->block_in_file++; sdio->block_in_file++;
block_in_page++; block_in_page++;
goto next_block; goto next_block;
} }
...@@ -979,38 +989,40 @@ static int do_direct_IO(struct dio *dio) ...@@ -979,38 +989,40 @@ static int do_direct_IO(struct dio *dio)
* is finer than the underlying fs, go check to see if * is finer than the underlying fs, go check to see if
* we must zero out the start of this block. * we must zero out the start of this block.
*/ */
if (unlikely(dio->blkfactor && !dio->start_zero_done)) if (unlikely(sdio->blkfactor && !sdio->start_zero_done))
dio_zero_block(dio, 0); dio_zero_block(dio, sdio, 0);
/* /*
* Work out, in this_chunk_blocks, how much disk we * Work out, in this_chunk_blocks, how much disk we
* can add to this page * can add to this page
*/ */
this_chunk_blocks = dio->blocks_available; this_chunk_blocks = sdio->blocks_available;
u = (PAGE_SIZE - offset_in_page) >> blkbits; u = (PAGE_SIZE - offset_in_page) >> blkbits;
if (this_chunk_blocks > u) if (this_chunk_blocks > u)
this_chunk_blocks = u; this_chunk_blocks = u;
u = dio->final_block_in_request - dio->block_in_file; u = sdio->final_block_in_request - sdio->block_in_file;
if (this_chunk_blocks > u) if (this_chunk_blocks > u)
this_chunk_blocks = u; this_chunk_blocks = u;
this_chunk_bytes = this_chunk_blocks << blkbits; this_chunk_bytes = this_chunk_blocks << blkbits;
BUG_ON(this_chunk_bytes == 0); BUG_ON(this_chunk_bytes == 0);
dio->boundary = buffer_boundary(map_bh); sdio->boundary = buffer_boundary(map_bh);
ret = submit_page_section(dio, page, offset_in_page, ret = submit_page_section(dio, sdio, page,
this_chunk_bytes, dio->next_block_for_io); offset_in_page,
this_chunk_bytes,
sdio->next_block_for_io);
if (ret) { if (ret) {
page_cache_release(page); page_cache_release(page);
goto out; goto out;
} }
dio->next_block_for_io += this_chunk_blocks; sdio->next_block_for_io += this_chunk_blocks;
dio->block_in_file += this_chunk_blocks; sdio->block_in_file += this_chunk_blocks;
block_in_page += this_chunk_blocks; block_in_page += this_chunk_blocks;
dio->blocks_available -= this_chunk_blocks; sdio->blocks_available -= this_chunk_blocks;
next_block: next_block:
BUG_ON(dio->block_in_file > dio->final_block_in_request); BUG_ON(sdio->block_in_file > sdio->final_block_in_request);
if (dio->block_in_file == dio->final_block_in_request) if (sdio->block_in_file == sdio->final_block_in_request)
break; break;
} }
...@@ -1026,7 +1038,7 @@ static ssize_t ...@@ -1026,7 +1038,7 @@ static ssize_t
direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
const struct iovec *iov, loff_t offset, unsigned long nr_segs, const struct iovec *iov, loff_t offset, unsigned long nr_segs,
unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
dio_submit_t submit_io, struct dio *dio) dio_submit_t submit_io, struct dio *dio, struct dio_submit *sdio)
{ {
unsigned long user_addr; unsigned long user_addr;
unsigned long flags; unsigned long flags;
...@@ -1037,15 +1049,15 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -1037,15 +1049,15 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
dio->inode = inode; dio->inode = inode;
dio->rw = rw; dio->rw = rw;
dio->blkbits = blkbits; sdio->blkbits = blkbits;
dio->blkfactor = inode->i_blkbits - blkbits; sdio->blkfactor = inode->i_blkbits - blkbits;
dio->block_in_file = offset >> blkbits; sdio->block_in_file = offset >> blkbits;
dio->get_block = get_block; sdio->get_block = get_block;
dio->end_io = end_io; dio->end_io = end_io;
dio->submit_io = submit_io; sdio->submit_io = submit_io;
dio->final_block_in_bio = -1; sdio->final_block_in_bio = -1;
dio->next_block_for_io = -1; sdio->next_block_for_io = -1;
dio->iocb = iocb; dio->iocb = iocb;
dio->i_size = i_size_read(inode); dio->i_size = i_size_read(inode);
...@@ -1057,45 +1069,45 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -1057,45 +1069,45 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
* In case of non-aligned buffers, we may need 2 more * In case of non-aligned buffers, we may need 2 more
* pages since we need to zero out first and last block. * pages since we need to zero out first and last block.
*/ */
if (unlikely(dio->blkfactor)) if (unlikely(sdio->blkfactor))
dio->pages_in_io = 2; sdio->pages_in_io = 2;
for (seg = 0; seg < nr_segs; seg++) { for (seg = 0; seg < nr_segs; seg++) {
user_addr = (unsigned long)iov[seg].iov_base; user_addr = (unsigned long)iov[seg].iov_base;
dio->pages_in_io += sdio->pages_in_io +=
((user_addr+iov[seg].iov_len +PAGE_SIZE-1)/PAGE_SIZE ((user_addr+iov[seg].iov_len +PAGE_SIZE-1)/PAGE_SIZE
- user_addr/PAGE_SIZE); - user_addr/PAGE_SIZE);
} }
for (seg = 0; seg < nr_segs; seg++) { for (seg = 0; seg < nr_segs; seg++) {
user_addr = (unsigned long)iov[seg].iov_base; user_addr = (unsigned long)iov[seg].iov_base;
dio->size += bytes = iov[seg].iov_len; sdio->size += bytes = iov[seg].iov_len;
/* Index into the first page of the first block */ /* Index into the first page of the first block */
dio->first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits; sdio->first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
dio->final_block_in_request = dio->block_in_file + sdio->final_block_in_request = sdio->block_in_file +
(bytes >> blkbits); (bytes >> blkbits);
/* Page fetching state */ /* Page fetching state */
dio->head = 0; sdio->head = 0;
dio->tail = 0; sdio->tail = 0;
dio->curr_page = 0; sdio->curr_page = 0;
dio->total_pages = 0; sdio->total_pages = 0;
if (user_addr & (PAGE_SIZE-1)) { if (user_addr & (PAGE_SIZE-1)) {
dio->total_pages++; sdio->total_pages++;
bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1)); bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
} }
dio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; sdio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
dio->curr_user_address = user_addr; sdio->curr_user_address = user_addr;
ret = do_direct_IO(dio); ret = do_direct_IO(dio, sdio);
dio->result += iov[seg].iov_len - dio->result += iov[seg].iov_len -
((dio->final_block_in_request - dio->block_in_file) << ((sdio->final_block_in_request - sdio->block_in_file) <<
blkbits); blkbits);
if (ret) { if (ret) {
dio_cleanup(dio); dio_cleanup(dio, sdio);
break; break;
} }
} /* end iovec loop */ } /* end iovec loop */
...@@ -1111,23 +1123,23 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -1111,23 +1123,23 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
* There may be some unwritten disk at the end of a part-written * There may be some unwritten disk at the end of a part-written
* fs-block-sized block. Go zero that now. * fs-block-sized block. Go zero that now.
*/ */
dio_zero_block(dio, 1); dio_zero_block(dio, sdio, 1);
if (dio->cur_page) { if (sdio->cur_page) {
ret2 = dio_send_cur_page(dio); ret2 = dio_send_cur_page(dio, sdio);
if (ret == 0) if (ret == 0)
ret = ret2; ret = ret2;
page_cache_release(dio->cur_page); page_cache_release(sdio->cur_page);
dio->cur_page = NULL; sdio->cur_page = NULL;
} }
if (dio->bio) if (sdio->bio)
dio_bio_submit(dio); dio_bio_submit(dio, sdio);
/* /*
* It is possible that, we return short IO due to end of file. * It is possible that, we return short IO due to end of file.
* In that case, we need to release all the pages we got hold on. * In that case, we need to release all the pages we got hold on.
*/ */
dio_cleanup(dio); dio_cleanup(dio, sdio);
/* /*
* All block lookups have been performed. For READ requests * All block lookups have been performed. For READ requests
...@@ -1146,7 +1158,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -1146,7 +1158,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
*/ */
BUG_ON(ret == -EIOCBQUEUED); BUG_ON(ret == -EIOCBQUEUED);
if (dio->is_async && ret == 0 && dio->result && if (dio->is_async && ret == 0 && dio->result &&
((rw & READ) || (dio->result == dio->size))) ((rw & READ) || (dio->result == sdio->size)))
ret = -EIOCBQUEUED; ret = -EIOCBQUEUED;
if (ret != -EIOCBQUEUED) if (ret != -EIOCBQUEUED)
...@@ -1211,6 +1223,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -1211,6 +1223,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
ssize_t retval = -EINVAL; ssize_t retval = -EINVAL;
loff_t end = offset; loff_t end = offset;
struct dio *dio; struct dio *dio;
struct dio_submit sdio = { 0, };
if (rw & WRITE) if (rw & WRITE)
rw = WRITE_ODIRECT; rw = WRITE_ODIRECT;
...@@ -1290,7 +1303,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -1290,7 +1303,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
retval = direct_io_worker(rw, iocb, inode, iov, offset, retval = direct_io_worker(rw, iocb, inode, iov, offset,
nr_segs, blkbits, get_block, end_io, nr_segs, blkbits, get_block, end_io,
submit_io, dio); submit_io, dio, &sdio);
out: out:
return retval; return retval;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment