Commit ba253fbf, authored by Andi Kleen, committed by root

direct-io: inline the complete submission path

Add inline to all the submission path functions. While this increases
code size, it also gives gcc a lot of optimization opportunities
in this critical hot path.

In particular -- together with some other changes -- this
allows gcc to get rid of the unnecessary clearing of
sdio at the beginning and to optimize the messy parameter passing.
Leaving any function that takes an sdio parameter non-inlined
would break this optimization, because it cannot be done if the
address of the structure is taken.

Note that benefits are only seen with CONFIG_OPTIMIZE_INLINING
and CONFIG_CC_OPTIMIZE_FOR_SIZE both set to off.

This gives about 2.2% improvement on a large database benchmark
with a high IOPS rate.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent 18772641
...@@ -199,7 +199,7 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio) ...@@ -199,7 +199,7 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio)
/* /*
* Go grab and pin some userspace pages. Typically we'll get 64 at a time. * Go grab and pin some userspace pages. Typically we'll get 64 at a time.
*/ */
static int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
{ {
int ret; int ret;
int nr_pages; int nr_pages;
...@@ -245,7 +245,8 @@ static int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) ...@@ -245,7 +245,8 @@ static int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
* decent number of pages, less frequently. To provide nicer use of the * decent number of pages, less frequently. To provide nicer use of the
* L1 cache. * L1 cache.
*/ */
static struct page *dio_get_page(struct dio *dio, struct dio_submit *sdio) static inline struct page *dio_get_page(struct dio *dio,
struct dio_submit *sdio)
{ {
if (dio_pages_present(sdio) == 0) { if (dio_pages_present(sdio) == 0) {
int ret; int ret;
...@@ -376,7 +377,7 @@ void dio_end_io(struct bio *bio, int error) ...@@ -376,7 +377,7 @@ void dio_end_io(struct bio *bio, int error)
} }
EXPORT_SYMBOL_GPL(dio_end_io); EXPORT_SYMBOL_GPL(dio_end_io);
static void static inline void
dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
struct block_device *bdev, struct block_device *bdev,
sector_t first_sector, int nr_vecs) sector_t first_sector, int nr_vecs)
...@@ -407,7 +408,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, ...@@ -407,7 +408,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
* *
* bios hold a dio reference between submit_bio and ->end_io. * bios hold a dio reference between submit_bio and ->end_io.
*/ */
static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
{ {
struct bio *bio = sdio->bio; struct bio *bio = sdio->bio;
unsigned long flags; unsigned long flags;
...@@ -435,7 +436,7 @@ static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) ...@@ -435,7 +436,7 @@ static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
/* /*
* Release any resources in case of a failure * Release any resources in case of a failure
*/ */
static void dio_cleanup(struct dio *dio, struct dio_submit *sdio) static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
{ {
while (dio_pages_present(sdio)) while (dio_pages_present(sdio))
page_cache_release(dio_get_page(dio, sdio)); page_cache_release(dio_get_page(dio, sdio));
...@@ -528,7 +529,7 @@ static void dio_await_completion(struct dio *dio) ...@@ -528,7 +529,7 @@ static void dio_await_completion(struct dio *dio)
* *
* This also helps to limit the peak amount of pinned userspace memory. * This also helps to limit the peak amount of pinned userspace memory.
*/ */
static int dio_bio_reap(struct dio *dio, struct dio_submit *sdio) static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
{ {
int ret = 0; int ret = 0;
...@@ -631,7 +632,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, ...@@ -631,7 +632,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
/* /*
* There is no bio. Make one now. * There is no bio. Make one now.
*/ */
static int dio_new_bio(struct dio *dio, struct dio_submit *sdio, static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
sector_t start_sector, struct buffer_head *map_bh) sector_t start_sector, struct buffer_head *map_bh)
{ {
sector_t sector; sector_t sector;
...@@ -657,7 +658,7 @@ static int dio_new_bio(struct dio *dio, struct dio_submit *sdio, ...@@ -657,7 +658,7 @@ static int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
* *
* Return zero on success. Non-zero means the caller needs to start a new BIO. * Return zero on success. Non-zero means the caller needs to start a new BIO.
*/ */
static int dio_bio_add_page(struct dio_submit *sdio) static inline int dio_bio_add_page(struct dio_submit *sdio)
{ {
int ret; int ret;
...@@ -689,7 +690,7 @@ static int dio_bio_add_page(struct dio_submit *sdio) ...@@ -689,7 +690,7 @@ static int dio_bio_add_page(struct dio_submit *sdio)
* The caller of this function is responsible for removing cur_page from the * The caller of this function is responsible for removing cur_page from the
* dio, and for dropping the refcount which came from that presence. * dio, and for dropping the refcount which came from that presence.
*/ */
static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio, static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
struct buffer_head *map_bh) struct buffer_head *map_bh)
{ {
int ret = 0; int ret = 0;
...@@ -759,7 +760,7 @@ static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio, ...@@ -759,7 +760,7 @@ static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
* If that doesn't work out then we put the old page into the bio and add this * If that doesn't work out then we put the old page into the bio and add this
* page to the dio instead. * page to the dio instead.
*/ */
static int static inline int
submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
unsigned offset, unsigned len, sector_t blocknr, unsigned offset, unsigned len, sector_t blocknr,
struct buffer_head *map_bh) struct buffer_head *map_bh)
...@@ -842,8 +843,8 @@ static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh) ...@@ -842,8 +843,8 @@ static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
* `end' is zero if we're doing the start of the IO, 1 at the end of the * `end' is zero if we're doing the start of the IO, 1 at the end of the
* IO. * IO.
*/ */
static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end, static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
struct buffer_head *map_bh) int end, struct buffer_head *map_bh)
{ {
unsigned dio_blocks_per_fs_block; unsigned dio_blocks_per_fs_block;
unsigned this_chunk_blocks; /* In dio_blocks */ unsigned this_chunk_blocks; /* In dio_blocks */
...@@ -1042,7 +1043,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, ...@@ -1042,7 +1043,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
return ret; return ret;
} }
static ssize_t static inline ssize_t
direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
const struct iovec *iov, loff_t offset, unsigned long nr_segs, const struct iovec *iov, loff_t offset, unsigned long nr_segs,
unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
...@@ -1216,6 +1217,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -1216,6 +1217,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
* expected that filesystem provide exclusion between new direct I/O * expected that filesystem provide exclusion between new direct I/O
* and truncates. For DIO_LOCKING filesystems this is done by i_mutex, * and truncates. For DIO_LOCKING filesystems this is done by i_mutex,
* but other filesystems need to take care of this on their own. * but other filesystems need to take care of this on their own.
*
* NOTE: if you pass "sdio" to anything by pointer make sure that function
* is always inlined. Otherwise gcc is unable to split the structure into
* individual fields and will generate much worse code. This is important
* for the whole file.
*/ */
ssize_t ssize_t
__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment