Commit 2406a307 authored by Jiufei Xue's avatar Jiufei Xue Committed by Miklos Szeredi

ovl: implement async IO routines

A performance regression was observed since linux v4.19 with aio test using
fio with iodepth 128 on overlayfs.  The queue depth of the device was
always 1 which is unexpected.

After investigation, it was found that commit 16914e6f ("ovl: add
ovl_read_iter()") and commit 2a92e07e ("ovl: add ovl_write_iter()")
resulted in vfs_iter_{read,write} being called on underlying filesystem,
which always results in syncronous IO.

Implement async IO for stacked reading and writing.  This resolves the
performance regresion.

This is implemented by allocating a new kiocb for submitting the AIO
request on the underlying filesystem.  When the request is completed, the
new kiocb is freed and the completion callback is called on the original
iocb.
Signed-off-by: default avatarJiufei Xue <jiufei.xue@linux.alibaba.com>
Signed-off-by: default avatarMiklos Szeredi <mszeredi@redhat.com>
parent 5dcdc43e
...@@ -11,6 +11,14 @@ ...@@ -11,6 +11,14 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include "overlayfs.h" #include "overlayfs.h"
struct ovl_aio_req {
struct kiocb iocb;
struct kiocb *orig_iocb;
struct fd fd;
};
static struct kmem_cache *ovl_aio_request_cachep;
static char ovl_whatisit(struct inode *inode, struct inode *realinode) static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{ {
if (realinode != ovl_inode_upper(inode)) if (realinode != ovl_inode_upper(inode))
...@@ -225,6 +233,33 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb) ...@@ -225,6 +233,33 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
return flags; return flags;
} }
static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
{
struct kiocb *iocb = &aio_req->iocb;
struct kiocb *orig_iocb = aio_req->orig_iocb;
if (iocb->ki_flags & IOCB_WRITE) {
struct inode *inode = file_inode(orig_iocb->ki_filp);
file_end_write(iocb->ki_filp);
ovl_copyattr(ovl_inode_real(inode), inode);
}
orig_iocb->ki_pos = iocb->ki_pos;
fdput(aio_req->fd);
kmem_cache_free(ovl_aio_request_cachep, aio_req);
}
static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
{
struct ovl_aio_req *aio_req = container_of(iocb,
struct ovl_aio_req, iocb);
struct kiocb *orig_iocb = aio_req->orig_iocb;
ovl_aio_cleanup_handler(aio_req);
orig_iocb->ki_complete(orig_iocb, res, res2);
}
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
...@@ -240,10 +275,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) ...@@ -240,10 +275,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
return ret; return ret;
old_cred = ovl_override_creds(file_inode(file)->i_sb); old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
ovl_iocb_to_rwf(iocb)); ovl_iocb_to_rwf(iocb));
} else {
struct ovl_aio_req *aio_req;
ret = -ENOMEM;
aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
if (!aio_req)
goto out;
aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, real.file);
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
if (ret != -EIOCBQUEUED)
ovl_aio_cleanup_handler(aio_req);
}
out:
revert_creds(old_cred); revert_creds(old_cred);
ovl_file_accessed(file); ovl_file_accessed(file);
fdput(real); fdput(real);
...@@ -274,15 +327,33 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) ...@@ -274,15 +327,33 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out_unlock; goto out_unlock;
old_cred = ovl_override_creds(file_inode(file)->i_sb); old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
file_start_write(real.file); file_start_write(real.file);
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
ovl_iocb_to_rwf(iocb)); ovl_iocb_to_rwf(iocb));
file_end_write(real.file); file_end_write(real.file);
revert_creds(old_cred);
/* Update size */ /* Update size */
ovl_copyattr(ovl_inode_real(inode), inode); ovl_copyattr(ovl_inode_real(inode), inode);
} else {
struct ovl_aio_req *aio_req;
ret = -ENOMEM;
aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
if (!aio_req)
goto out;
file_start_write(real.file);
aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, real.file);
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
if (ret != -EIOCBQUEUED)
ovl_aio_cleanup_handler(aio_req);
}
out:
revert_creds(old_cred);
fdput(real); fdput(real);
out_unlock: out_unlock:
...@@ -651,3 +722,19 @@ const struct file_operations ovl_file_operations = { ...@@ -651,3 +722,19 @@ const struct file_operations ovl_file_operations = {
.copy_file_range = ovl_copy_file_range, .copy_file_range = ovl_copy_file_range,
.remap_file_range = ovl_remap_file_range, .remap_file_range = ovl_remap_file_range,
}; };
int __init ovl_aio_request_cache_init(void)
{
ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
sizeof(struct ovl_aio_req),
0, SLAB_HWCACHE_ALIGN, NULL);
if (!ovl_aio_request_cachep)
return -ENOMEM;
return 0;
}
void ovl_aio_request_cache_destroy(void)
{
kmem_cache_destroy(ovl_aio_request_cachep);
}
...@@ -450,6 +450,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); ...@@ -450,6 +450,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
/* file.c */ /* file.c */
extern const struct file_operations ovl_file_operations; extern const struct file_operations ovl_file_operations;
int __init ovl_aio_request_cache_init(void);
void ovl_aio_request_cache_destroy(void);
/* copy_up.c */ /* copy_up.c */
int ovl_copy_up(struct dentry *dentry); int ovl_copy_up(struct dentry *dentry);
......
...@@ -1770,8 +1770,14 @@ static int __init ovl_init(void) ...@@ -1770,8 +1770,14 @@ static int __init ovl_init(void)
if (ovl_inode_cachep == NULL) if (ovl_inode_cachep == NULL)
return -ENOMEM; return -ENOMEM;
err = ovl_aio_request_cache_init();
if (!err) {
err = register_filesystem(&ovl_fs_type); err = register_filesystem(&ovl_fs_type);
if (err) if (!err)
return 0;
ovl_aio_request_cache_destroy();
}
kmem_cache_destroy(ovl_inode_cachep); kmem_cache_destroy(ovl_inode_cachep);
return err; return err;
...@@ -1787,7 +1793,7 @@ static void __exit ovl_exit(void) ...@@ -1787,7 +1793,7 @@ static void __exit ovl_exit(void)
*/ */
rcu_barrier(); rcu_barrier();
kmem_cache_destroy(ovl_inode_cachep); kmem_cache_destroy(ovl_inode_cachep);
ovl_aio_request_cache_destroy();
} }
module_init(ovl_init); module_init(ovl_init);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment