Commit 2d82ab25 authored by Greg Kurz, committed by Miklos Szeredi

virtiofs: propagate sync() to file server

Even if POSIX doesn't mandate it, Linux users legitimately expect sync() to
flush all data and metadata to physical storage when it is located on the
same system.  This isn't happening with virtiofs though: sync() inside the
guest returns right away even though data still needs to be flushed from
the host page cache.

This is easily demonstrated by doing the following in the guest:

$ dd if=/dev/zero of=/mnt/foo bs=1M count=5K ; strace -T -e sync sync
5120+0 records in
5120+0 records out
5368709120 bytes (5.4 GB, 5.0 GiB) copied, 5.22224 s, 1.0 GB/s
sync()                                  = 0 <0.024068>

and start the following in the host when the 'dd' command completes
in the guest:

$ strace -T -e fsync /usr/bin/sync virtiofs/foo
fsync(3)                                = 0 <10.371640>

There is no good reason not to honor the expected behavior of sync(): the
current behavior gives an unrealistic impression that virtiofs is super fast
and that data has safely landed on HW, which obviously isn't the case.

Implement a ->sync_fs() superblock operation that sends a new FUSE_SYNCFS
request type for this purpose.  Provision a 64-bit placeholder for possible
future extensions.  Since the file server cannot handle the wait == 0 case,
we skip it to avoid a gratuitous roundtrip.  Note that this is
per-superblock: a FUSE_SYNCFS is sent for the root mount and for each
submount.

Like with FUSE_FSYNC and FUSE_FSYNCDIR, lack of support for FUSE_SYNCFS in
the file server is treated as permanent success.  This ensures
compatibility with older file servers: the client will get the current
behavior of sync() not being propagated to the file server.

Note that such an operation allows the file server to DoS sync().  Since a
typical FUSE file server is an untrusted piece of software running in
userspace, this is disabled by default.  Only enable it with virtiofs for
now since virtiofsd is supposedly trusted by the guest kernel.
Reported-by: Robert Krawitz <rlk@redhat.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
parent 49221cf8
...@@ -761,6 +761,9 @@ struct fuse_conn { ...@@ -761,6 +761,9 @@ struct fuse_conn {
/* Auto-mount submounts announced by the server */ /* Auto-mount submounts announced by the server */
unsigned int auto_submounts:1; unsigned int auto_submounts:1;
/* Propagate syncfs() to server */
unsigned int sync_fs:1;
/** The number of requests waiting for completion */ /** The number of requests waiting for completion */
atomic_t num_waiting; atomic_t num_waiting;
......
...@@ -506,6 +506,45 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -506,6 +506,45 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
return err; return err;
} }
/*
 * ->sync_fs() superblock operation: forward a blocking sync of this
 * superblock to the file server as a FUSE_SYNCFS request.
 *
 * Returns 0 without sending anything when @wait is zero, when the
 * superblock has no root dentry, or when sync_fs is not enabled on the
 * connection.  Otherwise returns the result of the request, except that
 * -ENOSYS is reported as success and clears sync_fs on the connection so
 * that later calls take the early-return path.
 */
static int fuse_sync_fs(struct super_block *sb, int wait)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct fuse_syncfs_in inarg;
	FUSE_ARGS(args);
	int ret;

	/*
	 * Nothing to send when:
	 *  - wait == 0: userspace cannot handle that case, so skip the
	 *    gratuitous roundtrip;
	 *  - there is no root dentry: the filesystem is being unmounted;
	 *  - propagation of sync to the server is disabled.
	 */
	if (!wait || !sb->s_root || !fc->sync_fs)
		return 0;

	/* The request carries a single zeroed placeholder argument. */
	memset(&inarg, 0, sizeof(inarg));

	args.opcode = FUSE_SYNCFS;
	args.nodeid = get_node_id(sb->s_root->d_inode);
	args.in_numargs = 1;
	args.in_args[0].value = &inarg;
	args.in_args[0].size = sizeof(inarg);
	args.out_numargs = 0;

	ret = fuse_simple_request(fm, &args);
	if (ret != -ENOSYS)
		return ret;

	/* Server lacks FUSE_SYNCFS: stop asking and report success. */
	fc->sync_fs = 0;
	return 0;
}
enum { enum {
OPT_SOURCE, OPT_SOURCE,
OPT_SUBTYPE, OPT_SUBTYPE,
...@@ -909,6 +948,7 @@ static const struct super_operations fuse_super_operations = { ...@@ -909,6 +948,7 @@ static const struct super_operations fuse_super_operations = {
.put_super = fuse_put_super, .put_super = fuse_put_super,
.umount_begin = fuse_umount_begin, .umount_begin = fuse_umount_begin,
.statfs = fuse_statfs, .statfs = fuse_statfs,
.sync_fs = fuse_sync_fs,
.show_options = fuse_show_options, .show_options = fuse_show_options,
}; };
......
...@@ -1447,6 +1447,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc) ...@@ -1447,6 +1447,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
fc->release = fuse_free_conn; fc->release = fuse_free_conn;
fc->delete_stale = true; fc->delete_stale = true;
fc->auto_submounts = true; fc->auto_submounts = true;
fc->sync_fs = true;
/* Tell FUSE to split requests that exceed the virtqueue's size */ /* Tell FUSE to split requests that exceed the virtqueue's size */
fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit, fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
......
...@@ -181,6 +181,9 @@ ...@@ -181,6 +181,9 @@
* - add FUSE_OPEN_KILL_SUIDGID * - add FUSE_OPEN_KILL_SUIDGID
* - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT * - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT
* - add FUSE_SETXATTR_ACL_KILL_SGID * - add FUSE_SETXATTR_ACL_KILL_SGID
*
* 7.34
* - add FUSE_SYNCFS
*/ */
#ifndef _LINUX_FUSE_H #ifndef _LINUX_FUSE_H
...@@ -216,7 +219,7 @@ ...@@ -216,7 +219,7 @@
#define FUSE_KERNEL_VERSION 7 #define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */ /** Minor version number of this interface */
#define FUSE_KERNEL_MINOR_VERSION 33 #define FUSE_KERNEL_MINOR_VERSION 34
/** The node ID of the root inode */ /** The node ID of the root inode */
#define FUSE_ROOT_ID 1 #define FUSE_ROOT_ID 1
...@@ -509,6 +512,7 @@ enum fuse_opcode { ...@@ -509,6 +512,7 @@ enum fuse_opcode {
FUSE_COPY_FILE_RANGE = 47, FUSE_COPY_FILE_RANGE = 47,
FUSE_SETUPMAPPING = 48, FUSE_SETUPMAPPING = 48,
FUSE_REMOVEMAPPING = 49, FUSE_REMOVEMAPPING = 49,
FUSE_SYNCFS = 50,
/* CUSE specific operations */ /* CUSE specific operations */
CUSE_INIT = 4096, CUSE_INIT = 4096,
...@@ -971,4 +975,8 @@ struct fuse_removemapping_one { ...@@ -971,4 +975,8 @@ struct fuse_removemapping_one {
#define FUSE_REMOVEMAPPING_MAX_ENTRY \ #define FUSE_REMOVEMAPPING_MAX_ENTRY \
(PAGE_SIZE / sizeof(struct fuse_removemapping_one)) (PAGE_SIZE / sizeof(struct fuse_removemapping_one))
/*
 * Input argument of a FUSE_SYNCFS request.
 *
 * It carries no information for now: the single 64-bit field is a
 * placeholder provisioned for possible future extensions.
 */
struct fuse_syncfs_in {
uint64_t padding;
};
#endif /* _LINUX_FUSE_H */ #endif /* _LINUX_FUSE_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment