Commit 7f851936 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ovl-update-6.7' of git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs

Pull overlayfs updates from Amir Goldstein:

 - Overlayfs aio cleanups and fixes

   Cleanups and minor fixes in preparation for factoring out of
   read/write passthrough code.

 - Overlayfs lock ordering changes

   Hold mnt_writers only throughout copy up instead of a long lived
   elevated refcount.

 - Add support for nesting overlayfs private xattrs

   There are cases where you want to use an overlayfs mount as a
   lowerdir for another overlayfs mount. For example, if the system
   rootfs is on overlayfs due to composefs, or to make it volatile (via
   tmpfs), then you cannot currently store a lowerdir on the rootfs,
   because the inner overlayfs will eat all the whiteouts and overlay
   xattrs. This means you can't e.g. store on the rootfs a prepared
   container image for use with overlayfs.

   This adds support for nesting of overlayfs mounts by escaping the
   problematic features and unescaping them when exposing to the
   overlayfs user.

 - Add new mount options for appending lowerdirs

* tag 'ovl-update-6.7' of git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs:
  ovl: add support for appending lowerdirs one by one
  ovl: refactor layer parsing helpers
  ovl: store and show the user provided lowerdir mount option
  ovl: remove unused code in lowerdir param parsing
  ovl: Add documentation on nesting of overlayfs mounts
  ovl: Add an alternative type of whiteout
  ovl: Support escaped overlay.* xattrs
  ovl: Add OVL_XATTR_TRUSTED/USER_PREFIX_LEN macros
  ovl: Move xattr support to new xattrs.c file
  ovl: do not encode lower fh with upper sb_writers held
  ovl: do not open/llseek lower file with upper sb_writers held
  ovl: reorder ovl_want_write() after ovl_inode_lock()
  ovl: split ovl_want_write() into two helpers
  ovl: add helper ovl_file_modified()
  ovl: protect copying of realinode attributes to ovl inode
  ovl: punt write aio completion to workqueue
  ovl: propagate IOCB_APPEND flag on writes to realfile
  ovl: use simpler function to convert iocb to rw flags
parents c9d01179 24e16e38
......@@ -344,10 +344,11 @@ escaping the colons with a single backslash. For example:
mount -t overlay overlay -olowerdir=/a\:lower\:\:dir /merged
Since kernel version v6.5, directory names containing colons can also
be provided as lower layer using the fsconfig syscall from new mount api:
Since kernel version v6.8, directory names containing colons can also
be configured as lower layer using the "lowerdir+" mount options and the
fsconfig syscall from new mount api. For example:
fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir", "/a:lower::dir", 0);
fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/a:lower::dir", 0);
In the latter case, colons in lower layer directory names will be escaped
as an octal characters (\072) when displayed in /proc/self/mountinfo.
......@@ -416,6 +417,16 @@ Only the data of the files in the "data-only" lower layers may be visible
when a "metacopy" file in one of the lower layers above it, has a "redirect"
to the absolute path of the "lower data" file in the "data-only" lower layer.
Since kernel version v6.8, "data-only" lower layers can also be added using
the "datadir+" mount options and the fsconfig syscall from new mount api.
For example:
fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l1", 0);
fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l2", 0);
fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l3", 0);
fsconfig(fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do1", 0);
fsconfig(fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do2", 0);
fs-verity support
----------------------
......@@ -504,6 +515,29 @@ directory tree on the same or different underlying filesystem, and even
to a different machine. With the "inodes index" feature, trying to mount
the copied layers will fail the verification of the lower root file handle.
Nesting overlayfs mounts
------------------------
It is possible to use a lower directory that is stored on an overlayfs
mount. For regular files this does not need any special care. However, files
that have overlayfs attributes, such as whiteouts or "overlay.*" xattrs will be
interpreted by the underlying overlayfs mount and stripped out. In order to
allow the second overlayfs mount to see the attributes they must be escaped.
Overlayfs specific xattrs are escaped by using a special prefix of
"overlay.overlay.". So, a file with a "trusted.overlay.overlay.metacopy" xattr
in the lower dir will be exposed as a regular file with a
"trusted.overlay.metacopy" xattr in the overlayfs mount. This can be nested by
repeating the prefix multiple time, as each instance only removes one prefix.
A lower dir with a regular whiteout will always be handled by the overlayfs
mount, so to support storing an effective whiteout file in an overlayfs mount an
alternative form of whiteout is supported. This form is a regular, zero-size
file with the "overlay.whiteout" xattr set, inside a directory with the
"overlay.whiteouts" xattr set. Such whiteouts are never created by overlayfs,
but can be used by userspace tools (like containers) that generate lower layers.
These alternative whiteouts can be escaped using the standard xattr escape
mechanism in order to properly nest to any depth.
Non-standard behavior
---------------------
......
......@@ -6,4 +6,4 @@
obj-$(CONFIG_OVERLAY_FS) += overlay.o
overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
copy_up.o export.o params.o
copy_up.o export.o params.o xattrs.o
......@@ -252,7 +252,9 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
return PTR_ERR(old_file);
/* Try to use clone_file_range to clone up within the same fs */
ovl_start_write(dentry);
cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
ovl_end_write(dentry);
if (cloned == len)
goto out_fput;
/* Couldn't clone, so now we try to copy the data */
......@@ -287,8 +289,12 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
* it may not recognize all kind of holes and sometimes
* only skips partial of hole area. However, it will be
* enough for most of the use cases.
*
* We do not hold upper sb_writers throughout the loop to avert
* lockdep warning with llseek of lower file in nested overlay:
* - upper sb_writers
* -- lower ovl_inode_lock (ovl_llseek)
*/
if (skip_hole && data_pos < old_pos) {
data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
if (data_pos > old_pos) {
......@@ -303,9 +309,11 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
}
}
ovl_start_write(dentry);
bytes = do_splice_direct(old_file, &old_pos,
new_file, &new_pos,
this_len, SPLICE_F_MOVE);
ovl_end_write(dentry);
if (bytes <= 0) {
error = bytes;
break;
......@@ -426,29 +434,29 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
return ERR_PTR(err);
}
int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
struct dentry *upper)
struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin)
{
const struct ovl_fh *fh = NULL;
int err;
/*
* When lower layer doesn't support export operations store a 'null' fh,
* so we can use the overlay.origin xattr to distignuish between a copy
* up and a pure upper inode.
*/
if (ovl_can_decode_fh(lower->d_sb)) {
fh = ovl_encode_real_fh(ofs, lower, false);
if (IS_ERR(fh))
return PTR_ERR(fh);
}
if (!ovl_can_decode_fh(origin->d_sb))
return NULL;
return ovl_encode_real_fh(ofs, origin, false);
}
int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
struct dentry *upper)
{
int err;
/*
* Do not fail when upper doesn't support xattrs.
*/
err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf,
fh ? fh->fb.len : 0, 0);
kfree(fh);
/* Ignore -EPERM from setting "user.*" on symlink/special */
return err == -EPERM ? 0 : err;
......@@ -476,7 +484,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
*
* Caller must hold i_mutex on indexdir.
*/
static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh,
struct dentry *upper)
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
......@@ -502,7 +510,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
return -EIO;
err = ovl_get_index_name(ofs, origin, &name);
err = ovl_get_index_name_fh(fh, &name);
if (err)
return err;
......@@ -541,6 +549,7 @@ struct ovl_copy_up_ctx {
struct dentry *destdir;
struct qstr destname;
struct dentry *workdir;
const struct ovl_fh *origin_fh;
bool origin;
bool indexed;
bool metacopy;
......@@ -555,14 +564,16 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct inode *udir = d_inode(upperdir);
ovl_start_write(c->dentry);
/* Mark parent "impure" because it may now contain non-pure upper */
err = ovl_set_impure(c->parent, upperdir);
if (err)
return err;
goto out;
err = ovl_set_nlink_lower(c->dentry);
if (err)
return err;
goto out;
inode_lock_nested(udir, I_MUTEX_PARENT);
upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir,
......@@ -581,10 +592,12 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
}
inode_unlock(udir);
if (err)
return err;
goto out;
err = ovl_set_nlink_upper(c->dentry);
out:
ovl_end_write(c->dentry);
return err;
}
......@@ -637,7 +650,7 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
* hard link.
*/
if (c->origin) {
err = ovl_set_origin(ofs, c->lowerpath.dentry, temp);
err = ovl_set_origin_fh(ofs, c->origin_fh, temp);
if (err)
return err;
}
......@@ -719,21 +732,19 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
.link = c->link
};
/* workdir and destdir could be the same when copying up to indexdir */
err = -EIO;
if (lock_rename(c->workdir, c->destdir) != NULL)
goto unlock;
err = ovl_prep_cu_creds(c->dentry, &cc);
if (err)
goto unlock;
return err;
ovl_start_write(c->dentry);
inode_lock(wdir);
temp = ovl_create_temp(ofs, c->workdir, &cattr);
inode_unlock(wdir);
ovl_end_write(c->dentry);
ovl_revert_cu_creds(&cc);
err = PTR_ERR(temp);
if (IS_ERR(temp))
goto unlock;
return PTR_ERR(temp);
/*
* Copy up data first and then xattrs. Writing data after
......@@ -741,15 +752,28 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
*/
path.dentry = temp;
err = ovl_copy_up_data(c, &path);
if (err)
/*
* We cannot hold lock_rename() throughout this helper, because or
* lock ordering with sb_writers, which shouldn't be held when calling
* ovl_copy_up_data(), so lock workdir and destdir and make sure that
* temp wasn't moved before copy up completion or cleanup.
* If temp was moved, abort without the cleanup.
*/
ovl_start_write(c->dentry);
if (lock_rename(c->workdir, c->destdir) != NULL ||
temp->d_parent != c->workdir) {
err = -EIO;
goto unlock;
} else if (err) {
goto cleanup;
}
err = ovl_copy_up_metadata(c, temp);
if (err)
goto cleanup;
if (S_ISDIR(c->stat.mode) && c->indexed) {
err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
err = ovl_create_index(c->dentry, c->origin_fh, temp);
if (err)
goto cleanup;
}
......@@ -779,6 +803,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
ovl_set_flag(OVL_WHITEOUTS, inode);
unlock:
unlock_rename(c->workdir, c->destdir);
ovl_end_write(c->dentry);
return err;
......@@ -802,9 +827,10 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
if (err)
return err;
ovl_start_write(c->dentry);
tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode);
ovl_end_write(c->dentry);
ovl_revert_cu_creds(&cc);
if (IS_ERR(tmpfile))
return PTR_ERR(tmpfile);
......@@ -815,9 +841,11 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
goto out_fput;
}
ovl_start_write(c->dentry);
err = ovl_copy_up_metadata(c, temp);
if (err)
goto out_fput;
goto out;
inode_lock_nested(udir, I_MUTEX_PARENT);
......@@ -831,7 +859,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
inode_unlock(udir);
if (err)
goto out_fput;
goto out;
if (c->metacopy_digest)
ovl_set_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
......@@ -843,6 +871,8 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
ovl_set_upperdata(d_inode(c->dentry));
ovl_inode_update(d_inode(c->dentry), dget(temp));
out:
ovl_end_write(c->dentry);
out_fput:
fput(tmpfile);
return err;
......@@ -861,6 +891,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
{
int err;
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct dentry *origin = c->lowerpath.dentry;
struct ovl_fh *fh = NULL;
bool to_index = false;
/*
......@@ -877,25 +909,35 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
to_index = true;
}
if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) {
fh = ovl_get_origin_fh(ofs, origin);
if (IS_ERR(fh))
return PTR_ERR(fh);
/* origin_fh may be NULL */
c->origin_fh = fh;
c->origin = true;
}
if (to_index) {
c->destdir = ovl_indexdir(c->dentry->d_sb);
err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname);
err = ovl_get_index_name(ofs, origin, &c->destname);
if (err)
return err;
goto out_free_fh;
} else if (WARN_ON(!c->parent)) {
/* Disconnected dentry must be copied up to index dir */
return -EIO;
err = -EIO;
goto out_free_fh;
} else {
/*
* Mark parent "impure" because it may now contain non-pure
* upper
*/
ovl_start_write(c->dentry);
err = ovl_set_impure(c->parent, c->destdir);
ovl_end_write(c->dentry);
if (err)
return err;
goto out_free_fh;
}
/* Should we copyup with O_TMPFILE or with workdir? */
......@@ -909,6 +951,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
if (c->indexed)
ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
ovl_start_write(c->dentry);
if (to_index) {
/* Initialize nlink for copy up of disconnected dentry */
err = ovl_set_nlink_upper(c->dentry);
......@@ -923,10 +966,13 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
ovl_dentry_set_upper_alias(c->dentry);
ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry));
}
ovl_end_write(c->dentry);
out:
if (to_index)
kfree(c->destname.name);
out_free_fh:
kfree(fh);
return err;
}
......@@ -1011,15 +1057,16 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
* Writing to upper file will clear security.capability xattr. We
* don't want that to happen for normal copy-up operation.
*/
ovl_start_write(c->dentry);
if (capability) {
err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS,
capability, cap_size, 0);
if (err)
goto out_free;
}
err = ovl_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY);
if (!err) {
err = ovl_removexattr(ofs, upperpath.dentry,
OVL_XATTR_METACOPY);
}
ovl_end_write(c->dentry);
if (err)
goto out_free;
......@@ -1170,17 +1217,10 @@ static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
int ovl_maybe_copy_up(struct dentry *dentry, int flags)
{
int err = 0;
if (ovl_open_need_copy_up(dentry, flags)) {
err = ovl_want_write(dentry);
if (!err) {
err = ovl_copy_up_flags(dentry, flags);
ovl_drop_write(dentry);
}
}
if (!ovl_open_need_copy_up(dentry, flags))
return 0;
return err;
return ovl_copy_up_flags(dentry, flags);
}
int ovl_copy_up_with_data(struct dentry *dentry)
......
......@@ -477,7 +477,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
goto out_unlock;
err = -ESTALE;
if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
if (d_is_negative(upper) || !ovl_upper_is_whiteout(ofs, upper))
goto out_dput;
newdentry = ovl_create_temp(ofs, workdir, cattr);
......@@ -559,10 +559,6 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
struct cred *override_cred;
struct dentry *parent = dentry->d_parent;
err = ovl_copy_up(parent);
if (err)
return err;
old_cred = ovl_override_creds(dentry->d_sb);
/*
......@@ -626,6 +622,10 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
.link = link,
};
err = ovl_copy_up(dentry->d_parent);
if (err)
return err;
err = ovl_want_write(dentry);
if (err)
goto out;
......@@ -700,28 +700,24 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
int err;
struct inode *inode;
err = ovl_want_write(old);
err = ovl_copy_up(old);
if (err)
goto out;
err = ovl_copy_up(old);
err = ovl_copy_up(new->d_parent);
if (err)
goto out_drop_write;
goto out;
err = ovl_copy_up(new->d_parent);
err = ovl_nlink_start(old);
if (err)
goto out_drop_write;
goto out;
if (ovl_is_metacopy_dentry(old)) {
err = ovl_set_link_redirect(old);
if (err)
goto out_drop_write;
goto out_nlink_end;
}
err = ovl_nlink_start(old);
if (err)
goto out_drop_write;
inode = d_inode(old);
ihold(inode);
......@@ -731,9 +727,8 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
iput(inode);
out_nlink_end:
ovl_nlink_end(old);
out_drop_write:
ovl_drop_write(old);
out:
return err;
}
......@@ -891,17 +886,13 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
goto out;
}
err = ovl_want_write(dentry);
if (err)
goto out;
err = ovl_copy_up(dentry->d_parent);
if (err)
goto out_drop_write;
goto out;
err = ovl_nlink_start(dentry);
if (err)
goto out_drop_write;
goto out;
old_cred = ovl_override_creds(dentry->d_sb);
if (!lower_positive)
......@@ -926,8 +917,6 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
if (ovl_dentry_upper(dentry))
ovl_copyattr(d_inode(dentry));
out_drop_write:
ovl_drop_write(dentry);
out:
ovl_cache_free(&list);
return err;
......@@ -1131,29 +1120,32 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
}
}
err = ovl_want_write(old);
if (err)
goto out;
err = ovl_copy_up(old);
if (err)
goto out_drop_write;
goto out;
err = ovl_copy_up(new->d_parent);
if (err)
goto out_drop_write;
goto out;
if (!overwrite) {
err = ovl_copy_up(new);
if (err)
goto out_drop_write;
goto out;
} else if (d_inode(new)) {
err = ovl_nlink_start(new);
if (err)
goto out_drop_write;
goto out;
update_nlink = true;
}
if (!update_nlink) {
/* ovl_nlink_start() took ovl_want_write() */
err = ovl_want_write(old);
if (err)
goto out;
}
old_cred = ovl_override_creds(old->d_sb);
if (!list_empty(&list)) {
......@@ -1219,7 +1211,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
}
} else {
if (!d_is_negative(newdentry)) {
if (!new_opaque || !ovl_is_whiteout(newdentry))
if (!new_opaque || !ovl_upper_is_whiteout(ofs, newdentry))
goto out_dput;
} else {
if (flags & RENAME_EXCHANGE)
......@@ -1286,8 +1278,8 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
revert_creds(old_cred);
if (update_nlink)
ovl_nlink_end(new);
out_drop_write:
ovl_drop_write(old);
else
ovl_drop_write(old);
out:
dput(opaquedir);
ovl_cache_free(&list);
......
......@@ -23,12 +23,7 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry)
if (ovl_dentry_upper(dentry))
return 0;
err = ovl_want_write(dentry);
if (!err) {
err = ovl_copy_up(dentry);
ovl_drop_write(dentry);
}
err = ovl_copy_up(dentry);
if (err) {
pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n",
dentry, err);
......
......@@ -15,10 +15,15 @@
#include <linux/fs.h>
#include "overlayfs.h"
#include "../internal.h" /* for sb_init_dio_done_wq */
struct ovl_aio_req {
struct kiocb iocb;
refcount_t ref;
struct kiocb *orig_iocb;
/* used for aio completion */
struct work_struct work;
long res;
};
static struct kmem_cache *ovl_aio_request_cachep;
......@@ -235,6 +240,12 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
return ret;
}
static void ovl_file_modified(struct file *file)
{
/* Update size/mtime */
ovl_copyattr(file_inode(file));
}
static void ovl_file_accessed(struct file *file)
{
struct inode *inode, *upperinode;
......@@ -263,20 +274,12 @@ static void ovl_file_accessed(struct file *file)
touch_atime(&file->f_path);
}
static rwf_t ovl_iocb_to_rwf(int ifl)
#define OVL_IOCB_MASK \
(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
static rwf_t iocb_to_rw_flags(int flags)
{
rwf_t flags = 0;
if (ifl & IOCB_NOWAIT)
flags |= RWF_NOWAIT;
if (ifl & IOCB_HIPRI)
flags |= RWF_HIPRI;
if (ifl & IOCB_DSYNC)
flags |= RWF_DSYNC;
if (ifl & IOCB_SYNC)
flags |= RWF_SYNC;
return flags;
return (__force rwf_t)(flags & OVL_IOCB_MASK);
}
static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
......@@ -293,10 +296,8 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
struct kiocb *orig_iocb = aio_req->orig_iocb;
if (iocb->ki_flags & IOCB_WRITE) {
struct inode *inode = file_inode(orig_iocb->ki_filp);
kiocb_end_write(iocb);
ovl_copyattr(inode);
ovl_file_modified(orig_iocb->ki_filp);
}
orig_iocb->ki_pos = iocb->ki_pos;
......@@ -313,6 +314,37 @@ static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
orig_iocb->ki_complete(orig_iocb, res);
}
static void ovl_aio_complete_work(struct work_struct *work)
{
struct ovl_aio_req *aio_req = container_of(work,
struct ovl_aio_req, work);
ovl_aio_rw_complete(&aio_req->iocb, aio_req->res);
}
static void ovl_aio_queue_completion(struct kiocb *iocb, long res)
{
struct ovl_aio_req *aio_req = container_of(iocb,
struct ovl_aio_req, iocb);
struct kiocb *orig_iocb = aio_req->orig_iocb;
/*
* Punt to a work queue to serialize updates of mtime/size.
*/
aio_req->res = res;
INIT_WORK(&aio_req->work, ovl_aio_complete_work);
queue_work(file_inode(orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
&aio_req->work);
}
static int ovl_init_aio_done_wq(struct super_block *sb)
{
if (sb->s_dio_done_wq)
return 0;
return sb_init_dio_done_wq(sb);
}
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
......@@ -334,8 +366,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
ovl_iocb_to_rwf(iocb->ki_flags));
rwf_t rwf = iocb_to_rw_flags(iocb->ki_flags);
ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, rwf);
} else {
struct ovl_aio_req *aio_req;
......@@ -401,15 +434,20 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
rwf_t rwf = iocb_to_rw_flags(ifl);
file_start_write(real.file);
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
ovl_iocb_to_rwf(ifl));
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf);
file_end_write(real.file);
/* Update size */
ovl_copyattr(inode);
ovl_file_modified(file);
} else {
struct ovl_aio_req *aio_req;
ret = ovl_init_aio_done_wq(inode->i_sb);
if (ret)
goto out;
ret = -ENOMEM;
aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
if (!aio_req)
......@@ -418,7 +456,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
aio_req->iocb.ki_complete = ovl_aio_queue_completion;
refcount_set(&aio_req->ref, 2);
kiocb_start_write(&aio_req->iocb);
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
......@@ -492,7 +530,7 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
file_end_write(real.file);
/* Update size */
ovl_copyattr(inode);
ovl_file_modified(out);
revert_creds(old_cred);
fdput(real);
......@@ -573,7 +611,7 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len
revert_creds(old_cred);
/* Update size */
ovl_copyattr(inode);
ovl_file_modified(file);
fdput(real);
......@@ -657,7 +695,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
revert_creds(old_cred);
/* Update size */
ovl_copyattr(inode_out);
ovl_file_modified(file_out);
fdput(real_in);
fdput(real_out);
......
......@@ -32,10 +32,6 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (err)
return err;
err = ovl_want_write(dentry);
if (err)
goto out;
if (attr->ia_valid & ATTR_SIZE) {
/* Truncate should trigger data copy up as well */
full_copy_up = true;
......@@ -54,7 +50,7 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
winode = d_inode(upperdentry);
err = get_write_access(winode);
if (err)
goto out_drop_write;
goto out;
}
if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
......@@ -78,6 +74,10 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
*/
attr->ia_valid &= ~ATTR_OPEN;
err = ovl_want_write(dentry);
if (err)
goto out_put_write;
inode_lock(upperdentry->d_inode);
old_cred = ovl_override_creds(dentry->d_sb);
err = ovl_do_notify_change(ofs, upperdentry, attr);
......@@ -85,12 +85,12 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (!err)
ovl_copyattr(dentry->d_inode);
inode_unlock(upperdentry->d_inode);
ovl_drop_write(dentry);
out_put_write:
if (winode)
put_write_access(winode);
}
out_drop_write:
ovl_drop_write(dentry);
out:
return err;
}
......@@ -339,130 +339,6 @@ static const char *ovl_get_link(struct dentry *dentry,
return p;
}
bool ovl_is_private_xattr(struct super_block *sb, const char *name)
{
struct ovl_fs *ofs = OVL_FS(sb);
if (ofs->config.userxattr)
return strncmp(name, OVL_XATTR_USER_PREFIX,
sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
else
return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
}
int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
const void *value, size_t size, int flags)
{
int err;
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *upperdentry = ovl_i_dentry_upper(inode);
struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
struct path realpath;
const struct cred *old_cred;
err = ovl_want_write(dentry);
if (err)
goto out;
if (!value && !upperdentry) {
ovl_path_lower(dentry, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0);
revert_creds(old_cred);
if (err < 0)
goto out_drop_write;
}
if (!upperdentry) {
err = ovl_copy_up(dentry);
if (err)
goto out_drop_write;
realdentry = ovl_dentry_upper(dentry);
}
old_cred = ovl_override_creds(dentry->d_sb);
if (value) {
err = ovl_do_setxattr(ofs, realdentry, name, value, size,
flags);
} else {
WARN_ON(flags != XATTR_REPLACE);
err = ovl_do_removexattr(ofs, realdentry, name);
}
revert_creds(old_cred);
/* copy c/mtime */
ovl_copyattr(inode);
out_drop_write:
ovl_drop_write(dentry);
out:
return err;
}
int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size)
{
ssize_t res;
const struct cred *old_cred;
struct path realpath;
ovl_i_path_real(inode, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size);
revert_creds(old_cred);
return res;
}
static bool ovl_can_list(struct super_block *sb, const char *s)
{
/* Never list private (.overlay) */
if (ovl_is_private_xattr(sb, s))
return false;
/* List all non-trusted xattrs */
if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
return true;
/* list other trusted for superuser only */
return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
}
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
struct dentry *realdentry = ovl_dentry_real(dentry);
ssize_t res;
size_t len;
char *s;
const struct cred *old_cred;
old_cred = ovl_override_creds(dentry->d_sb);
res = vfs_listxattr(realdentry, list, size);
revert_creds(old_cred);
if (res <= 0 || size == 0)
return res;
/* filter out private xattrs */
for (s = list, len = res; len;) {
size_t slen = strnlen(s, len) + 1;
/* underlying fs providing us with an broken xattr list? */
if (WARN_ON(slen > len))
return -EIO;
len -= slen;
if (!ovl_can_list(dentry->d_sb, s)) {
res -= slen;
memmove(s, s + slen, len);
} else {
s += slen;
}
}
return res;
}
#ifdef CONFIG_FS_POSIX_ACL
/*
* Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone
......@@ -611,10 +487,6 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode,
struct dentry *upperdentry = ovl_dentry_upper(dentry);
struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
err = ovl_want_write(dentry);
if (err)
return err;
/*
* If ACL is to be removed from a lower file, check if it exists in
* the first place before copying it up.
......@@ -630,7 +502,7 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode,
revert_creds(old_cred);
if (IS_ERR(real_acl)) {
err = PTR_ERR(real_acl);
goto out_drop_write;
goto out;
}
posix_acl_release(real_acl);
}
......@@ -638,23 +510,26 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode,
if (!upperdentry) {
err = ovl_copy_up(dentry);
if (err)
goto out_drop_write;
goto out;
realdentry = ovl_dentry_upper(dentry);
}
err = ovl_want_write(dentry);
if (err)
goto out;
old_cred = ovl_override_creds(dentry->d_sb);
if (acl)
err = ovl_do_set_acl(ofs, realdentry, acl_name, acl);
else
err = ovl_do_remove_acl(ofs, realdentry, acl_name);
revert_creds(old_cred);
ovl_drop_write(dentry);
/* copy c/mtime */
ovl_copyattr(inode);
out_drop_write:
ovl_drop_write(dentry);
out:
return err;
}
......@@ -778,14 +653,14 @@ int ovl_fileattr_set(struct mnt_idmap *idmap,
unsigned int flags;
int err;
err = ovl_want_write(dentry);
if (err)
goto out;
err = ovl_copy_up(dentry);
if (!err) {
ovl_path_real(dentry, &upperpath);
err = ovl_want_write(dentry);
if (err)
goto out;
old_cred = ovl_override_creds(inode->i_sb);
/*
* Store immutable/append-only flags in xattr and clear them
......@@ -798,6 +673,7 @@ int ovl_fileattr_set(struct mnt_idmap *idmap,
if (!err)
err = ovl_real_fileattr_set(&upperpath, fa);
revert_creds(old_cred);
ovl_drop_write(dentry);
/*
* Merge real inode flags with inode flags read from
......@@ -812,7 +688,6 @@ int ovl_fileattr_set(struct mnt_idmap *idmap,
/* Update ctime */
ovl_copyattr(inode);
}
ovl_drop_write(dentry);
out:
return err;
}
......
......@@ -251,7 +251,10 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
err = -EREMOTE;
goto out_err;
}
if (ovl_is_whiteout(this)) {
path.dentry = this;
path.mnt = d->mnt;
if (ovl_path_is_whiteout(OVL_FS(d->sb), &path)) {
d->stop = d->opaque = true;
goto put_and_out;
}
......@@ -264,8 +267,6 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
goto put_and_out;
}
path.dentry = this;
path.mnt = d->mnt;
if (!d_can_lookup(this)) {
if (d->is_dir || !last_element) {
d->stop = true;
......@@ -438,7 +439,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
else if (IS_ERR(origin))
return PTR_ERR(origin);
if (upperdentry && !ovl_is_whiteout(upperdentry) &&
if (upperdentry && !ovl_upper_is_whiteout(ofs, upperdentry) &&
inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
goto invalid;
......@@ -507,6 +508,19 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
return err;
}
int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
enum ovl_xattr ox, const struct ovl_fh *fh,
bool is_upper, bool set)
{
int err;
err = ovl_verify_fh(ofs, dentry, ox, fh);
if (set && err == -ENODATA)
err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
return err;
}
/*
* Verify that @real dentry matches the file handle stored in xattr @name.
*
......@@ -515,9 +529,9 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
*
* Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
*/
int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
enum ovl_xattr ox, struct dentry *real, bool is_upper,
bool set)
int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
enum ovl_xattr ox, struct dentry *real,
bool is_upper, bool set)
{
struct inode *inode;
struct ovl_fh *fh;
......@@ -530,9 +544,7 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
goto fail;
}
err = ovl_verify_fh(ofs, dentry, ox, fh);
if (set && err == -ENODATA)
err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set);
if (err)
goto fail;
......@@ -548,6 +560,7 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
goto out;
}
/* Get upper dentry from index */
struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index,
bool connected)
......@@ -684,7 +697,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
goto out;
}
static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name)
{
char *n, *s;
......@@ -873,20 +886,27 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
struct dentry *lower, struct dentry *upper)
{
const struct ovl_fh *fh;
int err;
if (ovl_check_origin_xattr(ofs, upper))
return 0;
fh = ovl_get_origin_fh(ofs, lower);
if (IS_ERR(fh))
return PTR_ERR(fh);
err = ovl_want_write(dentry);
if (err)
return err;
goto out;
err = ovl_set_origin(ofs, lower, upper);
err = ovl_set_origin_fh(ofs, fh, upper);
if (!err)
err = ovl_set_impure(dentry->d_parent, upper->d_parent);
ovl_drop_write(dentry);
out:
kfree(fh);
return err;
}
......@@ -1383,7 +1403,11 @@ bool ovl_lower_positive(struct dentry *dentry)
break;
}
} else {
positive = !ovl_is_whiteout(this);
struct path path = {
.dentry = this,
.mnt = parentpath->layer->mnt,
};
positive = !ovl_path_is_whiteout(OVL_FS(dentry->d_sb), &path);
done = true;
dput(this);
}
......
......@@ -28,7 +28,16 @@ enum ovl_path_type {
#define OVL_XATTR_NAMESPACE "overlay."
#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE
#define OVL_XATTR_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1)
#define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE
#define OVL_XATTR_USER_PREFIX_LEN (sizeof(OVL_XATTR_USER_PREFIX) - 1)
#define OVL_XATTR_ESCAPE_PREFIX OVL_XATTR_NAMESPACE
#define OVL_XATTR_ESCAPE_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_PREFIX) - 1)
#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ESCAPE_PREFIX
#define OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_TRUSTED_PREFIX) - 1)
#define OVL_XATTR_ESCAPE_USER_PREFIX OVL_XATTR_USER_PREFIX OVL_XATTR_ESCAPE_PREFIX
#define OVL_XATTR_ESCAPE_USER_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_USER_PREFIX) - 1)
enum ovl_xattr {
OVL_XATTR_OPAQUE,
......@@ -40,6 +49,8 @@ enum ovl_xattr {
OVL_XATTR_UUID,
OVL_XATTR_METACOPY,
OVL_XATTR_PROTATTR,
OVL_XATTR_XWHITEOUT,
OVL_XATTR_XWHITEOUTS,
};
enum ovl_inode_flag {
......@@ -398,6 +409,10 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
}
/* util.c */
int ovl_get_write_access(struct dentry *dentry);
void ovl_put_write_access(struct dentry *dentry);
void ovl_start_write(struct dentry *dentry);
void ovl_end_write(struct dentry *dentry);
int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
struct dentry *ovl_workdir(struct dentry *dentry);
......@@ -460,6 +475,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
void ovl_dir_modified(struct dentry *dentry, bool impurity);
u64 ovl_inode_version_get(struct inode *inode);
bool ovl_is_whiteout(struct dentry *dentry);
bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path);
struct file *ovl_path_open(const struct path *path, int flags);
int ovl_copy_up_start(struct dentry *dentry, int flags);
void ovl_copy_up_end(struct dentry *dentry);
......@@ -467,9 +483,21 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags);
bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
enum ovl_xattr ox);
bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path);
bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path);
bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
const struct path *upperpath);
static inline bool ovl_upper_is_whiteout(struct ovl_fs *ofs,
struct dentry *upperdentry)
{
struct path upperpath = {
.dentry = upperdentry,
.mnt = ovl_upper_mnt(ofs),
};
return ovl_path_is_whiteout(ofs, &upperpath);
}
static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs,
struct dentry *upperdentry)
{
......@@ -624,11 +652,15 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
struct dentry *upperdentry, struct ovl_path **stackp);
int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
enum ovl_xattr ox, struct dentry *real, bool is_upper,
bool set);
enum ovl_xattr ox, const struct ovl_fh *fh,
bool is_upper, bool set);
int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
enum ovl_xattr ox, struct dentry *real,
bool is_upper, bool set);
struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index,
bool connected);
int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name);
int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
struct qstr *name);
struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
......@@ -640,17 +672,24 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
bool ovl_lower_positive(struct dentry *dentry);
static inline int ovl_verify_origin_fh(struct ovl_fs *ofs, struct dentry *upper,
const struct ovl_fh *fh, bool set)
{
return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, fh, false, set);
}
static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper,
struct dentry *origin, bool set)
{
return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, origin,
false, set);
return ovl_verify_origin_xattr(ofs, upper, OVL_XATTR_ORIGIN, origin,
false, set);
}
static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index,
struct dentry *upper, bool set)
{
return ovl_verify_set_fh(ofs, index, OVL_XATTR_UPPER, upper, true, set);
return ovl_verify_origin_xattr(ofs, index, OVL_XATTR_UPPER, upper,
true, set);
}
/* readdir.c */
......@@ -684,17 +723,8 @@ int ovl_set_nlink_lower(struct dentry *dentry);
unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback);
int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags);
int ovl_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask);
int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
const void *value, size_t size, int flags);
int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size);
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap,
......@@ -815,8 +845,9 @@ int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentr
int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat);
struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
bool is_upper);
int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
struct dentry *upper);
struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin);
int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
struct dentry *upper);
/* export.c */
extern const struct export_operations ovl_export_operations;
......@@ -830,3 +861,12 @@ static inline bool ovl_force_readonly(struct ovl_fs *ofs)
{
return (!ovl_upper_mnt(ofs) || !ofs->workdir);
}
/* xattr.c */
const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs);
int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags);
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
......@@ -43,8 +43,10 @@ module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
MODULE_PARM_DESC(metacopy,
"Default to on or off for the metadata only copy up feature");
enum {
enum ovl_opt {
Opt_lowerdir,
Opt_lowerdir_add,
Opt_datadir_add,
Opt_upperdir,
Opt_workdir,
Opt_default_permissions,
......@@ -140,8 +142,11 @@ static int ovl_verity_mode_def(void)
#define fsparam_string_empty(NAME, OPT) \
__fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
const struct fs_parameter_spec ovl_parameter_spec[] = {
fsparam_string_empty("lowerdir", Opt_lowerdir),
fsparam_string("lowerdir+", Opt_lowerdir_add),
fsparam_string("datadir+", Opt_datadir_add),
fsparam_string("upperdir", Opt_upperdir),
fsparam_string("workdir", Opt_workdir),
fsparam_flag("default_permissions", Opt_default_permissions),
......@@ -238,19 +243,8 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path)
pr_err("failed to resolve '%s': %i\n", name, err);
goto out;
}
err = -EINVAL;
if (ovl_dentry_weird(path->dentry)) {
pr_err("filesystem on '%s' not supported\n", name);
goto out_put;
}
if (!d_is_dir(path->dentry)) {
pr_err("'%s' not a directory\n", name);
goto out_put;
}
return 0;
out_put:
path_put_init(path);
out:
return err;
}
......@@ -268,7 +262,7 @@ static void ovl_unescape(char *s)
}
}
static int ovl_mount_dir(const char *name, struct path *path, bool upper)
static int ovl_mount_dir(const char *name, struct path *path)
{
int err = -ENOMEM;
char *tmp = kstrdup(name, GFP_KERNEL);
......@@ -276,68 +270,147 @@ static int ovl_mount_dir(const char *name, struct path *path, bool upper)
if (tmp) {
ovl_unescape(tmp);
err = ovl_mount_dir_noesc(tmp, path);
if (!err && upper && path->dentry->d_flags & DCACHE_OP_REAL) {
pr_err("filesystem on '%s' not supported as upperdir\n",
tmp);
path_put_init(path);
err = -EINVAL;
}
kfree(tmp);
}
return err;
}
static int ovl_parse_param_upperdir(const char *name, struct fs_context *fc,
bool workdir)
static int ovl_mount_dir_check(struct fs_context *fc, const struct path *path,
enum ovl_opt layer, const char *name, bool upper)
{
int err;
struct ovl_fs *ofs = fc->s_fs_info;
struct ovl_config *config = &ofs->config;
struct ovl_fs_context *ctx = fc->fs_private;
struct path path;
char *dup;
err = ovl_mount_dir(name, &path, true);
if (err)
return err;
if (ovl_dentry_weird(path->dentry))
return invalfc(fc, "filesystem on %s not supported", name);
if (!d_is_dir(path->dentry))
return invalfc(fc, "%s is not a directory", name);
/*
* Check whether upper path is read-only here to report failures
* early. Don't forget to recheck when the superblock is created
* as the mount attributes could change.
*/
if (__mnt_is_readonly(path.mnt)) {
path_put(&path);
return -EINVAL;
if (upper) {
if (path->dentry->d_flags & DCACHE_OP_REAL)
return invalfc(fc, "filesystem on %s not supported as upperdir", name);
if (__mnt_is_readonly(path->mnt))
return invalfc(fc, "filesystem on %s is read-only", name);
} else {
if (ctx->lowerdir_all && layer != Opt_lowerdir)
return invalfc(fc, "lowerdir+ and datadir+ cannot follow lowerdir");
if (ctx->nr_data && layer == Opt_lowerdir_add)
return invalfc(fc, "regular lower layers cannot follow data layers");
if (ctx->nr == OVL_MAX_STACK)
return invalfc(fc, "too many lower directories, limit is %d",
OVL_MAX_STACK);
}
return 0;
}
dup = kstrdup(name, GFP_KERNEL);
if (!dup) {
path_put(&path);
static int ovl_ctx_realloc_lower(struct fs_context *fc)
{
struct ovl_fs_context *ctx = fc->fs_private;
struct ovl_fs_context_layer *l;
size_t nr;
if (ctx->nr < ctx->capacity)
return 0;
nr = min_t(size_t, max(4096 / sizeof(*l), ctx->capacity * 2),
OVL_MAX_STACK);
l = krealloc_array(ctx->lower, nr, sizeof(*l), GFP_KERNEL_ACCOUNT);
if (!l)
return -ENOMEM;
ctx->lower = l;
ctx->capacity = nr;
return 0;
}
static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer,
struct path *path, char **pname)
{
struct ovl_fs *ofs = fc->s_fs_info;
struct ovl_config *config = &ofs->config;
struct ovl_fs_context *ctx = fc->fs_private;
struct ovl_fs_context_layer *l;
switch (layer) {
case Opt_workdir:
swap(config->workdir, *pname);
swap(ctx->work, *path);
break;
case Opt_upperdir:
swap(config->upperdir, *pname);
swap(ctx->upper, *path);
break;
case Opt_datadir_add:
ctx->nr_data++;
fallthrough;
case Opt_lowerdir_add:
WARN_ON(ctx->nr >= ctx->capacity);
l = &ctx->lower[ctx->nr++];
memset(l, 0, sizeof(*l));
swap(l->name, *pname);
swap(l->path, *path);
break;
default:
WARN_ON(1);
}
}
if (workdir) {
kfree(config->workdir);
config->workdir = dup;
path_put(&ctx->work);
ctx->work = path;
} else {
kfree(config->upperdir);
config->upperdir = dup;
path_put(&ctx->upper);
ctx->upper = path;
static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param,
enum ovl_opt layer)
{
char *name = kstrdup(param->string, GFP_KERNEL);
bool upper = (layer == Opt_upperdir || layer == Opt_workdir);
struct path path;
int err;
if (!name)
return -ENOMEM;
if (upper)
err = ovl_mount_dir(name, &path);
else
err = ovl_mount_dir_noesc(name, &path);
if (err)
goto out_free;
err = ovl_mount_dir_check(fc, &path, layer, name, upper);
if (err)
goto out_put;
if (!upper) {
err = ovl_ctx_realloc_lower(fc);
if (err)
goto out_put;
}
return 0;
/* Store the user provided path string in ctx to show in mountinfo */
ovl_add_layer(fc, layer, &path, &name);
out_put:
path_put(&path);
out_free:
kfree(name);
return err;
}
static void ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx)
static void ovl_reset_lowerdirs(struct ovl_fs_context *ctx)
{
for (size_t nr = 0; nr < ctx->nr; nr++) {
path_put(&ctx->lower[nr].path);
kfree(ctx->lower[nr].name);
ctx->lower[nr].name = NULL;
struct ovl_fs_context_layer *l = ctx->lower;
// Reset old user provided lowerdir string
kfree(ctx->lowerdir_all);
ctx->lowerdir_all = NULL;
for (size_t nr = 0; nr < ctx->nr; nr++, l++) {
path_put(&l->path);
kfree(l->name);
l->name = NULL;
}
ctx->nr = 0;
ctx->nr_data = 0;
......@@ -346,7 +419,7 @@ static void ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx)
/*
* Parse lowerdir= mount option:
*
* (1) lowerdir=/lower1:/lower2:/lower3::/data1::/data2
* e.g.: lowerdir=/lower1:/lower2:/lower3::/data1::/data2
* Set "/lower1", "/lower2", and "/lower3" as lower layers and
* "/data1" and "/data2" as data lower layers. Any existing lower
* layers are replaced.
......@@ -356,9 +429,9 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
int err;
struct ovl_fs_context *ctx = fc->fs_private;
struct ovl_fs_context_layer *l;
char *dup = NULL, *dup_iter;
char *dup = NULL, *iter;
ssize_t nr_lower = 0, nr = 0, nr_data = 0;
bool append = false, data_layer = false;
bool data_layer = false;
/*
* Ensure we're backwards compatible with mount(2)
......@@ -366,16 +439,21 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
*/
/* drop all existing lower layers */
if (!*name) {
ovl_parse_param_drop_lowerdir(ctx);
ovl_reset_lowerdirs(ctx);
if (!*name)
return 0;
}
if (*name == ':') {
pr_err("cannot append lower layer");
return -EINVAL;
}
// Store user provided lowerdir string to show in mount options
ctx->lowerdir_all = kstrdup(name, GFP_KERNEL);
if (!ctx->lowerdir_all)
return -ENOMEM;
dup = kstrdup(name, GFP_KERNEL);
if (!dup)
return -ENOMEM;
......@@ -385,36 +463,11 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
if (nr_lower < 0)
goto out_err;
if ((nr_lower > OVL_MAX_STACK) ||
(append && (size_add(ctx->nr, nr_lower) > OVL_MAX_STACK))) {
if (nr_lower > OVL_MAX_STACK) {
pr_err("too many lower directories, limit is %d\n", OVL_MAX_STACK);
goto out_err;
}
if (!append)
ovl_parse_param_drop_lowerdir(ctx);
/*
* (1) append
*
* We want nr <= nr_lower <= capacity We know nr > 0 and nr <=
* capacity. If nr == 0 this wouldn't be append. If nr +
* nr_lower is <= capacity then nr <= nr_lower <= capacity
* already holds. If nr + nr_lower exceeds capacity, we realloc.
*
* (2) replace
*
* Ensure we're backwards compatible with mount(2) which allows
* "lowerdir=/a:/b:/c,lowerdir=/d:/e:/f" causing the last
* specified lowerdir mount option to win.
*
* We want nr <= nr_lower <= capacity We know either (i) nr == 0
* or (ii) nr > 0. We also know nr_lower > 0. The capacity
* could've been changed multiple times already so we only know
* nr <= capacity. If nr + nr_lower > capacity we realloc,
* otherwise nr <= nr_lower <= capacity holds already.
*/
nr_lower += ctx->nr;
if (nr_lower > ctx->capacity) {
err = -ENOMEM;
l = krealloc_array(ctx->lower, nr_lower, sizeof(*ctx->lower),
......@@ -426,41 +479,21 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
ctx->capacity = nr_lower;
}
/*
* (3) By (1) and (2) we know nr <= nr_lower <= capacity.
* (4) If ctx->nr == 0 => replace
* We have verified above that the lowerdir mount option
* isn't an append, i.e., the lowerdir mount option
* doesn't start with ":" or "::".
* (4.1) The lowerdir mount options only contains regular lower
* layers ":".
* => Nothing to verify.
* (4.2) The lowerdir mount options contains regular ":" and
* data "::" layers.
* => We need to verify that data lower layers "::" aren't
* followed by regular ":" lower layers
* (5) If ctx->nr > 0 => append
* We know that there's at least one regular layer
* otherwise we would've failed when parsing the previous
* lowerdir mount option.
* (5.1) The lowerdir mount option is a regular layer ":" append
* => We need to verify that no data layers have been
* specified before.
* (5.2) The lowerdir mount option is a data layer "::" append
* We know that there's at least one regular layer or
* other data layers. => There's nothing to verify.
*/
dup_iter = dup;
for (nr = ctx->nr; nr < nr_lower; nr++) {
l = &ctx->lower[nr];
iter = dup;
l = ctx->lower;
for (nr = 0; nr < nr_lower; nr++, l++) {
memset(l, 0, sizeof(*l));
err = ovl_mount_dir(dup_iter, &l->path, false);
err = ovl_mount_dir(iter, &l->path);
if (err)
goto out_put;
err = ovl_mount_dir_check(fc, &l->path, Opt_lowerdir, iter, false);
if (err)
goto out_put;
err = -ENOMEM;
l->name = kstrdup(dup_iter, GFP_KERNEL_ACCOUNT);
l->name = kstrdup(iter, GFP_KERNEL_ACCOUNT);
if (!l->name)
goto out_put;
......@@ -472,8 +505,8 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
break;
err = -EINVAL;
dup_iter = strchr(dup_iter, '\0') + 1;
if (*dup_iter) {
iter = strchr(iter, '\0') + 1;
if (*iter) {
/*
* This is a regular layer so we require that
* there are no data layers.
......@@ -489,7 +522,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
/* This is a data lower layer. */
data_layer = true;
dup_iter++;
iter++;
}
ctx->nr = nr_lower;
ctx->nr_data += nr_data;
......@@ -497,21 +530,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
return 0;
out_put:
/*
* We know nr >= ctx->nr < nr_lower. If we failed somewhere
* we want to undo until nr == ctx->nr. This is correct for
* both ctx->nr == 0 and ctx->nr > 0.
*/
for (; nr >= ctx->nr; nr--) {
l = &ctx->lower[nr];
kfree(l->name);
l->name = NULL;
path_put(&l->path);
/* don't overflow */
if (nr == 0)
break;
}
ovl_reset_lowerdirs(ctx);
out_err:
kfree(dup);
......@@ -556,11 +575,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_lowerdir:
err = ovl_parse_param_lowerdir(param->string, fc);
break;
case Opt_lowerdir_add:
case Opt_datadir_add:
case Opt_upperdir:
fallthrough;
case Opt_workdir:
err = ovl_parse_param_upperdir(param->string, fc,
(Opt_workdir == opt));
err = ovl_parse_layer(fc, param, opt);
break;
case Opt_default_permissions:
config->default_permissions = true;
......@@ -617,7 +636,7 @@ static int ovl_get_tree(struct fs_context *fc)
static inline void ovl_fs_context_free(struct ovl_fs_context *ctx)
{
ovl_parse_param_drop_lowerdir(ctx);
ovl_reset_lowerdirs(ctx);
path_put(&ctx->upper);
path_put(&ctx->work);
kfree(ctx->lower);
......@@ -933,23 +952,28 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry)
{
struct super_block *sb = dentry->d_sb;
struct ovl_fs *ofs = OVL_FS(sb);
size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer;
size_t nr, nr_merged_lower, nr_lower = 0;
char **lowerdirs = ofs->config.lowerdirs;
/*
* lowerdirs[] starts from offset 1, then
* >= 0 regular lower layers prefixed with : and
* >= 0 data-only lower layers prefixed with ::
*
* we need to escase comma and space like seq_show_option() does and
* we also need to escape the colon separator from lowerdir paths.
* lowerdirs[0] holds the colon separated list that user provided
* with lowerdir mount option.
* lowerdirs[1..numlayer] hold the lowerdir paths that were added
* using the lowerdir+ and datadir+ mount options.
* For now, we do not allow mixing the legacy lowerdir mount option
* with the new lowerdir+ and datadir+ mount options.
*/
seq_puts(m, ",lowerdir=");
for (nr = 1; nr < ofs->numlayer; nr++) {
if (nr > 1)
seq_putc(m, ':');
if (nr >= nr_merged_lower)
seq_putc(m, ':');
seq_escape(m, ofs->config.lowerdirs[nr], ":, \t\n\\");
if (lowerdirs[0]) {
seq_show_option(m, "lowerdir", lowerdirs[0]);
} else {
nr_lower = ofs->numlayer;
nr_merged_lower = nr_lower - ofs->numdatalayer;
}
for (nr = 1; nr < nr_lower; nr++) {
if (nr < nr_merged_lower)
seq_show_option(m, "lowerdir+", lowerdirs[nr]);
else
seq_show_option(m, "datadir+", lowerdirs[nr]);
}
if (ofs->config.upperdir) {
seq_show_option(m, "upperdir", ofs->config.upperdir);
......
......@@ -32,6 +32,7 @@ struct ovl_fs_context {
size_t nr_data;
struct ovl_opt_set set;
struct ovl_fs_context_layer *lower;
char *lowerdir_all; /* user provided lowerdir string */
};
int ovl_init_fs_context(struct fs_context *fc);
......
......@@ -25,6 +25,7 @@ struct ovl_cache_entry {
struct ovl_cache_entry *next_maybe_whiteout;
bool is_upper;
bool is_whiteout;
bool check_xwhiteout;
char name[];
};
......@@ -47,6 +48,7 @@ struct ovl_readdir_data {
int err;
bool is_upper;
bool d_type_supported;
bool in_xwhiteouts_dir;
};
struct ovl_dir_file {
......@@ -162,6 +164,8 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
p->ino = 0;
p->is_upper = rdd->is_upper;
p->is_whiteout = false;
/* Defer check for overlay.whiteout to ovl_iterate() */
p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
if (d_type == DT_CHR) {
p->next_maybe_whiteout = rdd->first_maybe_whiteout;
......@@ -301,6 +305,8 @@ static inline int ovl_dir_read(const struct path *realpath,
if (IS_ERR(realfile))
return PTR_ERR(realfile);
rdd->in_xwhiteouts_dir = rdd->dentry &&
ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath);
rdd->first_maybe_whiteout = NULL;
rdd->ctx.pos = 0;
do {
......@@ -447,7 +453,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
}
/*
* Set d_ino for upper entries. Non-upper entries should always report
* Set d_ino for upper entries if needed. Non-upper entries should always report
* the uppermost real inode ino and should not call this function.
*
* When not all layer are on same fs, report real ino also for upper.
......@@ -455,8 +461,11 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
* When all layers are on the same fs, and upper has a reference to
* copy up origin, call vfs_getattr() on the overlay entry to make
* sure that d_ino will be consistent with st_ino from stat(2).
*
* Also checks the overlay.whiteout xattr by doing a full lookup which will return
* negative in this case.
*/
static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry *p)
static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino)
{
struct dentry *dir = path->dentry;
......@@ -467,7 +476,7 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry
int xinobits = ovl_xino_bits(ofs);
int err = 0;
if (!ovl_same_dev(ofs))
if (!ovl_same_dev(ofs) && !p->check_xwhiteout)
goto out;
if (p->name[0] == '.') {
......@@ -481,6 +490,7 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry
goto get;
}
}
/* This checks also for xwhiteouts */
this = lookup_one(mnt_idmap(path->mnt), p->name, dir, p->len);
if (IS_ERR_OR_NULL(this) || !this->d_inode) {
/* Mark a stale entry */
......@@ -494,6 +504,9 @@ static int ovl_cache_update_ino(const struct path *path, struct ovl_cache_entry
}
get:
if (!ovl_same_dev(ofs) || !update_ino)
goto out;
type = ovl_path_type(this);
if (OVL_TYPE_ORIGIN(type)) {
struct kstat stat;
......@@ -572,7 +585,7 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
list_for_each_entry_safe(p, n, list, l_node) {
if (strcmp(p->name, ".") != 0 &&
strcmp(p->name, "..") != 0) {
err = ovl_cache_update_ino(path, p);
err = ovl_cache_update(path, p, true);
if (err)
return err;
}
......@@ -778,13 +791,13 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
while (od->cursor != &od->cache->entries) {
p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
if (!p->is_whiteout) {
if (!p->ino) {
err = ovl_cache_update_ino(&file->f_path, p);
if (!p->ino || p->check_xwhiteout) {
err = ovl_cache_update(&file->f_path, p, !p->ino);
if (err)
goto out;
}
}
/* ovl_cache_update_ino() sets is_whiteout on stale entry */
/* ovl_cache_update() sets is_whiteout on stale entry */
if (!p->is_whiteout) {
if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
break;
......
......@@ -445,68 +445,6 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
return ok;
}
static int ovl_own_xattr_get(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
const char *name, void *buffer, size_t size)
{
return -EOPNOTSUPP;
}
static int ovl_own_xattr_set(const struct xattr_handler *handler,
struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
{
return -EOPNOTSUPP;
}
static int ovl_other_xattr_get(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
const char *name, void *buffer, size_t size)
{
return ovl_xattr_get(dentry, inode, name, buffer, size);
}
static int ovl_other_xattr_set(const struct xattr_handler *handler,
struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
{
return ovl_xattr_set(dentry, inode, name, value, size, flags);
}
static const struct xattr_handler ovl_own_trusted_xattr_handler = {
.prefix = OVL_XATTR_TRUSTED_PREFIX,
.get = ovl_own_xattr_get,
.set = ovl_own_xattr_set,
};
static const struct xattr_handler ovl_own_user_xattr_handler = {
.prefix = OVL_XATTR_USER_PREFIX,
.get = ovl_own_xattr_get,
.set = ovl_own_xattr_set,
};
static const struct xattr_handler ovl_other_xattr_handler = {
.prefix = "", /* catch all */
.get = ovl_other_xattr_get,
.set = ovl_other_xattr_set,
};
static const struct xattr_handler * const ovl_trusted_xattr_handlers[] = {
&ovl_own_trusted_xattr_handler,
&ovl_other_xattr_handler,
NULL
};
static const struct xattr_handler * const ovl_user_xattr_handlers[] = {
&ovl_own_user_xattr_handler,
&ovl_other_xattr_handler,
NULL
};
static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
struct inode **ptrap, const char *name)
{
......@@ -647,7 +585,7 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
if (IS_ERR(whiteout))
goto cleanup_temp;
err = ovl_is_whiteout(whiteout);
err = ovl_upper_is_whiteout(ofs, whiteout);
/* Best effort cleanup of whiteout and temp file */
if (err)
......@@ -887,15 +825,20 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
{
struct vfsmount *mnt = ovl_upper_mnt(ofs);
struct dentry *indexdir;
struct dentry *origin = ovl_lowerstack(oe)->dentry;
const struct ovl_fh *fh;
int err;
fh = ovl_get_origin_fh(ofs, origin);
if (IS_ERR(fh))
return PTR_ERR(fh);
err = mnt_want_write(mnt);
if (err)
return err;
goto out_free_fh;
/* Verify lower root is upper root origin */
err = ovl_verify_origin(ofs, upperpath->dentry,
ovl_lowerstack(oe)->dentry, true);
err = ovl_verify_origin_fh(ofs, upperpath->dentry, fh, true);
if (err) {
pr_err("failed to verify upper root origin\n");
goto out;
......@@ -927,9 +870,10 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
* directory entries.
*/
if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
err = ovl_verify_set_fh(ofs, ofs->indexdir,
OVL_XATTR_ORIGIN,
upperpath->dentry, true, false);
err = ovl_verify_origin_xattr(ofs, ofs->indexdir,
OVL_XATTR_ORIGIN,
upperpath->dentry, true,
false);
if (err)
pr_err("failed to verify index dir 'origin' xattr\n");
}
......@@ -947,6 +891,8 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
out:
mnt_drop_write(mnt);
out_free_fh:
kfree(fh);
return err;
}
......@@ -1382,8 +1328,11 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
ofs->layers = layers;
/*
* Layer 0 is reserved for upper even if there's no upper.
* For consistency, config.lowerdirs[0] is NULL.
* config.lowerdirs[0] is used for storing the user provided colon
* separated lowerdir string.
*/
ofs->config.lowerdirs[0] = ctx->lowerdir_all;
ctx->lowerdir_all = NULL;
ofs->numlayer = 1;
sb->s_stack_depth = 0;
......@@ -1493,8 +1442,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
sb->s_magic = OVERLAYFS_SUPER_MAGIC;
sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
ovl_trusted_xattr_handlers;
sb->s_xattr = ovl_xattr_handlers(ofs);
sb->s_fs_info = ofs;
#ifdef CONFIG_FS_POSIX_ACL
sb->s_flags |= SB_POSIXACL;
......
......@@ -17,12 +17,38 @@
#include <linux/ratelimit.h>
#include "overlayfs.h"
/* Get write access to upper mnt - may fail if upper sb was remounted ro */
int ovl_get_write_access(struct dentry *dentry)
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
return mnt_get_write_access(ovl_upper_mnt(ofs));
}
/* Get write access to upper sb - may block if upper sb is frozen */
void ovl_start_write(struct dentry *dentry)
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
sb_start_write(ovl_upper_mnt(ofs)->mnt_sb);
}
int ovl_want_write(struct dentry *dentry)
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
return mnt_want_write(ovl_upper_mnt(ofs));
}
void ovl_put_write_access(struct dentry *dentry)
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
mnt_put_write_access(ovl_upper_mnt(ofs));
}
void ovl_end_write(struct dentry *dentry)
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
sb_end_write(ovl_upper_mnt(ofs)->mnt_sb);
}
void ovl_drop_write(struct dentry *dentry)
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
......@@ -575,6 +601,16 @@ bool ovl_is_whiteout(struct dentry *dentry)
return inode && IS_WHITEOUT(inode);
}
/*
* Use this over ovl_is_whiteout for upper and lower files, as it also
* handles overlay.whiteout xattr whiteout files.
*/
bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path)
{
return ovl_is_whiteout(path->dentry) ||
ovl_path_check_xwhiteout_xattr(ofs, path);
}
struct file *ovl_path_open(const struct path *path, int flags)
{
struct inode *inode = d_inode(path->dentry);
......@@ -644,22 +680,36 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags)
return false;
}
/*
* The copy up "transaction" keeps an elevated mnt write count on upper mnt,
* but leaves taking freeze protection on upper sb to lower level helpers.
*/
int ovl_copy_up_start(struct dentry *dentry, int flags)
{
struct inode *inode = d_inode(dentry);
int err;
err = ovl_inode_lock_interruptible(inode);
if (!err && ovl_already_copied_up_locked(dentry, flags)) {
if (err)
return err;
if (ovl_already_copied_up_locked(dentry, flags))
err = 1; /* Already copied up */
ovl_inode_unlock(inode);
}
else
err = ovl_get_write_access(dentry);
if (err)
goto out_unlock;
return 0;
out_unlock:
ovl_inode_unlock(inode);
return err;
}
void ovl_copy_up_end(struct dentry *dentry)
{
ovl_put_write_access(dentry);
ovl_inode_unlock(d_inode(dentry));
}
......@@ -676,6 +726,32 @@ bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path)
return false;
}
bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path)
{
struct dentry *dentry = path->dentry;
int res;
/* xattr.whiteout must be a zero size regular file */
if (!d_is_reg(dentry) || i_size_read(d_inode(dentry)) != 0)
return false;
res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUT, NULL, 0);
return res >= 0;
}
bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path)
{
struct dentry *dentry = path->dentry;
int res;
/* xattr.whiteouts must be a directory */
if (!d_is_dir(dentry))
return false;
res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUTS, NULL, 0);
return res >= 0;
}
/*
* Load persistent uuid from xattr into s_uuid if found, or store a new
* random generated value in s_uuid and in xattr.
......@@ -760,6 +836,8 @@ bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
#define OVL_XATTR_UUID_POSTFIX "uuid"
#define OVL_XATTR_METACOPY_POSTFIX "metacopy"
#define OVL_XATTR_PROTATTR_POSTFIX "protattr"
#define OVL_XATTR_XWHITEOUT_POSTFIX "whiteout"
#define OVL_XATTR_XWHITEOUTS_POSTFIX "whiteouts"
#define OVL_XATTR_TAB_ENTRY(x) \
[x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
......@@ -775,6 +853,8 @@ const char *const ovl_xattr_table[][2] = {
OVL_XATTR_TAB_ENTRY(OVL_XATTR_UUID),
OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT),
OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUTS),
};
int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
......@@ -973,12 +1053,18 @@ static void ovl_cleanup_index(struct dentry *dentry)
struct dentry *index = NULL;
struct inode *inode;
struct qstr name = { };
bool got_write = false;
int err;
err = ovl_get_index_name(ofs, lowerdentry, &name);
if (err)
goto fail;
err = ovl_want_write(dentry);
if (err)
goto fail;
got_write = true;
inode = d_inode(upperdentry);
if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
......@@ -1016,6 +1102,8 @@ static void ovl_cleanup_index(struct dentry *dentry)
goto fail;
out:
if (got_write)
ovl_drop_write(dentry);
kfree(name.name);
dput(index);
return;
......@@ -1062,8 +1150,12 @@ int ovl_nlink_start(struct dentry *dentry)
if (err)
return err;
err = ovl_want_write(dentry);
if (err)
goto out_unlock;
if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, inode))
goto out;
return 0;
old_cred = ovl_override_creds(dentry->d_sb);
/*
......@@ -1074,10 +1166,15 @@ int ovl_nlink_start(struct dentry *dentry)
*/
err = ovl_set_nlink_upper(dentry);
revert_creds(old_cred);
out:
if (err)
ovl_inode_unlock(inode);
goto out_drop_write;
return 0;
out_drop_write:
ovl_drop_write(dentry);
out_unlock:
ovl_inode_unlock(inode);
return err;
}
......@@ -1086,6 +1183,8 @@ void ovl_nlink_end(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
ovl_drop_write(dentry);
if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) {
const struct cred *old_cred;
......@@ -1403,6 +1502,7 @@ void ovl_copyattr(struct inode *inode)
realinode = ovl_i_path_real(inode, &realpath);
real_idmap = mnt_idmap(realpath.mnt);
spin_lock(&inode->i_lock);
vfsuid = i_uid_into_vfsuid(real_idmap, realinode);
vfsgid = i_gid_into_vfsgid(real_idmap, realinode);
......@@ -1413,4 +1513,5 @@ void ovl_copyattr(struct inode *inode)
inode_set_mtime_to_ts(inode, inode_get_mtime(realinode));
inode_set_ctime_to_ts(inode, inode_get_ctime(realinode));
i_size_write(inode, i_size_read(realinode));
spin_unlock(&inode->i_lock);
}
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/fs.h>
#include <linux/xattr.h>
#include "overlayfs.h"
static bool ovl_is_escaped_xattr(struct super_block *sb, const char *name)
{
struct ovl_fs *ofs = sb->s_fs_info;
if (ofs->config.userxattr)
return strncmp(name, OVL_XATTR_ESCAPE_USER_PREFIX,
OVL_XATTR_ESCAPE_USER_PREFIX_LEN) == 0;
else
return strncmp(name, OVL_XATTR_ESCAPE_TRUSTED_PREFIX,
OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN - 1) == 0;
}
static bool ovl_is_own_xattr(struct super_block *sb, const char *name)
{
struct ovl_fs *ofs = OVL_FS(sb);
if (ofs->config.userxattr)
return strncmp(name, OVL_XATTR_USER_PREFIX,
OVL_XATTR_USER_PREFIX_LEN) == 0;
else
return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
OVL_XATTR_TRUSTED_PREFIX_LEN) == 0;
}
bool ovl_is_private_xattr(struct super_block *sb, const char *name)
{
return ovl_is_own_xattr(sb, name) && !ovl_is_escaped_xattr(sb, name);
}
static int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
const void *value, size_t size, int flags)
{
int err;
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *upperdentry = ovl_i_dentry_upper(inode);
struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
struct path realpath;
const struct cred *old_cred;
if (!value && !upperdentry) {
ovl_path_lower(dentry, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0);
revert_creds(old_cred);
if (err < 0)
goto out;
}
if (!upperdentry) {
err = ovl_copy_up(dentry);
if (err)
goto out;
realdentry = ovl_dentry_upper(dentry);
}
err = ovl_want_write(dentry);
if (err)
goto out;
old_cred = ovl_override_creds(dentry->d_sb);
if (value) {
err = ovl_do_setxattr(ofs, realdentry, name, value, size,
flags);
} else {
WARN_ON(flags != XATTR_REPLACE);
err = ovl_do_removexattr(ofs, realdentry, name);
}
revert_creds(old_cred);
ovl_drop_write(dentry);
/* copy c/mtime */
ovl_copyattr(inode);
out:
return err;
}
static int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size)
{
ssize_t res;
const struct cred *old_cred;
struct path realpath;
ovl_i_path_real(inode, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size);
revert_creds(old_cred);
return res;
}
static bool ovl_can_list(struct super_block *sb, const char *s)
{
/* Never list private (.overlay) */
if (ovl_is_private_xattr(sb, s))
return false;
/* List all non-trusted xattrs */
if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
return true;
/* list other trusted for superuser only */
return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
}
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
struct dentry *realdentry = ovl_dentry_real(dentry);
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
ssize_t res;
size_t len;
char *s;
const struct cred *old_cred;
size_t prefix_len, name_len;
old_cred = ovl_override_creds(dentry->d_sb);
res = vfs_listxattr(realdentry, list, size);
revert_creds(old_cred);
if (res <= 0 || size == 0)
return res;
prefix_len = ofs->config.userxattr ?
OVL_XATTR_USER_PREFIX_LEN : OVL_XATTR_TRUSTED_PREFIX_LEN;
/* filter out private xattrs */
for (s = list, len = res; len;) {
size_t slen = strnlen(s, len) + 1;
/* underlying fs providing us with an broken xattr list? */
if (WARN_ON(slen > len))
return -EIO;
len -= slen;
if (!ovl_can_list(dentry->d_sb, s)) {
res -= slen;
memmove(s, s + slen, len);
} else if (ovl_is_escaped_xattr(dentry->d_sb, s)) {
res -= OVL_XATTR_ESCAPE_PREFIX_LEN;
name_len = slen - prefix_len - OVL_XATTR_ESCAPE_PREFIX_LEN;
s += prefix_len;
memmove(s, s + OVL_XATTR_ESCAPE_PREFIX_LEN, name_len + len);
s += name_len;
} else {
s += slen;
}
}
return res;
}
static char *ovl_xattr_escape_name(const char *prefix, const char *name)
{
size_t prefix_len = strlen(prefix);
size_t name_len = strlen(name);
size_t escaped_len;
char *escaped, *s;
escaped_len = prefix_len + OVL_XATTR_ESCAPE_PREFIX_LEN + name_len;
if (escaped_len > XATTR_NAME_MAX)
return ERR_PTR(-EOPNOTSUPP);
escaped = kmalloc(escaped_len + 1, GFP_KERNEL);
if (escaped == NULL)
return ERR_PTR(-ENOMEM);
s = escaped;
memcpy(s, prefix, prefix_len);
s += prefix_len;
memcpy(s, OVL_XATTR_ESCAPE_PREFIX, OVL_XATTR_ESCAPE_PREFIX_LEN);
s += OVL_XATTR_ESCAPE_PREFIX_LEN;
memcpy(s, name, name_len + 1);
return escaped;
}
static int ovl_own_xattr_get(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
const char *name, void *buffer, size_t size)
{
char *escaped;
int r;
escaped = ovl_xattr_escape_name(handler->prefix, name);
if (IS_ERR(escaped))
return PTR_ERR(escaped);
r = ovl_xattr_get(dentry, inode, escaped, buffer, size);
kfree(escaped);
return r;
}
static int ovl_own_xattr_set(const struct xattr_handler *handler,
struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
{
char *escaped;
int r;
escaped = ovl_xattr_escape_name(handler->prefix, name);
if (IS_ERR(escaped))
return PTR_ERR(escaped);
r = ovl_xattr_set(dentry, inode, escaped, value, size, flags);
kfree(escaped);
return r;
}
static int ovl_other_xattr_get(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
const char *name, void *buffer, size_t size)
{
return ovl_xattr_get(dentry, inode, name, buffer, size);
}
static int ovl_other_xattr_set(const struct xattr_handler *handler,
struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
{
return ovl_xattr_set(dentry, inode, name, value, size, flags);
}
static const struct xattr_handler ovl_own_trusted_xattr_handler = {
.prefix = OVL_XATTR_TRUSTED_PREFIX,
.get = ovl_own_xattr_get,
.set = ovl_own_xattr_set,
};
static const struct xattr_handler ovl_own_user_xattr_handler = {
.prefix = OVL_XATTR_USER_PREFIX,
.get = ovl_own_xattr_get,
.set = ovl_own_xattr_set,
};
static const struct xattr_handler ovl_other_xattr_handler = {
.prefix = "", /* catch all */
.get = ovl_other_xattr_get,
.set = ovl_other_xattr_set,
};
static const struct xattr_handler * const ovl_trusted_xattr_handlers[] = {
&ovl_own_trusted_xattr_handler,
&ovl_other_xattr_handler,
NULL
};
static const struct xattr_handler * const ovl_user_xattr_handlers[] = {
&ovl_own_user_xattr_handler,
&ovl_other_xattr_handler,
NULL
};
const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs)
{
return ofs->config.userxattr ? ovl_user_xattr_handlers :
ovl_trusted_xattr_handlers;
}
......@@ -2160,3 +2160,4 @@ int sb_init_dio_done_wq(struct super_block *sb)
destroy_workqueue(wq);
return 0;
}
EXPORT_SYMBOL_GPL(sb_init_dio_done_wq);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment