Commit c353f88f authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs

Pull overlayfs updates from Miklos Szeredi:
 "This fixes d_ino correctness in readdir, which brings overlayfs on par
  with normal filesystems regarding inode number semantics, as long as
  all layers are on the same filesystem.

  There are also some bug fixes, one in particular (random ioctl's
  shouldn't be able to modify lower layers) that touches some vfs code,
  but of course no-op for non-overlay fs"

* 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs:
  ovl: fix false positive ESTALE on lookup
  ovl: don't allow writing ioctl on lower layer
  ovl: fix relatime for directories
  vfs: add flags to d_real()
  ovl: cleanup d_real for negative
  ovl: constant d_ino for non-merge dirs
  ovl: constant d_ino across copy up
  ovl: fix readdir error value
  ovl: check snprintf return
parents 6d8ef53e 939ae4ef
......@@ -22,7 +22,7 @@ prototypes:
struct vfsmount *(*d_automount)(struct path *path);
int (*d_manage)(const struct path *, bool);
struct dentry *(*d_real)(struct dentry *, const struct inode *,
unsigned int);
unsigned int, unsigned int);
locking rules:
rename_lock ->d_lock may block rcu-walk
......
......@@ -988,7 +988,7 @@ struct dentry_operations {
struct vfsmount *(*d_automount)(struct path *);
int (*d_manage)(const struct path *, bool);
struct dentry *(*d_real)(struct dentry *, const struct inode *,
unsigned int);
unsigned int, unsigned int);
};
d_revalidate: called when the VFS needs to revalidate a dentry. This
......
......@@ -1570,11 +1570,24 @@ EXPORT_SYMBOL(bmap);
static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
bool rcu)
{
if (!rcu) {
struct inode *realinode = d_real_inode(dentry);
struct dentry *upperdentry;
if (unlikely(inode != realinode) &&
(!timespec_equal(&inode->i_mtime, &realinode->i_mtime) ||
/*
* Nothing to do if in rcu or if non-overlayfs
*/
if (rcu || likely(!(dentry->d_flags & DCACHE_OP_REAL)))
return;
upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
/*
* If file is on lower then we can't update atime, so no worries about
* stale mtime/ctime.
*/
if (upperdentry) {
struct inode *realinode = d_inode(upperdentry);
if ((!timespec_equal(&inode->i_mtime, &realinode->i_mtime) ||
!timespec_equal(&inode->i_ctime, &realinode->i_ctime))) {
inode->i_mtime = realinode->i_mtime;
inode->i_ctime = realinode->i_ctime;
......
......@@ -71,8 +71,10 @@ extern void __init mnt_init(void);
extern int __mnt_want_write(struct vfsmount *);
extern int __mnt_want_write_file(struct file *);
extern int mnt_want_write_file_path(struct file *);
extern void __mnt_drop_write(struct vfsmount *);
extern void __mnt_drop_write_file(struct file *);
extern void mnt_drop_write_file_path(struct file *);
/*
* fs_struct.c
......
......@@ -431,13 +431,18 @@ int __mnt_want_write_file(struct file *file)
}
/**
* mnt_want_write_file - get write access to a file's mount
* mnt_want_write_file_path - get write access to a file's mount
* @file: the file who's mount on which to take a write
*
* This is like mnt_want_write, but it takes a file and can
* do some optimisations if the file is open for write already
*
* Called by the vfs for cases when we have an open file at hand, but will do an
* inode operation on it (important distinction for files opened on overlayfs,
* since the file operations will come from the real underlying file, while
* inode operations come from the overlay).
*/
int mnt_want_write_file(struct file *file)
int mnt_want_write_file_path(struct file *file)
{
int ret;
......@@ -447,6 +452,53 @@ int mnt_want_write_file(struct file *file)
sb_end_write(file->f_path.mnt->mnt_sb);
return ret;
}
static inline int may_write_real(struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
struct dentry *upperdentry;
/* Writable file? */
if (file->f_mode & FMODE_WRITER)
return 0;
/* Not overlayfs? */
if (likely(!(dentry->d_flags & DCACHE_OP_REAL)))
return 0;
/* File refers to upper, writable layer? */
upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
if (upperdentry && file_inode(file) == d_inode(upperdentry))
return 0;
/* Lower layer: can't write to real file, sorry... */
return -EPERM;
}
/**
* mnt_want_write_file - get write access to a file's mount
* @file: the file who's mount on which to take a write
*
* This is like mnt_want_write, but it takes a file and can
* do some optimisations if the file is open for write already
*
* Mostly called by filesystems from their ioctl operation before performing
* modification. On overlayfs this needs to check if the file is on a read-only
* lower layer and deny access in that case.
*/
int mnt_want_write_file(struct file *file)
{
int ret;
ret = may_write_real(file);
if (!ret) {
sb_start_write(file_inode(file)->i_sb);
ret = __mnt_want_write_file(file);
if (ret)
sb_end_write(file_inode(file)->i_sb);
}
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);
/**
......@@ -484,10 +536,16 @@ void __mnt_drop_write_file(struct file *file)
__mnt_drop_write(file->f_path.mnt);
}
void mnt_drop_write_file(struct file *file)
void mnt_drop_write_file_path(struct file *file)
{
mnt_drop_write(file->f_path.mnt);
}
void mnt_drop_write_file(struct file *file)
{
__mnt_drop_write(file->f_path.mnt);
sb_end_write(file_inode(file)->i_sb);
}
EXPORT_SYMBOL(mnt_drop_write_file);
static int mnt_make_readonly(struct mount *mnt)
......
......@@ -96,7 +96,7 @@ long vfs_truncate(const struct path *path, loff_t length)
* write access on the upper inode, not on the overlay inode. For
* non-overlay filesystems d_real() is an identity function.
*/
upperdentry = d_real(path->dentry, NULL, O_WRONLY);
upperdentry = d_real(path->dentry, NULL, O_WRONLY, 0);
error = PTR_ERR(upperdentry);
if (IS_ERR(upperdentry))
goto mnt_drop_write_and_out;
......@@ -670,12 +670,12 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
if (!f.file)
goto out;
error = mnt_want_write_file(f.file);
error = mnt_want_write_file_path(f.file);
if (error)
goto out_fput;
audit_file(f.file);
error = chown_common(&f.file->f_path, user, group);
mnt_drop_write_file(f.file);
mnt_drop_write_file_path(f.file);
out_fput:
fdput(f);
out:
......@@ -857,7 +857,7 @@ EXPORT_SYMBOL(file_path);
int vfs_open(const struct path *path, struct file *file,
const struct cred *cred)
{
struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags);
struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
......
......@@ -155,7 +155,7 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry, bool hardlink)
{
ovl_dentry_version_inc(dentry->d_parent);
ovl_dentry_version_inc(dentry->d_parent, false);
ovl_dentry_set_upper_alias(dentry);
if (!hardlink) {
ovl_inode_update(inode, newdentry);
......@@ -692,7 +692,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
if (flags)
ovl_cleanup(wdir, upper);
ovl_dentry_version_inc(dentry->d_parent);
ovl_dentry_version_inc(dentry->d_parent, true);
out_d_drop:
d_drop(dentry);
dput(whiteout);
......@@ -742,7 +742,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
err = vfs_rmdir(dir, upper);
else
err = vfs_unlink(dir, upper, NULL);
ovl_dentry_version_inc(dentry->d_parent);
ovl_dentry_version_inc(dentry->d_parent, ovl_type_origin(dentry));
/*
* Keeping this dentry hashed would mean having to release
......@@ -1089,8 +1089,9 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
drop_nlink(d_inode(new));
}
ovl_dentry_version_inc(old->d_parent);
ovl_dentry_version_inc(new->d_parent);
ovl_dentry_version_inc(old->d_parent,
!overwrite && ovl_type_origin(new));
ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old));
out_dput:
dput(newdentry);
......
......@@ -498,6 +498,9 @@ static int ovl_set_nlink_common(struct dentry *dentry,
len = snprintf(buf, sizeof(buf), format,
(int) (inode->i_nlink - realinode->i_nlink));
if (WARN_ON(len >= sizeof(buf)))
return -EIO;
return ovl_do_setxattr(ovl_dentry_upper(dentry),
OVL_XATTR_NLINK, buf, len, 0);
}
......@@ -576,10 +579,13 @@ static int ovl_inode_set(struct inode *inode, void *data)
static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
struct dentry *upperdentry)
{
struct inode *lowerinode = lowerdentry ? d_inode(lowerdentry) : NULL;
/* Lower (origin) inode must match, even if NULL */
if (ovl_inode_lower(inode) != lowerinode)
/*
* Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL.
* This happens when finding a copied up overlay inode for a renamed
* or hardlinked overlay dentry and lower dentry cannot be followed
* by origin because lower fs does not support file handles.
*/
if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry))
return false;
/*
......
......@@ -204,8 +204,8 @@ struct dentry *ovl_i_dentry_upper(struct inode *inode);
struct inode *ovl_inode_upper(struct inode *inode);
struct inode *ovl_inode_lower(struct inode *inode);
struct inode *ovl_inode_real(struct inode *inode);
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode);
void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache);
bool ovl_dentry_is_opaque(struct dentry *dentry);
bool ovl_dentry_is_whiteout(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry);
......@@ -217,7 +217,7 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
struct dentry *lowerdentry);
void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
void ovl_dentry_version_inc(struct dentry *dentry);
void ovl_dentry_version_inc(struct dentry *dentry, bool impurity);
u64 ovl_dentry_version_get(struct dentry *dentry);
bool ovl_is_whiteout(struct dentry *dentry);
struct file *ovl_path_open(struct path *path, int flags);
......@@ -229,6 +229,7 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
int xerr);
int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
void ovl_set_flag(unsigned long flag, struct inode *inode);
void ovl_clear_flag(unsigned long flag, struct inode *inode);
bool ovl_test_flag(unsigned long flag, struct inode *inode);
bool ovl_inuse_trylock(struct dentry *dentry);
void ovl_inuse_unlock(struct dentry *dentry);
......@@ -256,6 +257,7 @@ extern const struct file_operations ovl_dir_operations;
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
void ovl_cache_free(struct list_head *list);
void ovl_dir_cache_free(struct inode *inode);
int ovl_check_d_type_supported(struct path *realpath);
void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
struct dentry *dentry, int level);
......
This diff is collapsed.
......@@ -70,20 +70,20 @@ static int ovl_check_append_only(struct inode *inode, int flag)
static struct dentry *ovl_d_real(struct dentry *dentry,
const struct inode *inode,
unsigned int open_flags)
unsigned int open_flags, unsigned int flags)
{
struct dentry *real;
int err;
if (flags & D_REAL_UPPER)
return ovl_dentry_upper(dentry);
if (!d_is_reg(dentry)) {
if (!inode || inode == d_inode(dentry))
return dentry;
goto bug;
}
if (d_is_negative(dentry))
return dentry;
if (open_flags) {
err = ovl_open_maybe_copy_up(dentry, open_flags);
if (err)
......@@ -105,7 +105,7 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
goto bug;
/* Handle recursion */
real = d_real(real, inode, open_flags);
real = d_real(real, inode, open_flags, 0);
if (!inode || inode == d_inode(real))
return real;
......@@ -198,6 +198,7 @@ static void ovl_destroy_inode(struct inode *inode)
dput(oi->__upperdentry);
kfree(oi->redirect);
ovl_dir_cache_free(inode);
mutex_destroy(&oi->lock);
call_rcu(&inode->i_rcu, ovl_i_callback);
......
......@@ -180,14 +180,14 @@ struct inode *ovl_inode_real(struct inode *inode)
}
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode)
{
return OVL_I(d_inode(dentry))->cache;
return OVL_I(inode)->cache;
}
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache)
{
OVL_I(d_inode(dentry))->cache = cache;
OVL_I(inode)->cache = cache;
}
bool ovl_dentry_is_opaque(struct dentry *dentry)
......@@ -275,12 +275,19 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
}
}
void ovl_dentry_version_inc(struct dentry *dentry)
void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
{
struct inode *inode = d_inode(dentry);
WARN_ON(!inode_is_locked(inode));
OVL_I(inode)->version++;
/*
* Version is used by readdir code to keep cache consistent. For merge
* dirs all changes need to be noted. For non-merge dirs, cache only
* contains impure (ones which have been copied up and have origins)
* entries, so only need to note changes to impure entries.
*/
if (OVL_TYPE_MERGE(ovl_path_type(dentry)) || impurity)
OVL_I(inode)->version++;
}
u64 ovl_dentry_version_get(struct dentry *dentry)
......@@ -382,6 +389,11 @@ void ovl_set_flag(unsigned long flag, struct inode *inode)
set_bit(flag, &OVL_I(inode)->flags);
}
void ovl_clear_flag(unsigned long flag, struct inode *inode)
{
clear_bit(flag, &OVL_I(inode)->flags);
}
bool ovl_test_flag(unsigned long flag, struct inode *inode)
{
return test_bit(flag, &OVL_I(inode)->flags);
......
......@@ -23,6 +23,7 @@
#include <linux/posix_acl_xattr.h>
#include <linux/uaccess.h>
#include "internal.h"
static const char *
strcmp_prefix(const char *a, const char *a_prefix)
......@@ -502,10 +503,10 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
if (!f.file)
return error;
audit_file(f.file);
error = mnt_want_write_file(f.file);
error = mnt_want_write_file_path(f.file);
if (!error) {
error = setxattr(f.file->f_path.dentry, name, value, size, flags);
mnt_drop_write_file(f.file);
mnt_drop_write_file_path(f.file);
}
fdput(f);
return error;
......@@ -734,10 +735,10 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
if (!f.file)
return error;
audit_file(f.file);
error = mnt_want_write_file(f.file);
error = mnt_want_write_file_path(f.file);
if (!error) {
error = removexattr(f.file->f_path.dentry, name);
mnt_drop_write_file(f.file);
mnt_drop_write_file_path(f.file);
}
fdput(f);
return error;
......
......@@ -147,7 +147,7 @@ struct dentry_operations {
struct vfsmount *(*d_automount)(struct path *);
int (*d_manage)(const struct path *, bool);
struct dentry *(*d_real)(struct dentry *, const struct inode *,
unsigned int);
unsigned int, unsigned int);
} ____cacheline_aligned;
/*
......@@ -562,11 +562,15 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
return upper;
}
/* d_real() flags */
#define D_REAL_UPPER 0x2 /* return upper dentry or NULL if non-upper */
/**
* d_real - Return the real dentry
* @dentry: the dentry to query
* @inode: inode to select the dentry from multiple layers (can be NULL)
* @flags: open flags to control copy-up behavior
* @open_flags: open flags to control copy-up behavior
* @flags: flags to control what is returned by this function
*
* If dentry is on a union/overlay, then return the underlying, real dentry.
* Otherwise return the dentry itself.
......@@ -575,10 +579,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
*/
static inline struct dentry *d_real(struct dentry *dentry,
const struct inode *inode,
unsigned int flags)
unsigned int open_flags, unsigned int flags)
{
if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
return dentry->d_op->d_real(dentry, inode, flags);
return dentry->d_op->d_real(dentry, inode, open_flags, flags);
else
return dentry;
}
......@@ -593,7 +597,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
static inline struct inode *d_real_inode(const struct dentry *dentry)
{
/* This usage of d_real() results in const dentry */
return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0));
return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0, 0));
}
struct name_snapshot {
......
......@@ -1235,7 +1235,7 @@ static inline struct inode *file_inode(const struct file *f)
static inline struct dentry *file_dentry(const struct file *file)
{
return d_real(file->f_path.dentry, file_inode(file), 0);
return d_real(file->f_path.dentry, file_inode(file), 0, 0);
}
static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment