Commit f3592877 authored by Linus Torvalds

Merge branch 'from-miklos' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs updates from Al Viro:
 "Assorted patches from Miklos.

  An interesting part here is /proc/mounts stuff..."

The "/proc/mounts stuff" is using a cursor for keeping the location
data while traversing the mount listing.

Also probably worth noting is the addition of faccessat2(), which takes
an additional set of flags to specify how the lookup is done
(AT_EACCESS, AT_SYMLINK_NOFOLLOW, AT_EMPTY_PATH).

* 'from-miklos' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  vfs: add faccessat2 syscall
  vfs: don't parse "silent" option
  vfs: don't parse "posixacl" option
  vfs: don't parse forbidden flags
  statx: add mount_root
  statx: add mount ID
  statx: don't clear STATX_ATIME on SB_RDONLY
  uapi: deprecate STATX_ALL
  utimensat: AT_EMPTY_PATH support
  vfs: split out access_override_creds()
  proc/mounts: add cursor
  aio: fix async fsync creds
  vfs: allow unprivileged whiteout creation
parents 8b39a57e c8ffd8bc
...@@ -477,3 +477,4 @@ ...@@ -477,3 +477,4 @@
# 545 reserved for clone3 # 545 reserved for clone3
547 common openat2 sys_openat2 547 common openat2 sys_openat2
548 common pidfd_getfd sys_pidfd_getfd 548 common pidfd_getfd sys_pidfd_getfd
549 common faccessat2 sys_faccessat2
...@@ -451,3 +451,4 @@ ...@@ -451,3 +451,4 @@
435 common clone3 sys_clone3 435 common clone3 sys_clone3
437 common openat2 sys_openat2 437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd 438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
#define __NR_compat_syscalls 439 #define __NR_compat_syscalls 440
#endif #endif
#define __ARCH_WANT_SYS_CLONE #define __ARCH_WANT_SYS_CLONE
......
...@@ -883,6 +883,8 @@ __SYSCALL(__NR_clone3, sys_clone3) ...@@ -883,6 +883,8 @@ __SYSCALL(__NR_clone3, sys_clone3)
__SYSCALL(__NR_openat2, sys_openat2) __SYSCALL(__NR_openat2, sys_openat2)
#define __NR_pidfd_getfd 438 #define __NR_pidfd_getfd 438
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
#define __NR_faccessat2 439
__SYSCALL(__NR_faccessat2, sys_faccessat2)
/* /*
* Please add new compat syscalls above this comment and update * Please add new compat syscalls above this comment and update
......
...@@ -358,3 +358,4 @@ ...@@ -358,3 +358,4 @@
# 435 reserved for clone3 # 435 reserved for clone3
437 common openat2 sys_openat2 437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd 438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
...@@ -437,3 +437,4 @@ ...@@ -437,3 +437,4 @@
435 common clone3 __sys_clone3 435 common clone3 __sys_clone3
437 common openat2 sys_openat2 437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd 438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
...@@ -443,3 +443,4 @@ ...@@ -443,3 +443,4 @@
435 common clone3 sys_clone3 435 common clone3 sys_clone3
437 common openat2 sys_openat2 437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd 438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
...@@ -376,3 +376,4 @@ ...@@ -376,3 +376,4 @@
435 n32 clone3 __sys_clone3 435 n32 clone3 __sys_clone3
437 n32 openat2 sys_openat2 437 n32 openat2 sys_openat2
438 n32 pidfd_getfd sys_pidfd_getfd 438 n32 pidfd_getfd sys_pidfd_getfd
439 n32 faccessat2 sys_faccessat2
...@@ -352,3 +352,4 @@ ...@@ -352,3 +352,4 @@
435 n64 clone3 __sys_clone3 435 n64 clone3 __sys_clone3
437 n64 openat2 sys_openat2 437 n64 openat2 sys_openat2
438 n64 pidfd_getfd sys_pidfd_getfd 438 n64 pidfd_getfd sys_pidfd_getfd
439 n64 faccessat2 sys_faccessat2
...@@ -425,3 +425,4 @@ ...@@ -425,3 +425,4 @@
435 o32 clone3 __sys_clone3 435 o32 clone3 __sys_clone3
437 o32 openat2 sys_openat2 437 o32 openat2 sys_openat2
438 o32 pidfd_getfd sys_pidfd_getfd 438 o32 pidfd_getfd sys_pidfd_getfd
439 o32 faccessat2 sys_faccessat2
...@@ -435,3 +435,4 @@ ...@@ -435,3 +435,4 @@
435 common clone3 sys_clone3_wrapper 435 common clone3 sys_clone3_wrapper
437 common openat2 sys_openat2 437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd 438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
...@@ -527,3 +527,4 @@ ...@@ -527,3 +527,4 @@
435 spu clone3 sys_ni_syscall 435 spu clone3 sys_ni_syscall
437 common openat2 sys_openat2 437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd 438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
...@@ -440,3 +440,4 @@ ...@@ -440,3 +440,4 @@
435 common clone3 sys_clone3 sys_clone3 435 common clone3 sys_clone3 sys_clone3
437 common openat2 sys_openat2 sys_openat2 437 common openat2 sys_openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd 438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2 sys_faccessat2
...@@ -440,3 +440,4 @@ ...@@ -440,3 +440,4 @@
# 435 reserved for clone3 # 435 reserved for clone3
437 common openat2 sys_openat2 437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd 438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
...@@ -483,3 +483,4 @@ ...@@ -483,3 +483,4 @@
# 435 reserved for clone3 # 435 reserved for clone3
437 common openat2 sys_openat2 437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd 438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
...@@ -442,3 +442,4 @@ ...@@ -442,3 +442,4 @@
435 i386 clone3 sys_clone3 435 i386 clone3 sys_clone3
437 i386 openat2 sys_openat2 437 i386 openat2 sys_openat2
438 i386 pidfd_getfd sys_pidfd_getfd 438 i386 pidfd_getfd sys_pidfd_getfd
439 i386 faccessat2 sys_faccessat2
...@@ -359,6 +359,7 @@ ...@@ -359,6 +359,7 @@
435 common clone3 sys_clone3 435 common clone3 sys_clone3
437 common openat2 sys_openat2 437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd 438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
# #
# x32-specific system call numbers start at 512 to avoid cache impact # x32-specific system call numbers start at 512 to avoid cache impact
......
...@@ -408,3 +408,4 @@ ...@@ -408,3 +408,4 @@
435 common clone3 sys_clone3 435 common clone3 sys_clone3
437 common openat2 sys_openat2 437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd 438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
...@@ -176,6 +176,7 @@ struct fsync_iocb { ...@@ -176,6 +176,7 @@ struct fsync_iocb {
struct file *file; struct file *file;
struct work_struct work; struct work_struct work;
bool datasync; bool datasync;
struct cred *creds;
}; };
struct poll_iocb { struct poll_iocb {
...@@ -1589,8 +1590,11 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, ...@@ -1589,8 +1590,11 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
static void aio_fsync_work(struct work_struct *work) static void aio_fsync_work(struct work_struct *work)
{ {
struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work); struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work);
const struct cred *old_cred = override_creds(iocb->fsync.creds);
iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync); iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
revert_creds(old_cred);
put_cred(iocb->fsync.creds);
iocb_put(iocb); iocb_put(iocb);
} }
...@@ -1604,6 +1608,10 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, ...@@ -1604,6 +1608,10 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
if (unlikely(!req->file->f_op->fsync)) if (unlikely(!req->file->f_op->fsync))
return -EINVAL; return -EINVAL;
req->creds = prepare_creds();
if (!req->creds)
return -ENOMEM;
req->datasync = datasync; req->datasync = datasync;
INIT_WORK(&req->work, aio_fsync_work); INIT_WORK(&req->work, aio_fsync_work);
schedule_work(&req->work); schedule_work(&req->work);
......
...@@ -483,6 +483,9 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count) ...@@ -483,6 +483,9 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
p->dev = dev; p->dev = dev;
p->count = count; p->count = count;
if (WARN_ON(dev == WHITEOUT_DEV))
return -EBUSY;
error = kobj_map(cdev_map, dev, count, NULL, error = kobj_map(cdev_map, dev, count, NULL,
exact_match, exact_lock, p); exact_match, exact_lock, p);
if (error) if (error)
......
...@@ -42,7 +42,6 @@ static const struct constant_table common_set_sb_flag[] = { ...@@ -42,7 +42,6 @@ static const struct constant_table common_set_sb_flag[] = {
{ "dirsync", SB_DIRSYNC }, { "dirsync", SB_DIRSYNC },
{ "lazytime", SB_LAZYTIME }, { "lazytime", SB_LAZYTIME },
{ "mand", SB_MANDLOCK }, { "mand", SB_MANDLOCK },
{ "posixacl", SB_POSIXACL },
{ "ro", SB_RDONLY }, { "ro", SB_RDONLY },
{ "sync", SB_SYNCHRONOUS }, { "sync", SB_SYNCHRONOUS },
{ }, { },
...@@ -53,44 +52,15 @@ static const struct constant_table common_clear_sb_flag[] = { ...@@ -53,44 +52,15 @@ static const struct constant_table common_clear_sb_flag[] = {
{ "nolazytime", SB_LAZYTIME }, { "nolazytime", SB_LAZYTIME },
{ "nomand", SB_MANDLOCK }, { "nomand", SB_MANDLOCK },
{ "rw", SB_RDONLY }, { "rw", SB_RDONLY },
{ "silent", SB_SILENT },
{ }, { },
}; };
static const char *const forbidden_sb_flag[] = {
"bind",
"dev",
"exec",
"move",
"noatime",
"nodev",
"nodiratime",
"noexec",
"norelatime",
"nostrictatime",
"nosuid",
"private",
"rec",
"relatime",
"remount",
"shared",
"slave",
"strictatime",
"suid",
"unbindable",
};
/* /*
* Check for a common mount option that manipulates s_flags. * Check for a common mount option that manipulates s_flags.
*/ */
static int vfs_parse_sb_flag(struct fs_context *fc, const char *key) static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)
{ {
unsigned int token; unsigned int token;
unsigned int i;
for (i = 0; i < ARRAY_SIZE(forbidden_sb_flag); i++)
if (strcmp(key, forbidden_sb_flag[i]) == 0)
return -EINVAL;
token = lookup_constant(common_set_sb_flag, key, 0); token = lookup_constant(common_set_sb_flag, key, 0);
if (token) { if (token) {
......
...@@ -126,7 +126,6 @@ extern struct open_how build_open_how(int flags, umode_t mode); ...@@ -126,7 +126,6 @@ extern struct open_how build_open_how(int flags, umode_t mode);
extern int build_open_flags(const struct open_how *how, struct open_flags *op); extern int build_open_flags(const struct open_how *how, struct open_flags *op);
long do_sys_ftruncate(unsigned int fd, loff_t length, int small); long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
long do_faccessat(int dfd, const char __user *filename, int mode);
int do_fchmodat(int dfd, const char __user *filename, umode_t mode); int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag); int flag);
......
...@@ -9,7 +9,13 @@ struct mnt_namespace { ...@@ -9,7 +9,13 @@ struct mnt_namespace {
atomic_t count; atomic_t count;
struct ns_common ns; struct ns_common ns;
struct mount * root; struct mount * root;
/*
* Traversal and modification of .list is protected by either
* - taking namespace_sem for write, OR
* - taking namespace_sem for read AND taking .ns_lock.
*/
struct list_head list; struct list_head list;
spinlock_t ns_lock;
struct user_namespace *user_ns; struct user_namespace *user_ns;
struct ucounts *ucounts; struct ucounts *ucounts;
u64 seq; /* Sequence number to prevent loops */ u64 seq; /* Sequence number to prevent loops */
...@@ -133,9 +139,7 @@ struct proc_mounts { ...@@ -133,9 +139,7 @@ struct proc_mounts {
struct mnt_namespace *ns; struct mnt_namespace *ns;
struct path root; struct path root;
int (*show)(struct seq_file *, struct vfsmount *); int (*show)(struct seq_file *, struct vfsmount *);
void *cached_mount; struct mount cursor;
u64 cached_event;
loff_t cached_index;
}; };
extern const struct seq_operations mounts_op; extern const struct seq_operations mounts_op;
...@@ -153,3 +157,5 @@ static inline bool is_anon_ns(struct mnt_namespace *ns) ...@@ -153,3 +157,5 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
{ {
return ns->seq == 0; return ns->seq == 0;
} }
extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
...@@ -3505,12 +3505,14 @@ EXPORT_SYMBOL(user_path_create); ...@@ -3505,12 +3505,14 @@ EXPORT_SYMBOL(user_path_create);
int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{ {
bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
int error = may_create(dir, dentry); int error = may_create(dir, dentry);
if (error) if (error)
return error; return error;
if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout &&
!capable(CAP_MKNOD))
return -EPERM; return -EPERM;
if (!dir->i_op->mknod) if (!dir->i_op->mknod)
...@@ -4345,9 +4347,6 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd, ...@@ -4345,9 +4347,6 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
(flags & RENAME_EXCHANGE)) (flags & RENAME_EXCHANGE))
return -EINVAL; return -EINVAL;
if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
return -EPERM;
if (flags & RENAME_EXCHANGE) if (flags & RENAME_EXCHANGE)
target_flags = 0; target_flags = 0;
...@@ -4483,20 +4482,6 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna ...@@ -4483,20 +4482,6 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
} }
int vfs_whiteout(struct inode *dir, struct dentry *dentry)
{
int error = may_create(dir, dentry);
if (error)
return error;
if (!dir->i_op->mknod)
return -EPERM;
return dir->i_op->mknod(dir, dentry,
S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
}
EXPORT_SYMBOL(vfs_whiteout);
int readlink_copy(char __user *buffer, int buflen, const char *link) int readlink_copy(char __user *buffer, int buflen, const char *link)
{ {
int len = PTR_ERR(link); int len = PTR_ERR(link);
......
...@@ -648,6 +648,21 @@ struct vfsmount *lookup_mnt(const struct path *path) ...@@ -648,6 +648,21 @@ struct vfsmount *lookup_mnt(const struct path *path)
return m; return m;
} }
static inline void lock_ns_list(struct mnt_namespace *ns)
{
spin_lock(&ns->ns_lock);
}
static inline void unlock_ns_list(struct mnt_namespace *ns)
{
spin_unlock(&ns->ns_lock);
}
static inline bool mnt_is_cursor(struct mount *mnt)
{
return mnt->mnt.mnt_flags & MNT_CURSOR;
}
/* /*
* __is_local_mountpoint - Test to see if dentry is a mountpoint in the * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
* current mount namespace. * current mount namespace.
...@@ -673,11 +688,15 @@ bool __is_local_mountpoint(struct dentry *dentry) ...@@ -673,11 +688,15 @@ bool __is_local_mountpoint(struct dentry *dentry)
goto out; goto out;
down_read(&namespace_sem); down_read(&namespace_sem);
lock_ns_list(ns);
list_for_each_entry(mnt, &ns->list, mnt_list) { list_for_each_entry(mnt, &ns->list, mnt_list) {
if (mnt_is_cursor(mnt))
continue;
is_covered = (mnt->mnt_mountpoint == dentry); is_covered = (mnt->mnt_mountpoint == dentry);
if (is_covered) if (is_covered)
break; break;
} }
unlock_ns_list(ns);
up_read(&namespace_sem); up_read(&namespace_sem);
out: out:
return is_covered; return is_covered;
...@@ -1245,46 +1264,71 @@ struct vfsmount *mnt_clone_internal(const struct path *path) ...@@ -1245,46 +1264,71 @@ struct vfsmount *mnt_clone_internal(const struct path *path)
} }
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
static struct mount *mnt_list_next(struct mnt_namespace *ns,
struct list_head *p)
{
struct mount *mnt, *ret = NULL;
lock_ns_list(ns);
list_for_each_continue(p, &ns->list) {
mnt = list_entry(p, typeof(*mnt), mnt_list);
if (!mnt_is_cursor(mnt)) {
ret = mnt;
break;
}
}
unlock_ns_list(ns);
return ret;
}
/* iterator; we want it to have access to namespace_sem, thus here... */ /* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos) static void *m_start(struct seq_file *m, loff_t *pos)
{ {
struct proc_mounts *p = m->private; struct proc_mounts *p = m->private;
struct list_head *prev;
down_read(&namespace_sem); down_read(&namespace_sem);
if (p->cached_event == p->ns->event) { if (!*pos) {
void *v = p->cached_mount; prev = &p->ns->list;
if (*pos == p->cached_index) } else {
return v; prev = &p->cursor.mnt_list;
if (*pos == p->cached_index + 1) {
v = seq_list_next(v, &p->ns->list, &p->cached_index); /* Read after we'd reached the end? */
return p->cached_mount = v; if (list_empty(prev))
} return NULL;
} }
p->cached_event = p->ns->event; return mnt_list_next(p->ns, prev);
p->cached_mount = seq_list_start(&p->ns->list, *pos);
p->cached_index = *pos;
return p->cached_mount;
} }
static void *m_next(struct seq_file *m, void *v, loff_t *pos) static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{ {
struct proc_mounts *p = m->private; struct proc_mounts *p = m->private;
struct mount *mnt = v;
p->cached_mount = seq_list_next(v, &p->ns->list, pos); ++*pos;
p->cached_index = *pos; return mnt_list_next(p->ns, &mnt->mnt_list);
return p->cached_mount;
} }
static void m_stop(struct seq_file *m, void *v) static void m_stop(struct seq_file *m, void *v)
{ {
struct proc_mounts *p = m->private;
struct mount *mnt = v;
lock_ns_list(p->ns);
if (mnt)
list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
else
list_del_init(&p->cursor.mnt_list);
unlock_ns_list(p->ns);
up_read(&namespace_sem); up_read(&namespace_sem);
} }
static int m_show(struct seq_file *m, void *v) static int m_show(struct seq_file *m, void *v)
{ {
struct proc_mounts *p = m->private; struct proc_mounts *p = m->private;
struct mount *r = list_entry(v, struct mount, mnt_list); struct mount *r = v;
return p->show(m, &r->mnt); return p->show(m, &r->mnt);
} }
...@@ -1294,6 +1338,15 @@ const struct seq_operations mounts_op = { ...@@ -1294,6 +1338,15 @@ const struct seq_operations mounts_op = {
.stop = m_stop, .stop = m_stop,
.show = m_show, .show = m_show,
}; };
void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
{
down_read(&namespace_sem);
lock_ns_list(ns);
list_del(&cursor->mnt_list);
unlock_ns_list(ns);
up_read(&namespace_sem);
}
#endif /* CONFIG_PROC_FS */ #endif /* CONFIG_PROC_FS */
/** /**
...@@ -3202,6 +3255,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a ...@@ -3202,6 +3255,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
atomic_set(&new_ns->count, 1); atomic_set(&new_ns->count, 1);
INIT_LIST_HEAD(&new_ns->list); INIT_LIST_HEAD(&new_ns->list);
init_waitqueue_head(&new_ns->poll); init_waitqueue_head(&new_ns->poll);
spin_lock_init(&new_ns->ns_lock);
new_ns->user_ns = get_user_ns(user_ns); new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts; new_ns->ucounts = ucounts;
return new_ns; return new_ns;
...@@ -3842,10 +3896,14 @@ static bool mnt_already_visible(struct mnt_namespace *ns, ...@@ -3842,10 +3896,14 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
bool visible = false; bool visible = false;
down_read(&namespace_sem); down_read(&namespace_sem);
lock_ns_list(ns);
list_for_each_entry(mnt, &ns->list, mnt_list) { list_for_each_entry(mnt, &ns->list, mnt_list) {
struct mount *child; struct mount *child;
int mnt_flags; int mnt_flags;
if (mnt_is_cursor(mnt))
continue;
if (mnt->mnt.mnt_sb->s_type != sb->s_type) if (mnt->mnt.mnt_sb->s_type != sb->s_type)
continue; continue;
...@@ -3893,6 +3951,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, ...@@ -3893,6 +3951,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
next: ; next: ;
} }
found: found:
unlock_ns_list(ns);
up_read(&namespace_sem); up_read(&namespace_sem);
return visible; return visible;
} }
......
...@@ -345,21 +345,14 @@ SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) ...@@ -345,21 +345,14 @@ SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
* We do this by temporarily clearing all FS-related capabilities and * We do this by temporarily clearing all FS-related capabilities and
* switching the fsuid/fsgid around to the real ones. * switching the fsuid/fsgid around to the real ones.
*/ */
long do_faccessat(int dfd, const char __user *filename, int mode) static const struct cred *access_override_creds(void)
{ {
const struct cred *old_cred; const struct cred *old_cred;
struct cred *override_cred; struct cred *override_cred;
struct path path;
struct inode *inode;
int res;
unsigned int lookup_flags = LOOKUP_FOLLOW;
if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
return -EINVAL;
override_cred = prepare_creds(); override_cred = prepare_creds();
if (!override_cred) if (!override_cred)
return -ENOMEM; return NULL;
override_cred->fsuid = override_cred->uid; override_cred->fsuid = override_cred->uid;
override_cred->fsgid = override_cred->gid; override_cred->fsgid = override_cred->gid;
...@@ -394,6 +387,38 @@ long do_faccessat(int dfd, const char __user *filename, int mode) ...@@ -394,6 +387,38 @@ long do_faccessat(int dfd, const char __user *filename, int mode)
override_cred->non_rcu = 1; override_cred->non_rcu = 1;
old_cred = override_creds(override_cred); old_cred = override_creds(override_cred);
/* override_cred() gets its own ref */
put_cred(override_cred);
return old_cred;
}
long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
{
struct path path;
struct inode *inode;
int res;
unsigned int lookup_flags = LOOKUP_FOLLOW;
const struct cred *old_cred = NULL;
if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
return -EINVAL;
if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
return -EINVAL;
if (flags & AT_SYMLINK_NOFOLLOW)
lookup_flags &= ~LOOKUP_FOLLOW;
if (flags & AT_EMPTY_PATH)
lookup_flags |= LOOKUP_EMPTY;
if (!(flags & AT_EACCESS)) {
old_cred = access_override_creds();
if (!old_cred)
return -ENOMEM;
}
retry: retry:
res = user_path_at(dfd, filename, lookup_flags, &path); res = user_path_at(dfd, filename, lookup_flags, &path);
if (res) if (res)
...@@ -435,19 +460,26 @@ long do_faccessat(int dfd, const char __user *filename, int mode) ...@@ -435,19 +460,26 @@ long do_faccessat(int dfd, const char __user *filename, int mode)
goto retry; goto retry;
} }
out: out:
if (old_cred)
revert_creds(old_cred); revert_creds(old_cred);
put_cred(override_cred);
return res; return res;
} }
SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
{ {
return do_faccessat(dfd, filename, mode); return do_faccessat(dfd, filename, mode, 0);
}
SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode,
int, flags)
{
return do_faccessat(dfd, filename, mode, flags);
} }
SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
{ {
return do_faccessat(AT_FDCWD, filename, mode); return do_faccessat(AT_FDCWD, filename, mode, 0);
} }
int ksys_chdir(const char __user *filename) int ksys_chdir(const char __user *filename)
......
...@@ -279,7 +279,8 @@ static int mounts_open_common(struct inode *inode, struct file *file, ...@@ -279,7 +279,8 @@ static int mounts_open_common(struct inode *inode, struct file *file,
p->ns = ns; p->ns = ns;
p->root = root; p->root = root;
p->show = show; p->show = show;
p->cached_event = ~0ULL; INIT_LIST_HEAD(&p->cursor.mnt_list);
p->cursor.mnt.mnt_flags = MNT_CURSOR;
return 0; return 0;
...@@ -296,6 +297,7 @@ static int mounts_release(struct inode *inode, struct file *file) ...@@ -296,6 +297,7 @@ static int mounts_release(struct inode *inode, struct file *file)
struct seq_file *m = file->private_data; struct seq_file *m = file->private_data;
struct proc_mounts *p = m->private; struct proc_mounts *p = m->private;
path_put(&p->root); path_put(&p->root);
mnt_cursor_del(p->ns, &p->cursor);
put_mnt_ns(p->ns); put_mnt_ns(p->ns);
return seq_release_private(inode, file); return seq_release_private(inode, file);
} }
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <asm/unistd.h> #include <asm/unistd.h>
#include "internal.h" #include "internal.h"
#include "mount.h"
/** /**
* generic_fillattr - Fill in the basic attributes from the inode struct * generic_fillattr - Fill in the basic attributes from the inode struct
...@@ -70,11 +71,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, ...@@ -70,11 +71,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
memset(stat, 0, sizeof(*stat)); memset(stat, 0, sizeof(*stat));
stat->result_mask |= STATX_BASIC_STATS; stat->result_mask |= STATX_BASIC_STATS;
request_mask &= STATX_ALL;
query_flags &= KSTAT_QUERY_FLAGS; query_flags &= KSTAT_QUERY_FLAGS;
/* allow the fs to override these if it really wants to */ /* allow the fs to override these if it really wants to */
if (IS_NOATIME(inode)) /* SB_NOATIME means filesystem supplies dummy atime value */
if (inode->i_sb->s_flags & SB_NOATIME)
stat->result_mask &= ~STATX_ATIME; stat->result_mask &= ~STATX_ATIME;
if (IS_AUTOMOUNT(inode)) if (IS_AUTOMOUNT(inode))
stat->attributes |= STATX_ATTR_AUTOMOUNT; stat->attributes |= STATX_ATTR_AUTOMOUNT;
...@@ -199,6 +200,11 @@ int vfs_statx(int dfd, const char __user *filename, int flags, ...@@ -199,6 +200,11 @@ int vfs_statx(int dfd, const char __user *filename, int flags,
goto out; goto out;
error = vfs_getattr(&path, stat, request_mask, flags); error = vfs_getattr(&path, stat, request_mask, flags);
stat->mnt_id = real_mount(path.mnt)->mnt_id;
stat->result_mask |= STATX_MNT_ID;
if (path.mnt->mnt_root == path.dentry)
stat->attributes |= STATX_ATTR_MOUNT_ROOT;
stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
path_put(&path); path_put(&path);
if (retry_estale(error, lookup_flags)) { if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL; lookup_flags |= LOOKUP_REVAL;
...@@ -563,6 +569,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) ...@@ -563,6 +569,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_rdev_minor = MINOR(stat->rdev); tmp.stx_rdev_minor = MINOR(stat->rdev);
tmp.stx_dev_major = MAJOR(stat->dev); tmp.stx_dev_major = MAJOR(stat->dev);
tmp.stx_dev_minor = MINOR(stat->dev); tmp.stx_dev_minor = MINOR(stat->dev);
tmp.stx_mnt_id = stat->mnt_id;
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
} }
......
...@@ -95,13 +95,13 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times, ...@@ -95,13 +95,13 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times,
goto out; goto out;
} }
if (flags & ~AT_SYMLINK_NOFOLLOW) if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
goto out; goto out;
if (filename == NULL && dfd != AT_FDCWD) { if (filename == NULL && dfd != AT_FDCWD) {
struct fd f; struct fd f;
if (flags & AT_SYMLINK_NOFOLLOW) if (flags)
goto out; goto out;
f = fdget(dfd); f = fdget(dfd);
...@@ -117,6 +117,8 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times, ...@@ -117,6 +117,8 @@ long do_utimes(int dfd, const char __user *filename, struct timespec64 *times,
if (!(flags & AT_SYMLINK_NOFOLLOW)) if (!(flags & AT_SYMLINK_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW; lookup_flags |= LOOKUP_FOLLOW;
if (flags & AT_EMPTY_PATH)
lookup_flags |= LOOKUP_EMPTY;
retry: retry:
error = user_path_at(dfd, filename, lookup_flags, &path); error = user_path_at(dfd, filename, lookup_flags, &path);
if (error) if (error)
......
...@@ -44,6 +44,9 @@ static inline int devcgroup_inode_mknod(int mode, dev_t dev) ...@@ -44,6 +44,9 @@ static inline int devcgroup_inode_mknod(int mode, dev_t dev)
if (!S_ISBLK(mode) && !S_ISCHR(mode)) if (!S_ISBLK(mode) && !S_ISCHR(mode))
return 0; return 0;
if (S_ISCHR(mode) && dev == WHITEOUT_DEV)
return 0;
if (S_ISBLK(mode)) if (S_ISBLK(mode))
type = DEVCG_DEV_BLOCK; type = DEVCG_DEV_BLOCK;
else else
......
...@@ -1721,7 +1721,11 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino ...@@ -1721,7 +1721,11 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino
extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
extern int vfs_whiteout(struct inode *, struct dentry *);
static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry)
{
return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
}
extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
int open_flag); int open_flag);
......
...@@ -50,7 +50,8 @@ struct fs_context; ...@@ -50,7 +50,8 @@ struct fs_context;
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \
MNT_CURSOR)
#define MNT_INTERNAL 0x4000 #define MNT_INTERNAL 0x4000
...@@ -64,6 +65,7 @@ struct fs_context; ...@@ -64,6 +65,7 @@ struct fs_context;
#define MNT_SYNC_UMOUNT 0x2000000 #define MNT_SYNC_UMOUNT 0x2000000
#define MNT_MARKED 0x4000000 #define MNT_MARKED 0x4000000
#define MNT_UMOUNT 0x8000000 #define MNT_UMOUNT 0x8000000
#define MNT_CURSOR 0x10000000
struct vfsmount { struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */ struct dentry *mnt_root; /* root of the mounted tree */
......
...@@ -47,6 +47,7 @@ struct kstat { ...@@ -47,6 +47,7 @@ struct kstat {
struct timespec64 ctime; struct timespec64 ctime;
struct timespec64 btime; /* File creation time */ struct timespec64 btime; /* File creation time */
u64 blocks; u64 blocks;
u64 mnt_id;
}; };
#endif #endif
...@@ -428,6 +428,8 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); ...@@ -428,6 +428,8 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length);
#endif #endif
asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode);
asmlinkage long sys_faccessat2(int dfd, const char __user *filename, int mode,
int flags);
asmlinkage long sys_chdir(const char __user *filename); asmlinkage long sys_chdir(const char __user *filename);
asmlinkage long sys_fchdir(unsigned int fd); asmlinkage long sys_fchdir(unsigned int fd);
asmlinkage long sys_chroot(const char __user *filename); asmlinkage long sys_chroot(const char __user *filename);
...@@ -1333,11 +1335,11 @@ static inline int ksys_chmod(const char __user *filename, umode_t mode) ...@@ -1333,11 +1335,11 @@ static inline int ksys_chmod(const char __user *filename, umode_t mode)
return do_fchmodat(AT_FDCWD, filename, mode); return do_fchmodat(AT_FDCWD, filename, mode);
} }
extern long do_faccessat(int dfd, const char __user *filename, int mode); long do_faccessat(int dfd, const char __user *filename, int mode, int flags);
static inline long ksys_access(const char __user *filename, int mode) static inline long ksys_access(const char __user *filename, int mode)
{ {
return do_faccessat(AT_FDCWD, filename, mode); return do_faccessat(AT_FDCWD, filename, mode, 0);
} }
extern int do_fchownat(int dfd, const char __user *filename, uid_t user, extern int do_fchownat(int dfd, const char __user *filename, uid_t user,
......
...@@ -855,9 +855,11 @@ __SYSCALL(__NR_clone3, sys_clone3) ...@@ -855,9 +855,11 @@ __SYSCALL(__NR_clone3, sys_clone3)
__SYSCALL(__NR_openat2, sys_openat2) __SYSCALL(__NR_openat2, sys_openat2)
#define __NR_pidfd_getfd 438 #define __NR_pidfd_getfd 438
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
#define __NR_faccessat2 439
__SYSCALL(__NR_faccessat2, sys_faccessat2)
#undef __NR_syscalls #undef __NR_syscalls
#define __NR_syscalls 439 #define __NR_syscalls 440
/* /*
* 32 bit systems traditionally used different * 32 bit systems traditionally used different
......
...@@ -84,10 +84,20 @@ ...@@ -84,10 +84,20 @@
#define DN_ATTRIB 0x00000020 /* File changed attributes */ #define DN_ATTRIB 0x00000020 /* File changed attributes */
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
/*
* The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is
* meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
* unlinkat. The two functions do completely different things and therefore,
* the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to
* faccessat would be undefined behavior, and thus treating it as equivalent to
* AT_EACCESS is valid undefined behavior.
*/
#define AT_FDCWD -100 /* Special value used to indicate #define AT_FDCWD -100 /* Special value used to indicate
openat should use the current openat should use the current
working directory. */ working directory. */
#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ #define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
#define AT_EACCESS 0x200 /* Test access permitted for
effective IDs, not real IDs. */
#define AT_REMOVEDIR 0x200 /* Remove directory instead of #define AT_REMOVEDIR 0x200 /* Remove directory instead of
unlinking file. */ unlinking file. */
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
......
...@@ -123,7 +123,10 @@ struct statx { ...@@ -123,7 +123,10 @@ struct statx {
__u32 stx_dev_major; /* ID of device containing file [uncond] */ __u32 stx_dev_major; /* ID of device containing file [uncond] */
__u32 stx_dev_minor; __u32 stx_dev_minor;
/* 0x90 */ /* 0x90 */
__u64 __spare2[14]; /* Spare space for future expansion */ __u64 stx_mnt_id;
__u64 __spare2;
/* 0xa0 */
__u64 __spare3[12]; /* Spare space for future expansion */
/* 0x100 */ /* 0x100 */
}; };
...@@ -148,9 +151,19 @@ struct statx { ...@@ -148,9 +151,19 @@ struct statx {
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ #define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */
#define STATX_ALL 0x00000fffU /* All currently supported flags */ #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
#ifndef __KERNEL__
/*
* This is deprecated, and shall remain the same value in the future. To avoid
* confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME)
* instead.
*/
#define STATX_ALL 0x00000fffU
#endif
/* /*
* Attributes to be found in stx_attributes and masked in stx_attributes_mask. * Attributes to be found in stx_attributes and masked in stx_attributes_mask.
* *
...@@ -168,6 +181,7 @@ struct statx { ...@@ -168,6 +181,7 @@ struct statx {
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ #define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ #define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
......
...@@ -216,7 +216,7 @@ int main(int argc, char **argv) ...@@ -216,7 +216,7 @@ int main(int argc, char **argv)
struct statx stx; struct statx stx;
int ret, raw = 0, atflag = AT_SYMLINK_NOFOLLOW; int ret, raw = 0, atflag = AT_SYMLINK_NOFOLLOW;
unsigned int mask = STATX_ALL; unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
for (argv++; *argv; argv++) { for (argv++; *argv; argv++) {
if (strcmp(*argv, "-F") == 0) { if (strcmp(*argv, "-F") == 0) {
......
...@@ -148,9 +148,18 @@ struct statx { ...@@ -148,9 +148,18 @@ struct statx {
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ #define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */
#define STATX_ALL 0x00000fffU /* All currently supported flags */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
#ifndef __KERNEL__
/*
* This is deprecated, and shall remain the same value in the future. To avoid
* confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME)
* instead.
*/
#define STATX_ALL 0x00000fffU
#endif
/* /*
* Attributes to be found in stx_attributes and masked in stx_attributes_mask. * Attributes to be found in stx_attributes and masked in stx_attributes_mask.
* *
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment