Commit fd9be4ce authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'ro-bind.b6' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'ro-bind.b6' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: (24 commits)
  [PATCH] r/o bind mounts: debugging for missed calls
  [PATCH] r/o bind mounts: honor mount writer counts at remount
  [PATCH] r/o bind mounts: track numbers of writers to mounts
  [PATCH] r/o bind mounts: check mnt instead of superblock directly
  [PATCH] r/o bind mounts: elevate count for xfs timestamp updates
  [PATCH] r/o bind mounts: make access() use new r/o helper
  [PATCH] r/o bind mounts: write counts for truncate()
  [PATCH] r/o bind mounts: elevate write count for chmod/chown callers
  [PATCH] r/o bind mounts: elevate write count for open()s
  [PATCH] r/o bind mounts: elevate write count for ioctls()
  [PATCH] r/o bind mounts: write count for file_update_time()
  [PATCH] r/o bind mounts: elevate write count for do_utimes()
  [PATCH] r/o bind mounts: write counts for touch_atime()
  [PATCH] r/o bind mounts: elevate write count for ncp_ioctl()
  [PATCH] r/o bind mounts: elevate write count for xattr_permission() callers
  [PATCH] r/o bind mounts: get write access for vfs_rename() callers
  [PATCH] r/o bind mounts: write counts for link/symlink
  [PATCH] r/o bind mounts: get callers of vfs_mknod/create/mkdir()
  [PATCH] r/o bind mounts: elevate write count for rmdir and unlink.
  [PATCH] r/o bind mounts: drop write during emergency remount
  ...
parents b1af9ccc ad775f5a
......@@ -12,6 +12,7 @@
#include <linux/time.h>
#include <linux/sched.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/smp_lock.h>
#include <asm/current.h>
#include <asm/uaccess.h>
......@@ -23,6 +24,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct ext2_inode_info *ei = EXT2_I(inode);
unsigned int flags;
unsigned short rsv_window_size;
int ret;
ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg);
......@@ -34,14 +36,19 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case EXT2_IOC_SETFLAGS: {
unsigned int oldflags;
if (IS_RDONLY(inode))
return -EROFS;
ret = mnt_want_write(filp->f_path.mnt);
if (ret)
return ret;
if (!is_owner_or_cap(inode))
return -EACCES;
if (!is_owner_or_cap(inode)) {
ret = -EACCES;
goto setflags_out;
}
if (get_user(flags, (int __user *) arg))
return -EFAULT;
if (get_user(flags, (int __user *) arg)) {
ret = -EFAULT;
goto setflags_out;
}
if (!S_ISDIR(inode->i_mode))
flags &= ~EXT2_DIRSYNC_FL;
......@@ -50,7 +57,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode)) {
mutex_unlock(&inode->i_mutex);
return -EPERM;
ret = -EPERM;
goto setflags_out;
}
oldflags = ei->i_flags;
......@@ -63,7 +71,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
mutex_unlock(&inode->i_mutex);
return -EPERM;
ret = -EPERM;
goto setflags_out;
}
}
......@@ -75,20 +84,26 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
ext2_set_inode_flags(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
return 0;
setflags_out:
mnt_drop_write(filp->f_path.mnt);
return ret;
}
case EXT2_IOC_GETVERSION:
return put_user(inode->i_generation, (int __user *) arg);
case EXT2_IOC_SETVERSION:
if (!is_owner_or_cap(inode))
return -EPERM;
if (IS_RDONLY(inode))
return -EROFS;
if (get_user(inode->i_generation, (int __user *) arg))
return -EFAULT;
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
return 0;
ret = mnt_want_write(filp->f_path.mnt);
if (ret)
return ret;
if (get_user(inode->i_generation, (int __user *) arg)) {
ret = -EFAULT;
} else {
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
}
mnt_drop_write(filp->f_path.mnt);
return ret;
case EXT2_IOC_GETRSVSZ:
if (test_opt(inode->i_sb, RESERVATION)
&& S_ISREG(inode->i_mode)
......@@ -102,15 +117,16 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
return -ENOTTY;
if (IS_RDONLY(inode))
return -EROFS;
if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
if (!is_owner_or_cap(inode))
return -EACCES;
if (get_user(rsv_window_size, (int __user *)arg))
return -EFAULT;
ret = mnt_want_write(filp->f_path.mnt);
if (ret)
return ret;
if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS)
rsv_window_size = EXT2_MAX_RESERVE_BLOCKS;
......@@ -131,6 +147,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
rsv->rsv_goal_size = rsv_window_size;
}
mutex_unlock(&ei->truncate_mutex);
mnt_drop_write(filp->f_path.mnt);
return 0;
}
default:
......
......@@ -12,6 +12,7 @@
#include <linux/capability.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
#include <linux/mount.h>
#include <linux/time.h>
#include <linux/compat.h>
#include <linux/smp_lock.h>
......@@ -38,14 +39,19 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
unsigned int oldflags;
unsigned int jflag;
if (IS_RDONLY(inode))
return -EROFS;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
if (!is_owner_or_cap(inode))
return -EACCES;
if (!is_owner_or_cap(inode)) {
err = -EACCES;
goto flags_out;
}
if (get_user(flags, (int __user *) arg))
return -EFAULT;
if (get_user(flags, (int __user *) arg)) {
err = -EFAULT;
goto flags_out;
}
if (!S_ISDIR(inode->i_mode))
flags &= ~EXT3_DIRSYNC_FL;
......@@ -54,7 +60,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode)) {
mutex_unlock(&inode->i_mutex);
return -EPERM;
err = -EPERM;
goto flags_out;
}
oldflags = ei->i_flags;
......@@ -70,7 +77,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
mutex_unlock(&inode->i_mutex);
return -EPERM;
err = -EPERM;
goto flags_out;
}
}
......@@ -81,7 +89,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
if (!capable(CAP_SYS_RESOURCE)) {
mutex_unlock(&inode->i_mutex);
return -EPERM;
err = -EPERM;
goto flags_out;
}
}
......@@ -89,7 +98,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
handle = ext3_journal_start(inode, 1);
if (IS_ERR(handle)) {
mutex_unlock(&inode->i_mutex);
return PTR_ERR(handle);
err = PTR_ERR(handle);
goto flags_out;
}
if (IS_SYNC(inode))
handle->h_sync = 1;
......@@ -115,6 +125,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
err = ext3_change_inode_journal_flag(inode, jflag);
mutex_unlock(&inode->i_mutex);
flags_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
case EXT3_IOC_GETVERSION:
......@@ -129,14 +141,18 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
if (!is_owner_or_cap(inode))
return -EPERM;
if (IS_RDONLY(inode))
return -EROFS;
if (get_user(generation, (int __user *) arg))
return -EFAULT;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
if (get_user(generation, (int __user *) arg)) {
err = -EFAULT;
goto setversion_out;
}
handle = ext3_journal_start(inode, 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto setversion_out;
}
err = ext3_reserve_inode_write(handle, inode, &iloc);
if (err == 0) {
inode->i_ctime = CURRENT_TIME_SEC;
......@@ -144,6 +160,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
err = ext3_mark_iloc_dirty(handle, inode, &iloc);
}
ext3_journal_stop(handle);
setversion_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
#ifdef CONFIG_JBD_DEBUG
......@@ -179,18 +197,24 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
}
return -ENOTTY;
case EXT3_IOC_SETRSVSZ: {
int err;
if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
return -ENOTTY;
if (IS_RDONLY(inode))
return -EROFS;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
if (!is_owner_or_cap(inode))
return -EACCES;
if (!is_owner_or_cap(inode)) {
err = -EACCES;
goto setrsvsz_out;
}
if (get_user(rsv_window_size, (int __user *)arg))
return -EFAULT;
if (get_user(rsv_window_size, (int __user *)arg)) {
err = -EFAULT;
goto setrsvsz_out;
}
if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS)
rsv_window_size = EXT3_MAX_RESERVE_BLOCKS;
......@@ -208,7 +232,9 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
rsv->rsv_goal_size = rsv_window_size;
}
mutex_unlock(&ei->truncate_mutex);
return 0;
setrsvsz_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
case EXT3_IOC_GROUP_EXTEND: {
ext3_fsblk_t n_blocks_count;
......@@ -218,17 +244,20 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
if (IS_RDONLY(inode))
return -EROFS;
if (get_user(n_blocks_count, (__u32 __user *)arg))
return -EFAULT;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
if (get_user(n_blocks_count, (__u32 __user *)arg)) {
err = -EFAULT;
goto group_extend_out;
}
err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
journal_lock_updates(EXT3_SB(sb)->s_journal);
journal_flush(EXT3_SB(sb)->s_journal);
journal_unlock_updates(EXT3_SB(sb)->s_journal);
group_extend_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
case EXT3_IOC_GROUP_ADD: {
......@@ -239,18 +268,22 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
if (IS_RDONLY(inode))
return -EROFS;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg,
sizeof(input)))
return -EFAULT;
sizeof(input))) {
err = -EFAULT;
goto group_add_out;
}
err = ext3_group_add(sb, &input);
journal_lock_updates(EXT3_SB(sb)->s_journal);
journal_flush(EXT3_SB(sb)->s_journal);
journal_unlock_updates(EXT3_SB(sb)->s_journal);
group_add_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
......
......@@ -15,6 +15,7 @@
#include <linux/time.h>
#include <linux/compat.h>
#include <linux/smp_lock.h>
#include <linux/mount.h>
#include <asm/uaccess.h>
int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
......@@ -38,24 +39,25 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
unsigned int oldflags;
unsigned int jflag;
if (IS_RDONLY(inode))
return -EROFS;
if (!is_owner_or_cap(inode))
return -EACCES;
if (get_user(flags, (int __user *) arg))
return -EFAULT;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
if (!S_ISDIR(inode->i_mode))
flags &= ~EXT4_DIRSYNC_FL;
err = -EPERM;
mutex_lock(&inode->i_mutex);
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode)) {
mutex_unlock(&inode->i_mutex);
return -EPERM;
}
if (IS_NOQUOTA(inode))
goto flags_out;
oldflags = ei->i_flags;
/* The JOURNAL_DATA flag is modifiable only by root */
......@@ -68,10 +70,8 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
* This test looks nicer. Thanks to Pauline Middelink
*/
if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
mutex_unlock(&inode->i_mutex);
return -EPERM;
}
if (!capable(CAP_LINUX_IMMUTABLE))
goto flags_out;
}
/*
......@@ -79,17 +79,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
* the relevant capability.
*/
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
if (!capable(CAP_SYS_RESOURCE)) {
mutex_unlock(&inode->i_mutex);
return -EPERM;
}
if (!capable(CAP_SYS_RESOURCE))
goto flags_out;
}
handle = ext4_journal_start(inode, 1);
if (IS_ERR(handle)) {
mutex_unlock(&inode->i_mutex);
return PTR_ERR(handle);
err = PTR_ERR(handle);
goto flags_out;
}
if (IS_SYNC(inode))
handle->h_sync = 1;
......@@ -107,14 +104,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
flags_err:
ext4_journal_stop(handle);
if (err) {
mutex_unlock(&inode->i_mutex);
return err;
}
if (err)
goto flags_out;
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
err = ext4_change_inode_journal_flag(inode, jflag);
flags_out:
mutex_unlock(&inode->i_mutex);
mnt_drop_write(filp->f_path.mnt);
return err;
}
case EXT4_IOC_GETVERSION:
......@@ -129,14 +126,20 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
if (!is_owner_or_cap(inode))
return -EPERM;
if (IS_RDONLY(inode))
return -EROFS;
if (get_user(generation, (int __user *) arg))
return -EFAULT;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
if (get_user(generation, (int __user *) arg)) {
err = -EFAULT;
goto setversion_out;
}
handle = ext4_journal_start(inode, 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto setversion_out;
}
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err == 0) {
inode->i_ctime = ext4_current_time(inode);
......@@ -144,6 +147,8 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}
ext4_journal_stop(handle);
setversion_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
#ifdef CONFIG_JBD2_DEBUG
......@@ -179,19 +184,21 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
}
return -ENOTTY;
case EXT4_IOC_SETRSVSZ: {
int err;
if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
return -ENOTTY;
if (IS_RDONLY(inode))
return -EROFS;
if (!is_owner_or_cap(inode))
return -EACCES;
if (get_user(rsv_window_size, (int __user *)arg))
return -EFAULT;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS)
rsv_window_size = EXT4_MAX_RESERVE_BLOCKS;
......@@ -208,6 +215,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
rsv->rsv_goal_size = rsv_window_size;
}
up_write(&ei->i_data_sem);
mnt_drop_write(filp->f_path.mnt);
return 0;
}
case EXT4_IOC_GROUP_EXTEND: {
......@@ -218,16 +226,18 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
if (IS_RDONLY(inode))
return -EROFS;
if (get_user(n_blocks_count, (__u32 __user *)arg))
return -EFAULT;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
mnt_drop_write(filp->f_path.mnt);
return err;
}
......@@ -239,17 +249,19 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
if (IS_RDONLY(inode))
return -EROFS;
if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
sizeof(input)))
return -EFAULT;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
err = ext4_group_add(sb, &input);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
mnt_drop_write(filp->f_path.mnt);
return err;
}
......
......@@ -8,6 +8,7 @@
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/time.h>
#include <linux/msdos_fs.h>
#include <linux/smp_lock.h>
......@@ -46,10 +47,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
mutex_lock(&inode->i_mutex);
if (IS_RDONLY(inode)) {
err = -EROFS;
goto up;
}
err = mnt_want_write(filp->f_path.mnt);
if (err)
goto up_no_drop_write;
/*
* ATTR_VOLUME and ATTR_DIR cannot be changed; this also
......@@ -105,7 +105,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED;
mark_inode_dirty(inode);
up:
up:
mnt_drop_write(filp->f_path.mnt);
up_no_drop_write:
mutex_unlock(&inode->i_mutex);
return err;
}
......
......@@ -42,6 +42,7 @@ static inline void file_free_rcu(struct rcu_head *head)
static inline void file_free(struct file *f)
{
percpu_counter_dec(&nr_files);
file_check_state(f);
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
}
......@@ -199,6 +200,18 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
file->f_mapping = dentry->d_inode->i_mapping;
file->f_mode = mode;
file->f_op = fop;
/*
* These mounts don't really matter in practice
* for r/o bind mounts. They aren't userspace-
* visible. We do this for consistency, and so
* that we can do debugging checks at __fput()
*/
if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
file_take_write(file);
error = mnt_want_write(mnt);
WARN_ON(error);
}
return error;
}
EXPORT_SYMBOL(init_file);
......@@ -211,6 +224,31 @@ void fput(struct file *file)
EXPORT_SYMBOL(fput);
/**
* drop_file_write_access - give up ability to write to a file
* @file: the file to which we will stop writing
*
* This is a central place which will give up the ability
* to write to @file, along with access to write through
* its vfsmount.
*/
void drop_file_write_access(struct file *file)
{
struct vfsmount *mnt = file->f_path.mnt;
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
put_write_access(inode);
if (special_file(inode->i_mode))
return;
if (file_check_writeable(file) != 0)
return;
mnt_drop_write(mnt);
file_release_write(file);
}
EXPORT_SYMBOL_GPL(drop_file_write_access);
/* __fput is called from task context when aio completion releases the last
* last use of a struct file *. Do not use otherwise.
*/
......@@ -236,10 +274,10 @@ void __fput(struct file *file)
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
cdev_put(inode->i_cdev);
fops_put(file->f_op);
if (file->f_mode & FMODE_WRITE)
put_write_access(inode);
put_pid(file->f_owner.pid);
file_kill(file);
if (file->f_mode & FMODE_WRITE)
drop_file_write_access(file);
file->f_path.dentry = NULL;
file->f_path.mnt = NULL;
file_free(file);
......
......@@ -14,6 +14,7 @@
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/sched.h>
#include <linux/xattr.h>
#include <asm/uaccess.h>
......@@ -35,25 +36,32 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */
return put_user(flags, (int __user *)arg);
case HFSPLUS_IOC_EXT2_SETFLAGS: {
if (IS_RDONLY(inode))
return -EROFS;
if (!is_owner_or_cap(inode))
return -EACCES;
if (get_user(flags, (int __user *)arg))
return -EFAULT;
int err = 0;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
if (!is_owner_or_cap(inode)) {
err = -EACCES;
goto setflags_out;
}
if (get_user(flags, (int __user *)arg)) {
err = -EFAULT;
goto setflags_out;
}
if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) ||
HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) {
if (!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
if (!capable(CAP_LINUX_IMMUTABLE)) {
err = -EPERM;
goto setflags_out;
}
}
/* don't silently ignore unsupported ext2 flags */
if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL))
return -EOPNOTSUPP;
if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) {
err = -EOPNOTSUPP;
goto setflags_out;
}
if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */
inode->i_flags |= S_IMMUTABLE;
HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE;
......@@ -75,7 +83,9 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
return 0;
setflags_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
default:
return -ENOTTY;
......
......@@ -1199,42 +1199,37 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
struct inode *inode = dentry->d_inode;
struct timespec now;
if (inode->i_flags & S_NOATIME)
if (mnt_want_write(mnt))
return;
if (inode->i_flags & S_NOATIME)
goto out;
if (IS_NOATIME(inode))
return;
goto out;
if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
return;
goto out;
/*
* We may have a NULL vfsmount when coming from NFSD
*/
if (mnt) {
if (mnt->mnt_flags & MNT_NOATIME)
return;
if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
return;
if (mnt->mnt_flags & MNT_RELATIME) {
/*
* With relative atime, only update atime if the
* previous atime is earlier than either the ctime or
* mtime.
*/
if (timespec_compare(&inode->i_mtime,
&inode->i_atime) < 0 &&
timespec_compare(&inode->i_ctime,
&inode->i_atime) < 0)
return;
}
if (mnt->mnt_flags & MNT_NOATIME)
goto out;
if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
goto out;
if (mnt->mnt_flags & MNT_RELATIME) {
/*
* With relative atime, only update atime if the previous
* atime is earlier than either the ctime or mtime.
*/
if (timespec_compare(&inode->i_mtime, &inode->i_atime) < 0 &&
timespec_compare(&inode->i_ctime, &inode->i_atime) < 0)
goto out;
}
now = current_fs_time(inode->i_sb);
if (timespec_equal(&inode->i_atime, &now))
return;
goto out;
inode->i_atime = now;
mark_inode_dirty_sync(inode);
out:
mnt_drop_write(mnt);
}
EXPORT_SYMBOL(touch_atime);
......@@ -1255,10 +1250,13 @@ void file_update_time(struct file *file)
struct inode *inode = file->f_path.dentry->d_inode;
struct timespec now;
int sync_it = 0;
int err;
if (IS_NOCMTIME(inode))
return;
if (IS_RDONLY(inode))
err = mnt_want_write(file->f_path.mnt);
if (err)
return;
now = current_fs_time(inode->i_sb);
......@@ -1279,6 +1277,7 @@ void file_update_time(struct file *file)
if (sync_it)
mark_inode_dirty_sync(inode);
mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(file_update_time);
......
......@@ -8,6 +8,7 @@
#include <linux/fs.h>
#include <linux/ctype.h>
#include <linux/capability.h>
#include <linux/mount.h>
#include <linux/time.h>
#include <linux/sched.h>
#include <asm/current.h>
......@@ -65,23 +66,30 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return put_user(flags, (int __user *) arg);
case JFS_IOC_SETFLAGS: {
unsigned int oldflags;
int err;
if (IS_RDONLY(inode))
return -EROFS;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
if (!is_owner_or_cap(inode))
return -EACCES;
if (get_user(flags, (int __user *) arg))
return -EFAULT;
if (!is_owner_or_cap(inode)) {
err = -EACCES;
goto setflags_out;
}
if (get_user(flags, (int __user *) arg)) {
err = -EFAULT;
goto setflags_out;
}
flags = jfs_map_ext2(flags, 1);
if (!S_ISDIR(inode->i_mode))
flags &= ~JFS_DIRSYNC_FL;
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode))
return -EPERM;
if (IS_NOQUOTA(inode)) {
err = -EPERM;
goto setflags_out;
}
/* Lock against other parallel changes of flags */
mutex_lock(&inode->i_mutex);
......@@ -98,7 +106,8 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
(JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
mutex_unlock(&inode->i_mutex);
return -EPERM;
err = -EPERM;
goto setflags_out;
}
}
......@@ -110,7 +119,9 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
mutex_unlock(&inode->i_mutex);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
return 0;
setflags_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
default:
return -ENOTTY;
......
......@@ -1623,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
return -EACCES;
flag &= ~O_TRUNC;
} else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE))
return -EROFS;
}
error = vfs_permission(nd, acc_mode);
if (error)
......@@ -1677,7 +1676,12 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
return 0;
}
static int open_namei_create(struct nameidata *nd, struct path *path,
/*
* Be careful about ever adding any more callers of this
* function. Its flags must be in the namei format, not
* what get passed to sys_open().
*/
static int __open_namei_create(struct nameidata *nd, struct path *path,
int flag, int mode)
{
int error;
......@@ -1696,26 +1700,56 @@ static int open_namei_create(struct nameidata *nd, struct path *path,
}
/*
* open_namei()
* Note that while the flag value (low two bits) for sys_open means:
* 00 - read-only
* 01 - write-only
* 10 - read-write
* 11 - special
* it is changed into
* 00 - no permissions needed
* 01 - read-permission
* 10 - write-permission
* 11 - read-write
* for the internal routines (ie open_namei()/follow_link() etc)
* This is more logical, and also allows the 00 "no perm needed"
* to be used for symlinks (where the permissions are checked
* later).
*
* namei for open - this is in fact almost the whole open-routine.
*
* Note that the low bits of "flag" aren't the same as in the open
* system call - they are 00 - no permissions needed
* 01 - read permission needed
* 10 - write permission needed
* 11 - read/write permissions needed
* which is a lot more logical, and also allows the "no perm" needed
* for symlinks (where the permissions are checked later).
* SMP-safe
*/
static inline int open_to_namei_flags(int flag)
{
if ((flag+1) & O_ACCMODE)
flag++;
return flag;
}
static int open_will_write_to_fs(int flag, struct inode *inode)
{
/*
* We'll never write to the fs underlying
* a device file.
*/
if (special_file(inode->i_mode))
return 0;
return (flag & O_TRUNC);
}
/*
* Note that the low bits of the passed in "open_flag"
* are not the same as in the local variable "flag". See
* open_to_namei_flags() for more details.
*/
int open_namei(int dfd, const char *pathname, int flag,
int mode, struct nameidata *nd)
struct file *do_filp_open(int dfd, const char *pathname,
int open_flag, int mode)
{
struct file *filp;
struct nameidata nd;
int acc_mode, error;
struct path path;
struct dentry *dir;
int count = 0;
int will_write;
int flag = open_to_namei_flags(open_flag);
acc_mode = ACC_MODE(flag);
......@@ -1733,18 +1767,19 @@ int open_namei(int dfd, const char *pathname, int flag,
*/
if (!(flag & O_CREAT)) {
error = path_lookup_open(dfd, pathname, lookup_flags(flag),
nd, flag);
&nd, flag);
if (error)
return error;
return ERR_PTR(error);
goto ok;
}
/*
* Create - we need to know the parent.
*/
error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
error = path_lookup_create(dfd, pathname, LOOKUP_PARENT,
&nd, flag, mode);
if (error)
return error;
return ERR_PTR(error);
/*
* We have the parent and last component. First of all, check
......@@ -1752,14 +1787,14 @@ int open_namei(int dfd, const char *pathname, int flag,
* will not do.
*/
error = -EISDIR;
if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
goto exit;
dir = nd->path.dentry;
nd->flags &= ~LOOKUP_PARENT;
dir = nd.path.dentry;
nd.flags &= ~LOOKUP_PARENT;
mutex_lock(&dir->d_inode->i_mutex);
path.dentry = lookup_hash(nd);
path.mnt = nd->path.mnt;
path.dentry = lookup_hash(&nd);
path.mnt = nd.path.mnt;
do_last:
error = PTR_ERR(path.dentry);
......@@ -1768,18 +1803,31 @@ int open_namei(int dfd, const char *pathname, int flag,
goto exit;
}
if (IS_ERR(nd->intent.open.file)) {
mutex_unlock(&dir->d_inode->i_mutex);
error = PTR_ERR(nd->intent.open.file);
goto exit_dput;
if (IS_ERR(nd.intent.open.file)) {
error = PTR_ERR(nd.intent.open.file);
goto exit_mutex_unlock;
}
/* Negative dentry, just create the file */
if (!path.dentry->d_inode) {
error = open_namei_create(nd, &path, flag, mode);
/*
* This write is needed to ensure that a
* ro->rw transition does not occur between
* the time when the file is created and when
* a permanent write count is taken through
* the 'struct file' in nameidata_to_filp().
*/
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit_mutex_unlock;
error = __open_namei_create(&nd, &path, flag, mode);
if (error) {
mnt_drop_write(nd.path.mnt);
goto exit;
return 0;
}
filp = nameidata_to_filp(&nd, open_flag);
mnt_drop_write(nd.path.mnt);
return filp;
}
/*
......@@ -1804,23 +1852,52 @@ int open_namei(int dfd, const char *pathname, int flag,
if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
goto do_link;
path_to_nameidata(&path, nd);
path_to_nameidata(&path, &nd);
error = -EISDIR;
if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
goto exit;
ok:
error = may_open(nd, acc_mode, flag);
if (error)
/*
* Consider:
* 1. may_open() truncates a file
* 2. a rw->ro mount transition occurs
* 3. nameidata_to_filp() fails due to
* the ro mount.
* That would be inconsistent, and should
* be avoided. Taking this mnt write here
* ensures that (2) can not occur.
*/
will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
if (will_write) {
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit;
}
error = may_open(&nd, acc_mode, flag);
if (error) {
if (will_write)
mnt_drop_write(nd.path.mnt);
goto exit;
return 0;
}
filp = nameidata_to_filp(&nd, open_flag);
/*
* It is now safe to drop the mnt write
* because the filp has had a write taken
* on its behalf.
*/
if (will_write)
mnt_drop_write(nd.path.mnt);
return filp;
exit_mutex_unlock:
mutex_unlock(&dir->d_inode->i_mutex);
exit_dput:
path_put_conditional(&path, nd);
path_put_conditional(&path, &nd);
exit:
if (!IS_ERR(nd->intent.open.file))
release_open_intent(nd);
path_put(&nd->path);
return error;
if (!IS_ERR(nd.intent.open.file))
release_open_intent(&nd);
path_put(&nd.path);
return ERR_PTR(error);
do_link:
error = -ELOOP;
......@@ -1836,42 +1913,59 @@ int open_namei(int dfd, const char *pathname, int flag,
* stored in nd->last.name and we will have to putname() it when we
* are done. Procfs-like symlinks just set LAST_BIND.
*/
nd->flags |= LOOKUP_PARENT;
error = security_inode_follow_link(path.dentry, nd);
nd.flags |= LOOKUP_PARENT;
error = security_inode_follow_link(path.dentry, &nd);
if (error)
goto exit_dput;
error = __do_follow_link(&path, nd);
error = __do_follow_link(&path, &nd);
if (error) {
/* Does someone understand code flow here? Or it is only
* me so stupid? Anathema to whoever designed this non-sense
* with "intent.open".
*/
release_open_intent(nd);
return error;
release_open_intent(&nd);
return ERR_PTR(error);
}
nd->flags &= ~LOOKUP_PARENT;
if (nd->last_type == LAST_BIND)
nd.flags &= ~LOOKUP_PARENT;
if (nd.last_type == LAST_BIND)
goto ok;
error = -EISDIR;
if (nd->last_type != LAST_NORM)
if (nd.last_type != LAST_NORM)
goto exit;
if (nd->last.name[nd->last.len]) {
__putname(nd->last.name);
if (nd.last.name[nd.last.len]) {
__putname(nd.last.name);
goto exit;
}
error = -ELOOP;
if (count++==32) {
__putname(nd->last.name);
__putname(nd.last.name);
goto exit;
}
dir = nd->path.dentry;
dir = nd.path.dentry;
mutex_lock(&dir->d_inode->i_mutex);
path.dentry = lookup_hash(nd);
path.mnt = nd->path.mnt;
__putname(nd->last.name);
path.dentry = lookup_hash(&nd);
path.mnt = nd.path.mnt;
__putname(nd.last.name);
goto do_last;
}
/**
* filp_open - open file and return file pointer
*
* @filename: path to open
* @flags: open flags as per the open(2) second argument
* @mode: mode for the new file if O_CREAT is set, else ignored
*
* This is the helper to open a file from kernelspace if you really
* have to. But in generally you should not do this, so please move
* along, nothing to see here..
*/
struct file *filp_open(const char *filename, int flags, int mode)
{
return do_filp_open(AT_FDCWD, filename, flags, mode);
}
EXPORT_SYMBOL(filp_open);
/**
* lookup_create - lookup a dentry, creating it if it doesn't exist
* @nd: nameidata info
......@@ -1945,6 +2039,23 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
return error;
}
static int may_mknod(mode_t mode)
{
switch (mode & S_IFMT) {
case S_IFREG:
case S_IFCHR:
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
case 0: /* zero mode translates to S_IFREG */
return 0;
case S_IFDIR:
return -EPERM;
default:
return -EINVAL;
}
}
asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
unsigned dev)
{
......@@ -1963,12 +2074,19 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
if (error)
goto out;
dentry = lookup_create(&nd, 0);
error = PTR_ERR(dentry);
if (IS_ERR(dentry)) {
error = PTR_ERR(dentry);
goto out_unlock;
}
if (!IS_POSIXACL(nd.path.dentry->d_inode))
mode &= ~current->fs->umask;
if (!IS_ERR(dentry)) {
switch (mode & S_IFMT) {
error = may_mknod(mode);
if (error)
goto out_dput;
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_dput;
switch (mode & S_IFMT) {
case 0: case S_IFREG:
error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
break;
......@@ -1979,14 +2097,11 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
case S_IFIFO: case S_IFSOCK:
error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
break;
case S_IFDIR:
error = -EPERM;
break;
default:
error = -EINVAL;
}
dput(dentry);
}
mnt_drop_write(nd.path.mnt);
out_dput:
dput(dentry);
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
path_put(&nd.path);
out:
......@@ -2044,7 +2159,12 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
if (!IS_POSIXACL(nd.path.dentry->d_inode))
mode &= ~current->fs->umask;
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_dput;
error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
mnt_drop_write(nd.path.mnt);
out_dput:
dput(dentry);
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
......@@ -2151,7 +2271,12 @@ static long do_rmdir(int dfd, const char __user *pathname)
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit2;
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit3;
error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
mnt_drop_write(nd.path.mnt);
exit3:
dput(dentry);
exit2:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
......@@ -2232,7 +2357,11 @@ static long do_unlinkat(int dfd, const char __user *pathname)
inode = dentry->d_inode;
if (inode)
atomic_inc(&inode->i_count);
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit2;
error = vfs_unlink(nd.path.dentry->d_inode, dentry);
mnt_drop_write(nd.path.mnt);
exit2:
dput(dentry);
}
......@@ -2313,7 +2442,12 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
if (IS_ERR(dentry))
goto out_unlock;
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_dput;
error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO);
mnt_drop_write(nd.path.mnt);
out_dput:
dput(dentry);
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
......@@ -2408,7 +2542,12 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto out_unlock;
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_dput;
error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry);
mnt_drop_write(nd.path.mnt);
out_dput:
dput(new_dentry);
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
......@@ -2634,8 +2773,12 @@ static int do_rename(int olddfd, const char *oldname,
if (new_dentry == trap)
goto exit5;
error = mnt_want_write(oldnd.path.mnt);
if (error)
goto exit5;
error = vfs_rename(old_dir->d_inode, old_dentry,
new_dir->d_inode, new_dentry);
mnt_drop_write(oldnd.path.mnt);
exit5:
dput(new_dentry);
exit4:
......
......@@ -17,6 +17,7 @@
#include <linux/quotaops.h>
#include <linux/acct.h>
#include <linux/capability.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/seq_file.h>
......@@ -55,6 +56,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
return tmp & (HASH_SIZE - 1);
}
#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
struct vfsmount *alloc_vfsmnt(const char *name)
{
struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
......@@ -68,6 +71,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_share);
INIT_LIST_HEAD(&mnt->mnt_slave_list);
INIT_LIST_HEAD(&mnt->mnt_slave);
atomic_set(&mnt->__mnt_writers, 0);
if (name) {
int size = strlen(name) + 1;
char *newname = kmalloc(size, GFP_KERNEL);
......@@ -80,6 +84,263 @@ struct vfsmount *alloc_vfsmnt(const char *name)
return mnt;
}
/*
* Most r/o checks on a fs are for operations that take
* discrete amounts of time, like a write() or unlink().
* We must keep track of when those operations start
* (for permission checks) and when they end, so that
* we can determine when writes are able to occur to
* a filesystem.
*/
/*
* __mnt_is_readonly: check whether a mount is read-only
* @mnt: the mount to check for its write status
*
* This shouldn't be used directly ouside of the VFS.
* It does not guarantee that the filesystem will stay
* r/w, just that it is right *now*. This can not and
* should not be used in place of IS_RDONLY(inode).
* mnt_want/drop_write() will _keep_ the filesystem
* r/w.
*/
int __mnt_is_readonly(struct vfsmount *mnt)
{
if (mnt->mnt_flags & MNT_READONLY)
return 1;
if (mnt->mnt_sb->s_flags & MS_RDONLY)
return 1;
return 0;
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);
struct mnt_writer {
/*
* If holding multiple instances of this lock, they
* must be ordered by cpu number.
*/
spinlock_t lock;
struct lock_class_key lock_class; /* compiles out with !lockdep */
unsigned long count;
struct vfsmount *mnt;
} ____cacheline_aligned_in_smp;
static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
static int __init init_mnt_writers(void)
{
int cpu;
for_each_possible_cpu(cpu) {
struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
spin_lock_init(&writer->lock);
lockdep_set_class(&writer->lock, &writer->lock_class);
writer->count = 0;
}
return 0;
}
fs_initcall(init_mnt_writers);
static void unlock_mnt_writers(void)
{
int cpu;
struct mnt_writer *cpu_writer;
for_each_possible_cpu(cpu) {
cpu_writer = &per_cpu(mnt_writers, cpu);
spin_unlock(&cpu_writer->lock);
}
}
static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
{
if (!cpu_writer->mnt)
return;
/*
* This is in case anyone ever leaves an invalid,
* old ->mnt and a count of 0.
*/
if (!cpu_writer->count)
return;
atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
cpu_writer->count = 0;
}
/*
* must hold cpu_writer->lock
*/
static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
struct vfsmount *mnt)
{
if (cpu_writer->mnt == mnt)
return;
__clear_mnt_count(cpu_writer);
cpu_writer->mnt = mnt;
}
/*
* Most r/o checks on a fs are for operations that take
* discrete amounts of time, like a write() or unlink().
* We must keep track of when those operations start
* (for permission checks) and when they end, so that
* we can determine when writes are able to occur to
* a filesystem.
*/
/**
* mnt_want_write - get write access to a mount
* @mnt: the mount on which to take a write
*
* This tells the low-level filesystem that a write is
* about to be performed to it, and makes sure that
* writes are allowed before returning success. When
* the write operation is finished, mnt_drop_write()
* must be called. This is effectively a refcount.
*/
int mnt_want_write(struct vfsmount *mnt)
{
int ret = 0;
struct mnt_writer *cpu_writer;
cpu_writer = &get_cpu_var(mnt_writers);
spin_lock(&cpu_writer->lock);
if (__mnt_is_readonly(mnt)) {
ret = -EROFS;
goto out;
}
use_cpu_writer_for_mount(cpu_writer, mnt);
cpu_writer->count++;
out:
spin_unlock(&cpu_writer->lock);
put_cpu_var(mnt_writers);
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
static void lock_mnt_writers(void)
{
int cpu;
struct mnt_writer *cpu_writer;
for_each_possible_cpu(cpu) {
cpu_writer = &per_cpu(mnt_writers, cpu);
spin_lock(&cpu_writer->lock);
__clear_mnt_count(cpu_writer);
cpu_writer->mnt = NULL;
}
}
/*
* These per-cpu write counts are not guaranteed to have
* matched increments and decrements on any given cpu.
* A file open()ed for write on one cpu and close()d on
* another cpu will imbalance this count. Make sure it
* does not get too far out of whack.
*/
static void handle_write_count_underflow(struct vfsmount *mnt)
{
if (atomic_read(&mnt->__mnt_writers) >=
MNT_WRITER_UNDERFLOW_LIMIT)
return;
/*
* It isn't necessary to hold all of the locks
* at the same time, but doing it this way makes
* us share a lot more code.
*/
lock_mnt_writers();
/*
* vfsmount_lock is for mnt_flags.
*/
spin_lock(&vfsmount_lock);
/*
* If coalescing the per-cpu writer counts did not
* get us back to a positive writer count, we have
* a bug.
*/
if ((atomic_read(&mnt->__mnt_writers) < 0) &&
!(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
printk(KERN_DEBUG "leak detected on mount(%p) writers "
"count: %d\n",
mnt, atomic_read(&mnt->__mnt_writers));
WARN_ON(1);
/* use the flag to keep the dmesg spam down */
mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
}
spin_unlock(&vfsmount_lock);
unlock_mnt_writers();
}
/**
* mnt_drop_write - give up write access to a mount
* @mnt: the mount on which to give up write access
*
* Tells the low-level filesystem that we are done
* performing writes to it. Must be matched with
* mnt_want_write() call above.
*/
void mnt_drop_write(struct vfsmount *mnt)
{
int must_check_underflow = 0;
struct mnt_writer *cpu_writer;
cpu_writer = &get_cpu_var(mnt_writers);
spin_lock(&cpu_writer->lock);
use_cpu_writer_for_mount(cpu_writer, mnt);
if (cpu_writer->count > 0) {
cpu_writer->count--;
} else {
must_check_underflow = 1;
atomic_dec(&mnt->__mnt_writers);
}
spin_unlock(&cpu_writer->lock);
/*
* Logically, we could call this each time,
* but the __mnt_writers cacheline tends to
* be cold, and makes this expensive.
*/
if (must_check_underflow)
handle_write_count_underflow(mnt);
/*
* This could be done right after the spinlock
* is taken because the spinlock keeps us on
* the cpu, and disables preemption. However,
* putting it here bounds the amount that
* __mnt_writers can underflow. Without it,
* we could theoretically wrap __mnt_writers.
*/
put_cpu_var(mnt_writers);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);
static int mnt_make_readonly(struct vfsmount *mnt)
{
int ret = 0;
lock_mnt_writers();
/*
* With all the locks held, this value is stable
*/
if (atomic_read(&mnt->__mnt_writers) > 0) {
ret = -EBUSY;
goto out;
}
/*
* nobody can do a successful mnt_want_write() with all
* of the counts in MNT_DENIED_WRITE and the locks held.
*/
spin_lock(&vfsmount_lock);
if (!ret)
mnt->mnt_flags |= MNT_READONLY;
spin_unlock(&vfsmount_lock);
out:
unlock_mnt_writers();
return ret;
}
static void __mnt_unmake_readonly(struct vfsmount *mnt)
{
spin_lock(&vfsmount_lock);
mnt->mnt_flags &= ~MNT_READONLY;
spin_unlock(&vfsmount_lock);
}
int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
{
mnt->mnt_sb = sb;
......@@ -271,7 +532,36 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
static inline void __mntput(struct vfsmount *mnt)
{
int cpu;
struct super_block *sb = mnt->mnt_sb;
/*
* We don't have to hold all of the locks at the
* same time here because we know that we're the
* last reference to mnt and that no new writers
* can come in.
*/
for_each_possible_cpu(cpu) {
struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
if (cpu_writer->mnt != mnt)
continue;
spin_lock(&cpu_writer->lock);
atomic_add(cpu_writer->count, &mnt->__mnt_writers);
cpu_writer->count = 0;
/*
* Might as well do this so that no one
* ever sees the pointer and expects
* it to be valid.
*/
cpu_writer->mnt = NULL;
spin_unlock(&cpu_writer->lock);
}
/*
* This probably indicates that somebody messed
* up a mnt_want/drop_write() pair. If this
* happens, the filesystem was probably unable
* to make r/w->r/o transitions.
*/
WARN_ON(atomic_read(&mnt->__mnt_writers));
dput(mnt->mnt_root);
free_vfsmnt(mnt);
deactivate_super(sb);
......@@ -417,7 +707,7 @@ static int show_vfsmnt(struct seq_file *m, void *v)
seq_putc(m, '.');
mangle(m, mnt->mnt_sb->s_subtype);
}
seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
if (mnt->mnt_sb->s_flags & fs_infop->flag)
seq_puts(m, fs_infop->str);
......@@ -1019,6 +1309,23 @@ static noinline int do_loopback(struct nameidata *nd, char *old_name,
return err;
}
static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
{
int error = 0;
int readonly_request = 0;
if (ms_flags & MS_RDONLY)
readonly_request = 1;
if (readonly_request == __mnt_is_readonly(mnt))
return 0;
if (readonly_request)
error = mnt_make_readonly(mnt);
else
__mnt_unmake_readonly(mnt);
return error;
}
/*
* change filesystem flags. dir should be a physical root of filesystem.
* If you've mounted a non-root directory somewhere and want to do remount
......@@ -1041,7 +1348,10 @@ static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags,
return -EINVAL;
down_write(&sb->s_umount);
err = do_remount_sb(sb, flags, data, 0);
if (flags & MS_BIND)
err = change_mount_flags(nd->path.mnt, flags);
else
err = do_remount_sb(sb, flags, data, 0);
if (!err)
nd->path.mnt->mnt_flags = mnt_flags;
up_write(&sb->s_umount);
......@@ -1425,6 +1735,8 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
mnt_flags |= MNT_NODIRATIME;
if (flags & MS_RELATIME)
mnt_flags |= MNT_RELATIME;
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
......
......@@ -14,6 +14,7 @@
#include <linux/ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/highuid.h>
#include <linux/smp_lock.h>
#include <linux/vmalloc.h>
......@@ -261,7 +262,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
}
#endif /* CONFIG_NCPFS_NLS */
int ncp_ioctl(struct inode *inode, struct file *filp,
static int __ncp_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
struct ncp_server *server = NCP_SERVER(inode);
......@@ -822,6 +823,57 @@ int ncp_ioctl(struct inode *inode, struct file *filp,
return -EINVAL;
}
static int ncp_ioctl_need_write(unsigned int cmd)
{
switch (cmd) {
case NCP_IOC_GET_FS_INFO:
case NCP_IOC_GET_FS_INFO_V2:
case NCP_IOC_NCPREQUEST:
case NCP_IOC_SETDENTRYTTL:
case NCP_IOC_SIGN_INIT:
case NCP_IOC_LOCKUNLOCK:
case NCP_IOC_SET_SIGN_WANTED:
return 1;
case NCP_IOC_GETOBJECTNAME:
case NCP_IOC_SETOBJECTNAME:
case NCP_IOC_GETPRIVATEDATA:
case NCP_IOC_SETPRIVATEDATA:
case NCP_IOC_SETCHARSETS:
case NCP_IOC_GETCHARSETS:
case NCP_IOC_CONN_LOGGED_IN:
case NCP_IOC_GETDENTRYTTL:
case NCP_IOC_GETMOUNTUID2:
case NCP_IOC_SIGN_WANTED:
case NCP_IOC_GETROOT:
case NCP_IOC_SETROOT:
return 0;
default:
/* unkown IOCTL command, assume write */
return 1;
}
}
int ncp_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
int ret;
if (ncp_ioctl_need_write(cmd)) {
/*
* inside the ioctl(), any failures which
* are because of file_permission() are
* -EACCESS, so it seems consistent to keep
* that here.
*/
if (mnt_want_write(filp->f_path.mnt))
return -EACCES;
}
ret = __ncp_ioctl(inode, filp, cmd, arg);
if (ncp_ioctl_need_write(cmd))
mnt_drop_write(filp->f_path.mnt);
return ret;
}
#ifdef CONFIG_COMPAT
long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
......
......@@ -967,7 +967,8 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd)
if (nd->flags & LOOKUP_DIRECTORY)
return 0;
/* Are we trying to write to a read only partition? */
if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
if (__mnt_is_readonly(nd->path.mnt) &&
(nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
return 0;
return 1;
}
......
......@@ -658,14 +658,19 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
}
}
status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt);
if (status)
return status;
status = nfs_ok;
if (setattr->sa_acl != NULL)
status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh,
setattr->sa_acl);
if (status)
return status;
goto out;
status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
0, (time_t)0);
out:
mnt_drop_write(cstate->current_fh.fh_export->ex_path.mnt);
return status;
}
......
......@@ -46,6 +46,7 @@
#include <linux/scatterlist.h>
#include <linux/crypto.h>
#include <linux/sched.h>
#include <linux/mount.h>
#define NFSDDBG_FACILITY NFSDDBG_PROC
......@@ -154,7 +155,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
goto out_put;
}
status = mnt_want_write(rec_dir.path.mnt);
if (status)
goto out_put;
status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU);
mnt_drop_write(rec_dir.path.mnt);
out_put:
dput(dentry);
out_unlock:
......@@ -313,12 +318,17 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
if (!rec_dir_init || !clp->cl_firststate)
return;
status = mnt_want_write(rec_dir.path.mnt);
if (status)
goto out;
clp->cl_firststate = 0;
nfs4_save_user(&uid, &gid);
status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
nfs4_reset_user(uid, gid);
if (status == 0)
nfsd4_sync_rec_dir();
mnt_drop_write(rec_dir.path.mnt);
out:
if (status)
printk("NFSD: Failed to remove expired client state directory"
" %.*s\n", HEXDIR_LEN, clp->cl_recdir);
......@@ -347,13 +357,17 @@ nfsd4_recdir_purge_old(void) {
if (!rec_dir_init)
return;
status = mnt_want_write(rec_dir.path.mnt);
if (status)
goto out;
status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old);
if (status == 0)
nfsd4_sync_rec_dir();
mnt_drop_write(rec_dir.path.mnt);
out:
if (status)
printk("nfsd4: failed to purge old clients from recovery"
" directory %s\n", rec_dir.path.dentry->d_name.name);
return;
}
static int
......
......@@ -41,6 +41,7 @@
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/cache.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/workqueue.h>
#include <linux/smp_lock.h>
......@@ -1239,7 +1240,7 @@ static inline void
nfs4_file_downgrade(struct file *filp, unsigned int share_access)
{
if (share_access & NFS4_SHARE_ACCESS_WRITE) {
put_write_access(filp->f_path.dentry->d_inode);
drop_file_write_access(filp);
filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE;
}
}
......
......@@ -1255,23 +1255,35 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = 0;
switch (type) {
case S_IFREG:
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
goto out_nfserr;
host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
break;
case S_IFDIR:
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
goto out_nfserr;
host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
break;
case S_IFCHR:
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
goto out_nfserr;
host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
break;
default:
printk("nfsd: bad file type %o in nfsd_create\n", type);
host_err = -EINVAL;
goto out_nfserr;
}
if (host_err < 0)
if (host_err < 0) {
mnt_drop_write(fhp->fh_export->ex_path.mnt);
goto out_nfserr;
}
if (EX_ISSYNC(fhp->fh_export)) {
err = nfserrno(nfsd_sync_dir(dentry));
......@@ -1282,6 +1294,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
err2 = nfsd_create_setattr(rqstp, resfhp, iap);
if (err2)
err = err2;
mnt_drop_write(fhp->fh_export->ex_path.mnt);
/*
* Update the file handle to get the new inode info.
*/
......@@ -1359,6 +1372,9 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
v_atime = verifier[1]&0x7fffffff;
}
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
goto out_nfserr;
if (dchild->d_inode) {
err = 0;
......@@ -1390,12 +1406,15 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
case NFS3_CREATE_GUARDED:
err = nfserr_exist;
}
mnt_drop_write(fhp->fh_export->ex_path.mnt);
goto out;
}
host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
if (host_err < 0)
if (host_err < 0) {
mnt_drop_write(fhp->fh_export->ex_path.mnt);
goto out_nfserr;
}
if (created)
*created = 1;
......@@ -1420,6 +1439,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (err2)
err = err2;
mnt_drop_write(fhp->fh_export->ex_path.mnt);
/*
* Update the filehandle to get the new inode info.
*/
......@@ -1522,6 +1542,10 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (iap && (iap->ia_valid & ATTR_MODE))
mode = iap->ia_mode & S_IALLUGO;
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
goto out_nfserr;
if (unlikely(path[plen] != 0)) {
char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
if (path_alloced == NULL)
......@@ -1542,6 +1566,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfserrno(host_err);
fh_unlock(fhp);
mnt_drop_write(fhp->fh_export->ex_path.mnt);
cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
dput(dnew);
if (err==0) err = cerr;
......@@ -1592,6 +1618,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
dold = tfhp->fh_dentry;
dest = dold->d_inode;
host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt);
if (host_err) {
err = nfserrno(host_err);
goto out_dput;
}
host_err = vfs_link(dold, dirp, dnew);
if (!host_err) {
if (EX_ISSYNC(ffhp->fh_export)) {
......@@ -1605,7 +1636,8 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
else
err = nfserrno(host_err);
}
mnt_drop_write(tfhp->fh_export->ex_path.mnt);
out_dput:
dput(dnew);
out_unlock:
fh_unlock(ffhp);
......@@ -1678,13 +1710,20 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (ndentry == trap)
goto out_dput_new;
#ifdef MSNFS
if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
if (svc_msnfs(ffhp) &&
((atomic_read(&odentry->d_count) > 1)
|| (atomic_read(&ndentry->d_count) > 1))) {
host_err = -EPERM;
} else
#endif
goto out_dput_new;
}
host_err = -EXDEV;
if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
goto out_dput_new;
host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt);
if (host_err)
goto out_dput_new;
host_err = vfs_rename(fdir, odentry, tdir, ndentry);
if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
host_err = nfsd_sync_dir(tdentry);
......@@ -1692,6 +1731,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
host_err = nfsd_sync_dir(fdentry);
}
mnt_drop_write(ffhp->fh_export->ex_path.mnt);
out_dput_new:
dput(ndentry);
out_dput_old:
......@@ -1750,6 +1791,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (!type)
type = rdentry->d_inode->i_mode & S_IFMT;
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
goto out_nfserr;
if (type != S_IFDIR) { /* It's UNLINK */
#ifdef MSNFS
if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
......@@ -1765,10 +1810,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
dput(rdentry);
if (host_err)
goto out_nfserr;
goto out_drop;
if (EX_ISSYNC(fhp->fh_export))
host_err = nfsd_sync_dir(dentry);
out_drop:
mnt_drop_write(fhp->fh_export->ex_path.mnt);
out_nfserr:
err = nfserrno(host_err);
out:
......@@ -1865,7 +1912,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
inode->i_mode,
IS_IMMUTABLE(inode)? " immut" : "",
IS_APPEND(inode)? " append" : "",
IS_RDONLY(inode)? " ro" : "");
__mnt_is_readonly(exp->ex_path.mnt)? " ro" : "");
dprintk(" owner %d/%d user %d/%d\n",
inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
#endif
......@@ -1876,7 +1923,8 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
*/
if (!(acc & MAY_LOCAL_ACCESS))
if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
if (exp_rdonly(rqstp, exp) || IS_RDONLY(inode))
if (exp_rdonly(rqstp, exp) ||
__mnt_is_readonly(exp->ex_path.mnt))
return nfserr_rofs;
if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
return nfserr_perm;
......@@ -2039,6 +2087,9 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
} else
size = 0;
error = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (error)
goto getout;
if (size)
error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
else {
......@@ -2050,6 +2101,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
error = 0;
}
}
mnt_drop_write(fhp->fh_export->ex_path.mnt);
getout:
kfree(value);
......
......@@ -60,10 +60,6 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
goto bail;
}
status = -EROFS;
if (IS_RDONLY(inode))
goto bail_unlock;
status = -EACCES;
if (!is_owner_or_cap(inode))
goto bail_unlock;
......@@ -134,8 +130,13 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (get_user(flags, (int __user *) arg))
return -EFAULT;
return ocfs2_set_inode_attr(inode, flags,
status = mnt_want_write(filp->f_path.mnt);
if (status)
return status;
status = ocfs2_set_inode_attr(inode, flags,
OCFS2_FL_MODIFIABLE);
mnt_drop_write(filp->f_path.mnt);
return status;
case OCFS2_IOC_RESVSP:
case OCFS2_IOC_RESVSP64:
case OCFS2_IOC_UNRESVSP:
......
......@@ -244,21 +244,21 @@ static long do_sys_truncate(const char __user * path, loff_t length)
if (!S_ISREG(inode->i_mode))
goto dput_and_out;
error = vfs_permission(&nd, MAY_WRITE);
error = mnt_want_write(nd.path.mnt);
if (error)
goto dput_and_out;
error = -EROFS;
if (IS_RDONLY(inode))
goto dput_and_out;
error = vfs_permission(&nd, MAY_WRITE);
if (error)
goto mnt_drop_write_and_out;
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto dput_and_out;
goto mnt_drop_write_and_out;
error = get_write_access(inode);
if (error)
goto dput_and_out;
goto mnt_drop_write_and_out;
/*
* Make sure that there are no leases. get_write_access() protects
......@@ -276,6 +276,8 @@ static long do_sys_truncate(const char __user * path, loff_t length)
put_write_and_out:
put_write_access(inode);
mnt_drop_write_and_out:
mnt_drop_write(nd.path.mnt);
dput_and_out:
path_put(&nd.path);
out:
......@@ -457,8 +459,17 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
if(res || !(mode & S_IWOTH) ||
special_file(nd.path.dentry->d_inode->i_mode))
goto out_path_release;
if(IS_RDONLY(nd.path.dentry->d_inode))
/*
* This is a rare case where using __mnt_is_readonly()
* is OK without a mnt_want/drop_write() pair. Since
* no actual write to the fs is performed here, we do
* not need to telegraph to that to anyone.
*
* By doing this, we accept that this access is
* inherently racy and know that the fs may change
* state before we even see this result.
*/
if (__mnt_is_readonly(nd.path.mnt))
res = -EROFS;
out_path_release:
......@@ -567,12 +578,12 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
audit_inode(NULL, dentry);
err = -EROFS;
if (IS_RDONLY(inode))
err = mnt_want_write(file->f_path.mnt);
if (err)
goto out_putf;
err = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto out_putf;
goto out_drop_write;
mutex_lock(&inode->i_mutex);
if (mode == (mode_t) -1)
mode = inode->i_mode;
......@@ -581,6 +592,8 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
err = notify_change(dentry, &newattrs);
mutex_unlock(&inode->i_mutex);
out_drop_write:
mnt_drop_write(file->f_path.mnt);
out_putf:
fput(file);
out:
......@@ -600,13 +613,13 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
goto out;
inode = nd.path.dentry->d_inode;
error = -EROFS;
if (IS_RDONLY(inode))
error = mnt_want_write(nd.path.mnt);
if (error)
goto dput_and_out;
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto dput_and_out;
goto out_drop_write;
mutex_lock(&inode->i_mutex);
if (mode == (mode_t) -1)
......@@ -616,6 +629,8 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
error = notify_change(nd.path.dentry, &newattrs);
mutex_unlock(&inode->i_mutex);
out_drop_write:
mnt_drop_write(nd.path.mnt);
dput_and_out:
path_put(&nd.path);
out:
......@@ -638,9 +653,6 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
printk(KERN_ERR "chown_common: NULL inode\n");
goto out;
}
error = -EROFS;
if (IS_RDONLY(inode))
goto out;
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto out;
......@@ -671,7 +683,12 @@ asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
error = user_path_walk(filename, &nd);
if (error)
goto out;
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_release;
error = chown_common(nd.path.dentry, user, group);
mnt_drop_write(nd.path.mnt);
out_release:
path_put(&nd.path);
out:
return error;
......@@ -691,7 +708,12 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
error = __user_walk_fd(dfd, filename, follow, &nd);
if (error)
goto out;
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_release;
error = chown_common(nd.path.dentry, user, group);
mnt_drop_write(nd.path.mnt);
out_release:
path_put(&nd.path);
out:
return error;
......@@ -705,7 +727,12 @@ asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group
error = user_path_walk_link(filename, &nd);
if (error)
goto out;
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_release;
error = chown_common(nd.path.dentry, user, group);
mnt_drop_write(nd.path.mnt);
out_release:
path_put(&nd.path);
out:
return error;
......@@ -722,14 +749,48 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
if (!file)
goto out;
error = mnt_want_write(file->f_path.mnt);
if (error)
goto out_fput;
dentry = file->f_path.dentry;
audit_inode(NULL, dentry);
error = chown_common(dentry, user, group);
mnt_drop_write(file->f_path.mnt);
out_fput:
fput(file);
out:
return error;
}
/*
* You have to be very careful that these write
* counts get cleaned up in error cases and
* upon __fput(). This should probably never
* be called outside of __dentry_open().
*/
static inline int __get_file_write_access(struct inode *inode,
struct vfsmount *mnt)
{
int error;
error = get_write_access(inode);
if (error)
return error;
/*
* Do not take mount writer counts on
* special files since no writes to
* the mount itself will occur.
*/
if (!special_file(inode->i_mode)) {
/*
* Balanced in __fput()
*/
error = mnt_want_write(mnt);
if (error)
put_write_access(inode);
}
return error;
}
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
int flags, struct file *f,
int (*open)(struct inode *, struct file *))
......@@ -742,9 +803,11 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
FMODE_PREAD | FMODE_PWRITE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = get_write_access(inode);
error = __get_file_write_access(inode, mnt);
if (error)
goto cleanup_file;
if (!special_file(inode->i_mode))
file_take_write(f);
}
f->f_mapping = inode->i_mapping;
......@@ -784,8 +847,19 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
cleanup_all:
fops_put(f->f_op);
if (f->f_mode & FMODE_WRITE)
if (f->f_mode & FMODE_WRITE) {
put_write_access(inode);
if (!special_file(inode->i_mode)) {
/*
* We don't consider this a real
* mnt_want/drop_write() pair
* because it all happenend right
* here, so just reset the state.
*/
file_reset_write(f);
mnt_drop_write(mnt);
}
}
file_kill(f);
f->f_path.dentry = NULL;
f->f_path.mnt = NULL;
......@@ -796,43 +870,6 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
return ERR_PTR(error);
}
/*
* Note that while the flag value (low two bits) for sys_open means:
* 00 - read-only
* 01 - write-only
* 10 - read-write
* 11 - special
* it is changed into
* 00 - no permissions needed
* 01 - read-permission
* 10 - write-permission
* 11 - read-write
* for the internal routines (ie open_namei()/follow_link() etc). 00 is
* used by symlinks.
*/
static struct file *do_filp_open(int dfd, const char *filename, int flags,
int mode)
{
int namei_flags, error;
struct nameidata nd;
namei_flags = flags;
if ((namei_flags+1) & O_ACCMODE)
namei_flags++;
error = open_namei(dfd, filename, namei_flags, mode, &nd);
if (!error)
return nameidata_to_filp(&nd, flags);
return ERR_PTR(error);
}
struct file *filp_open(const char *filename, int flags, int mode)
{
return do_filp_open(AT_FDCWD, filename, flags, mode);
}
EXPORT_SYMBOL(filp_open);
/**
* lookup_instantiate_filp - instantiates the open intent filp
* @nd: pointer to nameidata
......
......@@ -4,6 +4,7 @@
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/reiserfs_fs.h>
#include <linux/time.h>
#include <asm/uaccess.h>
......@@ -25,6 +26,7 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
unsigned long arg)
{
unsigned int flags;
int err = 0;
switch (cmd) {
case REISERFS_IOC_UNPACK:
......@@ -48,50 +50,67 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
if (!reiserfs_attrs(inode->i_sb))
return -ENOTTY;
if (IS_RDONLY(inode))
return -EROFS;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
if (!is_owner_or_cap(inode))
return -EPERM;
if (get_user(flags, (int __user *)arg))
return -EFAULT;
/* Is it quota file? Do not allow user to mess with it. */
if (IS_NOQUOTA(inode))
return -EPERM;
if (!is_owner_or_cap(inode)) {
err = -EPERM;
goto setflags_out;
}
if (get_user(flags, (int __user *)arg)) {
err = -EFAULT;
goto setflags_out;
}
/*
* Is it quota file? Do not allow user to mess with it
*/
if (IS_NOQUOTA(inode)) {
err = -EPERM;
goto setflags_out;
}
if (((flags ^ REISERFS_I(inode)->
i_attrs) & (REISERFS_IMMUTABLE_FL |
REISERFS_APPEND_FL))
&& !capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
&& !capable(CAP_LINUX_IMMUTABLE)) {
err = -EPERM;
goto setflags_out;
}
if ((flags & REISERFS_NOTAIL_FL) &&
S_ISREG(inode->i_mode)) {
int result;
result = reiserfs_unpack(inode, filp);
if (result)
return result;
if (result) {
err = result;
goto setflags_out;
}
}
sd_attrs_to_i_attrs(flags, inode);
REISERFS_I(inode)->i_attrs = flags;
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
return 0;
setflags_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
case REISERFS_IOC_GETVERSION:
return put_user(inode->i_generation, (int __user *)arg);
case REISERFS_IOC_SETVERSION:
if (!is_owner_or_cap(inode))
return -EPERM;
if (IS_RDONLY(inode))
return -EROFS;
if (get_user(inode->i_generation, (int __user *)arg))
return -EFAULT;
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
if (get_user(inode->i_generation, (int __user *)arg)) {
err = -EFAULT;
goto setversion_out;
}
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
return 0;
setversion_out:
mnt_drop_write(filp->f_path.mnt);
return err;
default:
return -ENOTTY;
}
......
......@@ -37,6 +37,7 @@
#include <linux/idr.h>
#include <linux/kobject.h>
#include <linux/mutex.h>
#include <linux/file.h>
#include <asm/uaccess.h>
......@@ -567,10 +568,29 @@ static void mark_files_ro(struct super_block *sb)
{
struct file *f;
retry:
file_list_lock();
list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
if (S_ISREG(f->f_path.dentry->d_inode->i_mode) && file_count(f))
f->f_mode &= ~FMODE_WRITE;
struct vfsmount *mnt;
if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
continue;
if (!file_count(f))
continue;
if (!(f->f_mode & FMODE_WRITE))
continue;
f->f_mode &= ~FMODE_WRITE;
if (file_check_writeable(f) != 0)
continue;
file_release_write(f);
mnt = mntget(f->f_path.mnt);
file_list_unlock();
/*
* This can sleep, so we can't hold
* the file_list_lock() spinlock.
*/
mnt_drop_write(mnt);
mntput(mnt);
goto retry;
}
file_list_unlock();
}
......
......@@ -2,6 +2,7 @@
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/linkage.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/sched.h>
#include <linux/stat.h>
......@@ -59,6 +60,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
struct inode *inode;
struct iattr newattrs;
struct file *f = NULL;
struct vfsmount *mnt;
error = -EINVAL;
if (times && (!nsec_valid(times[0].tv_nsec) ||
......@@ -79,18 +81,20 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
if (!f)
goto out;
dentry = f->f_path.dentry;
mnt = f->f_path.mnt;
} else {
error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
if (error)
goto out;
dentry = nd.path.dentry;
mnt = nd.path.mnt;
}
inode = dentry->d_inode;
error = -EROFS;
if (IS_RDONLY(inode))
error = mnt_want_write(mnt);
if (error)
goto dput_and_out;
/* Don't worry, the checks are done in inode_change_ok() */
......@@ -98,7 +102,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
if (times) {
error = -EPERM;
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
goto dput_and_out;
goto mnt_drop_write_and_out;
if (times[0].tv_nsec == UTIME_OMIT)
newattrs.ia_valid &= ~ATTR_ATIME;
......@@ -118,22 +122,24 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
} else {
error = -EACCES;
if (IS_IMMUTABLE(inode))
goto dput_and_out;
goto mnt_drop_write_and_out;
if (!is_owner_or_cap(inode)) {
if (f) {
if (!(f->f_mode & FMODE_WRITE))
goto dput_and_out;
goto mnt_drop_write_and_out;
} else {
error = vfs_permission(&nd, MAY_WRITE);
if (error)
goto dput_and_out;
goto mnt_drop_write_and_out;
}
}
}
mutex_lock(&inode->i_mutex);
error = notify_change(dentry, &newattrs);
mutex_unlock(&inode->i_mutex);
mnt_drop_write_and_out:
mnt_drop_write(mnt);
dput_and_out:
if (f)
fput(f);
......
......@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/xattr.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/syscalls.h>
......@@ -32,8 +33,6 @@ xattr_permission(struct inode *inode, const char *name, int mask)
* filesystem or on an immutable / append-only inode.
*/
if (mask & MAY_WRITE) {
if (IS_RDONLY(inode))
return -EROFS;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
return -EPERM;
}
......@@ -262,7 +261,11 @@ sys_setxattr(char __user *path, char __user *name, void __user *value,
error = user_path_walk(path, &nd);
if (error)
return error;
error = setxattr(nd.path.dentry, name, value, size, flags);
error = mnt_want_write(nd.path.mnt);
if (!error) {
error = setxattr(nd.path.dentry, name, value, size, flags);
mnt_drop_write(nd.path.mnt);
}
path_put(&nd.path);
return error;
}
......@@ -277,7 +280,11 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value,
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = setxattr(nd.path.dentry, name, value, size, flags);
error = mnt_want_write(nd.path.mnt);
if (!error) {
error = setxattr(nd.path.dentry, name, value, size, flags);
mnt_drop_write(nd.path.mnt);
}
path_put(&nd.path);
return error;
}
......@@ -295,7 +302,12 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
return error;
dentry = f->f_path.dentry;
audit_inode(NULL, dentry);
error = setxattr(dentry, name, value, size, flags);
error = mnt_want_write(f->f_path.mnt);
if (!error) {
error = setxattr(dentry, name, value, size, flags);
mnt_drop_write(f->f_path.mnt);
}
out_fput:
fput(f);
return error;
}
......@@ -482,7 +494,11 @@ sys_removexattr(char __user *path, char __user *name)
error = user_path_walk(path, &nd);
if (error)
return error;
error = removexattr(nd.path.dentry, name);
error = mnt_want_write(nd.path.mnt);
if (!error) {
error = removexattr(nd.path.dentry, name);
mnt_drop_write(nd.path.mnt);
}
path_put(&nd.path);
return error;
}
......@@ -496,7 +512,11 @@ sys_lremovexattr(char __user *path, char __user *name)
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = removexattr(nd.path.dentry, name);
error = mnt_want_write(nd.path.mnt);
if (!error) {
error = removexattr(nd.path.dentry, name);
mnt_drop_write(nd.path.mnt);
}
path_put(&nd.path);
return error;
}
......@@ -513,7 +533,11 @@ sys_fremovexattr(int fd, char __user *name)
return error;
dentry = f->f_path.dentry;
audit_inode(NULL, dentry);
error = removexattr(dentry, name);
error = mnt_want_write(f->f_path.mnt);
if (!error) {
error = removexattr(dentry, name);
mnt_drop_write(f->f_path.mnt);
}
fput(f);
return error;
}
......
......@@ -535,8 +535,6 @@ xfs_attrmulti_attr_set(
char *kbuf;
int error = EFAULT;
if (IS_RDONLY(inode))
return -EROFS;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
return EPERM;
if (len > XATTR_SIZE_MAX)
......@@ -562,8 +560,6 @@ xfs_attrmulti_attr_remove(
char *name,
__uint32_t flags)
{
if (IS_RDONLY(inode))
return -EROFS;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
return EPERM;
return xfs_attr_remove(XFS_I(inode), name, flags);
......@@ -573,6 +569,7 @@ STATIC int
xfs_attrmulti_by_handle(
xfs_mount_t *mp,
void __user *arg,
struct file *parfilp,
struct inode *parinode)
{
int error;
......@@ -626,13 +623,21 @@ xfs_attrmulti_by_handle(
&ops[i].am_length, ops[i].am_flags);
break;
case ATTR_OP_SET:
ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
if (ops[i].am_error)
break;
ops[i].am_error = xfs_attrmulti_attr_set(inode,
attr_name, ops[i].am_attrvalue,
ops[i].am_length, ops[i].am_flags);
mnt_drop_write(parfilp->f_path.mnt);
break;
case ATTR_OP_REMOVE:
ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
if (ops[i].am_error)
break;
ops[i].am_error = xfs_attrmulti_attr_remove(inode,
attr_name, ops[i].am_flags);
mnt_drop_write(parfilp->f_path.mnt);
break;
default:
ops[i].am_error = EINVAL;
......@@ -1133,7 +1138,7 @@ xfs_ioctl(
return xfs_attrlist_by_handle(mp, arg, inode);
case XFS_IOC_ATTRMULTI_BY_HANDLE:
return xfs_attrmulti_by_handle(mp, arg, inode);
return xfs_attrmulti_by_handle(mp, arg, filp, inode);
case XFS_IOC_SWAPEXT: {
error = xfs_swapext((struct xfs_swapext __user *)arg);
......
......@@ -155,13 +155,6 @@ xfs_ichgtime_fast(
*/
ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
/*
* We're not supposed to change timestamps in readonly-mounted
* filesystems. Throw it away if anyone asks us.
*/
if (unlikely(IS_RDONLY(inode)))
return;
if (flags & XFS_ICHGTIME_MOD) {
tvp = &inode->i_mtime;
ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
......
......@@ -51,6 +51,7 @@
#include "xfs_vnodeops.h"
#include <linux/capability.h>
#include <linux/mount.h>
#include <linux/writeback.h>
......@@ -670,10 +671,16 @@ xfs_write(
if (new_size > xip->i_size)
xip->i_new_size = new_size;
if (likely(!(ioflags & IO_INVIS))) {
/*
* We're not supposed to change timestamps in readonly-mounted
* filesystems. Throw it away if anyone asks us.
*/
if (likely(!(ioflags & IO_INVIS) &&
!mnt_want_write(file->f_path.mnt))) {
file_update_time(file);
xfs_ichgtime_fast(xip, inode,
XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
mnt_drop_write(file->f_path.mnt);
}
/*
......
......@@ -61,6 +61,7 @@ extern struct kmem_cache *filp_cachep;
extern void __fput(struct file *);
extern void fput(struct file *);
extern void drop_file_write_access(struct file *file);
struct file_operations;
struct vfsmount;
......
......@@ -776,6 +776,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
index < ra->start + ra->size);
}
#define FILE_MNT_WRITE_TAKEN 1
#define FILE_MNT_WRITE_RELEASED 2
struct file {
/*
* fu_list becomes invalid after file_free is called and queued via
......@@ -810,6 +813,9 @@ struct file {
spinlock_t f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
#ifdef CONFIG_DEBUG_WRITECOUNT
unsigned long f_mnt_write_state;
#endif
};
extern spinlock_t files_lock;
#define file_list_lock() spin_lock(&files_lock);
......@@ -818,6 +824,49 @@ extern spinlock_t files_lock;
#define get_file(x) atomic_inc(&(x)->f_count)
#define file_count(x) atomic_read(&(x)->f_count)
#ifdef CONFIG_DEBUG_WRITECOUNT
static inline void file_take_write(struct file *f)
{
WARN_ON(f->f_mnt_write_state != 0);
f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN;
}
static inline void file_release_write(struct file *f)
{
f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED;
}
static inline void file_reset_write(struct file *f)
{
f->f_mnt_write_state = 0;
}
static inline void file_check_state(struct file *f)
{
/*
* At this point, either both or neither of these bits
* should be set.
*/
WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN);
WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED);
}
static inline int file_check_writeable(struct file *f)
{
if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN)
return 0;
printk(KERN_WARNING "writeable file with no "
"mnt_want_write()\n");
WARN_ON(1);
return -EINVAL;
}
#else /* !CONFIG_DEBUG_WRITECOUNT */
static inline void file_take_write(struct file *filp) {}
static inline void file_release_write(struct file *filp) {}
static inline void file_reset_write(struct file *filp) {}
static inline void file_check_state(struct file *filp) {}
static inline int file_check_writeable(struct file *filp)
{
return 0;
}
#endif /* CONFIG_DEBUG_WRITECOUNT */
#define MAX_NON_LFS ((1UL<<31) - 1)
/* Page cache limit. The filesystems should put that into their s_maxbytes
......@@ -1735,7 +1784,8 @@ extern struct file *create_read_pipe(struct file *f);
extern struct file *create_write_pipe(void);
extern void free_write_pipe(struct file *);
extern int open_namei(int dfd, const char *, int, int, struct nameidata *);
extern struct file *do_filp_open(int dfd, const char *pathname,
int open_flag, int mode);
extern int may_open(struct nameidata *, int, int);
extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
......
......@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include <linux/nodemask.h>
#include <linux/spinlock.h>
#include <asm/atomic.h>
......@@ -28,8 +29,10 @@ struct mnt_namespace;
#define MNT_NOATIME 0x08
#define MNT_NODIRATIME 0x10
#define MNT_RELATIME 0x20
#define MNT_READONLY 0x40 /* does the user want this to be r/o? */
#define MNT_SHRINKABLE 0x100
#define MNT_IMBALANCED_WRITE_COUNT 0x200 /* just for debugging */
#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */
#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */
......@@ -62,6 +65,11 @@ struct vfsmount {
int mnt_expiry_mark; /* true if marked for expiry */
int mnt_pinned;
int mnt_ghosts;
/*
* This value is not stable unless all of the mnt_writers[] spinlocks
* are held, and all mnt_writer[]s on this mount have 0 as their ->count
*/
atomic_t __mnt_writers;
};
static inline struct vfsmount *mntget(struct vfsmount *mnt)
......@@ -71,9 +79,12 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt)
return mnt;
}
extern int mnt_want_write(struct vfsmount *mnt);
extern void mnt_drop_write(struct vfsmount *mnt);
extern void mntput_no_expire(struct vfsmount *mnt);
extern void mnt_pin(struct vfsmount *mnt);
extern void mnt_unpin(struct vfsmount *mnt);
extern int __mnt_is_readonly(struct vfsmount *mnt);
static inline void mntput(struct vfsmount *mnt)
{
......
......@@ -598,6 +598,7 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
int oflag, mode_t mode, struct mq_attr __user *u_attr)
{
struct mq_attr attr;
struct file *result;
int ret;
if (u_attr) {
......@@ -612,13 +613,24 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
}
mode &= ~current->fs->umask;
ret = mnt_want_write(mqueue_mnt);
if (ret)
goto out;
ret = vfs_create(dir->d_inode, dentry, mode, NULL);
dentry->d_fsdata = NULL;
if (ret)
goto out;
return dentry_open(dentry, mqueue_mnt, oflag);
goto out_drop_write;
result = dentry_open(dentry, mqueue_mnt, oflag);
/*
* dentry_open() took a persistent mnt_want_write(),
* so we can now drop this one.
*/
mnt_drop_write(mqueue_mnt);
return result;
out_drop_write:
mnt_drop_write(mqueue_mnt);
out:
dput(dentry);
mntput(mqueue_mnt);
......@@ -742,8 +754,11 @@ asmlinkage long sys_mq_unlink(const char __user *u_name)
inode = dentry->d_inode;
if (inode)
atomic_inc(&inode->i_count);
err = mnt_want_write(mqueue_mnt);
if (err)
goto out_err;
err = vfs_unlink(dentry->d_parent->d_inode, dentry);
mnt_drop_write(mqueue_mnt);
out_err:
dput(dentry);
......
......@@ -427,6 +427,16 @@ config DEBUG_VM
If unsure, say N.
config DEBUG_WRITECOUNT
bool "Debug filesystem writers count"
depends on DEBUG_KERNEL
help
Enable this to catch wrong use of the writers count in struct
vfsmount. This will increase the size of each file struct by
32 bits.
If unsure, say N.
config DEBUG_LIST
bool "Debug linked list manipulation"
depends on DEBUG_KERNEL
......
......@@ -819,7 +819,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
*/
mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current->fs->umask);
err = mnt_want_write(nd.path.mnt);
if (err)
goto out_mknod_dput;
err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
mnt_drop_write(nd.path.mnt);
if (err)
goto out_mknod_dput;
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment