Commit 7b7fc708 authored by Linus Torvalds

Merge branch 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block

* 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block:
  [PATCH] Remove SUID when splicing into an inode
  [PATCH] Add lockless helpers for remove_suid()
  [PATCH] Introduce generic_file_splice_write_nolock()
  [PATCH] Take i_mutex in splice_from_pipe()
parents c7a3bd17 8c34e2d6
...@@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode) ...@@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode)
wake_up_bit(&inode->i_state, __I_LOCK); wake_up_bit(&inode->i_state, __I_LOCK);
} }
/*
 * Locking two inodes that are not in a parent/child relationship (such
 * as a directory and one of its children) at the same time is something
 * we rarely want. The vast majority of file systems should be able to
 * get along fine without this. Do not use these functions except as a
 * last resort.
 */

/**
 * inode_double_lock - take i_mutex on up to two inodes
 * @inode1: first inode, may be NULL
 * @inode2: second inode, may be NULL
 *
 * When both inodes are non-NULL and distinct, both i_mutexes are taken,
 * lower address first, using the I_MUTEX_PARENT and I_MUTEX_CHILD
 * nesting classes. When only one inode is usable (the other is NULL, or
 * both arguments are the same inode) that single i_mutex is taken with
 * a plain mutex_lock(). When both are NULL nothing is locked.
 */
void inode_double_lock(struct inode *inode1, struct inode *inode2)
{
	struct inode *first;
	struct inode *second;

	/* Degenerate cases: at most one mutex to take. */
	if (!inode1 || !inode2 || inode1 == inode2) {
		struct inode *only = inode1 ? inode1 : inode2;

		if (only)
			mutex_lock(&only->i_mutex);
		return;
	}

	/* Order by address so every caller agrees on who goes first. */
	if (inode1 < inode2) {
		first = inode1;
		second = inode2;
	} else {
		first = inode2;
		second = inode1;
	}

	mutex_lock_nested(&first->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&second->i_mutex, I_MUTEX_CHILD);
}
EXPORT_SYMBOL(inode_double_lock);
/**
 * inode_double_unlock - release i_mutex on up to two inodes
 * @inode1: first inode, may be NULL
 * @inode2: second inode, may be NULL
 *
 * Unlocks @inode1's i_mutex if @inode1 is non-NULL, then @inode2's
 * i_mutex if @inode2 is non-NULL and is not the same inode as @inode1,
 * so a mutex shared by both arguments is released exactly once.
 */
void inode_double_unlock(struct inode *inode1, struct inode *inode2)
{
	if (inode1)
		mutex_unlock(&inode1->i_mutex);

	/* Nothing more to drop if there is no second, distinct inode. */
	if (!inode2 || inode2 == inode1)
		return;

	mutex_unlock(&inode2->i_mutex);
}
EXPORT_SYMBOL(inode_double_unlock);
static __initdata unsigned long ihash_entries; static __initdata unsigned long ihash_entries;
static int __init set_ihash_entries(char *str) static int __init set_ihash_entries(char *str)
{ {
......
...@@ -707,9 +707,9 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, ...@@ -707,9 +707,9 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
* key here is the 'actor' worker passed in that actually moves the data * key here is the 'actor' worker passed in that actually moves the data
* to the wanted destination. See pipe_to_file/pipe_to_sendpage above. * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
*/ */
ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
loff_t *ppos, size_t len, unsigned int flags, struct file *out, loff_t *ppos, size_t len,
splice_actor *actor) unsigned int flags, splice_actor *actor)
{ {
int ret, do_wakeup, err; int ret, do_wakeup, err;
struct splice_desc sd; struct splice_desc sd;
...@@ -722,9 +722,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, ...@@ -722,9 +722,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
sd.file = out; sd.file = out;
sd.pos = *ppos; sd.pos = *ppos;
if (pipe->inode)
mutex_lock(&pipe->inode->i_mutex);
for (;;) { for (;;) {
if (pipe->nrbufs) { if (pipe->nrbufs) {
struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
...@@ -797,9 +794,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, ...@@ -797,9 +794,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
pipe_wait(pipe); pipe_wait(pipe);
} }
if (pipe->inode)
mutex_unlock(&pipe->inode->i_mutex);
if (do_wakeup) { if (do_wakeup) {
smp_mb(); smp_mb();
if (waitqueue_active(&pipe->wait)) if (waitqueue_active(&pipe->wait))
...@@ -810,6 +804,73 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, ...@@ -810,6 +804,73 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
return ret; return ret;
} }
/*
 * Locked front end for __splice_from_pipe(): takes i_mutex on both the
 * inode backing @out and the pipe's inode around the actual splice, and
 * returns whatever __splice_from_pipe() returned.
 */
ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
			 loff_t *ppos, size_t len, unsigned int flags,
			 splice_actor *actor)
{
	struct inode *host = out->f_mapping->host;
	ssize_t spliced;

	/*
	 * The actor may end up calling ->prepare_write and ->commit_write,
	 * which most of the time expect i_mutex to be held. Taking that
	 * lock and pipe->inode's lock in arbitrary order could produce an
	 * ABBA deadlock, so both are acquired through the ordered helper.
	 */
	inode_double_lock(host, pipe->inode);
	spliced = __splice_from_pipe(pipe, out, ppos, len, flags, actor);
	inode_double_unlock(host, pipe->inode);

	return spliced;
}
/**
 * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
 * @pipe:	pipe info
 * @out:	file to write to
 * @ppos:	position in @out; advanced by the number of bytes spliced
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Will either move or copy pages (determined by @flags options) from
 * the given pipe inode to the given file. No i_mutex is taken here: the
 * caller is responsible for acquiring i_mutex on both inodes.
 *
 * Returns the result of the splice, or a negative error if removing the
 * suid bits or the O_SYNC/IS_SYNC write-out failed.
 */
ssize_t
generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
				 loff_t *ppos, size_t len, unsigned int flags)
{
	struct address_space *mapping = out->f_mapping;
	struct inode *inode = mapping->host;
	ssize_t spliced;
	int sync_needed;
	int err;

	/* Drop suid/sgid before any data is written into the file. */
	err = remove_suid(out->f_dentry);
	if (unlikely(err))
		return err;

	spliced = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
	if (spliced <= 0)
		return spliced;

	*ppos += spliced;

	/*
	 * If file or inode is SYNC and we actually wrote some data,
	 * sync it. A sync failure replaces the byte count as the result.
	 */
	sync_needed = (out->f_flags & O_SYNC) || IS_SYNC(inode);
	if (unlikely(sync_needed)) {
		err = generic_osync_inode(inode, mapping,
					  OSYNC_METADATA|OSYNC_DATA);
		if (err)
			return err;
	}

	return spliced;
}
EXPORT_SYMBOL(generic_file_splice_write_nolock);
/** /**
* generic_file_splice_write - splice data from a pipe to a file * generic_file_splice_write - splice data from a pipe to a file
* @pipe: pipe info * @pipe: pipe info
...@@ -826,12 +887,21 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ...@@ -826,12 +887,21 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags) loff_t *ppos, size_t len, unsigned int flags)
{ {
struct address_space *mapping = out->f_mapping; struct address_space *mapping = out->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret; ssize_t ret;
int err;
err = should_remove_suid(out->f_dentry);
if (unlikely(err)) {
mutex_lock(&inode->i_mutex);
err = __remove_suid(out->f_dentry, err);
mutex_unlock(&inode->i_mutex);
if (err)
return err;
}
ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
if (ret > 0) { if (ret > 0) {
struct inode *inode = mapping->host;
*ppos += ret; *ppos += ret;
/* /*
...@@ -839,8 +909,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ...@@ -839,8 +909,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
* sync it. * sync it.
*/ */
if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err;
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
err = generic_osync_inode(inode, mapping, err = generic_osync_inode(inode, mapping,
OSYNC_METADATA|OSYNC_DATA); OSYNC_METADATA|OSYNC_DATA);
...@@ -1400,13 +1468,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, ...@@ -1400,13 +1468,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* grabbing by inode address. Otherwise two different processes * grabbing by inode address. Otherwise two different processes
* could deadlock (one doing tee from A -> B, the other from B -> A). * could deadlock (one doing tee from A -> B, the other from B -> A).
*/ */
if (ipipe->inode < opipe->inode) { inode_double_lock(ipipe->inode, opipe->inode);
mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
} else {
mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
}
do { do {
if (!opipe->readers) { if (!opipe->readers) {
...@@ -1450,8 +1512,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, ...@@ -1450,8 +1512,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
i++; i++;
} while (len); } while (len);
mutex_unlock(&ipipe->inode->i_mutex); inode_double_unlock(ipipe->inode, opipe->inode);
mutex_unlock(&opipe->inode->i_mutex);
/* /*
* If we put data in the output pipe, wakeup any potential readers. * If we put data in the output pipe, wakeup any potential readers.
......
...@@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class ...@@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class
I_MUTEX_QUOTA I_MUTEX_QUOTA
}; };
/*
 * Lock/unlock i_mutex on a pair of inodes. Either pointer may be NULL
 * and both may refer to the same inode; two distinct inodes are locked
 * in address order.
 */
extern void inode_double_lock(struct inode *inode1, struct inode *inode2);
extern void inode_double_unlock(struct inode *inode1, struct inode *inode2);
/* /*
* NOTE: in a 32bit arch with a preemptable kernel and * NOTE: in a 32bit arch with a preemptable kernel and
* an UP compile the i_size_read/write must be atomic * an UP compile the i_size_read/write must be atomic
...@@ -1709,6 +1712,8 @@ extern void __iget(struct inode * inode); ...@@ -1709,6 +1712,8 @@ extern void __iget(struct inode * inode);
extern void clear_inode(struct inode *); extern void clear_inode(struct inode *);
extern void destroy_inode(struct inode *); extern void destroy_inode(struct inode *);
extern struct inode *new_inode(struct super_block *); extern struct inode *new_inode(struct super_block *);
/*
 * The two halves of remove_suid(): should_remove_suid() returns the
 * kill mask (0 when nothing needs to be done) and __remove_suid()
 * applies a given mask through notify_change().
 */
extern int __remove_suid(struct dentry *, int);
extern int should_remove_suid(struct dentry *);
extern int remove_suid(struct dentry *); extern int remove_suid(struct dentry *);
extern void remove_dquot_ref(struct super_block *, int, struct list_head *); extern void remove_dquot_ref(struct super_block *, int, struct list_head *);
...@@ -1755,6 +1760,8 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, ...@@ -1755,6 +1760,8 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int); struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t generic_file_splice_write(struct pipe_inode_info *, extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int); struct file *, loff_t *, size_t, unsigned int);
/*
 * Variant of generic_file_splice_write() that takes no mutexes itself;
 * the caller is responsible for acquiring i_mutex on both inodes.
 */
extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
struct file *out, loff_t *, size_t len, unsigned int flags); struct file *out, loff_t *, size_t len, unsigned int flags);
extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
......
...@@ -1884,11 +1884,10 @@ __grab_cache_page(struct address_space *mapping, unsigned long index, ...@@ -1884,11 +1884,10 @@ __grab_cache_page(struct address_space *mapping, unsigned long index,
* if suid or (sgid and xgrp) * if suid or (sgid and xgrp)
* remove privs * remove privs
*/ */
int remove_suid(struct dentry *dentry) int should_remove_suid(struct dentry *dentry)
{ {
mode_t mode = dentry->d_inode->i_mode; mode_t mode = dentry->d_inode->i_mode;
int kill = 0; int kill = 0;
int result = 0;
/* suid always must be killed */ /* suid always must be killed */
if (unlikely(mode & S_ISUID)) if (unlikely(mode & S_ISUID))
...@@ -1901,13 +1900,28 @@ int remove_suid(struct dentry *dentry) ...@@ -1901,13 +1900,28 @@ int remove_suid(struct dentry *dentry)
if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
kill |= ATTR_KILL_SGID; kill |= ATTR_KILL_SGID;
if (unlikely(kill && !capable(CAP_FSETID))) { if (unlikely(kill && !capable(CAP_FSETID)))
return kill;
return 0;
}
int __remove_suid(struct dentry *dentry, int kill)
{
struct iattr newattrs; struct iattr newattrs;
newattrs.ia_valid = ATTR_FORCE | kill; newattrs.ia_valid = ATTR_FORCE | kill;
result = notify_change(dentry, &newattrs); return notify_change(dentry, &newattrs);
} }
return result;
int remove_suid(struct dentry *dentry)
{
int kill = should_remove_suid(dentry);
if (unlikely(kill))
return __remove_suid(dentry, kill);
return 0;
} }
EXPORT_SYMBOL(remove_suid); EXPORT_SYMBOL(remove_suid);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment