Commit 88dd9c16 authored by Linus Torvalds

Merge branch 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block

* 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block:
  [PATCH] vfs: add splice_write and splice_read to documentation
  [PATCH] Remove sys_ prefix of new syscalls from __NR_sys_*
  [PATCH] splice: warning fix
  [PATCH] another round of fs/pipe.c cleanups
  [PATCH] splice: comment styles
  [PATCH] splice: add Ingo as addition copyright holder
  [PATCH] splice: unlikely() optimizations
  [PATCH] splice: speedups and optimizations
  [PATCH] pipe.c/fifo.c code cleanups
  [PATCH] get rid of the PIPE_*() macros
  [PATCH] splice: speedup __generic_file_splice_read
  [PATCH] splice: add direct fd <-> fd splicing support
  [PATCH] splice: add optional input and output offsets
  [PATCH] introduce a "kernel-internal pipe object" abstraction
  [PATCH] splice: be smarter about calling do_page_cache_readahead()
  [PATCH] splice: optimize the splice buffer mapping
  [PATCH] splice: cleanup __generic_file_splice_read()
  [PATCH] splice: only call wake_up_interruptible() when we really have to
  [PATCH] splice: potential !page dereference
  [PATCH] splice: mark the io page as accessed
parents 6dde4325 d1195c51
...@@ -694,7 +694,7 @@ struct file_operations ...@@ -694,7 +694,7 @@ struct file_operations
---------------------- ----------------------
This describes how the VFS can manipulate an open file. As of kernel This describes how the VFS can manipulate an open file. As of kernel
2.6.13, the following members are defined: 2.6.17, the following members are defined:
struct file_operations { struct file_operations {
loff_t (*llseek) (struct file *, loff_t, int); loff_t (*llseek) (struct file *, loff_t, int);
...@@ -723,6 +723,10 @@ struct file_operations { ...@@ -723,6 +723,10 @@ struct file_operations {
int (*check_flags)(int); int (*check_flags)(int);
int (*dir_notify)(struct file *filp, unsigned long arg); int (*dir_notify)(struct file *filp, unsigned long arg);
int (*flock) (struct file *, int, struct file_lock *); int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned
int);
ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned
int);
}; };
Again, all methods are called without any locks being held, unless Again, all methods are called without any locks being held, unless
...@@ -790,6 +794,12 @@ otherwise noted. ...@@ -790,6 +794,12 @@ otherwise noted.
flock: called by the flock(2) system call flock: called by the flock(2) system call
splice_write: called by the VFS to splice data from a pipe to a file. This
method is used by the splice(2) system call.
splice_read: called by the VFS to splice data from a file to a pipe. This
method is used by the splice(2) system call.
Note that the file operations are implemented by the specific Note that the file operations are implemented by the specific
filesystem in which the inode resides. When opening a device node filesystem in which the inode resides. When opening a device node
(character or block special) most filesystems will call special (character or block special) most filesystems will call special
......
...@@ -15,30 +15,35 @@ ...@@ -15,30 +15,35 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/pipe_fs_i.h> #include <linux/pipe_fs_i.h>
static void wait_for_partner(struct inode* inode, unsigned int* cnt) static void wait_for_partner(struct inode* inode, unsigned int *cnt)
{ {
int cur = *cnt; int cur = *cnt;
while(cur == *cnt) {
pipe_wait(inode); while (cur == *cnt) {
if(signal_pending(current)) pipe_wait(inode->i_pipe);
if (signal_pending(current))
break; break;
} }
} }
static void wake_up_partner(struct inode* inode) static void wake_up_partner(struct inode* inode)
{ {
wake_up_interruptible(PIPE_WAIT(*inode)); wake_up_interruptible(&inode->i_pipe->wait);
} }
static int fifo_open(struct inode *inode, struct file *filp) static int fifo_open(struct inode *inode, struct file *filp)
{ {
struct pipe_inode_info *pipe;
int ret; int ret;
mutex_lock(PIPE_MUTEX(*inode)); mutex_lock(&inode->i_mutex);
if (!inode->i_pipe) { pipe = inode->i_pipe;
if (!pipe) {
ret = -ENOMEM; ret = -ENOMEM;
if(!pipe_new(inode)) pipe = alloc_pipe_info(inode);
if (!pipe)
goto err_nocleanup; goto err_nocleanup;
inode->i_pipe = pipe;
} }
filp->f_version = 0; filp->f_version = 0;
...@@ -53,18 +58,18 @@ static int fifo_open(struct inode *inode, struct file *filp) ...@@ -53,18 +58,18 @@ static int fifo_open(struct inode *inode, struct file *filp)
* opened, even when there is no process writing the FIFO. * opened, even when there is no process writing the FIFO.
*/ */
filp->f_op = &read_fifo_fops; filp->f_op = &read_fifo_fops;
PIPE_RCOUNTER(*inode)++; pipe->r_counter++;
if (PIPE_READERS(*inode)++ == 0) if (pipe->readers++ == 0)
wake_up_partner(inode); wake_up_partner(inode);
if (!PIPE_WRITERS(*inode)) { if (!pipe->writers) {
if ((filp->f_flags & O_NONBLOCK)) { if ((filp->f_flags & O_NONBLOCK)) {
/* suppress POLLHUP until we have /* suppress POLLHUP until we have
* seen a writer */ * seen a writer */
filp->f_version = PIPE_WCOUNTER(*inode); filp->f_version = pipe->w_counter;
} else } else
{ {
wait_for_partner(inode, &PIPE_WCOUNTER(*inode)); wait_for_partner(inode, &pipe->w_counter);
if(signal_pending(current)) if(signal_pending(current))
goto err_rd; goto err_rd;
} }
...@@ -78,16 +83,16 @@ static int fifo_open(struct inode *inode, struct file *filp) ...@@ -78,16 +83,16 @@ static int fifo_open(struct inode *inode, struct file *filp)
* errno=ENXIO when there is no process reading the FIFO. * errno=ENXIO when there is no process reading the FIFO.
*/ */
ret = -ENXIO; ret = -ENXIO;
if ((filp->f_flags & O_NONBLOCK) && !PIPE_READERS(*inode)) if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
goto err; goto err;
filp->f_op = &write_fifo_fops; filp->f_op = &write_fifo_fops;
PIPE_WCOUNTER(*inode)++; pipe->w_counter++;
if (!PIPE_WRITERS(*inode)++) if (!pipe->writers++)
wake_up_partner(inode); wake_up_partner(inode);
if (!PIPE_READERS(*inode)) { if (!pipe->readers) {
wait_for_partner(inode, &PIPE_RCOUNTER(*inode)); wait_for_partner(inode, &pipe->r_counter);
if (signal_pending(current)) if (signal_pending(current))
goto err_wr; goto err_wr;
} }
...@@ -102,11 +107,11 @@ static int fifo_open(struct inode *inode, struct file *filp) ...@@ -102,11 +107,11 @@ static int fifo_open(struct inode *inode, struct file *filp)
*/ */
filp->f_op = &rdwr_fifo_fops; filp->f_op = &rdwr_fifo_fops;
PIPE_READERS(*inode)++; pipe->readers++;
PIPE_WRITERS(*inode)++; pipe->writers++;
PIPE_RCOUNTER(*inode)++; pipe->r_counter++;
PIPE_WCOUNTER(*inode)++; pipe->w_counter++;
if (PIPE_READERS(*inode) == 1 || PIPE_WRITERS(*inode) == 1) if (pipe->readers == 1 || pipe->writers == 1)
wake_up_partner(inode); wake_up_partner(inode);
break; break;
...@@ -116,27 +121,27 @@ static int fifo_open(struct inode *inode, struct file *filp) ...@@ -116,27 +121,27 @@ static int fifo_open(struct inode *inode, struct file *filp)
} }
/* Ok! */ /* Ok! */
mutex_unlock(PIPE_MUTEX(*inode)); mutex_unlock(&inode->i_mutex);
return 0; return 0;
err_rd: err_rd:
if (!--PIPE_READERS(*inode)) if (!--pipe->readers)
wake_up_interruptible(PIPE_WAIT(*inode)); wake_up_interruptible(&pipe->wait);
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
goto err; goto err;
err_wr: err_wr:
if (!--PIPE_WRITERS(*inode)) if (!--pipe->writers)
wake_up_interruptible(PIPE_WAIT(*inode)); wake_up_interruptible(&pipe->wait);
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
goto err; goto err;
err: err:
if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) if (!pipe->readers && !pipe->writers)
free_pipe_info(inode); free_pipe_info(inode);
err_nocleanup: err_nocleanup:
mutex_unlock(PIPE_MUTEX(*inode)); mutex_unlock(&inode->i_mutex);
return ret; return ret;
} }
......
This diff is collapsed.
...@@ -202,7 +202,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count ...@@ -202,7 +202,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
goto Einval; goto Einval;
inode = file->f_dentry->d_inode; inode = file->f_dentry->d_inode;
if (inode->i_flock && MANDATORY_LOCK(inode)) { if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) {
int retval = locks_mandatory_area( int retval = locks_mandatory_area(
read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
inode, file, pos, count); inode, file, pos, count);
......
This diff is collapsed.
...@@ -252,7 +252,7 @@ xfs_file_sendfile_invis( ...@@ -252,7 +252,7 @@ xfs_file_sendfile_invis(
STATIC ssize_t STATIC ssize_t
xfs_file_splice_read( xfs_file_splice_read(
struct file *infilp, struct file *infilp,
struct inode *pipe, struct pipe_inode_info *pipe,
size_t len, size_t len,
unsigned int flags) unsigned int flags)
{ {
...@@ -266,7 +266,7 @@ xfs_file_splice_read( ...@@ -266,7 +266,7 @@ xfs_file_splice_read(
STATIC ssize_t STATIC ssize_t
xfs_file_splice_read_invis( xfs_file_splice_read_invis(
struct file *infilp, struct file *infilp,
struct inode *pipe, struct pipe_inode_info *pipe,
size_t len, size_t len,
unsigned int flags) unsigned int flags)
{ {
...@@ -279,7 +279,7 @@ xfs_file_splice_read_invis( ...@@ -279,7 +279,7 @@ xfs_file_splice_read_invis(
STATIC ssize_t STATIC ssize_t
xfs_file_splice_write( xfs_file_splice_write(
struct inode *pipe, struct pipe_inode_info *pipe,
struct file *outfilp, struct file *outfilp,
size_t len, size_t len,
unsigned int flags) unsigned int flags)
...@@ -293,7 +293,7 @@ xfs_file_splice_write( ...@@ -293,7 +293,7 @@ xfs_file_splice_write(
STATIC ssize_t STATIC ssize_t
xfs_file_splice_write_invis( xfs_file_splice_write_invis(
struct inode *pipe, struct pipe_inode_info *pipe,
struct file *outfilp, struct file *outfilp,
size_t len, size_t len,
unsigned int flags) unsigned int flags)
......
...@@ -338,7 +338,7 @@ ssize_t ...@@ -338,7 +338,7 @@ ssize_t
xfs_splice_read( xfs_splice_read(
bhv_desc_t *bdp, bhv_desc_t *bdp,
struct file *infilp, struct file *infilp,
struct inode *pipe, struct pipe_inode_info *pipe,
size_t count, size_t count,
int flags, int flags,
int ioflags, int ioflags,
...@@ -380,7 +380,7 @@ xfs_splice_read( ...@@ -380,7 +380,7 @@ xfs_splice_read(
ssize_t ssize_t
xfs_splice_write( xfs_splice_write(
bhv_desc_t *bdp, bhv_desc_t *bdp,
struct inode *pipe, struct pipe_inode_info *pipe,
struct file *outfilp, struct file *outfilp,
size_t count, size_t count,
int flags, int flags,
......
...@@ -94,9 +94,9 @@ extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *, ...@@ -94,9 +94,9 @@ extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
loff_t *, int, size_t, read_actor_t, loff_t *, int, size_t, read_actor_t,
void *, struct cred *); void *, struct cred *);
extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *,
struct inode *, size_t, int, int, struct pipe_inode_info *, size_t, int, int,
struct cred *); struct cred *);
extern ssize_t xfs_splice_write(struct bhv_desc *, struct inode *, extern ssize_t xfs_splice_write(struct bhv_desc *, struct pipe_inode_info *,
struct file *, size_t, int, int, struct file *, size_t, int, int,
struct cred *); struct cred *);
......
...@@ -174,9 +174,9 @@ typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *, ...@@ -174,9 +174,9 @@ typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
loff_t *, int, size_t, read_actor_t, loff_t *, int, size_t, read_actor_t,
void *, struct cred *); void *, struct cred *);
typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *,
struct inode *, size_t, int, int, struct pipe_inode_info *, size_t, int, int,
struct cred *); struct cred *);
typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct inode *, typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct pipe_inode_info *,
struct file *, size_t, int, int, struct file *, size_t, int, int,
struct cred *); struct cred *);
typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
......
...@@ -318,8 +318,8 @@ ...@@ -318,8 +318,8 @@
#define __NR_unshare 310 #define __NR_unshare 310
#define __NR_set_robust_list 311 #define __NR_set_robust_list 311
#define __NR_get_robust_list 312 #define __NR_get_robust_list 312
#define __NR_sys_splice 313 #define __NR_splice 313
#define __NR_sys_sync_file_range 314 #define __NR_sync_file_range 314
#define NR_syscalls 315 #define NR_syscalls 315
......
...@@ -1039,8 +1039,8 @@ struct file_operations { ...@@ -1039,8 +1039,8 @@ struct file_operations {
int (*check_flags)(int); int (*check_flags)(int);
int (*dir_notify)(struct file *filp, unsigned long arg); int (*dir_notify)(struct file *filp, unsigned long arg);
int (*flock) (struct file *, int, struct file_lock *); int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct inode *, struct file *, size_t, unsigned int); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, struct inode *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
}; };
struct inode_operations { struct inode_operations {
...@@ -1611,8 +1611,17 @@ extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor ...@@ -1611,8 +1611,17 @@ extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor
extern void do_generic_mapping_read(struct address_space *mapping, extern void do_generic_mapping_read(struct address_space *mapping,
struct file_ra_state *, struct file *, struct file_ra_state *, struct file *,
loff_t *, read_descriptor_t *, read_actor_t); loff_t *, read_descriptor_t *, read_actor_t);
extern ssize_t generic_file_splice_read(struct file *, struct inode *, size_t, unsigned int);
extern ssize_t generic_file_splice_write(struct inode *, struct file *, size_t, unsigned int); /* fs/splice.c */
extern ssize_t generic_file_splice_read(struct file *,
struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
struct file *, size_t, unsigned int);
extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
struct file *out, size_t len, unsigned int flags);
extern long do_splice_direct(struct file *in, struct file *out,
size_t len, unsigned int flags);
extern void extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
......
...@@ -36,27 +36,19 @@ struct pipe_inode_info { ...@@ -36,27 +36,19 @@ struct pipe_inode_info {
unsigned int w_counter; unsigned int w_counter;
struct fasync_struct *fasync_readers; struct fasync_struct *fasync_readers;
struct fasync_struct *fasync_writers; struct fasync_struct *fasync_writers;
struct inode *inode;
}; };
/* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
#define PIPE_SIZE PAGE_SIZE #define PIPE_SIZE PAGE_SIZE
#define PIPE_MUTEX(inode) (&(inode).i_mutex)
#define PIPE_WAIT(inode) (&(inode).i_pipe->wait)
#define PIPE_READERS(inode) ((inode).i_pipe->readers)
#define PIPE_WRITERS(inode) ((inode).i_pipe->writers)
#define PIPE_WAITING_WRITERS(inode) ((inode).i_pipe->waiting_writers)
#define PIPE_RCOUNTER(inode) ((inode).i_pipe->r_counter)
#define PIPE_WCOUNTER(inode) ((inode).i_pipe->w_counter)
#define PIPE_FASYNC_READERS(inode) (&((inode).i_pipe->fasync_readers))
#define PIPE_FASYNC_WRITERS(inode) (&((inode).i_pipe->fasync_writers))
/* Drop the inode semaphore and wait for a pipe event, atomically */ /* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct inode * inode); void pipe_wait(struct pipe_inode_info *pipe);
struct inode* pipe_new(struct inode* inode); struct pipe_inode_info * alloc_pipe_info(struct inode * inode);
void free_pipe_info(struct inode* inode); void free_pipe_info(struct inode * inode);
void __free_pipe_info(struct pipe_inode_info *);
/* /*
* splice is tied to pipes as a transport (at least for now), so we'll just * splice is tied to pipes as a transport (at least for now), so we'll just
......
...@@ -684,6 +684,7 @@ static inline void prefetch_stack(struct task_struct *t) { } ...@@ -684,6 +684,7 @@ static inline void prefetch_stack(struct task_struct *t) { }
struct audit_context; /* See audit.c */ struct audit_context; /* See audit.c */
struct mempolicy; struct mempolicy;
struct pipe_inode_info;
enum sleep_type { enum sleep_type {
SLEEP_NORMAL, SLEEP_NORMAL,
...@@ -882,6 +883,11 @@ struct task_struct { ...@@ -882,6 +883,11 @@ struct task_struct {
atomic_t fs_excl; /* holding fs exclusive resources */ atomic_t fs_excl; /* holding fs exclusive resources */
struct rcu_head rcu; struct rcu_head rcu;
/*
* cache last used pipe for splice
*/
struct pipe_inode_info *splice_pipe;
}; };
static inline pid_t process_group(struct task_struct *tsk) static inline pid_t process_group(struct task_struct *tsk)
......
...@@ -569,8 +569,11 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename, ...@@ -569,8 +569,11 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename,
asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
int flags, int mode); int flags, int mode);
asmlinkage long sys_unshare(unsigned long unshare_flags); asmlinkage long sys_unshare(unsigned long unshare_flags);
asmlinkage long sys_splice(int fdin, int fdout, size_t len,
unsigned int flags); asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
int fd_out, loff_t __user *off_out,
size_t len, unsigned int flags);
asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
unsigned int flags); unsigned int flags);
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/futex.h> #include <linux/futex.h>
#include <linux/compat.h> #include <linux/compat.h>
#include <linux/pipe_fs_i.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/unistd.h> #include <asm/unistd.h>
...@@ -941,6 +942,9 @@ fastcall NORET_TYPE void do_exit(long code) ...@@ -941,6 +942,9 @@ fastcall NORET_TYPE void do_exit(long code)
if (tsk->io_context) if (tsk->io_context)
exit_io_context(); exit_io_context();
if (tsk->splice_pipe)
__free_pipe_info(tsk->splice_pipe);
/* PF_DEAD causes final put_task_struct after we schedule. */ /* PF_DEAD causes final put_task_struct after we schedule. */
preempt_disable(); preempt_disable();
BUG_ON(tsk->flags & PF_DEAD); BUG_ON(tsk->flags & PF_DEAD);
......
...@@ -119,10 +119,6 @@ static ssize_t sock_writev(struct file *file, const struct iovec *vector, ...@@ -119,10 +119,6 @@ static ssize_t sock_writev(struct file *file, const struct iovec *vector,
static ssize_t sock_sendpage(struct file *file, struct page *page, static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more); int offset, size_t size, loff_t *ppos, int more);
extern ssize_t generic_splice_sendpage(struct inode *inode, struct file *out,
size_t len, unsigned int flags);
/* /*
* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
* in the operation structures but are done directly via the socketcall() multiplexor. * in the operation structures but are done directly via the socketcall() multiplexor.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment