Commit ebdef6d5 authored by Davide Libenzi, committed by Linus Torvalds

[PATCH] epoll bits 0.59 ...

- Finalized the interface ( see the user space sketch below ) by :

        * Using an epoll_event structure instead of the pollfd one
        * Adding a 64 bit opaque data member to the epoll_event structure
        * Removing the "fd" member from the epoll_event structure
        * Removing the "revents" member to leave space for a single 32 bit
                "events" member

- Fixed the problem where, due to the new callback'd wake_up() mechanism,
        loops might be generated, bringing deadlocks or stack blow ups.
        In fact a user could create a cycle by adding epoll fds inside
        other epoll fds ( sketched below ). The patch solves the problem by either :

        * Moving the wake_up() call done on the poll wait queue head
                outside the locked region
        * Implementing a new safe wake up function for the poll wait queue
                head
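
For reference, this is the shape of the cycle the safe wake up code has
to break ( a sketch reusing the declarations from the example above; the
make_loop() name is illustration only ): two epoll fds added inside each
other, so that a wake up on one would recursively wake up the other.

void make_loop(void)
{
	int ep1 = syscall(__NR_epoll_create, 1);
	int ep2 = syscall(__NR_epoll_create, 1);
	struct epoll_event ev = { POLLIN, 0 };

	/* each epoll fd polls the other one -> wake_up() loop */
	syscall(__NR_epoll_ctl, ep1, EPOLL_CTL_ADD, ep2, &ev);
	syscall(__NR_epoll_ctl, ep2, EPOLL_CTL_ADD, ep1, &ev);
}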

- Some variable renaming

- Changed __NR_sys_epoll_* to __NR_epoll_* ( Hanna Linder )

- Blocked the add operation of an epoll file descriptor inside itself
        ( it now fails with EINVAL, sketched below )
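
A sketch of the new failure mode ( again reusing the declarations from
the first example; try_self_add() is illustration only; the errno value
is the one set by the new check in sys_epoll_ctl() ):

void try_self_add(void)
{
	int epfd = syscall(__NR_epoll_create, 1);
	struct epoll_event ev = { POLLIN, 0 };

	/* adding the epoll fd to its own interest set is now rejected */
	if (syscall(__NR_epoll_ctl, epfd, EPOLL_CTL_ADD, epfd, &ev) < 0)
		perror("epoll_ctl");	/* fails with EINVAL */
}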

- Comments added/fixed
parent 5e6c072f
@@ -485,9 +485,9 @@ syscall_handler_t *sys_call_table[] = {
 	[ __NR_free_hugepages ] = sys_ni_syscall,
 	[ __NR_exit_group ] = sys_exit_group,
 	[ __NR_lookup_dcookie ] = sys_lookup_dcookie,
-	[ __NR_sys_epoll_create ] = sys_epoll_create,
-	[ __NR_sys_epoll_ctl ] = sys_epoll_ctl,
-	[ __NR_sys_epoll_wait ] = sys_epoll_wait,
+	[ __NR_epoll_create ] = sys_epoll_create,
+	[ __NR_epoll_ctl ] = sys_epoll_ctl,
+	[ __NR_epoll_wait ] = sys_epoll_wait,
 	[ __NR_remap_file_pages ] = sys_remap_file_pages,
 	ARCH_SYSCALLS
...
@@ -52,15 +52,18 @@
 #define DNPRINTK(n, x) (void) 0
 #endif /* #if DEBUG_EPOLL > 0 */
 
-#define DEBUG_DPI 0
+#define DEBUG_EPI 0
 
-#if DEBUG_DPI != 0
-#define DPI_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE /* | SLAB_POISON */)
-#else /* #if DEBUG_DPI != 0 */
-#define DPI_SLAB_DEBUG 0
-#endif /* #if DEBUG_DPI != 0 */
+#if DEBUG_EPI != 0
+#define EPI_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE /* | SLAB_POISON */)
+#else /* #if DEBUG_EPI != 0 */
+#define EPI_SLAB_DEBUG 0
+#endif /* #if DEBUG_EPI != 0 */
+
+/* Maximum number of poll wake up nests we are allowing */
+#define EP_MAX_POLLWAKE_NESTS 4
 
 /* Maximum size of the hash in bits ( 2^N ) */
 #define EP_MAX_HASH_BITS 17
@@ -78,10 +81,10 @@
 			((1 << (hbits)) % EP_HENTRY_X_PAGE ? 1: 0)))
 
 /* Macro to allocate a "struct epitem" from the slab cache */
-#define DPI_MEM_ALLOC()	(struct epitem *) kmem_cache_alloc(dpi_cache, SLAB_KERNEL)
+#define EPI_MEM_ALLOC()	(struct epitem *) kmem_cache_alloc(epi_cache, SLAB_KERNEL)
 
 /* Macro to free a "struct epitem" to the slab cache */
-#define DPI_MEM_FREE(p)	kmem_cache_free(dpi_cache, p)
+#define EPI_MEM_FREE(p)	kmem_cache_free(epi_cache, p)
 
 /* Macro to allocate a "struct eppoll_entry" from the slab cache */
 #define PWQ_MEM_ALLOC()	(struct eppoll_entry *) kmem_cache_alloc(pwq_cache, SLAB_KERNEL)
@@ -106,7 +109,7 @@
 #define EP_ITEM_FROM_WAIT(p) ((struct epitem *) container_of(p, struct eppoll_entry, wait)->base)
 
 /* Get the "struct epitem" from an epoll queue wrapper */
-#define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->dpi)
+#define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->epi)
 
 /*
  * This is used to optimize the event transfer to userspace. Since this
@@ -121,6 +124,27 @@
 #define EP_MAX_COLLECT_ITEMS 64
 
+/*
+ * Node that is linked into the "wake_task_list" member of the "struct poll_safewake".
+ * It is used to keep track on all tasks that are currently inside the wake_up() code
+ * to 1) short-circuit the one coming from the same task and same wait queue head
+ * ( loop ) 2) allow a maximum number of epoll descriptors inclusion nesting
+ * 3) let go the ones coming from other tasks.
+ */
+struct wake_task_node {
+	struct list_head llink;
+	task_t *task;
+	wait_queue_head_t *wq;
+};
+
+/*
+ * This is used to implement the safe poll wake up avoiding to reenter
+ * the poll callback from inside wake_up().
+ */
+struct poll_safewake {
+	struct list_head wake_task_list;
+	spinlock_t lock;
+};
+
 /*
  * This structure is stored inside the "private_data" member of the file
@@ -189,7 +213,7 @@ struct epitem {
 	struct file *file;
 
 	/* The structure that describe the interested events and the source fd */
-	struct pollfd pfd;
+	struct epoll_event event;
 
 	/*
 	 * Used to keep track of the usage count of the structure. This avoids
@@ -204,11 +228,13 @@ struct epitem {
 /* Wrapper struct used by poll queueing */
 struct ep_pqueue {
 	poll_table pt;
-	struct epitem *dpi;
+	struct epitem *epi;
 };
 
+static void ep_poll_safewake_init(struct poll_safewake *psw);
+static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq);
 static unsigned int ep_get_hash_bits(unsigned int hintsize);
 static int ep_getfd(int *efd, struct inode **einode, struct file **efile);
 static int ep_alloc_pages(char **pages, int numpages);
@@ -219,22 +245,22 @@ static struct list_head *ep_hash_entry(struct eventpoll *ep, unsigned int index)
 static int ep_init(struct eventpoll *ep, unsigned int hashbits);
 static void ep_free(struct eventpoll *ep);
 static struct epitem *ep_find(struct eventpoll *ep, struct file *file);
-static void ep_use_epitem(struct epitem *dpi);
-static void ep_release_epitem(struct epitem *dpi);
+static void ep_use_epitem(struct epitem *epi);
+static void ep_release_epitem(struct epitem *epi);
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt);
-static int ep_insert(struct eventpoll *ep, struct pollfd *pfd, struct file *tfile);
-static int ep_modify(struct eventpoll *ep, struct epitem *dpi, unsigned int events);
-static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *dpi);
-static int ep_unlink(struct eventpoll *ep, struct epitem *dpi);
-static int ep_remove(struct eventpoll *ep, struct epitem *dpi);
+static int ep_insert(struct eventpoll *ep, struct epoll_event *event, struct file *tfile);
+static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event);
+static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi);
+static int ep_unlink(struct eventpoll *ep, struct epitem *epi);
+static int ep_remove(struct eventpoll *ep, struct epitem *epi);
 static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync);
 static int ep_eventpoll_close(struct inode *inode, struct file *file);
 static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait);
-static int ep_collect_ready_items(struct eventpoll *ep, struct epitem **adpi, int maxdpi);
-static int ep_send_events(struct eventpoll *ep, struct epitem **adpi, int ndpi,
-			  struct pollfd *events);
-static int ep_events_transfer(struct eventpoll *ep, struct pollfd *events, int maxevents);
-static int ep_poll(struct eventpoll *ep, struct pollfd *events, int maxevents,
+static int ep_collect_ready_items(struct eventpoll *ep, struct epitem **aepi, int maxepi);
+static int ep_send_events(struct eventpoll *ep, struct epitem **aepi, int nepi,
+			  struct epoll_event *events);
+static int ep_events_transfer(struct eventpoll *ep, struct epoll_event *events, int maxevents);
+static int ep_poll(struct eventpoll *ep, struct epoll_event *events, int maxevents,
 		   int timeout);
 static int eventpollfs_delete_dentry(struct dentry *dentry);
 static struct inode *ep_eventpoll_inode(void);
@@ -242,6 +268,9 @@ static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type,
 					      int flags, char *dev_name, void *data);
 
+/* Safe wake up implementation */
+static struct poll_safewake psw;
+
 /*
  * This semaphore is used to ensure that files are not removed
  * while epoll is using them. Namely the f_op->poll(), since
@@ -250,10 +279,10 @@ static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type,
  * and it is write-held during the file cleanup path and the epoll
  * file exit code.
  */
-struct rw_semaphore epsem;
+static struct rw_semaphore epsem;
 
 /* Slab cache used to allocate "struct epitem" */
-static kmem_cache_t *dpi_cache;
+static kmem_cache_t *epi_cache;
 
 /* Slab cache used to allocate "struct eppoll_entry" */
 static kmem_cache_t *pwq_cache;
@@ -284,6 +313,70 @@ static struct dentry_operations eventpollfs_dentry_operations = {
 
+/* Initialize the poll safe wake up structure */
+static void ep_poll_safewake_init(struct poll_safewake *psw)
+{
+	INIT_LIST_HEAD(&psw->wake_task_list);
+	spin_lock_init(&psw->lock);
+}
+
+/*
+ * Perform a safe wake up of the poll wait list. The problem is that
+ * with the new callback'd wake up system, it is possible that the
+ * poll callback is reentered from inside the call to wake_up() done
+ * on the poll wait queue head. The rule is that we cannot reenter the
+ * wake up code from the same task more than EP_MAX_POLLWAKE_NESTS times,
+ * and we cannot reenter the same wait queue head at all. This will
+ * enable to have a hierarchy of epoll file descriptor of no more than
+ * EP_MAX_POLLWAKE_NESTS deep. We need the irq version of the spin lock
+ * because this one gets called by the poll callback, that in turn is called
+ * from inside a wake_up(), that might be called from irq context.
+ */
+static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
+{
+	int wake_nests = 0;
+	unsigned long flags;
+	task_t *this_task = current;
+	struct list_head *lsthead = &psw->wake_task_list, *lnk;
+	struct wake_task_node tnode;
+
+	spin_lock_irqsave(&psw->lock, flags);
+
+	/* Try to see if the current task is already inside this wakeup call */
+	list_for_each(lnk, lsthead) {
+		struct wake_task_node *tncur = list_entry(lnk, struct wake_task_node, llink);
+
+		if (tncur->task == this_task) {
+			if (tncur->wq == wq || ++wake_nests > EP_MAX_POLLWAKE_NESTS) {
+				/*
+				 * Ops ... loop detected or maximum nest level reached.
+				 * We abort this wake by breaking the cycle itself.
+				 */
+				spin_unlock_irqrestore(&psw->lock, flags);
+				return;
+			}
+		}
+	}
+
+	/* Add the current task to the list */
+	tnode.task = this_task;
+	tnode.wq = wq;
+	list_add(&tnode.llink, lsthead);
+
+	spin_unlock_irqrestore(&psw->lock, flags);
+
+	/* Do really wake up now */
+	wake_up(wq);
+
+	/* Remove the current task from the list */
+	spin_lock_irqsave(&psw->lock, flags);
+	list_del(&tnode.llink);
+	spin_unlock_irqrestore(&psw->lock, flags);
+}
+
 /*
  * Calculate the size of the hash in bits. The returned size will be
  * bounded between EP_MIN_HASH_BITS and EP_MAX_HASH_BITS.
@@ -315,7 +408,7 @@ void eventpoll_init_file(struct file *file)
 void eventpoll_release(struct file *file)
 {
 	struct list_head *lsthead = &file->f_ep_links;
-	struct epitem *dpi;
+	struct epitem *epi;
 
 	/*
 	 * Fast check to avoid the get/release of the semaphore. Since
@@ -337,10 +430,10 @@ void eventpoll_release(struct file *file)
 	 */
 	down_write(&epsem);
 	while (!list_empty(lsthead)) {
-		dpi = list_entry(lsthead->next, struct epitem, fllink);
+		epi = list_entry(lsthead->next, struct epitem, fllink);
 
-		EP_LIST_DEL(&dpi->fllink);
-		ep_remove(dpi->ep, dpi);
+		EP_LIST_DEL(&epi->fllink);
+		ep_remove(epi->ep, epi);
 	}
 	up_write(&epsem);
 }
@@ -399,16 +492,20 @@ asmlinkage int sys_epoll_create(int size)
  * file that enable the insertion/removal/change of file descriptors inside
  * the interest set. It rapresents the kernel part of the user spcae epoll_ctl(2).
  */
-asmlinkage int sys_epoll_ctl(int epfd, int op, int fd, unsigned int events)
+asmlinkage int sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
 {
 	int error;
 	struct file *file, *tfile;
 	struct eventpoll *ep;
-	struct epitem *dpi;
-	struct pollfd pfd;
+	struct epitem *epi;
+	struct epoll_event epds;
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %u)\n",
-		     current, epfd, op, fd, events));
+		     current, epfd, op, fd, event->events));
+
+	error = -EFAULT;
+	if (copy_from_user(&epds, event, sizeof(struct epoll_event)))
+		goto eexit_1;
 
 	/* Get the "struct file *" for the eventpoll file */
 	error = -EBADF;
@@ -428,10 +525,11 @@ asmlinkage int sys_epoll_ctl(int epfd, int op, int fd, unsigned int events)
 	/*
 	 * We have to check that the file structure underneath the file descriptor
-	 * the user passed to us _is_ an eventpoll file.
+	 * the user passed to us _is_ an eventpoll file. And also we do not permit
+	 * adding an epoll file descriptor inside itself.
 	 */
 	error = -EINVAL;
-	if (!IS_FILE_EPOLL(file))
+	if (file == tfile || !IS_FILE_EPOLL(file))
 		goto eexit_3;
 
 	/*
@@ -448,30 +546,29 @@ asmlinkage int sys_epoll_ctl(int epfd, int op, int fd, unsigned int events)
 	 * This does not rapresent a problem though and we don't really want
 	 * to put an extra syncronization object to deal with this harmless condition.
 	 */
-	dpi = ep_find(ep, tfile);
+	epi = ep_find(ep, tfile);
 
 	error = -EINVAL;
 	switch (op) {
-	case EP_CTL_ADD:
-		if (!dpi) {
-			pfd.fd = fd;
-			pfd.events = events | POLLERR | POLLHUP;
-			pfd.revents = 0;
-
-			error = ep_insert(ep, &pfd, tfile);
+	case EPOLL_CTL_ADD:
+		if (!epi) {
+			epds.events |= POLLERR | POLLHUP;
+			error = ep_insert(ep, &epds, tfile);
 		} else
 			error = -EEXIST;
 		break;
-	case EP_CTL_DEL:
-		if (dpi)
-			error = ep_remove(ep, dpi);
+	case EPOLL_CTL_DEL:
+		if (epi)
+			error = ep_remove(ep, epi);
 		else
 			error = -ENOENT;
 		break;
-	case EP_CTL_MOD:
-		if (dpi)
-			error = ep_modify(ep, dpi, events | POLLERR | POLLHUP);
-		else
+	case EPOLL_CTL_MOD:
+		if (epi) {
+			epds.events |= POLLERR | POLLHUP;
+			error = ep_modify(ep, epi, &epds);
+		} else
			error = -ENOENT;
 		break;
 	}
@@ -480,8 +577,8 @@ asmlinkage int sys_epoll_ctl(int epfd, int op, int fd, unsigned int events)
 	 * The function ep_find() increments the usage count of the structure
 	 * so, if this is not NULL, we need to release it.
 	 */
-	if (dpi)
-		ep_release_epitem(dpi);
+	if (epi)
+		ep_release_epitem(epi);
 
 eexit_3:
 	fput(tfile);
@@ -489,7 +586,7 @@ asmlinkage int sys_epoll_ctl(int epfd, int op, int fd, unsigned int events)
 	fput(file);
 eexit_1:
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %u) = %d\n",
-		     current, epfd, op, fd, events, error));
+		     current, epfd, op, fd, event->events, error));
 
 	return error;
 }
@@ -499,7 +596,7 @@ asmlinkage int sys_epoll_ctl(int epfd, int op, int fd, unsigned int events)
 * Implement the event wait interface for the eventpoll file. It is the kernel
 * part of the user space epoll_wait(2).
 */
-asmlinkage int sys_epoll_wait(int epfd, struct pollfd *events, int maxevents,
+asmlinkage int sys_epoll_wait(int epfd, struct epoll_event *events, int maxevents,
 			      int timeout)
 {
 	int error;
@@ -514,7 +611,7 @@ asmlinkage int sys_epoll_wait(int epfd, struct pollfd *events, int maxevents,
 		return -EINVAL;
 
 	/* Verify that the area passed by the user is writeable */
-	if ((error = verify_area(VERIFY_WRITE, events, maxevents * sizeof(struct pollfd))))
+	if ((error = verify_area(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))))
 		goto eexit_1;
 
 	/* Get the "struct file *" for the eventpoll file */
@@ -747,9 +844,9 @@ static void ep_free(struct eventpoll *ep)
 		lsthead = ep_hash_entry(ep, i);
 		list_for_each(lnk, lsthead) {
-			struct epitem *dpi = list_entry(lnk, struct epitem, llink);
+			struct epitem *epi = list_entry(lnk, struct epitem, llink);
 
-			ep_unregister_pollwait(ep, dpi);
+			ep_unregister_pollwait(ep, epi);
 		}
 	}
@@ -763,9 +860,9 @@ static void ep_free(struct eventpoll *ep)
 		lsthead = ep_hash_entry(ep, i);
 		while (!list_empty(lsthead)) {
-			struct epitem *dpi = list_entry(lsthead->next, struct epitem, llink);
+			struct epitem *epi = list_entry(lsthead->next, struct epitem, llink);
 
-			ep_remove(ep, dpi);
+			ep_remove(ep, epi);
 		}
 	}
@@ -785,27 +882,27 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file)
 {
 	unsigned long flags;
 	struct list_head *lsthead, *lnk;
-	struct epitem *dpi = NULL;
+	struct epitem *epi = NULL;
 
 	read_lock_irqsave(&ep->lock, flags);
 
 	lsthead = ep_hash_entry(ep, ep_hash_index(ep, file));
 	list_for_each(lnk, lsthead) {
-		dpi = list_entry(lnk, struct epitem, llink);
+		epi = list_entry(lnk, struct epitem, llink);
 
-		if (dpi->file == file) {
-			ep_use_epitem(dpi);
+		if (epi->file == file) {
+			ep_use_epitem(epi);
 			break;
 		}
-		dpi = NULL;
+		epi = NULL;
 	}
 
 	read_unlock_irqrestore(&ep->lock, flags);
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n",
-		     current, file, dpi));
+		     current, file, epi));
 
-	return dpi;
+	return epi;
 }
@@ -813,10 +910,10 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file)
 * Increment the usage count of the "struct epitem" making it sure
 * that the user will have a valid pointer to reference.
 */
-static void ep_use_epitem(struct epitem *dpi)
+static void ep_use_epitem(struct epitem *epi)
 {
-	atomic_inc(&dpi->usecnt);
+	atomic_inc(&epi->usecnt);
 }
@@ -825,11 +922,11 @@ static void ep_use_epitem(struct epitem *dpi)
 * has finished using the structure. It might lead to freeing the
 * structure itself if the count goes to zero.
 */
-static void ep_release_epitem(struct epitem *dpi)
+static void ep_release_epitem(struct epitem *epi)
 {
-	if (atomic_dec_and_test(&dpi->usecnt))
-		DPI_MEM_FREE(dpi);
+	if (atomic_dec_and_test(&epi->usecnt))
+		EPI_MEM_FREE(epi);
 }
@@ -839,50 +936,50 @@ static void ep_release_epitem(struct epitem *dpi)
 */
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt)
 {
-	struct epitem *dpi = EP_ITEM_FROM_EPQUEUE(pt);
+	struct epitem *epi = EP_ITEM_FROM_EPQUEUE(pt);
 	struct eppoll_entry *pwq;
 
-	if (dpi->nwait >= 0 && (pwq = PWQ_MEM_ALLOC()))
+	if (epi->nwait >= 0 && (pwq = PWQ_MEM_ALLOC()))
 	{
 		init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
 		pwq->whead = whead;
-		pwq->base = dpi;
+		pwq->base = epi;
 		add_wait_queue(whead, &pwq->wait);
-		list_add_tail(&pwq->llink, &dpi->pwqlist);
-		dpi->nwait++;
+		list_add_tail(&pwq->llink, &epi->pwqlist);
+		epi->nwait++;
 	}
 	else
 	{
 		/* We have to signal that an error occured */
-		dpi->nwait = -1;
+		epi->nwait = -1;
 	}
 }
 
-static int ep_insert(struct eventpoll *ep, struct pollfd *pfd, struct file *tfile)
+static int ep_insert(struct eventpoll *ep, struct epoll_event *event, struct file *tfile)
 {
-	int error, revents;
+	int error, revents, pwake = 0;
 	unsigned long flags;
-	struct epitem *dpi;
+	struct epitem *epi;
 	struct ep_pqueue epq;
 
 	error = -ENOMEM;
-	if (!(dpi = DPI_MEM_ALLOC()))
+	if (!(epi = EPI_MEM_ALLOC()))
 		goto eexit_1;
 
 	/* Item initialization follow here ... */
-	INIT_LIST_HEAD(&dpi->llink);
-	INIT_LIST_HEAD(&dpi->rdllink);
-	INIT_LIST_HEAD(&dpi->fllink);
-	INIT_LIST_HEAD(&dpi->pwqlist);
-	dpi->ep = ep;
-	dpi->file = tfile;
-	dpi->pfd = *pfd;
-	atomic_set(&dpi->usecnt, 1);
-	dpi->nwait = 0;
+	INIT_LIST_HEAD(&epi->llink);
+	INIT_LIST_HEAD(&epi->rdllink);
+	INIT_LIST_HEAD(&epi->fllink);
+	INIT_LIST_HEAD(&epi->pwqlist);
+	epi->ep = ep;
+	epi->file = tfile;
+	epi->event = *event;
+	atomic_set(&epi->usecnt, 1);
+	epi->nwait = 0;
 
 	/* Initialize the poll table using the queue callback */
-	epq.dpi = dpi;
+	epq.epi = epi;
 	init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
 
 	/*
@@ -897,51 +994,55 @@ static int ep_insert(struct eventpoll *ep, struct pollfd *pfd, struct file *tfile)
 	 * install process. Namely an allocation for a wait queue failed due
 	 * high memory pressure.
 	 */
-	if (dpi->nwait < 0)
+	if (epi->nwait < 0)
 		goto eexit_2;
 
+	/* Add the current item to the list of active epoll hook for this file */
+	spin_lock(&tfile->f_ep_lock);
+	list_add_tail(&epi->fllink, &tfile->f_ep_links);
+	spin_unlock(&tfile->f_ep_lock);
+
 	/* We have to drop the new item inside our item list to keep track of it */
 	write_lock_irqsave(&ep->lock, flags);
 
 	/* Add the current item to the hash table */
-	list_add(&dpi->llink, ep_hash_entry(ep, ep_hash_index(ep, tfile)));
+	list_add(&epi->llink, ep_hash_entry(ep, ep_hash_index(ep, tfile)));
 
 	/* If the file is already "ready" we drop it inside the ready list */
-	if ((revents & pfd->events) && !EP_IS_LINKED(&dpi->rdllink)) {
-		list_add_tail(&dpi->rdllink, &ep->rdllist);
+	if ((revents & event->events) && !EP_IS_LINKED(&epi->rdllink)) {
+		list_add_tail(&epi->rdllink, &ep->rdllist);
 
 		/* Notify waiting tasks that events are available */
 		if (waitqueue_active(&ep->wq))
 			wake_up(&ep->wq);
 		if (waitqueue_active(&ep->poll_wait))
-			wake_up(&ep->poll_wait);
+			pwake++;
 	}
 
 	write_unlock_irqrestore(&ep->lock, flags);
 
-	/* Add the current item to the list of active epoll hook for this file */
-	spin_lock(&tfile->f_ep_lock);
-	list_add_tail(&dpi->fllink, &tfile->f_ep_links);
-	spin_unlock(&tfile->f_ep_lock);
+	/* We have to call this outside the lock */
+	if (pwake)
+		ep_poll_safewake(&psw, &ep->poll_wait);
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %d)\n",
-		     current, ep, pfd->fd));
+	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p)\n",
+		     current, ep, tfile));
 
 	return 0;
 
 eexit_2:
-	ep_unregister_pollwait(ep, dpi);
+	ep_unregister_pollwait(ep, epi);
 
 	/*
 	 * We need to do this because an event could have been arrived on some
 	 * allocated wait queue.
 	 */
 	write_lock_irqsave(&ep->lock, flags);
-	if (EP_IS_LINKED(&dpi->rdllink))
-		EP_LIST_DEL(&dpi->rdllink);
+	if (EP_IS_LINKED(&epi->rdllink))
+		EP_LIST_DEL(&epi->rdllink);
 	write_unlock_irqrestore(&ep->lock, flags);
 
-	DPI_MEM_FREE(dpi);
+	EPI_MEM_FREE(epi);
 
 eexit_1:
 	return error;
 }
@@ -951,8 +1052,9 @@ static int ep_insert(struct eventpoll *ep, struct pollfd *pfd, struct file *tfile)
 * Modify the interest event mask by dropping an event if the new mask
 * has a match in the current file status.
 */
-static int ep_modify(struct eventpoll *ep, struct epitem *dpi, unsigned int events)
+static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event)
 {
+	int pwake = 0;
 	unsigned int revents;
 	unsigned long flags;
@@ -962,30 +1064,37 @@ static int ep_modify(struct eventpoll *ep, struct epitem *dpi, unsigned int events)
 	 * the lock, an event might happen between the f_op->poll() call and the
 	 * new event set registering.
 	 */
-	dpi->pfd.events = events;
+	epi->event.events = event->events;
 
 	/*
 	 * Get current event bits. We can safely use the file* here because
 	 * its usage count has been increased by the caller of this function.
 	 */
-	revents = dpi->file->f_op->poll(dpi->file, NULL);
+	revents = epi->file->f_op->poll(epi->file, NULL);
 
 	write_lock_irqsave(&ep->lock, flags);
 
+	/* Copy the data member from inside the lock */
+	epi->event.data = event->data;
+
 	/* If the file is already "ready" we drop it inside the ready list */
-	if ((revents & events) && EP_IS_LINKED(&dpi->llink) &&
-	    !EP_IS_LINKED(&dpi->rdllink)) {
-		list_add_tail(&dpi->rdllink, &ep->rdllist);
+	if ((revents & event->events) && EP_IS_LINKED(&epi->llink) &&
+	    !EP_IS_LINKED(&epi->rdllink)) {
+		list_add_tail(&epi->rdllink, &ep->rdllist);
 
 		/* Notify waiting tasks that events are available */
 		if (waitqueue_active(&ep->wq))
 			wake_up(&ep->wq);
 		if (waitqueue_active(&ep->poll_wait))
-			wake_up(&ep->poll_wait);
+			pwake++;
 	}
 
 	write_unlock_irqrestore(&ep->lock, flags);
 
+	/* We have to call this outside the lock */
+	if (pwake)
+		ep_poll_safewake(&psw, &ep->poll_wait);
+
 	return 0;
 }
@@ -995,14 +1104,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *dpi, unsigned int events)
 * Since this must be called without holding "ep->lock" the atomic exchange trick
 * will protect us from multiple unregister.
 */
-static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *dpi)
+static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
 {
 	int nwait;
-	struct list_head *lsthead = &dpi->pwqlist;
+	struct list_head *lsthead = &epi->pwqlist;
 	struct eppoll_entry *pwq;
 
 	/* This is called without locks, so we need the atomic exchange */
-	nwait = xchg(&dpi->nwait, 0);
+	nwait = xchg(&epi->nwait, 0);
 
 	if (nwait)
 	{
@@ -1021,7 +1130,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *dpi)
 * Unlink the "struct epitem" from all places it might have been hooked up.
 * This function must be called with write IRQ lock on "ep->lock".
 */
-static int ep_unlink(struct eventpoll *ep, struct epitem *dpi)
+static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
 {
 	int error;
@@ -1030,7 +1139,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *dpi)
 	 * The check protect us from doing a double unlink ( crash ).
 	 */
 	error = -ENOENT;
-	if (!EP_IS_LINKED(&dpi->llink))
+	if (!EP_IS_LINKED(&epi->llink))
 		goto eexit_1;
 
 	/*
@@ -1038,20 +1147,20 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *dpi)
 	 * This operation togheter with the above check closes the door to
 	 * double unlinks.
 	 */
-	EP_LIST_DEL(&dpi->llink);
+	EP_LIST_DEL(&epi->llink);
 
 	/*
 	 * If the item we are going to remove is inside the ready file descriptors
 	 * we want to remove it from this list to avoid stale events.
 	 */
-	if (EP_IS_LINKED(&dpi->rdllink))
-		EP_LIST_DEL(&dpi->rdllink);
+	if (EP_IS_LINKED(&epi->rdllink))
+		EP_LIST_DEL(&epi->rdllink);
 
 	error = 0;
 
 eexit_1:
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %d) = %d\n",
-		     current, ep, dpi->pfd.fd, error));
+	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n",
+		     current, ep, epi->file, error));
 
 	return error;
 }
@@ -1061,7 +1170,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *dpi)
 * Removes a "struct epitem" from the eventpoll hash and deallocates
 * all the associated resources.
 */
-static int ep_remove(struct eventpoll *ep, struct epitem *dpi)
+static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 {
 	int error;
 	unsigned long flags;
@@ -1074,19 +1183,19 @@ static int ep_remove(struct eventpoll *ep, struct epitem *dpi)
 	 * will run by holding the wait queue head lock and will call our callback
 	 * that will try to get "ep->lock".
 	 */
-	ep_unregister_pollwait(ep, dpi);
+	ep_unregister_pollwait(ep, epi);
 
 	/* Remove the current item from the list of epoll hooks */
-	spin_lock(&dpi->file->f_ep_lock);
-	if (EP_IS_LINKED(&dpi->fllink))
-		EP_LIST_DEL(&dpi->fllink);
-	spin_unlock(&dpi->file->f_ep_lock);
+	spin_lock(&epi->file->f_ep_lock);
+	if (EP_IS_LINKED(&epi->fllink))
+		EP_LIST_DEL(&epi->fllink);
+	spin_unlock(&epi->file->f_ep_lock);
 
 	/* We need to acquire the write IRQ lock before calling ep_unlink() */
 	write_lock_irqsave(&ep->lock, flags);
 
 	/* Really unlink the item from the hash */
-	error = ep_unlink(ep, dpi);
+	error = ep_unlink(ep, epi);
 
 	write_unlock_irqrestore(&ep->lock, flags);
@@ -1094,12 +1203,12 @@ static int ep_remove(struct eventpoll *ep, struct epitem *dpi)
 		goto eexit_1;
 
 	/* At this point it is safe to free the eventpoll item */
-	ep_release_epitem(dpi);
+	ep_release_epitem(epi);
 
 	error = 0;
 
 eexit_1:
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %d) = %d\n",
-		     current, ep, dpi->pfd.fd, error));
+	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p) = %d\n",
+		     current, ep, epi->file, error));
 
 	return error;
 }
@@ -1112,20 +1221,21 @@ static int ep_remove(struct eventpoll *ep, struct epitem *dpi)
 */
 static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync)
 {
+	int pwake = 0;
 	unsigned long flags;
-	struct epitem *dpi = EP_ITEM_FROM_WAIT(wait);
-	struct eventpoll *ep = dpi->ep;
+	struct epitem *epi = EP_ITEM_FROM_WAIT(wait);
+	struct eventpoll *ep = epi->ep;
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) dpi=%p ep=%p\n",
-		     current, dpi->file, dpi, ep));
+	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
+		     current, epi->file, epi, ep));
 
 	write_lock_irqsave(&ep->lock, flags);
 
 	/* If this file is already in the ready list we exit soon */
-	if (EP_IS_LINKED(&dpi->rdllink))
+	if (EP_IS_LINKED(&epi->rdllink))
 		goto is_linked;
 
-	list_add_tail(&dpi->rdllink, &ep->rdllist);
+	list_add_tail(&epi->rdllink, &ep->rdllist);
 
 is_linked:
 	/*
@@ -1135,9 +1245,14 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync)
 	if (waitqueue_active(&ep->wq))
 		wake_up(&ep->wq);
 	if (waitqueue_active(&ep->poll_wait))
-		wake_up(&ep->poll_wait);
+		pwake++;
 
 	write_unlock_irqrestore(&ep->lock, flags);
 
+	/* We have to call this outside the lock */
+	if (pwake)
+		ep_poll_safewake(&psw, &ep->poll_wait);
+
 	return 1;
 }
@@ -1180,33 +1295,33 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 * during the f_op->poll() call, we try to collect the maximum number of items
 * by reducing the irqlock/irqunlock switching rate.
 */
-static int ep_collect_ready_items(struct eventpoll *ep, struct epitem **adpi, int maxdpi)
+static int ep_collect_ready_items(struct eventpoll *ep, struct epitem **aepi, int maxepi)
 {
-	int ndpi;
+	int nepi;
 	unsigned long flags;
 	struct list_head *lsthead = &ep->rdllist;
 
 	write_lock_irqsave(&ep->lock, flags);
 
-	for (ndpi = 0; ndpi < maxdpi && !list_empty(lsthead);) {
-		struct epitem *dpi = list_entry(lsthead->next, struct epitem, rdllink);
+	for (nepi = 0; nepi < maxepi && !list_empty(lsthead);) {
+		struct epitem *epi = list_entry(lsthead->next, struct epitem, rdllink);
 
 		/* Remove the item from the ready list */
-		EP_LIST_DEL(&dpi->rdllink);
+		EP_LIST_DEL(&epi->rdllink);
 
 		/*
 		 * We need to increase the usage count of the "struct epitem" because
-		 * another thread might call EP_CTL_DEL on this target and make the
+		 * another thread might call EPOLL_CTL_DEL on this target and make the
 		 * object to vanish underneath our nose.
 		 */
-		ep_use_epitem(dpi);
-		adpi[ndpi++] = dpi;
+		ep_use_epitem(epi);
+		aepi[nepi++] = epi;
 	}
 
 	write_unlock_irqrestore(&ep->lock, flags);
 
-	return ndpi;
+	return nepi;
 }
@@ -1215,28 +1330,28 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct epitem **adpi, int maxdpi)
 * __copy_to_user() might sleep, and also f_op->poll() might reenable the IRQ
 * because of the way poll() is traditionally implemented in Linux.
 */
-static int ep_send_events(struct eventpoll *ep, struct epitem **adpi, int ndpi,
-			  struct pollfd *events)
+static int ep_send_events(struct eventpoll *ep, struct epitem **aepi, int nepi,
+			  struct epoll_event *events)
 {
 	int i, eventcnt, eventbuf, revents;
-	struct epitem *dpi;
-	struct pollfd pfd[EP_MAX_BUF_EVENTS];
+	struct epitem *epi;
+	struct epoll_event event[EP_MAX_BUF_EVENTS];
 
-	for (i = 0, eventcnt = 0, eventbuf = 0; i < ndpi; i++, adpi++) {
-		dpi = *adpi;
+	for (i = 0, eventcnt = 0, eventbuf = 0; i < nepi; i++, aepi++) {
+		epi = *aepi;
 
 		/* Get the ready file event set */
-		revents = dpi->file->f_op->poll(dpi->file, NULL);
+		revents = epi->file->f_op->poll(epi->file, NULL);
 
-		if (revents & dpi->pfd.events) {
-			pfd[eventbuf] = dpi->pfd;
-			pfd[eventbuf].revents = revents & pfd[eventbuf].events;
+		if (revents & epi->event.events) {
+			event[eventbuf] = epi->event;
+			event[eventbuf].events &= revents;
 			eventbuf++;
 			if (eventbuf == EP_MAX_BUF_EVENTS) {
-				if (__copy_to_user(&events[eventcnt], pfd,
-						   eventbuf * sizeof(struct pollfd))) {
-					for (; i < ndpi; i++, adpi++)
-						ep_release_epitem(*adpi);
+				if (__copy_to_user(&events[eventcnt], event,
+						   eventbuf * sizeof(struct epoll_event))) {
+					for (; i < nepi; i++, aepi++)
+						ep_release_epitem(*aepi);
 					return -EFAULT;
 				}
 				eventcnt += eventbuf;
@@ -1244,12 +1359,12 @@ static int ep_send_events(struct eventpoll *ep, struct epitem **adpi, int ndpi,
 			}
 		}
 
-		ep_release_epitem(dpi);
+		ep_release_epitem(epi);
 	}
 
 	if (eventbuf) {
-		if (__copy_to_user(&events[eventcnt], pfd,
-				   eventbuf * sizeof(struct pollfd)))
+		if (__copy_to_user(&events[eventcnt], event,
+				   eventbuf * sizeof(struct epoll_event)))
 			return -EFAULT;
 		eventcnt += eventbuf;
 	}
@@ -1261,10 +1376,10 @@ static int ep_send_events(struct eventpoll *ep, struct epitem **adpi, int ndpi,
 /*
 * Perform the transfer of events to user space.
 */
-static int ep_events_transfer(struct eventpoll *ep, struct pollfd *events, int maxevents)
+static int ep_events_transfer(struct eventpoll *ep, struct epoll_event *events, int maxevents)
 {
-	int eventcnt, ndpi, sdpi, maxdpi;
-	struct epitem *adpi[EP_MAX_COLLECT_ITEMS];
+	int eventcnt, nepi, sepi, maxepi;
+	struct epitem *aepi[EP_MAX_COLLECT_ITEMS];
 
 	/*
 	 * We need to lock this because we could be hit by
@@ -1279,22 +1394,22 @@ static int ep_events_transfer(struct eventpoll *ep, struct pollfd *events, int maxevents)
 	for (eventcnt = 0; eventcnt < maxevents;) {
 		/* Maximum items we can extract this time */
-		maxdpi = min(EP_MAX_COLLECT_ITEMS, maxevents - eventcnt);
+		maxepi = min(EP_MAX_COLLECT_ITEMS, maxevents - eventcnt);
 
 		/* Collect/extract ready items */
-		ndpi = ep_collect_ready_items(ep, adpi, maxdpi);
+		nepi = ep_collect_ready_items(ep, aepi, maxepi);
 
-		if (ndpi) {
+		if (nepi) {
 			/* Send events to userspace */
-			sdpi = ep_send_events(ep, adpi, ndpi, &events[eventcnt]);
-			if (sdpi < 0) {
+			sepi = ep_send_events(ep, aepi, nepi, &events[eventcnt]);
+			if (sepi < 0) {
 				up_read(&epsem);
-				return sdpi;
+				return sepi;
 			}
-			eventcnt += sdpi;
+			eventcnt += sepi;
 		}
 
-		if (ndpi < maxdpi)
+		if (nepi < maxepi)
 			break;
 	}
@@ -1304,7 +1419,7 @@ static int ep_events_transfer(struct eventpoll *ep, struct pollfd *events, int maxevents)
 }
 
-static int ep_poll(struct eventpoll *ep, struct pollfd *events, int maxevents,
+static int ep_poll(struct eventpoll *ep, struct epoll_event *events, int maxevents,
 		   int timeout)
 {
 	int res, eavail;
@@ -1423,13 +1538,16 @@ static int __init eventpoll_init(void)
 	/* Initialize the semaphore used to syncronize the file cleanup code */
 	init_rwsem(&epsem);
 
+	/* Initialize the structure used to perform safe poll wait head wake ups */
+	ep_poll_safewake_init(&psw);
+
 	/* Allocates slab cache used to allocate "struct epitem" items */
 	error = -ENOMEM;
-	dpi_cache = kmem_cache_create("eventpoll dpi",
+	epi_cache = kmem_cache_create("eventpoll epi",
 				      sizeof(struct epitem),
 				      0,
-				      SLAB_HWCACHE_ALIGN | DPI_SLAB_DEBUG, NULL, NULL);
-	if (!dpi_cache)
+				      SLAB_HWCACHE_ALIGN | EPI_SLAB_DEBUG, NULL, NULL);
+	if (!epi_cache)
 		goto eexit_1;
 
 	/* Allocates slab cache used to allocate "struct eppoll_entry" */
@@ -1437,7 +1555,7 @@ static int __init eventpoll_init(void)
 	pwq_cache = kmem_cache_create("eventpoll pwq",
 				      sizeof(struct eppoll_entry),
 				      0,
-				      DPI_SLAB_DEBUG, NULL, NULL);
+				      EPI_SLAB_DEBUG, NULL, NULL);
 	if (!pwq_cache)
 		goto eexit_2;
@@ -1464,7 +1582,7 @@ static int __init eventpoll_init(void)
 eexit_3:
 	kmem_cache_destroy(pwq_cache);
 eexit_2:
-	kmem_cache_destroy(dpi_cache);
+	kmem_cache_destroy(epi_cache);
 eexit_1:
 
 	return error;
@@ -1477,7 +1595,7 @@ static void __exit eventpoll_exit(void)
 	unregister_filesystem(&eventpoll_fs_type);
 	mntput(eventpoll_mnt);
 	kmem_cache_destroy(pwq_cache);
-	kmem_cache_destroy(dpi_cache);
+	kmem_cache_destroy(epi_cache);
 }
 
 module_init(eventpoll_init);
...
@@ -259,9 +259,9 @@
 #define __NR_free_hugepages	251
 #define __NR_exit_group		252
 #define __NR_lookup_dcookie	253
-#define __NR_sys_epoll_create	254
-#define __NR_sys_epoll_ctl	255
-#define __NR_sys_epoll_wait	256
+#define __NR_epoll_create	254
+#define __NR_epoll_ctl		255
+#define __NR_epoll_wait		256
 #define __NR_remap_file_pages	257
 #define __NR_set_tid_address	258
...
@@ -240,9 +240,9 @@
 #define __NR_free_hugepages	233
 #define __NR_exit_group		234
 #define __NR_lookup_dcookie	235
-#define __NR_sys_epoll_create	236
-#define __NR_sys_epoll_ctl	237
-#define __NR_sys_epoll_wait	238
+#define __NR_epoll_create	236
+#define __NR_epoll_ctl		237
+#define __NR_epoll_wait		238
 #define __NR_remap_file_pages	239
 
 #define __NR(n) #n
...
@@ -16,22 +16,25 @@
 /* Valid opcodes to issue to sys_epoll_ctl() */
-#define EP_CTL_ADD 1
-#define EP_CTL_DEL 2
-#define EP_CTL_MOD 3
+#define EPOLL_CTL_ADD 1
+#define EPOLL_CTL_DEL 2
+#define EPOLL_CTL_MOD 3
+
+struct epoll_event {
+	__u32 events;
+	__u64 data;
+};
 
 #ifdef __KERNEL__
 
 /* Forward declarations to avoid compiler errors */
 struct file;
-struct pollfd;
 
 /* Kernel space functions implementing the user space "epoll" API */
 asmlinkage int sys_epoll_create(int size);
-asmlinkage int sys_epoll_ctl(int epfd, int op, int fd, unsigned int events);
-asmlinkage int sys_epoll_wait(int epfd, struct pollfd *events, int maxevents,
+asmlinkage int sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
+asmlinkage int sys_epoll_wait(int epfd, struct epoll_event *events, int maxevents,
 			      int timeout);
 
 /* Used to initialize the epoll bits inside the "struct file" */
...