Commit 30724dcd authored by Andi Kleen, committed by Linus Torvalds

[PATCH] poll/select fast path

This patch streamlines poll and select by adding fast paths for the
common case where only a small number of descriptors is passed; the
majority of polls/selects seem to be of this nature. The main saving
comes from not allocating two pages for the wait queue and table, but
instead using stack allocation (up to 256 bytes) when only a few
descriptors are needed. This makes it as fast as 2.0 again, and even a
bit faster, because the wait queue page allocation is avoided too
(except when drivers overflow it).
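
The fast-path idea is generic enough to sketch outside the kernel: keep
the table in a small on-stack buffer when few descriptors are passed,
and fall back to a real allocation only for large requests. The sketch
below is illustrative only; poll_like, struct entry and the cutoff
constant are stand-ins, not the patch's actual fs/select.c code.

#include <stdlib.h>

#define FAST_POLL_MAX 128

struct entry { int fd; short events, revents; };

static int poll_like(const struct entry *ufds, unsigned int nfds)
{
	struct entry inline_tab[FAST_POLL_MAX];	/* small requests live on the stack */
	struct entry *tab = inline_tab;
	int ret = 0;

	if (nfds > FAST_POLL_MAX) {		/* slow path: too many descriptors */
		tab = malloc(nfds * sizeof(*tab));
		if (tab == NULL)
			return -1;
	}

	/* ... copy ufds into tab and do the actual waiting here ... */
	(void)ufds;

	if (tab != inline_tab)			/* only free what was actually allocated */
		free(tab);
	return ret;
}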

select also skips over big holes much faster and avoids the separate
pass that determined the maximum number of descriptors in the bitmap.
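
A hedged illustration of that bitmap walk (not the kernel's actual
loop): scan the fd set one unsigned long at a time, skip words that are
entirely zero, and record the highest descriptor seen in the same pass,
so no separate max-fd scan is needed. All names below are illustrative.

#include <limits.h>

#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

/* Return the highest set bit (descriptor) in an n-bit fd bitmap, or -1. */
static int highest_fd(const unsigned long *bits, unsigned int n)
{
	int max_fd = -1;
	unsigned int nwords = (n + BITS_PER_LONG - 1) / BITS_PER_LONG;

	for (unsigned int i = 0; i < nwords; i++) {
		unsigned long w = bits[i];

		if (w == 0)			/* hole: skip 32/64 descriptors at once */
			continue;
		for (unsigned int b = 0; b < BITS_PER_LONG; b++)
			if (w & (1UL << b))
				max_fd = (int)(i * BITS_PER_LONG + b);
	}
	return max_fd;
}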

A typical Linux system saves a considerable amount of unswappable memory
with this patch, because it usually has 10+ daemons sitting in poll or
select, each with two pages allocated for data and wait queue.

Some other cleanups.
parent 86403107
@@ -10,13 +10,32 @@
 #include <linux/mm.h>
 #include <asm/uaccess.h>
 
-struct poll_table_page;
+#define POLL_INLINE_BYTES 256
+#define FAST_SELECT_MAX 128
+#define FAST_POLL_MAX 128
+#define POLL_INLINE_ENTRIES (1+(POLL_INLINE_BYTES / sizeof(struct poll_table_entry)))
+
+struct poll_table_entry {
+	struct file * filp;
+	wait_queue_t wait;
+	wait_queue_head_t * wait_address;
+};
+
+struct poll_table_page {
+	struct poll_table_page * next;
+	struct poll_table_entry * entry;
+	struct poll_table_entry entries[0];
+};
 
 typedef struct poll_table_struct {
 	int error;
 	struct poll_table_page * table;
+	struct poll_table_page inline_page;
+	struct poll_table_entry inline_table[POLL_INLINE_ENTRIES];
 } poll_table;
 
+#define POLL_INLINE_TABLE_LEN (sizeof(poll_table) - offsetof(poll_table, inline_page))
+
 extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p);
 
 static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
@@ -30,6 +49,7 @@ static inline void poll_initwait(poll_table* pt)
 	pt->error = 0;
 	pt->table = NULL;
 }
+
 extern void poll_freewait(poll_table* pt);
@@ -49,27 +69,6 @@ typedef struct {
 #define FDS_LONGS(nr)	(((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG)
 #define FDS_BYTES(nr)	(FDS_LONGS(nr)*sizeof(long))
 
-/*
- * We do a VERIFY_WRITE here even though we are only reading this time:
- * we'll write to it eventually..
- *
- * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
- */
-static inline
-int get_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset)
-{
-	nr = FDS_BYTES(nr);
-	if (ufdset) {
-		int error;
-		error = verify_area(VERIFY_WRITE, ufdset, nr);
-		if (!error && __copy_from_user(fdset, ufdset, nr))
-			error = -EFAULT;
-		return error;
-	}
-	memset(fdset, 0, nr);
-	return 0;
-}
-
 static inline
 void set_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset)
 {
@@ -77,12 +76,6 @@ void set_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset)
 		__copy_to_user(ufdset, fdset, FDS_BYTES(nr));
 }
 
-static inline
-void zero_fd_set(unsigned long nr, unsigned long *fdset)
-{
-	memset(fdset, 0, FDS_BYTES(nr));
-}
-
 extern int do_select(int n, fd_set_bits *fds, long *timeout);
 
 #endif /* KERNEL */
...
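
The hunk above only touches the header; the fs/select.c code that fills
the inline table is not shown here. As a rough self-check of the new
sizing macros, the fragment below rebuilds the same structures in user
space with stand-in wait-queue types, so the printed numbers only
approximate what the kernel sees; the point is that the embedded area
is bounded by POLL_INLINE_BYTES plus one extra entry.

#include <stdio.h>
#include <stddef.h>

/* Stand-ins for kernel types so the header fragment compiles in user space. */
struct file;
typedef struct { void *task; void *task_list; } wait_queue_t;
typedef struct { void *lock; void *task_list; } wait_queue_head_t;

#define POLL_INLINE_BYTES 256
#define POLL_INLINE_ENTRIES (1+(POLL_INLINE_BYTES / sizeof(struct poll_table_entry)))

struct poll_table_entry {
	struct file * filp;
	wait_queue_t wait;
	wait_queue_head_t * wait_address;
};

struct poll_table_page {
	struct poll_table_page * next;
	struct poll_table_entry * entry;
	struct poll_table_entry entries[0];
};

typedef struct poll_table_struct {
	int error;
	struct poll_table_page * table;
	struct poll_table_page inline_page;
	struct poll_table_entry inline_table[POLL_INLINE_ENTRIES];
} poll_table;

#define POLL_INLINE_TABLE_LEN (sizeof(poll_table) - offsetof(poll_table, inline_page))

int main(void)
{
	printf("%zu inline entries, %zu bytes embedded in poll_table\n",
	       (size_t)POLL_INLINE_ENTRIES, (size_t)POLL_INLINE_TABLE_LEN);
	return 0;
}

Embedding inline_page directly in poll_table is what lets the table
start out without any page allocation; a separate poll_table_page only
has to be chained in when drivers register more wait queues than fit in
inline_table, matching the changelog's note that the wait queue page is
allocated only on overflow.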