Commit dd826030, authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] x86 TSS: io port caching

There's one additional step we can do on top of the ports-max code to get rid
of copying in X.org's case: cache the last task that set up the IO bitmap. 
This means we can set the offset to invalid and keep the IO bitmap of that
task, and switch back to a valid offset (without any copying) when switching
back to that task.  (or do a copy if there is another ioperm task we switch
to.)

I've attached ioport-cache-2.6.8.1.patch that implements this. When
there's a single active ioperm()-using task in the system then the
context-switch overhead is very low and constant:

 # ./ioperm-latency
 default no ioperm:             scheduling latency: 2478 cycles
 turning on port 80 ioperm:     scheduling latency: 2499 cycles
 turning on port 65535 ioperm:  scheduling latency: 2481 cycles

This single-ioperm-user situation matches 99% of the actual ioperm()
usage scenarios and gets rid of any copying whatsoever - without relying
on any fault mechanism. I can see no advantage of the GPF approach over
this patch.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 9642f757
......@@ -56,7 +56,7 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int ex
*/
asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
unsigned int i, max_long, bytes, bytes_updated;
unsigned long i, max_long, bytes, bytes_updated;
struct thread_struct * t = &current->thread;
struct tss_struct * tss;
unsigned long *bitmap;
......@@ -107,6 +107,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
/* Update the TSS: */
memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
tss->io_bitmap_max = bytes;
tss->io_bitmap_owner = &current->thread;
tss->io_bitmap_base = IO_BITMAP_OFFSET;
put_cpu();
......
......@@ -301,8 +301,11 @@ void exit_thread(void)
/*
* Careful, clear this in the TSS too:
*/
memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
t->io_bitmap_max = 0;
tss->io_bitmap_owner = NULL;
tss->io_bitmap_max = 0;
tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
put_cpu();
}
}
......@@ -472,6 +475,38 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
return 1;
}
/*
 * Bring the TSS I/O permission bitmap in line with the incoming task.
 *
 * @next: thread state of the task being switched to
 * @tss:  TSS whose I/O bitmap fields are updated
 *
 * The TSS remembers which thread's bitmap it currently holds
 * (tss->io_bitmap_owner), so switching away from that thread and back
 * again only toggles tss->io_bitmap_base and copies nothing.
 */
static inline void
handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
{
	unsigned long nbytes;

	if (next->io_bitmap_ptr) {
		if (likely(tss->io_bitmap_owner == next)) {
			/*
			 * Cache hit: the bitmap already in the TSS belongs
			 * to the incoming task, so pointing the offset back
			 * at it is all that is required.
			 */
			tss->io_bitmap_base = IO_BITMAP_OFFSET;
		} else {
			/*
			 * Cache miss: install the incoming task's bitmap.
			 * Copying the larger of the two cached maxima also
			 * overwrites the range the previously cached bitmap
			 * covered. Normally this is 128 bytes or less.
			 */
			nbytes = max(tss->io_bitmap_max, next->io_bitmap_max);
			memcpy(tss->io_bitmap, next->io_bitmap_ptr, nbytes);

			tss->io_bitmap_max = next->io_bitmap_max;
			tss->io_bitmap_owner = next;
			tss->io_bitmap_base = IO_BITMAP_OFFSET;
		}
	} else {
		/*
		 * The incoming task has no bitmap: switch it off through an
		 * invalid offset, but leave the cached owner and the bitmap
		 * contents in the TSS untouched.
		 */
		tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
	}
}
/*
* This special macro can be used to load a debugging register
*/
......@@ -556,20 +591,9 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
loaddebug(next, 7);
}
if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
if (next->io_bitmap_ptr)
/*
* Copy the relevant range of the IO bitmap.
* Normally this is 128 bytes or less:
*/
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
max(prev->io_bitmap_max, next->io_bitmap_max));
else
/*
* Clear any possible leftover bits:
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
handle_io_bitmap(next, tss);
return prev_p;
}
......
......@@ -360,6 +360,8 @@ typedef struct {
unsigned long seg;
} mm_segment_t;
struct thread_struct;
struct tss_struct {
unsigned short back_link,__blh;
unsigned long esp0;
......@@ -391,10 +393,15 @@ struct tss_struct {
* be within the limit.
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
/*
* Cache the current maximum and the last task that used the bitmap:
*/
unsigned long io_bitmap_max;
struct thread_struct *io_bitmap_owner;
/*
* pads the TSS to be cacheline-aligned (size is 0x100)
*/
unsigned long __cacheline_filler[37];
unsigned long __cacheline_filler[35];
/*
* .. and then another 0x100 bytes for emergency kernel stack
*/
......@@ -426,7 +433,7 @@ struct thread_struct {
/* IO permissions */
unsigned long *io_bitmap_ptr;
/* max allowed port in the bitmap, in bytes: */
unsigned int io_bitmap_max;
unsigned long io_bitmap_max;
};
#define INIT_THREAD { \
......@@ -446,7 +453,7 @@ struct thread_struct {
.ss0 = __KERNEL_DS, \
.ss1 = __KERNEL_CS, \
.ldt = GDT_ENTRY_LDT, \
.io_bitmap_base = offsetof(struct tss_struct,io_bitmap), \
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
.io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment