Commit 6a2a196f authored by Dave Jones, committed by Jaroslav Kysela

[PATCH] Dynamic LDT sizing.

Originally from Manfred Spraul.

* dynamically grow the LDT
Every app that's linked against libpthread right now allocates a full 64 kB LDT, without proper error handling, and always from the vmalloc area.
parent 68575eb8
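As a rough illustration of the new sizing rule (a user-space sketch only, not kernel code; LDT_ENTRIES = 8192, LDT_ENTRY_SIZE = 8 and PAGE_SIZE = 4096 are the usual i386 values and are assumed here): alloc_ldt() rounds the requested entry count up to the next 512-entry chunk, uses kmalloc() while the table still fits in one page, and only switches to vmalloc() beyond that.

#include <stdio.h>

/* Usual i386 constants, assumed for this illustration (cf. asm/ldt.h, asm/page.h). */
#define LDT_ENTRIES     8192
#define LDT_ENTRY_SIZE  8
#define PAGE_SIZE       4096

int main(void)
{
        int wanted = 20;                        /* e.g. entry_number + 1 in write_ldt() */
        int mincount = (wanted + 511) & ~511;   /* round up to a 512-entry chunk */
        int bytes = mincount * LDT_ENTRY_SIZE;

        /* Old scheme: every LDT user paid for the full table up front. */
        printf("old: %d bytes via vmalloc\n", LDT_ENTRIES * LDT_ENTRY_SIZE);   /* 65536 */

        /* New scheme: small tables fit in a single kmalloc'd page. */
        printf("new: %d entries, %d bytes via %s\n", mincount, bytes,
               bytes > PAGE_SIZE ? "vmalloc" : "kmalloc");                     /* 512, 4096, kmalloc */
        return 0;
}

So the common libpthread case (a handful of descriptors) now stays within a single kmalloc'd page; the full 64 kB vmalloc allocation only happens when an application actually installs high LDT entries.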
@@ -12,37 +12,137 @@
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
+
+#ifdef CONFIG_SMP /* avoids "defined but not used" warning */
+static void flush_ldt(void *mm)
+{
+        if (current->mm)
+                load_LDT(&current->mm->context);
+}
+#endif
+
+static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+{
+        void *oldldt;
+        void *newldt;
+        int oldsize;
+
+        if (mincount <= pc->size)
+                return 0;
+        oldsize = pc->size;
+        mincount = (mincount+511)&(~511);
+        if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
+                newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+        else
+                newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+
+        if (!newldt)
+                return -ENOMEM;
+
+        if (oldsize)
+                memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+        oldldt = pc->ldt;
+        memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
+        wmb();
+        pc->ldt = newldt;
+        pc->size = mincount;
+        if (reload) {
+                load_LDT(pc);
+#ifdef CONFIG_SMP
+                if (current->mm->cpu_vm_mask != (1<<smp_processor_id()))
+                        smp_call_function(flush_ldt, 0, 1, 1);
+#endif
+        }
+        wmb();
+        if (oldsize) {
+                if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+                        vfree(oldldt);
+                else
+                        kfree(oldldt);
+        }
+        return 0;
+}
+
+static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+{
+        int err = alloc_ldt(new, old->size, 0);
+        if (err < 0) {
+                printk(KERN_WARNING "ldt allocation failed\n");
+                new->size = 0;
+                return err;
+        }
+        memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+        return 0;
+}
+
 /*
- * read_ldt() is not really atomic - this is not a problem since
- * synchronization of reads and writes done to the LDT has to be
- * assured by user-space anyway. Writes are atomic, to protect
- * the security checks done on new descriptors.
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
  */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+        struct mm_struct * old_mm;
+        int retval = 0;
+
+        init_MUTEX(&mm->context.sem);
+        mm->context.size = 0;
+        old_mm = current->mm;
+        if (old_mm && old_mm->context.size > 0) {
+                down(&old_mm->context.sem);
+                retval = copy_ldt(&mm->context, &old_mm->context);
+                up(&old_mm->context.sem);
+        }
+        return retval;
+}
+
+/*
+ * No need to lock the MM as we are the last user
+ */
+void release_segments(struct mm_struct *mm)
+{
+        if (mm->context.size) {
+                clear_LDT();
+                if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+                        vfree(mm->context.ldt);
+                else
+                        kfree(mm->context.ldt);
+                mm->context.size = 0;
+        }
+}
+
 static int read_ldt(void * ptr, unsigned long bytecount)
 {
         int err;
         unsigned long size;
         struct mm_struct * mm = current->mm;

-        err = 0;
-        if (!mm->context.segments)
-                goto out;
+        if (!mm->context.size)
+                return 0;
+        if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
+                bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;

-        size = LDT_ENTRIES*LDT_ENTRY_SIZE;
+        down(&mm->context.sem);
+        size = mm->context.size*LDT_ENTRY_SIZE;
         if (size > bytecount)
                 size = bytecount;

-        err = size;
-        if (copy_to_user(ptr, mm->context.segments, size))
+        err = 0;
+        if (copy_to_user(ptr, mm->context.ldt, size))
                 err = -EFAULT;
-out:
-        return err;
+        up(&mm->context.sem);
+        if (err < 0)
+                return err;
+        if (size != bytecount) {
+                /* zero-fill the rest */
+                clear_user(ptr+size, bytecount-size);
+        }
+        return bytecount;
 }
 static int read_default_ldt(void * ptr, unsigned long bytecount)

@@ -53,7 +153,7 @@ static int read_default_ldt(void * ptr, unsigned long bytecount)
         err = 0;
         address = &default_ldt[0];
-        size = sizeof(struct desc_struct);
+        size = 5*sizeof(struct desc_struct);
         if (size > bytecount)
                 size = bytecount;
@@ -88,24 +188,14 @@ static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
                 goto out;
         }

-        /*
-         * the GDT index of the LDT is allocated dynamically, and is
-         * limited by MAX_LDT_DESCRIPTORS.
-         */
-        down_write(&mm->mmap_sem);
-        if (!mm->context.segments) {
-                void * segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
-                error = -ENOMEM;
-                if (!segments)
+        down(&mm->context.sem);
+        if (ldt_info.entry_number >= mm->context.size) {
+                error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
+                if (error < 0)
                         goto out_unlock;
-                memset(segments, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
-                wmb();
-                mm->context.segments = segments;
-                mm->context.cpuvalid = 1UL << smp_processor_id();
-                load_LDT(mm);
         }

-        lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.segments);
+        lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);

         /* Allow LDTs to be cleared by the user. */
         if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
@@ -143,7 +233,7 @@ static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
         error = 0;
 out_unlock:
-        up_write(&mm->mmap_sem);
+        up(&mm->context.sem);
 out:
         return error;
 }
...

@@ -475,23 +475,6 @@ void show_regs(struct pt_regs * regs)
         show_trace(&regs->esp);
 }

-/*
- * No need to lock the MM as we are the last user
- */
-void release_segments(struct mm_struct *mm)
-{
-        void * ldt = mm->context.segments;
-
-        /*
-         * free the LDT
-         */
-        if (ldt) {
-                mm->context.segments = NULL;
-                clear_LDT();
-                vfree(ldt);
-        }
-}
 /*
  * Create a kernel thread
  */

@@ -544,42 +527,17 @@ void flush_thread(void)
 void release_thread(struct task_struct *dead_task)
 {
         if (dead_task->mm) {
-                void * ldt = dead_task->mm->context.segments;
-
                 // temporary debugging check
-                if (ldt) {
-                        printk("WARNING: dead process %8s still has LDT? <%p>\n",
-                                        dead_task->comm, ldt);
+                if (dead_task->mm->context.size) {
+                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
+                                        dead_task->comm,
+                                        dead_task->mm->context.ldt,
+                                        dead_task->mm->context.size);
                         BUG();
                 }
         }
 }
-
-/*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
- */
-void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
-{
-        struct mm_struct * old_mm;
-        void *old_ldt, *ldt;
-
-        ldt = NULL;
-        old_mm = current->mm;
-        if (old_mm && (old_ldt = old_mm->context.segments) != NULL) {
-                /*
-                 * Completely new LDT, we initialize it from the parent:
-                 */
-                ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
-                if (!ldt)
-                        printk(KERN_WARNING "ldt allocation failed\n");
-                else
-                        memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
-        }
-        new_mm->context.segments = ldt;
-        new_mm->context.cpuvalid = ~0UL;        /* valid on all CPU's - they can't have stale data */
-}

 /*
  * Save a segment.
  */

...
@@ -2875,7 +2875,7 @@ void __init cpu_init (void)
         set_tss_desc(nr,t);
         gdt_table[__TSS(nr)].b &= 0xfffffdff;
         load_TR(nr);
-        load_LDT(&init_mm);
+        load_LDT(&init_mm.context);

         /* Clear %fs and %gs. */
         asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");

...
@@ -20,7 +20,7 @@
    of the stack frame of math_emulate() */
 #define SETUP_DATA_AREA(arg)    FPU_info = (struct info *) &arg
-#define LDT_DESCRIPTOR(s)       (((struct desc_struct *)current->mm->context.segments)[(s) >> 3])
+#define LDT_DESCRIPTOR(s)       (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3])
 #define SEG_D_SIZE(x)           ((x).b & (3 << 21))
 #define SEG_G_BIT(x)            ((x).b & (1 << 23))
 #define SEG_GRANULARITY(x)      (((x).b & (1 << 23)) ? 4096 : 1)

...
@@ -87,13 +87,13 @@ static inline void clear_LDT(void)
 /*
  * load one particular LDT into the current CPU
  */
-static inline void load_LDT (struct mm_struct *mm)
+static inline void load_LDT (mm_context_t *pc)
 {
         int cpu = smp_processor_id();
-        void *segments = mm->context.segments;
-        int count = LDT_ENTRIES;
+        void *segments = pc->ldt;
+        int count = pc->size;

-        if (!segments) {
+        if (!count) {
                 segments = &default_ldt[0];
                 count = 5;
         }

...
@@ -4,10 +4,13 @@
 /*
  * The i386 doesn't have a mmu context, but
  * we put the segment information here.
+ *
+ * cpu_vm_mask is used to optimize ldt flushing.
  */
 typedef struct {
-        void *segments;
-        unsigned long cpuvalid;
+        int size;
+        struct semaphore sem;
+        void * ldt;
 } mm_context_t;

 #endif
@@ -11,7 +11,7 @@
  * possibly do the LDT unload here?
  */
 #define destroy_context(mm)             do { } while(0)
-#define init_new_context(tsk,mm)        0
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm);

 #ifdef CONFIG_SMP
@@ -31,19 +31,20 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, str
         if (likely(prev != next)) {
                 /* stop flush ipis for the previous mm */
                 clear_bit(cpu, &prev->cpu_vm_mask);
-                /*
-                 * Re-load LDT if necessary
-                 */
-                if (unlikely(prev->context.segments != next->context.segments))
-                        load_LDT(next);
 #ifdef CONFIG_SMP
                 cpu_tlbstate[cpu].state = TLBSTATE_OK;
                 cpu_tlbstate[cpu].active_mm = next;
 #endif
                 set_bit(cpu, &next->cpu_vm_mask);
-                set_bit(cpu, &next->context.cpuvalid);
                 /* Re-load page tables */
                 asm volatile("movl %0,%%cr3": :"r" (__pa(next->pgd)));
+                /* load_LDT, if either the previous or next thread
+                 * has a non-default LDT.
+                 */
+                if (next->context.size+prev->context.size)
+                        load_LDT(&next->context);
         }
 #ifdef CONFIG_SMP
         else {
@@ -55,9 +56,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, str
                          * tlb flush IPI delivery. We must flush our tlb.
                          */
                         local_flush_tlb();
+                        load_LDT(&next->context);
                 }
-                if (!test_and_set_bit(cpu, &next->context.cpuvalid))
-                        load_LDT(next);
         }
 #endif
 }

...
@@ -429,8 +429,7 @@ extern void release_thread(struct task_struct *);
  */
 extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);

-/* Copy and release all segment info associated with a VM */
-extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
+/* Release all segment info associated with a VM */
 extern void release_segments(struct mm_struct * mm);

 extern unsigned long thread_saved_pc(struct task_struct *tsk);

...
@@ -394,11 +394,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
         if (retval)
                 goto free_pt;

-        /*
-         * child gets a private LDT (if there was an LDT in the parent)
-         */
-        copy_segments(tsk, mm);
-
         if (init_new_context(tsk,mm))
                 goto free_pt;

...