Commit 0bbed3be authored by Ingo Molnar

[PATCH] Thread-Local Storage (TLS) support

the following patch implements proper x86 TLS support in the Linux kernel,
via a new system-call, sys_set_thread_area():

   http://redhat.com/~mingo/tls-patches/tls-2.5.28-C6

a TLS test utility can be downloaded from:

    http://redhat.com/~mingo/tls-patches/tls_test.c

what is TLS? Thread-Local Storage is a concept used by threading
abstractions - a fast and efficient way to store per-thread local (but not
on-stack local) data. The __thread extension is already supported by gcc.
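
as a purely illustrative reminder (not from this patch) of what the
compiler-level side looks like: with __thread, every thread transparently
gets its own instance of the variable, with no locking and no per-thread
lookup tables:

    /* hypothetical example - each thread sees only its own copy */
    __thread int per_thread_counter;

    void bump(void)
    {
            per_thread_counter++;   /* touches this thread's copy only */
    }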

proper TLS support in compilers (and glibc/pthreads) is a bit problematic
on the x86 platform. There are only 8 general-purpose registers available,
so on x86 we have to use segments to access the TLS. The approach used by
glibc so far was to set up a per-thread LDT entry to describe the TLS.
Besides the general fragility of LDTs, this also introduced a limit:
the maximum number of LDT entries is 8192, so the maximum number of
threads per application is 8192.

this patch does it differently - the kernel keeps a specific per-thread
GDT entry that can be set up and modified by each thread:

     asmlinkage int sys_set_thread_area(unsigned int base,
               unsigned int limit, unsigned int flags)

the kernel, upon context-switch, modifies this GDT entry to match the
thread's TLS settings. This way user-space threaded code can access
per-thread data via this descriptor - by using the same, constant %gs (or
%fs) selector in every thread. The number of TLS areas is unlimited, and
there is no additional allocation overhead associated with TLS support.
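
to make the intended usage concrete, here is a minimal user-space sketch
(not part of the patch). It assumes the syscall slot added in the entry.S
hunk below - which works out to 243, right after sys_sched_getaffinity -
and hand-rolls what a thread library would normally wrap; each thread
would of course point the call at its own memory block:

    #include <unistd.h>
    #include <sys/syscall.h>

    #define __NR_set_thread_area    243         /* assumed: slot added by this patch */
    #define TLS_FLAG_WRITABLE       0x00000002

    static char tls_block[4096];                /* demo: one thread's TLS area */

    int setup_tls(void)
    {
            /* on success the syscall returns the selector (TLS_ENTRY*8 + 3) */
            long sel = syscall(__NR_set_thread_area,
                               (unsigned int)(unsigned long)tls_block,
                               sizeof(tls_block) - 1,   /* byte-granular limit */
                               TLS_FLAG_WRITABLE);
            if (sel < 0)
                    return -1;

            /* load the selector into %gs; %gs:0 now addresses this thread's TLS */
            __asm__ __volatile__("movw %w0, %%gs" : : "q" ((unsigned short)sel));
            return 0;
    }

    /* read the first 32-bit word of the current thread's TLS area */
    static inline unsigned int tls_word0(void)
    {
            unsigned int val;
            __asm__ __volatile__("movl %%gs:0, %0" : "=r" (val));
            return val;
    }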


the biggest problem preventing the introduction of this concept was
Linux's global shared GDT on SMP systems. The patch fixes this by
implementing a per-CPU GDT, which is also a nice context-switch speedup:
2-task lat_ctx context-switching got faster by about 5% on a dual-Celeron
testbox. [ Could it be that a shared GDT is fundamentally suboptimal on
SMP? Perhaps updating the 'accessed' bit in the DS/CS descriptors causes
some sort of locked memory-cycle overhead? ]

the GDT layout got simplified:

 *   0 - null
 *   1 - Thread-Local Storage (TLS) segment
 *   2 - kernel code segment
 *   3 - kernel data segment
 *   4 - user code segment              <==== new cacheline
 *   5 - user data segment
 *   6 - TSS
 *   7 - LDT
 *   8 - APM BIOS support               <==== new cacheline
 *   9 - APM BIOS support
 *  10 - APM BIOS support
 *  11 - APM BIOS support
 *  12 - PNPBIOS support                <==== new cacheline
 *  13 - PNPBIOS support
 *  14 - PNPBIOS support
 *  15 - PNPBIOS support
 *  16 - PNPBIOS support                <==== new cacheline
 *  17 - not used
 *  18 - not used
 *  19 - not used

set_thread_area() currently recognizes the following flags:

  #define TLS_FLAG_LIMIT_IN_PAGES         0x00000001
  #define TLS_FLAG_WRITABLE               0x00000002
  #define TLS_FLAG_CLEAR                  0x00000004

- in theory we could avoid the 'limit in pages' bit, but i wanted to
  preserve the flexibility to potentially enable the setting of
  byte-granularity stack segments, for example. And unlimited segments
  (granularity = pages, limit = 0xfffff) might have a performance
  advantage on some CPUs. We could also automatically figure out the best
  possible granularity for a given limit - but i wanted to avoid this kind
  of guesswork; some CPUs might prefer page-granularity segments, who
  knows.

- The 'writable' flag is straightforward and could be useful to some
  applications.

- The 'clear' flag clears the TLS. [note that a base 0 limit 0 TLS is in
  fact legal, it's a single-byte segment at address 0.]

(the system-call does not expose any other segment options to user-space:
the privilege level is 3, the segment is 32-bit, etc. - it uses safe and
sane defaults.)
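
for reference, the 'safe and sane defaults' plus the (base, limit, flags)
triple map onto the two 32-bit descriptor words as shown below - this
mirrors the sys_set_thread_area() code further down in the patch, with the
individual bits spelled out (the helper name is purely illustrative):

    struct desc_words { unsigned int a, b; };

    static struct desc_words build_tls_desc(unsigned int base, unsigned int limit,
                                            int writable, int limit_in_pages)
    {
            struct desc_words d;

            d.a = ((base & 0x0000ffff) << 16) |         /* base  bits 15..0  */
                   (limit & 0x0ffff);                   /* limit bits 15..0  */

            d.b = (base & 0xff000000)         |         /* base  bits 31..24 */
                  ((base & 0x00ff0000) >> 16) |         /* base  bits 23..16 */
                  (limit & 0xf0000)           |         /* limit bits 19..16 */
                  (writable << 9)             |         /* data segment: writable */
                  (1 << 15)                   |         /* present */
                  (1 << 22)                   |         /* 32-bit segment (D/B) */
                  (limit_in_pages << 23)      |         /* 4K granularity (G) */
                  0x7000;                                /* S=1 (code/data), DPL 3 */
            return d;
    }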

NOTE: the interface intentionally does not allow changing the TLS of
another thread - that would just complicate the interface (and
implementation) unnecessarily. Is there any good reason to allow the
setting of another thread's TLS?

NOTE2: non-pthreads glibc applications can call set_thread_area() to set
up a GDT entry just below the end of the stack. We could use some sort of
default TLS area as well, but that would hard-code a given segment.
parent 2c0a3925
@@ -1924,35 +1924,38 @@ static int __init apm_init(void)
 	 * that extends up to the end of page zero (that we have reserved).
 	 * This is for buggy BIOS's that refer to (real mode) segment 0x40
 	 * even though they are called in protected mode.
+	 *
+	 * NOTE: on SMP we call into the APM BIOS only on CPU#0, so it's
+	 * enough to modify CPU#0's GDT.
 	 */
-	set_base(gdt[APM_40 >> 3],
+	set_base(cpu_gdt_table[0][APM_40 >> 3],
 		 __va((unsigned long)0x40 << 4));
-	_set_limit((char *)&gdt[APM_40 >> 3], 4095 - (0x40 << 4));
+	_set_limit((char *)&cpu_gdt_table[0][APM_40 >> 3], 4095 - (0x40 << 4));
 	apm_bios_entry.offset = apm_info.bios.offset;
 	apm_bios_entry.segment = APM_CS;
-	set_base(gdt[APM_CS >> 3],
+	set_base(cpu_gdt_table[0][APM_CS >> 3],
 		 __va((unsigned long)apm_info.bios.cseg << 4));
-	set_base(gdt[APM_CS_16 >> 3],
+	set_base(cpu_gdt_table[0][APM_CS_16 >> 3],
 		 __va((unsigned long)apm_info.bios.cseg_16 << 4));
-	set_base(gdt[APM_DS >> 3],
+	set_base(cpu_gdt_table[0][APM_DS >> 3],
 		 __va((unsigned long)apm_info.bios.dseg << 4));
 #ifndef APM_RELAX_SEGMENTS
 	if (apm_info.bios.version == 0x100) {
 #endif
 		/* For ASUS motherboard, Award BIOS rev 110 (and others?) */
-		_set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 - 1);
+		_set_limit((char *)&cpu_gdt_table[0][APM_CS >> 3], 64 * 1024 - 1);
 		/* For some unknown machine. */
-		_set_limit((char *)&gdt[APM_CS_16 >> 3], 64 * 1024 - 1);
+		_set_limit((char *)&cpu_gdt_table[0][APM_CS_16 >> 3], 64 * 1024 - 1);
 		/* For the DEC Hinote Ultra CT475 (and others?) */
-		_set_limit((char *)&gdt[APM_DS >> 3], 64 * 1024 - 1);
+		_set_limit((char *)&cpu_gdt_table[0][APM_DS >> 3], 64 * 1024 - 1);
 #ifndef APM_RELAX_SEGMENTS
 	} else {
-		_set_limit((char *)&gdt[APM_CS >> 3],
+		_set_limit((char *)&cpu_gdt_table[0][APM_CS >> 3],
 			(apm_info.bios.cseg_len - 1) & 0xffff);
-		_set_limit((char *)&gdt[APM_CS_16 >> 3],
+		_set_limit((char *)&cpu_gdt_table[0][APM_CS_16 >> 3],
 			(apm_info.bios.cseg_16_len - 1) & 0xffff);
-		_set_limit((char *)&gdt[APM_DS >> 3],
+		_set_limit((char *)&cpu_gdt_table[0][APM_DS >> 3],
 			(apm_info.bios.dseg_len - 1) & 0xffff);
 	}
 #endif
...
@@ -421,14 +421,14 @@ void __init early_cpu_init(void)
  */
 void __init cpu_init (void)
 {
-	int nr = smp_processor_id();
-	struct tss_struct * t = &init_tss[nr];
+	int cpu = smp_processor_id();
+	struct tss_struct * t = init_tss + cpu;
-	if (test_and_set_bit(nr, &cpu_initialized)) {
-		printk(KERN_WARNING "CPU#%d already initialized!\n", nr);
+	if (test_and_set_bit(cpu, &cpu_initialized)) {
+		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
 		for (;;) local_irq_enable();
 	}
-	printk(KERN_INFO "Initializing CPU#%d\n", nr);
+	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
 	if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
 		clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);

@@ -441,7 +441,17 @@ void __init cpu_init (void)
 	}
 #endif
-	__asm__ __volatile__("lgdt %0": "=m" (gdt_descr));
+	/*
+	 * Initialize the per-CPU GDT with the boot GDT,
+	 * and set up the GDT descriptor:
+	 */
+	if (cpu) {
+		memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
+		cpu_gdt_descr[cpu].size = GDT_SIZE;
+		cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
+	}
+	__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
 	__asm__ __volatile__("lidt %0": "=m" (idt_descr));

@@ -450,18 +460,18 @@ void __init cpu_init (void)
 	__asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
 	/*
-	 * set up and load the per-CPU TSS and LDT
+	 * Set up and load the per-CPU TSS and LDT
 	 */
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
 	if(current->mm)
 		BUG();
-	enter_lazy_tlb(&init_mm, current, nr);
+	enter_lazy_tlb(&init_mm, current, cpu);
 	t->esp0 = current->thread.esp0;
-	set_tss_desc(nr,t);
-	gdt_table[__TSS(nr)].b &= 0xfffffdff;
-	load_TR(nr);
+	set_tss_desc(cpu,t);
+	cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+	load_TR_desc();
 	load_LDT(&init_mm.context);
 	/* Clear %fs and %gs. */
...
@@ -753,6 +753,7 @@ ENTRY(sys_call_table)
 	.long sys_futex			/* 240 */
 	.long sys_sched_setaffinity
 	.long sys_sched_getaffinity
+	.long sys_set_thread_area
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
...
@@ -231,7 +231,7 @@ is386:	movl $2,%ecx	# set MP
 	call check_x87
 	incb ready
-	lgdt gdt_descr
+	lgdt cpu_gdt_descr
 	lidt idt_descr
 	ljmp $(__KERNEL_CS),$1f
 1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers

@@ -338,29 +338,27 @@ ignore_int:
 	iret
 /*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
+ * The IDT and GDT 'descriptors' are a strange 48-bit object
+ * only used by the lidt and lgdt instructions. They are not
+ * like usual segment descriptors - they consist of a 16-bit
+ * segment size, and 32-bit linear address value:
  */
-#define IDT_ENTRIES	256
-#define GDT_ENTRIES	(__TSS(NR_CPUS))
-.globl idt
-.globl gdt
+.globl idt_descr
+.globl cpu_gdt_descr
 	ALIGN
 	.word 0
 idt_descr:
 	.word IDT_ENTRIES*8-1		# idt contains 256 entries
-idt:
 	.long idt_table
-	.word 0
-gdt_descr:
+# boot GDT descriptor (later on used by CPU#0):
+cpu_gdt_descr:
 	.word GDT_ENTRIES*8-1
-gdt:
-	.long gdt_table
+	.long cpu_gdt_table
+	.fill NR_CPUS-1,6,0		# space for the other GDT descriptors
 /*
  * This is initialized to create an identity-mapping at 0-8M (for bootup

@@ -418,15 +416,15 @@ ALIGN
  * NOTE! Make sure the gdt descriptor in head.S matches this if you
  * change anything.
  */
-ENTRY(gdt_table)
+ENTRY(cpu_gdt_table)
 	.quad 0x0000000000000000	/* NULL descriptor */
-	.quad 0x0000000000000000	/* not used */
+	.quad 0x0000000000000000	/* TLS descriptor */
 	.quad 0x00cf9a000000ffff	/* 0x10 kernel 4GB code at 0x00000000 */
 	.quad 0x00cf92000000ffff	/* 0x18 kernel 4GB data at 0x00000000 */
 	.quad 0x00cffa000000ffff	/* 0x23 user 4GB code at 0x00000000 */
 	.quad 0x00cff2000000ffff	/* 0x2b user 4GB data at 0x00000000 */
-	.quad 0x0000000000000000	/* not used */
-	.quad 0x0000000000000000	/* not used */
+	.quad 0x0000000000000000	/* TSS descriptor */
+	.quad 0x0000000000000000	/* LDT descriptor */
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * and limits are set at run time.

@@ -444,5 +442,8 @@ ENTRY(gdt_table)
 	.quad 0x0000000000000000	/* 0x88 not used */
 	.quad 0x0000000000000000	/* 0x90 not used */
 	.quad 0x0000000000000000	/* 0x98 not used */
-	/* Per CPU segments */
-	.fill NR_CPUS*4,8,0		/* space for TSS's and LDT's */
+
+#if CONFIG_SMP
+	.fill (NR_CPUS-1)*GDT_ENTRIES,8,0	/* other CPU's GDT */
+#endif
@@ -74,7 +74,6 @@ EXPORT_SYMBOL(pm_idle);
 EXPORT_SYMBOL(pm_power_off);
 EXPORT_SYMBOL(get_cmos_time);
 EXPORT_SYMBOL(apm_info);
-EXPORT_SYMBOL(gdt);
 #ifdef CONFIG_DEBUG_IOVIRT
 EXPORT_SYMBOL(__io_virt_debug);
...
@@ -662,7 +662,8 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
-	struct tss_struct *tss = init_tss + smp_processor_id();
+	int cpu = smp_processor_id();
+	struct tss_struct *tss = init_tss + cpu;
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

@@ -688,6 +689,14 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		loadsegment(gs, next->gs);
 	}
+	/*
+	 * Load the per-thread Thread-Local Storage descriptor.
+	 *
+	 * NOTE: it's faster to do the two stores unconditionally
+	 * than to branch away.
+	 */
+	load_TLS_desc(next, cpu);
 	/*
 	 * Now maybe reload the debug registers
 	 */

@@ -818,3 +827,58 @@ unsigned long get_wchan(struct task_struct *p)
 }
 #undef last_sched
 #undef first_sched
+
+/*
+ * Set the Thread-Local Storage area:
+ */
+asmlinkage int sys_set_thread_area(unsigned int base, unsigned int limit, unsigned int flags)
+{
+	struct thread_struct *t = &current->thread;
+	int limit_in_pages = 0, writable = 0;
+	int cpu;
+
+	/* do not allow unused flags */
+	if (flags & ~TLS_FLAGS_MASK)
+		return -EINVAL;
+
+	/* check limit */
+	if (limit & 0xfff00000)
+		return -EINVAL;
+
+	/*
+	 * Clear the TLS?
+	 */
+	if (flags & TLS_FLAG_CLEAR) {
+		cpu = get_cpu();
+		t->tls_base = t->tls_limit = t->tls_flags = 0;
+		t->tls_desc.a = t->tls_desc.b = 0;
+		load_TLS_desc(t, cpu);
+		put_cpu();
+		return 0;
+	}
+
+	if (flags & TLS_FLAG_LIMIT_IN_PAGES)
+		limit_in_pages = 1;
+	if (flags & TLS_FLAG_WRITABLE)
+		writable = 1;
+
+	/*
+	 * We must not get preempted while modifying the TLS.
+	 */
+	cpu = get_cpu();
+
+	t->tls_base = base;
+	t->tls_limit = limit;
+	t->tls_flags = flags;
+
+	t->tls_desc.a = ((base & 0x0000ffff) << 16) | (limit & 0x0ffff);
+
+	t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
+			(limit & 0xf0000) | (writable << 9) | (1 << 15) |
+			(1 << 22) | (limit_in_pages << 23) | 0x7000;
+
+	load_TLS_desc(t, cpu);
+	put_cpu();
+
+	return TLS_ENTRY*8 + 3;
+}
@@ -203,14 +203,13 @@ void do_suspend_lowlevel(int resume)
 void fix_processor_context(void)
 {
-	int nr = smp_processor_id();
-	struct tss_struct * t = &init_tss[nr];
+	int cpu = smp_processor_id();
+	struct tss_struct * t = init_tss + cpu;
-	set_tss_desc(nr,t);	/* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
-	gdt_table[__TSS(nr)].b &= 0xfffffdff;
-	load_TR(nr);		/* This does ltr */
+	set_tss_desc(cpu,t);	/* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
+	cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+	load_TR_desc();		/* This does ltr */
 	load_LDT(&current->mm->context);	/* This does lldt */
...
@@ -63,9 +63,14 @@ idt_48:
 	.word	0			# idt limit = 0
 	.word	0, 0			# idt base = 0L
+#
+# NOTE: here we actually use CPU#0's GDT - but that is OK, we reload
+# the proper GDT shortly after booting up the secondary CPUs.
+#
 gdt_48:
 	.word	0x0800			# gdt limit = 2048, 256 GDT entries
-	.long	gdt_table-__PAGE_OFFSET	# gdt base = gdt (first SMP CPU)
+	.long	cpu_gdt_table-__PAGE_OFFSET	# gdt base = gdt (first SMP CPU)
 .globl trampoline_end
 trampoline_end:
@@ -833,37 +833,6 @@ static void __init set_call_gate(void *a, void *addr)
 	_set_gate(a,12,3,addr);
 }
-#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
-	*((gate_addr)+1) = ((base) & 0xff000000) | \
-		(((base) & 0x00ff0000)>>16) | \
-		((limit) & 0xf0000) | \
-		((dpl)<<13) | \
-		(0x00408000) | \
-		((type)<<8); \
-	*(gate_addr) = (((base) & 0x0000ffff)<<16) | \
-		((limit) & 0x0ffff); }
-
-#define _set_tssldt_desc(n,addr,limit,type) \
-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
-	"movw %%ax,2(%2)\n\t" \
-	"rorl $16,%%eax\n\t" \
-	"movb %%al,4(%2)\n\t" \
-	"movb %4,5(%2)\n\t" \
-	"movb $0,6(%2)\n\t" \
-	"movb %%ah,7(%2)\n\t" \
-	"rorl $16,%%eax" \
-	: "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))
-
-void set_tss_desc(unsigned int n, void *addr)
-{
-	_set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 235, 0x89);
-}
-
-void set_ldt_desc(unsigned int n, void *addr, unsigned int size)
-{
-	_set_tssldt_desc(gdt_table+__LDT(n), (int)addr, ((size << 3)-1), 0x82);
-}
-
 #ifdef CONFIG_X86_VISWS_APIC
 /*
...
@@ -4,72 +4,59 @@
 #include <asm/ldt.h>
 /*
- * The layout of the GDT under Linux:
+ * The layout of the per-CPU GDT under Linux:
  *
  *   0 - null
- *   1 - not used
+ *   1 - Thread-Local Storage (TLS) segment
  *   2 - kernel code segment
  *   3 - kernel data segment
- *   4 - user code segment		<-- new cacheline
+ *   4 - user code segment		<==== new cacheline
  *   5 - user data segment
- *   6 - not used
- *   7 - not used
- *   8 - APM BIOS support		<-- new cacheline
+ *   6 - TSS
+ *   7 - LDT
+ *   8 - APM BIOS support		<==== new cacheline
  *   9 - APM BIOS support
  *  10 - APM BIOS support
  *  11 - APM BIOS support
- *  12 - PNPBIOS support
+ *  12 - PNPBIOS support		<==== new cacheline
 *  13 - PNPBIOS support
 *  14 - PNPBIOS support
 *  15 - PNPBIOS support
- *  16 - PNPBIOS support
+ *  16 - PNPBIOS support		<==== new cacheline
 *  17 - not used
 *  18 - not used
 *  19 - not used
+ */
+
+#define TLS_ENTRY 1
+#define TSS_ENTRY 6
+#define LDT_ENTRY 7
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
 *
- * The TSS+LDT descriptors are spread out a bit so that every CPU
- * has an exclusive cacheline for the per-CPU TSS and LDT:
- *
- * 20 - CPU#0 TSS			<-- new cacheline
- * 21 - CPU#0 LDT
- * 22 - not used
- * 23 - not used
- * 24 - CPU#1 TSS			<-- new cacheline
- * 25 - CPU#1 LDT
- * 26 - not used
- * 27 - not used
- * ... NR_CPUS per-CPU TSS+LDT's if on SMP
- *
- * Entry into gdt where to find first TSS.
+ * We pad the GDT to cacheline boundary.
 */
-#define __FIRST_TSS_ENTRY 20
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY+1)
-
-#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
+#define IDT_ENTRIES 256
+#define GDT_ENTRIES 20
 #ifndef __ASSEMBLY__
 #include <asm/mmu.h>
-struct desc_struct {
-	unsigned long a,b;
-};
+#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-extern struct desc_struct gdt_table[];
-extern struct desc_struct *idt, *gdt;
+extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
 struct Xgt_desc_struct {
 	unsigned short size;
 	unsigned long address __attribute__((packed));
-};
+} __attribute__ ((packed));
-#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2))
-#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2))
+extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
-#define load_TR(n) __asm__ __volatile__("ltr %%ax"::"a" (__TSS(n)<<3))
-
-#define __load_LDT(n) __asm__ __volatile__("lldt %%ax"::"a" (__LDT(n)<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
 /*
  * This is the ldt that every process will get unless we need

@@ -77,14 +64,43 @@ struct Xgt_desc_struct {
  */
 extern struct desc_struct default_ldt[];
 extern void set_intr_gate(unsigned int irq, void * addr);
-extern void set_ldt_desc(unsigned int n, void *addr, unsigned int size);
-extern void set_tss_desc(unsigned int n, void *addr);
+
+#define _set_tssldt_desc(n,addr,limit,type) \
+__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
+	"movw %%ax,2(%2)\n\t" \
+	"rorl $16,%%eax\n\t" \
+	"movb %%al,4(%2)\n\t" \
+	"movb %4,5(%2)\n\t" \
+	"movb $0,6(%2)\n\t" \
+	"movb %%ah,7(%2)\n\t" \
+	"rorl $16,%%eax" \
+	: "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))
+
+static inline void set_tss_desc(unsigned int cpu, void *addr)
+{
+	_set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+}
+
+static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
+{
+	_set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+}
+
+#define TLS_FLAGS_MASK			0x00000007
+
+#define TLS_FLAG_LIMIT_IN_PAGES		0x00000001
+#define TLS_FLAG_WRITABLE		0x00000002
+#define TLS_FLAG_CLEAR			0x00000004
+
+static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+{
+	cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+}
 static inline void clear_LDT(void)
 {
-	int cpu = smp_processor_id();
-
-	set_ldt_desc(cpu, &default_ldt[0], 5);
-	__load_LDT(cpu);
+	set_ldt_desc(smp_processor_id(), &default_ldt[0], 5);
+	load_LDT_desc();
 }

@@ -92,17 +108,16 @@ static inline void clear_LDT(void)
  */
 static inline void load_LDT (mm_context_t *pc)
 {
-	int cpu = smp_processor_id();
 	void *segments = pc->ldt;
 	int count = pc->size;
-	if (!count) {
+	if (likely(!count)) {
 		segments = &default_ldt[0];
 		count = 5;
 	}
-	set_ldt_desc(cpu, segments, count);
-	__load_LDT(cpu);
+	set_ldt_desc(smp_processor_id(), segments, count);
+	load_LDT_desc();
 }
 #endif /* !__ASSEMBLY__ */
...
@@ -17,7 +17,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu)
 {
-	if(cpu_tlbstate[cpu].state == TLBSTATE_OK)
+	if (cpu_tlbstate[cpu].state == TLBSTATE_OK)
 		cpu_tlbstate[cpu].state = TLBSTATE_LAZY;
 }
 #else

@@ -40,18 +40,18 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, str
 		/* Re-load page tables */
 		load_cr3(next->pgd);
-		/* load_LDT, if either the previous or next thread
-		 * has a non-default LDT.
+		/*
+		 * load the LDT, if the LDT is different:
 		 */
-		if (next->context.size+prev->context.size)
+		if (unlikely(prev->context.ldt != next->context.ldt))
 			load_LDT(&next->context);
 	}
 #ifdef CONFIG_SMP
 	else {
 		cpu_tlbstate[cpu].state = TLBSTATE_OK;
-		if(cpu_tlbstate[cpu].active_mm != next)
+		if (cpu_tlbstate[cpu].active_mm != next)
 			BUG();
-		if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) {
+		if (!test_and_set_bit(cpu, &next->cpu_vm_mask)) {
 			/* We were in lazy tlb mode and leave_mm disabled
 			 * tlb flush IPI delivery. We must reload %cr3.
 			 */
...
@@ -18,6 +18,10 @@
 #include <linux/config.h>
 #include <linux/threads.h>
+struct desc_struct {
+	unsigned long a,b;
+};
+
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").

@@ -372,6 +376,9 @@ struct thread_struct {
 	unsigned long	v86flags, v86mask, v86mode, saved_esp0;
 	/* IO permissions */
 	unsigned long	*ts_io_bitmap;
+	/* TLS info and cached descriptor */
+	unsigned int tls_base, tls_limit, tls_flags;
+	struct desc_struct tls_desc;
 };
 #define INIT_THREAD  {	\

@@ -395,7 +402,7 @@ struct thread_struct {
 	0,0,0,0, /* esp,ebp,esi,edi */	\
 	0,0,0,0,0,0, /* es,cs,ss */	\
 	0,0,0,0,0,0, /* ds,fs,gs */	\
-	__LDT(0),0, /* ldt */	\
+	LDT_ENTRY,0, /* ldt */	\
 	0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */	\
 	{~0, } /* ioperm */	\
 }
...