Commit 34ddc81a authored by Linus Torvalds

i387: re-introduce FPU state preloading at context switch time

After all the FPU state cleanups and finally finding the problem that
caused all our FPU save/restore problems, this re-introduces the
preloading of FPU state that was removed in commit b3b0870e ("i387:
do not preload FPU state at task switch time").

However, instead of simply reverting the removal, this reimplements
preloading with several fixes, most notably

 - properly abstracted as a true FPU state switch, rather than as
   open-coded save and restore with various hacks.

   In particular, implementing it as a proper FPU state switch allows us
   to optimize the CR0.TS flag accesses: there is no reason to set the
   TS bit only to then almost immediately clear it again.  CR0 accesses
   are quite slow and expensive, don't flip the bit back and forth for
   no good reason.

 - Make sure that the same model works for both x86-32 and x86-64, so
   that there are no gratuitous differences between the two due to the
   way they save and restore segment state differently due to
   architectural differences that really don't matter to the FPU state.

 - Avoid exposing the "preload" state to the context switch routines,
   and in particular allow the concept of lazy state restore: if nothing
   else has used the FPU in the meantime, and the process is still on
   the same CPU, we can avoid restoring state from memory entirely, just
   re-expose the state that is still in the FPU unit.

   That optimized lazy restore isn't actually implemented here, but the
   infrastructure is set up for it.  Of course, older CPUs that use
   'fnsave' to save the state cannot take advantage of this, since the
   state saving also trashes the state.

In other words, there is now an actual _design_ to the FPU state saving,
rather than just random historical baggage.  Hopefully it's easier to
follow as a result.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f94edacf
...@@ -29,6 +29,7 @@ extern unsigned int sig_xstate_size; ...@@ -29,6 +29,7 @@ extern unsigned int sig_xstate_size;
extern void fpu_init(void); extern void fpu_init(void);
extern void mxcsr_feature_mask_init(void); extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child); extern int init_fpu(struct task_struct *child);
extern void __math_state_restore(struct task_struct *);
extern void math_state_restore(void); extern void math_state_restore(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
...@@ -212,9 +213,10 @@ static inline void fpu_fxsave(struct fpu *fpu) ...@@ -212,9 +213,10 @@ static inline void fpu_fxsave(struct fpu *fpu)
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
/* /*
* These must be called with preempt disabled * These must be called with preempt disabled. Returns
* 'true' if the FPU state is still intact.
*/ */
static inline void fpu_save_init(struct fpu *fpu) static inline int fpu_save_init(struct fpu *fpu)
{ {
if (use_xsave()) { if (use_xsave()) {
fpu_xsave(fpu); fpu_xsave(fpu);
...@@ -223,22 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu) ...@@ -223,22 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu)
* xsave header may indicate the init state of the FP. * xsave header may indicate the init state of the FP.
*/ */
if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
return; return 1;
} else if (use_fxsr()) { } else if (use_fxsr()) {
fpu_fxsave(fpu); fpu_fxsave(fpu);
} else { } else {
asm volatile("fnsave %[fx]; fwait" asm volatile("fnsave %[fx]; fwait"
: [fx] "=m" (fpu->state->fsave)); : [fx] "=m" (fpu->state->fsave));
return; return 0;
} }
if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) /*
* If exceptions are pending, we need to clear them so
* that we don't randomly get exceptions later.
*
* FIXME! Is this perhaps only true for the old-style
* irq13 case? Maybe we could leave the x87 state
* intact otherwise?
*/
if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
asm volatile("fnclex"); asm volatile("fnclex");
return 0;
}
return 1;
} }
static inline void __save_init_fpu(struct task_struct *tsk) static inline int __save_init_fpu(struct task_struct *tsk)
{ {
fpu_save_init(&tsk->thread.fpu); return fpu_save_init(&tsk->thread.fpu);
} }
static inline int fpu_fxrstor_checking(struct fpu *fpu) static inline int fpu_fxrstor_checking(struct fpu *fpu)
...@@ -301,20 +314,79 @@ static inline void __thread_fpu_begin(struct task_struct *tsk) ...@@ -301,20 +314,79 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
} }
/* /*
* Signal frame handlers... * FPU state switching for scheduling.
*
* This is a two-stage process:
*
* - switch_fpu_prepare() saves the old state and
* sets the new state of the CR0.TS bit. This is
* done within the context of the old process.
*
* - switch_fpu_finish() restores the new state as
* necessary.
*/ */
extern int save_i387_xstate(void __user *buf); typedef struct { int preload; } fpu_switch_t;
extern int restore_i387_xstate(void __user *buf);
/*
* FIXME! We could do a totally lazy restore, but we need to
* add a per-cpu "this was the task that last touched the FPU
* on this CPU" variable, and the task needs to have a "I last
* touched the FPU on this CPU" and check them.
*
* We don't do that yet, so "fpu_lazy_restore()" always returns
* false, but some day..
*/
#define fpu_lazy_restore(tsk) (0)
#define fpu_lazy_state_intact(tsk) do { } while (0)
/*
* First stage of the FPU state switch, called from __switch_to()
* before the rest of the task state is switched.
*
* @old: the task being switched away from
* @new: the task being switched to
*
* Saves the FPU state of 'old' if it currently owns the FPU, updates
* the fpu_counter of 'old', and decides whether the state of 'new'
* should be preloaded. The decision is returned in an fpu_switch_t
* and must be passed on to switch_fpu_finish(), which performs the
* actual register restore when 'preload' is set.
*/
static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
{
fpu_switch_t fpu;
/* Preload only if 'new' has used math and its fpu_counter exceeds 5 */
fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
if (__thread_has_fpu(old)) {
/*
* 'old' owns the FPU: save its state. A nonzero return from
* __save_init_fpu() means the saved state is still intact in
* the FPU; fpu_lazy_state_intact() is currently a no-op
* placeholder for the future lazy-restore optimization.
*/
if (__save_init_fpu(old))
fpu_lazy_state_intact(old);
__thread_clear_has_fpu(old);
old->fpu_counter++;
/* Don't change CR0.TS if we just switch! */
if (fpu.preload) {
/* Hand ownership straight to 'new' and warm the cache for the restore */
__thread_set_has_fpu(new);
prefetch(new->thread.fpu.state);
} else
/* NOTE(review): stts() presumably sets CR0.TS - helper not visible here */
stts();
} else {
/* 'old' did not own the FPU on this switch: reset its counter */
old->fpu_counter = 0;
if (fpu.preload) {
/*
* fpu_lazy_restore() is currently defined as 0, so the
* prefetch branch is always the one taken for now.
*/
if (fpu_lazy_restore(new))
fpu.preload = 0;
else
prefetch(new->thread.fpu.state);
/*
* NOTE(review): __thread_fpu_begin() presumably clears CR0.TS
* and marks 'new' as the FPU owner - confirm against its
* definition, which is not visible here.
*/
__thread_fpu_begin(new);
}
}
return fpu;
}
static inline void __unlazy_fpu(struct task_struct *tsk) /*
* By the time this gets called, we've already cleared CR0.TS and
* given the process the FPU if we are going to preload the FPU
* state - all we need to do is to conditionally restore the register
* state itself.
*/
static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
{ {
if (__thread_has_fpu(tsk)) { if (fpu.preload)
__save_init_fpu(tsk); __math_state_restore(new);
__thread_fpu_end(tsk);
} else
tsk->fpu_counter = 0;
} }
/*
* Signal frame handlers...
*/
extern int save_i387_xstate(void __user *buf);
extern int restore_i387_xstate(void __user *buf);
static inline void __clear_fpu(struct task_struct *tsk) static inline void __clear_fpu(struct task_struct *tsk)
{ {
if (__thread_has_fpu(tsk)) { if (__thread_has_fpu(tsk)) {
...@@ -474,7 +546,11 @@ static inline void save_init_fpu(struct task_struct *tsk) ...@@ -474,7 +546,11 @@ static inline void save_init_fpu(struct task_struct *tsk)
static inline void unlazy_fpu(struct task_struct *tsk) static inline void unlazy_fpu(struct task_struct *tsk)
{ {
preempt_disable(); preempt_disable();
__unlazy_fpu(tsk); if (__thread_has_fpu(tsk)) {
__save_init_fpu(tsk);
__thread_fpu_end(tsk);
} else
tsk->fpu_counter = 0;
preempt_enable(); preempt_enable();
} }
......
...@@ -299,10 +299,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -299,10 +299,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*next = &next_p->thread; *next = &next_p->thread;
int cpu = smp_processor_id(); int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu); struct tss_struct *tss = &per_cpu(init_tss, cpu);
fpu_switch_t fpu;
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
__unlazy_fpu(prev_p); fpu = switch_fpu_prepare(prev_p, next_p);
/* /*
* Reload esp0. * Reload esp0.
...@@ -357,6 +358,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -357,6 +358,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (prev->gs | next->gs) if (prev->gs | next->gs)
lazy_load_gs(next->gs); lazy_load_gs(next->gs);
switch_fpu_finish(next_p, fpu);
percpu_write(current_task, next_p); percpu_write(current_task, next_p);
return prev_p; return prev_p;
......
...@@ -386,8 +386,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -386,8 +386,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id(); int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu); struct tss_struct *tss = &per_cpu(init_tss, cpu);
unsigned fsindex, gsindex; unsigned fsindex, gsindex;
fpu_switch_t fpu;
__unlazy_fpu(prev_p); fpu = switch_fpu_prepare(prev_p, next_p);
/* /*
* Reload esp0, LDT and the page table pointer: * Reload esp0, LDT and the page table pointer:
...@@ -457,6 +458,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -457,6 +458,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs); wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
prev->gsindex = gsindex; prev->gsindex = gsindex;
switch_fpu_finish(next_p, fpu);
/* /*
* Switch the PDA and FPU contexts. * Switch the PDA and FPU contexts.
*/ */
......
...@@ -570,6 +570,37 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) ...@@ -570,6 +570,37 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
{ {
} }
/*
* This gets called with the process already owning the
* FPU state, and with CR0.TS cleared. It just needs to
* restore the FPU register state.
*/
void __math_state_restore(struct task_struct *tsk)
{
/*
* tsk: the task whose saved FPU state is loaded into the registers.
* On a failed restore the task gives up the FPU and is sent SIGSEGV.
*/
/* We need a safe address that is cheap to find and that is already
in L1. We've just brought in "tsk->thread.has_fpu", so use that */
/* Note: this macro is not #undef'd below, so it stays defined after this function */
#define safe_address (tsk->thread.has_fpu)
/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
values. safe_address is a random variable that should be in L1 */
/*
* The emms/fildl sequence is selected through alternative_input()
* keyed on X86_FEATURE_FXSAVE_LEAK; the default text is NOP padding.
*/
alternative_input(
ASM_NOP8 ASM_NOP2,
"emms\n\t" /* clear stack tags */
"fildl %P[addr]", /* set F?P to defined value */
X86_FEATURE_FXSAVE_LEAK,
[addr] "m" (safe_address));
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
if (unlikely(restore_fpu_checking(tsk))) {
/* Restore failed: release the FPU from the task before signalling it */
__thread_fpu_end(tsk);
force_sig(SIGSEGV, tsk);
return;
}
}
/* /*
* 'math_state_restore()' saves the current math information in the * 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task * old math state array, and gets the new ones from the current task
...@@ -584,10 +615,6 @@ void math_state_restore(void) ...@@ -584,10 +615,6 @@ void math_state_restore(void)
{ {
struct task_struct *tsk = current; struct task_struct *tsk = current;
/* We need a safe address that is cheap to find and that is already
in L1. We're just bringing in "tsk->thread.has_fpu", so use that */
#define safe_address (tsk->thread.has_fpu)
if (!tsk_used_math(tsk)) { if (!tsk_used_math(tsk)) {
local_irq_enable(); local_irq_enable();
/* /*
...@@ -604,25 +631,7 @@ void math_state_restore(void) ...@@ -604,25 +631,7 @@ void math_state_restore(void)
} }
__thread_fpu_begin(tsk); __thread_fpu_begin(tsk);
__math_state_restore(tsk);
/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
values. safe_address is a random variable that should be in L1 */
alternative_input(
ASM_NOP8 ASM_NOP2,
"emms\n\t" /* clear stack tags */
"fildl %P[addr]", /* set F?P to defined value */
X86_FEATURE_FXSAVE_LEAK,
[addr] "m" (safe_address));
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
if (unlikely(restore_fpu_checking(tsk))) {
__thread_fpu_end(tsk);
force_sig(SIGSEGV, tsk);
return;
}
tsk->fpu_counter++; tsk->fpu_counter++;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment