Commit c5bedc68 authored by Ingo Molnar

x86/fpu: Get rid of PF_USED_MATH usage, convert it to fpu->fpstate_active

Introduce a simple fpu->fpstate_active flag in the fpu context data structure
and use that instead of PF_USED_MATH in task->flags.

Testing this flag byte should be slightly more efficient than
testing a bit in a bitmask, but the main advantage is that most
FPU functions can now operate on a 'struct fpu' alone; they no
longer need access to 'struct task_struct'.

There's a slight line-count increase, mostly due to the new 'fpu' local
variables and the extra comments. The local variables will go away
once we convert most of the FPU methods to pure 'struct fpu' parameters.
Reviewed-by: Borislav Petkov <bp@alien8.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent af7f8721
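
Before the diff, a standalone illustration of the change may help. The structs and helper names below are simplified stand-ins invented for this sketch, not the kernel's own definitions; they only show why the new flag lets FPU code work on a 'struct fpu' alone:

#include <stdio.h>

/* Old scheme (simplified): a PF_* bit in the shared per-task flags bitmask. */
#define PF_USED_MATH 0x00002000

struct task_sketch {
	unsigned long flags;		/* shared PF_* bitmask */
};

static int old_fpstate_active(const struct task_sketch *tsk)
{
	return !!(tsk->flags & PF_USED_MATH);	/* bit test, needs the task */
}

/* New scheme (simplified): a byte flag inside the FPU context itself. */
struct fpu_sketch {
	unsigned char fpstate_active;	/* 0 or 1, a plain byte test */
};

static int new_fpstate_active(const struct fpu_sketch *fpu)
{
	return fpu->fpstate_active;	/* no task_struct needed at all */
}

int main(void)
{
	struct task_sketch tsk = { .flags = PF_USED_MATH };
	struct fpu_sketch fpu = { .fpstate_active = 1 };

	printf("old: %d, new: %d\n",
	       old_fpstate_active(&tsk), new_fpstate_active(&fpu));
	return 0;
}

The byte test is at best marginally cheaper than the bit test; the more important property, as the message says, is that the new-style check needs nothing but the FPU context, which is what permits the later conversion of FPU methods to pure 'struct fpu' parameters.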
@@ -307,6 +307,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 				 size_t frame_size,
 				 void __user **fpstate)
 {
+	struct fpu *fpu = &current->thread.fpu;
 	unsigned long sp;

 	/* Default to using normal stack */
@@ -321,7 +322,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 		 ksig->ka.sa.sa_restorer)
 		sp = (unsigned long) ksig->ka.sa.sa_restorer;

-	if (current->flags & PF_USED_MATH) {
+	if (fpu->fpstate_active) {
 		unsigned long fx_aligned, math_size;

 		sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size);
@@ -375,7 +375,7 @@ static inline void drop_fpu(struct task_struct *tsk)
 		__thread_fpu_end(fpu);
 	}

-	tsk->flags &= ~PF_USED_MATH;
+	fpu->fpstate_active = 0;

 	preempt_enable();
 }
@@ -424,7 +424,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 	 * If the task has used the math, pre-load the FPU on xsave processors
 	 * or if the past 5 consecutive context-switches used math.
 	 */
-	fpu.preload = (new->flags & PF_USED_MATH) &&
+	fpu.preload = new_fpu->fpstate_active &&
		      (use_eager_fpu() || new->thread.fpu.counter > 5);

 	if (old_fpu->has_fpu) {
@@ -137,6 +137,12 @@ struct fpu {
 	 * deal with bursty apps that only use the FPU for a short time:
 	 */
 	unsigned char			counter;
+	/*
+	 * This flag indicates whether this context is fpstate_active: if the task is
+	 * not running then we can restore from this context, if the task
+	 * is running then we should save into this context.
+	 */
+	unsigned char			fpstate_active;
 };

 #endif /* _ASM_X86_FPU_H */
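
The new comment encodes an invariant: with the task not running, an active fpstate is something to restore from; with the task running, it is something to save into. A minimal sketch of that contract follows, with invented names (fpu_ctx, sched_out/sched_in, hw_regs) standing in for the real lazy-FPU machinery, which adds further conditions on top:

#include <string.h>

struct fpu_ctx { unsigned char fpstate_active; unsigned char regs[512]; };

static unsigned char hw_regs[512];	/* stand-in for the CPU's FPU registers */

/* Task is being scheduled out: save the live registers into its context. */
static void sched_out(struct fpu_ctx *fpu)
{
	if (fpu->fpstate_active)
		memcpy(fpu->regs, hw_regs, sizeof(hw_regs));
}

/* Task is being scheduled back in: restore its context into the registers. */
static void sched_in(struct fpu_ctx *fpu)
{
	if (fpu->fpstate_active)
		memcpy(hw_regs, fpu->regs, sizeof(hw_regs));
}

int main(void)
{
	struct fpu_ctx ctx = { .fpstate_active = 1 };

	sched_out(&ctx);	/* running -> save into the context */
	sched_in(&ctx);		/* not running -> restore from it */
	return 0;
}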
@@ -385,6 +385,10 @@ struct thread_struct {
 	unsigned long		fs;
 #endif
 	unsigned long		gs;
+
+	/* Floating point and extended processor state */
+	struct fpu		fpu;
+
 	/* Save middle states of ptrace breakpoints */
 	struct perf_event	*ptrace_bps[HBP_NUM];
 	/* Debug status used for traps, single steps, etc... */
@@ -395,8 +399,6 @@ struct thread_struct {
 	unsigned long		cr2;
 	unsigned long		trap_nr;
 	unsigned long		error_code;
-	/* floating point and extended processor state */
-	struct fpu		fpu;
 #ifdef CONFIG_X86_32
 	/* Virtual 86 mode info */
 	struct vm86_struct __user *vm86_info;
@@ -236,14 +236,17 @@ static void fpu_copy(struct task_struct *dst, struct task_struct *src)

 int fpu__copy(struct task_struct *dst, struct task_struct *src)
 {
+	struct fpu *dst_fpu = &dst->thread.fpu;
+	struct fpu *src_fpu = &src->thread.fpu;
+
 	dst->thread.fpu.counter = 0;
 	dst->thread.fpu.has_fpu = 0;
 	dst->thread.fpu.state = NULL;

 	task_disable_lazy_fpu_restore(dst);

-	if (src->flags & PF_USED_MATH) {
-		int err = fpstate_alloc(&dst->thread.fpu);
+	if (src_fpu->fpstate_active) {
+		int err = fpstate_alloc(dst_fpu);

 		if (err)
 			return err;
@@ -260,11 +263,12 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src)
  */
 int fpstate_alloc_init(struct task_struct *curr)
 {
+	struct fpu *fpu = &curr->thread.fpu;
 	int ret;

 	if (WARN_ON_ONCE(curr != current))
 		return -EINVAL;
-	if (WARN_ON_ONCE(curr->flags & PF_USED_MATH))
+	if (WARN_ON_ONCE(fpu->fpstate_active))
 		return -EINVAL;

 	/*
@@ -277,7 +281,7 @@ int fpstate_alloc_init(struct task_struct *curr)
 	fpstate_init(&curr->thread.fpu);

 	/* Safe to do for the current task: */
-	curr->flags |= PF_USED_MATH;
+	fpu->fpstate_active = 1;

 	return 0;
 }
@@ -308,12 +312,13 @@ EXPORT_SYMBOL_GPL(fpstate_alloc_init);
  */
 static int fpu__unlazy_stopped(struct task_struct *child)
 {
+	struct fpu *child_fpu = &child->thread.fpu;
 	int ret;

 	if (WARN_ON_ONCE(child == current))
 		return -EINVAL;

-	if (child->flags & PF_USED_MATH) {
+	if (child_fpu->fpstate_active) {
 		task_disable_lazy_fpu_restore(child);
 		return 0;
 	}
@@ -328,7 +333,7 @@ static int fpu__unlazy_stopped(struct task_struct *child)
 	fpstate_init(&child->thread.fpu);

 	/* Safe to do for stopped child tasks: */
-	child->flags |= PF_USED_MATH;
+	child_fpu->fpstate_active = 1;

 	return 0;
 }
@@ -348,7 +353,7 @@ void fpu__restore(void)
 	struct task_struct *tsk = current;
 	struct fpu *fpu = &tsk->thread.fpu;

-	if (!(tsk->flags & PF_USED_MATH)) {
+	if (!fpu->fpstate_active) {
 		local_irq_enable();
 		/*
 		 * does a slab alloc which can sleep
@@ -378,6 +383,8 @@ EXPORT_SYMBOL_GPL(fpu__restore);

 void fpu__flush_thread(struct task_struct *tsk)
 {
+	struct fpu *fpu = &tsk->thread.fpu;
+
 	WARN_ON(tsk != current);

 	if (!use_eager_fpu()) {
@@ -385,7 +392,7 @@ void fpu__flush_thread(struct task_struct *tsk)
 		drop_fpu(tsk);
 		fpstate_free(&tsk->thread.fpu);
 	} else {
-		if (!(tsk->flags & PF_USED_MATH)) {
+		if (!fpu->fpstate_active) {
 			/* kthread execs. TODO: cleanup this horror. */
 			if (WARN_ON(fpstate_alloc_init(tsk)))
 				force_sig(SIGKILL, tsk);
@@ -402,12 +409,16 @@ void fpu__flush_thread(struct task_struct *tsk)
  */
 int fpregs_active(struct task_struct *target, const struct user_regset *regset)
 {
-	return (target->flags & PF_USED_MATH) ? regset->n : 0;
+	struct fpu *target_fpu = &target->thread.fpu;
+
+	return target_fpu->fpstate_active ? regset->n : 0;
 }

 int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
 {
-	return (cpu_has_fxsr && (target->flags & PF_USED_MATH)) ? regset->n : 0;
+	struct fpu *target_fpu = &target->thread.fpu;
+
+	return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0;
 }

 int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
@@ -733,16 +744,17 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
  * struct user_i387_struct) but is in fact only used for 32-bit
  * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
  */
-int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
+int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
 {
 	struct task_struct *tsk = current;
+	struct fpu *fpu = &tsk->thread.fpu;
 	int fpvalid;

-	fpvalid = !!(tsk->flags & PF_USED_MATH);
+	fpvalid = fpu->fpstate_active;
 	if (fpvalid)
 		fpvalid = !fpregs_get(tsk, NULL,
 				      0, sizeof(struct user_i387_ia32_struct),
-				      fpu, NULL);
+				      ufpu, NULL);

 	return fpvalid;
 }
@@ -334,6 +334,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 {
 	int ia32_fxstate = (buf != buf_fx);
 	struct task_struct *tsk = current;
+	struct fpu *fpu = &tsk->thread.fpu;
 	int state_size = xstate_size;
 	u64 xstate_bv = 0;
 	int fx_only = 0;
@@ -349,7 +350,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 	if (!access_ok(VERIFY_READ, buf, size))
 		return -EACCES;

-	if (!(tsk->flags & PF_USED_MATH) && fpstate_alloc_init(tsk))
+	if (!fpu->fpstate_active && fpstate_alloc_init(tsk))
 		return -1;

 	if (!static_cpu_has(X86_FEATURE_FPU))
@@ -384,12 +385,12 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		int err = 0;

 		/*
-		 * Drop the current fpu which clears PF_USED_MATH. This ensures
+		 * Drop the current fpu which clears fpu->fpstate_active. This ensures
 		 * that any context-switch during the copy of the new state,
 		 * avoids the intermediate state from getting restored/saved.
 		 * Thus avoiding the new restored state from getting corrupted.
 		 * We will be ready to restore/save the state only after
-		 * PF_USED_MATH is again set.
+		 * fpu->fpstate_active is again set.
 		 */
 		drop_fpu(tsk);
@@ -401,7 +402,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
 	}

-	tsk->flags |= PF_USED_MATH;
+	fpu->fpstate_active = 1;
 	if (use_eager_fpu()) {
 		preempt_disable();
 		fpu__restore();
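
The comment in this hunk describes a small ordering protocol: clear fpstate_active before copying in the new state, so that a context switch mid-copy neither saves nor restores the half-written buffer, and set it again only once the state is consistent. A sketch of just that ordering, with hypothetical names and none of the preemption handling the kernel also does:

#include <string.h>

struct fpu_ctx2 { unsigned char fpstate_active; unsigned char regs[512]; };

/* Hypothetical: install new user-supplied state into an FPU context. */
static int install_user_fpstate(struct fpu_ctx2 *fpu, const void *ubuf)
{
	fpu->fpstate_active = 0;	/* drop: switches now skip this context */
	memcpy(fpu->regs, ubuf, sizeof(fpu->regs));	/* may be interrupted */
	fpu->fpstate_active = 1;	/* publish: state is consistent again */
	return 0;
}

int main(void)
{
	static struct fpu_ctx2 ctx;
	static unsigned char ubuf[512];

	return install_user_fpstate(&ctx, ubuf);
}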
@@ -685,7 +686,7 @@ void xsave_init(void)
  */
 void __init_refok eager_fpu_init(void)
 {
-	WARN_ON(current->flags & PF_USED_MATH);
+	WARN_ON(current->thread.fpu.fpstate_active);
 	current_thread_info()->status = 0;

 	if (eagerfpu == ENABLE)
@@ -198,6 +198,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 	unsigned long sp = regs->sp;
 	unsigned long buf_fx = 0;
 	int onsigstack = on_sig_stack(sp);
+	struct fpu *fpu = &current->thread.fpu;

 	/* redzone */
 	if (config_enabled(CONFIG_X86_64))
@@ -217,7 +218,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 		}
 	}

-	if (current->flags & PF_USED_MATH) {
+	if (fpu->fpstate_active) {
 		sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
 				     &buf_fx, &math_size);
 		*fpstate = (void __user *)sp;
@@ -233,7 +234,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 		return (void __user *)-1L;

 	/* save i387 and extended state */
-	if ((current->flags & PF_USED_MATH) &&
+	if (fpu->fpstate_active &&
 	    save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
 		return (void __user *)-1L;
@@ -616,6 +617,7 @@ static void
 handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 {
 	bool stepping, failed;
+	struct fpu *fpu = &current->thread.fpu;

 	/* Are we from a system call? */
 	if (syscall_get_nr(current, regs) >= 0) {
@@ -664,7 +666,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 		/*
 		 * Ensure the signal handler starts with the new fpu state.
 		 */
-		if (current->flags & PF_USED_MATH)
+		if (fpu->fpstate_active)
 			fpu_reset_state(current);
 	}
 	signal_setup_done(failed, ksig, stepping);
@@ -6597,10 +6597,11 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)

 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
+	struct fpu *fpu = &current->thread.fpu;
 	int r;
 	sigset_t sigsaved;

-	if (!(current->flags & PF_USED_MATH) && fpstate_alloc_init(current))
+	if (!fpu->fpstate_active && fpstate_alloc_init(current))
 		return -ENOMEM;

 	if (vcpu->sigset_active)
@@ -147,8 +147,9 @@ void math_emulate(struct math_emu_info *info)
 	unsigned long code_base = 0;
 	unsigned long code_limit = 0;	/* Initialized to stop compiler warnings */
 	struct desc_struct code_descriptor;
+	struct fpu *fpu = &current->thread.fpu;

-	if (!(current->flags & PF_USED_MATH)) {
+	if (!fpu->fpstate_active) {
 		if (fpstate_alloc_init(current)) {
 			do_group_exit(SIGKILL);
 			return;