Commit ecc026bf authored by Linus Torvalds

Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fpu updates from Ingo Molnar:
 "The biggest change in terms of impact is the changing of the FPU
  context switch model to 'eagerfpu' for all CPU types, via: commit
  58122bf1: "x86/fpu: Default eagerfpu=on on all CPUs"

  This makes all FPU saves and restores synchronous and makes the FPU
  code a lot more obvious to read.  In the next cycle, if this change is
  problem free, we'll remove the old lazy FPU restore code altogether.

  This change flushed out some old bugs, which should all be fixed by
  now, BYMMV"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/fpu: Default eagerfpu=on on all CPUs
  x86/fpu: Speed up lazy FPU restores slightly
  x86/fpu: Fold fpu_copy() into fpu__copy()
  x86/fpu: Fix FNSAVE usage in eagerfpu mode
  x86/fpu: Fix math emulation in eager fpu mode
parents fa53c489 14ddde78
...@@ -590,7 +590,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) ...@@ -590,7 +590,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
* If the task has used the math, pre-load the FPU on xsave processors * If the task has used the math, pre-load the FPU on xsave processors
* or if the past 5 consecutive context-switches used math. * or if the past 5 consecutive context-switches used math.
*/ */
fpu.preload = new_fpu->fpstate_active && fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
new_fpu->fpstate_active &&
(use_eager_fpu() || new_fpu->counter > 5); (use_eager_fpu() || new_fpu->counter > 5);
if (old_fpu->fpregs_active) { if (old_fpu->fpregs_active) {
......
...@@ -114,6 +114,10 @@ void __kernel_fpu_begin(void) ...@@ -114,6 +114,10 @@ void __kernel_fpu_begin(void)
kernel_fpu_disable(); kernel_fpu_disable();
if (fpu->fpregs_active) { if (fpu->fpregs_active) {
/*
* Ignore return value -- we don't care if reg state
* is clobbered.
*/
copy_fpregs_to_fpstate(fpu); copy_fpregs_to_fpstate(fpu);
} else { } else {
this_cpu_write(fpu_fpregs_owner_ctx, NULL); this_cpu_write(fpu_fpregs_owner_ctx, NULL);
...@@ -189,9 +193,13 @@ void fpu__save(struct fpu *fpu) ...@@ -189,9 +193,13 @@ void fpu__save(struct fpu *fpu)
preempt_disable(); preempt_disable();
if (fpu->fpregs_active) { if (fpu->fpregs_active) {
if (!copy_fpregs_to_fpstate(fpu)) if (!copy_fpregs_to_fpstate(fpu)) {
if (use_eager_fpu())
copy_kernel_to_fpregs(&fpu->state);
else
fpregs_deactivate(fpu); fpregs_deactivate(fpu);
} }
}
preempt_enable(); preempt_enable();
} }
EXPORT_SYMBOL_GPL(fpu__save); EXPORT_SYMBOL_GPL(fpu__save);
...@@ -223,14 +231,15 @@ void fpstate_init(union fpregs_state *state) ...@@ -223,14 +231,15 @@ void fpstate_init(union fpregs_state *state)
} }
EXPORT_SYMBOL_GPL(fpstate_init); EXPORT_SYMBOL_GPL(fpstate_init);
/* int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
* Copy the current task's FPU state to a new task's FPU context.
*
* In both the 'eager' and the 'lazy' case we save hardware registers
* directly to the destination buffer.
*/
static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{ {
dst_fpu->counter = 0;
dst_fpu->fpregs_active = 0;
dst_fpu->last_cpu = -1;
if (!src_fpu->fpstate_active || !cpu_has_fpu)
return 0;
WARN_ON_FPU(src_fpu != &current->thread.fpu); WARN_ON_FPU(src_fpu != &current->thread.fpu);
/* /*
...@@ -243,10 +252,9 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) ...@@ -243,10 +252,9 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
/* /*
* Save current FPU registers directly into the child * Save current FPU registers directly into the child
* FPU context, without any memory-to-memory copying. * FPU context, without any memory-to-memory copying.
* * In lazy mode, if the FPU context isn't loaded into
* If the FPU context got destroyed in the process (FNSAVE * fpregs, CR0.TS will be set and do_device_not_available
* done on old CPUs) then copy it back into the source * will load the FPU context.
* context and mark the current task for lazy restore.
* *
* We have to do all this with preemption disabled, * We have to do all this with preemption disabled,
* mostly because of the FNSAVE case, because in that * mostly because of the FNSAVE case, because in that
...@@ -259,19 +267,13 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) ...@@ -259,19 +267,13 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
preempt_disable(); preempt_disable();
if (!copy_fpregs_to_fpstate(dst_fpu)) { if (!copy_fpregs_to_fpstate(dst_fpu)) {
memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
if (use_eager_fpu())
copy_kernel_to_fpregs(&src_fpu->state);
else
fpregs_deactivate(src_fpu); fpregs_deactivate(src_fpu);
} }
preempt_enable(); preempt_enable();
}
int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
dst_fpu->counter = 0;
dst_fpu->fpregs_active = 0;
dst_fpu->last_cpu = -1;
if (src_fpu->fpstate_active && cpu_has_fpu)
fpu_copy(dst_fpu, src_fpu);
return 0; return 0;
} }
...@@ -425,7 +427,7 @@ void fpu__clear(struct fpu *fpu) ...@@ -425,7 +427,7 @@ void fpu__clear(struct fpu *fpu)
{ {
WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */ WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
if (!use_eager_fpu()) { if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
/* FPU state will be reallocated lazily at the first use. */ /* FPU state will be reallocated lazily at the first use. */
fpu__drop(fpu); fpu__drop(fpu);
} else { } else {
......
...@@ -262,7 +262,10 @@ static void __init fpu__init_system_xstate_size_legacy(void) ...@@ -262,7 +262,10 @@ static void __init fpu__init_system_xstate_size_legacy(void)
* not only saved the restores along the way, but we also have the * not only saved the restores along the way, but we also have the
* FPU ready to be used for the original task. * FPU ready to be used for the original task.
* *
* 'eager' switching is used on modern CPUs, there we switch the FPU * 'lazy' is deprecated because it's almost never a performance win
* and it's much more complicated than 'eager'.
*
* 'eager' switching is by default on all CPUs, there we switch the FPU
* state during every context switch, regardless of whether the task * state during every context switch, regardless of whether the task
* has used FPU instructions in that time slice or not. This is done * has used FPU instructions in that time slice or not. This is done
* because modern FPU context saving instructions are able to optimize * because modern FPU context saving instructions are able to optimize
...@@ -273,7 +276,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) ...@@ -273,7 +276,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
* to use 'eager' restores, if we detect that a task is using the FPU * to use 'eager' restores, if we detect that a task is using the FPU
* frequently. See the fpu->counter logic in fpu/internal.h for that. ] * frequently. See the fpu->counter logic in fpu/internal.h for that. ]
*/ */
static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
/* /*
* Find supported xfeatures based on cpu features and command-line input. * Find supported xfeatures based on cpu features and command-line input.
...@@ -344,15 +347,9 @@ static void __init fpu__init_system_ctx_switch(void) ...@@ -344,15 +347,9 @@ static void __init fpu__init_system_ctx_switch(void)
*/ */
static void __init fpu__init_parse_early_param(void) static void __init fpu__init_parse_early_param(void)
{ {
/*
* No need to check "eagerfpu=auto" again, since it is the
* initial default.
*/
if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
eagerfpu = DISABLE; eagerfpu = DISABLE;
fpu__clear_eager_fpu_features(); fpu__clear_eager_fpu_features();
} else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) {
eagerfpu = ENABLE;
} }
if (cmdline_find_option_bool(boot_command_line, "no387")) if (cmdline_find_option_bool(boot_command_line, "no387"))
......
...@@ -792,10 +792,9 @@ dotraplinkage void ...@@ -792,10 +792,9 @@ dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code) do_device_not_available(struct pt_regs *regs, long error_code)
{ {
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
BUG_ON(use_eager_fpu());
#ifdef CONFIG_MATH_EMULATION #ifdef CONFIG_MATH_EMULATION
if (read_cr0() & X86_CR0_EM) { if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) {
struct math_emu_info info = { }; struct math_emu_info info = { };
cond_local_irq_enable(regs); cond_local_irq_enable(regs);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment