Commit 79eb42b2 authored by Will Deacon

Merge branch 'for-next/fpsimd' into for-next/core

* for-next/fpsimd:
  arm64: fpsimd: Implement lazy restore for kernel mode FPSIMD
  arm64: fpsimd: Preserve/restore kernel mode NEON at context switch
  arm64: fpsimd: Drop unneeded 'busy' flag
parents e90a8a21 2632e252
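
For context, the pattern this series targets is the standard kernel mode NEON bracket, in which a caller checks may_use_simd() and wraps its SIMD code in kernel_neon_begin()/kernel_neon_end(). A minimal sketch follows; the walk_blocks() helper and both loop bodies are hypothetical and not part of this merge:

#include <linux/types.h>
#include <asm/neon.h>	/* kernel_neon_begin(), kernel_neon_end() */
#include <asm/simd.h>	/* may_use_simd() */

/* Hypothetical helper: process a buffer with NEON when that is allowed. */
static void walk_blocks(u8 *dst, const u8 *src, int blocks)
{
	if (may_use_simd()) {
		kernel_neon_begin();	/* claim the FPSIMD regs, save user state */
		/* ... NEON-accelerated loop over the blocks ... */
		kernel_neon_end();
	} else {
		/* ... scalar fallback, e.g. for hardirq callers ... */
	}
}

After this series, a task inside such a bracket remains preemptible: on a context switch its kernel mode FPSIMD state is saved into thread_struct::kernel_fpsimd_state and reloaded lazily, i.e. only when that state is no longer live in the registers of the CPU the task resumes on.
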
arch/arm64/include/asm/processor.h

@@ -167,6 +167,9 @@ struct thread_struct {
 	unsigned long		fault_address;	/* fault info */
 	unsigned long		fault_code;	/* ESR_EL1 value */
 	struct debug_info	debug;		/* debugging */
+
+	struct user_fpsimd_state	kernel_fpsimd_state;
+	unsigned int		kernel_fpsimd_cpu;
 #ifdef CONFIG_ARM64_PTR_AUTH
 	struct ptrauth_keys_user	keys_user;
 #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
arch/arm64/include/asm/simd.h

@@ -12,8 +12,6 @@
 #include <linux/preempt.h>
 #include <linux/types.h>
 
-DECLARE_PER_CPU(bool, fpsimd_context_busy);
-
 #ifdef CONFIG_KERNEL_MODE_NEON
 
 /*
@@ -28,17 +26,10 @@ static __must_check inline bool may_use_simd(void)
 	/*
 	 * We must make sure that the SVE has been initialized properly
 	 * before using the SIMD in kernel.
-	 * fpsimd_context_busy is only set while preemption is disabled,
-	 * and is clear whenever preemption is enabled. Since
-	 * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
-	 * cannot change under our feet -- if it's set we cannot be
-	 * migrated, and if it's clear we cannot be migrated to a CPU
-	 * where it is set.
	 */
 	return !WARN_ON(!system_capabilities_finalized()) &&
 	       system_supports_fpsimd() &&
-	       !in_hardirq() && !irqs_disabled() && !in_nmi() &&
-	       !this_cpu_read(fpsimd_context_busy);
+	       !in_hardirq() && !irqs_disabled() && !in_nmi();
 }
 
 #else /* ! CONFIG_KERNEL_MODE_NEON */
arch/arm64/include/asm/thread_info.h

@@ -80,6 +80,7 @@ void arch_setup_new_exec(void);
 #define TIF_TAGGED_ADDR		26	/* Allow tagged user addresses */
 #define TIF_SME			27	/* SME in use */
 #define TIF_SME_VL_INHERIT	28	/* Inherit SME vl_onexec across exec */
+#define TIF_KERNEL_FPSTATE	29	/* Task is in a kernel mode FPSIMD section */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
arch/arm64/kernel/fpsimd.c

@@ -85,13 +85,13 @@
  * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
  * flag the register state as invalid.
  *
- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
- * save the task's FPSIMD context back to task_struct from softirq context.
- * To prevent this from racing with the manipulation of the task's FPSIMD state
- * from task context and thereby corrupting the state, it is necessary to
- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
- * run but prevent them to use FPSIMD.
+ * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
+ * called from softirq context, which will save the task's FPSIMD context back
+ * to task_struct. To prevent this from racing with the manipulation of the
+ * task's FPSIMD state from task context and thereby corrupting the state, it
+ * is necessary to protect any manipulation of a task's fpsimd_state or
+ * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
+ * softirq servicing entirely until put_cpu_fpsimd_context() is called.
  *
 * For a certain task, the sequence may look something like this:
 * - the task gets scheduled in; if both the task's fpsimd_cpu field
@@ -209,27 +209,14 @@ static inline void sme_free(struct task_struct *t) { }
 
 #endif
 
-DEFINE_PER_CPU(bool, fpsimd_context_busy);
-EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
-
 static void fpsimd_bind_task_to_cpu(void);
 
-static void __get_cpu_fpsimd_context(void)
-{
-	bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
-
-	WARN_ON(busy);
-}
-
 /*
  * Claim ownership of the CPU FPSIMD context for use by the calling context.
  *
  * The caller may freely manipulate the FPSIMD context metadata until
  * put_cpu_fpsimd_context() is called.
  *
- * The double-underscore version must only be called if you know the task
- * can't be preempted.
- *
  * On RT kernels local_bh_disable() is not sufficient because it only
  * serializes soft interrupt related sections via a local lock, but stays
  * preemptible. Disabling preemption is the right choice here as bottom
@@ -242,14 +229,6 @@ static void get_cpu_fpsimd_context(void)
 		local_bh_disable();
 	else
 		preempt_disable();
-	__get_cpu_fpsimd_context();
-}
-
-static void __put_cpu_fpsimd_context(void)
-{
-	bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
-
-	WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
 }
 
 /*
@@ -261,18 +240,12 @@ static void __put_cpu_fpsimd_context(void)
  */
 static void put_cpu_fpsimd_context(void)
 {
-	__put_cpu_fpsimd_context();
 	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
 		local_bh_enable();
 	else
 		preempt_enable();
 }
 
-static bool have_cpu_fpsimd_context(void)
-{
-	return !preemptible() && __this_cpu_read(fpsimd_context_busy);
-}
-
 unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
 {
 	return task->thread.vl[type];
@@ -383,7 +356,8 @@ static void task_fpsimd_load(void)
 	bool restore_ffr;
 
 	WARN_ON(!system_supports_fpsimd());
-	WARN_ON(!have_cpu_fpsimd_context());
+	WARN_ON(preemptible());
+	WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
 
 	if (system_supports_sve() || system_supports_sme()) {
 		switch (current->thread.fp_type) {
@@ -406,7 +380,7 @@ static void task_fpsimd_load(void)
 		default:
 			/*
 			 * This indicates either a bug in
-			 * fpsimd_save() or memory corruption, we
+			 * fpsimd_save_user_state() or memory corruption, we
 			 * should always record an explicit format
 			 * when we save. We always at least have the
 			 * memory allocated for FPSIMD registers so
@@ -457,7 +431,7 @@ static void task_fpsimd_load(void)
  * than via current, if we are saving KVM state then it will have
  * ensured that the type of registers to save is set in last->to_save.
  */
-static void fpsimd_save(void)
+static void fpsimd_save_user_state(void)
 {
 	struct cpu_fp_state const *last =
 		this_cpu_ptr(&fpsimd_last_state);
@@ -467,7 +441,7 @@ static void fpsimd_save_user_state(void)
 	unsigned int vl;
 
 	WARN_ON(!system_supports_fpsimd());
-	WARN_ON(!have_cpu_fpsimd_context());
+	WARN_ON(preemptible());
 
 	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
 		return;
@@ -888,7 +862,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
 	if (task == current) {
 		get_cpu_fpsimd_context();
 
-		fpsimd_save();
+		fpsimd_save_user_state();
 	}
 
 	fpsimd_flush_task_state(task);
@@ -1500,6 +1474,34 @@ void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
 		       current);
 }
 
+static void fpsimd_load_kernel_state(struct task_struct *task)
+{
+	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
+
+	/*
+	 * Elide the load if this CPU holds the most recent kernel mode
+	 * FPSIMD context of the current task.
+	 */
+	if (last->st == &task->thread.kernel_fpsimd_state &&
+	    task->thread.kernel_fpsimd_cpu == smp_processor_id())
+		return;
+
+	fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+}
+
+static void fpsimd_save_kernel_state(struct task_struct *task)
+{
+	struct cpu_fp_state cpu_fp_state = {
+		.st		= &task->thread.kernel_fpsimd_state,
+		.to_save	= FP_STATE_FPSIMD,
+	};
+
+	fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+	fpsimd_bind_state_to_cpu(&cpu_fp_state);
+
+	task->thread.kernel_fpsimd_cpu = smp_processor_id();
+}
+
 void fpsimd_thread_switch(struct task_struct *next)
 {
 	bool wrong_task, wrong_cpu;
@@ -1507,15 +1509,23 @@ void fpsimd_thread_switch(struct task_struct *next)
 	if (!system_supports_fpsimd())
 		return;
 
-	__get_cpu_fpsimd_context();
+	WARN_ON_ONCE(!irqs_disabled());
 
 	/* Save unsaved fpsimd state, if any: */
-	fpsimd_save();
-
-	/*
-	 * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
-	 * state. For kernel threads, FPSIMD registers are never loaded
-	 * and wrong_task and wrong_cpu will always be true.
-	 */
-	wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
-					&next->thread.uw.fpsimd_state;
+	if (test_thread_flag(TIF_KERNEL_FPSTATE))
+		fpsimd_save_kernel_state(current);
+	else
+		fpsimd_save_user_state();
+
+	if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
+		fpsimd_load_kernel_state(next);
+		set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+	} else {
+		/*
+		 * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+		 * state. For kernel threads, FPSIMD registers are never
+		 * loaded with user mode FPSIMD state and so wrong_task and
+		 * wrong_cpu will always be true.
+		 */
+		wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+					&next->thread.uw.fpsimd_state;
@@ -1523,8 +1533,7 @@ void fpsimd_thread_switch(struct task_struct *next)
 
-	update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
-			       wrong_task || wrong_cpu);
-
-	__put_cpu_fpsimd_context();
+		update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+				       wrong_task || wrong_cpu);
+	}
 }
 
 static void fpsimd_flush_thread_vl(enum vec_type type)
@@ -1614,7 +1623,7 @@ void fpsimd_preserve_current_state(void)
 		return;
 
 	get_cpu_fpsimd_context();
-	fpsimd_save();
+	fpsimd_save_user_state();
 	put_cpu_fpsimd_context();
 }
@@ -1826,13 +1835,15 @@ static void fpsimd_flush_cpu_state(void)
  */
 void fpsimd_save_and_flush_cpu_state(void)
 {
+	unsigned long flags;
+
 	if (!system_supports_fpsimd())
 		return;
 	WARN_ON(preemptible());
-	__get_cpu_fpsimd_context();
-	fpsimd_save();
+	local_irq_save(flags);
+	fpsimd_save_user_state();
 	fpsimd_flush_cpu_state();
-	__put_cpu_fpsimd_context();
+	local_irq_restore(flags);
 }
 
 #ifdef CONFIG_KERNEL_MODE_NEON
@@ -1864,10 +1875,37 @@ void kernel_neon_begin(void)
 	get_cpu_fpsimd_context();
 
 	/* Save unsaved fpsimd state, if any: */
-	fpsimd_save();
+	if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
+		BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
+		fpsimd_save_kernel_state(current);
+	} else {
+		fpsimd_save_user_state();
+
+		/*
+		 * Set the thread flag so that the kernel mode FPSIMD state
+		 * will be context switched along with the rest of the task
+		 * state.
+		 *
+		 * On non-PREEMPT_RT, softirqs may interrupt task level kernel
+		 * mode FPSIMD, but the task will not be preemptible so setting
+		 * TIF_KERNEL_FPSTATE for those would be both wrong (as it
+		 * would mark the task context FPSIMD state as requiring a
+		 * context switch) and unnecessary.
+		 *
+		 * On PREEMPT_RT, softirqs are serviced from a separate thread,
+		 * which is scheduled as usual, and this guarantees that these
+		 * softirqs are not interrupting use of the FPSIMD in kernel
+		 * mode in task context. So in this case, setting the flag here
+		 * is always appropriate.
+		 */
+		if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+			set_thread_flag(TIF_KERNEL_FPSTATE);
+	}
 
 	/* Invalidate any task state remaining in the fpsimd regs: */
 	fpsimd_flush_cpu_state();
+
+	put_cpu_fpsimd_context();
 }
 
 EXPORT_SYMBOL_GPL(kernel_neon_begin);
@@ -1885,7 +1923,16 @@ void kernel_neon_end(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	put_cpu_fpsimd_context();
+	/*
+	 * If we are returning from a nested use of kernel mode FPSIMD, restore
+	 * the task context kernel mode FPSIMD state. This can only happen when
+	 * running in softirq context on non-PREEMPT_RT.
+	 */
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
+	    test_thread_flag(TIF_KERNEL_FPSTATE))
+		fpsimd_load_kernel_state(current);
+	else
+		clear_thread_flag(TIF_KERNEL_FPSTATE);
 }
 
 EXPORT_SYMBOL_GPL(kernel_neon_end);
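
To make the TIF_KERNEL_FPSTATE life cycle concrete, here is a sketch of the one nesting case that remains legal on non-PREEMPT_RT: a softirq using NEON while it interrupts task level kernel mode FPSIMD. Both functions are hypothetical; the comments describe what the code in this merge does at each step:

/* Hypothetical softirq handler that also wants to use NEON. */
static void example_softirq_handler(void)
{
	kernel_neon_begin();	/* TIF_KERNEL_FPSTATE already set: saves the
				 * task's kernel mode state into
				 * thread.kernel_fpsimd_state instead */
	/* ... NEON code in softirq context ... */
	kernel_neon_end();	/* in_serving_softirq() && TIF_KERNEL_FPSTATE:
				 * loads the interrupted task's kernel mode
				 * state back into the registers */
}

/* Hypothetical task context caller. */
static void example_task_work(void)
{
	kernel_neon_begin();	/* saves user state, sets TIF_KERNEL_FPSTATE */
	/* ... NEON code; preemptible, and example_softirq_handler() may
	 * run here without corrupting these registers ... */
	kernel_neon_end();	/* task context: clears TIF_KERNEL_FPSTATE */
}

Only one level of nesting can arise this way: softirqs do not nest, softirqs are threaded on PREEMPT_RT, and the BUG_ON() in kernel_neon_begin() catches any remaining case.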