Commit 9977e886 authored by Hendrik Brueckner, committed by Martin Schwidefsky

s390/kernel: lazy restore fpu registers

Improve the save and restore behavior of FPU register contents to use the
vector extension within the kernel.

The kernel itself does not use floating-point or vector registers, so the
FPU register contents need to be saved and restored only when handling
signals or switching processes.  To prepare for using vector instructions
and vector registers within the kernel, enhance the save behavior and
implement a lazy restore at return to user space from a system call or
interrupt.

To implement the lazy restore, save_fpu_regs() sets a CPU information
flag, CIF_FPU, to indicate that the FPU registers must be restored.
Saving and setting CIF_FPU is performed in an atomic fashion to be
interrupt-safe.  When the kernel wants to use the vector extension or
wants to change the FPU register state for a task during signal handling,
save_fpu_regs() must be called first.  The CIF_FPU flag is also set at
process switch.  At return to user space, the FPU state is restored.  In
particular, the FPU state includes the floating-point or vector register
contents, as well as the vector-enablement and floating-point controls.
Restoring the FPU state and clearing CIF_FPU is likewise performed in an
atomic fashion.
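
To make the handshake concrete, here is a minimal user-space C sketch of the
lazy save/restore protocol described above.  It is an illustration, not the
kernel implementation (the real restore happens in the s390 entry code); the
names cpu_flags, hw_fprs, fpu_state, and restore_fpu_on_exit are invented for
the sketch:

#include <stdio.h>
#include <string.h>

#define CIF_FPU (1u << 3)            /* "FPU state must be restored" */

static unsigned int cpu_flags;       /* stand-in for the per-CPU flag word */
static double hw_fprs[16];           /* stand-in for the hardware registers */

struct fpu_state {
	double fprs[16];             /* saved register contents */
};

static struct fpu_state task_fpu;    /* per-task save area */

/* Save the register contents once and mark them as "live in memory". */
static void save_fpu_regs(struct fpu_state *fpu)
{
	if (cpu_flags & CIF_FPU)
		return;              /* already saved, registers are free */
	memcpy(fpu->fprs, hw_fprs, sizeof(fpu->fprs));
	cpu_flags |= CIF_FPU;        /* the exit path must restore */
}

/* Executed once on the return-to-user path. */
static void restore_fpu_on_exit(struct fpu_state *fpu)
{
	if (!(cpu_flags & CIF_FPU))
		return;              /* registers were never touched */
	memcpy(hw_fprs, fpu->fprs, sizeof(fpu->fprs));
	cpu_flags &= ~CIF_FPU;
}

int main(void)
{
	hw_fprs[0] = 1.5;            /* user value in a register */
	save_fpu_regs(&task_fpu);    /* kernel wants to use the FPU ... */
	hw_fprs[0] = 99.0;           /* ... and clobbers it */
	restore_fpu_on_exit(&task_fpu);
	printf("fpr0 after exit to user: %g\n", hw_fprs[0]); /* 1.5 */
	return 0;
}

The point of the flag is that a second save_fpu_regs() call becomes a no-op
and the registers are reloaded at most once, on the final exit to user space.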

For KVM, the restore of the FPU register state is performed when restoring
the general-purpose guest registers before the SIE instruction is started.
Because the path towards the SIE instruction is interruptible, the CIF_FPU
flag must be checked again right before going into SIE.  If it is set, the
guest registers must be reloaded by re-entering the outer SIE loop.  This
is the same behavior as when the SIE critical section is interrupted.
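
The re-check before SIE can be modeled with a small loop.  The following
self-contained C sketch uses invented helpers (load_guest_registers,
interrupt_may_happen, sie_run) to mimic the outer SIE loop described above;
it is a model of the control flow, not the actual entry code:

#include <stdbool.h>
#include <stdio.h>

static bool cif_fpu_set;             /* stand-in for test_cpu_flag(CIF_FPU) */

/* Invented helpers that model the steps around the SIE instruction. */
static void load_guest_registers(void) { cif_fpu_set = false; }
static void interrupt_may_happen(int i) { if (i == 0) cif_fpu_set = true; }
static int  sie_run(void) { return 0; /* guest executed */ }

int main(void)
{
	for (int attempt = 0; ; attempt++) {
		load_guest_registers();      /* outer SIE loop: reload state */
		interrupt_may_happen(attempt);
		if (cif_fpu_set) {
			/* Interrupted on the way in: registers may be stale,
			 * so re-enter the outer loop instead of issuing SIE. */
			printf("attempt %d: CIF_FPU set, retrying\n", attempt);
			continue;
		}
		sie_run();                   /* safe: guest state is current */
		printf("attempt %d: entered SIE\n", attempt);
		break;
	}
	return 0;
}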
Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
parent bd550337
arch/s390/include/asm/ctl_reg.h
@@ -46,6 +46,8 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
 	__ctl_load(reg, cr, cr);
 }
 
+void __ctl_set_vx(void);
+
 void smp_ctl_set_bit(int cr, int bit);
 void smp_ctl_clear_bit(int cr, int bit);
...
arch/s390/include/asm/fpu-internal.h
@@ -28,9 +28,14 @@ struct fpu {
 	};
 };
 
+void save_fpu_regs(struct fpu *fpu);
+
 #define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
 #define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
 
+/* VX array structure for address operand constraints in inline assemblies */
+struct vx_array { __vector128 _[__NUM_VXRS]; };
+
 static inline int test_fp_ctl(u32 fpc)
 {
 	u32 orig_fpc;
@@ -48,76 +53,6 @@ static inline int test_fp_ctl(u32 fpc)
 	return rc;
 }
 
-static inline void save_fp_ctl(u32 *fpc)
-{
-	asm volatile(
-		" stfpc %0\n"
-		: "+Q" (*fpc));
-}
-
-static inline int restore_fp_ctl(u32 *fpc)
-{
-	int rc;
-
-	asm volatile(
-		" lfpc %1\n"
-		"0: la %0,0\n"
-		"1:\n"
-		: "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
-	return rc;
-}
-
-static inline void save_fp_regs(freg_t *fprs)
-{
-	asm volatile("std 0,%0" : "=Q" (fprs[0]));
-	asm volatile("std 2,%0" : "=Q" (fprs[2]));
-	asm volatile("std 4,%0" : "=Q" (fprs[4]));
-	asm volatile("std 6,%0" : "=Q" (fprs[6]));
-	asm volatile("std 1,%0" : "=Q" (fprs[1]));
-	asm volatile("std 3,%0" : "=Q" (fprs[3]));
-	asm volatile("std 5,%0" : "=Q" (fprs[5]));
-	asm volatile("std 7,%0" : "=Q" (fprs[7]));
-	asm volatile("std 8,%0" : "=Q" (fprs[8]));
-	asm volatile("std 9,%0" : "=Q" (fprs[9]));
-	asm volatile("std 10,%0" : "=Q" (fprs[10]));
-	asm volatile("std 11,%0" : "=Q" (fprs[11]));
-	asm volatile("std 12,%0" : "=Q" (fprs[12]));
-	asm volatile("std 13,%0" : "=Q" (fprs[13]));
-	asm volatile("std 14,%0" : "=Q" (fprs[14]));
-	asm volatile("std 15,%0" : "=Q" (fprs[15]));
-}
-
-static inline void restore_fp_regs(freg_t *fprs)
-{
-	asm volatile("ld 0,%0" : : "Q" (fprs[0]));
-	asm volatile("ld 2,%0" : : "Q" (fprs[2]));
-	asm volatile("ld 4,%0" : : "Q" (fprs[4]));
-	asm volatile("ld 6,%0" : : "Q" (fprs[6]));
-	asm volatile("ld 1,%0" : : "Q" (fprs[1]));
-	asm volatile("ld 3,%0" : : "Q" (fprs[3]));
-	asm volatile("ld 5,%0" : : "Q" (fprs[5]));
-	asm volatile("ld 7,%0" : : "Q" (fprs[7]));
-	asm volatile("ld 8,%0" : : "Q" (fprs[8]));
-	asm volatile("ld 9,%0" : : "Q" (fprs[9]));
-	asm volatile("ld 10,%0" : : "Q" (fprs[10]));
-	asm volatile("ld 11,%0" : : "Q" (fprs[11]));
-	asm volatile("ld 12,%0" : : "Q" (fprs[12]));
-	asm volatile("ld 13,%0" : : "Q" (fprs[13]));
-	asm volatile("ld 14,%0" : : "Q" (fprs[14]));
-	asm volatile("ld 15,%0" : : "Q" (fprs[15]));
-}
-
-static inline void save_vx_regs(__vector128 *vxrs)
-{
-	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
-	asm volatile(
-		" la 1,%0\n"
-		" .word 0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		" .word 0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		: "=Q" (*(addrtype *) vxrs) : : "1");
-}
-
 static inline void save_vx_regs_safe(__vector128 *vxrs)
 {
 	unsigned long cr0, flags;
@@ -126,20 +61,13 @@ static inline void save_vx_regs_safe(__vector128 *vxrs)
 	__ctl_store(cr0, 0, 0);
 	__ctl_set_bit(0, 17);
 	__ctl_set_bit(0, 18);
-	save_vx_regs(vxrs);
-	__ctl_load(cr0, 0, 0);
-	arch_local_irq_restore(flags);
-}
-
-static inline void restore_vx_regs(__vector128 *vxrs)
-{
-	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
-	asm volatile(
-		" la 1,%0\n"
-		" .word 0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		" .word 0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
-		: : "Q" (*(addrtype *) vxrs) : "1");
+	asm volatile(
+		" la 1,%0\n"
+		" .word 0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		" .word 0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
+		: "=Q" (*(struct vx_array *) vxrs) : : "1");
+	__ctl_load(cr0, 0, 0);
+	arch_local_irq_restore(flags);
 }
 
 static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
@@ -177,24 +105,6 @@ static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
 		       sizeof(fpregs->fprs));
 }
 
-static inline void save_fpu_regs(struct fpu *fpu)
-{
-	save_fp_ctl(&fpu->fpc);
-	if (is_vx_fpu(fpu))
-		save_vx_regs(fpu->vxrs);
-	else
-		save_fp_regs(fpu->fprs);
-}
-
-static inline void restore_fpu_regs(struct fpu *fpu)
-{
-	restore_fp_ctl(&fpu->fpc);
-	if (is_vx_fpu(fpu))
-		restore_vx_regs(fpu->vxrs);
-	else
-		restore_fp_regs(fpu->fprs);
-}
-
 #endif
 
 #endif /* _ASM_S390_FPU_INTERNAL_H */
arch/s390/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
 #include <linux/kvm.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
+#include <asm/fpu-internal.h>
 #include <asm/isc.h>
 
 #define KVM_MAX_VCPUS 64
@@ -498,10 +499,9 @@ struct kvm_guestdbg_info_arch {
 
 struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *sie_block;
-	s390_fp_regs host_fpregs;
 	unsigned int host_acrs[NUM_ACRS];
-	s390_fp_regs guest_fpregs;
-	struct kvm_s390_vregs *host_vregs;
+	struct fpu host_fpregs;
+	struct fpu guest_fpregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer ckc_timer;
 	struct kvm_s390_pgm_info pgm;
...
arch/s390/include/asm/processor.h
@@ -14,10 +14,12 @@
 #define CIF_MCCK_PENDING	0	/* machine check handling is pending */
 #define CIF_ASCE		1	/* user asce needs fixup / uaccess */
 #define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
+#define CIF_FPU			3	/* restore vector registers */
 
 #define _CIF_MCCK_PENDING	(1<<CIF_MCCK_PENDING)
 #define _CIF_ASCE		(1<<CIF_ASCE)
 #define _CIF_NOHZ_DELAY		(1<<CIF_NOHZ_DELAY)
+#define _CIF_FPU		(1<<CIF_FPU)
 
 #ifndef __ASSEMBLY__
...
arch/s390/include/asm/switch_to.h
@@ -36,7 +36,7 @@ static inline void restore_access_regs(unsigned int *acrs)
 	} \
 	if (next->mm) { \
 		update_cr_regs(next); \
-		restore_fpu_regs(&next->thread.fpu); \
+		set_cpu_flag(CIF_FPU); \
 		restore_access_regs(&next->thread.acrs[0]); \
 		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
 	} \
...
arch/s390/kernel/asm-offsets.c
@@ -28,11 +28,16 @@ int main(void)
 	DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
 	BLANK();
 	DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
+	DEFINE(__THREAD_fpu, offsetof(struct task_struct, thread.fpu));
 	DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
 	DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
 	DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid));
 	DEFINE(__THREAD_trap_tdb, offsetof(struct thread_struct, trap_tdb));
 	BLANK();
+	DEFINE(__FPU_fpc, offsetof(struct fpu, fpc));
+	DEFINE(__FPU_flags, offsetof(struct fpu, flags));
+	DEFINE(__FPU_regs, offsetof(struct fpu, regs));
+	BLANK();
 	DEFINE(__TI_task, offsetof(struct thread_info, task));
 	DEFINE(__TI_flags, offsetof(struct thread_info, flags));
 	DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
...
arch/s390/kernel/compat_signal.c
@@ -161,7 +161,6 @@ static void store_sigregs(void)
 static void load_sigregs(void)
 {
 	restore_access_regs(current->thread.acrs);
-	restore_fpu_regs(&current->thread.fpu);
 }
 
 static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
@@ -287,6 +286,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
 		goto badframe;
 	set_current_blocked(&set);
+	save_fpu_regs(&current->thread.fpu);
 	if (restore_sigregs32(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->sregs_ext))
@@ -309,6 +309,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
+	save_fpu_regs(&current->thread.fpu);
 	if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
...
arch/s390/kernel/entry.S
This diff is collapsed.
arch/s390/kernel/nmi.c
@@ -165,8 +165,12 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 		cr0.val = S390_lowcore.cregs_save_area[0];
 		cr0.afp = cr0.vx = 1;
 		__ctl_load(cr0.val, 0, 0);
-		restore_vx_regs((__vector128 *)
-				&S390_lowcore.vector_save_area);
+		asm volatile(
+			" la 1,%0\n"
+			" .word 0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
+			" .word 0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
+			: : "Q" (*(struct vx_array *)
+				 &S390_lowcore.vector_save_area) : "1");
 		__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
 	}
 	/* Revalidate access registers */
...
arch/s390/kernel/process.c
@@ -90,16 +90,28 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	*dst = *src;
 
 	/* Set up a new floating-point register save area */
+	dst->thread.fpu.fpc = 0;
+	dst->thread.fpu.flags = 0;	/* Always start with VX disabled */
 	dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
 				       GFP_KERNEL|__GFP_REPEAT);
 	if (!dst->thread.fpu.fprs)
 		return -ENOMEM;
 
-	/* Save the fpu registers to new thread structure. */
-	save_fp_ctl(&dst->thread.fpu.fpc);
-	save_fp_regs(dst->thread.fpu.fprs);
-	dst->thread.fpu.flags = 0;	/* Always start with VX disabled */
+	/*
+	 * Save the floating-point or vector register state of the current
+	 * task. The state is not saved for early kernel threads, for example,
+	 * the init_task, which do not have an allocated save area.
+	 * The CIF_FPU flag is set in any case to lazy clear or restore a saved
+	 * state when switching to a different task or returning to user space.
+	 */
+	save_fpu_regs(&current->thread.fpu);
+	dst->thread.fpu.fpc = current->thread.fpu.fpc;
+	if (is_vx_task(current))
+		convert_vx_to_fp(dst->thread.fpu.fprs,
+				 current->thread.fpu.vxrs);
+	else
+		memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs,
+		       sizeof(freg_t) * __NUM_FPRS);
 
 	return 0;
 }
@@ -184,8 +196,15 @@ asmlinkage void execve_tail(void)
  */
 int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
 {
-	save_fp_ctl(&fpregs->fpc);
-	save_fp_regs(fpregs->fprs);
+	save_fpu_regs(&current->thread.fpu);
+	fpregs->fpc = current->thread.fpu.fpc;
+	fpregs->pad = 0;
+	if (is_vx_task(current))
+		convert_vx_to_fp((freg_t *)&fpregs->fprs,
+				 current->thread.fpu.vxrs);
+	else
+		memcpy(&fpregs->fprs, current->thread.fpu.fprs,
+		       sizeof(fpregs->fprs));
 	return 1;
 }
 EXPORT_SYMBOL(dump_fpu);
...
arch/s390/kernel/ptrace.c
@@ -45,39 +45,27 @@ void update_cr_regs(struct task_struct *task)
 	struct per_regs old, new;
 
 	/* Take care of the enable/disable of transactional execution. */
-	if (MACHINE_HAS_TE || MACHINE_HAS_VX) {
+	if (MACHINE_HAS_TE) {
 		unsigned long cr, cr_new;
 
 		__ctl_store(cr, 0, 0);
-		cr_new = cr;
-		if (MACHINE_HAS_TE) {
-			/* Set or clear transaction execution TXC bit 8. */
-			cr_new |= (1UL << 55);
-			if (task->thread.per_flags & PER_FLAG_NO_TE)
-				cr_new &= ~(1UL << 55);
-		}
-		if (MACHINE_HAS_VX) {
-			/* Enable/disable of vector extension */
-			cr_new &= ~(1UL << 17);
-			if (task->thread.fpu.vxrs)
-				cr_new |= (1UL << 17);
-		}
+		/* Set or clear transaction execution TXC bit 8. */
+		cr_new = cr | (1UL << 55);
+		if (task->thread.per_flags & PER_FLAG_NO_TE)
+			cr_new &= ~(1UL << 55);
 		if (cr_new != cr)
 			__ctl_load(cr_new, 0, 0);
-		if (MACHINE_HAS_TE) {
-			/* Set/clear transaction execution TDC bits 62/63. */
-			__ctl_store(cr, 2, 2);
-			cr_new = cr & ~3UL;
-			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
-				if (task->thread.per_flags &
-				    PER_FLAG_TE_ABORT_RAND_TEND)
-					cr_new |= 1UL;
-				else
-					cr_new |= 2UL;
-			}
-			if (cr_new != cr)
-				__ctl_load(cr_new, 2, 2);
+		/* Set or clear transaction execution TDC bits 62 and 63. */
+		__ctl_store(cr, 2, 2);
+		cr_new = cr & ~3UL;
+		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+				cr_new |= 1UL;
+			else
+				cr_new |= 2UL;
 		}
+		if (cr_new != cr)
+			__ctl_load(cr_new, 2, 2);
 	}
 
 	/* Copy user specified PER registers */
 	new.control = thread->per_user.control;
@@ -998,9 +986,6 @@ static int s390_fpregs_set(struct task_struct *target,
 	else
 		memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
 
-	if (target == current)
-		restore_fpu_regs(&target->thread.fpu);
-
 	return rc;
 }
 
@@ -1090,12 +1075,9 @@ static int s390_vxrs_low_set(struct task_struct *target,
 	save_fpu_regs(&target->thread.fpu);
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
-	if (rc == 0) {
+	if (rc == 0)
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 			*((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
-		if (target == current)
-			restore_fpu_regs(&target->thread.fpu);
-	}
 
 	return rc;
 }
@@ -1137,9 +1119,6 @@ static int s390_vxrs_high_set(struct task_struct *target,
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
-	if (rc == 0 && target == current)
-		restore_vx_regs(target->thread.fpu.vxrs);
-
 	return rc;
 }
...
arch/s390/kernel/s390_ksyms.c
 #include <linux/module.h>
 #include <linux/kvm_host.h>
+#include <asm/fpu-internal.h>
 #include <asm/ftrace.h>
 
 #ifdef CONFIG_FUNCTION_TRACER
@@ -8,6 +9,8 @@ EXPORT_SYMBOL(_mcount);
 #if IS_ENABLED(CONFIG_KVM)
 EXPORT_SYMBOL(sie64a);
 EXPORT_SYMBOL(sie_exit);
+EXPORT_SYMBOL(save_fpu_regs);
+EXPORT_SYMBOL(__ctl_set_vx);
 #endif
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
...
arch/s390/kernel/signal.c
@@ -112,7 +112,6 @@ static void store_sigregs(void)
 static void load_sigregs(void)
 {
 	restore_access_regs(current->thread.acrs);
-	restore_fpu_regs(&current->thread.fpu);
 }
 
 /* Returns non-zero on fault. */
@@ -223,6 +222,7 @@ SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
 		goto badframe;
 	set_current_blocked(&set);
+	save_fpu_regs(&current->thread.fpu);
 	if (restore_sigregs(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->sregs_ext))
@@ -246,6 +246,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
+	save_fpu_regs(&current->thread.fpu);
 	if (restore_sigregs(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
...
arch/s390/kernel/traps.c
@@ -151,7 +151,7 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
 DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
 	      "transaction constraint exception")
 
-static inline void do_fp_trap(struct pt_regs *regs, int fpc)
+static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
 {
 	int si_code = 0;
 	/* FPC[2] is Data Exception Code */
@@ -236,17 +236,13 @@ int alloc_vector_registers(struct task_struct *tsk)
 		return -ENOMEM;
 	preempt_disable();
 	if (tsk == current)
-		save_fp_regs(tsk->thread.fpu.fprs);
+		save_fpu_regs(&tsk->thread.fpu);
 	/* Copy the 16 floating point registers */
 	convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
 	fprs = tsk->thread.fpu.fprs;
 	tsk->thread.fpu.vxrs = vxrs;
 	tsk->thread.fpu.flags |= FPU_USE_VX;
 	kfree(fprs);
-	if (tsk == current) {
-		__ctl_set_bit(0, 17);
-		restore_vx_regs(vxrs);
-	}
 	preempt_enable();
 	return 0;
 }
@@ -261,7 +257,7 @@ void vector_exception(struct pt_regs *regs)
 	}
 
 	/* get vector interrupt code from fpc */
-	asm volatile("stfpc %0" : "=Q" (current->thread.fpu.fpc));
+	save_fpu_regs(&current->thread.fpu);
 	vic = (current->thread.fpu.fpc & 0xf00) >> 8;
 	switch (vic) {
 	case 1: /* invalid vector operation */
@@ -299,7 +295,7 @@ void data_exception(struct pt_regs *regs)
 
 	location = get_trap_ip(regs);
 
-	asm volatile("stfpc %0" : "=Q" (current->thread.fpu.fpc));
+	save_fpu_regs(&current->thread.fpu);
 	/* Check for vector register enablement */
 	if (MACHINE_HAS_VX && !is_vx_task(current) &&
 	    (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) {
...
arch/s390/kvm/kvm-s390.c
@@ -1198,27 +1198,54 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+/*
+ * Backs up the current FP/VX register save area on a particular
+ * destination. Used to switch between different register save
+ * areas.
+ */
+static inline void save_fpu_to(struct fpu *dst)
+{
+	dst->fpc = current->thread.fpu.fpc;
+	dst->flags = current->thread.fpu.flags;
+	dst->regs = current->thread.fpu.regs;
+}
+
+/*
+ * Switches the FP/VX register save area from which to lazy
+ * restore register contents.
+ */
+static inline void load_fpu_from(struct fpu *from)
+{
+	current->thread.fpu.fpc = from->fpc;
+	current->thread.fpu.flags = from->flags;
+	current->thread.fpu.regs = from->regs;
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	__u32 fpc;
+	/* Save host register state */
+	save_fpu_regs(&current->thread.fpu);
+	save_fpu_to(&vcpu->arch.host_fpregs);
 
-	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-	if (test_kvm_facility(vcpu->kvm, 129))
-		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
-	else
-		save_fp_regs(vcpu->arch.host_fpregs.fprs);
-	save_access_regs(vcpu->arch.host_acrs);
 	if (test_kvm_facility(vcpu->kvm, 129)) {
-		fpc = vcpu->run->s.regs.fpc;
-		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
-	} else {
-		fpc = vcpu->arch.guest_fpregs.fpc;
-		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	}
-	if (test_fp_ctl(fpc))
+		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+		current->thread.fpu.flags = FPU_USE_VX;
+		/*
+		 * Use the register save area in the SIE-control block
+		 * for register restore and save in kvm_arch_vcpu_put()
+		 */
+		current->thread.fpu.vxrs =
+			(__vector128 *)&vcpu->run->s.regs.vrs;
+		/* Always enable the vector extension for KVM */
+		__ctl_set_vx();
+	} else
+		load_fpu_from(&vcpu->arch.guest_fpregs);
+
+	if (test_fp_ctl(current->thread.fpu.fpc))
 		/* User space provided an invalid FPC, let's clear it */
-		fpc = 0;
-	restore_fp_ctl(&fpc);
+		current->thread.fpu.fpc = 0;
+
+	save_access_regs(vcpu->arch.host_acrs);
 	restore_access_regs(vcpu->run->s.regs.acrs);
 	gmap_enable(vcpu->arch.gmap);
 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -1228,19 +1255,22 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
-	if (test_kvm_facility(vcpu->kvm, 129)) {
-		save_fp_ctl(&vcpu->run->s.regs.fpc);
-		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
-	} else {
-		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	}
-	save_access_regs(vcpu->run->s.regs.acrs);
-	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
+
+	save_fpu_regs(&current->thread.fpu);
+
 	if (test_kvm_facility(vcpu->kvm, 129))
-		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+		/*
+		 * kvm_arch_vcpu_load() set up the register save area to
+		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
+		 * are already saved. Only the floating-point control must be
+		 * copied.
+		 */
+		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 	else
-		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+		save_fpu_to(&vcpu->arch.guest_fpregs);
+	load_fpu_from(&vcpu->arch.host_fpregs);
+
+	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_access_regs(vcpu->arch.host_acrs);
 }
@@ -1383,7 +1413,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	vcpu->arch.sie_block = &sie_page->sie_block;
 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
-	vcpu->arch.host_vregs = &sie_page->vregs;
 	vcpu->arch.sie_block->icpua = id;
 	if (!kvm_is_ucontrol(kvm)) {
@@ -1405,6 +1434,19 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	vcpu->arch.local_int.wq = &vcpu->wq;
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
+
+	/*
+	 * Allocate a save area for floating-point registers. If the vector
+	 * extension is available, register contents are saved in the SIE
+	 * control block. The allocated save area is still required in
+	 * particular places, for example, in kvm_s390_vcpu_store_status().
+	 */
+	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
+					       GFP_KERNEL);
+	if (!vcpu->arch.guest_fpregs.fprs) {
+		rc = -ENOMEM;
+		goto out_free_sie_block;
+	}
+
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
 		goto out_free_sie_block;
@@ -1627,16 +1669,16 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	if (test_fp_ctl(fpu->fpc))
 		return -EINVAL;
-	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
 	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_fpu_regs(&current->thread.fpu);
+	load_fpu_from(&vcpu->arch.guest_fpregs);
 	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
 	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
 	return 0;
 }
@@ -2199,8 +2241,21 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	 * copying in vcpu load/put. Lets update our copies before we save
 	 * it into the save area
 	 */
-	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_fpu_regs(&current->thread.fpu);
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		/*
+		 * If the vector extension is available, the vector registers
+		 * which overlaps with floating-point registers are saved in
+		 * the SIE-control block. Hence, extract the floating-point
+		 * registers and the FPC value and store them in the
+		 * guest_fpregs structure.
+		 */
+		WARN_ON(!is_vx_task(current));	/* XXX remove later */
+		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
+		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
+				 current->thread.fpu.vxrs);
+	} else
+		save_fpu_to(&vcpu->arch.guest_fpregs);
 	save_access_regs(vcpu->run->s.regs.acrs);
 
 	return kvm_s390_store_status_unloaded(vcpu, addr);
@@ -2227,10 +2282,13 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	/*
 	 * The guest VXRS are in the host VXRs due to the lazy
-	 * copying in vcpu load/put. Let's update our copies before we save
-	 * it into the save area.
+	 * copying in vcpu load/put. We can simply call save_fpu_regs()
+	 * to save the current register state because we are in the
+	 * middle of a load/put cycle.
+	 *
+	 * Let's update our copies before we save it into the save area.
 	 */
-	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	save_fpu_regs(&current->thread.fpu);
 
 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
 }
...