Commit 41040cf7 authored by Dave Martin, committed by Will Deacon

arm64/sve: Fix missing SVE/FPSIMD endianness conversions

The in-memory representation of SVE and FPSIMD registers is
different: the FPSIMD V-registers are stored as single 128-bit
host-endian values, whereas SVE registers are stored in an
endianness-invariant byte order.

This means that the two representations differ when running on a
big-endian host.  But we blindly copy data from one representation
to another when converting between the two, resulting in the
register contents being unintentionally byteswapped in certain
situations.  Currently this can be triggered by the first SVE
instruction after a syscall, for example (though the potential
trigger points may vary in future).
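To picture the mismatch, here is a standalone userspace sketch (illustration
only, not part of this patch; the names are made up): build one 128-bit value,
store it under each convention, and compare the resulting bytes.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        unsigned char sve_bytes[16]; /* endianness-invariant layout */
        unsigned char v_bytes[16];   /* raw host-endian __uint128_t store */
        __uint128_t x = 0;
        int i;

        /* Register value whose bits [8i+7 : 8i] equal i, for i = 0..15 */
        for (i = 0; i < 16; i++)
            x |= (__uint128_t)i << (8 * i);

        /* SVE layout: byte offset i holds bits [8i+7 : 8i], i.e. the value i */
        for (i = 0; i < 16; i++)
            sve_bytes[i] = i;

        /* FPSIMD layout: whatever the host endianness puts in memory */
        memcpy(v_bytes, &x, sizeof(v_bytes));

        /* Identical on little-endian hosts; byte-reversed on big-endian ones */
        printf("layouts %s\n",
               memcmp(sve_bytes, v_bytes, 16) ? "differ" : "match");
        return 0;
    }

On a little-endian host this prints "match"; on a big-endian host the two
arrays are reversed with respect to each other, which is exactly the
difference the conversion functions must account for.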

So, fix the conversion functions fpsimd_to_sve(), sve_to_fpsimd()
and sve_sync_from_fpsimd_zeropad() to swab where appropriate.

There is no common swahl128() or swab128() that we could use here.
Maybe it would be worth making this generic, but for now add a
simple local hack.
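For the record, a generic version along the lines of the local hack below
might look like this (hypothetical sketch only; no swab128() exists in the
kernel today, and this is not part of the patch):

    /* Full 128-bit byteswap, built from swab64() in <linux/swab.h> */
    static inline __uint128_t swab128(__uint128_t x)
    {
        u64 lo = swab64(x);       /* low half, byte-reversed */
        u64 hi = swab64(x >> 64); /* high half, byte-reversed */

        /* The reversed low half becomes the new high half, and vice versa */
        return ((__uint128_t)lo << 64) | hi;
    }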

Since the byte order differences are exposed in ABI, also clarify
the documentation.

Cc: Alex Bennée <alex.bennee@linaro.org>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Alan Hayward <alan.hayward@arm.com>
Cc: Julien Grall <julien.grall@arm.com>
Fixes: bc0ee476 ("arm64/sve: Core task context handling")
Fixes: 8cd969d2 ("arm64/sve: Signal handling support")
Fixes: 43d4da2c ("arm64/sve: ptrace and ELF coredump support")
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
[will: Fix typos in comments and docs spotted by Julien]
Signed-off-by: Will Deacon <will.deacon@arm.com>
parent 01d57485
@@ -56,6 +56,18 @@ model features for SVE is included in Appendix A.
   is to connect to a target process first and then attempt a
   ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
 
+* Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory
+  between userspace and the kernel, the register value is encoded in memory in
+  an endianness-invariant layout, with bits [(8 * i + 7) : (8 * i)] encoded at
+  byte offset i from the start of the memory representation.  This affects for
+  example the signal frame (struct sve_context) and ptrace interface
+  (struct user_sve_header) and associated data.
+
+  Beware that on big-endian systems this results in a different byte order than
+  for the FPSIMD V-registers, which are stored as single host-endian 128-bit
+  values, with bits [(127 - 8 * i) : (120 - 8 * i)] of the register encoded at
+  byte offset i.  (struct fpsimd_context, struct user_fpsimd_state).
+
 2.  Vector length terminology
 -----------------------------
@@ -124,6 +136,10 @@ the SVE instruction set architecture.
   size and layout.  Macros SVE_SIG_* are defined [1] to facilitate access to
   the members.
 
+* Each scalable register (Zn, Pn, FFR) is stored in an endianness-invariant
+  layout, with bits [(8 * i + 7) : (8 * i)] stored at byte offset i from the
+  start of the register's representation in memory.
+
 * If the SVE context is too big to fit in sigcontext.__reserved[], then extra
   space is allocated on the stack, an extra_context record is written in
   __reserved[] referencing this space.  sve_context is then written in the
......
@@ -260,6 +260,13 @@ struct kvm_vcpu_events {
 				 KVM_REG_SIZE_U256 |		\
 				 ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))
 
+/*
+ * Register values for KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() and
+ * KVM_REG_ARM64_SVE_FFR() are represented in memory in an endianness-
+ * invariant layout which differs from the layout used for the FPSIMD
+ * V-registers on big-endian systems: see sigcontext.h for more explanation.
+ */
+
 #define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN
 #define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX
......
@@ -176,6 +176,10 @@ struct user_sve_header {
  *	FPCR	uint32_t	FPCR
  *
  * Additional data might be appended in the future.
+ *
+ * The Z-, P- and FFR registers are represented in memory in an endianness-
+ * invariant layout which differs from the layout used for the FPSIMD
+ * V-registers on big-endian systems: see sigcontext.h for more explanation.
  */
 
 #define SVE_PT_SVE_ZREG_SIZE(vq)	__SVE_ZREG_SIZE(vq)
......
@@ -77,6 +77,15 @@ struct fpsimd_context {
 	__uint128_t vregs[32];
 };
 
+/*
+ * Note: similarly to all other integer fields, each V-register is stored in an
+ * endianness-dependent format, with the byte at offset i from the start of the
+ * in-memory representation of the register value containing
+ *
+ *    bits [(7 + 8 * i) : (8 * i)] of the register on little-endian hosts; or
+ *    bits [(127 - 8 * i) : (120 - 8 * i)] on big-endian hosts.
+ */
+
 /* ESR_EL1 context */
 #define ESR_MAGIC	0x45535201
@@ -204,6 +213,11 @@ struct sve_context {
  *	FFR	uint16_t[vq]	first-fault status register
  *
  * Additional data might be appended in the future.
+ *
+ * Unlike vregs[] in fpsimd_context, each SVE scalable register (Z-, P- or FFR)
+ * is encoded in memory in an endianness-invariant format, with the byte at
+ * offset i from the start of the in-memory representation containing bits
+ * [(7 + 8 * i) : (8 * i)] of the register value.
  */
 
 #define SVE_SIG_ZREG_SIZE(vq)	__SVE_ZREG_SIZE(vq)
......
@@ -39,6 +39,7 @@
 #include <linux/slab.h>
 #include <linux/stddef.h>
 #include <linux/sysctl.h>
+#include <linux/swab.h>
 
 #include <asm/esr.h>
 #include <asm/fpsimd.h>
@@ -352,6 +353,23 @@ static int __init sve_sysctl_init(void) { return 0; }
 #define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
 	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
 
+#ifdef CONFIG_CPU_BIG_ENDIAN
+static __uint128_t arm64_cpu_to_le128(__uint128_t x)
+{
+	u64 a = swab64(x);
+	u64 b = swab64(x >> 64);
+
+	return ((__uint128_t)a << 64) | b;
+}
+#else
+static __uint128_t arm64_cpu_to_le128(__uint128_t x)
+{
+	return x;
+}
+#endif
+
+#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
+
 /*
  * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
  * task->thread.sve_state.
@@ -369,14 +387,16 @@ static void fpsimd_to_sve(struct task_struct *task)
 	void *sst = task->thread.sve_state;
 	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
 	unsigned int i;
+	__uint128_t *p;
 
 	if (!system_supports_sve())
 		return;
 
 	vq = sve_vq_from_vl(task->thread.sve_vl);
-	for (i = 0; i < 32; ++i)
-		memcpy(ZREG(sst, vq, i), &fst->vregs[i],
-		       sizeof(fst->vregs[i]));
+	for (i = 0; i < 32; ++i) {
+		p = (__uint128_t *)ZREG(sst, vq, i);
+		*p = arm64_cpu_to_le128(fst->vregs[i]);
+	}
 }
/* /*
@@ -395,14 +415,16 @@ static void sve_to_fpsimd(struct task_struct *task)
 	void const *sst = task->thread.sve_state;
 	struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
 	unsigned int i;
+	__uint128_t const *p;
 
 	if (!system_supports_sve())
 		return;
 
 	vq = sve_vq_from_vl(task->thread.sve_vl);
-	for (i = 0; i < 32; ++i)
-		memcpy(&fst->vregs[i], ZREG(sst, vq, i),
-		       sizeof(fst->vregs[i]));
+	for (i = 0; i < 32; ++i) {
+		p = (__uint128_t const *)ZREG(sst, vq, i);
+		fst->vregs[i] = arm64_le128_to_cpu(*p);
+	}
 }
 
 #ifdef CONFIG_ARM64_SVE
@@ -491,6 +513,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
 	void *sst = task->thread.sve_state;
 	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
 	unsigned int i;
+	__uint128_t *p;
 
 	if (!test_tsk_thread_flag(task, TIF_SVE))
 		return;
@@ -499,9 +522,10 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
 	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
 
-	for (i = 0; i < 32; ++i)
-		memcpy(ZREG(sst, vq, i), &fst->vregs[i],
-		       sizeof(fst->vregs[i]));
+	for (i = 0; i < 32; ++i) {
+		p = (__uint128_t *)ZREG(sst, vq, i);
+		*p = arm64_cpu_to_le128(fst->vregs[i]);
+	}
 }
 
 int sve_set_vector_length(struct task_struct *task,
......