Commit 46f87632 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Will Deacon:
 "There's the usual summary below, but the highlights are support for
  the Armv8.6 timer extensions, KASAN support for asymmetric MTE, the
  ability to kexec() with the MMU enabled and a second attempt at
  switching to the generic pfn_valid() implementation.

  Summary:

   - Support for the Arm8.6 timer extensions, including a
     self-synchronising view of the system registers to elide some
     expensive ISB instructions.

   - Exception table cleanup and rework so that the fixup handlers
     appear correctly in backtraces.

   - A handful of miscellaneous changes, the main one being selection of
     CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK.

   - More mm and pgtable cleanups.

   - KASAN support for "asymmetric" MTE, where tag faults are reported
     synchronously for loads (via an exception) and asynchronously for
     stores (via a register).

   - Support for leaving the MMU enabled during kexec relocation, which
     significantly speeds up the operation.

   - Minor improvements to our perf PMU drivers.

   - Improvements to the compat vDSO build system, particularly when
     building with LLVM=1.

   - Preparatory work for handling some Coresight TRBE tracing errata.

   - Cleanup and refactoring of the SVE code to pave the way for SME
     support in future.

   - Ensure SCS pages are unpoisoned immediately prior to freeing them
     when KASAN is enabled for the vmalloc area.

   - Try moving to the generic pfn_valid() implementation again now that
     the DMA mapping issue from last time has been resolved.

   - Numerous improvements and additions to our FPSIMD and SVE
     selftests"

[ armv8.6 timer updates were in a shared branch and already came in
  through -tip in the timer pull  - Linus ]

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (85 commits)
  arm64: Select POSIX_CPU_TIMERS_TASK_WORK
  arm64: Document boot requirements for FEAT_SME_FA64
  arm64/sve: Fix warnings when SVE is disabled
  arm64/sve: Add stub for sve_max_virtualisable_vl()
  arm64: errata: Add detection for TRBE write to out-of-range
  arm64: errata: Add workaround for TSB flush failures
  arm64: errata: Add detection for TRBE overwrite in FILL mode
  arm64: Add Neoverse-N2, Cortex-A710 CPU part definition
  selftests: arm64: Factor out utility functions for assembly FP tests
  arm64: vmlinux.lds.S: remove `.fixup` section
  arm64: extable: add load_unaligned_zeropad() handler
  arm64: extable: add a dedicated uaccess handler
  arm64: extable: add `type` and `data` fields
  arm64: extable: use `ex` for `exception_table_entry`
  arm64: extable: make fixup_exception() return bool
  arm64: extable: consolidate definitions
  arm64: gpr-num: support W registers
  arm64: factor out GPR numbering helpers
  arm64: kvm: use kvm_exception_table_entry
  arm64: lib: __arch_copy_to_user(): fold fixups into body
  ...
parents 879dbe9f e6359798
......@@ -340,6 +340,16 @@ Before jumping into the kernel, the following conditions must be met:
- SMCR_EL2.LEN must be initialised to the same value for all CPUs the
kernel will execute on.
For CPUs with the Scalable Matrix Extension FA64 feature (FEAT_SME_FA64)
- If EL3 is present:
- SMCR_EL3.FA64 (bit 31) must be initialised to 0b1.
- If the kernel is entered at EL1 and EL2 is present:
- SMCR_EL2.FA64 (bit 31) must be initialised to 0b1.
The requirements described above for CPU mode, caches, MMUs, architected
timers, coherency and system registers apply to all CPUs. All CPUs must
enter the kernel in the same exception level. Where the values documented
......
......@@ -235,7 +235,15 @@ infrastructure:
| DPB | [3-0] | y |
+------------------------------+---------+---------+
6) ID_AA64MMFR2_EL1 - Memory model feature register 2
6) ID_AA64MMFR0_EL1 - Memory model feature register 0
+------------------------------+---------+---------+
| Name | bits | visible |
+------------------------------+---------+---------+
| ECV | [63-60] | y |
+------------------------------+---------+---------+
7) ID_AA64MMFR2_EL1 - Memory model feature register 2
+------------------------------+---------+---------+
| Name | bits | visible |
......@@ -243,7 +251,7 @@ infrastructure:
| AT | [35-32] | y |
+------------------------------+---------+---------+
7) ID_AA64ZFR0_EL1 - SVE feature ID register 0
8) ID_AA64ZFR0_EL1 - SVE feature ID register 0
+------------------------------+---------+---------+
| Name | bits | visible |
......
......@@ -247,6 +247,10 @@ HWCAP2_MTE
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described
by Documentation/arm64/memory-tagging-extension.rst.
HWCAP2_ECV
Functionality implied by ID_AA64MMFR0_EL1.ECV == 0b0001.
4. Unused AT_HWCAP bits
-----------------------
......
......@@ -92,12 +92,24 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1349291 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N2 | #2253138 | ARM64_ERRATUM_2253138 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-500 | #841119,826419 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
......
......@@ -194,14 +194,17 @@ additional boot parameters that allow disabling KASAN or controlling features:
- ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).
- ``kasan.mode=sync`` or ``=async`` controls whether KASAN is configured in
synchronous or asynchronous mode of execution (default: ``sync``).
- ``kasan.mode=sync``, ``=async`` or ``=asymm`` controls whether KASAN
is configured in synchronous, asynchronous or asymmetric mode of
execution (default: ``sync``).
Synchronous mode: a bad access is detected immediately when a tag
check fault occurs.
Asynchronous mode: a bad access detection is delayed. When a tag check
fault occurs, the information is stored in hardware (in the TFSR_EL1
register for arm64). The kernel periodically checks the hardware and
only reports tag faults during these checks.
Asymmetric mode: a bad access is detected synchronously on reads and
asynchronously on writes.
- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
traces collection (default: ``on``).
......
......@@ -153,7 +153,6 @@ config ARM64
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_PFN_VALID
select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP_FILTER
......@@ -191,6 +190,7 @@ config ARM64
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUTEX_CMPXCHG if FUTEX
select MMU_GATHER_RCU_TABLE_FREE
......@@ -665,6 +665,121 @@ config ARM64_ERRATUM_1508412
If unsure, say Y.
config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
bool
config ARM64_ERRATUM_2119858
bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode"
default y
depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
depends on CORESIGHT_TRBE
select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
help
This option adds the workaround for ARM Cortex-A710 erratum 2119858.
Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace
data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in
the event of a WRAP event.
Work around the issue by always making sure we move the TRBPTR_EL1 by
256 bytes before enabling the buffer and filling the first 256 bytes of
the buffer with ETM ignore packets upon disabling.
If unsure, say Y.
config ARM64_ERRATUM_2139208
bool "Neoverse-N2: 2139208: workaround TRBE overwriting trace data in FILL mode"
default y
depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
depends on CORESIGHT_TRBE
select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
help
This option adds the workaround for ARM Neoverse-N2 erratum 2139208.
Affected Neoverse-N2 cores could overwrite up to 3 cache lines of trace
data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in
the event of a WRAP event.
Work around the issue by always making sure we move the TRBPTR_EL1 by
256 bytes before enabling the buffer and filling the first 256 bytes of
the buffer with ETM ignore packets upon disabling.
If unsure, say Y.
config ARM64_WORKAROUND_TSB_FLUSH_FAILURE
bool
config ARM64_ERRATUM_2054223
bool "Cortex-A710: 2054223: workaround TSB instruction failing to flush trace"
default y
select ARM64_WORKAROUND_TSB_FLUSH_FAILURE
help
Enable workaround for ARM Cortex-A710 erratum 2054223
Affected cores may fail to flush the trace data on a TSB instruction, when
the PE is in trace prohibited state. This will cause losing a few bytes
of the trace cached.
Workaround is to issue two TSB consecutively on affected cores.
If unsure, say Y.
config ARM64_ERRATUM_2067961
bool "Neoverse-N2: 2067961: workaround TSB instruction failing to flush trace"
default y
select ARM64_WORKAROUND_TSB_FLUSH_FAILURE
help
Enable workaround for ARM Neoverse-N2 erratum 2067961
Affected cores may fail to flush the trace data on a TSB instruction, when
the PE is in trace prohibited state. This will cause losing a few bytes
of the trace cached.
Workaround is to issue two TSB consecutively on affected cores.
If unsure, say Y.
config ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
bool
config ARM64_ERRATUM_2253138
bool "Neoverse-N2: 2253138: workaround TRBE writing to address out-of-range"
depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
depends on CORESIGHT_TRBE
default y
select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
help
This option adds the workaround for ARM Neoverse-N2 erratum 2253138.
Affected Neoverse-N2 cores might write to an out-of-range address, not reserved
for TRBE. Under some conditions, the TRBE might generate a write to the next
virtually addressed page following the last page of the TRBE address space
(i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base.
Work around this in the driver by always making sure that there is a
page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE.
If unsure, say Y.
config ARM64_ERRATUM_2224489
bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range"
depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
depends on CORESIGHT_TRBE
default y
select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
help
This option adds the workaround for ARM Cortex-A710 erratum 2224489.
Affected Cortex-A710 cores might write to an out-of-range address, not reserved
for TRBE. Under some conditions, the TRBE might generate a write to the next
virtually addressed page following the last page of the TRBE address space
(i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base.
Work around this in the driver by always making sure that there is a
page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE.
If unsure, say Y.
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
......@@ -1143,7 +1258,7 @@ config CRASH_DUMP
config TRANS_TABLE
def_bool y
depends on HIBERNATION
depends on HIBERNATION || KEXEC_CORE
config XEN_DOM0
def_bool y
......@@ -1272,7 +1387,8 @@ config KUSER_HELPERS
config COMPAT_VDSO
bool "Enable vDSO for 32-bit applications"
depends on !CPU_BIG_ENDIAN && "$(CROSS_COMPILE_COMPAT)" != ""
depends on !CPU_BIG_ENDIAN
depends on (CC_IS_CLANG && LD_IS_LLD) || "$(CROSS_COMPILE_COMPAT)" != ""
select GENERIC_COMPAT_VDSO
default y
help
......
......@@ -64,14 +64,26 @@ DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *,
static inline notrace u64 arch_timer_read_cntpct_el0(void)
{
isb();
return read_sysreg(cntpct_el0);
u64 cnt;
asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0",
"nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0),
ARM64_HAS_ECV)
: "=r" (cnt));
return cnt;
}
static inline notrace u64 arch_timer_read_cntvct_el0(void)
{
isb();
return read_sysreg(cntvct_el0);
u64 cnt;
asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0",
"nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0),
ARM64_HAS_ECV)
: "=r" (cnt));
return cnt;
}
#define arch_timer_reg_read_stable(reg) \
......@@ -174,8 +186,10 @@ static __always_inline u64 __arch_counter_get_cntpct(void)
{
u64 cnt;
isb();
cnt = read_sysreg(cntpct_el0);
asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0",
"nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0),
ARM64_HAS_ECV)
: "=r" (cnt));
arch_counter_enforce_ordering(cnt);
return cnt;
}
......@@ -193,8 +207,10 @@ static __always_inline u64 __arch_counter_get_cntvct(void)
{
u64 cnt;
isb();
cnt = read_sysreg(cntvct_el0);
asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0",
"nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0),
ARM64_HAS_ECV)
: "=r" (cnt));
arch_counter_enforce_ordering(cnt);
return cnt;
}
......
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ASM_ASM_EXTABLE_H
#define __ASM_ASM_EXTABLE_H
#define EX_TYPE_NONE 0
#define EX_TYPE_FIXUP 1
#define EX_TYPE_BPF 2
#define EX_TYPE_UACCESS_ERR_ZERO 3
#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
#ifdef __ASSEMBLY__
#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
.pushsection __ex_table, "a"; \
.align 2; \
.long ((insn) - .); \
.long ((fixup) - .); \
.short (type); \
.short (data); \
.popsection;
/*
* Create an exception table entry for `insn`, which will branch to `fixup`
* when an unhandled fault is taken.
*/
.macro _asm_extable, insn, fixup
__ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0)
.endm
/*
* Create an exception table entry for `insn` if `fixup` is provided. Otherwise
* do nothing.
*/
.macro _cond_extable, insn, fixup
.ifnc \fixup,
_asm_extable \insn, \fixup
.endif
.endm
#else /* __ASSEMBLY__ */
#include <linux/bits.h>
#include <linux/stringify.h>
#include <asm/gpr-num.h>
#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
".pushsection __ex_table, \"a\"\n" \
".align 2\n" \
".long ((" insn ") - .)\n" \
".long ((" fixup ") - .)\n" \
".short (" type ")\n" \
".short (" data ")\n" \
".popsection\n"
#define _ASM_EXTABLE(insn, fixup) \
__ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0")
#define EX_DATA_REG_ERR_SHIFT 0
#define EX_DATA_REG_ERR GENMASK(4, 0)
#define EX_DATA_REG_ZERO_SHIFT 5
#define EX_DATA_REG_ZERO GENMASK(9, 5)
#define EX_DATA_REG(reg, gpr) \
"((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \
__DEFINE_ASM_GPR_NUMS \
__ASM_EXTABLE_RAW(#insn, #fixup, \
__stringify(EX_TYPE_UACCESS_ERR_ZERO), \
"(" \
EX_DATA_REG(ERR, err) " | " \
EX_DATA_REG(ZERO, zero) \
")")
#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr)
#define EX_DATA_REG_DATA_SHIFT 0
#define EX_DATA_REG_DATA GENMASK(4, 0)
#define EX_DATA_REG_ADDR_SHIFT 5
#define EX_DATA_REG_ADDR GENMASK(9, 5)
#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \
__DEFINE_ASM_GPR_NUMS \
__ASM_EXTABLE_RAW(#insn, #fixup, \
__stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \
"(" \
EX_DATA_REG(DATA, data) " | " \
EX_DATA_REG(ADDR, addr) \
")")
#endif /* __ASSEMBLY__ */
#endif /* __ASM_ASM_EXTABLE_H */
......@@ -3,10 +3,11 @@
#define __ASM_ASM_UACCESS_H
#include <asm/alternative-macros.h>
#include <asm/asm-extable.h>
#include <asm/assembler.h>
#include <asm/kernel-pgtable.h>
#include <asm/mmu.h>
#include <asm/sysreg.h>
#include <asm/assembler.h>
/*
* User access enabling/disabling macros.
......@@ -58,6 +59,10 @@ alternative_else_nop_endif
.endm
#endif
#define USER(l, x...) \
9999: x; \
_asm_extable 9999b, l
/*
* Generate the assembly for LDTR/STTR with exception table entries.
* This is complicated as there is no post-increment or pair versions of the
......
......@@ -14,9 +14,10 @@
#include <asm-generic/export.h>
#include <asm/asm-offsets.h>
#include <asm/alternative.h>
#include <asm/asm-bug.h>
#include <asm/asm-extable.h>
#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/debug-monitors.h>
......@@ -129,32 +130,6 @@ alternative_endif
.endr
.endm
/*
* Create an exception table entry for `insn`, which will branch to `fixup`
* when an unhandled fault is taken.
*/
.macro _asm_extable, insn, fixup
.pushsection __ex_table, "a"
.align 3
.long (\insn - .), (\fixup - .)
.popsection
.endm
/*
* Create an exception table entry for `insn` if `fixup` is provided. Otherwise
* do nothing.
*/
.macro _cond_extable, insn, fixup
.ifnc \fixup,
_asm_extable \insn, \fixup
.endif
.endm
#define USER(l, x...) \
9999: x; \
_asm_extable 9999b, l
/*
* Register aliases.
*/
......@@ -405,19 +380,19 @@ alternative_endif
/*
* Macro to perform a data cache maintenance for the interval
* [start, end)
* [start, end) with dcache line size explicitly provided.
*
* op: operation passed to dc instruction
* domain: domain used in dsb instruciton
* start: starting virtual address of the region
* end: end virtual address of the region
* linesz: dcache line size
* fixup: optional label to branch to on user fault
* Corrupts: start, end, tmp1, tmp2
* Corrupts: start, end, tmp
*/
.macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
dcache_line_size \tmp1, \tmp2
sub \tmp2, \tmp1, #1
bic \start, \start, \tmp2
.macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
sub \tmp, \linesz, #1
bic \start, \start, \tmp
.Ldcache_op\@:
.ifc \op, cvau
__dcache_op_workaround_clean_cache \op, \start
......@@ -436,7 +411,7 @@ alternative_endif
.endif
.endif
.endif
add \start, \start, \tmp1
add \start, \start, \linesz
cmp \start, \end
b.lo .Ldcache_op\@
dsb \domain
......@@ -444,6 +419,22 @@ alternative_endif
_cond_extable .Ldcache_op\@, \fixup
.endm
/*
* Macro to perform a data cache maintenance for the interval
* [start, end)
*
* op: operation passed to dc instruction
* domain: domain used in dsb instruciton
* start: starting virtual address of the region
* end: end virtual address of the region
* fixup: optional label to branch to on user fault
* Corrupts: start, end, tmp1, tmp2
*/
.macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
dcache_line_size \tmp1, \tmp2
dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup
.endm
/*
* Macro to perform an instruction cache maintenance for the interval
* [start, end)
......@@ -467,6 +458,25 @@ alternative_endif
_cond_extable .Licache_op\@, \fixup
.endm
/*
* To prevent the possibility of old and new partial table walks being visible
* in the tlb, switch the ttbr to a zero page when we invalidate the old
* records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i
* Even switching to our copied tables will cause a changed output address at
* each stage of the walk.
*/
.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2
phys_to_ttbr \tmp, \zero_page
msr ttbr1_el1, \tmp
isb
tlbi vmalle1
dsb nsh
phys_to_ttbr \tmp, \page_table
offset_ttbr1 \tmp, \tmp2
msr ttbr1_el1, \tmp
isb
.endm
/*
* reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
*/
......
......@@ -23,7 +23,7 @@
#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
#define psb_csync() asm volatile("hint #17" : : : "memory")
#define tsb_csync() asm volatile("hint #18" : : : "memory")
#define __tsb_csync() asm volatile("hint #18" : : : "memory")
#define csdb() asm volatile("hint #20" : : : "memory")
#ifdef CONFIG_ARM64_PSEUDO_NMI
......@@ -46,6 +46,20 @@
#define dma_rmb() dmb(oshld)
#define dma_wmb() dmb(oshst)
#define tsb_csync() \
do { \
/* \
* CPUs affected by Arm Erratum 2054223 or 2067961 needs \
* another TSB to ensure the trace is flushed. The barriers \
* don't have to be strictly back to back, as long as the \
* CPU is in trace prohibited state. \
*/ \
if (cpus_have_final_cap(ARM64_WORKAROUND_TSB_FLUSH_FAILURE)) \
__tsb_csync(); \
__tsb_csync(); \
} while (0)
/*
* Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz
* and 0 otherwise.
......
......@@ -73,6 +73,8 @@
#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
#define ARM_CPU_PART_CORTEX_A77 0xD0D
#define ARM_CPU_PART_CORTEX_A710 0xD47
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
#define APM_CPU_PART_POTENZA 0x000
......@@ -113,6 +115,8 @@
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
......
......@@ -227,6 +227,9 @@
#define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \
ESR_ELx_SYS64_ISS_DIR_READ)
#define ESR_ELx_SYS64_ISS_SYS_CNTVCTSS (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 6, 14, 0) | \
ESR_ELx_SYS64_ISS_DIR_READ)
#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \
ESR_ELx_SYS64_ISS_DIR_READ)
......@@ -317,6 +320,9 @@
#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \
ESR_ELx_CP15_64_ISS_DIR_READ)
#define ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS (ESR_ELx_CP15_64_ISS_SYS_VAL(9, 14) | \
ESR_ELx_CP15_64_ISS_DIR_READ)
#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\
ESR_ELx_CP15_32_ISS_DIR_READ)
......
......@@ -18,10 +18,21 @@
struct exception_table_entry
{
int insn, fixup;
short type, data;
};
#define ARCH_HAS_RELATIVE_EXTABLE
#define swap_ex_entry_fixup(a, b, tmp, delta) \
do { \
(a)->fixup = (b)->fixup + (delta); \
(b)->fixup = (tmp).fixup - (delta); \
(a)->type = (b)->type; \
(b)->type = (tmp).type; \
(a)->data = (b)->data; \
(b)->data = (tmp).data; \
} while (0)
static inline bool in_bpf_jit(struct pt_regs *regs)
{
if (!IS_ENABLED(CONFIG_BPF_JIT))
......@@ -32,16 +43,16 @@ static inline bool in_bpf_jit(struct pt_regs *regs)
}
#ifdef CONFIG_BPF_JIT
int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
bool ex_handler_bpf(const struct exception_table_entry *ex,
struct pt_regs *regs);
#else /* !CONFIG_BPF_JIT */
static inline
int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
bool ex_handler_bpf(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
return 0;
return false;
}
#endif /* !CONFIG_BPF_JIT */
extern int fixup_exception(struct pt_regs *regs);
bool fixup_exception(struct pt_regs *regs);
#endif
......@@ -62,15 +62,13 @@ static inline size_t sve_ffr_offset(int vl)
static inline void *sve_pffr(struct thread_struct *thread)
{
return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl);
return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread));
}
extern void sve_save_state(void *state, u32 *pfpsr);
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
extern void sve_load_state(void const *state, u32 const *pfpsr,
unsigned long vq_minus_1);
extern void sve_flush_live(unsigned long vq_minus_1);
extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
unsigned long vq_minus_1);
int restore_ffr);
extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
extern void sve_set_vq(unsigned long vq_minus_1);
......@@ -79,10 +77,6 @@ extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern u64 read_zcr_features(void);
extern int __ro_after_init sve_max_vl;
extern int __ro_after_init sve_max_virtualisable_vl;
extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
/*
* Helpers to translate bit indices in sve_vq_map to VQ values (and
* vice versa). This allows find_next_bit() to be used to find the
......@@ -98,15 +92,29 @@ static inline unsigned int __bit_to_vq(unsigned int bit)
return SVE_VQ_MAX - bit;
}
/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
static inline bool sve_vq_available(unsigned int vq)
{
return test_bit(__vq_to_bit(vq), sve_vq_map);
}
#ifdef CONFIG_ARM64_SVE
struct vl_info {
enum vec_type type;
const char *name; /* For display purposes */
extern size_t sve_state_size(struct task_struct const *task);
/* Minimum supported vector length across all CPUs */
int min_vl;
/* Maximum supported vector length across all CPUs */
int max_vl;
int max_virtualisable_vl;
/*
* Set of available vector lengths,
* where length vq encoded as bit __vq_to_bit(vq):
*/
DECLARE_BITMAP(vq_map, SVE_VQ_MAX);
/* Set of vector lengths present on at least one cpu: */
DECLARE_BITMAP(vq_partial_map, SVE_VQ_MAX);
};
#ifdef CONFIG_ARM64_SVE
extern void sve_alloc(struct task_struct *task);
extern void fpsimd_release_task(struct task_struct *task);
......@@ -143,11 +151,63 @@ static inline void sve_user_enable(void)
* Probing and setup functions.
* Calls to these functions must be serialised with one another.
*/
extern void __init sve_init_vq_map(void);
extern void sve_update_vq_map(void);
extern int sve_verify_vq_map(void);
enum vec_type;
extern void __init vec_init_vq_map(enum vec_type type);
extern void vec_update_vq_map(enum vec_type type);
extern int vec_verify_vq_map(enum vec_type type);
extern void __init sve_setup(void);
extern __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX];
static inline void write_vl(enum vec_type type, u64 val)
{
u64 tmp;
switch (type) {
#ifdef CONFIG_ARM64_SVE
case ARM64_VEC_SVE:
tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK;
write_sysreg_s(tmp | val, SYS_ZCR_EL1);
break;
#endif
default:
WARN_ON_ONCE(1);
break;
}
}
static inline int vec_max_vl(enum vec_type type)
{
return vl_info[type].max_vl;
}
static inline int vec_max_virtualisable_vl(enum vec_type type)
{
return vl_info[type].max_virtualisable_vl;
}
static inline int sve_max_vl(void)
{
return vec_max_vl(ARM64_VEC_SVE);
}
static inline int sve_max_virtualisable_vl(void)
{
return vec_max_virtualisable_vl(ARM64_VEC_SVE);
}
/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
static inline bool vq_available(enum vec_type type, unsigned int vq)
{
return test_bit(__vq_to_bit(vq), vl_info[type].vq_map);
}
static inline bool sve_vq_available(unsigned int vq)
{
return vq_available(ARM64_VEC_SVE, vq);
}
#else /* ! CONFIG_ARM64_SVE */
static inline void sve_alloc(struct task_struct *task) { }
......@@ -155,6 +215,11 @@ static inline void fpsimd_release_task(struct task_struct *task) { }
static inline void sve_sync_to_fpsimd(struct task_struct *task) { }
static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { }
static inline int sve_max_virtualisable_vl(void)
{
return 0;
}
static inline int sve_set_current_vl(unsigned long arg)
{
return -EINVAL;
......@@ -165,14 +230,21 @@ static inline int sve_get_current_vl(void)
return -EINVAL;
}
static inline int sve_max_vl(void)
{
return -EINVAL;
}
static inline bool sve_vq_available(unsigned int vq) { return false; }
static inline void sve_user_disable(void) { BUILD_BUG(); }
static inline void sve_user_enable(void) { BUILD_BUG(); }
#define sve_cond_update_zcr_vq(val, reg) do { } while (0)
static inline void sve_init_vq_map(void) { }
static inline void sve_update_vq_map(void) { }
static inline int sve_verify_vq_map(void) { return 0; }
static inline void vec_init_vq_map(enum vec_type t) { }
static inline void vec_update_vq_map(enum vec_type t) { }
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
static inline void sve_setup(void) { }
#endif /* ! CONFIG_ARM64_SVE */
......
......@@ -217,28 +217,36 @@
.macro sve_flush_z
_for n, 0, 31, _sve_flush_z \n
.endm
.macro sve_flush_p_ffr
.macro sve_flush_p
_for n, 0, 15, _sve_pfalse \n
.endm
.macro sve_flush_ffr
_sve_wrffr 0
.endm
.macro sve_save nxbase, xpfpsr, nxtmp
.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
_for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
_for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
cbz \save_ffr, 921f
_sve_rdffr 0
_sve_str_p 0, \nxbase
_sve_ldr_p 0, \nxbase, -16
b 922f
921:
str xzr, [x\nxbase] // Zero out FFR
922:
mrs x\nxtmp, fpsr
str w\nxtmp, [\xpfpsr]
mrs x\nxtmp, fpcr
str w\nxtmp, [\xpfpsr, #4]
.endm
.macro __sve_load nxbase, xpfpsr, nxtmp
.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
_for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
cbz \restore_ffr, 921f
_sve_ldr_p 0, \nxbase
_sve_wrffr 0
921:
_for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16
ldr w\nxtmp, [\xpfpsr]
......@@ -246,8 +254,3 @@
ldr w\nxtmp, [\xpfpsr, #4]
msr fpcr, x\nxtmp
.endm
.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
sve_load_vq \xvqminus1, x\nxtmp, \xtmp2
__sve_load \nxbase, \xpfpsr, \nxtmp
.endm
......@@ -15,7 +15,7 @@
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
#define ARCH_SUPPORTS_FTRACE_OPS 1
#else
#define MCOUNT_ADDR ((unsigned long)_mcount)
#define MCOUNT_ADDR ((unsigned long)function_nocfi(_mcount))
#endif
/* The BL at the callsite's adjusted rec->ip */
......
......@@ -25,19 +25,14 @@ do { \
" cbz %w0, 3f\n" \
" sub %w4, %w4, %w0\n" \
" cbnz %w4, 1b\n" \
" mov %w0, %w7\n" \
" mov %w0, %w6\n" \
"3:\n" \
" dmb ish\n" \
" .pushsection .fixup,\"ax\"\n" \
" .align 2\n" \
"4: mov %w0, %w6\n" \
" b 3b\n" \
" .popsection\n" \
_ASM_EXTABLE(1b, 4b) \
_ASM_EXTABLE(2b, 4b) \
_ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \
_ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w0) \
: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp), \
"+r" (loops) \
: "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \
: "r" (oparg), "Ir" (-EAGAIN) \
: "memory"); \
uaccess_disable_privileged(); \
} while (0)
......@@ -105,18 +100,14 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
" cbz %w3, 3f\n"
" sub %w4, %w4, %w3\n"
" cbnz %w4, 1b\n"
" mov %w0, %w8\n"
" mov %w0, %w7\n"
"3:\n"
" dmb ish\n"
"4:\n"
" .pushsection .fixup,\"ax\"\n"
"5: mov %w0, %w7\n"
" b 4b\n"
" .popsection\n"
_ASM_EXTABLE(1b, 5b)
_ASM_EXTABLE(2b, 5b)
_ASM_EXTABLE_UACCESS_ERR(1b, 4b, %w0)
_ASM_EXTABLE_UACCESS_ERR(2b, 4b, %w0)
: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops)
: "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN)
: "r" (oldval), "r" (newval), "Ir" (-EAGAIN)
: "memory");
uaccess_disable_privileged();
......
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ASM_GPR_NUM_H
#define __ASM_GPR_NUM_H
#ifdef __ASSEMBLY__
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
.equ .L__gpr_num_x\num, \num
.equ .L__gpr_num_w\num, \num
.endr
.equ .L__gpr_num_xzr, 31
.equ .L__gpr_num_wzr, 31
#else /* __ASSEMBLY__ */
#define __DEFINE_ASM_GPR_NUMS \
" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \
" .equ .L__gpr_num_x\\num, \\num\n" \
" .equ .L__gpr_num_w\\num, \\num\n" \
" .endr\n" \
" .equ .L__gpr_num_xzr, 31\n" \
" .equ .L__gpr_num_wzr, 31\n"
#endif /* __ASSEMBLY__ */
#endif /* __ASM_GPR_NUM_H */
......@@ -105,6 +105,7 @@
#define KERNEL_HWCAP_RNG __khwcap2_feature(RNG)
#define KERNEL_HWCAP_BTI __khwcap2_feature(BTI)
#define KERNEL_HWCAP_MTE __khwcap2_feature(MTE)
#define KERNEL_HWCAP_ECV __khwcap2_feature(ECV)
/*
* This yields a mask that user programs can use to figure out what
......
......@@ -90,12 +90,24 @@ static inline void crash_prepare_suspend(void) {}
static inline void crash_post_resume(void) {}
#endif
#if defined(CONFIG_KEXEC_CORE)
void cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
unsigned long arg0, unsigned long arg1,
unsigned long arg2);
#endif
#define ARCH_HAS_KIMAGE_ARCH
struct kimage_arch {
void *dtb;
phys_addr_t dtb_mem;
phys_addr_t kern_reloc;
phys_addr_t el2_vectors;
phys_addr_t ttbr0;
phys_addr_t ttbr1;
phys_addr_t zero_page;
unsigned long phys_offset;
unsigned long t0sz;
};
#ifdef CONFIG_KEXEC_FILE
......
......@@ -263,9 +263,10 @@ extern u64 __kvm_get_mdcr_el2(void);
/*
* KVM extable for unexpected exceptions.
* In the same format _asm_extable, but output to a different section so that
* it can be mapped to EL2. The KVM version is not sorted. The caller must
* ensure:
* Create a struct kvm_exception_table_entry output to a section that can be
* mapped by EL2. The table is not sorted.
*
* The caller must ensure:
* x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented
* code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup.
*/
......
......@@ -243,6 +243,7 @@ static inline const void *__tag_set(const void *addr, u8 tag)
#ifdef CONFIG_KASAN_HW_TAGS
#define arch_enable_tagging_sync() mte_enable_kernel_sync()
#define arch_enable_tagging_async() mte_enable_kernel_async()
#define arch_enable_tagging_asymm() mte_enable_kernel_asymm()
#define arch_force_async_tag_fault() mte_check_tfsr_exit()
#define arch_get_random_tag() mte_get_random_tag()
#define arch_get_mem_tag(addr) mte_get_mem_tag(addr)
......
......@@ -115,6 +115,30 @@ static inline void cpu_install_idmap(void)
cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
}
/*
* Load our new page tables. A strict BBM approach requires that we ensure that
* TLBs are free of any entries that may overlap with the global mappings we are
* about to install.
*
* For a real hibernate/resume/kexec cycle TTBR0 currently points to a zero
* page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI runtime
* services), while for a userspace-driven test_resume cycle it points to
* userspace page tables (and we must point it at a zero page ourselves).
*
* We change T0SZ as part of installing the idmap. This is undone by
* cpu_uninstall_idmap() in __cpu_suspend_exit().
*/
static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
{
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
__cpu_set_tcr_t0sz(t0sz);
/* avoid cpu_switch_mm() and its SW-PAN and CNP interactions */
write_sysreg(ttbr0, ttbr0_el1);
isb();
}
/*
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.
......
......@@ -130,6 +130,7 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag,
void mte_enable_kernel_sync(void);
void mte_enable_kernel_async(void);
void mte_enable_kernel_asymm(void);
#else /* CONFIG_ARM64_MTE */
......@@ -161,6 +162,10 @@ static inline void mte_enable_kernel_async(void)
{
}
static inline void mte_enable_kernel_asymm(void)
{
}
#endif /* CONFIG_ARM64_MTE */
#endif /* __ASSEMBLY__ */
......
......@@ -88,11 +88,11 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child,
#ifdef CONFIG_KASAN_HW_TAGS
/* Whether the MTE asynchronous mode is enabled. */
DECLARE_STATIC_KEY_FALSE(mte_async_mode);
DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
static inline bool system_uses_mte_async_mode(void)
static inline bool system_uses_mte_async_or_asymm_mode(void)
{
return static_branch_unlikely(&mte_async_mode);
return static_branch_unlikely(&mte_async_or_asymm_mode);
}
void mte_check_tfsr_el1(void);
......@@ -121,7 +121,7 @@ static inline void mte_check_tfsr_exit(void)
mte_check_tfsr_el1();
}
#else
static inline bool system_uses_mte_async_mode(void)
static inline bool system_uses_mte_async_or_asymm_mode(void)
{
return false;
}
......
......@@ -41,7 +41,6 @@ void tag_clear_highpage(struct page *to);
typedef struct page *pgtable_t;
int pfn_valid(unsigned long pfn);
int pfn_is_map_memory(unsigned long pfn);
#include <asm/memory.h>
......
......@@ -1022,6 +1022,11 @@ static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
return PAGE_READONLY_EXEC;
}
static inline bool pud_sect_supported(void)
{
return PAGE_SIZE == SZ_4K;
}
#endif /* !__ASSEMBLY__ */
......
......@@ -115,6 +115,11 @@ struct debug_info {
#endif
};
enum vec_type {
ARM64_VEC_SVE = 0,
ARM64_VEC_MAX,
};
struct cpu_context {
unsigned long x19;
unsigned long x20;
......@@ -147,8 +152,8 @@ struct thread_struct {
unsigned int fpsimd_cpu;
void *sve_state; /* SVE registers, if any */
unsigned int sve_vl; /* SVE vector length */
unsigned int sve_vl_onexec; /* SVE vl after next exec */
unsigned int vl[ARM64_VEC_MAX]; /* vector length */
unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
......@@ -164,6 +169,46 @@ struct thread_struct {
u64 sctlr_user;
};
static inline unsigned int thread_get_vl(struct thread_struct *thread,
enum vec_type type)
{
return thread->vl[type];
}
static inline unsigned int thread_get_sve_vl(struct thread_struct *thread)
{
return thread_get_vl(thread, ARM64_VEC_SVE);
}
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type);
void task_set_vl(struct task_struct *task, enum vec_type type,
unsigned long vl);
void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
unsigned long vl);
unsigned int task_get_vl_onexec(const struct task_struct *task,
enum vec_type type);
static inline unsigned int task_get_sve_vl(const struct task_struct *task)
{
return task_get_vl(task, ARM64_VEC_SVE);
}
static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl)
{
task_set_vl(task, ARM64_VEC_SVE, vl);
}
static inline unsigned int task_get_sve_vl_onexec(const struct task_struct *task)
{
return task_get_vl_onexec(task, ARM64_VEC_SVE);
}
static inline void task_set_sve_vl_onexec(struct task_struct *task,
unsigned long vl)
{
task_set_vl_onexec(task, ARM64_VEC_SVE, vl);
}
#define SCTLR_USER_MASK \
(SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB | \
SCTLR_EL1_TCF0_MASK)
......
......@@ -21,5 +21,6 @@ extern char __exittext_begin[], __exittext_end[];
extern char __irqentry_text_start[], __irqentry_text_end[];
extern char __mmuoff_data_start[], __mmuoff_data_end[];
extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[];
#endif /* __ASM_SECTIONS_H */
......@@ -8,4 +8,10 @@
void *get_early_fdt_ptr(void);
void early_fdt_map(u64 dt_phys);
/*
* These two variables are used in the head.S file.
*/
extern phys_addr_t __fdt_pointer __initdata;
extern u64 __cacheline_aligned boot_args[4];
#endif
......@@ -13,6 +13,8 @@
#include <linux/stringify.h>
#include <linux/kasan-tags.h>
#include <asm/gpr-num.h>
/*
* ARMv8 ARM reserves the following encoding for system registers:
* (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
......@@ -507,6 +509,9 @@
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
#define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0)
#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1)
#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2)
......@@ -621,6 +626,7 @@
#define SCTLR_ELx_TCF_NONE (UL(0x0) << SCTLR_ELx_TCF_SHIFT)
#define SCTLR_ELx_TCF_SYNC (UL(0x1) << SCTLR_ELx_TCF_SHIFT)
#define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT)
#define SCTLR_ELx_TCF_ASYMM (UL(0x3) << SCTLR_ELx_TCF_SHIFT)
#define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT)
#define SCTLR_ELx_ENIA_SHIFT 31
......@@ -666,6 +672,7 @@
#define SCTLR_EL1_TCF0_NONE (UL(0x0) << SCTLR_EL1_TCF0_SHIFT)
#define SCTLR_EL1_TCF0_SYNC (UL(0x1) << SCTLR_EL1_TCF0_SHIFT)
#define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT)
#define SCTLR_EL1_TCF0_ASYMM (UL(0x3) << SCTLR_EL1_TCF0_SHIFT)
#define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT)
#define SCTLR_EL1_BT1 (BIT(36))
......@@ -807,6 +814,7 @@
#define ID_AA64PFR1_MTE_NI 0x0
#define ID_AA64PFR1_MTE_EL0 0x1
#define ID_AA64PFR1_MTE 0x2
#define ID_AA64PFR1_MTE_ASYMM 0x3
/* id_aa64zfr0 */
#define ID_AA64ZFR0_F64MM_SHIFT 56
......@@ -1192,17 +1200,12 @@
#ifdef __ASSEMBLY__
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
.equ .L__reg_num_x\num, \num
.endr
.equ .L__reg_num_xzr, 31
.macro mrs_s, rt, sreg
__emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt))
__emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt))
.endm
.macro msr_s, sreg, rt
__emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt))
__emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt))
.endm
#else
......@@ -1211,22 +1214,16 @@
#include <linux/types.h>
#include <asm/alternative.h>
#define __DEFINE_MRS_MSR_S_REGNUM \
" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \
" .equ .L__reg_num_x\\num, \\num\n" \
" .endr\n" \
" .equ .L__reg_num_xzr, 31\n"
#define DEFINE_MRS_S \
__DEFINE_MRS_MSR_S_REGNUM \
__DEFINE_ASM_GPR_NUMS \
" .macro mrs_s, rt, sreg\n" \
__emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \
__emit_inst(0xd5200000|(\\sreg)|(.L__gpr_num_\\rt)) \
" .endm\n"
#define DEFINE_MSR_S \
__DEFINE_MRS_MSR_S_REGNUM \
__DEFINE_ASM_GPR_NUMS \
" .macro msr_s, sreg, rt\n" \
__emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \
__emit_inst(0xd5000000|(\\sreg)|(.L__gpr_num_\\rt)) \
" .endm\n"
#define UNDEFINE_MRS_S \
......
......@@ -78,7 +78,7 @@ int arch_dup_task_struct(struct task_struct *dst,
#define TIF_SINGLESTEP 21
#define TIF_32BIT 22 /* 32bit process */
#define TIF_SVE 23 /* Scalable Vector Extension in use */
#define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */
#define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */
#define TIF_SSBD 25 /* Wants SSB mitigation */
#define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2020, Microsoft Corporation.
* Pavel Tatashin <pasha.tatashin@soleen.com>
* Copyright (c) 2021, Microsoft Corporation.
* Pasha Tatashin <pasha.tatashin@soleen.com>
*/
#ifndef _ASM_TRANS_TABLE_H
......@@ -15,7 +15,7 @@
/*
* trans_alloc_page
* - Allocator that should return exactly one zeroed page, if this
* allocator fails, trans_pgd_create_copy() and trans_pgd_map_page()
* allocator fails, trans_pgd_create_copy() and trans_pgd_idmap_page()
* return -ENOMEM error.
*
* trans_alloc_arg
......@@ -30,10 +30,12 @@ struct trans_pgd_info {
int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd,
unsigned long start, unsigned long end);
int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd,
void *page, unsigned long dst_addr, pgprot_t pgprot);
int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
unsigned long *t0sz, void *page);
int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info,
phys_addr_t *el2_vectors);
extern char trans_pgd_stub_vectors[];
#endif /* _ASM_TRANS_TABLE_H */
......@@ -18,6 +18,7 @@
#include <linux/kasan-checks.h>
#include <linux/string.h>
#include <asm/asm-extable.h>
#include <asm/cpufeature.h>
#include <asm/mmu.h>
#include <asm/mte.h>
......@@ -70,12 +71,6 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si
#define access_ok(addr, size) __range_ok(addr, size)
#define _ASM_EXTABLE(from, to) \
" .pushsection __ex_table, \"a\"\n" \
" .align 3\n" \
" .long (" #from " - .), (" #to " - .)\n" \
" .popsection\n"
/*
* User access enabling/disabling.
*/
......@@ -196,13 +191,13 @@ static inline void __uaccess_enable_tco(void)
*/
static inline void __uaccess_disable_tco_async(void)
{
if (system_uses_mte_async_mode())
if (system_uses_mte_async_or_asymm_mode())
__uaccess_disable_tco();
}
static inline void __uaccess_enable_tco_async(void)
{
if (system_uses_mte_async_mode())
if (system_uses_mte_async_or_asymm_mode())
__uaccess_enable_tco();
}
......@@ -260,15 +255,9 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
asm volatile( \
"1: " load " " reg "1, [%2]\n" \
"2:\n" \
" .section .fixup, \"ax\"\n" \
" .align 2\n" \
"3: mov %w0, %3\n" \
" mov %1, #0\n" \
" b 2b\n" \
" .previous\n" \
_ASM_EXTABLE(1b, 3b) \
_ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \
: "+r" (err), "=&r" (x) \
: "r" (addr), "i" (-EFAULT))
: "r" (addr))
#define __raw_get_mem(ldr, x, ptr, err) \
do { \
......@@ -337,14 +326,9 @@ do { \
asm volatile( \
"1: " store " " reg "1, [%2]\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
"3: mov %w0, %3\n" \
" b 2b\n" \
" .previous\n" \
_ASM_EXTABLE(1b, 3b) \
_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \
: "+r" (err) \
: "r" (x), "r" (addr), "i" (-EFAULT))
: "r" (x), "r" (addr))
#define __raw_put_mem(str, x, ptr, err) \
do { \
......
......@@ -20,16 +20,9 @@
#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
#if __LINUX_ARM_ARCH__ >= 8 && defined(CONFIG_AS_DMB_ISHLD)
#define aarch32_smp_mb() dmb(ish)
#define aarch32_smp_rmb() dmb(ishld)
#define aarch32_smp_wmb() dmb(ishst)
#else
#define aarch32_smp_mb() dmb(ish)
#define aarch32_smp_rmb() aarch32_smp_mb()
#define aarch32_smp_wmb() dmb(ishst)
#endif
#undef smp_mb
#undef smp_rmb
......
......@@ -67,6 +67,8 @@
*/
extern u32 __boot_cpu_mode[2];
#define ARM64_VECTOR_TABLE_LEN SZ_2K
void __hyp_set_vectors(phys_addr_t phys_vector_base);
void __hyp_reset_vectors(void);
......@@ -128,6 +130,11 @@ static __always_inline bool is_protected_kvm_enabled(void)
return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE);
}
static inline bool is_hyp_nvhe(void)
{
return is_hyp_mode_available() && !is_kernel_in_hyp_mode();
}
#endif /* __ASSEMBLY__ */
#endif /* ! __ASM__VIRT_H */
......@@ -2,6 +2,7 @@
#define _ASM_ARM64_VMALLOC_H
#include <asm/page.h>
#include <asm/pgtable.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
......@@ -9,10 +10,9 @@
static inline bool arch_vmap_pud_supported(pgprot_t prot)
{
/*
* Only 4k granule supports level 1 block mappings.
* SW table walks can't handle removal of intermediate entries.
*/
return IS_ENABLED(CONFIG_ARM64_4K_PAGES) &&
return pud_sect_supported() &&
!IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
}
......
......@@ -53,29 +53,16 @@ static inline unsigned long find_zero(unsigned long mask)
*/
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
unsigned long ret, tmp;
unsigned long ret;
__uaccess_enable_tco_async();
/* Load word from unaligned pointer addr */
asm(
"1: ldr %0, %3\n"
"1: ldr %0, %2\n"
"2:\n"
" .pushsection .fixup,\"ax\"\n"
" .align 2\n"
"3: bic %1, %2, #0x7\n"
" ldr %0, [%1]\n"
" and %1, %2, #0x7\n"
" lsl %1, %1, #0x3\n"
#ifndef __AARCH64EB__
" lsr %0, %0, %1\n"
#else
" lsl %0, %0, %1\n"
#endif
" b 2b\n"
" .popsection\n"
_ASM_EXTABLE(1b, 3b)
: "=&r" (ret), "=&r" (tmp)
_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
: "=&r" (ret)
: "r" (addr), "Q" (*(unsigned long *)addr));
__uaccess_disable_tco_async();
......
......@@ -75,5 +75,6 @@
#define HWCAP2_RNG (1 << 16)
#define HWCAP2_BTI (1 << 17)
#define HWCAP2_MTE (1 << 18)
#define HWCAP2_ECV (1 << 19)
#endif /* _UAPI__ASM_HWCAP_H */
......@@ -279,7 +279,7 @@ static void __init register_insn_emulation_sysctl(void)
do { \
uaccess_enable_privileged(); \
__asm__ __volatile__( \
" mov %w3, %w7\n" \
" mov %w3, %w6\n" \
"0: ldxr"B" %w2, [%4]\n" \
"1: stxr"B" %w0, %w1, [%4]\n" \
" cbz %w0, 2f\n" \
......@@ -290,16 +290,10 @@ do { \
"2:\n" \
" mov %w1, %w2\n" \
"3:\n" \
" .pushsection .fixup,\"ax\"\n" \
" .align 2\n" \
"4: mov %w0, %w6\n" \
" b 3b\n" \
" .popsection" \
_ASM_EXTABLE(0b, 4b) \
_ASM_EXTABLE(1b, 4b) \
_ASM_EXTABLE_UACCESS_ERR(0b, 3b, %w0) \
_ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \
: "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \
: "r" ((unsigned long)addr), "i" (-EAGAIN), \
"i" (-EFAULT), \
"i" (__SWP_LL_SC_LOOPS) \
: "memory"); \
uaccess_disable_privileged(); \
......
......@@ -9,6 +9,7 @@
#include <linux/arm_sdei.h>
#include <linux/sched.h>
#include <linux/kexec.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
......@@ -170,6 +171,16 @@ int main(void)
DEFINE(PTRAUTH_KERNEL_KEY_APIA, offsetof(struct ptrauth_keys_kernel, apia));
#endif
BLANK();
#endif
#ifdef CONFIG_KEXEC_CORE
DEFINE(KIMAGE_ARCH_DTB_MEM, offsetof(struct kimage, arch.dtb_mem));
DEFINE(KIMAGE_ARCH_EL2_VECTORS, offsetof(struct kimage, arch.el2_vectors));
DEFINE(KIMAGE_ARCH_ZERO_PAGE, offsetof(struct kimage, arch.zero_page));
DEFINE(KIMAGE_ARCH_PHYS_OFFSET, offsetof(struct kimage, arch.phys_offset));
DEFINE(KIMAGE_ARCH_TTBR1, offsetof(struct kimage, arch.ttbr1));
DEFINE(KIMAGE_HEAD, offsetof(struct kimage, head));
DEFINE(KIMAGE_START, offsetof(struct kimage, start));
BLANK();
#endif
return 0;
}
......@@ -16,8 +16,7 @@
.pushsection .idmap.text, "awx"
/*
* __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for
* cpu_soft_restart.
* cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2)
*
* @el2_switch: Flag to indicate a switch to EL2 is needed.
* @entry: Location to jump to for soft reset.
......@@ -29,7 +28,7 @@
* branch to what would be the reset vector. It must be executed with the
* flat identity mapping.
*/
SYM_CODE_START(__cpu_soft_restart)
SYM_CODE_START(cpu_soft_restart)
mov_q x12, INIT_SCTLR_EL1_MMU_OFF
pre_disable_mmu_workaround
/*
......@@ -48,6 +47,6 @@ SYM_CODE_START(__cpu_soft_restart)
mov x1, x3 // arg1
mov x2, x4 // arg2
br x8
SYM_CODE_END(__cpu_soft_restart)
SYM_CODE_END(cpu_soft_restart)
.popsection
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* CPU reset routines
*
* Copyright (C) 2015 Huawei Futurewei Technologies.
*/
#ifndef _ARM64_CPU_RESET_H
#define _ARM64_CPU_RESET_H
#include <asm/virt.h>
void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
unsigned long arg0, unsigned long arg1, unsigned long arg2);
static inline void __noreturn __nocfi cpu_soft_restart(unsigned long entry,
unsigned long arg0,
unsigned long arg1,
unsigned long arg2)
{
typeof(__cpu_soft_restart) *restart;
unsigned long el2_switch = !is_kernel_in_hyp_mode() &&
is_hyp_mode_available();
restart = (void *)__pa_symbol(function_nocfi(__cpu_soft_restart));
cpu_install_idmap();
restart(el2_switch, entry, arg0, arg1, arg2);
unreachable();
}
#endif
......@@ -340,6 +340,42 @@ static const struct midr_range erratum_1463225[] = {
};
#endif
#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2139208
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2119858
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
#endif
{},
};
#endif /* CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE */
#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE
static const struct midr_range tsb_flush_fail_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2067961
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2054223
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
#endif
{},
};
#endif /* CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE */
#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
static struct midr_range trbe_write_out_of_range_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2253138
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2224489
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
#endif
{},
};
#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
......@@ -533,6 +569,34 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.capability = ARM64_WORKAROUND_NVIDIA_CARMEL_CNP,
ERRATA_MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
},
#endif
#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
{
/*
* The erratum work around is handled within the TRBE
* driver and can be applied per-cpu. So, we can allow
* a late CPU to come online with this erratum.
*/
.desc = "ARM erratum 2119858 or 2139208",
.capability = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
CAP_MIDR_RANGE_LIST(trbe_overwrite_fill_mode_cpus),
},
#endif
#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE
{
.desc = "ARM erratum 2067961 or 2054223",
.capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE,
ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus),
},
#endif
#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
{
.desc = "ARM erratum 2253138 or 2224489",
.capability = ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE,
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
},
#endif
{
}
......
......@@ -279,7 +279,7 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0),
/*
......@@ -941,7 +941,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
sve_init_vq_map();
vec_init_vq_map(ARM64_VEC_SVE);
}
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
......@@ -1175,7 +1175,7 @@ void update_cpu_features(int cpu,
/* Probe vector lengths, unless we already gave up on SVE */
if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
!system_capabilities_finalized())
sve_update_vq_map();
vec_update_vq_map(ARM64_VEC_SVE);
}
/*
......@@ -1930,6 +1930,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sign = FTR_UNSIGNED,
.min_field_value = 1,
},
{
.desc = "Enhanced Counter Virtualization",
.capability = ARM64_HAS_ECV,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR0_EL1,
.field_pos = ID_AA64MMFR0_ECV_SHIFT,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
},
#ifdef CONFIG_ARM64_PAN
{
.desc = "Privileged Access Never",
......@@ -2321,6 +2331,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sign = FTR_UNSIGNED,
.cpu_enable = cpu_enable_mte,
},
{
.desc = "Asymmetric MTE Tag Check Fault",
.capability = ARM64_MTE_ASYMM,
.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64PFR1_EL1,
.field_pos = ID_AA64PFR1_MTE_SHIFT,
.min_field_value = ID_AA64PFR1_MTE_ASYMM,
.sign = FTR_UNSIGNED,
},
#endif /* CONFIG_ARM64_MTE */
{
.desc = "RCpc load-acquire (LDAPR)",
......@@ -2451,6 +2471,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
#ifdef CONFIG_ARM64_MTE
HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
#endif /* CONFIG_ARM64_MTE */
HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
{},
};
......@@ -2739,7 +2760,7 @@ static void verify_sve_features(void)
unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK;
unsigned int len = zcr & ZCR_ELx_LEN_MASK;
if (len < safe_len || sve_verify_vq_map()) {
if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SVE)) {
pr_crit("CPU%d: SVE: vector length support mismatch\n",
smp_processor_id());
cpu_die_early();
......
......@@ -94,6 +94,7 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_RNG] = "rng",
[KERNEL_HWCAP_BTI] = "bti",
[KERNEL_HWCAP_MTE] = "mte",
[KERNEL_HWCAP_ECV] = "ecv",
};
#ifdef CONFIG_COMPAT
......
......@@ -38,9 +38,10 @@ SYM_FUNC_END(fpsimd_load_state)
*
* x0 - pointer to buffer for state
* x1 - pointer to storage for FPSR
* x2 - Save FFR if non-zero
*/
SYM_FUNC_START(sve_save_state)
sve_save 0, x1, 2
sve_save 0, x1, x2, 3
ret
SYM_FUNC_END(sve_save_state)
......@@ -49,10 +50,10 @@ SYM_FUNC_END(sve_save_state)
*
* x0 - pointer to buffer for state
* x1 - pointer to storage for FPSR
* x2 - VQ-1
* x2 - Restore FFR if non-zero
*/
SYM_FUNC_START(sve_load_state)
sve_load 0, x1, x2, 3, x4
sve_load 0, x1, x2, 4
ret
SYM_FUNC_END(sve_load_state)
......@@ -66,35 +67,22 @@ SYM_FUNC_START(sve_set_vq)
ret
SYM_FUNC_END(sve_set_vq)
/*
* Load SVE state from FPSIMD state.
*
* x0 = pointer to struct fpsimd_state
* x1 = VQ - 1
*
* Each SVE vector will be loaded with the first 128-bits taken from FPSIMD
* and the rest zeroed. All the other SVE registers will be zeroed.
*/
SYM_FUNC_START(sve_load_from_fpsimd_state)
sve_load_vq x1, x2, x3
fpsimd_restore x0, 8
sve_flush_p_ffr
ret
SYM_FUNC_END(sve_load_from_fpsimd_state)
/*
* Zero all SVE registers but the first 128-bits of each vector
*
* VQ must already be configured by caller, any further updates of VQ
* will need to ensure that the register state remains valid.
*
* x0 = VQ - 1
* x0 = include FFR?
* x1 = VQ - 1
*/
SYM_FUNC_START(sve_flush_live)
cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state
cbz x1, 1f // A VQ-1 of 0 is 128 bits so no extra Z state
sve_flush_z
1: sve_flush_p_ffr
ret
1: sve_flush_p
tbz x0, #0, 2f
sve_flush_ffr
2: ret
SYM_FUNC_END(sve_flush_live)
#endif /* CONFIG_ARM64_SVE */
......@@ -168,9 +168,9 @@ alternative_else_nop_endif
.macro mte_set_kernel_gcr, tmp, tmp2
#ifdef CONFIG_KASAN_HW_TAGS
alternative_if_not ARM64_MTE
alternative_cb kasan_hw_tags_enable
b 1f
alternative_else_nop_endif
alternative_cb_end
mov \tmp, KERNEL_GCR_EL1
msr_s SYS_GCR_EL1, \tmp
1:
......@@ -178,10 +178,10 @@ alternative_else_nop_endif
.endm
.macro mte_set_user_gcr, tsk, tmp, tmp2
#ifdef CONFIG_ARM64_MTE
alternative_if_not ARM64_MTE
#ifdef CONFIG_KASAN_HW_TAGS
alternative_cb kasan_hw_tags_enable
b 1f
alternative_else_nop_endif
alternative_cb_end
ldr \tmp, [\tsk, #THREAD_MTE_CTRL]
mte_set_gcr \tmp, \tmp2
......
......@@ -121,40 +121,62 @@ struct fpsimd_last_state_struct {
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
/* Default VL for tasks that don't set it explicitly: */
static int __sve_default_vl = -1;
__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
#ifdef CONFIG_ARM64_SVE
[ARM64_VEC_SVE] = {
.type = ARM64_VEC_SVE,
.name = "SVE",
.min_vl = SVE_VL_MIN,
.max_vl = SVE_VL_MIN,
.max_virtualisable_vl = SVE_VL_MIN,
},
#endif
};
static int get_sve_default_vl(void)
static unsigned int vec_vl_inherit_flag(enum vec_type type)
{
return READ_ONCE(__sve_default_vl);
switch (type) {
case ARM64_VEC_SVE:
return TIF_SVE_VL_INHERIT;
default:
WARN_ON_ONCE(1);
return 0;
}
}
struct vl_config {
int __default_vl; /* Default VL for tasks */
};
static struct vl_config vl_config[ARM64_VEC_MAX];
static inline int get_default_vl(enum vec_type type)
{
return READ_ONCE(vl_config[type].__default_vl);
}
#ifdef CONFIG_ARM64_SVE
static void set_sve_default_vl(int val)
static inline int get_sve_default_vl(void)
{
WRITE_ONCE(__sve_default_vl, val);
return get_default_vl(ARM64_VEC_SVE);
}
/* Maximum supported vector length across all CPUs (initially poisoned) */
int __ro_after_init sve_max_vl = SVE_VL_MIN;
int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
static inline void set_default_vl(enum vec_type type, int val)
{
WRITE_ONCE(vl_config[type].__default_vl, val);
}
/*
* Set of available vector lengths,
* where length vq encoded as bit __vq_to_bit(vq):
*/
__ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
/* Set of vector lengths present on at least one cpu: */
static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
static inline void set_sve_default_vl(int val)
{
set_default_vl(ARM64_VEC_SVE, val);
}
static void __percpu *efi_sve_state;
#else /* ! CONFIG_ARM64_SVE */
/* Dummy declaration for code that will be optimised out: */
extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
extern void __percpu *efi_sve_state;
#endif /* ! CONFIG_ARM64_SVE */
......@@ -228,6 +250,29 @@ static void sve_free(struct task_struct *task)
__sve_free(task);
}
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
{
return task->thread.vl[type];
}
void task_set_vl(struct task_struct *task, enum vec_type type,
unsigned long vl)
{
task->thread.vl[type] = vl;
}
unsigned int task_get_vl_onexec(const struct task_struct *task,
enum vec_type type)
{
return task->thread.vl_onexec[type];
}
void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
unsigned long vl)
{
task->thread.vl_onexec[type] = vl;
}
/*
* TIF_SVE controls whether a task can use SVE without trapping while
* in userspace, and also the way a task's FPSIMD/SVE state is stored
......@@ -287,12 +332,13 @@ static void task_fpsimd_load(void)
WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context());
if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE))
if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
sve_load_state(sve_pffr(&current->thread),
&current->thread.uw.fpsimd_state.fpsr,
sve_vq_from_vl(current->thread.sve_vl) - 1);
else
&current->thread.uw.fpsimd_state.fpsr, true);
} else {
fpsimd_load_state(&current->thread.uw.fpsimd_state);
}
}
/*
......@@ -308,7 +354,9 @@ static void fpsimd_save(void)
WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context());
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
if (IS_ENABLED(CONFIG_ARM64_SVE) &&
test_thread_flag(TIF_SVE)) {
if (WARN_ON(sve_get_vl() != last->sve_vl)) {
......@@ -323,8 +371,8 @@ static void fpsimd_save(void)
sve_save_state((char *)last->sve_state +
sve_ffr_offset(last->sve_vl),
&last->st->fpsr);
} else
&last->st->fpsr, true);
} else {
fpsimd_save_state(last->st);
}
}
......@@ -335,21 +383,23 @@ static void fpsimd_save(void)
* If things go wrong there's a bug somewhere, but try to fall back to a
* safe choice.
*/
static unsigned int find_supported_vector_length(unsigned int vl)
static unsigned int find_supported_vector_length(enum vec_type type,
unsigned int vl)
{
struct vl_info *info = &vl_info[type];
int bit;
int max_vl = sve_max_vl;
int max_vl = info->max_vl;
if (WARN_ON(!sve_vl_valid(vl)))
vl = SVE_VL_MIN;
vl = info->min_vl;
if (WARN_ON(!sve_vl_valid(max_vl)))
max_vl = SVE_VL_MIN;
max_vl = info->min_vl;
if (vl > max_vl)
vl = max_vl;
bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
bit = find_next_bit(info->vq_map, SVE_VQ_MAX,
__vq_to_bit(sve_vq_from_vl(vl)));
return sve_vl_from_vq(__bit_to_vq(bit));
}
......@@ -359,6 +409,7 @@ static unsigned int find_supported_vector_length(unsigned int vl)
static int sve_proc_do_default_vl(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct vl_info *info = &vl_info[ARM64_VEC_SVE];
int ret;
int vl = get_sve_default_vl();
struct ctl_table tmp_table = {
......@@ -372,12 +423,12 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write,
/* Writing -1 has the special meaning "set to max": */
if (vl == -1)
vl = sve_max_vl;
vl = info->max_vl;
if (!sve_vl_valid(vl))
return -EINVAL;
set_sve_default_vl(find_supported_vector_length(vl));
set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, vl));
return 0;
}
......@@ -456,7 +507,7 @@ static void fpsimd_to_sve(struct task_struct *task)
if (!system_supports_sve())
return;
vq = sve_vq_from_vl(task->thread.sve_vl);
vq = sve_vq_from_vl(task_get_sve_vl(task));
__fpsimd_to_sve(sst, fst, vq);
}
......@@ -482,7 +533,7 @@ static void sve_to_fpsimd(struct task_struct *task)
if (!system_supports_sve())
return;
vq = sve_vq_from_vl(task->thread.sve_vl);
vq = sve_vq_from_vl(task_get_sve_vl(task));
for (i = 0; i < SVE_NUM_ZREGS; ++i) {
p = (__uint128_t const *)ZREG(sst, vq, i);
fst->vregs[i] = arm64_le128_to_cpu(*p);
......@@ -495,9 +546,9 @@ static void sve_to_fpsimd(struct task_struct *task)
* Return how many bytes of memory are required to store the full SVE
* state for task, given task's currently configured vector length.
*/
size_t sve_state_size(struct task_struct const *task)
static size_t sve_state_size(struct task_struct const *task)
{
return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task->thread.sve_vl));
return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task)));
}
/*
......@@ -572,7 +623,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
if (!test_tsk_thread_flag(task, TIF_SVE))
return;
vq = sve_vq_from_vl(task->thread.sve_vl);
vq = sve_vq_from_vl(task_get_sve_vl(task));
memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
__fpsimd_to_sve(sst, fst, vq);
......@@ -596,20 +647,20 @@ int sve_set_vector_length(struct task_struct *task,
if (vl > SVE_VL_ARCH_MAX)
vl = SVE_VL_ARCH_MAX;
vl = find_supported_vector_length(vl);
vl = find_supported_vector_length(ARM64_VEC_SVE, vl);
if (flags & (PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC))
task->thread.sve_vl_onexec = vl;
task_set_sve_vl_onexec(task, vl);
else
/* Reset VL to system default on next exec: */
task->thread.sve_vl_onexec = 0;
task_set_sve_vl_onexec(task, 0);
/* Only actually set the VL if not deferred: */
if (flags & PR_SVE_SET_VL_ONEXEC)
goto out;
if (vl == task->thread.sve_vl)
if (vl == task_get_sve_vl(task))
goto out;
/*
......@@ -636,7 +687,7 @@ int sve_set_vector_length(struct task_struct *task,
*/
sve_free(task);
task->thread.sve_vl = vl;
task_set_sve_vl(task, vl);
out:
update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT,
......@@ -656,9 +707,9 @@ static int sve_prctl_status(unsigned long flags)
int ret;
if (flags & PR_SVE_SET_VL_ONEXEC)
ret = current->thread.sve_vl_onexec;
ret = task_get_sve_vl_onexec(current);
else
ret = current->thread.sve_vl;
ret = task_get_sve_vl(current);
if (test_thread_flag(TIF_SVE_VL_INHERIT))
ret |= PR_SVE_VL_INHERIT;
......@@ -694,18 +745,15 @@ int sve_get_current_vl(void)
return sve_prctl_status(0);
}
static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
static void vec_probe_vqs(struct vl_info *info,
DECLARE_BITMAP(map, SVE_VQ_MAX))
{
unsigned int vq, vl;
unsigned long zcr;
bitmap_zero(map, SVE_VQ_MAX);
zcr = ZCR_ELx_LEN_MASK;
zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr;
for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */
write_vl(info->type, vq - 1); /* self-syncing */
vl = sve_get_vl();
vq = sve_vq_from_vl(vl); /* skip intervening lengths */
set_bit(__vq_to_bit(vq), map);
......@@ -716,10 +764,11 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
* Initialise the set of known supported VQs for the boot CPU.
* This is called during kernel boot, before secondary CPUs are brought up.
*/
void __init sve_init_vq_map(void)
void __init vec_init_vq_map(enum vec_type type)
{
sve_probe_vqs(sve_vq_map);
bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX);
struct vl_info *info = &vl_info[type];
vec_probe_vqs(info, info->vq_map);
bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
}
/*
......@@ -727,30 +776,33 @@ void __init sve_init_vq_map(void)
* those not supported by the current CPU.
* This function is called during the bring-up of early secondary CPUs only.
*/
void sve_update_vq_map(void)
void vec_update_vq_map(enum vec_type type)
{
struct vl_info *info = &vl_info[type];
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
sve_probe_vqs(tmp_map);
bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX);
bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX);
vec_probe_vqs(info, tmp_map);
bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX);
bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map,
SVE_VQ_MAX);
}
/*
* Check whether the current CPU supports all VQs in the committed set.
* This function is called during the bring-up of late secondary CPUs only.
*/
int sve_verify_vq_map(void)
int vec_verify_vq_map(enum vec_type type)
{
struct vl_info *info = &vl_info[type];
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
unsigned long b;
sve_probe_vqs(tmp_map);
vec_probe_vqs(info, tmp_map);
bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) {
pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
smp_processor_id());
if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
pr_warn("%s: cpu%d: Required vector length(s) missing\n",
info->name, smp_processor_id());
return -EINVAL;
}
......@@ -766,7 +818,7 @@ int sve_verify_vq_map(void)
/* Recover the set of supported VQs: */
bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
/* Find VQs supported that are not globally supported: */
bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX);
bitmap_andnot(tmp_map, tmp_map, info->vq_map, SVE_VQ_MAX);
/* Find the lowest such VQ, if any: */
b = find_last_bit(tmp_map, SVE_VQ_MAX);
......@@ -777,9 +829,9 @@ int sve_verify_vq_map(void)
* Mismatches above sve_max_virtualisable_vl are fine, since
* no guest is allowed to configure ZCR_EL2.LEN to exceed this:
*/
if (sve_vl_from_vq(__bit_to_vq(b)) <= sve_max_virtualisable_vl) {
pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
smp_processor_id());
if (sve_vl_from_vq(__bit_to_vq(b)) <= info->max_virtualisable_vl) {
pr_warn("%s: cpu%d: Unsupported vector length(s) present\n",
info->name, smp_processor_id());
return -EINVAL;
}
......@@ -788,6 +840,8 @@ int sve_verify_vq_map(void)
static void __init sve_efi_setup(void)
{
struct vl_info *info = &vl_info[ARM64_VEC_SVE];
if (!IS_ENABLED(CONFIG_EFI))
return;
......@@ -796,11 +850,11 @@ static void __init sve_efi_setup(void)
* This is evidence of a crippled system and we are returning void,
* so no attempt is made to handle this situation here.
*/
if (!sve_vl_valid(sve_max_vl))
if (!sve_vl_valid(info->max_vl))
goto fail;
efi_sve_state = __alloc_percpu(
SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)), SVE_VQ_BYTES);
SVE_SIG_REGS_SIZE(sve_vq_from_vl(info->max_vl)), SVE_VQ_BYTES);
if (!efi_sve_state)
goto fail;
......@@ -849,6 +903,7 @@ u64 read_zcr_features(void)
void __init sve_setup(void)
{
struct vl_info *info = &vl_info[ARM64_VEC_SVE];
u64 zcr;
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
unsigned long b;
......@@ -861,49 +916,52 @@ void __init sve_setup(void)
* so sve_vq_map must have at least SVE_VQ_MIN set.
* If something went wrong, at least try to patch it up:
*/
if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
set_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map);
if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);
zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
/*
* Sanity-check that the max VL we determined through CPU features
* corresponds properly to sve_vq_map. If not, do our best:
*/
if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl)))
sve_max_vl = find_supported_vector_length(sve_max_vl);
if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE,
info->max_vl)))
info->max_vl = find_supported_vector_length(ARM64_VEC_SVE,
info->max_vl);
/*
* For the default VL, pick the maximum supported value <= 64.
* VL == 64 is guaranteed not to grow the signal frame.
*/
set_sve_default_vl(find_supported_vector_length(64));
set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, 64));
bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map,
bitmap_andnot(tmp_map, info->vq_partial_map, info->vq_map,
SVE_VQ_MAX);
b = find_last_bit(tmp_map, SVE_VQ_MAX);
if (b >= SVE_VQ_MAX)
/* No non-virtualisable VLs found */
sve_max_virtualisable_vl = SVE_VQ_MAX;
info->max_virtualisable_vl = SVE_VQ_MAX;
else if (WARN_ON(b == SVE_VQ_MAX - 1))
/* No virtualisable VLs? This is architecturally forbidden. */
sve_max_virtualisable_vl = SVE_VQ_MIN;
info->max_virtualisable_vl = SVE_VQ_MIN;
else /* b + 1 < SVE_VQ_MAX */
sve_max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));
info->max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));
if (sve_max_virtualisable_vl > sve_max_vl)
sve_max_virtualisable_vl = sve_max_vl;
if (info->max_virtualisable_vl > info->max_vl)
info->max_virtualisable_vl = info->max_vl;
pr_info("SVE: maximum available vector length %u bytes per vector\n",
sve_max_vl);
pr_info("SVE: default vector length %u bytes per vector\n",
get_sve_default_vl());
pr_info("%s: maximum available vector length %u bytes per vector\n",
info->name, info->max_vl);
pr_info("%s: default vector length %u bytes per vector\n",
info->name, get_sve_default_vl());
/* KVM decides whether to support mismatched systems. Just warn here: */
if (sve_max_virtualisable_vl < sve_max_vl)
pr_warn("SVE: unvirtualisable vector lengths present\n");
if (sve_max_virtualisable_vl() < sve_max_vl())
pr_warn("%s: unvirtualisable vector lengths present\n",
info->name);
sve_efi_setup();
}
......@@ -958,9 +1016,9 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
*/
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
unsigned long vq_minus_one =
sve_vq_from_vl(current->thread.sve_vl) - 1;
sve_vq_from_vl(task_get_sve_vl(current)) - 1;
sve_set_vq(vq_minus_one);
sve_flush_live(vq_minus_one);
sve_flush_live(true, vq_minus_one);
fpsimd_bind_task_to_cpu();
} else {
fpsimd_to_sve(current);
......@@ -1030,52 +1088,56 @@ void fpsimd_thread_switch(struct task_struct *next)
__put_cpu_fpsimd_context();
}
void fpsimd_flush_thread(void)
static void fpsimd_flush_thread_vl(enum vec_type type)
{
int vl, supported_vl;
if (!system_supports_fpsimd())
return;
get_cpu_fpsimd_context();
fpsimd_flush_task_state(current);
memset(&current->thread.uw.fpsimd_state, 0,
sizeof(current->thread.uw.fpsimd_state));
if (system_supports_sve()) {
clear_thread_flag(TIF_SVE);
sve_free(current);
/*
* Reset the task vector length as required.
* This is where we ensure that all user tasks have a valid
* vector length configured: no kernel task can become a user
* task without an exec and hence a call to this function.
* By the time the first call to this function is made, all
* early hardware probing is complete, so __sve_default_vl
* should be valid.
* Reset the task vector length as required. This is where we
* ensure that all user tasks have a valid vector length
* configured: no kernel task can become a user task without
* an exec and hence a call to this function. By the time the
* first call to this function is made, all early hardware
* probing is complete, so __sve_default_vl should be valid.
* If a bug causes this to go wrong, we make some noise and
* try to fudge thread.sve_vl to a safe value here.
*/
vl = current->thread.sve_vl_onexec ?
current->thread.sve_vl_onexec : get_sve_default_vl();
vl = task_get_vl_onexec(current, type);
if (!vl)
vl = get_default_vl(type);
if (WARN_ON(!sve_vl_valid(vl)))
vl = SVE_VL_MIN;
supported_vl = find_supported_vector_length(vl);
supported_vl = find_supported_vector_length(type, vl);
if (WARN_ON(supported_vl != vl))
vl = supported_vl;
current->thread.sve_vl = vl;
task_set_vl(current, type, vl);
/*
* If the task is not set to inherit, ensure that the vector
* length will be reset by a subsequent exec:
*/
if (!test_thread_flag(TIF_SVE_VL_INHERIT))
current->thread.sve_vl_onexec = 0;
if (!test_thread_flag(vec_vl_inherit_flag(type)))
task_set_vl_onexec(current, type, 0);
}
void fpsimd_flush_thread(void)
{
if (!system_supports_fpsimd())
return;
get_cpu_fpsimd_context();
fpsimd_flush_task_state(current);
memset(&current->thread.uw.fpsimd_state, 0,
sizeof(current->thread.uw.fpsimd_state));
if (system_supports_sve()) {
clear_thread_flag(TIF_SVE);
sve_free(current);
fpsimd_flush_thread_vl(ARM64_VEC_SVE);
}
put_cpu_fpsimd_context();
......@@ -1120,7 +1182,7 @@ static void fpsimd_bind_task_to_cpu(void)
WARN_ON(!system_supports_fpsimd());
last->st = &current->thread.uw.fpsimd_state;
last->sve_state = current->thread.sve_state;
last->sve_vl = current->thread.sve_vl;
last->sve_vl = task_get_sve_vl(current);
current->thread.fpsimd_cpu = smp_processor_id();
if (system_supports_sve()) {
......@@ -1353,8 +1415,9 @@ void __efi_fpsimd_begin(void)
__this_cpu_write(efi_sve_state_used, true);
sve_save_state(sve_state + sve_ffr_offset(sve_max_vl),
&this_cpu_ptr(&efi_fpsimd_state)->fpsr);
sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()),
&this_cpu_ptr(&efi_fpsimd_state)->fpsr,
true);
} else {
fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
}
......@@ -1378,9 +1441,10 @@ void __efi_fpsimd_end(void)
likely(__this_cpu_read(efi_sve_state_used))) {
char const *sve_state = this_cpu_ptr(efi_sve_state);
sve_load_state(sve_state + sve_ffr_offset(sve_max_vl),
sve_set_vq(sve_vq_from_vl(sve_get_vl()) - 1);
sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()),
&this_cpu_ptr(&efi_fpsimd_state)->fpsr,
sve_vq_from_vl(sve_get_vl()) - 1);
true);
__this_cpu_write(efi_sve_state_used, false);
} else {
......
......@@ -15,26 +15,6 @@
#include <asm/page.h>
#include <asm/virt.h>
/*
* To prevent the possibility of old and new partial table walks being visible
* in the tlb, switch the ttbr to a zero page when we invalidate the old
* records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i
* Even switching to our copied tables will cause a changed output address at
* each stage of the walk.
*/
.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2
phys_to_ttbr \tmp, \zero_page
msr ttbr1_el1, \tmp
isb
tlbi vmalle1
dsb nsh
phys_to_ttbr \tmp, \page_table
offset_ttbr1 \tmp, \tmp2
msr ttbr1_el1, \tmp
isb
.endm
/*
* Resume from hibernate
*
......@@ -112,56 +92,4 @@ alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
hvc #0
3: ret
SYM_CODE_END(swsusp_arch_suspend_exit)
/*
* Restore the hyp stub.
* This must be done before the hibernate page is unmapped by _cpu_resume(),
* but happens before any of the hyp-stub's code is cleaned to PoC.
*
* x24: The physical address of __hyp_stub_vectors
*/
SYM_CODE_START_LOCAL(el1_sync)
msr vbar_el2, x24
eret
SYM_CODE_END(el1_sync)
.macro invalid_vector label
SYM_CODE_START_LOCAL(\label)
b \label
SYM_CODE_END(\label)
.endm
invalid_vector el2_sync_invalid
invalid_vector el2_irq_invalid
invalid_vector el2_fiq_invalid
invalid_vector el2_error_invalid
invalid_vector el1_sync_invalid
invalid_vector el1_irq_invalid
invalid_vector el1_fiq_invalid
invalid_vector el1_error_invalid
/* el2 vectors - switch el2 here while we restore the memory image. */
.align 11
SYM_CODE_START(hibernate_el2_vectors)
ventry el2_sync_invalid // Synchronous EL2t
ventry el2_irq_invalid // IRQ EL2t
ventry el2_fiq_invalid // FIQ EL2t
ventry el2_error_invalid // Error EL2t
ventry el2_sync_invalid // Synchronous EL2h
ventry el2_irq_invalid // IRQ EL2h
ventry el2_fiq_invalid // FIQ EL2h
ventry el2_error_invalid // Error EL2h
ventry el1_sync // Synchronous 64-bit EL1
ventry el1_irq_invalid // IRQ 64-bit EL1
ventry el1_fiq_invalid // FIQ 64-bit EL1
ventry el1_error_invalid // Error 64-bit EL1
ventry el1_sync_invalid // Synchronous 32-bit EL1
ventry el1_irq_invalid // IRQ 32-bit EL1
ventry el1_fiq_invalid // FIQ 32-bit EL1
ventry el1_error_invalid // Error 32-bit EL1
SYM_CODE_END(hibernate_el2_vectors)
.popsection
......@@ -49,10 +49,7 @@
extern int in_suspend;
/* Do we need to reset el2? */
#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
/* temporary el2 vectors in the __hibernate_exit_text section. */
extern char hibernate_el2_vectors[];
#define el2_reset_needed() (is_hyp_nvhe())
/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
extern char __hyp_stub_vectors[];
......@@ -215,26 +212,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
if (rc)
return rc;
/*
* Load our new page tables. A strict BBM approach requires that we
* ensure that TLBs are free of any entries that may overlap with the
* global mappings we are about to install.
*
* For a real hibernate/resume cycle TTBR0 currently points to a zero
* page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
* runtime services), while for a userspace-driven test_resume cycle it
* points to userspace page tables (and we must point it at a zero page
* ourselves).
*
* We change T0SZ as part of installing the idmap. This is undone by
* cpu_uninstall_idmap() in __cpu_suspend_exit().
*/
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
__cpu_set_tcr_t0sz(t0sz);
write_sysreg(trans_ttbr0, ttbr0_el1);
isb();
cpu_install_ttbr0(trans_ttbr0, t0sz);
*phys_dst_addr = virt_to_phys(page);
return 0;
......@@ -434,6 +412,7 @@ int swsusp_arch_resume(void)
void *zero_page;
size_t exit_size;
pgd_t *tmp_pg_dir;
phys_addr_t el2_vectors;
void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
void *, phys_addr_t, phys_addr_t);
struct trans_pgd_info trans_info = {
......@@ -461,6 +440,14 @@ int swsusp_arch_resume(void)
return -ENOMEM;
}
if (el2_reset_needed()) {
rc = trans_pgd_copy_el2_vectors(&trans_info, &el2_vectors);
if (rc) {
pr_err("Failed to setup el2 vectors\n");
return rc;
}
}
exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
/*
* Copy swsusp_arch_suspend_exit() to a safe page. This will generate
......@@ -473,26 +460,14 @@ int swsusp_arch_resume(void)
return rc;
}
/*
* The hibernate exit text contains a set of el2 vectors, that will
* be executed at el2 with the mmu off in order to reload hyp-stub.
*/
dcache_clean_inval_poc((unsigned long)hibernate_exit,
(unsigned long)hibernate_exit + exit_size);
/*
* KASLR will cause the el2 vectors to be in a different location in
* the resumed kernel. Load hibernate's temporary copy into el2.
*
* We can skip this step if we booted at EL1, or are running with VHE.
*/
if (el2_reset_needed()) {
phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit;
el2_vectors += hibernate_el2_vectors -
__hibernate_exit_text_start; /* offset */
if (el2_reset_needed())
__hyp_set_vectors(el2_vectors);
}
hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
resume_hdr.reenter_kernel, restore_pblist,
......
......@@ -21,12 +21,8 @@
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include "cpu-reset.h"
/* Global variables for the arm64_relocate_new_kernel routine. */
extern const unsigned char arm64_relocate_new_kernel[];
extern const unsigned long arm64_relocate_new_kernel_size;
#include <asm/sections.h>
#include <asm/trans_pgd.h>
/**
* kexec_image_info - For debugging output.
......@@ -43,7 +39,9 @@ static void _kexec_image_info(const char *func, int line,
pr_debug(" start: %lx\n", kimage->start);
pr_debug(" head: %lx\n", kimage->head);
pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
pr_debug(" dtb_mem: %pa\n", &kimage->arch.dtb_mem);
pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc);
pr_debug(" el2_vectors: %pa\n", &kimage->arch.el2_vectors);
for (i = 0; i < kimage->nr_segments; i++) {
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
......@@ -60,29 +58,6 @@ void machine_kexec_cleanup(struct kimage *kimage)
/* Empty routine needed to avoid build errors. */
}
int machine_kexec_post_load(struct kimage *kimage)
{
void *reloc_code = page_to_virt(kimage->control_code_page);
memcpy(reloc_code, arm64_relocate_new_kernel,
arm64_relocate_new_kernel_size);
kimage->arch.kern_reloc = __pa(reloc_code);
kexec_image_info(kimage);
/*
* For execution with the MMU off, reloc_code needs to be cleaned to the
* PoC and invalidated from the I-cache.
*/
dcache_clean_inval_poc((unsigned long)reloc_code,
(unsigned long)reloc_code +
arm64_relocate_new_kernel_size);
icache_inval_pou((uintptr_t)reloc_code,
(uintptr_t)reloc_code +
arm64_relocate_new_kernel_size);
return 0;
}
/**
* machine_kexec_prepare - Prepare for a kexec reboot.
*
......@@ -100,45 +75,6 @@ int machine_kexec_prepare(struct kimage *kimage)
return 0;
}
/**
* kexec_list_flush - Helper to flush the kimage list and source pages to PoC.
*/
static void kexec_list_flush(struct kimage *kimage)
{
kimage_entry_t *entry;
for (entry = &kimage->head; ; entry++) {
unsigned int flag;
unsigned long addr;
/* flush the list entries. */
dcache_clean_inval_poc((unsigned long)entry,
(unsigned long)entry +
sizeof(kimage_entry_t));
flag = *entry & IND_FLAGS;
if (flag == IND_DONE)
break;
addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK);
switch (flag) {
case IND_INDIRECTION:
/* Set entry point just before the new list page. */
entry = (kimage_entry_t *)addr - 1;
break;
case IND_SOURCE:
/* flush the source pages. */
dcache_clean_inval_poc(addr, addr + PAGE_SIZE);
break;
case IND_DESTINATION:
break;
default:
BUG();
}
}
}
/**
* kexec_segment_flush - Helper to flush the kimage segments to PoC.
*/
......@@ -163,6 +99,75 @@ static void kexec_segment_flush(const struct kimage *kimage)
}
}
/* Allocates pages for kexec page table */
static void *kexec_page_alloc(void *arg)
{
struct kimage *kimage = (struct kimage *)arg;
struct page *page = kimage_alloc_control_pages(kimage, 0);
if (!page)
return NULL;
memset(page_address(page), 0, PAGE_SIZE);
return page_address(page);
}
int machine_kexec_post_load(struct kimage *kimage)
{
int rc;
pgd_t *trans_pgd;
void *reloc_code = page_to_virt(kimage->control_code_page);
long reloc_size;
struct trans_pgd_info info = {
.trans_alloc_page = kexec_page_alloc,
.trans_alloc_arg = kimage,
};
/* If in place, relocation is not used, only flush next kernel */
if (kimage->head & IND_DONE) {
kexec_segment_flush(kimage);
kexec_image_info(kimage);
return 0;
}
kimage->arch.el2_vectors = 0;
if (is_hyp_nvhe()) {
rc = trans_pgd_copy_el2_vectors(&info,
&kimage->arch.el2_vectors);
if (rc)
return rc;
}
/* Create a copy of the linear map */
trans_pgd = kexec_page_alloc(kimage);
if (!trans_pgd)
return -ENOMEM;
rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END);
if (rc)
return rc;
kimage->arch.ttbr1 = __pa(trans_pgd);
kimage->arch.zero_page = __pa(empty_zero_page);
reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start;
memcpy(reloc_code, __relocate_new_kernel_start, reloc_size);
kimage->arch.kern_reloc = __pa(reloc_code);
rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0,
&kimage->arch.t0sz, reloc_code);
if (rc)
return rc;
kimage->arch.phys_offset = virt_to_phys(kimage) - (long)kimage;
/* Flush the reloc_code in preparation for its execution. */
dcache_clean_inval_poc((unsigned long)reloc_code,
(unsigned long)reloc_code + reloc_size);
icache_inval_pou((uintptr_t)reloc_code,
(uintptr_t)reloc_code + reloc_size);
kexec_image_info(kimage);
return 0;
}
/**
* machine_kexec - Do the kexec reboot.
*
......@@ -180,31 +185,35 @@ void machine_kexec(struct kimage *kimage)
WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
"Some CPUs may be stale, kdump will be unreliable.\n");
/* Flush the kimage list and its buffers. */
kexec_list_flush(kimage);
/* Flush the new image if already in place. */
if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
kexec_segment_flush(kimage);
pr_info("Bye!\n");
local_daif_mask();
/*
* cpu_soft_restart will shutdown the MMU, disable data caches, then
* transfer control to the kern_reloc which contains a copy of
* the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
* uses physical addressing to relocate the new image to its final
* position and transfers control to the image entry point when the
* relocation is complete.
* Both restart and kernel_reloc will shutdown the MMU, disable data
* caches. However, restart will start new kernel or purgatory directly,
* kernel_reloc contains the body of arm64_relocate_new_kernel
* In kexec case, kimage->start points to purgatory assuming that
* kernel entry and dtb address are embedded in purgatory by
* userspace (kexec-tools).
* In kexec_file case, the kernel starts directly without purgatory.
*/
cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start,
kimage->arch.dtb_mem);
if (kimage->head & IND_DONE) {
typeof(cpu_soft_restart) *restart;
cpu_install_idmap();
restart = (void *)__pa_symbol(function_nocfi(cpu_soft_restart));
restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem,
0, 0);
} else {
void (*kernel_reloc)(struct kimage *kimage);
if (is_hyp_nvhe())
__hyp_set_vectors(kimage->arch.el2_vectors);
cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz);
kernel_reloc = (void *)kimage->arch.kern_reloc;
kernel_reloc(kimage);
}
BUG(); /* Should never get here. */
}
......@@ -261,8 +270,6 @@ void arch_kexec_protect_crashkres(void)
{
int i;
kexec_segment_flush(kexec_crash_image);
for (i = 0; i < kexec_crash_image->nr_segments; i++)
set_memory_valid(
__phys_to_virt(kexec_crash_image->segment[i].mem),
......
......@@ -26,9 +26,12 @@
static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred);
#ifdef CONFIG_KASAN_HW_TAGS
/* Whether the MTE asynchronous mode is enabled. */
DEFINE_STATIC_KEY_FALSE(mte_async_mode);
EXPORT_SYMBOL_GPL(mte_async_mode);
/*
* The asynchronous and asymmetric MTE modes have the same behavior for
* store operations. This flag is set when either of these modes is enabled.
*/
DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
#endif
static void mte_sync_page_tags(struct page *page, pte_t old_pte,
......@@ -116,7 +119,7 @@ void mte_enable_kernel_sync(void)
* Make sure we enter this function when no PE has set
* async mode previously.
*/
WARN_ONCE(system_uses_mte_async_mode(),
WARN_ONCE(system_uses_mte_async_or_asymm_mode(),
"MTE async mode enabled system wide!");
__mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC);
......@@ -134,8 +137,34 @@ void mte_enable_kernel_async(void)
* mode in between sync and async, this strategy needs
* to be reviewed.
*/
if (!system_uses_mte_async_mode())
static_branch_enable(&mte_async_mode);
if (!system_uses_mte_async_or_asymm_mode())
static_branch_enable(&mte_async_or_asymm_mode);
}
void mte_enable_kernel_asymm(void)
{
if (cpus_have_cap(ARM64_MTE_ASYMM)) {
__mte_enable_kernel("asymmetric", SCTLR_ELx_TCF_ASYMM);
/*
* MTE asymm mode behaves as async mode for store
* operations. The mode is set system wide by the
* first PE that executes this function.
*
* Note: If in future KASAN acquires a runtime switching
* mode in between sync and async, this strategy needs
* to be reviewed.
*/
if (!system_uses_mte_async_or_asymm_mode())
static_branch_enable(&mte_async_or_asymm_mode);
} else {
/*
* If the CPU does not support MTE asymmetric mode the
* kernel falls back on synchronous mode which is the
* default for kasan=on.
*/
mte_enable_kernel_sync();
}
}
#endif
......@@ -179,6 +208,30 @@ static void mte_update_sctlr_user(struct task_struct *task)
task->thread.sctlr_user = sctlr;
}
static void mte_update_gcr_excl(struct task_struct *task)
{
/*
* SYS_GCR_EL1 will be set to current->thread.mte_ctrl value by
* mte_set_user_gcr() in kernel_exit, but only if KASAN is enabled.
*/
if (kasan_hw_tags_enabled())
return;
write_sysreg_s(
((task->thread.mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
SYS_GCR_EL1_EXCL_MASK) | SYS_GCR_EL1_RRND,
SYS_GCR_EL1);
}
void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
__le32 *updptr, int nr_inst)
{
BUG_ON(nr_inst != 1); /* Branch -> NOP */
if (kasan_hw_tags_enabled())
*updptr = cpu_to_le32(aarch64_insn_gen_nop());
}
void mte_thread_init_user(void)
{
if (!system_supports_mte())
......@@ -198,6 +251,7 @@ void mte_thread_switch(struct task_struct *next)
return;
mte_update_sctlr_user(next);
mte_update_gcr_excl(next);
/*
* Check if an async tag exception occurred at EL1.
......@@ -243,6 +297,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
if (task == current) {
preempt_disable();
mte_update_sctlr_user(task);
mte_update_gcr_excl(task);
update_sctlr_el1(task->thread.sctlr_user);
preempt_enable();
}
......
......@@ -725,10 +725,10 @@ static void sve_init_header_from_task(struct user_sve_header *header,
if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
header->vl = target->thread.sve_vl;
header->vl = task_get_sve_vl(target);
vq = sve_vq_from_vl(header->vl);
header->max_vl = sve_max_vl;
header->max_vl = sve_max_vl();
header->size = SVE_PT_SIZE(vq, header->flags);
header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
SVE_PT_REGS_SVE);
......@@ -820,7 +820,7 @@ static int sve_set(struct task_struct *target,
goto out;
/* Actual VL set may be less than the user asked for: */
vq = sve_vq_from_vl(target->thread.sve_vl);
vq = sve_vq_from_vl(task_get_sve_vl(target));
/* Registers: FPSIMD-only case */
......
......@@ -4,6 +4,8 @@
*
* Copyright (C) Linaro.
* Copyright (C) Huawei Futurewei Technologies.
* Copyright (C) 2021, Microsoft Corporation.
* Pasha Tatashin <pasha.tatashin@soleen.com>
*/
#include <linux/kexec.h>
......@@ -13,7 +15,16 @@
#include <asm/kexec.h>
#include <asm/page.h>
#include <asm/sysreg.h>
#include <asm/virt.h>
.macro turn_off_mmu tmp1, tmp2
mov_q \tmp1, INIT_SCTLR_EL1_MMU_OFF
pre_disable_mmu_workaround
msr sctlr_el1, \tmp1
isb
.endm
.section ".kexec_relocate.text", "ax"
/*
* arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
*
......@@ -27,33 +38,24 @@
*/
SYM_CODE_START(arm64_relocate_new_kernel)
/* Setup the list loop variables. */
mov x18, x2 /* x18 = dtb address */
mov x17, x1 /* x17 = kimage_start */
mov x16, x0 /* x16 = kimage_head */
mov x14, xzr /* x14 = entry ptr */
mov x13, xzr /* x13 = copy dest */
/* Check if the new image needs relocation. */
tbnz x16, IND_DONE_BIT, .Ldone
ldr x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */
ldr x17, [x0, #KIMAGE_ARCH_TTBR1] /* x17 = linear map copy */
ldr x16, [x0, #KIMAGE_HEAD] /* x16 = kimage_head */
ldr x22, [x0, #KIMAGE_ARCH_PHYS_OFFSET] /* x22 phys_offset */
raw_dcache_line_size x15, x1 /* x15 = dcache line size */
break_before_make_ttbr_switch x18, x17, x1, x2 /* set linear map */
.Lloop:
and x12, x16, PAGE_MASK /* x12 = addr */
sub x12, x12, x22 /* Convert x12 to virt */
/* Test the entry flags. */
.Ltest_source:
tbz x16, IND_SOURCE_BIT, .Ltest_indirection
/* Invalidate dest page to PoC. */
mov x2, x13
add x20, x2, #PAGE_SIZE
sub x1, x15, #1
bic x2, x2, x1
2: dc ivac, x2
add x2, x2, x15
cmp x2, x20
b.lo 2b
dsb sy
mov x19, x13
copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
add x1, x19, #PAGE_SIZE
dcache_by_myline_op civac, sy, x19, x1, x15, x20
b .Lnext
.Ltest_indirection:
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
......@@ -65,31 +67,26 @@ SYM_CODE_START(arm64_relocate_new_kernel)
.Lnext:
ldr x16, [x14], #8 /* entry = *ptr++ */
tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */
.Ldone:
/* wait for writes from copy_page to finish */
dsb nsh
ic iallu
dsb nsh
isb
ldr x4, [x0, #KIMAGE_START] /* relocation start */
ldr x1, [x0, #KIMAGE_ARCH_EL2_VECTORS] /* relocation start */
ldr x0, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */
turn_off_mmu x12, x13
/* Start new image. */
mov x0, x18
mov x1, xzr
cbz x1, .Lel1
mov x1, x4 /* relocation start */
mov x2, x0 /* dtb address */
mov x3, xzr
mov x4, xzr
mov x0, #HVC_SOFT_RESTART
hvc #0 /* Jumps from el2 */
.Lel1:
mov x2, xzr
mov x3, xzr
br x17
br x4 /* Jumps from el1 */
SYM_CODE_END(arm64_relocate_new_kernel)
.align 3 /* To keep the 64-bit values below naturally aligned. */
.Lcopy_end:
.org KEXEC_CONTROL_PAGE_SIZE
/*
* arm64_relocate_new_kernel_size - Number of bytes to copy to the
* control_code_page.
*/
.globl arm64_relocate_new_kernel_size
arm64_relocate_new_kernel_size:
.quad .Lcopy_end - arm64_relocate_new_kernel
......@@ -202,7 +202,7 @@ unsigned long sdei_arch_get_entry_point(int conduit)
* dropped to EL1 because we don't support VHE, then we can't support
* SDEI.
*/
if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
if (is_hyp_nvhe()) {
pr_err("Not supported on this hardware/boot configuration\n");
goto out_err;
}
......
......@@ -227,7 +227,7 @@ static int preserve_sve_context(struct sve_context __user *ctx)
{
int err = 0;
u16 reserved[ARRAY_SIZE(ctx->__reserved)];
unsigned int vl = current->thread.sve_vl;
unsigned int vl = task_get_sve_vl(current);
unsigned int vq = 0;
if (test_thread_flag(TIF_SVE))
......@@ -266,7 +266,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
if (__copy_from_user(&sve, user->sve, sizeof(sve)))
return -EFAULT;
if (sve.vl != current->thread.sve_vl)
if (sve.vl != task_get_sve_vl(current))
return -EINVAL;
if (sve.head.size <= sizeof(*user->sve)) {
......@@ -594,10 +594,10 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
unsigned int vq = 0;
if (add_all || test_thread_flag(TIF_SVE)) {
int vl = sve_max_vl;
int vl = sve_max_vl();
if (!add_all)
vl = current->thread.sve_vl;
vl = task_get_sve_vl(current);
vq = sve_vq_from_vl(vl);
}
......
......@@ -400,11 +400,11 @@ static int call_undef_hook(struct pt_regs *regs)
unsigned long flags;
u32 instr;
int (*fn)(struct pt_regs *regs, u32 instr) = NULL;
void __user *pc = (void __user *)instruction_pointer(regs);
unsigned long pc = instruction_pointer(regs);
if (!user_mode(regs)) {
__le32 instr_le;
if (get_kernel_nofault(instr_le, (__force __le32 *)pc))
if (get_kernel_nofault(instr_le, (__le32 *)pc))
goto exit;
instr = le32_to_cpu(instr_le);
} else if (compat_thumb_mode(regs)) {
......@@ -527,14 +527,9 @@ NOKPROBE_SYMBOL(do_ptrauth_fault);
"1: " insn ", %1\n" \
" mov %w0, #0\n" \
"2:\n" \
" .pushsection .fixup,\"ax\"\n" \
" .align 2\n" \
"3: mov %w0, %w2\n" \
" b 2b\n" \
" .popsection\n" \
_ASM_EXTABLE(1b, 3b) \
_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \
: "=r" (res) \
: "r" (address), "i" (-EFAULT)); \
: "r" (address)); \
uaccess_ttbr0_disable(); \
}
......@@ -653,6 +648,12 @@ static const struct sys64_hook sys64_hooks[] = {
.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCT,
.handler = cntvct_read_handler,
},
{
/* Trap read access to CNTVCTSS_EL0 */
.esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCTSS,
.handler = cntvct_read_handler,
},
{
/* Trap read access to CNTFRQ_EL0 */
.esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
......@@ -729,6 +730,11 @@ static const struct sys64_hook cp15_64_hooks[] = {
.esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT,
.handler = compat_cntvct_read_handler,
},
{
.esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK,
.esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS,
.handler = compat_cntvct_read_handler,
},
{},
};
......
......@@ -10,18 +10,15 @@ include $(srctree)/lib/vdso/Makefile
# Same as cc-*option, but using CC_COMPAT instead of CC
ifeq ($(CONFIG_CC_IS_CLANG), y)
CC_COMPAT_CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%))
CC_COMPAT ?= $(CC)
CC_COMPAT += $(CC_COMPAT_CLANG_FLAGS)
ifneq ($(LLVM),)
LD_COMPAT ?= $(LD)
CC_COMPAT += --target=arm-linux-gnueabi
else
LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld
CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc
endif
ifeq ($(CONFIG_LD_IS_LLD), y)
LD_COMPAT ?= $(LD)
else
CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc
LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld
endif
......@@ -29,8 +26,6 @@ cc32-option = $(call try-run,\
$(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
cc32-disable-warning = $(call try-run,\
$(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
cc32-as-instr = $(call try-run,\
printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
# We cannot use the global flags to compile the vDSO files, the main reason
# being that the 32-bit compiler may be older than the main (64-bit) compiler
......@@ -40,16 +35,13 @@ cc32-as-instr = $(call try-run,\
# As a result we set our own flags here.
# KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc
VDSO_CPPFLAGS += -isystem $(shell $(CC_COMPAT) -print-file-name=include 2>/dev/null)
VDSO_CPPFLAGS += $(LINUXINCLUDE)
# Common C and assembly flags
# From top-level Makefile
VDSO_CAFLAGS := $(VDSO_CPPFLAGS)
ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),)
VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%))
endif
VDSO_CAFLAGS += $(call cc32-option,-fno-PIE)
ifdef CONFIG_DEBUG_INFO
VDSO_CAFLAGS += -g
......@@ -67,13 +59,7 @@ endif
# From arm vDSO Makefile
VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector
VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
# Try to compile for ARMv8. If the compiler is too old and doesn't support it,
# fall back to v7. There is no easy way to check for what architecture the code
# is being compiled, so define a macro specifying that (see arch/arm/Makefile).
VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\
-march=armv7-a -D__LINUX_ARM_ARCH__=7)
VDSO_CAFLAGS += -march=armv8-a
VDSO_CFLAGS := $(VDSO_CAFLAGS)
VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1
......@@ -113,12 +99,6 @@ endif
VDSO_AFLAGS := $(VDSO_CAFLAGS)
VDSO_AFLAGS += -D__ASSEMBLY__
# Check for binutils support for dmb ishld
dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1)
VDSO_CFLAGS += $(dmbinstr)
VDSO_AFLAGS += $(dmbinstr)
# From arm vDSO Makefile
VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1
VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096
......
......@@ -57,12 +57,13 @@
#define SBSS_ALIGN 0
#endif
#define RO_EXCEPTION_TABLE_ALIGN 8
#define RO_EXCEPTION_TABLE_ALIGN 4
#define RUNTIME_DISCARD_EXIT
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
#include <asm/kernel-pgtable.h>
#include <asm/kexec.h>
#include <asm/memory.h>
#include <asm/page.h>
......@@ -100,6 +101,16 @@ jiffies = jiffies_64;
#define HIBERNATE_TEXT
#endif
#ifdef CONFIG_KEXEC_CORE
#define KEXEC_TEXT \
. = ALIGN(SZ_4K); \
__relocate_new_kernel_start = .; \
*(.kexec_relocate.text) \
__relocate_new_kernel_end = .;
#else
#define KEXEC_TEXT
#endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define TRAMP_TEXT \
. = ALIGN(PAGE_SIZE); \
......@@ -160,8 +171,8 @@ SECTIONS
HYPERVISOR_TEXT
IDMAP_TEXT
HIBERNATE_TEXT
KEXEC_TEXT
TRAMP_TEXT
*(.fixup)
*(.gnu.warning)
. = ALIGN(16);
*(.got) /* Global offset table */
......@@ -348,3 +359,10 @@ ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET,
ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET,
"TRAMP_SWAPPER_OFFSET is wrong!")
#endif
#ifdef CONFIG_KEXEC_CORE
/* kexec relocation code should fit into one KEXEC_CONTROL_PAGE_SIZE */
ASSERT(__relocate_new_kernel_end - (__relocate_new_kernel_start & ~(SZ_4K - 1))
<= SZ_4K, "kexec relocation code is too big or misaligned")
ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken")
#endif
......@@ -21,11 +21,13 @@ SYM_FUNC_START(__fpsimd_restore_state)
SYM_FUNC_END(__fpsimd_restore_state)
SYM_FUNC_START(__sve_restore_state)
__sve_load 0, x1, 2
mov x2, #1
sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
SYM_FUNC_START(__sve_save_state)
sve_save 0, x1, 2
mov x2, #1
sve_save 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_save_state)
......@@ -30,8 +30,12 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
extern struct exception_table_entry __start___kvm_ex_table;
extern struct exception_table_entry __stop___kvm_ex_table;
struct kvm_exception_table_entry {
int insn, fixup;
};
extern struct kvm_exception_table_entry __start___kvm_ex_table;
extern struct kvm_exception_table_entry __stop___kvm_ex_table;
/* Check whether the FP regs were dirtied while in the host-side run loop: */
static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
......@@ -510,7 +514,7 @@ static inline void __kvm_unexpected_el2_exception(void)
{
extern char __guest_exit_panic[];
unsigned long addr, fixup;
struct exception_table_entry *entry, *end;
struct kvm_exception_table_entry *entry, *end;
unsigned long elr_el2 = read_sysreg(elr_el2);
entry = &__start___kvm_ex_table;
......
......@@ -46,7 +46,7 @@ unsigned int kvm_sve_max_vl;
int kvm_arm_init_sve(void)
{
if (system_supports_sve()) {
kvm_sve_max_vl = sve_max_virtualisable_vl;
kvm_sve_max_vl = sve_max_virtualisable_vl();
/*
* The get_sve_reg()/set_sve_reg() ioctl interface will need
......@@ -61,7 +61,7 @@ int kvm_arm_init_sve(void)
* Don't even try to make use of vector lengths that
* aren't available on all CPUs, for now:
*/
if (kvm_sve_max_vl < sve_max_vl)
if (kvm_sve_max_vl < sve_max_vl())
pr_warn("KVM: SVE vector length for guests limited to %u bytes\n",
kvm_sve_max_vl);
}
......@@ -102,7 +102,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
* kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
* set_sve_vls(). Double-check here just to be sure:
*/
if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl ||
if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl() ||
vl > SVE_VL_ARCH_MAX))
return -EIO;
......
......@@ -4,7 +4,7 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/asm-uaccess.h>
.text
......@@ -45,13 +45,11 @@ USER(9f, sttrh wzr, [x0])
USER(7f, sttrb wzr, [x2, #-1])
5: mov x0, #0
ret
SYM_FUNC_END(__arch_clear_user)
EXPORT_SYMBOL(__arch_clear_user)
.section .fixup,"ax"
.align 2
// Exception fixups
7: sub x0, x2, #5 // Adjust for faulting on the final byte...
8: add x0, x0, #4 // ...or the second word of the 4-7 byte case
9: sub x0, x2, x0
ret
.previous
SYM_FUNC_END(__arch_clear_user)
EXPORT_SYMBOL(__arch_clear_user)
......@@ -60,11 +60,8 @@ SYM_FUNC_START(__arch_copy_from_user)
#include "copy_template.S"
mov x0, #0 // Nothing to copy
ret
SYM_FUNC_END(__arch_copy_from_user)
EXPORT_SYMBOL(__arch_copy_from_user)
.section .fixup,"ax"
.align 2
// Exception fixups
9997: cmp dst, dstin
b.ne 9998f
// Before being absolutely sure we couldn't copy anything, try harder
......@@ -72,4 +69,5 @@ USER(9998f, ldtrb tmp1w, [srcin])
strb tmp1w, [dst], #1
9998: sub x0, end, dst // bytes not copied
ret
.previous
SYM_FUNC_END(__arch_copy_from_user)
EXPORT_SYMBOL(__arch_copy_from_user)
......@@ -59,11 +59,8 @@ SYM_FUNC_START(__arch_copy_to_user)
#include "copy_template.S"
mov x0, #0
ret
SYM_FUNC_END(__arch_copy_to_user)
EXPORT_SYMBOL(__arch_copy_to_user)
.section .fixup,"ax"
.align 2
// Exception fixups
9997: cmp dst, dstin
b.ne 9998f
// Before being absolutely sure we couldn't copy anything, try harder
......@@ -72,4 +69,5 @@ USER(9998f, sttrb tmp1w, [dst])
add dst, dst, #1
9998: sub x0, end, dst // bytes not copied
ret
.previous
SYM_FUNC_END(__arch_copy_to_user)
EXPORT_SYMBOL(__arch_copy_to_user)
......@@ -7,6 +7,7 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o
obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o
obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_ARM64_MTE) += mteswap.o
KASAN_SANITIZE_physaddr.o += n
......
......@@ -3,20 +3,87 @@
* Based on arch/arm/mm/extable.c
*/
#include <linux/bitfield.h>
#include <linux/extable.h>
#include <linux/uaccess.h>
int fixup_exception(struct pt_regs *regs)
#include <asm/asm-extable.h>
#include <asm/ptrace.h>
typedef bool (*ex_handler_t)(const struct exception_table_entry *,
struct pt_regs *);
static inline unsigned long
get_ex_fixup(const struct exception_table_entry *ex)
{
return ((unsigned long)&ex->fixup + ex->fixup);
}
static bool ex_handler_fixup(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
regs->pc = get_ex_fixup(ex);
return true;
}
static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data);
pt_regs_write_reg(regs, reg_err, -EFAULT);
pt_regs_write_reg(regs, reg_zero, 0);
regs->pc = get_ex_fixup(ex);
return true;
}
static bool
ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type);
int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type);
unsigned long data, addr, offset;
addr = pt_regs_read_reg(regs, reg_addr);
offset = addr & 0x7UL;
addr &= ~0x7UL;
data = *(unsigned long*)addr;
#ifndef __AARCH64EB__
data >>= 8 * offset;
#else
data <<= 8 * offset;
#endif
pt_regs_write_reg(regs, reg_data, data);
regs->pc = get_ex_fixup(ex);
return true;
}
bool fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
const struct exception_table_entry *ex;
fixup = search_exception_tables(instruction_pointer(regs));
if (!fixup)
return 0;
ex = search_exception_tables(instruction_pointer(regs));
if (!ex)
return false;
if (in_bpf_jit(regs))
return arm64_bpf_fixup_exception(fixup, regs);
switch (ex->type) {
case EX_TYPE_FIXUP:
return ex_handler_fixup(ex, regs);
case EX_TYPE_BPF:
return ex_handler_bpf(ex, regs);
case EX_TYPE_UACCESS_ERR_ZERO:
return ex_handler_uaccess_err_zero(ex, regs);
case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
return ex_handler_load_unaligned_zeropad(ex, regs);
}
regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
return 1;
BUG();
}
......@@ -40,11 +40,11 @@ void __init arm64_hugetlb_cma_reserve(void)
{
int order;
#ifdef CONFIG_ARM64_4K_PAGES
if (pud_sect_supported())
order = PUD_SHIFT - PAGE_SHIFT;
#else
else
order = CONT_PMD_SHIFT - PAGE_SHIFT;
#endif
/*
* HugeTLB CMA reservation is required for gigantic
* huge pages which could not be allocated via the
......@@ -62,8 +62,9 @@ bool arch_hugetlb_migration_supported(struct hstate *h)
size_t pagesize = huge_page_size(h);
switch (pagesize) {
#ifdef CONFIG_ARM64_4K_PAGES
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SIZE:
return pud_sect_supported();
#endif
case PMD_SIZE:
case CONT_PMD_SIZE:
......@@ -126,8 +127,11 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
*pgsize = size;
switch (size) {
#ifdef CONFIG_ARM64_4K_PAGES
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SIZE:
if (pud_sect_supported())
contig_ptes = 1;
break;
#endif
case PMD_SIZE:
contig_ptes = 1;
......@@ -489,9 +493,9 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
static int __init hugetlbpage_init(void)
{
#ifdef CONFIG_ARM64_4K_PAGES
if (pud_sect_supported())
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
#endif
hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT);
hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT);
......@@ -503,8 +507,9 @@ arch_initcall(hugetlbpage_init);
bool __init arch_hugetlb_valid_size(unsigned long size)
{
switch (size) {
#ifdef CONFIG_ARM64_4K_PAGES
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SIZE:
return pud_sect_supported();
#endif
case CONT_PMD_SIZE:
case PMD_SIZE:
......
......@@ -160,43 +160,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
free_area_init(max_zone_pfns);
}
int pfn_valid(unsigned long pfn)
{
phys_addr_t addr = PFN_PHYS(pfn);
struct mem_section *ms;
/*
* Ensure the upper PAGE_SHIFT bits are clear in the
* pfn. Else it might lead to false positives when
* some of the upper bits are set, but the lower bits
* match a valid pfn.
*/
if (PHYS_PFN(addr) != pfn)
return 0;
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
ms = __pfn_to_section(pfn);
if (!valid_section(ms))
return 0;
/*
* ZONE_DEVICE memory does not have the memblock entries.
* memblock_is_map_memory() check for ZONE_DEVICE based
* addresses will always fail. Even the normal hotplugged
* memory will never have MEMBLOCK_NOMAP flag set in their
* memblock entries. Skip memblock search for all non early
* memory sections covering all of hotplug memory including
* both normal and ZONE_DEVICE based.
*/
if (!early_section(ms))
return pfn_section_valid(ms, pfn);
return memblock_is_memory(addr);
}
EXPORT_SYMBOL(pfn_valid);
int pfn_is_map_memory(unsigned long pfn)
{
phys_addr_t addr = PFN_PHYS(pfn);
......@@ -416,8 +379,6 @@ void __init mem_init(void)
else if (!xen_swiotlb_detect())
swiotlb_force = SWIOTLB_NO_FORCE;
set_max_mapnr(max_pfn - PHYS_PFN_OFFSET);
/* this will put all unused low memory onto the freelists */
memblock_free_all();
......
......@@ -1499,6 +1499,11 @@ int arch_add_memory(int nid, u64 start, u64 size,
if (ret)
__remove_pgd_mapping(swapper_pg_dir,
__phys_to_virt(start), size);
else {
max_pfn = PFN_UP(start + size);
max_low_pfn = max_pfn;
}
return ret;
}
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, Microsoft Corporation.
* Pasha Tatashin <pasha.tatashin@soleen.com>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/kvm_asm.h>
.macro invalid_vector label
SYM_CODE_START_LOCAL(\label)
.align 7
b \label
SYM_CODE_END(\label)
.endm
.macro el1_sync_vector
SYM_CODE_START_LOCAL(el1_sync)
.align 7
cmp x0, #HVC_SET_VECTORS /* Called from hibernate */
b.ne 1f
msr vbar_el2, x1
mov x0, xzr
eret
1: cmp x0, #HVC_SOFT_RESTART /* Called from kexec */
b.ne 2f
mov x0, x2
mov x2, x4
mov x4, x1
mov x1, x3
br x4
2: /* Unexpected argument, set an error */
mov_q x0, HVC_STUB_ERR
eret
SYM_CODE_END(el1_sync)
.endm
SYM_CODE_START(trans_pgd_stub_vectors)
invalid_vector hyp_stub_el2t_sync_invalid // Synchronous EL2t
invalid_vector hyp_stub_el2t_irq_invalid // IRQ EL2t
invalid_vector hyp_stub_el2t_fiq_invalid // FIQ EL2t
invalid_vector hyp_stub_el2t_error_invalid // Error EL2t
invalid_vector hyp_stub_el2h_sync_invalid // Synchronous EL2h
invalid_vector hyp_stub_el2h_irq_invalid // IRQ EL2h
invalid_vector hyp_stub_el2h_fiq_invalid // FIQ EL2h
invalid_vector hyp_stub_el2h_error_invalid // Error EL2h
el1_sync_vector // Synchronous 64-bit EL1
invalid_vector hyp_stub_el1_irq_invalid // IRQ 64-bit EL1
invalid_vector hyp_stub_el1_fiq_invalid // FIQ 64-bit EL1
invalid_vector hyp_stub_el1_error_invalid // Error 64-bit EL1
invalid_vector hyp_stub_32b_el1_sync_invalid // Synchronous 32-bit EL1
invalid_vector hyp_stub_32b_el1_irq_invalid // IRQ 32-bit EL1
invalid_vector hyp_stub_32b_el1_fiq_invalid // FIQ 32-bit EL1
invalid_vector hyp_stub_32b_el1_error_invalid // Error 32-bit EL1
.align 11
SYM_INNER_LABEL(__trans_pgd_stub_vectors_end, SYM_L_LOCAL)
SYM_CODE_END(trans_pgd_stub_vectors)
# Check the trans_pgd_stub_vectors didn't overflow
.org . - (__trans_pgd_stub_vectors_end - trans_pgd_stub_vectors) + SZ_2K
......@@ -5,8 +5,8 @@
*
* This file derived from: arch/arm64/kernel/hibernate.c
*
* Copyright (c) 2020, Microsoft Corporation.
* Pavel Tatashin <pasha.tatashin@soleen.com>
* Copyright (c) 2021, Microsoft Corporation.
* Pasha Tatashin <pasha.tatashin@soleen.com>
*
*/
......@@ -217,63 +217,6 @@ int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp,
return rc;
}
/*
* Add map entry to trans_pgd for a base-size page at PTE level.
* info: contains allocator and its argument
* trans_pgd: page table in which new map is added.
* page: page to be mapped.
* dst_addr: new VA address for the page
* pgprot: protection for the page.
*
* Returns 0 on success, and -ENOMEM on failure.
*/
int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd,
void *page, unsigned long dst_addr, pgprot_t pgprot)
{
pgd_t *pgdp;
p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
if (pgd_none(READ_ONCE(*pgdp))) {
p4dp = trans_alloc(info);
if (!pgdp)
return -ENOMEM;
pgd_populate(NULL, pgdp, p4dp);
}
p4dp = p4d_offset(pgdp, dst_addr);
if (p4d_none(READ_ONCE(*p4dp))) {
pudp = trans_alloc(info);
if (!pudp)
return -ENOMEM;
p4d_populate(NULL, p4dp, pudp);
}
pudp = pud_offset(p4dp, dst_addr);
if (pud_none(READ_ONCE(*pudp))) {
pmdp = trans_alloc(info);
if (!pmdp)
return -ENOMEM;
pud_populate(NULL, pudp, pmdp);
}
pmdp = pmd_offset(pudp, dst_addr);
if (pmd_none(READ_ONCE(*pmdp))) {
ptep = trans_alloc(info);
if (!ptep)
return -ENOMEM;
pmd_populate_kernel(NULL, pmdp, ptep);
}
ptep = pte_offset_kernel(pmdp, dst_addr);
set_pte(ptep, pfn_pte(virt_to_pfn(page), pgprot));
return 0;
}
/*
* The page we want to idmap may be outside the range covered by VA_BITS that
* can be built using the kernel's p?d_populate() helpers. As a one off, for a
......@@ -322,3 +265,26 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
return 0;
}
/*
* Create a copy of the vector table so we can call HVC_SET_VECTORS or
* HVC_SOFT_RESTART from contexts where the table may be overwritten.
*/
int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info,
phys_addr_t *el2_vectors)
{
void *hyp_stub = trans_alloc(info);
if (!hyp_stub)
return -ENOMEM;
*el2_vectors = virt_to_phys(hyp_stub);
memcpy(hyp_stub, &trans_pgd_stub_vectors, ARM64_VECTOR_TABLE_LEN);
caches_clean_inval_pou((unsigned long)hyp_stub,
(unsigned long)hyp_stub +
ARM64_VECTOR_TABLE_LEN);
dcache_clean_inval_poc((unsigned long)hyp_stub,
(unsigned long)hyp_stub +
ARM64_VECTOR_TABLE_LEN);
return 0;
}
......@@ -13,6 +13,7 @@
#include <linux/printk.h>
#include <linux/slab.h>
#include <asm/asm-extable.h>
#include <asm/byteorder.h>
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
......@@ -358,7 +359,7 @@ static void build_epilogue(struct jit_ctx *ctx)
#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
bool ex_handler_bpf(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
......@@ -366,7 +367,7 @@ int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
regs->regs[dst_reg] = 0;
regs->pc = (unsigned long)&ex->fixup - offset;
return 1;
return true;
}
/* For accesses to BTF pointers, add an entry to the exception table */
......@@ -412,6 +413,8 @@ static int add_exception_handler(const struct bpf_insn *insn,
ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
ex->type = EX_TYPE_BPF;
ctx->exentry_idx++;
return 0;
}
......
......@@ -18,6 +18,7 @@ HAS_CRC32
HAS_DCPODP
HAS_DCPOP
HAS_E0PD
HAS_ECV
HAS_EPAN
HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH
......@@ -39,6 +40,7 @@ HW_DBM
KVM_PROTECTED_MODE
MISMATCHED_CACHE_TYPE
MTE
MTE_ASYMM
SPECTRE_V2
SPECTRE_V3A
SPECTRE_V4
......@@ -53,6 +55,9 @@ WORKAROUND_1418040
WORKAROUND_1463225
WORKAROUND_1508412
WORKAROUND_1542419
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
WORKAROUND_CAVIUM_23154
WORKAROUND_CAVIUM_27456
WORKAROUND_CAVIUM_30115
......
......@@ -36,7 +36,7 @@ config ARM_CCI5xx_PMU
config ARM_CCN
tristate "ARM CCN driver support"
depends on ARM || ARM64
depends on ARM || ARM64 || COMPILE_TEST
help
PMU (perf) driver supporting the ARM CCN (Cache Coherent Network)
interconnect.
......@@ -62,7 +62,8 @@ config ARM_PMU_ACPI
config ARM_SMMU_V3_PMU
tristate "ARM SMMUv3 Performance Monitors Extension"
depends on ARM64 && ACPI
depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT)
depends on GENERIC_MSI_IRQ_DOMAIN
help
Provides support for the ARM SMMUv3 Performance Monitor Counter
Groups (PMCG), which provide monitoring of transactions passing
......@@ -80,7 +81,7 @@ config ARM_DSU_PMU
config FSL_IMX8_DDR_PMU
tristate "Freescale i.MX8 DDR perf monitor"
depends on ARCH_MXC
depends on ARCH_MXC || COMPILE_TEST
help
Provides support for the DDR performance monitor in i.MX8, which
can give information about memory throughput and other related
......@@ -108,7 +109,8 @@ config QCOM_L3_PMU
config THUNDERX2_PMU
tristate "Cavium ThunderX2 SoC PMU UNCORE"
depends on ARCH_THUNDER2 && ARM64 && ACPI && NUMA
depends on ARCH_THUNDER2 || COMPILE_TEST
depends on NUMA && ACPI
default m
help
Provides support for ThunderX2 UNCORE events.
......@@ -116,7 +118,7 @@ config THUNDERX2_PMU
in the DDR4 Memory Controller (DMC).
config XGENE_PMU
depends on ARCH_XGENE
depends on ARCH_XGENE || (COMPILE_TEST && 64BIT)
bool "APM X-Gene SoC PMU"
default n
help
......
......@@ -27,7 +27,7 @@
#define PA_INT_CLEAR 0x1c7c
#define PA_EVENT_TYPE0 0x1c80
#define PA_PMU_VERSION 0x1cf0
#define PA_EVENT_CNT0_L 0x1f00
#define PA_EVENT_CNT0_L 0x1d00
#define PA_EVTYPE_MASK 0xff
#define PA_NR_COUNTERS 0x8
......
......@@ -487,7 +487,7 @@ static void tx2_uncore_event_update(struct perf_event *event)
new = reg_readl(hwc->event_base);
prev = local64_xchg(&hwc->prev_count, new);
/* handles rollover of 32 bit counter */
delta = (u32)(((1UL << 32) - prev) + new);
delta = (u32)(((1ULL << 32) - prev) + new);
}
/* DMC event data_transfers granularity is 16 Bytes, convert it to 64 */
......
......@@ -89,7 +89,7 @@ static __always_inline bool kasan_enabled(void)
return static_branch_likely(&kasan_flag_enabled);
}
static inline bool kasan_has_integrated_init(void)
static inline bool kasan_hw_tags_enabled(void)
{
return kasan_enabled();
}
......@@ -104,7 +104,7 @@ static inline bool kasan_enabled(void)
return IS_ENABLED(CONFIG_KASAN);
}
static inline bool kasan_has_integrated_init(void)
static inline bool kasan_hw_tags_enabled(void)
{
return false;
}
......@@ -125,6 +125,11 @@ static __always_inline void kasan_free_pages(struct page *page,
#endif /* CONFIG_KASAN_HW_TAGS */
static inline bool kasan_has_integrated_init(void)
{
return kasan_hw_tags_enabled();
}
#ifdef CONFIG_KASAN
struct kasan_cache {
......
......@@ -296,10 +296,6 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
if (WARN_ON_ONCE(!dev->dma_mask))
return DMA_MAPPING_ERROR;
/* Don't allow RAM to be mapped */
if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr))))
return DMA_MAPPING_ERROR;
if (dma_map_direct(dev, ops))
addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
else if (ops->map_resource)
......
......@@ -78,6 +78,7 @@ void scs_free(void *s)
if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
return;
kasan_unpoison_vmalloc(s, SCS_SIZE);
vfree_atomic(s);
}
......
......@@ -88,7 +88,7 @@ static void kasan_test_exit(struct kunit *test)
*/
#define KUNIT_EXPECT_KASAN_FAIL(test, expression) do { \
if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \
!kasan_async_mode_enabled()) \
kasan_sync_fault_possible()) \
migrate_disable(); \
KUNIT_EXPECT_FALSE(test, READ_ONCE(fail_data.report_found)); \
barrier(); \
......
......@@ -29,6 +29,7 @@ enum kasan_arg_mode {
KASAN_ARG_MODE_DEFAULT,
KASAN_ARG_MODE_SYNC,
KASAN_ARG_MODE_ASYNC,
KASAN_ARG_MODE_ASYMM,
};
enum kasan_arg_stacktrace {
......@@ -45,9 +46,9 @@ static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init;
DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled);
EXPORT_SYMBOL(kasan_flag_enabled);
/* Whether the asynchronous mode is enabled. */
bool kasan_flag_async __ro_after_init;
EXPORT_SYMBOL_GPL(kasan_flag_async);
/* Whether the selected mode is synchronous/asynchronous/asymmetric.*/
enum kasan_mode kasan_mode __ro_after_init;
EXPORT_SYMBOL_GPL(kasan_mode);
/* Whether to collect alloc/free stack traces. */
DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
......@@ -69,7 +70,7 @@ static int __init early_kasan_flag(char *arg)
}
early_param("kasan", early_kasan_flag);
/* kasan.mode=sync/async */
/* kasan.mode=sync/async/asymm */
static int __init early_kasan_mode(char *arg)
{
if (!arg)
......@@ -79,6 +80,8 @@ static int __init early_kasan_mode(char *arg)
kasan_arg_mode = KASAN_ARG_MODE_SYNC;
else if (!strcmp(arg, "async"))
kasan_arg_mode = KASAN_ARG_MODE_ASYNC;
else if (!strcmp(arg, "asymm"))
kasan_arg_mode = KASAN_ARG_MODE_ASYMM;
else
return -EINVAL;
......@@ -116,11 +119,13 @@ void kasan_init_hw_tags_cpu(void)
return;
/*
* Enable async mode only when explicitly requested through
* the command line.
* Enable async or asymm modes only when explicitly requested
* through the command line.
*/
if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC)
hw_enable_tagging_async();
else if (kasan_arg_mode == KASAN_ARG_MODE_ASYMM)
hw_enable_tagging_asymm();
else
hw_enable_tagging_sync();
}
......@@ -143,15 +148,19 @@ void __init kasan_init_hw_tags(void)
case KASAN_ARG_MODE_DEFAULT:
/*
* Default to sync mode.
* Do nothing, kasan_flag_async keeps its default value.
*/
break;
fallthrough;
case KASAN_ARG_MODE_SYNC:
/* Do nothing, kasan_flag_async keeps its default value. */
/* Sync mode enabled. */
kasan_mode = KASAN_MODE_SYNC;
break;
case KASAN_ARG_MODE_ASYNC:
/* Async mode enabled. */
kasan_flag_async = true;
kasan_mode = KASAN_MODE_ASYNC;
break;
case KASAN_ARG_MODE_ASYMM:
/* Asymm mode enabled. */
kasan_mode = KASAN_MODE_ASYMM;
break;
}
......
......@@ -13,16 +13,28 @@
#include "../slab.h"
DECLARE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
extern bool kasan_flag_async __ro_after_init;
enum kasan_mode {
KASAN_MODE_SYNC,
KASAN_MODE_ASYNC,
KASAN_MODE_ASYMM,
};
extern enum kasan_mode kasan_mode __ro_after_init;
static inline bool kasan_stack_collection_enabled(void)
{
return static_branch_unlikely(&kasan_flag_stacktrace);
}
static inline bool kasan_async_mode_enabled(void)
static inline bool kasan_async_fault_possible(void)
{
return kasan_mode == KASAN_MODE_ASYNC || kasan_mode == KASAN_MODE_ASYMM;
}
static inline bool kasan_sync_fault_possible(void)
{
return kasan_flag_async;
return kasan_mode == KASAN_MODE_SYNC || kasan_mode == KASAN_MODE_ASYMM;
}
#else
......@@ -31,14 +43,17 @@ static inline bool kasan_stack_collection_enabled(void)
return true;
}
static inline bool kasan_async_mode_enabled(void)
static inline bool kasan_async_fault_possible(void)
{
return false;
}
#endif
static inline bool kasan_sync_fault_possible(void)
{
return true;
}
extern bool kasan_flag_async __ro_after_init;
#endif
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
#define KASAN_GRANULE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT)
......@@ -289,6 +304,9 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
#ifndef arch_enable_tagging_async
#define arch_enable_tagging_async()
#endif
#ifndef arch_enable_tagging_asymm
#define arch_enable_tagging_asymm()
#endif
#ifndef arch_force_async_tag_fault
#define arch_force_async_tag_fault()
#endif
......@@ -304,6 +322,7 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
#define hw_enable_tagging_sync() arch_enable_tagging_sync()
#define hw_enable_tagging_async() arch_enable_tagging_async()
#define hw_enable_tagging_asymm() arch_enable_tagging_asymm()
#define hw_force_async_tag_fault() arch_force_async_tag_fault()
#define hw_get_random_tag() arch_get_random_tag()
#define hw_get_mem_tag(addr) arch_get_mem_tag(addr)
......@@ -314,6 +333,7 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
#define hw_enable_tagging_sync()
#define hw_enable_tagging_async()
#define hw_enable_tagging_asymm()
#endif /* CONFIG_KASAN_HW_TAGS */
......
......@@ -112,7 +112,7 @@ static void start_report(unsigned long *flags)
static void end_report(unsigned long *flags, unsigned long addr)
{
if (!kasan_async_mode_enabled())
if (!kasan_async_fault_possible())
trace_error_report_end(ERROR_DETECTOR_KASAN, addr);
pr_err("==================================================================\n");
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
......
......@@ -231,6 +231,34 @@ static void sort_relative_table(char *extab_image, int image_size)
}
}
static void arm64_sort_relative_table(char *extab_image, int image_size)
{
int i = 0;
while (i < image_size) {
uint32_t *loc = (uint32_t *)(extab_image + i);
w(r(loc) + i, loc);
w(r(loc + 1) + i + 4, loc + 1);
/* Don't touch the fixup type or data */
i += sizeof(uint32_t) * 3;
}
qsort(extab_image, image_size / 12, 12, compare_relative_table);
i = 0;
while (i < image_size) {
uint32_t *loc = (uint32_t *)(extab_image + i);
w(r(loc) - i, loc);
w(r(loc + 1) - (i + 4), loc + 1);
/* Don't touch the fixup type or data */
i += sizeof(uint32_t) * 3;
}
}
static void x86_sort_relative_table(char *extab_image, int image_size)
{
int i = 0;
......@@ -343,6 +371,8 @@ static int do_file(char const *const fname, void *addr)
custom_sort = s390_sort_relative_table;
break;
case EM_AARCH64:
custom_sort = arm64_sort_relative_table;
break;
case EM_PARISC:
case EM_PPC:
case EM_PPC64:
......
......@@ -9,12 +9,12 @@ TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \
all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
fpsimd-test: fpsimd-test.o
fpsimd-test: fpsimd-test.o asm-utils.o
$(CC) -nostdlib $^ -o $@
rdvl-sve: rdvl-sve.o rdvl.o
sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
sve-ptrace: sve-ptrace.o
sve-probe-vls: sve-probe-vls.o rdvl.o
sve-test: sve-test.o
sve-test: sve-test.o asm-utils.o
$(CC) -nostdlib $^ -o $@
vec-syscfg: vec-syscfg.o rdvl.o
vlset: vlset.o
......
- Test unsupported values in the ABIs.
- More coverage for ptrace (eg, vector length conversions).
- Coverage for signals.
- Test PR_SVE_VL_INHERITY after a double fork.
- More coverage for ptrace:
- Get/set of FFR.
- Ensure ptraced processes actually see the register state visible through
the ptrace interface.
- Big endian.
- Test PR_SVE_VL_INHERIT after a double fork.
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2015-2021 ARM Limited.
// Original author: Dave Martin <Dave.Martin@arm.com>
//
// Utility functions for assembly code.
#include <asm/unistd.h>
#include "assembler.h"
// Print a single character x0 to stdout
// Clobbers x0-x2,x8
function putc
str x0, [sp, #-16]!
mov x0, #1 // STDOUT_FILENO
mov x1, sp
mov x2, #1
mov x8, #__NR_write
svc #0
add sp, sp, #16
ret
endfunction
.globl putc
// Print a NUL-terminated string starting at address x0 to stdout
// Clobbers x0-x3,x8
function puts
mov x1, x0
mov x2, #0
0: ldrb w3, [x0], #1
cbz w3, 1f
add x2, x2, #1
b 0b
1: mov w0, #1 // STDOUT_FILENO
mov x8, #__NR_write
svc #0
ret
endfunction
.globl puts
// Print an unsigned decimal number x0 to stdout
// Clobbers x0-x4,x8
function putdec
mov x1, sp
str x30, [sp, #-32]! // Result can't be > 20 digits
mov x2, #0
strb w2, [x1, #-1]! // Write the NUL terminator
mov x2, #10
0: udiv x3, x0, x2 // div-mod loop to generate the digits
msub x0, x3, x2, x0
add w0, w0, #'0'
strb w0, [x1, #-1]!
mov x0, x3
cbnz x3, 0b
ldrb w0, [x1]
cbnz w0, 1f
mov w0, #'0' // Print "0" for 0, not ""
strb w0, [x1, #-1]!
1: mov x0, x1
bl puts
ldr x30, [sp], #32
ret
endfunction
.globl putdec
// Print an unsigned decimal number x0 to stdout, followed by a newline
// Clobbers x0-x5,x8
function putdecn
mov x5, x30
bl putdec
mov x0, #'\n'
bl putc
ret x5
endfunction
.globl putdecn
// Clobbers x0-x3,x8
function puthexb
str x30, [sp, #-0x10]!
mov w3, w0
lsr w0, w0, #4
bl puthexnibble
mov w0, w3
ldr x30, [sp], #0x10
// fall through to puthexnibble
endfunction
.globl puthexb
// Clobbers x0-x2,x8
function puthexnibble
and w0, w0, #0xf
cmp w0, #10
blo 1f
add w0, w0, #'a' - ('9' + 1)
1: add w0, w0, #'0'
b putc
endfunction
.globl puthexnibble
// x0=data in, x1=size in, clobbers x0-x5,x8
function dumphex
str x30, [sp, #-0x10]!
mov x4, x0
mov x5, x1
0: subs x5, x5, #1
b.lo 1f
ldrb w0, [x4], #1
bl puthexb
b 0b
1: ldr x30, [sp], #0x10
ret
endfunction
.globl dumphex
// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
// Clobbers x0-x3
function memcpy
cmp x2, #0
b.eq 1f
0: ldrb w3, [x1], #1
strb w3, [x0], #1
subs x2, x2, #1
b.ne 0b
1: ret
endfunction
.globl memcpy
// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
// Clobbers x1, x2.
function memfill_ae
mov w2, #0xae
b memfill
endfunction
.globl memfill_ae
// Fill x1 bytes starting at x0 with 0.
// Clobbers x1, x2.
function memclr
mov w2, #0
endfunction
.globl memclr
// fall through to memfill
// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
// Clobbers x1
function memfill
cmp x1, #0
b.eq 1f
0: strb w2, [x0], #1
subs x1, x1, #1
b.ne 0b
1: ret
endfunction
.globl memfill
......@@ -54,4 +54,15 @@ endfunction
.purgem \name\()_entry
.endm
// Utility macro to print a literal string
// Clobbers x0-x4,x8
.macro puts string
.pushsection .rodata.str1.1, "aMS", 1
.L__puts_literal\@: .string "\string"
.popsection
ldr x0, =.L__puts_literal\@
bl puts
.endm
#endif /* ! ASSEMBLER_H */
......@@ -33,131 +33,6 @@
define_accessor setv, NVR, _vldr
define_accessor getv, NVR, _vstr
// Print a single character x0 to stdout
// Clobbers x0-x2,x8
function putc
str x0, [sp, #-16]!
mov x0, #1 // STDOUT_FILENO
mov x1, sp
mov x2, #1
mov x8, #__NR_write
svc #0
add sp, sp, #16
ret
endfunction
// Print a NUL-terminated string starting at address x0 to stdout
// Clobbers x0-x3,x8
function puts
mov x1, x0
mov x2, #0
0: ldrb w3, [x0], #1
cbz w3, 1f
add x2, x2, #1
b 0b
1: mov w0, #1 // STDOUT_FILENO
mov x8, #__NR_write
svc #0
ret
endfunction
// Utility macro to print a literal string
// Clobbers x0-x4,x8
.macro puts string
.pushsection .rodata.str1.1, "aMS", 1
.L__puts_literal\@: .string "\string"
.popsection
ldr x0, =.L__puts_literal\@
bl puts
.endm
// Print an unsigned decimal number x0 to stdout
// Clobbers x0-x4,x8
function putdec
mov x1, sp
str x30, [sp, #-32]! // Result can't be > 20 digits
mov x2, #0
strb w2, [x1, #-1]! // Write the NUL terminator
mov x2, #10
0: udiv x3, x0, x2 // div-mod loop to generate the digits
msub x0, x3, x2, x0
add w0, w0, #'0'
strb w0, [x1, #-1]!
mov x0, x3
cbnz x3, 0b
ldrb w0, [x1]
cbnz w0, 1f
mov w0, #'0' // Print "0" for 0, not ""
strb w0, [x1, #-1]!
1: mov x0, x1
bl puts
ldr x30, [sp], #32
ret
endfunction
// Print an unsigned decimal number x0 to stdout, followed by a newline
// Clobbers x0-x5,x8
function putdecn
mov x5, x30
bl putdec
mov x0, #'\n'
bl putc
ret x5
endfunction
// Clobbers x0-x3,x8
function puthexb
str x30, [sp, #-0x10]!
mov w3, w0
lsr w0, w0, #4
bl puthexnibble
mov w0, w3
ldr x30, [sp], #0x10
// fall through to puthexnibble
endfunction
// Clobbers x0-x2,x8
function puthexnibble
and w0, w0, #0xf
cmp w0, #10
blo 1f
add w0, w0, #'a' - ('9' + 1)
1: add w0, w0, #'0'
b putc
endfunction
// x0=data in, x1=size in, clobbers x0-x5,x8
function dumphex
str x30, [sp, #-0x10]!
mov x4, x0
mov x5, x1
0: subs x5, x5, #1
b.lo 1f
ldrb w0, [x4], #1
bl puthexb
b 0b
1: ldr x30, [sp], #0x10
ret
endfunction
// Declare some storate space to shadow the SVE register contents:
.pushsection .text
.data
......@@ -168,18 +43,6 @@ scratch:
.space MAXVL_B
.popsection
// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
// Clobbers x0-x3
function memcpy
cmp x2, #0
b.eq 1f
0: ldrb w3, [x1], #1
strb w3, [x0], #1
subs x2, x2, #1
b.ne 0b
1: ret
endfunction
// Generate a test pattern for storage in SVE registers
// x0: pid (16 bits)
// x1: register number (6 bits)
......@@ -227,33 +90,6 @@ function setup_vreg
ret x4
endfunction
// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
// Clobbers x1, x2.
function memfill_ae
mov w2, #0xae
b memfill
endfunction
// Fill x1 bytes starting at x0 with 0.
// Clobbers x1, x2.
function memclr
mov w2, #0
endfunction
// fall through to memfill
// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
// Clobbers x1
function memfill
cmp x1, #0
b.eq 1f
0: strb w2, [x0], #1
subs x1, x1, #1
b.ne 0b
1: ret
endfunction
// Trivial memory compare: compare x2 bytes starting at address x0 with
// bytes starting at address x1.
// Returns only if all bytes match; otherwise, the program is aborted.
......
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2015-2019 ARM Limited.
// Original author: Dave Martin <Dave.Martin@arm.com>
#include <asm/unistd.h>
.arch_extension sve
.globl sve_store_patterns
sve_store_patterns:
mov x1, x0
index z0.b, #0, #1
str q0, [x1]
mov w8, #__NR_getpid
svc #0
str q0, [x1, #0x10]
mov z1.d, z0.d
str q0, [x1, #0x20]
mov w8, #__NR_getpid
svc #0
str q0, [x1, #0x30]
mov z1.d, z0.d
str q0, [x1, #0x40]
ret
.size sve_store_patterns, . - sve_store_patterns
.type sve_store_patterns, @function
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2015-2020 ARM Limited.
* Copyright (C) 2015-2021 ARM Limited.
* Original author: Dave Martin <Dave.Martin@arm.com>
*/
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/auxv.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
......@@ -19,40 +21,22 @@
#include "../../kselftest.h"
#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
#define FPSIMD_TESTS 5
#define EXPECTED_TESTS (VL_TESTS + FPSIMD_TESTS)
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_SVE
#define NT_ARM_SVE 0x405
#endif
/* Number of registers filled in by sve_store_patterns */
#define NR_VREGS 5
void sve_store_patterns(__uint128_t v[NR_VREGS]);
static void dump(const void *buf, size_t size)
{
size_t i;
const unsigned char *p = buf;
for (i = 0; i < size; ++i)
printf(" %.2x", *p++);
}
static int check_vregs(const __uint128_t vregs[NR_VREGS])
static void fill_buf(char *buf, size_t size)
{
int i;
int ok = 1;
for (i = 0; i < NR_VREGS; ++i) {
printf("# v[%d]:", i);
dump(&vregs[i], sizeof vregs[i]);
putchar('\n');
if (vregs[i] != vregs[0])
ok = 0;
}
return ok;
for (i = 0; i < size; i++)
buf[i] = random();
}
static int do_child(void)
......@@ -66,6 +50,15 @@ static int do_child(void)
return EXIT_SUCCESS;
}
static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
{
struct iovec iov;
iov.iov_base = fpsimd;
iov.iov_len = sizeof(*fpsimd);
return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
}
static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size)
{
struct user_sve_header *sve;
......@@ -112,25 +105,335 @@ static int set_sve(pid_t pid, const struct user_sve_header *sve)
return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov);
}
static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num,
unsigned int vlmax)
/* Validate setting and getting the inherit flag */
static void ptrace_set_get_inherit(pid_t child)
{
struct user_sve_header sve;
struct user_sve_header *new_sve = NULL;
size_t new_sve_size = 0;
int ret;
/* First set the flag */
memset(&sve, 0, sizeof(sve));
sve.size = sizeof(sve);
sve.vl = sve_vl_from_vq(SVE_VQ_MIN);
sve.flags = SVE_PT_VL_INHERIT;
ret = set_sve(child, &sve);
if (ret != 0) {
ksft_test_result_fail("Failed to set SVE_PT_VL_INHERIT\n");
return;
}
/*
* Read back the new register state and verify that we have
* set the flags we expected.
*/
if (!get_sve(child, (void **)&new_sve, &new_sve_size)) {
ksft_test_result_fail("Failed to read SVE flags\n");
return;
}
ksft_test_result(new_sve->flags & SVE_PT_VL_INHERIT,
"SVE_PT_VL_INHERIT set\n");
/* Now clear */
sve.flags &= ~SVE_PT_VL_INHERIT;
ret = set_sve(child, &sve);
if (ret != 0) {
ksft_test_result_fail("Failed to clear SVE_PT_VL_INHERIT\n");
return;
}
if (!get_sve(child, (void **)&new_sve, &new_sve_size)) {
ksft_test_result_fail("Failed to read SVE flags\n");
return;
}
ksft_test_result(!(new_sve->flags & SVE_PT_VL_INHERIT),
"SVE_PT_VL_INHERIT cleared\n");
free(new_sve);
}
/* Validate attempting to set the specfied VL via ptrace */
static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported)
{
struct user_sve_header sve;
struct user_sve_header *new_sve = NULL;
size_t new_sve_size = 0;
int ret, prctl_vl;
*supported = false;
/* Check if the VL is supported in this process */
prctl_vl = prctl(PR_SVE_SET_VL, vl);
if (prctl_vl == -1)
ksft_exit_fail_msg("prctl(PR_SVE_SET_VL) failed: %s (%d)\n",
strerror(errno), errno);
/* If the VL is not supported then a supported VL will be returned */
*supported = (prctl_vl == vl);
/* Set the VL by doing a set with no register payload */
memset(&sve, 0, sizeof(sve));
sve.size = sizeof(sve);
sve.vl = vl;
ret = set_sve(child, &sve);
if (ret != 0) {
ksft_test_result_fail("Failed to set VL %u\n", vl);
return;
}
/*
* Read back the new register state and verify that we have the
* same VL that we got from prctl() on ourselves.
*/
if (!get_sve(child, (void **)&new_sve, &new_sve_size)) {
ksft_test_result_fail("Failed to read VL %u\n", vl);
return;
}
ksft_test_result(new_sve->vl = prctl_vl, "Set VL %u\n", vl);
free(new_sve);
}
static void check_u32(unsigned int vl, const char *reg,
uint32_t *in, uint32_t *out, int *errors)
{
if (*in != *out) {
printf("# VL %d %s wrote %x read %x\n",
vl, reg, *in, *out);
(*errors)++;
}
}
/* Access the FPSIMD registers via the SVE regset */
static void ptrace_sve_fpsimd(pid_t child)
{
void *svebuf = NULL;
size_t svebufsz = 0;
struct user_sve_header *sve;
struct user_fpsimd_state *fpsimd, new_fpsimd;
unsigned int i, j;
unsigned char *p;
/* New process should start with FPSIMD registers only */
sve = get_sve(child, &svebuf, &svebufsz);
if (!sve) {
ksft_test_result_fail("get_sve: %s\n", strerror(errno));
return;
} else {
ksft_test_result_pass("get_sve(FPSIMD)\n");
}
ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
"Set FPSIMD registers\n");
if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
goto out;
/* Try to set a known FPSIMD state via PT_REGS_SVE */
fpsimd = (struct user_fpsimd_state *)((char *)sve +
SVE_PT_FPSIMD_OFFSET);
for (i = 0; i < 32; ++i) {
p = (unsigned char *)&fpsimd->vregs[i];
for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j)
p[j] = j;
}
if (set_sve(child, sve)) {
ksft_test_result_fail("set_sve(FPSIMD): %s\n",
strerror(errno));
goto out;
}
/* Verify via the FPSIMD regset */
if (get_fpsimd(child, &new_fpsimd)) {
ksft_test_result_fail("get_fpsimd(): %s\n",
strerror(errno));
goto out;
}
if (memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0)
ksft_test_result_pass("get_fpsimd() gave same state\n");
else
ksft_test_result_fail("get_fpsimd() gave different state\n");
out:
free(svebuf);
}
/* Validate attempting to set SVE data and read SVE data */
static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl)
{
void *write_buf;
void *read_buf = NULL;
struct user_sve_header *write_sve;
struct user_sve_header *read_sve;
size_t read_sve_size = 0;
unsigned int vq = sve_vq_from_vl(vl);
int ret, i;
size_t data_size;
int errors = 0;
data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
write_buf = malloc(data_size);
if (!write_buf) {
ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n",
data_size, vl);
return;
}
write_sve = write_buf;
/* Set up some data and write it out */
memset(write_sve, 0, data_size);
write_sve->size = data_size;
write_sve->vl = vl;
write_sve->flags = SVE_PT_REGS_SVE;
for (i = 0; i < __SVE_NUM_ZREGS; i++)
fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
SVE_PT_SVE_ZREG_SIZE(vq));
for (i = 0; i < __SVE_NUM_PREGS; i++)
fill_buf(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i),
SVE_PT_SVE_PREG_SIZE(vq));
fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE);
fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE);
/* TODO: Generate a valid FFR pattern */
ret = set_sve(child, write_sve);
if (ret != 0) {
ksft_test_result_fail("Failed to set VL %u data\n", vl);
goto out;
}
/* Read the data back */
if (!get_sve(child, (void **)&read_buf, &read_sve_size)) {
ksft_test_result_fail("Failed to read VL %u data\n", vl);
goto out;
}
read_sve = read_buf;
/* We might read more data if there's extensions we don't know */
if (read_sve->size < write_sve->size) {
ksft_test_result_fail("Wrote %d bytes, only read %d\n",
write_sve->size, read_sve->size);
goto out_read;
}
for (i = 0; i < __SVE_NUM_ZREGS; i++) {
if (memcmp(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
SVE_PT_SVE_ZREG_SIZE(vq)) != 0) {
printf("# Mismatch in %u Z%d\n", vl, i);
errors++;
}
}
for (i = 0; i < __SVE_NUM_PREGS; i++) {
if (memcmp(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i),
read_buf + SVE_PT_SVE_PREG_OFFSET(vq, i),
SVE_PT_SVE_PREG_SIZE(vq)) != 0) {
printf("# Mismatch in %u P%d\n", vl, i);
errors++;
}
}
check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq),
read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors);
check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq),
read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors);
ksft_test_result(errors == 0, "Set and get SVE data for VL %u\n", vl);
out_read:
free(read_buf);
out:
free(write_buf);
}
/* Validate attempting to set SVE data and read SVE data */
static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl)
{
unsigned int vq;
unsigned int i;
void *write_buf;
struct user_sve_header *write_sve;
unsigned int vq = sve_vq_from_vl(vl);
struct user_fpsimd_state fpsimd_state;
int ret, i;
size_t data_size;
int errors = 0;
if (__BYTE_ORDER == __BIG_ENDIAN) {
ksft_test_result_skip("Big endian not supported\n");
return;
}
if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
ksft_exit_fail_msg("Dumping non-SVE register\n");
data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
write_buf = malloc(data_size);
if (!write_buf) {
ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n",
data_size, vl);
return;
}
write_sve = write_buf;
/* Set up some data and write it out */
memset(write_sve, 0, data_size);
write_sve->size = data_size;
write_sve->vl = vl;
write_sve->flags = SVE_PT_REGS_SVE;
for (i = 0; i < __SVE_NUM_ZREGS; i++)
fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
SVE_PT_SVE_ZREG_SIZE(vq));
if (vlmax > sve->vl)
vlmax = sve->vl;
fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE);
fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE);
ret = set_sve(child, write_sve);
if (ret != 0) {
ksft_test_result_fail("Failed to set VL %u data\n", vl);
goto out;
}
vq = sve_vq_from_vl(sve->vl);
for (i = 0; i < num; ++i) {
printf("# z%u:", i);
dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i),
vlmax);
printf("%s\n", vlmax == sve->vl ? "" : " ...");
/* Read the data back */
if (get_fpsimd(child, &fpsimd_state)) {
ksft_test_result_fail("Failed to read VL %u FPSIMD data\n",
vl);
goto out;
}
for (i = 0; i < __SVE_NUM_ZREGS; i++) {
__uint128_t tmp = 0;
/*
* Z regs are stored endianness invariant, this won't
* work for big endian
*/
memcpy(&tmp, write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
sizeof(tmp));
if (tmp != fpsimd_state.vregs[i]) {
printf("# Mismatch in FPSIMD for VL %u Z%d\n", vl, i);
errors++;
}
}
check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq),
&fpsimd_state.fpsr, &errors);
check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq),
&fpsimd_state.fpcr, &errors);
ksft_test_result(errors == 0, "Set and get FPSIMD data for VL %u\n",
vl);
out:
free(write_buf);
}
static int do_parent(pid_t child)
......@@ -139,13 +442,8 @@ static int do_parent(pid_t child)
pid_t pid;
int status;
siginfo_t si;
void *svebuf = NULL, *newsvebuf;
size_t svebufsz = 0, newsvebufsz;
struct user_sve_header *sve, *new_sve;
struct user_fpsimd_state *fpsimd;
unsigned int i, j;
unsigned char *p;
unsigned int vq;
unsigned int vq, vl;
bool vl_supported;
/* Attach to the child */
while (1) {
......@@ -167,8 +465,6 @@ static int do_parent(pid_t child)
if (WIFEXITED(status) || WIFSIGNALED(status))
ksft_exit_fail_msg("Child died unexpectedly\n");
ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n",
status);
if (!WIFSTOPPED(status))
goto error;
......@@ -203,98 +499,27 @@ static int do_parent(pid_t child)
}
}
sve = get_sve(pid, &svebuf, &svebufsz);
if (!sve) {
int e = errno;
/* FPSIMD via SVE regset */
ptrace_sve_fpsimd(child);
ksft_test_result_fail("get_sve: %s\n", strerror(errno));
if (e == ESRCH)
goto disappeared;
/* prctl() flags */
ptrace_set_get_inherit(child);
goto error;
} else {
ksft_test_result_pass("get_sve\n");
}
/* Step through every possible VQ */
for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
vl = sve_vl_from_vq(vq);
ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
"FPSIMD registers\n");
if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
goto error;
/* First, try to set this vector length */
ptrace_set_get_vl(child, vl, &vl_supported);
fpsimd = (struct user_fpsimd_state *)((char *)sve +
SVE_PT_FPSIMD_OFFSET);
for (i = 0; i < 32; ++i) {
p = (unsigned char *)&fpsimd->vregs[i];
for (j = 0; j < sizeof fpsimd->vregs[i]; ++j)
p[j] = j;
}
if (set_sve(pid, sve)) {
int e = errno;
ksft_test_result_fail("set_sve(FPSIMD): %s\n",
strerror(errno));
if (e == ESRCH)
goto disappeared;
goto error;
}
vq = sve_vq_from_vl(sve->vl);
newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
new_sve = newsvebuf = malloc(newsvebufsz);
if (!new_sve) {
errno = ENOMEM;
perror(NULL);
goto error;
}
*new_sve = *sve;
new_sve->flags &= ~SVE_PT_REGS_MASK;
new_sve->flags |= SVE_PT_REGS_SVE;
memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
0, SVE_PT_SVE_ZREG_SIZE(vq));
new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
if (set_sve(pid, new_sve)) {
int e = errno;
ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno));
if (e == ESRCH)
goto disappeared;
goto error;
/* If the VL is supported validate data set/get */
if (vl_supported) {
ptrace_set_sve_get_sve_data(child, vl);
ptrace_set_sve_get_fpsimd_data(child, vl);
} else {
ksft_test_result_skip("set SVE get SVE for VL %d\n", vl);
ksft_test_result_skip("set SVE get FPSIMD for VL %d\n", vl);
}
new_sve = get_sve(pid, &newsvebuf, &newsvebufsz);
if (!new_sve) {
int e = errno;
ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno));
if (e == ESRCH)
goto disappeared;
goto error;
}
ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE,
"SVE registers\n");
if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
goto error;
dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]);
p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1);
for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) {
unsigned char expected = i;
if (__BYTE_ORDER == __BIG_ENDIAN)
expected = sizeof fpsimd->vregs[0] - 1 - expected;
ksft_test_result(p[i] == expected, "p[%d] == expected\n", i);
if (p[i] != expected)
goto error;
}
ret = EXIT_SUCCESS;
......@@ -309,20 +534,16 @@ static int do_parent(pid_t child)
int main(void)
{
int ret = EXIT_SUCCESS;
__uint128_t v[NR_VREGS];
pid_t child;
srandom(getpid());
ksft_print_header();
ksft_set_plan(20);
ksft_set_plan(EXPECTED_TESTS);
if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
ksft_exit_skip("SVE not available\n");
sve_store_patterns(v);
if (!check_vregs(v))
ksft_exit_fail_msg("Initial check_vregs() failed\n");
child = fork();
if (!child)
return do_child();
......
......@@ -46,130 +46,6 @@ define_accessor getz, NZR, _sve_str_v
define_accessor setp, NPR, _sve_ldr_p
define_accessor getp, NPR, _sve_str_p
// Print a single character x0 to stdout
// Clobbers x0-x2,x8
function putc
str x0, [sp, #-16]!
mov x0, #1 // STDOUT_FILENO
mov x1, sp
mov x2, #1
mov x8, #__NR_write
svc #0
add sp, sp, #16
ret
endfunction
// Print a NUL-terminated string starting at address x0 to stdout
// Clobbers x0-x3,x8
function puts
mov x1, x0
mov x2, #0
0: ldrb w3, [x0], #1
cbz w3, 1f
add x2, x2, #1
b 0b
1: mov w0, #1 // STDOUT_FILENO
mov x8, #__NR_write
svc #0
ret
endfunction
// Utility macro to print a literal string
// Clobbers x0-x4,x8
.macro puts string
.pushsection .rodata.str1.1, "aMS", 1
.L__puts_literal\@: .string "\string"
.popsection
ldr x0, =.L__puts_literal\@
bl puts
.endm
// Print an unsigned decimal number x0 to stdout
// Clobbers x0-x4,x8
function putdec
mov x1, sp
str x30, [sp, #-32]! // Result can't be > 20 digits
mov x2, #0
strb w2, [x1, #-1]! // Write the NUL terminator
mov x2, #10
0: udiv x3, x0, x2 // div-mod loop to generate the digits
msub x0, x3, x2, x0
add w0, w0, #'0'
strb w0, [x1, #-1]!
mov x0, x3
cbnz x3, 0b
ldrb w0, [x1]
cbnz w0, 1f
mov w0, #'0' // Print "0" for 0, not ""
strb w0, [x1, #-1]!
1: mov x0, x1
bl puts
ldr x30, [sp], #32
ret
endfunction
// Print an unsigned decimal number x0 to stdout, followed by a newline
// Clobbers x0-x5,x8
function putdecn
mov x5, x30
bl putdec
mov x0, #'\n'
bl putc
ret x5
endfunction
// Clobbers x0-x3,x8
function puthexb
str x30, [sp, #-0x10]!
mov w3, w0
lsr w0, w0, #4
bl puthexnibble
mov w0, w3
ldr x30, [sp], #0x10
// fall through to puthexnibble
endfunction
// Clobbers x0-x2,x8
function puthexnibble
and w0, w0, #0xf
cmp w0, #10
blo 1f
add w0, w0, #'a' - ('9' + 1)
1: add w0, w0, #'0'
b putc
endfunction
// x0=data in, x1=size in, clobbers x0-x5,x8
function dumphex
str x30, [sp, #-0x10]!
mov x4, x0
mov x5, x1
0: subs x5, x5, #1
b.lo 1f
ldrb w0, [x4], #1
bl puthexb
b 0b
1: ldr x30, [sp], #0x10
ret
endfunction
// Declare some storate space to shadow the SVE register contents:
.pushsection .text
.data
......@@ -184,18 +60,6 @@ scratch:
.space MAXVL_B
.popsection
// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
// Clobbers x0-x3
function memcpy
cmp x2, #0
b.eq 1f
0: ldrb w3, [x1], #1
strb w3, [x0], #1
subs x2, x2, #1
b.ne 0b
1: ret
endfunction
// Generate a test pattern for storage in SVE registers
// x0: pid (16 bits)
// x1: register number (6 bits)
......@@ -316,33 +180,6 @@ function setup_ffr
ret x4
endfunction
// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
// Clobbers x1, x2.
function memfill_ae
mov w2, #0xae
b memfill
endfunction
// Fill x1 bytes starting at x0 with 0.
// Clobbers x1, x2.
function memclr
mov w2, #0
endfunction
// fall through to memfill
// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
// Clobbers x1
function memfill
cmp x1, #0
b.eq 1f
0: strb w2, [x0], #1
subs x1, x1, #1
b.ne 0b
1: ret
endfunction
// Trivial memory compare: compare x2 bytes starting at address x0 with
// bytes starting at address x1.
// Returns only if all bytes match; otherwise, the program is aborted.
......
......@@ -109,7 +109,7 @@ static int get_child_rdvl(struct vec_data *data)
/* exec() a new binary which puts the VL on stdout */
ret = execl(data->rdvl_binary, data->rdvl_binary, NULL);
fprintf(stderr, "execl(%s) failed: %d\n",
fprintf(stderr, "execl(%s) failed: %d (%s)\n",
data->rdvl_binary, errno, strerror(errno));
exit(EXIT_FAILURE);
......@@ -180,7 +180,6 @@ static int file_read_integer(const char *name, int *val)
static int file_write_integer(const char *name, int val)
{
FILE *f;
int ret;
f = fopen(name, "w");
if (!f) {
......@@ -192,11 +191,6 @@ static int file_write_integer(const char *name, int val)
fprintf(f, "%d", val);
fclose(f);
if (ret < 0) {
ksft_test_result_fail("Error writing %d to %s\n",
val, name);
return -1;
}
return 0;
}
......@@ -335,12 +329,9 @@ static void prctl_set_same(struct vec_data *data)
return;
}
if (cur_vl != data->rdvl())
ksft_test_result_pass("%s current VL is %d\n",
data->name, ret);
else
ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n",
data->name, ret, data->rdvl());
ksft_test_result(cur_vl == data->rdvl(),
"%s set VL %d and have VL %d\n",
data->name, cur_vl, data->rdvl());
}
/* Can we set a new VL for this process? */
......@@ -549,6 +540,82 @@ static void prctl_set_onexec(struct vec_data *data)
file_write_integer(data->default_vl_file, data->default_vl);
}
/* For each VQ verify that setting via prctl() does the right thing */
static void prctl_set_all_vqs(struct vec_data *data)
{
int ret, vq, vl, new_vl;
int errors = 0;
if (!data->min_vl || !data->max_vl) {
ksft_test_result_skip("%s Failed to enumerate VLs, not testing VL setting\n",
data->name);
return;
}
for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
vl = sve_vl_from_vq(vq);
/* Attempt to set the VL */
ret = prctl(data->prctl_set, vl);
if (ret < 0) {
errors++;
ksft_print_msg("%s prctl set failed for %d: %d (%s)\n",
data->name, vl,
errno, strerror(errno));
continue;
}
new_vl = ret & PR_SVE_VL_LEN_MASK;
/* Check that we actually have the reported new VL */
if (data->rdvl() != new_vl) {
ksft_print_msg("Set %s VL %d but RDVL reports %d\n",
data->name, new_vl, data->rdvl());
errors++;
}
/* Was that the VL we asked for? */
if (new_vl == vl)
continue;
/* Should round up to the minimum VL if below it */
if (vl < data->min_vl) {
if (new_vl != data->min_vl) {
ksft_print_msg("%s VL %d returned %d not minimum %d\n",
data->name, vl, new_vl,
data->min_vl);
errors++;
}
continue;
}
/* Should round down to maximum VL if above it */
if (vl > data->max_vl) {
if (new_vl != data->max_vl) {
ksft_print_msg("%s VL %d returned %d not maximum %d\n",
data->name, vl, new_vl,
data->max_vl);
errors++;
}
continue;
}
/* Otherwise we should've rounded down */
if (!(new_vl < vl)) {
ksft_print_msg("%s VL %d returned %d, did not round down\n",
data->name, vl, new_vl);
errors++;
continue;
}
}
ksft_test_result(errors == 0, "%s prctl() set all VLs, %d errors\n",
data->name, errors);
}
typedef void (*test_type)(struct vec_data *);
static const test_type tests[] = {
......@@ -561,10 +628,12 @@ static const test_type tests[] = {
proc_write_max,
prctl_get,
prctl_set_same,
prctl_set,
prctl_set_no_child,
prctl_set_for_child,
prctl_set_onexec,
prctl_set_all_vqs,
};
int main(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment