Commit 3b3b6810 authored by Dongjiu Geng's avatar Dongjiu Geng Committed by Catalin Marinas

arm64: v8.4: Support for new floating point multiplication instructions

ARM v8.4 extensions add new neon instructions for performing a
multiplication of each FP16 element of one vector with the corresponding
FP16 element of a second vector, and to add or subtract this without an
intermediate rounding to the corresponding FP32 element in a third vector.

This patch detects this feature and let the userspace know about it via a
HWCAP bit and MRS emulation.

Cc: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: default avatarSuzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: default avatarDongjiu Geng <gengdongjiu@huawei.com>
Reviewed-by: default avatarDave Martin <Dave.Martin@arm.com>
Signed-off-by: default avatarCatalin Marinas <catalin.marinas@arm.com>
parent a8ffaaa0
...@@ -110,7 +110,9 @@ infrastructure: ...@@ -110,7 +110,9 @@ infrastructure:
x--------------------------------------------------x x--------------------------------------------------x
| Name | bits | visible | | Name | bits | visible |
|--------------------------------------------------| |--------------------------------------------------|
| RES0 | [63-48] | n | | RES0 | [63-52] | n |
|--------------------------------------------------|
| FHM | [51-48] | y |
|--------------------------------------------------| |--------------------------------------------------|
| DP | [47-44] | y | | DP | [47-44] | y |
|--------------------------------------------------| |--------------------------------------------------|
......
...@@ -158,3 +158,7 @@ HWCAP_SHA512 ...@@ -158,3 +158,7 @@ HWCAP_SHA512
HWCAP_SVE HWCAP_SVE
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001. Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
HWCAP_ASIMDFHM
Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
...@@ -419,6 +419,7 @@ ...@@ -419,6 +419,7 @@
#define SCTLR_EL1_CP15BEN (1 << 5) #define SCTLR_EL1_CP15BEN (1 << 5)
/* id_aa64isar0 */ /* id_aa64isar0 */
#define ID_AA64ISAR0_FHM_SHIFT 48
#define ID_AA64ISAR0_DP_SHIFT 44 #define ID_AA64ISAR0_DP_SHIFT 44
#define ID_AA64ISAR0_SM4_SHIFT 40 #define ID_AA64ISAR0_SM4_SHIFT 40
#define ID_AA64ISAR0_SM3_SHIFT 36 #define ID_AA64ISAR0_SM3_SHIFT 36
......
...@@ -43,5 +43,6 @@ ...@@ -43,5 +43,6 @@
#define HWCAP_ASIMDDP (1 << 20) #define HWCAP_ASIMDDP (1 << 20)
#define HWCAP_SHA512 (1 << 21) #define HWCAP_SHA512 (1 << 21)
#define HWCAP_SVE (1 << 22) #define HWCAP_SVE (1 << 22)
#define HWCAP_ASIMDFHM (1 << 23)
#endif /* _UAPI__ASM_HWCAP_H */ #endif /* _UAPI__ASM_HWCAP_H */
...@@ -123,6 +123,7 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) ...@@ -123,6 +123,7 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
* sync with the documentation of the CPU feature register ABI. * sync with the documentation of the CPU feature register ABI.
*/ */
static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0),
...@@ -1032,6 +1033,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { ...@@ -1032,6 +1033,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
......
...@@ -76,6 +76,7 @@ static const char *const hwcap_str[] = { ...@@ -76,6 +76,7 @@ static const char *const hwcap_str[] = {
"asimddp", "asimddp",
"sha512", "sha512",
"sve", "sve",
"asimdfhm",
NULL NULL
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment