Commit 62a31d6e authored by Evan Green's avatar Evan Green Committed by Palmer Dabbelt

RISC-V: hwprobe: Support probing of misaligned access performance

This allows userspace to select various routines to use based on the
performance of misaligned access on the target hardware.

Rather than adding DT bindings, this change taps into the alternatives
mechanism used to probe CPU errata. Add a new function pointer alongside
the vendor-specific errata_patch_func() that probes for desirable errata
(otherwise known as "features"). Unlike the errata_patch_func(), this
function is called on each CPU as it comes up, so it can save
feature information per-CPU.

The T-head C906 has fast unaligned access, both as defined by GCC [1],
and in performing a basic benchmark, which determined that byte copies
are >50% slower than a misaligned word copy of the same data size (source
for this test at [2]):

bytecopy size f000 count 50000 offset 0 took 31664899 us
wordcopy size f000 count 50000 offset 0 took 5180919 us
wordcopy size f000 count 50000 offset 1 took 13416949 us

[1] https://github.com/gcc-mirror/gcc/blob/master/gcc/config/riscv/riscv.cc#L353
[2] https://pastebin.com/EPXvDHSWCo-developed-by: default avatarPalmer Dabbelt <palmer@rivosinc.com>
Signed-off-by: default avatarEvan Green <evan@rivosinc.com>
Reviewed-by: default avatarHeiko Stuebner <heiko.stuebner@vrull.eu>
Tested-by: default avatarHeiko Stuebner <heiko.stuebner@vrull.eu>
Reviewed-by: default avatarConor Dooley <conor.dooley@microchip.com>
Reviewed-by: default avatarPaul Walmsley <paul.walmsley@sifive.com>
Link: https://lore.kernel.org/r/20230407231103.2622178-5-evan@rivosinc.comSigned-off-by: default avatarPalmer Dabbelt <palmer@rivosinc.com>
parent 00e76e2c
...@@ -63,3 +63,24 @@ The following keys are defined: ...@@ -63,3 +63,24 @@ The following keys are defined:
* :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined * :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined
by version 2.2 of the RISC-V ISA manual. by version 2.2 of the RISC-V ISA manual.
* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
information about the selected set of processors.
* :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
accesses is unknown.
* :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are
emulated via software, either in or below the kernel. These accesses are
always extremely slow.
* :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are supported
in hardware, but are slower than the cooresponding aligned accesses
sequences.
* :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are supported
in hardware and are faster than the cooresponding aligned accesses
sequences.
* :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
not supported at all and will generate a misaligned address fault.
...@@ -11,7 +11,9 @@ ...@@ -11,7 +11,9 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <asm/alternative.h> #include <asm/alternative.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/errata_list.h> #include <asm/errata_list.h>
#include <asm/hwprobe.h>
#include <asm/patch.h> #include <asm/patch.h>
#include <asm/vendorid_list.h> #include <asm/vendorid_list.h>
...@@ -115,3 +117,11 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al ...@@ -115,3 +117,11 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
local_flush_icache_all(); local_flush_icache_all();
} }
void __init_or_module thead_feature_probe_func(unsigned int cpu,
unsigned long archid,
unsigned long impid)
{
if ((archid == 0) && (impid == 0))
per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
}
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#define ALT_OLD_PTR(a) __ALT_PTR(a, old_offset) #define ALT_OLD_PTR(a) __ALT_PTR(a, old_offset)
#define ALT_ALT_PTR(a) __ALT_PTR(a, alt_offset) #define ALT_ALT_PTR(a) __ALT_PTR(a, alt_offset)
void __init probe_vendor_features(unsigned int cpu);
void __init apply_boot_alternatives(void); void __init apply_boot_alternatives(void);
void __init apply_early_boot_alternatives(void); void __init apply_early_boot_alternatives(void);
void apply_module_alternatives(void *start, size_t length); void apply_module_alternatives(void *start, size_t length);
...@@ -55,11 +56,15 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, ...@@ -55,11 +56,15 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
unsigned long archid, unsigned long impid, unsigned long archid, unsigned long impid,
unsigned int stage); unsigned int stage);
void thead_feature_probe_func(unsigned int cpu, unsigned long archid,
unsigned long impid);
void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end, void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end,
unsigned int stage); unsigned int stage);
#else /* CONFIG_RISCV_ALTERNATIVE */ #else /* CONFIG_RISCV_ALTERNATIVE */
static inline void probe_vendor_features(unsigned int cpu) { }
static inline void apply_boot_alternatives(void) { } static inline void apply_boot_alternatives(void) { }
static inline void apply_early_boot_alternatives(void) { } static inline void apply_early_boot_alternatives(void) { }
static inline void apply_module_alternatives(void *start, size_t length) { } static inline void apply_module_alternatives(void *start, size_t length) { }
......
...@@ -18,4 +18,6 @@ struct riscv_cpuinfo { ...@@ -18,4 +18,6 @@ struct riscv_cpuinfo {
DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
DECLARE_PER_CPU(long, misaligned_access_speed);
#endif #endif
...@@ -8,6 +8,6 @@ ...@@ -8,6 +8,6 @@
#include <uapi/asm/hwprobe.h> #include <uapi/asm/hwprobe.h>
#define RISCV_HWPROBE_MAX_KEY 4 #define RISCV_HWPROBE_MAX_KEY 5
#endif #endif
...@@ -25,6 +25,13 @@ struct riscv_hwprobe { ...@@ -25,6 +25,13 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_KEY_IMA_EXT_0 4 #define RISCV_HWPROBE_KEY_IMA_EXT_0 4
#define RISCV_HWPROBE_IMA_FD (1 << 0) #define RISCV_HWPROBE_IMA_FD (1 << 0)
#define RISCV_HWPROBE_IMA_C (1 << 1) #define RISCV_HWPROBE_IMA_C (1 << 1)
#define RISCV_HWPROBE_KEY_CPUPERF_0 5
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
#endif #endif
...@@ -27,6 +27,8 @@ struct cpu_manufacturer_info_t { ...@@ -27,6 +27,8 @@ struct cpu_manufacturer_info_t {
void (*patch_func)(struct alt_entry *begin, struct alt_entry *end, void (*patch_func)(struct alt_entry *begin, struct alt_entry *end,
unsigned long archid, unsigned long impid, unsigned long archid, unsigned long impid,
unsigned int stage); unsigned int stage);
void (*feature_probe_func)(unsigned int cpu, unsigned long archid,
unsigned long impid);
}; };
static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info) static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info)
...@@ -41,6 +43,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf ...@@ -41,6 +43,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
cpu_mfr_info->imp_id = sbi_get_mimpid(); cpu_mfr_info->imp_id = sbi_get_mimpid();
#endif #endif
cpu_mfr_info->feature_probe_func = NULL;
switch (cpu_mfr_info->vendor_id) { switch (cpu_mfr_info->vendor_id) {
#ifdef CONFIG_ERRATA_SIFIVE #ifdef CONFIG_ERRATA_SIFIVE
case SIFIVE_VENDOR_ID: case SIFIVE_VENDOR_ID:
...@@ -50,6 +53,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf ...@@ -50,6 +53,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
#ifdef CONFIG_ERRATA_THEAD #ifdef CONFIG_ERRATA_THEAD
case THEAD_VENDOR_ID: case THEAD_VENDOR_ID:
cpu_mfr_info->patch_func = thead_errata_patch_func; cpu_mfr_info->patch_func = thead_errata_patch_func;
cpu_mfr_info->feature_probe_func = thead_feature_probe_func;
break; break;
#endif #endif
default: default:
...@@ -139,6 +143,20 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len, ...@@ -139,6 +143,20 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len,
} }
} }
/* Called on each CPU as it starts */
void __init_or_module probe_vendor_features(unsigned int cpu)
{
struct cpu_manufacturer_info_t cpu_mfr_info;
riscv_fill_cpu_mfr_info(&cpu_mfr_info);
if (!cpu_mfr_info.feature_probe_func)
return;
cpu_mfr_info.feature_probe_func(cpu,
cpu_mfr_info.arch_id,
cpu_mfr_info.imp_id);
}
/* /*
* This is called very early in the boot process (directly after we run * This is called very early in the boot process (directly after we run
* a feature detect on the boot CPU). No need to worry about other CPUs * a feature detect on the boot CPU). No need to worry about other CPUs
...@@ -193,6 +211,7 @@ void __init apply_boot_alternatives(void) ...@@ -193,6 +211,7 @@ void __init apply_boot_alternatives(void)
/* If called on non-boot cpu things could go wrong */ /* If called on non-boot cpu things could go wrong */
WARN_ON(smp_processor_id() != 0); WARN_ON(smp_processor_id() != 0);
probe_vendor_features(0);
_apply_alternatives((struct alt_entry *)__alt_start, _apply_alternatives((struct alt_entry *)__alt_start,
(struct alt_entry *)__alt_end, (struct alt_entry *)__alt_end,
RISCV_ALTERNATIVES_BOOT); RISCV_ALTERNATIVES_BOOT);
......
...@@ -30,6 +30,9 @@ unsigned long elf_hwcap __read_mostly; ...@@ -30,6 +30,9 @@ unsigned long elf_hwcap __read_mostly;
/* Host ISA bitmap */ /* Host ISA bitmap */
static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
/* Performance information */
DEFINE_PER_CPU(long, misaligned_access_speed);
/** /**
* riscv_isa_extension_base() - Get base extension word * riscv_isa_extension_base() - Get base extension word
* *
......
...@@ -168,6 +168,7 @@ asmlinkage __visible void smp_callin(void) ...@@ -168,6 +168,7 @@ asmlinkage __visible void smp_callin(void)
notify_cpu_starting(curr_cpuid); notify_cpu_starting(curr_cpuid);
numa_add_cpu(curr_cpuid); numa_add_cpu(curr_cpuid);
set_cpu_online(curr_cpuid, 1); set_cpu_online(curr_cpuid, 1);
probe_vendor_features(curr_cpuid);
/* /*
* Remote TLB flushes are ignored while the CPU is offline, so emit * Remote TLB flushes are ignored while the CPU is offline, so emit
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h> #include <asm/hwprobe.h>
#include <asm/sbi.h> #include <asm/sbi.h>
#include <asm/switch_to.h> #include <asm/switch_to.h>
...@@ -117,6 +118,29 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair, ...@@ -117,6 +118,29 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair,
pair->value = id; pair->value = id;
} }
static u64 hwprobe_misaligned(const struct cpumask *cpus)
{
int cpu;
u64 perf = -1ULL;
for_each_cpu(cpu, cpus) {
int this_perf = per_cpu(misaligned_access_speed, cpu);
if (perf == -1ULL)
perf = this_perf;
if (perf != this_perf) {
perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
break;
}
}
if (perf == -1ULL)
return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
return perf;
}
static void hwprobe_one_pair(struct riscv_hwprobe *pair, static void hwprobe_one_pair(struct riscv_hwprobe *pair,
const struct cpumask *cpus) const struct cpumask *cpus)
{ {
...@@ -146,6 +170,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, ...@@ -146,6 +170,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
break; break;
case RISCV_HWPROBE_KEY_CPUPERF_0:
pair->value = hwprobe_misaligned(cpus);
break;
/* /*
* For forward compatibility, unknown keys don't fail the whole * For forward compatibility, unknown keys don't fail the whole
* call, but get their element key set to -1 and value set to 0 * call, but get their element key set to -1 and value set to 0
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment