Commit f413aae9 authored by Charlie Jenkins, committed by Palmer Dabbelt

riscv: Set unaligned access speed at compile time

Introduce Kconfig options to set the kernel's unaligned access support.
These options provide a non-portable alternative to the runtime
unaligned access probe.

To support this, the unaligned access probing code is moved into its
own file and gated behind a new RISCV_PROBE_UNALIGNED_ACCESS option.
Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Tested-by: Samuel Holland <samuel.holland@sifive.com>
Link: https://lore.kernel.org/r/20240308-disable_misaligned_probe_config-v9-4-a388770ba0ce@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
parent 6e5ce7f2
@@ -688,27 +688,61 @@ config THREAD_SIZE_ORDER
           affects irq stack size, which is equal to thread stack size.

 config RISCV_MISALIGNED
-        bool "Support misaligned load/store traps for kernel and userspace"
+        bool
         select SYSCTL_ARCH_UNALIGN_ALLOW
-        default y
         help
-          Say Y here if you want the kernel to embed support for misaligned
-          load/store for both kernel and userspace. When disable, misaligned
-          accesses will generate SIGBUS in userspace and panic in kernel.
+          Embed support for emulating misaligned loads and stores.
+
+choice
+        prompt "Unaligned Accesses Support"
+        default RISCV_PROBE_UNALIGNED_ACCESS
+        help
+          This determines the level of support for unaligned accesses. This
+          information is used by the kernel to perform optimizations. It is also
+          exposed to user space via the hwprobe syscall. The hardware will be
+          probed at boot by default.
+
+config RISCV_PROBE_UNALIGNED_ACCESS
+        bool "Probe for hardware unaligned access support"
+        select RISCV_MISALIGNED
+        help
+          During boot, the kernel will run a series of tests to determine the
+          speed of unaligned accesses. This probing will dynamically determine
+          the speed of unaligned accesses on the underlying system. If unaligned
+          memory accesses trap into the kernel as they are not supported by the
+          system, the kernel will emulate the unaligned accesses to preserve the
+          UABI.
+
+config RISCV_EMULATED_UNALIGNED_ACCESS
+        bool "Emulate unaligned access where system support is missing"
+        select RISCV_MISALIGNED
+        help
+          If unaligned memory accesses trap into the kernel as they are not
+          supported by the system, the kernel will emulate the unaligned
+          accesses to preserve the UABI. When the underlying system does support
+          unaligned accesses, the unaligned accesses are assumed to be slow.
+
+config RISCV_SLOW_UNALIGNED_ACCESS
+        bool "Assume the system supports slow unaligned memory accesses"
+        depends on NONPORTABLE
+        help
+          Assume that the system supports slow unaligned memory accesses. The
+          kernel and userspace programs may not be able to run at all on systems
+          that do not support unaligned memory accesses.

 config RISCV_EFFICIENT_UNALIGNED_ACCESS
-        bool "Assume the CPU supports fast unaligned memory accesses"
+        bool "Assume the system supports fast unaligned memory accesses"
         depends on NONPORTABLE
         select DCACHE_WORD_ACCESS if MMU
         select HAVE_EFFICIENT_UNALIGNED_ACCESS
         help
-          Say Y here if you want the kernel to assume that the CPU supports
-          efficient unaligned memory accesses.  When enabled, this option
-          improves the performance of the kernel on such CPUs.  However, the
-          kernel will run much more slowly, or will not be able to run at all,
-          on CPUs that do not support efficient unaligned memory accesses.
+          Assume that the system supports fast unaligned memory accesses. When
+          enabled, this option improves the performance of the kernel on such
+          systems. However, the kernel and userspace programs will run much more
+          slowly, or will not be able to run at all, on systems that do not
+          support efficient unaligned memory accesses.

-          If unsure what to do here, say N.
+endchoice

 endmenu # "Platform type"
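The help text above notes that the selected level of support is exposed to user space via the hwprobe syscall. For reference, a minimal user-space sketch (not part of this patch; it assumes a toolchain whose uapi headers provide <asm/hwprobe.h> and __NR_riscv_hwprobe) that reads the reported misaligned-access performance:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
        struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };

        /* One key, all online CPUs (cpusetsize == 0, cpus == NULL), no flags. */
        if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
                return 1;

        switch (pair.value & RISCV_HWPROBE_MISALIGNED_MASK) {
        case RISCV_HWPROBE_MISALIGNED_EMULATED:
                puts("misaligned accesses: emulated");
                break;
        case RISCV_HWPROBE_MISALIGNED_SLOW:
                puts("misaligned accesses: slow");
                break;
        case RISCV_HWPROBE_MISALIGNED_FAST:
                puts("misaligned accesses: fast");
                break;
        default:
                puts("misaligned accesses: unknown");
                break;
        }
        return 0;
}

With CONFIG_RISCV_PROBE_UNALIGNED_ACCESS the reported value comes from the boot-time measurement (or from the emulation path if the access trapped); with the non-portable options it reflects the compile-time choice, as the hwprobe change later in this patch shows.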
......
@@ -28,37 +28,39 @@ struct riscv_isainfo {

 DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);

-DECLARE_PER_CPU(long, misaligned_access_speed);

 /* Per-cpu ISA extensions. */
 extern struct riscv_isainfo hart_isa[NR_CPUS];

 void riscv_user_isa_enable(void);

-#ifdef CONFIG_RISCV_MISALIGNED
-bool unaligned_ctl_available(void);
+#if defined(CONFIG_RISCV_MISALIGNED)
 bool check_unaligned_access_emulated_all_cpus(void);
 void unaligned_emulation_finish(void);
+bool unaligned_ctl_available(void);
+DECLARE_PER_CPU(long, misaligned_access_speed);
 #else
 static inline bool unaligned_ctl_available(void)
 {
         return false;
 }
-
-static inline bool check_unaligned_access_emulated(int cpu)
-{
-        return false;
-}
-
-static inline void unaligned_emulation_finish(void) {}
 #endif

+#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
 DECLARE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);

 static __always_inline bool has_fast_unaligned_accesses(void)
 {
         return static_branch_likely(&fast_unaligned_access_speed_key);
 }
+#else
+static __always_inline bool has_fast_unaligned_accesses(void)
+{
+        if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+                return true;
+        else
+                return false;
+}
+#endif

 unsigned long riscv_get_elf_hwcap(void);
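With this change, has_fast_unaligned_accesses() resolves either to the run-time static branch (when probing is enabled) or to a compile-time constant. A hypothetical in-kernel caller (illustration only, not taken from this patch) could use it to choose between a word-oriented and a strictly byte-sized copy path:

#include <linux/string.h>
#include <linux/types.h>
#include <asm/cpufeature.h>

/* Hypothetical helper, not part of this patch: copy to a possibly
 * misaligned destination, relying on unaligned word accesses only when
 * they are known (or assumed) to be fast. */
static void copy_to_possibly_unaligned(void *dst, const void *src, size_t len)
{
        if (has_fast_unaligned_accesses()) {
                memcpy(dst, src, len);          /* word-sized accesses are acceptable here */
        } else {
                u8 *d = dst;
                const u8 *s = src;

                while (len--)
                        *d++ = *s++;            /* byte accesses are always aligned */
        }
}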
......
@@ -38,7 +38,6 @@ extra-y += vmlinux.lds
 obj-y += head.o
 obj-y += soc.o
 obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o
-obj-y += copy-unaligned.o
 obj-y += cpu.o
 obj-y += cpufeature.o
 obj-y += entry.o

@@ -62,6 +61,9 @@ obj-y += tests/
 obj-$(CONFIG_MMU) += vdso.o vdso/

 obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
+obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o
+obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o

 obj-$(CONFIG_FPU) += fpu.o
 obj-$(CONFIG_RISCV_ISA_V) += vector.o
 obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o
......
@@ -11,7 +11,6 @@
 #include <linux/cpu.h>
 #include <linux/cpuhotplug.h>
 #include <linux/ctype.h>
-#include <linux/jump_label.h>
 #include <linux/log2.h>
 #include <linux/memory.h>
 #include <linux/module.h>

@@ -21,20 +20,12 @@
 #include <asm/cacheflush.h>
 #include <asm/cpufeature.h>
 #include <asm/hwcap.h>
-#include <asm/hwprobe.h>
 #include <asm/patch.h>
 #include <asm/processor.h>
 #include <asm/vector.h>

-#include "copy-unaligned.h"

 #define NUM_ALPHA_EXTS ('z' - 'a' + 1)

-#define MISALIGNED_ACCESS_JIFFIES_LG2 1
-#define MISALIGNED_BUFFER_SIZE 0x4000
-#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
-#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)

 unsigned long elf_hwcap __read_mostly;

 /* Host ISA bitmap */

@@ -43,11 +34,6 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
 /* Per-cpu ISA extensions. */
 struct riscv_isainfo hart_isa[NR_CPUS];

-/* Performance information */
-DEFINE_PER_CPU(long, misaligned_access_speed);
-
-static cpumask_t fast_misaligned_access;

 /**
  * riscv_isa_extension_base() - Get base extension word
  *

@@ -706,264 +692,6 @@ unsigned long riscv_get_elf_hwcap(void)
         return hwcap;
 }
static int check_unaligned_access(void *param)
{
int cpu = smp_processor_id();
u64 start_cycles, end_cycles;
u64 word_cycles;
u64 byte_cycles;
int ratio;
unsigned long start_jiffies, now;
struct page *page = param;
void *dst;
void *src;
long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
if (IS_ENABLED(CONFIG_RISCV_MISALIGNED) &&
per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
return 0;
/* Make an unaligned destination buffer. */
dst = (void *)((unsigned long)page_address(page) | 0x1);
/* Unalign src as well, but differently (off by 1 + 2 = 3). */
src = dst + (MISALIGNED_BUFFER_SIZE / 2);
src += 2;
word_cycles = -1ULL;
/* Do a warmup. */
__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
preempt_disable();
start_jiffies = jiffies;
while ((now = jiffies) == start_jiffies)
cpu_relax();
/*
* For a fixed amount of time, repeatedly try the function, and take
* the best time in cycles as the measurement.
*/
while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
start_cycles = get_cycles64();
/* Ensure the CSR read can't reorder WRT to the copy. */
mb();
__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
/* Ensure the copy ends before the end time is snapped. */
mb();
end_cycles = get_cycles64();
if ((end_cycles - start_cycles) < word_cycles)
word_cycles = end_cycles - start_cycles;
}
byte_cycles = -1ULL;
__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
start_jiffies = jiffies;
while ((now = jiffies) == start_jiffies)
cpu_relax();
while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
start_cycles = get_cycles64();
mb();
__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
mb();
end_cycles = get_cycles64();
if ((end_cycles - start_cycles) < byte_cycles)
byte_cycles = end_cycles - start_cycles;
}
preempt_enable();
/* Don't divide by zero. */
if (!word_cycles || !byte_cycles) {
pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
cpu);
return 0;
}
if (word_cycles < byte_cycles)
speed = RISCV_HWPROBE_MISALIGNED_FAST;
ratio = div_u64((byte_cycles * 100), word_cycles);
pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
cpu,
ratio / 100,
ratio % 100,
(speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
per_cpu(misaligned_access_speed, cpu) = speed;
/*
* Set the value of fast_misaligned_access of a CPU. These operations
* are atomic to avoid race conditions.
*/
if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
cpumask_set_cpu(cpu, &fast_misaligned_access);
else
cpumask_clear_cpu(cpu, &fast_misaligned_access);
return 0;
}
static void check_unaligned_access_nonboot_cpu(void *param)
{
unsigned int cpu = smp_processor_id();
struct page **pages = param;
if (smp_processor_id() != 0)
check_unaligned_access(pages[cpu]);
}
DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
{
if (cpumask_weight(mask) == weight)
static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key);
else
static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key);
}
static void set_unaligned_access_static_branches_except_cpu(int cpu)
{
/*
* Same as set_unaligned_access_static_branches, except excludes the
* given CPU from the result. When a CPU is hotplugged into an offline
* state, this function is called before the CPU is set to offline in
* the cpumask, and thus the CPU needs to be explicitly excluded.
*/
cpumask_t fast_except_me;
cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
cpumask_clear_cpu(cpu, &fast_except_me);
modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
}
static void set_unaligned_access_static_branches(void)
{
/*
* This will be called after check_unaligned_access_all_cpus so the
* result of unaligned access speed for all CPUs will be available.
*
* To avoid the number of online cpus changing between reading
* cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
* held before calling this function.
*/
cpumask_t fast_and_online;
cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
}
static int lock_and_set_unaligned_access_static_branch(void)
{
cpus_read_lock();
set_unaligned_access_static_branches();
cpus_read_unlock();
return 0;
}
arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
static int riscv_online_cpu(unsigned int cpu)
{
static struct page *buf;
/* We are already set since the last check */
if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
goto exit;
buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
if (!buf) {
pr_warn("Allocation failure, not measuring misaligned performance\n");
return -ENOMEM;
}
check_unaligned_access(buf);
__free_pages(buf, MISALIGNED_BUFFER_ORDER);
exit:
set_unaligned_access_static_branches();
return 0;
}
static int riscv_offline_cpu(unsigned int cpu)
{
set_unaligned_access_static_branches_except_cpu(cpu);
return 0;
}
/* Measure unaligned access speed on all CPUs present at boot in parallel. */
static int check_unaligned_access_speed_all_cpus(void)
{
unsigned int cpu;
unsigned int cpu_count = num_possible_cpus();
struct page **bufs = kzalloc(cpu_count * sizeof(struct page *),
GFP_KERNEL);
if (!bufs) {
pr_warn("Allocation failure, not measuring misaligned performance\n");
return 0;
}
/*
* Allocate separate buffers for each CPU so there's no fighting over
* cache lines.
*/
for_each_cpu(cpu, cpu_online_mask) {
bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
if (!bufs[cpu]) {
pr_warn("Allocation failure, not measuring misaligned performance\n");
goto out;
}
}
/* Check everybody except 0, who stays behind to tend jiffies. */
on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
/* Check core 0. */
smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
/*
* Setup hotplug callbacks for any new CPUs that come online or go
* offline.
*/
cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
riscv_online_cpu, riscv_offline_cpu);
out:
for_each_cpu(cpu, cpu_online_mask) {
if (bufs[cpu])
__free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
}
kfree(bufs);
return 0;
}
#ifdef CONFIG_RISCV_MISALIGNED
static int check_unaligned_access_all_cpus(void)
{
bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
if (!all_cpus_emulated)
return check_unaligned_access_speed_all_cpus();
return 0;
}
#else
static int check_unaligned_access_all_cpus(void)
{
return check_unaligned_access_speed_all_cpus();
}
#endif
arch_initcall(check_unaligned_access_all_cpus);
 void riscv_user_isa_enable(void)
 {
         if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))
......
@@ -147,6 +147,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
         return (pair.value & ext);
 }

+#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
 static u64 hwprobe_misaligned(const struct cpumask *cpus)
 {
         int cpu;

@@ -169,6 +170,18 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
         return perf;
 }
+#else
+static u64 hwprobe_misaligned(const struct cpumask *cpus)
+{
+        if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS))
+                return RISCV_HWPROBE_MISALIGNED_FAST;
+
+        if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available())
+                return RISCV_HWPROBE_MISALIGNED_EMULATED;
+
+        return RISCV_HWPROBE_MISALIGNED_SLOW;
+}
+#endif

 static void hwprobe_one_pair(struct riscv_hwprobe *pair,
                              const struct cpumask *cpus)
......
@@ -413,7 +413,9 @@ int handle_misaligned_load(struct pt_regs *regs)
         perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);

+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
         *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
+#endif

         if (!unaligned_enabled)
                 return -1;
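The handler above records RISCV_HWPROBE_MISALIGNED_EMULATED for the CPU that took the trap, but only when the probing config is enabled. A small user-space demonstration (illustrative, not part of this patch; riscv64 only) that forces a genuinely misaligned 64-bit load; on hardware without native support it is this emulation path that lets it complete instead of delivering SIGBUS:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
        unsigned char buf[16] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
        uint64_t val;

        /* Force an 8-byte load from buf + 1, a misaligned address; the
         * inline asm keeps the compiler from splitting or aligning it. */
        asm volatile("ld %0, 0(%1)" : "=r"(val) : "r"(buf + 1) : "memory");

        printf("misaligned load returned 0x%016" PRIx64 "\n", val);
        return 0;
}

The Kconfig choice does not change the result of the load, only how it is carried out (in hardware, or in the trap handler) and what hwprobe subsequently reports.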
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2024 Rivos Inc.
*/
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/jump_label.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/types.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
#include "copy-unaligned.h"
#define MISALIGNED_ACCESS_JIFFIES_LG2 1
#define MISALIGNED_BUFFER_SIZE 0x4000
#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
DEFINE_PER_CPU(long, misaligned_access_speed);
#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
static cpumask_t fast_misaligned_access;
static int check_unaligned_access(void *param)
{
int cpu = smp_processor_id();
u64 start_cycles, end_cycles;
u64 word_cycles;
u64 byte_cycles;
int ratio;
unsigned long start_jiffies, now;
struct page *page = param;
void *dst;
void *src;
long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
return 0;
/* Make an unaligned destination buffer. */
dst = (void *)((unsigned long)page_address(page) | 0x1);
/* Unalign src as well, but differently (off by 1 + 2 = 3). */
src = dst + (MISALIGNED_BUFFER_SIZE / 2);
src += 2;
word_cycles = -1ULL;
/* Do a warmup. */
__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
preempt_disable();
start_jiffies = jiffies;
while ((now = jiffies) == start_jiffies)
cpu_relax();
/*
* For a fixed amount of time, repeatedly try the function, and take
* the best time in cycles as the measurement.
*/
while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
start_cycles = get_cycles64();
/* Ensure the CSR read can't reorder WRT to the copy. */
mb();
__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
/* Ensure the copy ends before the end time is snapped. */
mb();
end_cycles = get_cycles64();
if ((end_cycles - start_cycles) < word_cycles)
word_cycles = end_cycles - start_cycles;
}
byte_cycles = -1ULL;
__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
start_jiffies = jiffies;
while ((now = jiffies) == start_jiffies)
cpu_relax();
while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
start_cycles = get_cycles64();
mb();
__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
mb();
end_cycles = get_cycles64();
if ((end_cycles - start_cycles) < byte_cycles)
byte_cycles = end_cycles - start_cycles;
}
preempt_enable();
/* Don't divide by zero. */
if (!word_cycles || !byte_cycles) {
pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
cpu);
return 0;
}
if (word_cycles < byte_cycles)
speed = RISCV_HWPROBE_MISALIGNED_FAST;
ratio = div_u64((byte_cycles * 100), word_cycles);
pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
cpu,
ratio / 100,
ratio % 100,
(speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
per_cpu(misaligned_access_speed, cpu) = speed;
/*
* Set the value of fast_misaligned_access of a CPU. These operations
* are atomic to avoid race conditions.
*/
if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
cpumask_set_cpu(cpu, &fast_misaligned_access);
else
cpumask_clear_cpu(cpu, &fast_misaligned_access);
return 0;
}
static void check_unaligned_access_nonboot_cpu(void *param)
{
unsigned int cpu = smp_processor_id();
struct page **pages = param;
if (smp_processor_id() != 0)
check_unaligned_access(pages[cpu]);
}
DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
{
if (cpumask_weight(mask) == weight)
static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key);
else
static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key);
}
static void set_unaligned_access_static_branches_except_cpu(int cpu)
{
/*
* Same as set_unaligned_access_static_branches, except excludes the
* given CPU from the result. When a CPU is hotplugged into an offline
* state, this function is called before the CPU is set to offline in
* the cpumask, and thus the CPU needs to be explicitly excluded.
*/
cpumask_t fast_except_me;
cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
cpumask_clear_cpu(cpu, &fast_except_me);
modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
}
static void set_unaligned_access_static_branches(void)
{
/*
* This will be called after check_unaligned_access_all_cpus so the
* result of unaligned access speed for all CPUs will be available.
*
* To avoid the number of online cpus changing between reading
* cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
* held before calling this function.
*/
cpumask_t fast_and_online;
cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
}
static int lock_and_set_unaligned_access_static_branch(void)
{
cpus_read_lock();
set_unaligned_access_static_branches();
cpus_read_unlock();
return 0;
}
arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
static int riscv_online_cpu(unsigned int cpu)
{
static struct page *buf;
/* We are already set since the last check */
if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
goto exit;
buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
if (!buf) {
pr_warn("Allocation failure, not measuring misaligned performance\n");
return -ENOMEM;
}
check_unaligned_access(buf);
__free_pages(buf, MISALIGNED_BUFFER_ORDER);
exit:
set_unaligned_access_static_branches();
return 0;
}
static int riscv_offline_cpu(unsigned int cpu)
{
set_unaligned_access_static_branches_except_cpu(cpu);
return 0;
}
/* Measure unaligned access speed on all CPUs present at boot in parallel. */
static int check_unaligned_access_speed_all_cpus(void)
{
unsigned int cpu;
unsigned int cpu_count = num_possible_cpus();
struct page **bufs = kzalloc(cpu_count * sizeof(struct page *),
GFP_KERNEL);
if (!bufs) {
pr_warn("Allocation failure, not measuring misaligned performance\n");
return 0;
}
/*
* Allocate separate buffers for each CPU so there's no fighting over
* cache lines.
*/
for_each_cpu(cpu, cpu_online_mask) {
bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
if (!bufs[cpu]) {
pr_warn("Allocation failure, not measuring misaligned performance\n");
goto out;
}
}
/* Check everybody except 0, who stays behind to tend jiffies. */
on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
/* Check core 0. */
smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
/*
* Setup hotplug callbacks for any new CPUs that come online or go
* offline.
*/
cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
riscv_online_cpu, riscv_offline_cpu);
out:
for_each_cpu(cpu, cpu_online_mask) {
if (bufs[cpu])
__free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
}
kfree(bufs);
return 0;
}
static int check_unaligned_access_all_cpus(void)
{
bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
if (!all_cpus_emulated)
return check_unaligned_access_speed_all_cpus();
return 0;
}
#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
static int check_unaligned_access_all_cpus(void)
{
check_unaligned_access_emulated_all_cpus();
return 0;
}
#endif
arch_initcall(check_unaligned_access_all_cpus);
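As a worked illustration of the reporting arithmetic above (the cycle counts are invented): if the best word-copy time is 12,000 cycles and the best byte-copy time is 54,000 cycles, then ratio = div_u64(54000 * 100, 12000) = 450, which pr_info prints as "4.50", and because word_cycles < byte_cycles the CPU is recorded as RISCV_HWPROBE_MISALIGNED_FAST and added to fast_misaligned_access.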