Commit ab0f7746, authored by Andrew Jones, committed by Palmer Dabbelt

RISC-V: Use Zicboz in clear_page when available

Using memset() to zero a 4K page takes 563 total instructions, where
20 are branches. clear_page(), with Zicboz and a 64 byte block size,
takes 169 total instructions, where 4 are branches and 33 are nops.
Even though the block size is a variable, thanks to alternatives, we
can still implement a Duff device without having to do any preliminary
calculations. This is achieved by using the alternatives' cpufeature
value (the upper 16 bits of patch_id). The value used is the maximum
zicboz block size order accepted at the patch site. This enables us
to stop patching / unrolling when 4K bytes have been zeroed (we would
loop and continue after 4K if the page size were larger).

For 4K pages, unrolling 16 times allows block sizes of 64 and 128 to
only loop a few times and larger block sizes to not loop at all. Since
cbo.zero doesn't take an offset, we also need an 'add' after each
instruction, making the loop body 112 to 160 bytes. Hopefully this
is small enough to not cause icache misses.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20230224162631.405473-7-ajones@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
parent d25f2563
...@@ -457,6 +457,19 @@ config RISCV_ISA_ZICBOM ...@@ -457,6 +457,19 @@ config RISCV_ISA_ZICBOM
If you don't know what to do here, say Y. If you don't know what to do here, say Y.
# Zicboz (cbo.zero) support. RISCV_ALTERNATIVE is selected because the
# Zicboz-based clear_page() is enabled at its patch sites through the
# ALTERNATIVE mechanism rather than by a runtime branch.
config RISCV_ISA_ZICBOZ
bool "Zicboz extension support for faster zeroing of memory"
depends on !XIP_KERNEL && MMU
select RISCV_ALTERNATIVE
default y
help
Enable the use of the ZICBOZ extension (cbo.zero instruction)
when available.
The Zicboz extension is used for faster zeroing of memory.
If you don't know what to do here, say Y.
config TOOLCHAIN_HAS_ZIHINTPAUSE config TOOLCHAIN_HAS_ZIHINTPAUSE
bool bool
default y default y
......
...@@ -192,4 +192,8 @@ ...@@ -192,4 +192,8 @@
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \
RS1(base), SIMM12(2)) RS1(base), SIMM12(2))
/*
 * Emit a cbo.zero instruction operating on the address held in register
 * 'base'. The instruction is built by hand as an I-type MISC_MEM encoding
 * (funct3 = 2, rd = 0, imm12 = 4) instead of using an assembler mnemonic.
 * cbo.zero takes no offset operand, so callers must advance 'base'
 * themselves between invocations.
 */
#define CBO_zero(base) \
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \
RS1(base), SIMM12(4))
#endif /* __ASM_INSN_DEF_H */ #endif /* __ASM_INSN_DEF_H */
...@@ -49,10 +49,14 @@ ...@@ -49,10 +49,14 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#ifdef CONFIG_RISCV_ISA_ZICBOZ
/*
 * Out-of-line page zeroing routine; clear_page.o is only built when
 * CONFIG_RISCV_ISA_ZICBOZ is enabled.
 */
void clear_page(void *page);
#else
/* Without Zicboz support a page is zeroed with a plain memset(). */
#define clear_page(pgaddr) memset((pgaddr), 0, PAGE_SIZE) #define clear_page(pgaddr) memset((pgaddr), 0, PAGE_SIZE)
#endif
#define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) #define copy_page(to, from) memcpy((to), (from), PAGE_SIZE)
#define clear_user_page(pgaddr, vaddr, page) memset((pgaddr), 0, PAGE_SIZE) #define clear_user_page(pgaddr, vaddr, page) clear_page(pgaddr)
#define copy_user_page(vto, vfrom, vaddr, topg) \ #define copy_user_page(vto, vfrom, vaddr, topg) \
memcpy((vto), (vfrom), PAGE_SIZE) memcpy((vto), (vfrom), PAGE_SIZE)
......
...@@ -293,6 +293,17 @@ static bool riscv_cpufeature_patch_check(u16 id, u16 value) ...@@ -293,6 +293,17 @@ static bool riscv_cpufeature_patch_check(u16 id, u16 value)
if (!value) if (!value)
return true; return true;
switch (id) {
case RISCV_ISA_EXT_ZICBOZ:
/*
 * Zicboz alternative applications provide the maximum
 * supported block size order (log2 of the size in bytes),
 * or zero when it doesn't matter; the zero case has already
 * returned true above. If the current block size exceeds
 * 1 << value, then the alternative cannot be applied.
 */
return riscv_cboz_block_size <= (1U << value);
}
return false; return false;
} }
......
...@@ -8,5 +8,6 @@ lib-y += strlen.o ...@@ -8,5 +8,6 @@ lib-y += strlen.o
lib-y += strncmp.o lib-y += strncmp.o
lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2023 Ventana Micro Systems Inc.
*/
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/alternative-macros.h>
#include <asm-generic/export.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/page.h>
/*
 * CBOZ_ALT(order, old, new) - Zicboz-specific ALTERNATIVE wrapper.
 *
 * Emits 'old' by default and lets the alternatives framework replace it
 * with 'new'. 'order' is placed in the upper 16 bits of the patch id,
 * next to RISCV_ISA_EXT_ZICBOZ in the lower bits; it is the largest
 * cbo.zero block size order (log2 of the size in bytes) for which 'new'
 * may be patched in at this site.
 *
 * NOTE(review): the literal 0 is presumably the vendor id argument of
 * ALTERNATIVE (i.e. not vendor specific) - confirm against
 * asm/alternative-macros.h.
 */
#define CBOZ_ALT(order, old, new) \
ALTERNATIVE(old, new, 0, \
((order) << 16) | RISCV_ISA_EXT_ZICBOZ, \
CONFIG_RISCV_ISA_ZICBOZ)
/*
 * void clear_page(void *page)
 *
 * Zero one page starting at the address in a0.
 *
 * When the Zicboz alternatives are applied, the page is zeroed with an
 * unrolled sequence of cbo.zero instructions, each followed by an 'add'
 * that advances a0 by the block size. After 1, 2, 4 and 8 blocks there is
 * a CBOZ_ALT patch site holding "bltu ...; ret". A site is turned into
 * nops only when the block size is small enough that more blocks are
 * still needed to cover 4K, so larger block sizes reach their loop check
 * (and return) earlier. Otherwise the routine tail-calls
 * __memset(page, 0, PAGE_SIZE).
 *
 * Registers: a0 = current address, a1 = block size (or 0 for the memset
 * fill value), a2 = PAGE_SIZE, then end-of-page address.
 */
SYM_FUNC_START(clear_page)
li a2, PAGE_SIZE
/*
 * If Zicboz isn't present, or somehow has a block
 * size larger than 4K, then fallback to memset.
 */
CBOZ_ALT(12, "j .Lno_zicboz", "nop")
/* a1 = cbo.zero block size in bytes, a2 = address one past the page */
lw a1, riscv_cboz_block_size
add a2, a0, a2
.Lzero_loop:
/* Block 1 of the unrolled body. */
CBO_zero(a0)
add a0, a0, a1
/* Loop check kept here unless the block size is at most 2^11 bytes. */
CBOZ_ALT(11, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
/* Block 2. */
CBO_zero(a0)
add a0, a0, a1
/* Loop check kept here unless the block size is at most 2^10 bytes. */
CBOZ_ALT(10, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
/* Blocks 3-4. */
CBO_zero(a0)
add a0, a0, a1
CBO_zero(a0)
add a0, a0, a1
/* Loop check kept here unless the block size is at most 2^9 bytes. */
CBOZ_ALT(9, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
/* Blocks 5-8. */
CBO_zero(a0)
add a0, a0, a1
CBO_zero(a0)
add a0, a0, a1
CBO_zero(a0)
add a0, a0, a1
CBO_zero(a0)
add a0, a0, a1
/* Loop check kept here unless the block size is at most 2^8 bytes. */
CBOZ_ALT(8, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
/* Blocks 9-16. */
CBO_zero(a0)
add a0, a0, a1
CBO_zero(a0)
add a0, a0, a1
CBO_zero(a0)
add a0, a0, a1
CBO_zero(a0)
add a0, a0, a1
CBO_zero(a0)
add a0, a0, a1
CBO_zero(a0)
add a0, a0, a1
CBO_zero(a0)
add a0, a0, a1
CBO_zero(a0)
add a0, a0, a1
/* Unconditional loop check: repeat the body until a0 reaches the end. */
bltu a0, a2, .Lzero_loop
ret
.Lno_zicboz:
/* Fallback: a0 = page, a1 = 0 (fill value), a2 = PAGE_SIZE (still intact). */
li a1, 0
tail __memset
SYM_FUNC_END(clear_page)
EXPORT_SYMBOL(clear_page)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment