Commit 611c9d88 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'loongarch-6.4' of...

Merge tag 'loongarch-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson

Pull LoongArch updates from Huacai Chen:

 - Better backtraces for humanization

 - Relay BCE exceptions to userland as SIGSEGV

 - Provide kernel fpu functions

 - Optimize memory ops (memset/memcpy/memmove)

 - Optimize checksum and crc32(c) calculation

 - Add ARCH_HAS_FORTIFY_SOURCE selection

 - Add function error injection support

 - Add ftrace with direct call support

 - Add basic perf tools support

* tag 'loongarch-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson: (24 commits)
  tools/perf: Add basic support for LoongArch
  LoongArch: ftrace: Add direct call trampoline samples support
  LoongArch: ftrace: Add direct call support
  LoongArch: ftrace: Implement ftrace_find_callable_addr() to simplify code
  LoongArch: ftrace: Fix build error if DYNAMIC_FTRACE_WITH_REGS is not set
  LoongArch: ftrace: Abstract DYNAMIC_FTRACE_WITH_ARGS accesses
  LoongArch: Add support for function error injection
  LoongArch: Add ARCH_HAS_FORTIFY_SOURCE selection
  LoongArch: crypto: Add crc32 and crc32c hw acceleration
  LoongArch: Add checksum optimization for 64-bit system
  LoongArch: Optimize memory ops (memset/memcpy/memmove)
  LoongArch: Provide kernel fpu functions
  LoongArch: Relay BCE exceptions to userland as SIGSEGV with si_code=SEGV_BNDERR
  LoongArch: Tweak the BADV and CPUCFG.PRID lines in show_regs()
  LoongArch: Humanize the ESTAT line when showing registers
  LoongArch: Humanize the ECFG line when showing registers
  LoongArch: Humanize the EUEN line when showing registers
  LoongArch: Humanize the PRMD line when showing registers
  LoongArch: Humanize the CRMD line when showing registers
  LoongArch: Fix format of CSR lines during show_regs()
  ...
parents a1f749de 2fa5ebe3
......@@ -10,6 +10,7 @@ config LOONGARCH
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
......@@ -93,6 +94,7 @@ config LOONGARCH
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
......@@ -100,6 +102,7 @@ config LOONGARCH
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GENERIC_VDSO
......@@ -118,6 +121,8 @@ config LOONGARCH
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
select HAVE_SAMPLE_FTRACE_DIRECT
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_SETUP_PER_CPU_AREA if NUMA
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
......
......@@ -115,6 +115,8 @@ endif
libs-y += arch/loongarch/lib/
libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
drivers-y += arch/loongarch/crypto/
# suspend and hibernation support
drivers-$(CONFIG_PM) += arch/loongarch/power/
......
# SPDX-License-Identifier: GPL-2.0
menu "Accelerated Cryptographic Algorithms for CPU (loongarch)"
config CRYPTO_CRC32_LOONGARCH
tristate "CRC32c and CRC32"
select CRC32
select CRYPTO_HASH
help
CRC32c and CRC32 CRC algorithms
Architecture: LoongArch with CRC32 instructions
endmenu
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for LoongArch crypto files..
#
obj-$(CONFIG_CRYPTO_CRC32_LOONGARCH) += crc32-loongarch.o
// SPDX-License-Identifier: GPL-2.0
/*
* crc32.c - CRC32 and CRC32C using LoongArch crc* instructions
*
* Module based on mips/crypto/crc32-mips.c
*
* Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
* Copyright (C) 2018 MIPS Tech, LLC
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
#include <linux/module.h>
#include <crypto/internal/hash.h>
#include <asm/cpu-features.h>
#include <asm/unaligned.h>
#define _CRC32(crc, value, size, type) \
do { \
__asm__ __volatile__( \
#type ".w." #size ".w" " %0, %1, %0\n\t"\
: "+r" (crc) \
: "r" (value) \
: "memory"); \
} while (0)
#define CRC32(crc, value, size) _CRC32(crc, value, size, crc)
#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc)
static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len)
{
u32 crc = crc_;
while (len >= sizeof(u64)) {
u64 value = get_unaligned_le64(p);
CRC32(crc, value, d);
p += sizeof(u64);
len -= sizeof(u64);
}
if (len & sizeof(u32)) {
u32 value = get_unaligned_le32(p);
CRC32(crc, value, w);
p += sizeof(u32);
len -= sizeof(u32);
}
if (len & sizeof(u16)) {
u16 value = get_unaligned_le16(p);
CRC32(crc, value, h);
p += sizeof(u16);
}
if (len & sizeof(u8)) {
u8 value = *p++;
CRC32(crc, value, b);
}
return crc;
}
static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len)
{
u32 crc = crc_;
while (len >= sizeof(u64)) {
u64 value = get_unaligned_le64(p);
CRC32C(crc, value, d);
p += sizeof(u64);
len -= sizeof(u64);
}
if (len & sizeof(u32)) {
u32 value = get_unaligned_le32(p);
CRC32C(crc, value, w);
p += sizeof(u32);
len -= sizeof(u32);
}
if (len & sizeof(u16)) {
u16 value = get_unaligned_le16(p);
CRC32C(crc, value, h);
p += sizeof(u16);
}
if (len & sizeof(u8)) {
u8 value = *p++;
CRC32C(crc, value, b);
}
return crc;
}
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
struct chksum_ctx {
u32 key;
};
struct chksum_desc_ctx {
u32 crc;
};
static int chksum_init(struct shash_desc *desc)
{
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = mctx->key;
return 0;
}
/*
* Setting the seed allows arbitrary accumulators and flexible XOR policy
* If your algorithm starts with ~0, then XOR with ~0 before you set the seed.
*/
static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen)
{
struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
if (keylen != sizeof(mctx->key))
return -EINVAL;
mctx->key = get_unaligned_le32(key);
return 0;
}
static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = crc32_loongarch_hw(ctx->crc, data, length);
return 0;
}
static int chksumc_update(struct shash_desc *desc, const u8 *data, unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = crc32c_loongarch_hw(ctx->crc, data, length);
return 0;
}
static int chksum_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
put_unaligned_le32(ctx->crc, out);
return 0;
}
static int chksumc_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
put_unaligned_le32(~ctx->crc, out);
return 0;
}
static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
{
put_unaligned_le32(crc32_loongarch_hw(crc, data, len), out);
return 0;
}
static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
{
put_unaligned_le32(~crc32c_loongarch_hw(crc, data, len), out);
return 0;
}
static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(ctx->crc, data, len, out);
}
static int chksumc_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksumc_finup(ctx->crc, data, len, out);
}
static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
{
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
return __chksum_finup(mctx->key, data, length, out);
}
static int chksumc_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
{
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
return __chksumc_finup(mctx->key, data, length, out);
}
static int chksum_cra_init(struct crypto_tfm *tfm)
{
struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
mctx->key = 0;
return 0;
}
static int chksumc_cra_init(struct crypto_tfm *tfm)
{
struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
mctx->key = ~0;
return 0;
}
static struct shash_alg crc32_alg = {
.digestsize = CHKSUM_DIGEST_SIZE,
.setkey = chksum_setkey,
.init = chksum_init,
.update = chksum_update,
.final = chksum_final,
.finup = chksum_finup,
.digest = chksum_digest,
.descsize = sizeof(struct chksum_desc_ctx),
.base = {
.cra_name = "crc32",
.cra_driver_name = "crc32-loongarch",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_alignmask = 0,
.cra_ctxsize = sizeof(struct chksum_ctx),
.cra_module = THIS_MODULE,
.cra_init = chksum_cra_init,
}
};
static struct shash_alg crc32c_alg = {
.digestsize = CHKSUM_DIGEST_SIZE,
.setkey = chksum_setkey,
.init = chksum_init,
.update = chksumc_update,
.final = chksumc_final,
.finup = chksumc_finup,
.digest = chksumc_digest,
.descsize = sizeof(struct chksum_desc_ctx),
.base = {
.cra_name = "crc32c",
.cra_driver_name = "crc32c-loongarch",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_alignmask = 0,
.cra_ctxsize = sizeof(struct chksum_ctx),
.cra_module = THIS_MODULE,
.cra_init = chksumc_cra_init,
}
};
static int __init crc32_mod_init(void)
{
int err;
if (!cpu_has(CPU_FEATURE_CRC32))
return 0;
err = crypto_register_shash(&crc32_alg);
if (err)
return err;
err = crypto_register_shash(&crc32c_alg);
if (err)
return err;
return 0;
}
static void __exit crc32_mod_exit(void)
{
if (!cpu_has(CPU_FEATURE_CRC32))
return;
crypto_unregister_shash(&crc32_alg);
crypto_unregister_shash(&crc32c_alg);
}
module_init(crc32_mod_init);
module_exit(crc32_mod_exit);
MODULE_AUTHOR("Min Zhou <zhoumin@loongson.cn>");
MODULE_AUTHOR("Huacai Chen <chenhuacai@loongson.cn>");
MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions");
MODULE_LICENSE("GPL v2");
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2016 ARM Ltd.
* Copyright (C) 2023 Loongson Technology Corporation Limited
*/
#ifndef __ASM_CHECKSUM_H
#define __ASM_CHECKSUM_H
#include <linux/bitops.h>
#include <linux/in6.h>
#define _HAVE_ARCH_IPV6_CSUM
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__u32 len, __u8 proto, __wsum sum);
/*
* turns a 32-bit partial checksum (e.g. from csum_partial) into a
* 1's complement 16-bit checksum.
*/
static inline __sum16 csum_fold(__wsum sum)
{
u32 tmp = (__force u32)sum;
/*
* swap the two 16-bit halves of sum
* if there is a carry from adding the two 16-bit halves,
* it will carry from the lower half into the upper half,
* giving us the correct sum in the upper half.
*/
return (__force __sum16)(~(tmp + rol32(tmp, 16)) >> 16);
}
#define csum_fold csum_fold
/*
* This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries. ihl is the number
* of 32-bit words and is always >= 5.
*/
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
u64 sum;
__uint128_t tmp;
int n = ihl; /* we want it signed */
tmp = *(const __uint128_t *)iph;
iph += 16;
n -= 4;
tmp += ((tmp >> 64) | (tmp << 64));
sum = tmp >> 64;
do {
sum += *(const u32 *)iph;
iph += 4;
} while (--n > 0);
sum += ror64(sum, 32);
return csum_fold((__force __wsum)(sum >> 32));
}
#define ip_fast_csum ip_fast_csum
extern unsigned int do_csum(const unsigned char *buff, int len);
#define do_csum do_csum
#include <asm-generic/checksum.h>
#endif /* __ASM_CHECKSUM_H */
......@@ -21,6 +21,9 @@
struct sigcontext;
extern void kernel_fpu_begin(void);
extern void kernel_fpu_end(void);
extern void _init_fpu(unsigned int);
extern void _save_fp(struct loongarch_fpu *);
extern void _restore_fp(struct loongarch_fpu *);
......
......@@ -54,9 +54,46 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *
return &fregs->regs;
}
static __always_inline unsigned long
ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs)
{
return instruction_pointer(&fregs->regs);
}
static __always_inline void
ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip)
{
regs_set_return_value(&fregs->regs, ip);
}
#define ftrace_regs_get_argument(fregs, n) \
regs_get_kernel_argument(&(fregs)->regs, n)
#define ftrace_regs_get_stack_pointer(fregs) \
kernel_stack_pointer(&(fregs)->regs)
#define ftrace_regs_return_value(fregs) \
regs_return_value(&(fregs)->regs)
#define ftrace_regs_set_return_value(fregs, ret) \
regs_set_return_value(&(fregs)->regs, ret)
#define ftrace_override_function_with_return(fregs) \
override_function_with_return(&(fregs)->regs)
#define ftrace_regs_query_register_offset(name) \
regs_query_register_offset(name)
#define ftrace_graph_func ftrace_graph_func
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static inline void
__arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
{
regs->regs[13] = addr; /* t1 */
}
#define arch_ftrace_set_direct_caller(fregs, addr) \
__arch_ftrace_set_direct_caller(&(fregs)->regs, addr)
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
#endif
#endif /* __ASSEMBLY__ */
......
......@@ -121,6 +121,8 @@ enum reg2bstrd_op {
};
enum reg3_op {
asrtle_op = 0x02,
asrtgt_op = 0x03,
addw_op = 0x20,
addd_op = 0x21,
subw_op = 0x22,
......@@ -176,6 +178,30 @@ enum reg3_op {
amord_op = 0x70c7,
amxorw_op = 0x70c8,
amxord_op = 0x70c9,
fldgts_op = 0x70e8,
fldgtd_op = 0x70e9,
fldles_op = 0x70ea,
fldled_op = 0x70eb,
fstgts_op = 0x70ec,
fstgtd_op = 0x70ed,
fstles_op = 0x70ee,
fstled_op = 0x70ef,
ldgtb_op = 0x70f0,
ldgth_op = 0x70f1,
ldgtw_op = 0x70f2,
ldgtd_op = 0x70f3,
ldleb_op = 0x70f4,
ldleh_op = 0x70f5,
ldlew_op = 0x70f6,
ldled_op = 0x70f7,
stgtb_op = 0x70f8,
stgth_op = 0x70f9,
stgtw_op = 0x70fa,
stgtd_op = 0x70fb,
stleb_op = 0x70fc,
stleh_op = 0x70fd,
stlew_op = 0x70fe,
stled_op = 0x70ff,
};
enum reg3sa2_op {
......
......@@ -311,8 +311,8 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
#define CSR_ECFG_VS_WIDTH 3
#define CSR_ECFG_VS (_ULCAST_(0x7) << CSR_ECFG_VS_SHIFT)
#define CSR_ECFG_IM_SHIFT 0
#define CSR_ECFG_IM_WIDTH 13
#define CSR_ECFG_IM (_ULCAST_(0x1fff) << CSR_ECFG_IM_SHIFT)
#define CSR_ECFG_IM_WIDTH 14
#define CSR_ECFG_IM (_ULCAST_(0x3fff) << CSR_ECFG_IM_SHIFT)
#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */
#define CSR_ESTAT_ESUBCODE_SHIFT 22
......@@ -322,8 +322,8 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
#define CSR_ESTAT_EXC_WIDTH 6
#define CSR_ESTAT_EXC (_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT)
#define CSR_ESTAT_IS_SHIFT 0
#define CSR_ESTAT_IS_WIDTH 15
#define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT)
#define CSR_ESTAT_IS_WIDTH 14
#define CSR_ESTAT_IS (_ULCAST_(0x3fff) << CSR_ESTAT_IS_SHIFT)
#define LOONGARCH_CSR_ERA 0x6 /* ERA */
......@@ -1090,7 +1090,7 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
#define ECFGF_IPI (_ULCAST_(1) << ECFGB_IPI)
#define ECFGF(hwirq) (_ULCAST_(1) << hwirq)
#define ESTATF_IP 0x00001fff
#define ESTATF_IP 0x00003fff
#define LOONGARCH_IOCSR_FEATURES 0x8
#define IOCSRF_TEMP BIT_ULL(0)
......@@ -1397,7 +1397,7 @@ __BUILD_CSR_OP(tlbidx)
#define EXSUBCODE_ADEF 0 /* Fetch Instruction */
#define EXSUBCODE_ADEM 1 /* Access Memory*/
#define EXCCODE_ALE 9 /* Unalign Access */
#define EXCCODE_OOB 10 /* Out of bounds */
#define EXCCODE_BCE 10 /* Bounds Check Error */
#define EXCCODE_SYS 11 /* System call */
#define EXCCODE_BP 12 /* Breakpoint */
#define EXCCODE_INE 13 /* Inst. Not Exist */
......@@ -1408,33 +1408,38 @@ __BUILD_CSR_OP(tlbidx)
#define EXCCODE_FPE 18 /* Floating Point Exception */
#define EXCSUBCODE_FPE 0 /* Floating Point Exception */
#define EXCSUBCODE_VFPE 1 /* Vector Exception */
#define EXCCODE_WATCH 19 /* Watch address reference */
#define EXCCODE_WATCH 19 /* WatchPoint Exception */
#define EXCSUBCODE_WPEF 0 /* ... on Instruction Fetch */
#define EXCSUBCODE_WPEM 1 /* ... on Memory Accesses */
#define EXCCODE_BTDIS 20 /* Binary Trans. Disabled */
#define EXCCODE_BTE 21 /* Binary Trans. Exception */
#define EXCCODE_PSI 22 /* Guest Privileged Error */
#define EXCCODE_HYP 23 /* Hypercall */
#define EXCCODE_GSPR 22 /* Guest Privileged Error */
#define EXCCODE_HVC 23 /* Hypercall */
#define EXCCODE_GCM 24 /* Guest CSR modified */
#define EXCSUBCODE_GCSC 0 /* Software caused */
#define EXCSUBCODE_GCHC 1 /* Hardware caused */
#define EXCCODE_SE 25 /* Security */
/* Interrupt numbers */
#define INT_SWI0 0 /* Software Interrupts */
#define INT_SWI1 1
#define INT_HWI0 2 /* Hardware Interrupts */
#define INT_HWI1 3
#define INT_HWI2 4
#define INT_HWI3 5
#define INT_HWI4 6
#define INT_HWI5 7
#define INT_HWI6 8
#define INT_HWI7 9
#define INT_PCOV 10 /* Performance Counter Overflow */
#define INT_TI 11 /* Timer */
#define INT_IPI 12
#define INT_NMI 13
/* ExcCodes corresponding to interrupts */
#define EXCCODE_INT_NUM (INT_NMI + 1)
#define EXCCODE_INT_START 64
#define EXCCODE_SIP0 64
#define EXCCODE_SIP1 65
#define EXCCODE_IP0 66
#define EXCCODE_IP1 67
#define EXCCODE_IP2 68
#define EXCCODE_IP3 69
#define EXCCODE_IP4 70
#define EXCCODE_IP5 71
#define EXCCODE_IP6 72
#define EXCCODE_IP7 73
#define EXCCODE_PMC 74 /* Performance Counter */
#define EXCCODE_TIMER 75
#define EXCCODE_IPI 76
#define EXCCODE_NMI 77
#define EXCCODE_INT_END 78
#define EXCCODE_INT_NUM (EXCCODE_INT_END - EXCCODE_INT_START)
#define EXCCODE_INT_END (EXCCODE_INT_START + EXCCODE_INT_NUM - 1)
/* FPU register names */
#define LOONGARCH_FCSR0 $r0
......
......@@ -154,6 +154,11 @@ static inline long regs_return_value(struct pt_regs *regs)
return regs->regs[4];
}
static inline void regs_set_return_value(struct pt_regs *regs, unsigned long val)
{
regs->regs[4] = val;
}
#define instruction_pointer(regs) ((regs)->csr_era)
#define profile_pc(regs) instruction_pointer(regs)
......
......@@ -13,7 +13,7 @@ obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_EFI) += efi.o
obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
obj-$(CONFIG_CPU_HAS_FPU) += fpu.o kfpu.o
obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o
......
......@@ -30,19 +30,12 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate)
return 0;
}
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
#ifdef CONFIG_MODULES
static inline int __get_mod(struct module **mod, unsigned long addr)
static bool reachable_by_bl(unsigned long addr, unsigned long pc)
{
preempt_disable();
*mod = __module_text_address(addr);
preempt_enable();
long offset = (long)addr - (long)pc;
if (WARN_ON(!(*mod)))
return -EINVAL;
return 0;
return offset >= -SZ_128M && offset < SZ_128M;
}
static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
......@@ -58,51 +51,88 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
return NULL;
}
static unsigned long get_plt_addr(struct module *mod, unsigned long addr)
/*
* Find the address the callsite must branch to in order to reach '*addr'.
*
* Due to the limited range of 'bl' instruction, modules may be placed too far
* away to branch directly and we must use a PLT.
*
* Returns true when '*addr' contains a reachable target address, or has been
* modified to contain a PLT address. Returns false otherwise.
*/
static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod, unsigned long *addr)
{
unsigned long pc = rec->ip + LOONGARCH_INSN_SIZE;
struct plt_entry *plt;
plt = get_ftrace_plt(mod, addr);
/*
* If a custom trampoline is unreachable, rely on the ftrace_regs_caller
* trampoline which knows how to indirectly reach that trampoline through
* ops->direct_call.
*/
if (*addr != FTRACE_ADDR && *addr != FTRACE_REGS_ADDR && !reachable_by_bl(*addr, pc))
*addr = FTRACE_REGS_ADDR;
/*
* When the target is within range of the 'bl' instruction, use 'addr'
* as-is and branch to that directly.
*/
if (reachable_by_bl(*addr, pc))
return true;
/*
* 'mod' is only set at module load time, but if we end up
* dealing with an out-of-range condition, we can assume it
* is due to a module being loaded far away from the kernel.
*
* NOTE: __module_text_address() must be called with preemption
* disabled, but we can rely on ftrace_lock to ensure that 'mod'
* retains its validity throughout the remainder of this code.
*/
if (!mod) {
preempt_disable();
mod = __module_text_address(pc);
preempt_enable();
}
if (WARN_ON(!mod))
return false;
plt = get_ftrace_plt(mod, *addr);
if (!plt) {
pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
return -EINVAL;
pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
return false;
}
return (unsigned long)plt;
*addr = (unsigned long)plt;
return true;
}
#else /* !CONFIG_MODULES */
static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod, unsigned long *addr)
{
return true;
}
#endif
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
{
u32 old, new;
unsigned long pc;
long offset __maybe_unused;
pc = rec->ip + LOONGARCH_INSN_SIZE;
#ifdef CONFIG_MODULES
offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) {
int ret;
struct module *mod;
ret = __get_mod(&mod, pc);
if (ret)
return ret;
addr = get_plt_addr(mod, addr);
if (!ftrace_find_callable_addr(rec, NULL, &addr))
return -EINVAL;
old_addr = get_plt_addr(mod, old_addr);
}
#endif
if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
return -EINVAL;
new = larch_insn_gen_bl(pc, addr);
old = larch_insn_gen_bl(pc, old_addr);
return ftrace_modify_code(pc, old, new, true);
}
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
int ftrace_update_ftrace_func(ftrace_func_t func)
......@@ -153,24 +183,11 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
u32 old, new;
unsigned long pc;
long offset __maybe_unused;
pc = rec->ip + LOONGARCH_INSN_SIZE;
#ifdef CONFIG_MODULES
offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) {
int ret;
struct module *mod;
ret = __get_mod(&mod, pc);
if (ret)
return ret;
addr = get_plt_addr(mod, addr);
}
#endif
if (!ftrace_find_callable_addr(rec, NULL, &addr))
return -EINVAL;
old = larch_insn_gen_nop();
new = larch_insn_gen_bl(pc, addr);
......@@ -182,24 +199,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long ad
{
u32 old, new;
unsigned long pc;
long offset __maybe_unused;
pc = rec->ip + LOONGARCH_INSN_SIZE;
#ifdef CONFIG_MODULES
offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) {
int ret;
struct module *mod;
ret = __get_mod(&mod, pc);
if (ret)
return ret;
addr = get_plt_addr(mod, addr);
}
#endif
if (!ftrace_find_callable_addr(rec, NULL, &addr))
return -EINVAL;
new = larch_insn_gen_nop();
old = larch_insn_gen_bl(pc, addr);
......
......@@ -82,6 +82,7 @@ SYM_FUNC_END(except_vec_cex)
BUILD_HANDLER ade ade badv
BUILD_HANDLER ale ale badv
BUILD_HANDLER bce bce none
BUILD_HANDLER bp bp none
BUILD_HANDLER fpe fpe fcsr
BUILD_HANDLER fpu fpu none
......
......@@ -92,7 +92,7 @@ static int __init get_ipi_irq(void)
struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
if (d)
return irq_create_mapping(d, EXCCODE_IPI - EXCCODE_INT_START);
return irq_create_mapping(d, INT_IPI);
return -EINVAL;
}
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2023 Loongson Technology Corporation Limited
*/
#include <linux/cpu.h>
#include <linux/init.h>
#include <asm/fpu.h>
#include <asm/smp.h>
static DEFINE_PER_CPU(bool, in_kernel_fpu);
void kernel_fpu_begin(void)
{
preempt_disable();
WARN_ON(this_cpu_read(in_kernel_fpu));
this_cpu_write(in_kernel_fpu, true);
if (!is_fpu_owner())
enable_fpu();
else
_save_fp(&current->thread.fpu);
write_fcsr(LOONGARCH_FCSR0, 0);
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);
void kernel_fpu_end(void)
{
WARN_ON(!this_cpu_read(in_kernel_fpu));
if (!is_fpu_owner())
disable_fpu();
else
_restore_fp(&current->thread.fpu);
this_cpu_write(in_kernel_fpu, false);
preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
......@@ -42,7 +42,6 @@
.if \allregs
PTR_S tp, sp, PT_R2
PTR_S t0, sp, PT_R12
PTR_S t1, sp, PT_R13
PTR_S t2, sp, PT_R14
PTR_S t3, sp, PT_R15
PTR_S t4, sp, PT_R16
......@@ -64,6 +63,8 @@
PTR_S zero, sp, PT_R0
.endif
PTR_S ra, sp, PT_ERA /* Save trace function ra at PT_ERA */
move t1, zero
PTR_S t1, sp, PT_R13
PTR_ADDI t8, sp, PT_SIZE
PTR_S t8, sp, PT_R3
.endm
......@@ -104,8 +105,12 @@ ftrace_common_return:
PTR_L a7, sp, PT_R11
PTR_L fp, sp, PT_R22
PTR_L t0, sp, PT_ERA
PTR_L t1, sp, PT_R13
PTR_ADDI sp, sp, PT_SIZE
bnez t1, .Ldirect
jr t0
.Ldirect:
jr t1
SYM_CODE_END(ftrace_common)
SYM_CODE_START(ftrace_caller)
......@@ -147,3 +152,9 @@ SYM_CODE_START(return_to_handler)
jr ra
SYM_CODE_END(return_to_handler)
#endif
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
SYM_CODE_START(ftrace_stub_direct_tramp)
jr t0
SYM_CODE_END(ftrace_stub_direct_tramp)
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
......@@ -461,7 +461,7 @@ static int get_pmc_irq(void)
struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
if (d)
return irq_create_mapping(d, EXCCODE_PMC - EXCCODE_INT_START);
return irq_create_mapping(d, INT_PCOV);
return -EINVAL;
}
......
......@@ -133,7 +133,7 @@ static int get_timer_irq(void)
struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
if (d)
return irq_create_mapping(d, EXCCODE_TIMER - EXCCODE_INT_START);
return irq_create_mapping(d, INT_TI);
return -EINVAL;
}
......
This diff is collapsed.
......@@ -4,4 +4,6 @@
#
lib-y += delay.o memset.o memcpy.o memmove.o \
clear_user.o copy_user.o dump_tlb.o unaligned.o
clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
......@@ -13,7 +13,14 @@
.irp to, 0, 1, 2, 3, 4, 5, 6, 7
.L_fixup_handle_\to\():
addi.d a0, a1, (\to) * (-8)
sub.d a0, a2, a0
addi.d a0, a0, (\to) * (-8)
jr ra
.endr
.irp to, 0, 2, 4
.L_fixup_handle_s\to\():
addi.d a0, a1, -\to
jr ra
.endr
......@@ -44,7 +51,7 @@ SYM_FUNC_START(__clear_user_generic)
2: move a0, a1
jr ra
_asm_extable 1b, .L_fixup_handle_0
_asm_extable 1b, .L_fixup_handle_s0
SYM_FUNC_END(__clear_user_generic)
/*
......@@ -54,12 +61,21 @@ SYM_FUNC_END(__clear_user_generic)
* a1: size
*/
SYM_FUNC_START(__clear_user_fast)
beqz a1, 10f
sltui t0, a1, 9
bnez t0, .Lsmall
ori a2, zero, 64
blt a1, a2, 9f
add.d a2, a0, a1
0: st.d zero, a0, 0
/* align up address */
addi.d a0, a0, 8
bstrins.d a0, zero, 2, 0
addi.d a3, a2, -64
bgeu a0, a3, .Llt64
/* set 64 bytes at a time */
.Lloop64:
1: st.d zero, a0, 0
2: st.d zero, a0, 8
3: st.d zero, a0, 16
......@@ -68,24 +84,95 @@ SYM_FUNC_START(__clear_user_fast)
6: st.d zero, a0, 40
7: st.d zero, a0, 48
8: st.d zero, a0, 56
addi.d a0, a0, 64
addi.d a1, a1, -64
bge a1, a2, 1b
beqz a1, 10f
bltu a0, a3, .Lloop64
/* set the remaining bytes */
9: st.b zero, a0, 0
addi.d a0, a0, 1
addi.d a1, a1, -1
bgt a1, zero, 9b
.Llt64:
addi.d a3, a2, -32
bgeu a0, a3, .Llt32
9: st.d zero, a0, 0
10: st.d zero, a0, 8
11: st.d zero, a0, 16
12: st.d zero, a0, 24
addi.d a0, a0, 32
.Llt32:
addi.d a3, a2, -16
bgeu a0, a3, .Llt16
13: st.d zero, a0, 0
14: st.d zero, a0, 8
addi.d a0, a0, 16
.Llt16:
addi.d a3, a2, -8
bgeu a0, a3, .Llt8
15: st.d zero, a0, 0
.Llt8:
16: st.d zero, a2, -8
/* return */
10: move a0, a1
move a0, zero
jr ra
.align 4
.Lsmall:
pcaddi t0, 4
slli.d a2, a1, 4
add.d t0, t0, a2
jr t0
.align 4
move a0, zero
jr ra
.align 4
17: st.b zero, a0, 0
move a0, zero
jr ra
.align 4
18: st.h zero, a0, 0
move a0, zero
jr ra
.align 4
19: st.h zero, a0, 0
20: st.b zero, a0, 2
move a0, zero
jr ra
.align 4
21: st.w zero, a0, 0
move a0, zero
jr ra
.align 4
22: st.w zero, a0, 0
23: st.b zero, a0, 4
move a0, zero
jr ra
.align 4
24: st.w zero, a0, 0
25: st.h zero, a0, 4
move a0, zero
jr ra
.align 4
26: st.w zero, a0, 0
27: st.w zero, a0, 3
move a0, zero
jr ra
.align 4
28: st.d zero, a0, 0
move a0, zero
jr ra
/* fixup and ex_table */
_asm_extable 0b, .L_fixup_handle_0
_asm_extable 1b, .L_fixup_handle_0
_asm_extable 2b, .L_fixup_handle_1
_asm_extable 3b, .L_fixup_handle_2
......@@ -95,4 +182,23 @@ SYM_FUNC_START(__clear_user_fast)
_asm_extable 7b, .L_fixup_handle_6
_asm_extable 8b, .L_fixup_handle_7
_asm_extable 9b, .L_fixup_handle_0
_asm_extable 10b, .L_fixup_handle_1
_asm_extable 11b, .L_fixup_handle_2
_asm_extable 12b, .L_fixup_handle_3
_asm_extable 13b, .L_fixup_handle_0
_asm_extable 14b, .L_fixup_handle_1
_asm_extable 15b, .L_fixup_handle_0
_asm_extable 16b, .L_fixup_handle_1
_asm_extable 17b, .L_fixup_handle_s0
_asm_extable 18b, .L_fixup_handle_s0
_asm_extable 19b, .L_fixup_handle_s0
_asm_extable 20b, .L_fixup_handle_s2
_asm_extable 21b, .L_fixup_handle_s0
_asm_extable 22b, .L_fixup_handle_s0
_asm_extable 23b, .L_fixup_handle_s4
_asm_extable 24b, .L_fixup_handle_s0
_asm_extable 25b, .L_fixup_handle_s4
_asm_extable 26b, .L_fixup_handle_s0
_asm_extable 27b, .L_fixup_handle_s4
_asm_extable 28b, .L_fixup_handle_s0
SYM_FUNC_END(__clear_user_fast)
......@@ -13,7 +13,14 @@
.irp to, 0, 1, 2, 3, 4, 5, 6, 7
.L_fixup_handle_\to\():
addi.d a0, a2, (\to) * (-8)
sub.d a0, a2, a0
addi.d a0, a0, (\to) * (-8)
jr ra
.endr
.irp to, 0, 2, 4
.L_fixup_handle_s\to\():
addi.d a0, a2, -\to
jr ra
.endr
......@@ -47,8 +54,8 @@ SYM_FUNC_START(__copy_user_generic)
3: move a0, a2
jr ra
_asm_extable 1b, .L_fixup_handle_0
_asm_extable 2b, .L_fixup_handle_0
_asm_extable 1b, .L_fixup_handle_s0
_asm_extable 2b, .L_fixup_handle_s0
SYM_FUNC_END(__copy_user_generic)
/*
......@@ -59,65 +66,209 @@ SYM_FUNC_END(__copy_user_generic)
* a2: n
*/
SYM_FUNC_START(__copy_user_fast)
beqz a2, 19f
sltui t0, a2, 9
bnez t0, .Lsmall
ori a3, zero, 64
blt a2, a3, 17f
add.d a3, a1, a2
add.d a2, a0, a2
0: ld.d t0, a1, 0
1: st.d t0, a0, 0
/* copy 64 bytes at a time */
1: ld.d t0, a1, 0
2: ld.d t1, a1, 8
3: ld.d t2, a1, 16
4: ld.d t3, a1, 24
5: ld.d t4, a1, 32
6: ld.d t5, a1, 40
7: ld.d t6, a1, 48
8: ld.d t7, a1, 56
9: st.d t0, a0, 0
10: st.d t1, a0, 8
11: st.d t2, a0, 16
12: st.d t3, a0, 24
13: st.d t4, a0, 32
14: st.d t5, a0, 40
15: st.d t6, a0, 48
16: st.d t7, a0, 56
/* align up destination address */
andi t1, a0, 7
sub.d t0, zero, t1
addi.d t0, t0, 8
add.d a1, a1, t0
add.d a0, a0, t0
addi.d a0, a0, 64
addi.d a1, a1, 64
addi.d a2, a2, -64
bge a2, a3, 1b
addi.d a4, a3, -64
bgeu a1, a4, .Llt64
beqz a2, 19f
/* copy 64 bytes at a time */
.Lloop64:
2: ld.d t0, a1, 0
3: ld.d t1, a1, 8
4: ld.d t2, a1, 16
5: ld.d t3, a1, 24
6: ld.d t4, a1, 32
7: ld.d t5, a1, 40
8: ld.d t6, a1, 48
9: ld.d t7, a1, 56
addi.d a1, a1, 64
10: st.d t0, a0, 0
11: st.d t1, a0, 8
12: st.d t2, a0, 16
13: st.d t3, a0, 24
14: st.d t4, a0, 32
15: st.d t5, a0, 40
16: st.d t6, a0, 48
17: st.d t7, a0, 56
addi.d a0, a0, 64
bltu a1, a4, .Lloop64
/* copy the remaining bytes */
17: ld.b t0, a1, 0
18: st.b t0, a0, 0
addi.d a0, a0, 1
addi.d a1, a1, 1
addi.d a2, a2, -1
bgt a2, zero, 17b
.Llt64:
addi.d a4, a3, -32
bgeu a1, a4, .Llt32
18: ld.d t0, a1, 0
19: ld.d t1, a1, 8
20: ld.d t2, a1, 16
21: ld.d t3, a1, 24
addi.d a1, a1, 32
22: st.d t0, a0, 0
23: st.d t1, a0, 8
24: st.d t2, a0, 16
25: st.d t3, a0, 24
addi.d a0, a0, 32
.Llt32:
addi.d a4, a3, -16
bgeu a1, a4, .Llt16
26: ld.d t0, a1, 0
27: ld.d t1, a1, 8
addi.d a1, a1, 16
28: st.d t0, a0, 0
29: st.d t1, a0, 8
addi.d a0, a0, 16
.Llt16:
addi.d a4, a3, -8
bgeu a1, a4, .Llt8
30: ld.d t0, a1, 0
31: st.d t0, a0, 0
.Llt8:
32: ld.d t0, a3, -8
33: st.d t0, a2, -8
/* return */
19: move a0, a2
move a0, zero
jr ra
.align 5
.Lsmall:
pcaddi t0, 8
slli.d a3, a2, 5
add.d t0, t0, a3
jr t0
.align 5
move a0, zero
jr ra
.align 5
34: ld.b t0, a1, 0
35: st.b t0, a0, 0
move a0, zero
jr ra
.align 5
36: ld.h t0, a1, 0
37: st.h t0, a0, 0
move a0, zero
jr ra
.align 5
38: ld.h t0, a1, 0
39: ld.b t1, a1, 2
40: st.h t0, a0, 0
41: st.b t1, a0, 2
move a0, zero
jr ra
.align 5
42: ld.w t0, a1, 0
43: st.w t0, a0, 0
move a0, zero
jr ra
.align 5
44: ld.w t0, a1, 0
45: ld.b t1, a1, 4
46: st.w t0, a0, 0
47: st.b t1, a0, 4
move a0, zero
jr ra
.align 5
48: ld.w t0, a1, 0
49: ld.h t1, a1, 4
50: st.w t0, a0, 0
51: st.h t1, a0, 4
move a0, zero
jr ra
.align 5
52: ld.w t0, a1, 0
53: ld.w t1, a1, 3
54: st.w t0, a0, 0
55: st.w t1, a0, 3
move a0, zero
jr ra
.align 5
56: ld.d t0, a1, 0
57: st.d t0, a0, 0
move a0, zero
jr ra
/* fixup and ex_table */
_asm_extable 0b, .L_fixup_handle_0
_asm_extable 1b, .L_fixup_handle_0
_asm_extable 2b, .L_fixup_handle_1
_asm_extable 3b, .L_fixup_handle_2
_asm_extable 4b, .L_fixup_handle_3
_asm_extable 5b, .L_fixup_handle_4
_asm_extable 6b, .L_fixup_handle_5
_asm_extable 7b, .L_fixup_handle_6
_asm_extable 8b, .L_fixup_handle_7
_asm_extable 2b, .L_fixup_handle_0
_asm_extable 3b, .L_fixup_handle_0
_asm_extable 4b, .L_fixup_handle_0
_asm_extable 5b, .L_fixup_handle_0
_asm_extable 6b, .L_fixup_handle_0
_asm_extable 7b, .L_fixup_handle_0
_asm_extable 8b, .L_fixup_handle_0
_asm_extable 9b, .L_fixup_handle_0
_asm_extable 10b, .L_fixup_handle_1
_asm_extable 11b, .L_fixup_handle_2
_asm_extable 12b, .L_fixup_handle_3
_asm_extable 13b, .L_fixup_handle_4
_asm_extable 14b, .L_fixup_handle_5
_asm_extable 15b, .L_fixup_handle_6
_asm_extable 16b, .L_fixup_handle_7
_asm_extable 17b, .L_fixup_handle_0
_asm_extable 10b, .L_fixup_handle_0
_asm_extable 11b, .L_fixup_handle_1
_asm_extable 12b, .L_fixup_handle_2
_asm_extable 13b, .L_fixup_handle_3
_asm_extable 14b, .L_fixup_handle_4
_asm_extable 15b, .L_fixup_handle_5
_asm_extable 16b, .L_fixup_handle_6
_asm_extable 17b, .L_fixup_handle_7
_asm_extable 18b, .L_fixup_handle_0
_asm_extable 19b, .L_fixup_handle_0
_asm_extable 20b, .L_fixup_handle_0
_asm_extable 21b, .L_fixup_handle_0
_asm_extable 22b, .L_fixup_handle_0
_asm_extable 23b, .L_fixup_handle_1
_asm_extable 24b, .L_fixup_handle_2
_asm_extable 25b, .L_fixup_handle_3
_asm_extable 26b, .L_fixup_handle_0
_asm_extable 27b, .L_fixup_handle_0
_asm_extable 28b, .L_fixup_handle_0
_asm_extable 29b, .L_fixup_handle_1
_asm_extable 30b, .L_fixup_handle_0
_asm_extable 31b, .L_fixup_handle_0
_asm_extable 32b, .L_fixup_handle_0
_asm_extable 33b, .L_fixup_handle_1
_asm_extable 34b, .L_fixup_handle_s0
_asm_extable 35b, .L_fixup_handle_s0
_asm_extable 36b, .L_fixup_handle_s0
_asm_extable 37b, .L_fixup_handle_s0
_asm_extable 38b, .L_fixup_handle_s0
_asm_extable 39b, .L_fixup_handle_s0
_asm_extable 40b, .L_fixup_handle_s0
_asm_extable 41b, .L_fixup_handle_s2
_asm_extable 42b, .L_fixup_handle_s0
_asm_extable 43b, .L_fixup_handle_s0
_asm_extable 44b, .L_fixup_handle_s0
_asm_extable 45b, .L_fixup_handle_s0
_asm_extable 46b, .L_fixup_handle_s0
_asm_extable 47b, .L_fixup_handle_s4
_asm_extable 48b, .L_fixup_handle_s0
_asm_extable 49b, .L_fixup_handle_s0
_asm_extable 50b, .L_fixup_handle_s0
_asm_extable 51b, .L_fixup_handle_s4
_asm_extable 52b, .L_fixup_handle_s0
_asm_extable 53b, .L_fixup_handle_s0
_asm_extable 54b, .L_fixup_handle_s0
_asm_extable 55b, .L_fixup_handle_s4
_asm_extable 56b, .L_fixup_handle_s0
_asm_extable 57b, .L_fixup_handle_s0
SYM_FUNC_END(__copy_user_fast)
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2019-2020 Arm Ltd.
#include <linux/compiler.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>
#include <net/checksum.h>
static u64 accumulate(u64 sum, u64 data)
{
sum += data;
if (sum < data)
sum += 1;
return sum;
}
/*
* We over-read the buffer and this makes KASAN unhappy. Instead, disable
* instrumentation and call kasan explicitly.
*/
unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
{
unsigned int offset, shift, sum;
const u64 *ptr;
u64 data, sum64 = 0;
if (unlikely(len == 0))
return 0;
offset = (unsigned long)buff & 7;
/*
* This is to all intents and purposes safe, since rounding down cannot
* result in a different page or cache line being accessed, and @buff
* should absolutely not be pointing to anything read-sensitive. We do,
* however, have to be careful not to piss off KASAN, which means using
* unchecked reads to accommodate the head and tail, for which we'll
* compensate with an explicit check up-front.
*/
kasan_check_read(buff, len);
ptr = (u64 *)(buff - offset);
len = len + offset - 8;
/*
* Head: zero out any excess leading bytes. Shifting back by the same
* amount should be at least as fast as any other way of handling the
* odd/even alignment, and means we can ignore it until the very end.
*/
shift = offset * 8;
data = *ptr++;
data = (data >> shift) << shift;
/*
* Body: straightforward aligned loads from here on (the paired loads
* underlying the quadword type still only need dword alignment). The
* main loop strictly excludes the tail, so the second loop will always
* run at least once.
*/
while (unlikely(len > 64)) {
__uint128_t tmp1, tmp2, tmp3, tmp4;
tmp1 = *(__uint128_t *)ptr;
tmp2 = *(__uint128_t *)(ptr + 2);
tmp3 = *(__uint128_t *)(ptr + 4);
tmp4 = *(__uint128_t *)(ptr + 6);
len -= 64;
ptr += 8;
/* This is the "don't dump the carry flag into a GPR" idiom */
tmp1 += (tmp1 >> 64) | (tmp1 << 64);
tmp2 += (tmp2 >> 64) | (tmp2 << 64);
tmp3 += (tmp3 >> 64) | (tmp3 << 64);
tmp4 += (tmp4 >> 64) | (tmp4 << 64);
tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64);
tmp1 += (tmp1 >> 64) | (tmp1 << 64);
tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64);
tmp3 += (tmp3 >> 64) | (tmp3 << 64);
tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64);
tmp1 += (tmp1 >> 64) | (tmp1 << 64);
tmp1 = ((tmp1 >> 64) << 64) | sum64;
tmp1 += (tmp1 >> 64) | (tmp1 << 64);
sum64 = tmp1 >> 64;
}
while (len > 8) {
__uint128_t tmp;
sum64 = accumulate(sum64, data);
tmp = *(__uint128_t *)ptr;
len -= 16;
ptr += 2;
data = tmp >> 64;
sum64 = accumulate(sum64, tmp);
}
if (len > 0) {
sum64 = accumulate(sum64, data);
data = *ptr;
len -= 8;
}
/*
* Tail: zero any over-read bytes similarly to the head, again
* preserving odd/even alignment.
*/
shift = len * -8;
data = (data << shift) >> shift;
sum64 = accumulate(sum64, data);
/* Finally, folding */
sum64 += (sum64 >> 32) | (sum64 << 32);
sum = sum64 >> 32;
sum += (sum >> 16) | (sum << 16);
if (offset & 1)
return (u16)swab32(sum);
return sum >> 16;
}
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__u32 len, __u8 proto, __wsum csum)
{
__uint128_t src, dst;
u64 sum = (__force u64)csum;
src = *(const __uint128_t *)saddr->s6_addr;
dst = *(const __uint128_t *)daddr->s6_addr;
sum += (__force u32)htonl(len);
sum += (u32)proto << 24;
src += (src >> 64) | (src << 64);
dst += (dst >> 64) | (dst << 64);
sum = accumulate(sum, src >> 64);
sum = accumulate(sum, dst >> 64);
sum += ((sum >> 32) | (sum << 32));
return csum_fold((__force __wsum)(sum >> 32));
}
EXPORT_SYMBOL(csum_ipv6_magic);
// SPDX-License-Identifier: GPL-2.0
#include <linux/error-injection.h>
#include <linux/kprobes.h>
void override_function_with_return(struct pt_regs *regs)
{
instruction_pointer_set(regs, regs->regs[1]);
}
NOKPROBE_SYMBOL(override_function_with_return);
......@@ -44,6 +44,66 @@ SYM_FUNC_START(__memcpy_generic)
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)
.align 5
SYM_FUNC_START_NOALIGN(__memcpy_small)
pcaddi t0, 8
slli.d a2, a2, 5
add.d t0, t0, a2
jr t0
.align 5
0: jr ra
.align 5
1: ld.b t0, a1, 0
st.b t0, a0, 0
jr ra
.align 5
2: ld.h t0, a1, 0
st.h t0, a0, 0
jr ra
.align 5
3: ld.h t0, a1, 0
ld.b t1, a1, 2
st.h t0, a0, 0
st.b t1, a0, 2
jr ra
.align 5
4: ld.w t0, a1, 0
st.w t0, a0, 0
jr ra
.align 5
5: ld.w t0, a1, 0
ld.b t1, a1, 4
st.w t0, a0, 0
st.b t1, a0, 4
jr ra
.align 5
6: ld.w t0, a1, 0
ld.h t1, a1, 4
st.w t0, a0, 0
st.h t1, a0, 4
jr ra
.align 5
7: ld.w t0, a1, 0
ld.w t1, a1, 3
st.w t0, a0, 0
st.w t1, a0, 3
jr ra
.align 5
8: ld.d t0, a1, 0
st.d t0, a0, 0
jr ra
SYM_FUNC_END(__memcpy_small)
_ASM_NOKPROBE(__memcpy_small)
/*
* void *__memcpy_fast(void *dst, const void *src, size_t n)
*
......@@ -52,14 +112,27 @@ _ASM_NOKPROBE(__memcpy_generic)
* a2: n
*/
SYM_FUNC_START(__memcpy_fast)
move a3, a0
beqz a2, 3f
sltui t0, a2, 9
bnez t0, __memcpy_small
add.d a3, a1, a2
add.d a2, a0, a2
ld.d a6, a1, 0
ld.d a7, a3, -8
/* align up destination address */
andi t1, a0, 7
sub.d t0, zero, t1
addi.d t0, t0, 8
add.d a1, a1, t0
add.d a5, a0, t0
ori a4, zero, 64
blt a2, a4, 2f
addi.d a4, a3, -64
bgeu a1, a4, .Llt64
/* copy 64 bytes at a time */
1: ld.d t0, a1, 0
.Lloop64:
ld.d t0, a1, 0
ld.d t1, a1, 8
ld.d t2, a1, 16
ld.d t3, a1, 24
......@@ -67,32 +140,54 @@ SYM_FUNC_START(__memcpy_fast)
ld.d t5, a1, 40
ld.d t6, a1, 48
ld.d t7, a1, 56
st.d t0, a0, 0
st.d t1, a0, 8
st.d t2, a0, 16
st.d t3, a0, 24
st.d t4, a0, 32
st.d t5, a0, 40
st.d t6, a0, 48
st.d t7, a0, 56
addi.d a0, a0, 64
addi.d a1, a1, 64
addi.d a2, a2, -64
bge a2, a4, 1b
beqz a2, 3f
st.d t0, a5, 0
st.d t1, a5, 8
st.d t2, a5, 16
st.d t3, a5, 24
st.d t4, a5, 32
st.d t5, a5, 40
st.d t6, a5, 48
st.d t7, a5, 56
addi.d a5, a5, 64
bltu a1, a4, .Lloop64
/* copy the remaining bytes */
2: ld.b t0, a1, 0
st.b t0, a0, 0
addi.d a0, a0, 1
addi.d a1, a1, 1
addi.d a2, a2, -1
bgt a2, zero, 2b
.Llt64:
addi.d a4, a3, -32
bgeu a1, a4, .Llt32
ld.d t0, a1, 0
ld.d t1, a1, 8
ld.d t2, a1, 16
ld.d t3, a1, 24
addi.d a1, a1, 32
st.d t0, a5, 0
st.d t1, a5, 8
st.d t2, a5, 16
st.d t3, a5, 24
addi.d a5, a5, 32
.Llt32:
addi.d a4, a3, -16
bgeu a1, a4, .Llt16
ld.d t0, a1, 0
ld.d t1, a1, 8
addi.d a1, a1, 16
st.d t0, a5, 0
st.d t1, a5, 8
addi.d a5, a5, 16
.Llt16:
addi.d a4, a3, -8
bgeu a1, a4, .Llt8
ld.d t0, a1, 0
st.d t0, a5, 0
.Llt8:
st.d a6, a0, 0
st.d a7, a2, -8
/* return */
3: move a0, a3
jr ra
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)
......@@ -11,23 +11,9 @@
#include <asm/regdef.h>
SYM_FUNC_START(memmove)
blt a0, a1, 1f /* dst < src, memcpy */
blt a1, a0, 3f /* src < dst, rmemcpy */
blt a0, a1, memcpy /* dst < src, memcpy */
blt a1, a0, rmemcpy /* src < dst, rmemcpy */
jr ra /* dst == src, return */
/* if (src - dst) < 64, copy 1 byte at a time */
1: ori a3, zero, 64
sub.d t0, a1, a0
blt t0, a3, 2f
b memcpy
2: b __memcpy_generic
/* if (dst - src) < 64, copy 1 byte at a time */
3: ori a3, zero, 64
sub.d t0, a0, a1
blt t0, a3, 4f
b rmemcpy
4: b __rmemcpy_generic
SYM_FUNC_END(memmove)
_ASM_NOKPROBE(memmove)
......@@ -76,50 +62,80 @@ _ASM_NOKPROBE(__rmemcpy_generic)
* a2: n
*/
SYM_FUNC_START(__rmemcpy_fast)
move a3, a0
beqz a2, 3f
sltui t0, a2, 9
bnez t0, __memcpy_small
add.d a0, a0, a2
add.d a1, a1, a2
add.d a3, a1, a2
add.d a2, a0, a2
ld.d a6, a1, 0
ld.d a7, a3, -8
/* align up destination address */
andi t1, a2, 7
sub.d a3, a3, t1
sub.d a5, a2, t1
ori a4, zero, 64
blt a2, a4, 2f
addi.d a4, a1, 64
bgeu a4, a3, .Llt64
/* copy 64 bytes at a time */
1: ld.d t0, a1, -8
ld.d t1, a1, -16
ld.d t2, a1, -24
ld.d t3, a1, -32
ld.d t4, a1, -40
ld.d t5, a1, -48
ld.d t6, a1, -56
ld.d t7, a1, -64
st.d t0, a0, -8
st.d t1, a0, -16
st.d t2, a0, -24
st.d t3, a0, -32
st.d t4, a0, -40
st.d t5, a0, -48
st.d t6, a0, -56
st.d t7, a0, -64
addi.d a0, a0, -64
addi.d a1, a1, -64
addi.d a2, a2, -64
bge a2, a4, 1b
beqz a2, 3f
.Lloop64:
ld.d t0, a3, -8
ld.d t1, a3, -16
ld.d t2, a3, -24
ld.d t3, a3, -32
ld.d t4, a3, -40
ld.d t5, a3, -48
ld.d t6, a3, -56
ld.d t7, a3, -64
addi.d a3, a3, -64
st.d t0, a5, -8
st.d t1, a5, -16
st.d t2, a5, -24
st.d t3, a5, -32
st.d t4, a5, -40
st.d t5, a5, -48
st.d t6, a5, -56
st.d t7, a5, -64
addi.d a5, a5, -64
bltu a4, a3, .Lloop64
/* copy the remaining bytes */
2: ld.b t0, a1, -1
st.b t0, a0, -1
addi.d a0, a0, -1
addi.d a1, a1, -1
addi.d a2, a2, -1
bgt a2, zero, 2b
.Llt64:
addi.d a4, a1, 32
bgeu a4, a3, .Llt32
ld.d t0, a3, -8
ld.d t1, a3, -16
ld.d t2, a3, -24
ld.d t3, a3, -32
addi.d a3, a3, -32
st.d t0, a5, -8
st.d t1, a5, -16
st.d t2, a5, -24
st.d t3, a5, -32
addi.d a5, a5, -32
.Llt32:
addi.d a4, a1, 16
bgeu a4, a3, .Llt16
ld.d t0, a3, -8
ld.d t1, a3, -16
addi.d a3, a3, -16
st.d t0, a5, -8
st.d t1, a5, -16
addi.d a5, a5, -16
.Llt16:
addi.d a4, a1, 8
bgeu a4, a3, .Llt8
ld.d t0, a3, -8
st.d t0, a5, -8
.Llt8:
st.d a6, a0, 0
st.d a7, a2, -8
/* return */
3: move a0, a3
jr ra
SYM_FUNC_END(__rmemcpy_fast)
_ASM_NOKPROBE(__rmemcpy_fast)
......@@ -56,39 +56,107 @@ _ASM_NOKPROBE(__memset_generic)
* a2: n
*/
SYM_FUNC_START(__memset_fast)
move a3, a0
beqz a2, 3f
ori a4, zero, 64
blt a2, a4, 2f
/* fill a1 to 64 bits */
fill_to_64 a1
/* set 64 bytes at a time */
1: st.d a1, a0, 0
st.d a1, a0, 8
st.d a1, a0, 16
st.d a1, a0, 24
st.d a1, a0, 32
st.d a1, a0, 40
st.d a1, a0, 48
st.d a1, a0, 56
sltui t0, a2, 9
bnez t0, .Lsmall
addi.d a0, a0, 64
addi.d a2, a2, -64
bge a2, a4, 1b
add.d a2, a0, a2
st.d a1, a0, 0
beqz a2, 3f
/* align up address */
addi.d a3, a0, 8
bstrins.d a3, zero, 2, 0
addi.d a4, a2, -64
bgeu a3, a4, .Llt64
/* set 64 bytes at a time */
.Lloop64:
st.d a1, a3, 0
st.d a1, a3, 8
st.d a1, a3, 16
st.d a1, a3, 24
st.d a1, a3, 32
st.d a1, a3, 40
st.d a1, a3, 48
st.d a1, a3, 56
addi.d a3, a3, 64
bltu a3, a4, .Lloop64
/* set the remaining bytes */
2: st.b a1, a0, 0
addi.d a0, a0, 1
addi.d a2, a2, -1
bgt a2, zero, 2b
.Llt64:
addi.d a4, a2, -32
bgeu a3, a4, .Llt32
st.d a1, a3, 0
st.d a1, a3, 8
st.d a1, a3, 16
st.d a1, a3, 24
addi.d a3, a3, 32
.Llt32:
addi.d a4, a2, -16
bgeu a3, a4, .Llt16
st.d a1, a3, 0
st.d a1, a3, 8
addi.d a3, a3, 16
.Llt16:
addi.d a4, a2, -8
bgeu a3, a4, .Llt8
st.d a1, a3, 0
.Llt8:
st.d a1, a2, -8
/* return */
3: move a0, a3
jr ra
.align 4
.Lsmall:
pcaddi t0, 4
slli.d a2, a2, 4
add.d t0, t0, a2
jr t0
.align 4
0: jr ra
.align 4
1: st.b a1, a0, 0
jr ra
.align 4
2: st.h a1, a0, 0
jr ra
.align 4
3: st.h a1, a0, 0
st.b a1, a0, 2
jr ra
.align 4
4: st.w a1, a0, 0
jr ra
.align 4
5: st.w a1, a0, 0
st.b a1, a0, 4
jr ra
.align 4
6: st.w a1, a0, 0
st.h a1, a0, 4
jr ra
.align 4
7: st.w a1, a0, 0
st.w a1, a0, 3
jr ra
.align 4
8: st.d a1, a0, 0
jr ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)
......@@ -1395,6 +1395,9 @@ endif
if ARM64
source "arch/arm64/crypto/Kconfig"
endif
if LOONGARCH
source "arch/loongarch/crypto/Kconfig"
endif
if MIPS
source "arch/mips/crypto/Kconfig"
endif
......
......@@ -96,6 +96,40 @@ asm (
#endif /* CONFIG_S390 */
#ifdef CONFIG_LOONGARCH
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp1, @function\n"
" .globl my_tramp1\n"
" my_tramp1:\n"
" addi.d $sp, $sp, -16\n"
" st.d $t0, $sp, 0\n"
" st.d $ra, $sp, 8\n"
" bl my_direct_func1\n"
" ld.d $t0, $sp, 0\n"
" ld.d $ra, $sp, 8\n"
" addi.d $sp, $sp, 16\n"
" jr $t0\n"
" .size my_tramp1, .-my_tramp1\n"
" .type my_tramp2, @function\n"
" .globl my_tramp2\n"
" my_tramp2:\n"
" addi.d $sp, $sp, -16\n"
" st.d $t0, $sp, 0\n"
" st.d $ra, $sp, 8\n"
" bl my_direct_func2\n"
" ld.d $t0, $sp, 0\n"
" ld.d $ra, $sp, 8\n"
" addi.d $sp, $sp, 16\n"
" jr $t0\n"
" .size my_tramp2, .-my_tramp2\n"
" .popsection\n"
);
#endif /* CONFIG_LOONGARCH */
static struct ftrace_ops direct;
static unsigned long my_tramp = (unsigned long)my_tramp1;
......
......@@ -103,6 +103,47 @@ asm (
#endif /* CONFIG_S390 */
#ifdef CONFIG_LOONGARCH
#include <asm/asm.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp1, @function\n"
" .globl my_tramp1\n"
" my_tramp1:\n"
" addi.d $sp, $sp, -32\n"
" st.d $a0, $sp, 0\n"
" st.d $t0, $sp, 8\n"
" st.d $ra, $sp, 16\n"
" move $a0, $t0\n"
" bl my_direct_func1\n"
" ld.d $a0, $sp, 0\n"
" ld.d $t0, $sp, 8\n"
" ld.d $ra, $sp, 16\n"
" addi.d $sp, $sp, 32\n"
" jr $t0\n"
" .size my_tramp1, .-my_tramp1\n"
" .type my_tramp2, @function\n"
" .globl my_tramp2\n"
" my_tramp2:\n"
" addi.d $sp, $sp, -32\n"
" st.d $a0, $sp, 0\n"
" st.d $t0, $sp, 8\n"
" st.d $ra, $sp, 16\n"
" move $a0, $t0\n"
" bl my_direct_func2\n"
" ld.d $a0, $sp, 0\n"
" ld.d $t0, $sp, 8\n"
" ld.d $ra, $sp, 16\n"
" addi.d $sp, $sp, 32\n"
" jr $t0\n"
" .size my_tramp2, .-my_tramp2\n"
" .popsection\n"
);
#endif /* CONFIG_LOONGARCH */
static unsigned long my_tramp = (unsigned long)my_tramp1;
static unsigned long tramps[2] = {
(unsigned long)my_tramp1,
......
......@@ -66,6 +66,31 @@ asm (
#endif /* CONFIG_S390 */
#ifdef CONFIG_LOONGARCH
#include <asm/asm.h>
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:\n"
" addi.d $sp, $sp, -32\n"
" st.d $a0, $sp, 0\n"
" st.d $t0, $sp, 8\n"
" st.d $ra, $sp, 16\n"
" move $a0, $t0\n"
" bl my_direct_func\n"
" ld.d $a0, $sp, 0\n"
" ld.d $t0, $sp, 8\n"
" ld.d $ra, $sp, 16\n"
" addi.d $sp, $sp, 32\n"
" jr $t0\n"
" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
#endif /* CONFIG_LOONGARCH */
static struct ftrace_ops direct;
static int __init ftrace_direct_multi_init(void)
......
......@@ -70,6 +70,33 @@ asm (
#endif /* CONFIG_S390 */
#ifdef CONFIG_LOONGARCH
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:\n"
" addi.d $sp, $sp, -48\n"
" st.d $a0, $sp, 0\n"
" st.d $a1, $sp, 8\n"
" st.d $a2, $sp, 16\n"
" st.d $t0, $sp, 24\n"
" st.d $ra, $sp, 32\n"
" bl my_direct_func\n"
" ld.d $a0, $sp, 0\n"
" ld.d $a1, $sp, 8\n"
" ld.d $a2, $sp, 16\n"
" ld.d $t0, $sp, 24\n"
" ld.d $ra, $sp, 32\n"
" addi.d $sp, $sp, 48\n"
" jr $t0\n"
" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
#endif /* CONFIG_LOONGARCH */
static struct ftrace_ops direct;
static int __init ftrace_direct_init(void)
......
......@@ -63,6 +63,29 @@ asm (
#endif /* CONFIG_S390 */
#ifdef CONFIG_LOONGARCH
asm (
" .pushsection .text, \"ax\", @progbits\n"
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:\n"
" addi.d $sp, $sp, -32\n"
" st.d $a0, $sp, 0\n"
" st.d $t0, $sp, 8\n"
" st.d $ra, $sp, 16\n"
" bl my_direct_func\n"
" ld.d $a0, $sp, 0\n"
" ld.d $t0, $sp, 8\n"
" ld.d $ra, $sp, 16\n"
" addi.d $sp, $sp, 32\n"
" jr $t0\n"
" .size my_tramp, .-my_tramp\n"
" .popsection\n"
);
#endif /* CONFIG_LOONGARCH */
static struct ftrace_ops direct;
static int __init ftrace_direct_init(void)
......
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_LOONGARCH_PERF_REGS_H
#define _ASM_LOONGARCH_PERF_REGS_H
enum perf_event_loongarch_regs {
PERF_REG_LOONGARCH_PC,
PERF_REG_LOONGARCH_R1,
PERF_REG_LOONGARCH_R2,
PERF_REG_LOONGARCH_R3,
PERF_REG_LOONGARCH_R4,
PERF_REG_LOONGARCH_R5,
PERF_REG_LOONGARCH_R6,
PERF_REG_LOONGARCH_R7,
PERF_REG_LOONGARCH_R8,
PERF_REG_LOONGARCH_R9,
PERF_REG_LOONGARCH_R10,
PERF_REG_LOONGARCH_R11,
PERF_REG_LOONGARCH_R12,
PERF_REG_LOONGARCH_R13,
PERF_REG_LOONGARCH_R14,
PERF_REG_LOONGARCH_R15,
PERF_REG_LOONGARCH_R16,
PERF_REG_LOONGARCH_R17,
PERF_REG_LOONGARCH_R18,
PERF_REG_LOONGARCH_R19,
PERF_REG_LOONGARCH_R20,
PERF_REG_LOONGARCH_R21,
PERF_REG_LOONGARCH_R22,
PERF_REG_LOONGARCH_R23,
PERF_REG_LOONGARCH_R24,
PERF_REG_LOONGARCH_R25,
PERF_REG_LOONGARCH_R26,
PERF_REG_LOONGARCH_R27,
PERF_REG_LOONGARCH_R28,
PERF_REG_LOONGARCH_R29,
PERF_REG_LOONGARCH_R30,
PERF_REG_LOONGARCH_R31,
PERF_REG_LOONGARCH_MAX,
};
#endif /* _ASM_LOONGARCH_PERF_REGS_H */
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
#define __ARCH_WANT_SYS_CLONE
#define __ARCH_WANT_SYS_CLONE3
#include <asm-generic/unistd.h>
......@@ -38,7 +38,7 @@ ifneq ($(NO_SYSCALL_TABLE),1)
NO_SYSCALL_TABLE := 0
endif
else
ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390 mips))
ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390 mips loongarch))
NO_SYSCALL_TABLE := 0
endif
endif
......@@ -80,6 +80,12 @@ ifeq ($(SRCARCH),arm64)
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
endif
ifeq ($(SRCARCH),loongarch)
NO_PERF_REGS := 0
CFLAGS += -I$(OUTPUT)arch/loongarch/include/generated
LIBUNWIND_LIBS = -lunwind -lunwind-loongarch64
endif
ifeq ($(SRCARCH),riscv)
NO_PERF_REGS := 0
endif
......@@ -107,7 +113,7 @@ endif
# Disable it on all other architectures in case libdw unwind
# support is detected in system. Add supported architectures
# to the check.
ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390 csky riscv))
ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390 csky riscv loongarch))
NO_LIBDW_DWARF_UNWIND := 1
endif
......@@ -129,7 +135,7 @@ endef
ifdef LIBUNWIND_DIR
LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include
LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
LIBUNWIND_ARCHS = x86 x86_64 arm aarch64 debug-frame-arm debug-frame-aarch64
LIBUNWIND_ARCHS = x86 x86_64 arm aarch64 debug-frame-arm debug-frame-aarch64 loongarch
$(foreach libunwind_arch,$(LIBUNWIND_ARCHS),$(call libunwind_arch_set_flags,$(libunwind_arch)))
endif
......
# SPDX-License-Identifier: GPL-2.0
ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
endif
PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
PERF_HAVE_JITDUMP := 1
#
# Syscall table generation for perf
#
out := $(OUTPUT)arch/loongarch/include/generated/asm
header := $(out)/syscalls.c
incpath := $(srctree)/tools
sysdef := $(srctree)/tools/arch/loongarch/include/uapi/asm/unistd.h
sysprf := $(srctree)/tools/perf/arch/loongarch/entry/syscalls/
systbl := $(sysprf)/mksyscalltbl
# Create output directory if not already present
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
$(header): $(sysdef) $(systbl)
$(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@
clean::
$(call QUIET_CLEAN, loongarch) $(RM) $(header)
archheaders: $(header)
// SPDX-License-Identifier: GPL-2.0
/*
* Perf annotate functions.
*
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
static
struct ins_ops *loongarch__associate_ins_ops(struct arch *arch, const char *name)
{
struct ins_ops *ops = NULL;
if (!strncmp(name, "beqz", 4) ||
!strncmp(name, "bnez", 4) ||
!strncmp(name, "beq", 3) ||
!strncmp(name, "bne", 3) ||
!strncmp(name, "blt", 3) ||
!strncmp(name, "bge", 3) ||
!strncmp(name, "bltu", 4) ||
!strncmp(name, "bgeu", 4) ||
!strncmp(name, "bl", 2))
ops = &call_ops;
else if (!strncmp(name, "jirl", 4))
ops = &ret_ops;
else if (name[0] == 'b')
ops = &jump_ops;
else
return NULL;
arch__associate_ins_ops(arch, name, ops);
return ops;
}
static
int loongarch__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
{
if (!arch->initialized) {
arch->associate_instruction_ops = loongarch__associate_ins_ops;
arch->initialized = true;
arch->objdump.comment_char = '#';
}
return 0;
}
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
# Generate system call table for perf. Derived from
# powerpc script.
#
# Author(s): Ming Wang <wangming01@loongson.cn>
# Author(s): Huacai Chen <chenhuacai@loongson.cn>
# Copyright (C) 2020-2023 Loongson Technology Corporation Limited
gcc=$1
hostcc=$2
incpath=$3
input=$4
if ! test -r $input; then
echo "Could not read input file" >&2
exit 1
fi
create_table_from_c()
{
local sc nr last_sc
create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX`
{
cat <<-_EoHEADER
#include <stdio.h>
#include "$input"
int main(int argc, char *argv[])
{
_EoHEADER
while read sc nr; do
printf "%s\n" " printf(\"\\t[%d] = \\\"$sc\\\",\\n\", $nr);"
last_sc=$nr
done
printf "%s\n" " printf(\"#define SYSCALLTBL_LOONGARCH_MAX_ID %d\\n\", $last_sc);"
printf "}\n"
} | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c -
$create_table_exe
rm -f $create_table_exe
}
create_table()
{
echo "static const char *syscalltbl_loongarch[] = {"
create_table_from_c
echo "};"
}
$gcc -E -dM -x c -I $incpath/include/uapi $input \
|sed -ne 's/^#define __NR_//p' \
|sort -t' ' -k2 -n \
|create_table
/* SPDX-License-Identifier: GPL-2.0 */
/*
* dwarf-regs-table.h : Mapping of DWARF debug register numbers into
* register names.
*
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
#ifdef DEFINE_DWARF_REGSTR_TABLE
static const char * const loongarch_regstr_tbl[] = {
"%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
"%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
"%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
};
#endif
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ARCH_PERF_REGS_H
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include <linux/types.h>
#include <asm/perf_regs.h>
#define PERF_REGS_MAX PERF_REG_LOONGARCH_MAX
#define PERF_REG_IP PERF_REG_LOONGARCH_PC
#define PERF_REG_SP PERF_REG_LOONGARCH_R3
#define PERF_REGS_MASK ((1ULL << PERF_REG_LOONGARCH_MAX) - 1)
#endif /* ARCH_PERF_REGS_H */
perf-y += perf_regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
// SPDX-License-Identifier: GPL-2.0
/*
* dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
*
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
#include <stdio.h>
#include <errno.h> /* for EINVAL */
#include <string.h> /* for strcmp */
#include <dwarf-regs.h>
struct pt_regs_dwarfnum {
const char *name;
unsigned int dwarfnum;
};
static struct pt_regs_dwarfnum loongarch_gpr_table[] = {
{"%r0", 0}, {"%r1", 1}, {"%r2", 2}, {"%r3", 3},
{"%r4", 4}, {"%r5", 5}, {"%r6", 6}, {"%r7", 7},
{"%r8", 8}, {"%r9", 9}, {"%r10", 10}, {"%r11", 11},
{"%r12", 12}, {"%r13", 13}, {"%r14", 14}, {"%r15", 15},
{"%r16", 16}, {"%r17", 17}, {"%r18", 18}, {"%r19", 19},
{"%r20", 20}, {"%r21", 21}, {"%r22", 22}, {"%r23", 23},
{"%r24", 24}, {"%r25", 25}, {"%r26", 26}, {"%r27", 27},
{"%r28", 28}, {"%r29", 29}, {"%r30", 30}, {"%r31", 31},
{NULL, 0}
};
const char *get_arch_regstr(unsigned int n)
{
n %= 32;
return loongarch_gpr_table[n].name;
}
int regs_query_register_offset(const char *name)
{
const struct pt_regs_dwarfnum *roff;
for (roff = loongarch_gpr_table; roff->name != NULL; roff++)
if (!strcmp(roff->name, name))
return roff->dwarfnum;
return -EINVAL;
}
// SPDX-License-Identifier: GPL-2.0
#include "../../../util/perf_regs.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2020-2023 Loongson Technology Corporation Limited */
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
#include "../../util/sample.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
struct unwind_info *ui = arg;
struct regs_dump *user_regs = &ui->sample->user_regs;
Dwarf_Word dwarf_regs[PERF_REG_LOONGARCH_MAX];
#define REG(r) ({ \
Dwarf_Word val = 0; \
perf_reg_value(&val, user_regs, PERF_REG_LOONGARCH_##r); \
val; \
})
dwarf_regs[0] = 0;
dwarf_regs[1] = REG(R1);
dwarf_regs[2] = REG(R2);
dwarf_regs[3] = REG(R3);
dwarf_regs[4] = REG(R4);
dwarf_regs[5] = REG(R5);
dwarf_regs[6] = REG(R6);
dwarf_regs[7] = REG(R7);
dwarf_regs[8] = REG(R8);
dwarf_regs[9] = REG(R9);
dwarf_regs[10] = REG(R10);
dwarf_regs[11] = REG(R11);
dwarf_regs[12] = REG(R12);
dwarf_regs[13] = REG(R13);
dwarf_regs[14] = REG(R14);
dwarf_regs[15] = REG(R15);
dwarf_regs[16] = REG(R16);
dwarf_regs[17] = REG(R17);
dwarf_regs[18] = REG(R18);
dwarf_regs[19] = REG(R19);
dwarf_regs[20] = REG(R20);
dwarf_regs[21] = REG(R21);
dwarf_regs[22] = REG(R22);
dwarf_regs[23] = REG(R23);
dwarf_regs[24] = REG(R24);
dwarf_regs[25] = REG(R25);
dwarf_regs[26] = REG(R26);
dwarf_regs[27] = REG(R27);
dwarf_regs[28] = REG(R28);
dwarf_regs[29] = REG(R29);
dwarf_regs[30] = REG(R30);
dwarf_regs[31] = REG(R31);
dwfl_thread_state_register_pc(thread, REG(PC));
return dwfl_thread_state_registers(thread, 0, PERF_REG_LOONGARCH_MAX, dwarf_regs);
}
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <libunwind.h>
#include "perf_regs.h"
#include "../../util/unwind.h"
#include "util/debug.h"
int libunwind__arch_reg_id(int regnum)
{
switch (regnum) {
case UNW_LOONGARCH64_R1:
return PERF_REG_LOONGARCH_R1;
case UNW_LOONGARCH64_R2:
return PERF_REG_LOONGARCH_R2;
case UNW_LOONGARCH64_R3:
return PERF_REG_LOONGARCH_R3;
case UNW_LOONGARCH64_R4:
return PERF_REG_LOONGARCH_R4;
case UNW_LOONGARCH64_R5:
return PERF_REG_LOONGARCH_R5;
case UNW_LOONGARCH64_R6:
return PERF_REG_LOONGARCH_R6;
case UNW_LOONGARCH64_R7:
return PERF_REG_LOONGARCH_R7;
case UNW_LOONGARCH64_R8:
return PERF_REG_LOONGARCH_R8;
case UNW_LOONGARCH64_R9:
return PERF_REG_LOONGARCH_R9;
case UNW_LOONGARCH64_R10:
return PERF_REG_LOONGARCH_R10;
case UNW_LOONGARCH64_R11:
return PERF_REG_LOONGARCH_R11;
case UNW_LOONGARCH64_R12:
return PERF_REG_LOONGARCH_R12;
case UNW_LOONGARCH64_R13:
return PERF_REG_LOONGARCH_R13;
case UNW_LOONGARCH64_R14:
return PERF_REG_LOONGARCH_R14;
case UNW_LOONGARCH64_R15:
return PERF_REG_LOONGARCH_R15;
case UNW_LOONGARCH64_R16:
return PERF_REG_LOONGARCH_R16;
case UNW_LOONGARCH64_R17:
return PERF_REG_LOONGARCH_R17;
case UNW_LOONGARCH64_R18:
return PERF_REG_LOONGARCH_R18;
case UNW_LOONGARCH64_R19:
return PERF_REG_LOONGARCH_R19;
case UNW_LOONGARCH64_R20:
return PERF_REG_LOONGARCH_R20;
case UNW_LOONGARCH64_R21:
return PERF_REG_LOONGARCH_R21;
case UNW_LOONGARCH64_R22:
return PERF_REG_LOONGARCH_R22;
case UNW_LOONGARCH64_R23:
return PERF_REG_LOONGARCH_R23;
case UNW_LOONGARCH64_R24:
return PERF_REG_LOONGARCH_R24;
case UNW_LOONGARCH64_R25:
return PERF_REG_LOONGARCH_R25;
case UNW_LOONGARCH64_R26:
return PERF_REG_LOONGARCH_R26;
case UNW_LOONGARCH64_R27:
return PERF_REG_LOONGARCH_R27;
case UNW_LOONGARCH64_R28:
return PERF_REG_LOONGARCH_R28;
case UNW_LOONGARCH64_R29:
return PERF_REG_LOONGARCH_R29;
case UNW_LOONGARCH64_R30:
return PERF_REG_LOONGARCH_R30;
case UNW_LOONGARCH64_R31:
return PERF_REG_LOONGARCH_R31;
case UNW_LOONGARCH64_PC:
return PERF_REG_LOONGARCH_PC;
default:
pr_err("unwind: invalid reg id %d\n", regnum);
return -EINVAL;
}
return -EINVAL;
}
......@@ -40,6 +40,7 @@ arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
arch/arm/include/uapi/asm/perf_regs.h
arch/arm64/include/uapi/asm/perf_regs.h
arch/loongarch/include/uapi/asm/perf_regs.h
arch/mips/include/uapi/asm/perf_regs.h
arch/powerpc/include/uapi/asm/perf_regs.h
arch/s390/include/uapi/asm/perf_regs.h
......
......@@ -149,6 +149,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
#include "arch/arm/annotate/instructions.c"
#include "arch/arm64/annotate/instructions.c"
#include "arch/csky/annotate/instructions.c"
#include "arch/loongarch/annotate/instructions.c"
#include "arch/mips/annotate/instructions.c"
#include "arch/x86/annotate/instructions.c"
#include "arch/powerpc/annotate/instructions.c"
......@@ -211,6 +212,13 @@ static struct arch architectures[] = {
.comment_char = '#',
},
},
{
.name = "loongarch",
.init = loongarch__annotate_init,
.objdump = {
.comment_char = '#',
},
},
};
static void ins__delete(struct ins_operands *ops)
......
......@@ -14,6 +14,10 @@
#define EM_AARCH64 183 /* ARM 64 bit */
#endif
#ifndef EM_LOONGARCH
#define EM_LOONGARCH 258 /* LoongArch */
#endif
/* Define const char * {arch}_register_tbl[] */
#define DEFINE_DWARF_REGSTR_TABLE
#include "../arch/x86/include/dwarf-regs-table.h"
......@@ -25,6 +29,7 @@
#include "../arch/sparc/include/dwarf-regs-table.h"
#include "../arch/xtensa/include/dwarf-regs-table.h"
#include "../arch/mips/include/dwarf-regs-table.h"
#include "../arch/loongarch/include/dwarf-regs-table.h"
#define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL)
......@@ -56,6 +61,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
return __get_dwarf_regstr(xtensa_regstr_tbl, n);
case EM_MIPS:
return __get_dwarf_regstr(mips_regstr_tbl, n);
case EM_LOONGARCH:
return __get_dwarf_regstr(loongarch_regstr_tbl, n);
default:
pr_err("ELF MACHINE %x is not supported.\n", machine);
}
......
......@@ -435,6 +435,8 @@ static const char *normalize_arch(char *arch)
return "mips";
if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
return "sh";
if (!strncmp(arch, "loongarch", 9))
return "loongarch";
return arch;
}
......
......@@ -43,6 +43,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
#elif defined(__riscv) && __riscv_xlen == 64
#define GEN_ELF_ARCH EM_RISCV
#define GEN_ELF_CLASS ELFCLASS64
#elif defined(__loongarch__)
#define GEN_ELF_ARCH EM_LOONGARCH
#define GEN_ELF_CLASS ELFCLASS64
#else
#error "unsupported architecture"
#endif
......
......@@ -28,6 +28,7 @@ uint64_t __weak arch__user_reg_mask(void)
#include "../../arch/arm/include/uapi/asm/perf_regs.h"
#include "../../arch/csky/include/uapi/asm/perf_regs.h"
#include "../../arch/loongarch/include/uapi/asm/perf_regs.h"
#include "../../arch/mips/include/uapi/asm/perf_regs.h"
#include "../../arch/powerpc/include/uapi/asm/perf_regs.h"
#include "../../arch/riscv/include/uapi/asm/perf_regs.h"
......@@ -236,6 +237,79 @@ static const char *__perf_reg_name_csky(int id)
return NULL;
}
static inline const char *__perf_reg_name_loongarch(int id)
{
switch (id) {
case PERF_REG_LOONGARCH_PC:
return "PC";
case PERF_REG_LOONGARCH_R1:
return "%r1";
case PERF_REG_LOONGARCH_R2:
return "%r2";
case PERF_REG_LOONGARCH_R3:
return "%r3";
case PERF_REG_LOONGARCH_R4:
return "%r4";
case PERF_REG_LOONGARCH_R5:
return "%r5";
case PERF_REG_LOONGARCH_R6:
return "%r6";
case PERF_REG_LOONGARCH_R7:
return "%r7";
case PERF_REG_LOONGARCH_R8:
return "%r8";
case PERF_REG_LOONGARCH_R9:
return "%r9";
case PERF_REG_LOONGARCH_R10:
return "%r10";
case PERF_REG_LOONGARCH_R11:
return "%r11";
case PERF_REG_LOONGARCH_R12:
return "%r12";
case PERF_REG_LOONGARCH_R13:
return "%r13";
case PERF_REG_LOONGARCH_R14:
return "%r14";
case PERF_REG_LOONGARCH_R15:
return "%r15";
case PERF_REG_LOONGARCH_R16:
return "%r16";
case PERF_REG_LOONGARCH_R17:
return "%r17";
case PERF_REG_LOONGARCH_R18:
return "%r18";
case PERF_REG_LOONGARCH_R19:
return "%r19";
case PERF_REG_LOONGARCH_R20:
return "%r20";
case PERF_REG_LOONGARCH_R21:
return "%r21";
case PERF_REG_LOONGARCH_R22:
return "%r22";
case PERF_REG_LOONGARCH_R23:
return "%r23";
case PERF_REG_LOONGARCH_R24:
return "%r24";
case PERF_REG_LOONGARCH_R25:
return "%r25";
case PERF_REG_LOONGARCH_R26:
return "%r26";
case PERF_REG_LOONGARCH_R27:
return "%r27";
case PERF_REG_LOONGARCH_R28:
return "%r28";
case PERF_REG_LOONGARCH_R29:
return "%r29";
case PERF_REG_LOONGARCH_R30:
return "%r30";
case PERF_REG_LOONGARCH_R31:
return "%r31";
default:
break;
}
return NULL;
}
static const char *__perf_reg_name_mips(int id)
{
switch (id) {
......@@ -670,6 +744,8 @@ const char *perf_reg_name(int id, const char *arch)
if (!strcmp(arch, "csky"))
reg_name = __perf_reg_name_csky(id);
else if (!strcmp(arch, "loongarch"))
reg_name = __perf_reg_name_loongarch(id);
else if (!strcmp(arch, "mips"))
reg_name = __perf_reg_name_mips(id);
else if (!strcmp(arch, "powerpc"))
......
......@@ -38,6 +38,10 @@ static const char **syscalltbl_native = syscalltbl_arm64;
#include <asm/syscalls_n64.c>
const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID;
static const char **syscalltbl_native = syscalltbl_mips_n64;
#elif defined(__loongarch__)
#include <asm/syscalls.c>
const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID;
static const char **syscalltbl_native = syscalltbl_loongarch;
#endif
struct syscall {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment