Commit 1519018c authored by Catalin Marinas's avatar Catalin Marinas

Merge branches 'for-next/sve-remove-pseudo-regs', 'for-next/backtrace-ipi',...

Merge branches 'for-next/sve-remove-pseudo-regs', 'for-next/backtrace-ipi', 'for-next/kselftest', 'for-next/misc' and 'for-next/cpufeat-display-cores', remote-tracking branch 'arm64/for-next/perf' into for-next/core

* arm64/for-next/perf:
  perf: hisi: Fix use-after-free when register pmu fails
  drivers/perf: hisi_pcie: Initialize event->cpu only on success
  drivers/perf: hisi_pcie: Check the type first in pmu::event_init()
  perf/arm-cmn: Enable per-DTC counter allocation
  perf/arm-cmn: Rework DTC counters (again)
  perf/arm-cmn: Fix DTC domain detection
  drivers: perf: arm_pmuv3: Drop some unused arguments from armv8_pmu_init()
  drivers: perf: arm_pmuv3: Read PMMIR_EL1 unconditionally
  drivers/perf: hisi: use cpuhp_state_remove_instance_nocalls() for hisi_hns3_pmu uninit process
  drivers/perf: xgene: Use device_get_match_data()
  perf/amlogic: add missing MODULE_DEVICE_TABLE
  docs/perf: Add ampere_cspmu to toctree to fix a build warning
  perf: arm_cspmu: ampere_cspmu: Add support for Ampere SoC PMU
  perf: arm_cspmu: Support implementation specific validation
  perf: arm_cspmu: Support implementation specific filters
  perf: arm_cspmu: Split 64-bit write to 32-bit writes
  perf: arm_cspmu: Separate Arm and vendor module

* for-next/sve-remove-pseudo-regs:
  : arm64/fpsimd: Remove the vector length pseudo registers
  arm64/sve: Remove SMCR pseudo register from cpufeature code
  arm64/sve: Remove ZCR pseudo register from cpufeature code

* for-next/backtrace-ipi:
  : Add IPI for backtraces/kgdb, use NMI
  arm64: smp: Don't directly call arch_smp_send_reschedule() for wakeup
  arm64: smp: avoid NMI IPIs with broken MediaTek FW
  arm64: smp: Mark IPI globals as __ro_after_init
  arm64: kgdb: Implement kgdb_roundup_cpus() to enable pseudo-NMI roundup
  arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI
  arm64: smp: Add arch support for backtrace using pseudo-NMI
  arm64: smp: Remove dedicated wakeup IPI
  arm64: idle: Tag the arm64 idle functions as __cpuidle
  irqchip/gic-v3: Enable support for SGIs to act as NMIs

* for-next/kselftest:
  : Various arm64 kselftest updates
  kselftest/arm64: Validate SVCR in streaming SVE stress test

* for-next/misc:
  : Miscellaneous patches
  arm64: Restrict CPU_BIG_ENDIAN to GNU as or LLVM IAS 15.x or newer
  arm64: module: Fix PLT counting when CONFIG_RANDOMIZE_BASE=n
  arm64, irqchip/gic-v3, ACPI: Move MADT GICC enabled check into a helper
  clocksource/drivers/arm_arch_timer: limit XGene-1 workaround
  arm64: Remove system_uses_lse_atomics()
  arm64: Mark the 'addr' argument to set_ptes() and __set_pte_at() as unused
  arm64/mm: Hoist synchronization out of set_ptes() loop
  arm64: swiotlb: Reduce the default size if no ZONE_DMA bouncing needed

* for-next/cpufeat-display-cores:
  : arm64 cpufeature display enabled cores
  arm64: cpufeature: Change DBM to display enabled cores
  arm64: cpufeature: Display the set of cores with a feature
......@@ -1355,6 +1355,8 @@ choice
config CPU_BIG_ENDIAN
bool "Build big-endian kernel"
depends on !LD_IS_LLD || LLD_VERSION >= 130000
# https://github.com/llvm/llvm-project/commit/1379b150991f70a5782e9a143c2ba5308da1161c
depends on AS_IS_GNU || AS_VERSION >= 150000
help
Say Y if you plan on running a kernel with a big-endian userspace.
......
......@@ -63,12 +63,6 @@ struct cpuinfo_arm64 {
u64 reg_id_aa64smfr0;
struct cpuinfo_32bit aarch32;
/* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
u64 reg_zcr;
/* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */
u64 reg_smcr;
};
DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
......
......@@ -23,6 +23,7 @@
#include <linux/bug.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/cpumask.h>
/*
* CPU feature register tracking
......@@ -380,6 +381,7 @@ struct arm64_cpu_capabilities {
* method is robust against being called multiple times.
*/
const struct arm64_cpu_capabilities *match_list;
const struct cpumask *cpus;
};
static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
......
......@@ -85,7 +85,8 @@
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
#define ARM_CPU_PART_CORTEX_A78C 0xD4B
#define APM_CPU_PART_POTENZA 0x000
#define APM_CPU_PART_XGENE 0x000
#define APM_CPU_VAR_POTENZA 0x00
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
......
......@@ -128,7 +128,6 @@ extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern void sme2_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern u64 read_zcr_features(void);
extern u64 read_smcr_features(void);
/*
......
......@@ -6,6 +6,9 @@
#include <asm-generic/irq.h>
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu);
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
struct pt_regs;
int set_handle_irq(void (*handle_irq)(struct pt_regs *));
......
......@@ -16,14 +16,9 @@
#include <asm/atomic_lse.h>
#include <asm/cpucaps.h>
static __always_inline bool system_uses_lse_atomics(void)
{
return alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS);
}
#define __lse_ll_sc_body(op, ...) \
({ \
system_uses_lse_atomics() ? \
alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS) ? \
__lse_##op(__VA_ARGS__) : \
__ll_sc_##op(__VA_ARGS__); \
})
......@@ -34,8 +29,6 @@ static __always_inline bool system_uses_lse_atomics(void)
#else /* CONFIG_ARM64_LSE_ATOMICS */
static inline bool system_uses_lse_atomics(void) { return false; }
#define __lse_ll_sc_body(op, ...) __ll_sc_##op(__VA_ARGS__)
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc
......
......@@ -90,7 +90,7 @@ static inline bool try_page_mte_tagging(struct page *page)
}
void mte_zero_clear_page_tags(void *addr);
void mte_sync_tags(pte_t pte);
void mte_sync_tags(pte_t pte, unsigned int nr_pages);
void mte_copy_page_tags(void *kto, const void *kfrom);
void mte_thread_init_user(void);
void mte_thread_switch(struct task_struct *next);
......@@ -122,7 +122,7 @@ static inline bool try_page_mte_tagging(struct page *page)
static inline void mte_zero_clear_page_tags(void *addr)
{
}
static inline void mte_sync_tags(pte_t pte)
static inline void mte_sync_tags(pte_t pte, unsigned int nr_pages)
{
}
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
......
......@@ -325,8 +325,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
__func__, pte_val(old_pte), pte_val(pte));
}
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
{
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
......@@ -339,24 +338,22 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
*/
if (system_supports_mte() && pte_access_permitted(pte, false) &&
!pte_special(pte) && pte_tagged(pte))
mte_sync_tags(pte);
__check_safe_pte_update(mm, ptep, pte);
set_pte(ptep, pte);
mte_sync_tags(pte, nr_pages);
}
static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned int nr)
static inline void set_ptes(struct mm_struct *mm,
unsigned long __always_unused addr,
pte_t *ptep, pte_t pte, unsigned int nr)
{
page_table_check_ptes_set(mm, ptep, pte, nr);
__sync_cache_and_tags(pte, nr);
for (;;) {
__set_pte_at(mm, addr, ptep, pte);
__check_safe_pte_update(mm, ptep, pte);
set_pte(ptep, pte);
if (--nr == 0)
break;
ptep++;
addr += PAGE_SIZE;
pte_val(pte) += PAGE_SIZE;
}
}
......@@ -531,18 +528,29 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
static inline void __set_pte_at(struct mm_struct *mm,
unsigned long __always_unused addr,
pte_t *ptep, pte_t pte, unsigned int nr)
{
__sync_cache_and_tags(pte, nr);
__check_safe_pte_update(mm, ptep, pte);
set_pte(ptep, pte);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
page_table_check_pmd_set(mm, pmdp, pmd);
return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd),
PMD_SIZE >> PAGE_SHIFT);
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
page_table_check_pud_set(mm, pudp, pud);
return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud),
PUD_SIZE >> PAGE_SHIFT);
}
#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
......
......@@ -89,9 +89,9 @@ extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
extern void arch_send_wakeup_ipi(unsigned int cpu);
#else
static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
static inline void arch_send_wakeup_ipi(unsigned int cpu)
{
BUILD_BUG();
}
......
......@@ -103,7 +103,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
&mailbox->entry_point);
writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
arch_send_wakeup_ipi_mask(cpumask_of(cpu));
arch_send_wakeup_ipi(cpu);
return 0;
}
......
......@@ -611,18 +611,6 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = {
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_zcr[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_WIDTH, 0), /* LEN */
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_smcr[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_WIDTH, 0), /* LEN */
ARM64_FTR_END,
};
/*
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
......@@ -735,10 +723,6 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
/* Op1 = 0, CRn = 1, CRm = 2 */
ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr),
/* Op1 = 1, CRn = 0, CRm = 0 */
ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
......@@ -1040,21 +1024,20 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
if (IS_ENABLED(CONFIG_ARM64_SVE) &&
id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
info->reg_zcr = read_zcr_features();
init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
sve_kernel_enable(NULL);
vec_init_vq_map(ARM64_VEC_SVE);
}
if (IS_ENABLED(CONFIG_ARM64_SME) &&
id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
info->reg_smcr = read_smcr_features();
sme_kernel_enable(NULL);
/*
* We mask out SMPS since even if the hardware
* supports priorities the kernel does not at present
* and we block access to them.
*/
info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr);
vec_init_vq_map(ARM64_VEC_SME);
}
......@@ -1289,28 +1272,25 @@ void update_cpu_features(int cpu,
taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
/* Probe vector lengths */
if (IS_ENABLED(CONFIG_ARM64_SVE) &&
id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
info->reg_zcr = read_zcr_features();
taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
info->reg_zcr, boot->reg_zcr);
/* Probe vector lengths */
if (!system_capabilities_finalized())
if (!system_capabilities_finalized()) {
sve_kernel_enable(NULL);
vec_update_vq_map(ARM64_VEC_SVE);
}
}
if (IS_ENABLED(CONFIG_ARM64_SME) &&
id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
info->reg_smcr = read_smcr_features();
sme_kernel_enable(NULL);
/*
* We mask out SMPS since even if the hardware
* supports priorities the kernel does not at present
* and we block access to them.
*/
info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu,
info->reg_smcr, boot->reg_smcr);
/* Probe vector lengths */
if (!system_capabilities_finalized())
......@@ -1848,6 +1828,8 @@ static int __init parse_kpti(char *str)
early_param("kpti", parse_kpti);
#ifdef CONFIG_ARM64_HW_AFDBM
static struct cpumask dbm_cpus __read_mostly;
static inline void __cpu_enable_hw_dbm(void)
{
u64 tcr = read_sysreg(tcr_el1) | TCR_HD;
......@@ -1883,35 +1865,22 @@ static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap)
static void cpu_enable_hw_dbm(struct arm64_cpu_capabilities const *cap)
{
if (cpu_can_use_dbm(cap))
if (cpu_can_use_dbm(cap)) {
__cpu_enable_hw_dbm();
cpumask_set_cpu(smp_processor_id(), &dbm_cpus);
}
}
static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
int __unused)
{
static bool detected = false;
/*
* DBM is a non-conflicting feature. i.e, the kernel can safely
* run a mix of CPUs with and without the feature. So, we
* unconditionally enable the capability to allow any late CPU
* to use the feature. We only enable the control bits on the
* CPU, if it actually supports.
*
* We have to make sure we print the "feature" detection only
* when at least one CPU actually uses it. So check if this CPU
* can actually use it and print the message exactly once.
*
* This is safe as all CPUs (including secondary CPUs - due to the
* LOCAL_CPU scope - and the hotplugged CPUs - via verification)
* goes through the "matches" check exactly once. Also if a CPU
* matches the criteria, it is guaranteed that the CPU will turn
* the DBM on, as the capability is unconditionally enabled.
* CPU, if it is supported.
*/
if (!detected && cpu_can_use_dbm(cap)) {
detected = true;
pr_info("detected: Hardware dirty bit management\n");
}
return true;
}
......@@ -1944,8 +1913,6 @@ int get_cpu_with_amu_feat(void)
static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
{
if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) {
pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
smp_processor_id());
cpumask_set_cpu(smp_processor_id(), &amu_cpus);
/* 0 reference values signal broken/disabled counters */
......@@ -2405,16 +2372,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_AMU_EXTN
{
/*
* The feature is enabled by default if CONFIG_ARM64_AMU_EXTN=y.
* Therefore, don't provide .desc as we don't want the detection
* message to be shown until at least one CPU is detected to
* support the feature.
*/
.desc = "Activity Monitors Unit (AMU)",
.capability = ARM64_HAS_AMU_EXTN,
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.matches = has_amu,
.cpu_enable = cpu_amu_enable,
.cpus = &amu_cpus,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, AMU, IMP)
},
#endif /* CONFIG_ARM64_AMU_EXTN */
......@@ -2454,18 +2417,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
},
#ifdef CONFIG_ARM64_HW_AFDBM
{
/*
* Since we turn this on always, we don't want the user to
* think that the feature is available when it may not be.
* So hide the description.
*
* .desc = "Hardware pagetable Dirty Bit Management",
*
*/
.desc = "Hardware dirty bit management",
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.capability = ARM64_HW_DBM,
.matches = has_hw_dbm,
.cpu_enable = cpu_enable_hw_dbm,
.cpus = &dbm_cpus,
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM)
},
#endif
......@@ -2981,7 +2938,7 @@ static void update_cpu_capabilities(u16 scope_mask)
!caps->matches(caps, cpucap_default_scope(caps)))
continue;
if (caps->desc)
if (caps->desc && !caps->cpus)
pr_info("detected: %s\n", caps->desc);
__set_bit(caps->capability, system_cpucaps);
......@@ -3153,36 +3110,20 @@ static void verify_local_elf_hwcaps(void)
static void verify_sve_features(void)
{
u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
u64 zcr = read_zcr_features();
unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK;
unsigned int len = zcr & ZCR_ELx_LEN_MASK;
if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SVE)) {
if (vec_verify_vq_map(ARM64_VEC_SVE)) {
pr_crit("CPU%d: SVE: vector length support mismatch\n",
smp_processor_id());
cpu_die_early();
}
/* Add checks on other ZCR bits here if necessary */
}
static void verify_sme_features(void)
{
u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
u64 smcr = read_smcr_features();
unsigned int safe_len = safe_smcr & SMCR_ELx_LEN_MASK;
unsigned int len = smcr & SMCR_ELx_LEN_MASK;
if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) {
if (vec_verify_vq_map(ARM64_VEC_SME)) {
pr_crit("CPU%d: SME: vector length support mismatch\n",
smp_processor_id());
cpu_die_early();
}
/* Add checks on other SMCR bits here if necessary */
}
static void verify_hyp_capabilities(void)
......@@ -3330,6 +3271,7 @@ unsigned long cpu_get_elf_hwcap2(void)
static void __init setup_system_capabilities(void)
{
int i;
/*
* We have finalised the system-wide safe feature
* registers, finalise the capabilities that depend
......@@ -3338,6 +3280,15 @@ static void __init setup_system_capabilities(void)
*/
update_cpu_capabilities(SCOPE_SYSTEM);
enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
for (i = 0; i < ARM64_NCAPS; i++) {
const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
if (caps && caps->cpus && caps->desc &&
cpumask_any(caps->cpus) < nr_cpu_ids)
pr_info("detected: %s on CPU%*pbl\n",
caps->desc, cpumask_pr_args(caps->cpus));
}
}
void __init setup_cpu_features(void)
......
......@@ -1170,32 +1170,12 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
isb();
}
/*
* Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
* vector length.
*
* Use only if SVE is present.
* This function clobbers the SVE vector length.
*/
u64 read_zcr_features(void)
{
/*
* Set the maximum possible VL, and write zeroes to all other
* bits to see if they stick.
*/
sve_kernel_enable(NULL);
write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
/* Return LEN value that would be written to get the maximum VL */
return sve_vq_from_vl(sve_get_vl()) - 1;
}
void __init sve_setup(void)
{
struct vl_info *info = &vl_info[ARM64_VEC_SVE];
u64 zcr;
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
unsigned long b;
int max_bit;
if (!system_supports_sve())
return;
......@@ -1208,17 +1188,8 @@ void __init sve_setup(void)
if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);
zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
/*
* Sanity-check that the max VL we determined through CPU features
* corresponds properly to sve_vq_map. If not, do our best:
*/
if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE,
info->max_vl)))
info->max_vl = find_supported_vector_length(ARM64_VEC_SVE,
info->max_vl);
max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
/*
* For the default VL, pick the maximum supported value <= 64.
......@@ -1333,32 +1304,10 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
SYS_SMCR_EL1);
}
/*
* Read the pseudo-SMCR used by cpufeatures to identify the supported
* vector length.
*
* Use only if SME is present.
* This function clobbers the SME vector length.
*/
u64 read_smcr_features(void)
{
sme_kernel_enable(NULL);
/*
* Set the maximum possible VL.
*/
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
SYS_SMCR_EL1);
/* Return LEN value that would be written to get the maximum VL */
return sve_vq_from_vl(sme_get_vl()) - 1;
}
void __init sme_setup(void)
{
struct vl_info *info = &vl_info[ARM64_VEC_SME];
u64 smcr;
int min_bit;
int min_bit, max_bit;
if (!system_supports_sme())
return;
......@@ -1367,24 +1316,16 @@ void __init sme_setup(void)
* SME doesn't require any particular vector length be
* supported but it does require at least one. We should have
* disabled the feature entirely while bringing up CPUs but
* let's double check here.
* let's double check here. The bitmap is SVE_VQ_MAP sized for
* sharing with SVE.
*/
WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1);
/*
* Sanity-check that the max VL we determined through CPU features
* corresponds properly to sme_vq_map. If not, do our best:
*/
if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME,
info->max_vl)))
info->max_vl = find_supported_vector_length(ARM64_VEC_SME,
info->max_vl);
max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
WARN_ON(info->min_vl > info->max_vl);
......
......@@ -20,7 +20,7 @@
* ensure that interrupts are not masked at the PMR (because the core will
* not wake up if we block the wake up signal in the interrupt controller).
*/
void noinstr cpu_do_idle(void)
void __cpuidle cpu_do_idle(void)
{
struct arm_cpuidle_irq_context context;
......@@ -35,7 +35,7 @@ void noinstr cpu_do_idle(void)
/*
* This is our default idle handler.
*/
void noinstr arch_cpu_idle(void)
void __cpuidle arch_cpu_idle(void)
{
/*
* This should do all the clock switching and wait for interrupt
......
......@@ -167,9 +167,6 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
switch (ELF64_R_TYPE(rela[i].r_info)) {
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
break;
/*
* We only have to consider branch targets that resolve
* to symbols that are defined in a different section.
......@@ -269,9 +266,6 @@ static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela,
{
int i = 0, j = numrels - 1;
if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
return 0;
while (i < j) {
if (branch_rela_needs_plt(syms, &rela[i], dstidx))
i++;
......
......@@ -35,10 +35,10 @@ DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
#endif
void mte_sync_tags(pte_t pte)
void mte_sync_tags(pte_t pte, unsigned int nr_pages)
{
struct page *page = pte_page(pte);
long i, nr_pages = compound_nr(page);
unsigned int i;
/* if PG_mte_tagged is set, tags have already been initialised */
for (i = 0; i < nr_pages; i++, page++) {
......
......@@ -32,7 +32,9 @@
#include <linux/irq_work.h>
#include <linux/kernel_stat.h>
#include <linux/kexec.h>
#include <linux/kgdb.h>
#include <linux/kvm_host.h>
#include <linux/nmi.h>
#include <asm/alternative.h>
#include <asm/atomic.h>
......@@ -72,13 +74,19 @@ enum ipi_msg_type {
IPI_CPU_CRASH_STOP,
IPI_TIMER,
IPI_IRQ_WORK,
IPI_WAKEUP,
NR_IPI
NR_IPI,
/*
* Any enum >= NR_IPI and < MAX_IPI is special and not tracable
* with trace_ipi_*
*/
IPI_CPU_BACKTRACE = NR_IPI,
IPI_KGDB_ROUNDUP,
MAX_IPI
};
static int ipi_irq_base __read_mostly;
static int nr_ipi __read_mostly = NR_IPI;
static struct irq_desc *ipi_desc[NR_IPI] __read_mostly;
static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
static void ipi_setup(int cpu);
......@@ -520,7 +528,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
{
u64 hwid = processor->arm_mpidr;
if (!(processor->flags & ACPI_MADT_ENABLED)) {
if (!acpi_gicc_is_usable(processor)) {
pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
return;
}
......@@ -764,7 +772,6 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
[IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
[IPI_TIMER] = "Timer broadcast interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
[IPI_WAKEUP] = "CPU wake-up interrupts",
};
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
......@@ -797,13 +804,6 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
}
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
{
smp_cross_call(mask, IPI_WAKEUP);
}
#endif
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
......@@ -854,6 +854,38 @@ static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs
#endif
}
static void arm64_backtrace_ipi(cpumask_t *mask)
{
__ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
{
/*
* NOTE: though nmi_trigger_cpumask_backtrace() has "nmi_" in the name,
* nothing about it truly needs to be implemented using an NMI, it's
* just that it's _allowed_ to work with NMIs. If ipi_should_be_nmi()
* returned false our backtrace attempt will just use a regular IPI.
*/
nmi_trigger_cpumask_backtrace(mask, exclude_cpu, arm64_backtrace_ipi);
}
#ifdef CONFIG_KGDB
void kgdb_roundup_cpus(void)
{
int this_cpu = raw_smp_processor_id();
int cpu;
for_each_online_cpu(cpu) {
/* No need to roundup ourselves */
if (cpu == this_cpu)
continue;
__ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu);
}
}
#endif
/*
* Main handler for inter-processor interrupts
*/
......@@ -897,13 +929,17 @@ static void do_handle_IPI(int ipinr)
break;
#endif
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
case IPI_WAKEUP:
WARN_ONCE(!acpi_parking_protocol_valid(cpu),
"CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
cpu);
case IPI_CPU_BACKTRACE:
/*
* NOTE: in some cases this _won't_ be NMI context. See the
* comment in arch_trigger_cpumask_backtrace().
*/
nmi_cpu_backtrace(get_irq_regs());
break;
case IPI_KGDB_ROUNDUP:
kgdb_nmicallback(cpu, get_irq_regs());
break;
#endif
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
......@@ -926,6 +962,25 @@ static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
__ipi_send_mask(ipi_desc[ipinr], target);
}
static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
{
DECLARE_STATIC_KEY_FALSE(supports_pseudo_nmis);
if (!system_uses_irq_prio_masking() ||
!static_branch_likely(&supports_pseudo_nmis))
return false;
switch (ipi) {
case IPI_CPU_STOP:
case IPI_CPU_CRASH_STOP:
case IPI_CPU_BACKTRACE:
case IPI_KGDB_ROUNDUP:
return true;
default:
return false;
}
}
static void ipi_setup(int cpu)
{
int i;
......@@ -933,8 +988,14 @@ static void ipi_setup(int cpu)
if (WARN_ON_ONCE(!ipi_irq_base))
return;
for (i = 0; i < nr_ipi; i++)
enable_percpu_irq(ipi_irq_base + i, 0);
for (i = 0; i < nr_ipi; i++) {
if (ipi_should_be_nmi(i)) {
prepare_percpu_nmi(ipi_irq_base + i);
enable_percpu_nmi(ipi_irq_base + i, 0);
} else {
enable_percpu_irq(ipi_irq_base + i, 0);
}
}
}
#ifdef CONFIG_HOTPLUG_CPU
......@@ -945,8 +1006,14 @@ static void ipi_teardown(int cpu)
if (WARN_ON_ONCE(!ipi_irq_base))
return;
for (i = 0; i < nr_ipi; i++)
disable_percpu_irq(ipi_irq_base + i);
for (i = 0; i < nr_ipi; i++) {
if (ipi_should_be_nmi(i)) {
disable_percpu_nmi(ipi_irq_base + i);
teardown_percpu_nmi(ipi_irq_base + i);
} else {
disable_percpu_irq(ipi_irq_base + i);
}
}
}
#endif
......@@ -954,15 +1021,23 @@ void __init set_smp_ipi_range(int ipi_base, int n)
{
int i;
WARN_ON(n < NR_IPI);
nr_ipi = min(n, NR_IPI);
WARN_ON(n < MAX_IPI);
nr_ipi = min(n, MAX_IPI);
for (i = 0; i < nr_ipi; i++) {
int err;
err = request_percpu_irq(ipi_base + i, ipi_handler,
"IPI", &cpu_number);
WARN_ON(err);
if (ipi_should_be_nmi(i)) {
err = request_percpu_nmi(ipi_base + i, ipi_handler,
"IPI", &cpu_number);
WARN(err, "Could not request IPI %d as NMI, err=%d\n",
i, err);
} else {
err = request_percpu_irq(ipi_base + i, ipi_handler,
"IPI", &cpu_number);
WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
i, err);
}
ipi_desc[i] = irq_to_desc(ipi_base + i);
irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
......@@ -979,6 +1054,17 @@ void arch_smp_send_reschedule(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
}
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
void arch_send_wakeup_ipi(unsigned int cpu)
{
/*
* We use a scheduler IPI to wake the CPU as this avoids the need for a
* dedicated IPI and we can safely handle spurious scheduler IPIs.
*/
smp_send_reschedule(cpu);
}
#endif
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
......
......@@ -874,7 +874,7 @@ u32 __attribute_const__ kvm_target_cpu(void)
break;
case ARM_CPU_IMP_APM:
switch (part_number) {
case APM_CPU_PART_POTENZA:
case APM_CPU_PART_XGENE:
return KVM_ARM_TARGET_XGENE_POTENZA;
}
break;
......
......@@ -16,6 +16,7 @@
#include <linux/nodemask.h>
#include <linux/initrd.h>
#include <linux/gfp.h>
#include <linux/math.h>
#include <linux/memblock.h>
#include <linux/sort.h>
#include <linux/of.h>
......@@ -493,8 +494,16 @@ void __init mem_init(void)
{
bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit);
if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC))
if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb) {
/*
* If no bouncing needed for ZONE_DMA, reduce the swiotlb
* buffer for kmalloc() bouncing to 1MB per 1GB of RAM.
*/
unsigned long size =
DIV_ROUND_UP(memblock_phys_mem_size(), 1024);
swiotlb_adjust_size(min(swiotlb_size_or_default(), size));
swiotlb = true;
}
swiotlb_init(swiotlb, SWIOTLB_VERBOSE);
......
......@@ -90,7 +90,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry,
struct acpi_madt_generic_interrupt *gicc =
container_of(entry, struct acpi_madt_generic_interrupt, header);
if (!(gicc->flags & ACPI_MADT_ENABLED))
if (!acpi_gicc_is_usable(gicc))
return -ENODEV;
/* device_declaration means Device object in DSDT, in the
......
......@@ -836,8 +836,9 @@ static u64 __arch_timer_check_delta(void)
* Note that TVAL is signed, thus has only 31 of its
* 32 bits to express magnitude.
*/
MIDR_ALL_VERSIONS(MIDR_CPU_MODEL(ARM_CPU_IMP_APM,
APM_CPU_PART_POTENZA)),
MIDR_REV_RANGE(MIDR_CPU_MODEL(ARM_CPU_IMP_APM,
APM_CPU_PART_XGENE),
APM_CPU_VAR_POTENZA, 0x0, 0xf),
{},
};
......
......@@ -78,6 +78,13 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
#define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
#define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer)
/*
* There are 16 SGIs, though we only actually use 8 in Linux. The other 8 SGIs
* are potentially stolen by the secure side. Some code, especially code dealing
* with hwirq IDs, is simplified by accounting for all 16.
*/
#define SGI_NR 16
/*
* The behaviours of RPR and PMR registers differ depending on the value of
* SCR_EL3.FIQ, and the behaviour of non-secure priority registers of the
......@@ -99,7 +106,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
* - Figure 4-7 Secure read of the priority field for a Non-secure Group 1
* interrupt.
*/
static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis);
DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis);
DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities);
EXPORT_SYMBOL(gic_nonsecure_priorities);
......@@ -125,8 +132,8 @@ EXPORT_SYMBOL(gic_nonsecure_priorities);
__priority; \
})
/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */
static refcount_t *ppi_nmi_refs;
/* rdist_nmi_refs[n] == number of cpus having the rdist interrupt n set as NMI */
static refcount_t *rdist_nmi_refs;
static struct gic_kvm_info gic_v3_kvm_info __initdata;
static DEFINE_PER_CPU(bool, has_rss);
......@@ -519,9 +526,22 @@ static u32 __gic_get_ppi_index(irq_hw_number_t hwirq)
}
}
static u32 gic_get_ppi_index(struct irq_data *d)
static u32 __gic_get_rdist_index(irq_hw_number_t hwirq)
{
switch (__get_intid_range(hwirq)) {
case SGI_RANGE:
case PPI_RANGE:
return hwirq;
case EPPI_RANGE:
return hwirq - EPPI_BASE_INTID + 32;
default:
unreachable();
}
}
static u32 gic_get_rdist_index(struct irq_data *d)
{
return __gic_get_ppi_index(d->hwirq);
return __gic_get_rdist_index(d->hwirq);
}
static int gic_irq_nmi_setup(struct irq_data *d)
......@@ -545,11 +565,14 @@ static int gic_irq_nmi_setup(struct irq_data *d)
/* desc lock should already be held */
if (gic_irq_in_rdist(d)) {
u32 idx = gic_get_ppi_index(d);
u32 idx = gic_get_rdist_index(d);
/* Setting up PPI as NMI, only switch handler for first NMI */
if (!refcount_inc_not_zero(&ppi_nmi_refs[idx])) {
refcount_set(&ppi_nmi_refs[idx], 1);
/*
* Setting up a percpu interrupt as NMI, only switch handler
* for first NMI
*/
if (!refcount_inc_not_zero(&rdist_nmi_refs[idx])) {
refcount_set(&rdist_nmi_refs[idx], 1);
desc->handle_irq = handle_percpu_devid_fasteoi_nmi;
}
} else {
......@@ -582,10 +605,10 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
/* desc lock should already be held */
if (gic_irq_in_rdist(d)) {
u32 idx = gic_get_ppi_index(d);
u32 idx = gic_get_rdist_index(d);
/* Tearing down NMI, only switch handler for last NMI */
if (refcount_dec_and_test(&ppi_nmi_refs[idx]))
if (refcount_dec_and_test(&rdist_nmi_refs[idx]))
desc->handle_irq = handle_percpu_devid_irq;
} else {
desc->handle_irq = handle_fasteoi_irq;
......@@ -1279,10 +1302,10 @@ static void gic_cpu_init(void)
rbase = gic_data_rdist_sgi_base();
/* Configure SGIs/PPIs as non-secure Group-1 */
for (i = 0; i < gic_data.ppi_nr + 16; i += 32)
for (i = 0; i < gic_data.ppi_nr + SGI_NR; i += 32)
writel_relaxed(~0, rbase + GICR_IGROUPR0 + i / 8);
gic_cpu_config(rbase, gic_data.ppi_nr + 16, gic_redist_wait_for_rwp);
gic_cpu_config(rbase, gic_data.ppi_nr + SGI_NR, gic_redist_wait_for_rwp);
/* initialise system registers */
gic_cpu_sys_reg_init();
......@@ -1939,12 +1962,13 @@ static void gic_enable_nmi_support(void)
return;
}
ppi_nmi_refs = kcalloc(gic_data.ppi_nr, sizeof(*ppi_nmi_refs), GFP_KERNEL);
if (!ppi_nmi_refs)
rdist_nmi_refs = kcalloc(gic_data.ppi_nr + SGI_NR,
sizeof(*rdist_nmi_refs), GFP_KERNEL);
if (!rdist_nmi_refs)
return;
for (i = 0; i < gic_data.ppi_nr; i++)
refcount_set(&ppi_nmi_refs[i], 0);
for (i = 0; i < gic_data.ppi_nr + SGI_NR; i++)
refcount_set(&rdist_nmi_refs[i], 0);
pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n",
gic_has_relaxed_pmr_sync() ? "relaxed" : "forced");
......@@ -2061,6 +2085,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
gic_dist_init();
gic_cpu_init();
gic_enable_nmi_support();
gic_smp_init();
gic_cpu_pm_init();
......@@ -2073,8 +2098,6 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
gicv2m_init(handle, gic_data.domain);
}
gic_enable_nmi_support();
return 0;
out_free:
......@@ -2367,8 +2390,7 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header,
u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2;
void __iomem *redist_base;
/* GICC entry which has !ACPI_MADT_ENABLED is not unusable so skip */
if (!(gicc->flags & ACPI_MADT_ENABLED))
if (!acpi_gicc_is_usable(gicc))
return 0;
redist_base = ioremap(gicc->gicr_base_address, size);
......@@ -2418,7 +2440,7 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header,
* If GICC is enabled and has valid gicr base address, then it means
* GICR base is presented via GICC
*/
if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) {
if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) {
acpi_data.enabled_rdists++;
return 0;
}
......@@ -2427,7 +2449,7 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header,
* It's perfectly valid firmware can pass disabled GICC entry, driver
* should not treat as errors, skip the entry instead of probe fail.
*/
if (!(gicc->flags & ACPI_MADT_ENABLED))
if (!acpi_gicc_is_usable(gicc))
return 0;
return -ENODEV;
......@@ -2486,8 +2508,7 @@ static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *hea
int maint_irq_mode;
static int first_madt = true;
/* Skip unusable CPUs */
if (!(gicc->flags & ACPI_MADT_ENABLED))
if (!acpi_gicc_is_usable(gicc))
return 0;
maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
......
......@@ -256,6 +256,11 @@ acpi_table_parse_cedt(enum acpi_cedt_type id,
int acpi_parse_mcfg (struct acpi_table_header *header);
void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc)
{
return gicc->flags & ACPI_MADT_ENABLED;
}
/* the following numa functions are architecture-dependent */
void acpi_numa_slit_init (struct acpi_table_slit *slit);
......
......@@ -473,6 +473,13 @@ function _start
// mov x8, #__NR_sched_yield // Encourage preemption
// svc #0
#ifdef SSVE
mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=0,SM=1
and x1, x0, #3
cmp x1, #1
b.ne svcr_barf
#endif
mov x21, #0
0: mov x0, x21
bl check_zreg
......@@ -553,3 +560,15 @@ function vl_barf
mov x1, #1
svc #0
endfunction
function svcr_barf
mov x10, x0
puts "Bad SVCR: "
mov x0, x10
bl putdecn
mov x8, #__NR_exit
mov x1, #1
svc #0
endfunction
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment