Commit dfd437a2 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas:

 - arm64 support for syscall emulation via PTRACE_SYSEMU{,_SINGLESTEP}

 - Wire up VM_FLUSH_RESET_PERMS for arm64, allowing the core code to
   manage the permissions of executable vmalloc regions more strictly

 - Slight performance improvement by keeping softirqs enabled while
   touching the FPSIMD/SVE state (kernel_neon_begin/end)

 - Expose a couple of ARMv8.5 features to user (HWCAP): CondM (new
   XAFLAG and AXFLAG instructions for floating point comparison flags
   manipulation) and FRINT (rounding floating point numbers to integers)

 - Re-instate ARM64_PSEUDO_NMI support which was previously marked as
   BROKEN due to some bugs (now fixed)

 - Improve parking of stopped CPUs and implement an arm64-specific
   panic_smp_self_stop() to avoid warning on not being able to stop
   secondary CPUs during panic

 - perf: enable the ARM Statistical Profiling Extensions (SPE) on ACPI
   platforms

 - perf: DDR performance monitor support for iMX8QXP

 - cache_line_size() can now be set from DT or ACPI/PPTT if provided to
   cope with a system cache info not exposed via the CPUID registers

 - Avoid warning on hardware cache line size greater than
   ARCH_DMA_MINALIGN if the system is fully coherent

 - arm64 do_page_fault() and hugetlb cleanups

 - Refactor set_pte_at() to avoid redundant READ_ONCE(*ptep)

 - Ignore ACPI 5.1 FADTs reported as 5.0 (infer from the
   'arm_boot_flags' introduced in 5.1)

 - CONFIG_RANDOMIZE_BASE now enabled in defconfig

 - Allow the selection of ARM64_MODULE_PLTS, currently only done via
   RANDOMIZE_BASE (and an erratum workaround), allowing modules to spill
   over into the vmalloc area

 - Make ZONE_DMA32 configurable

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (54 commits)
  perf: arm_spe: Enable ACPI/Platform automatic module loading
  arm_pmu: acpi: spe: Add initial MADT/SPE probing
  ACPI/PPTT: Add function to return ACPI 6.3 Identical tokens
  ACPI/PPTT: Modify node flag detection to find last IDENTICAL
  x86/entry: Simplify _TIF_SYSCALL_EMU handling
  arm64: rename dump_instr as dump_kernel_instr
  arm64/mm: Drop [PTE|PMD]_TYPE_FAULT
  arm64: Implement panic_smp_self_stop()
  arm64: Improve parking of stopped CPUs
  arm64: Expose FRINT capabilities to userspace
  arm64: Expose ARMv8.5 CondM capability to userspace
  arm64: defconfig: enable CONFIG_RANDOMIZE_BASE
  arm64: ARM64_MODULES_PLTS must depend on MODULES
  arm64: bpf: do not allocate executable memory
  arm64/kprobes: set VM_FLUSH_RESET_PERMS on kprobe instruction pages
  arm64/mm: wire up CONFIG_ARCH_HAS_SET_DIRECT_MAP
  arm64: module: create module allocations without exec permissions
  arm64: Allow user selection of ARM64_MODULE_PLTS
  acpi/arm64: ignore 5.1 FADTs that are reported as 5.0
  arm64: Allow selecting Pseudo-NMI again
  ...
parents 0ecfebd2 0c61efd3
......@@ -207,6 +207,10 @@ HWCAP_FLAGM
Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
HWCAP2_FLAGM2
Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
HWCAP_SSBS
Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
......@@ -223,6 +227,10 @@ HWCAP_PACG
ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
Documentation/arm64/pointer-authentication.txt.
HWCAP2_FRINT
Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
4. Unused AT_HWCAP bits
-----------------------
......
* Freescale(NXP) IMX8 DDR performance monitor
Required properties:
- compatible: should be one of:
"fsl,imx8-ddr-pmu"
"fsl,imx8m-ddr-pmu"
- reg: physical address and size
- interrupts: single interrupt
generated by the control block
Example:
ddr-pmu@5c020000 {
compatible = "fsl,imx8-ddr-pmu";
reg = <0x5c020000 0x10000>;
interrupt-parent = <&gic>;
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
};
......@@ -6337,6 +6337,13 @@ L: linux-i2c@vger.kernel.org
S: Maintained
F: drivers/i2c/busses/i2c-cpm.c
FREESCALE IMX DDR PMU DRIVER
M: Frank Li <Frank.li@nxp.com>
L: linux-arm-kernel@lists.infradead.org
S: Maintained
F: drivers/perf/fsl_imx8_ddr_perf.c
F: Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
FREESCALE IMX LPI2C DRIVER
M: Dong Aisheng <aisheng.dong@nxp.com>
L: linux-i2c@vger.kernel.org
......
......@@ -26,6 +26,7 @@ config ARM64
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SETUP_DMA_OPS
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
......@@ -260,7 +261,8 @@ config GENERIC_CALIBRATE_DELAY
def_bool y
config ZONE_DMA32
def_bool y
bool "Support DMA32 zone" if EXPERT
default y
config HAVE_GENERIC_GUP
def_bool y
......@@ -933,7 +935,6 @@ config PARAVIRT
config PARAVIRT_TIME_ACCOUNTING
bool "Paravirtual steal time accounting"
select PARAVIRT
default n
help
Select this option to enable fine granularity task steal time
accounting. Time spent executing other tasks in parallel with
......@@ -1418,12 +1419,27 @@ config ARM64_SVE
KVM in the same kernel image.
config ARM64_MODULE_PLTS
bool
bool "Use PLTs to allow module memory to spill over into vmalloc area"
depends on MODULES
select HAVE_MOD_ARCH_SPECIFIC
help
Allocate PLTs when loading modules so that jumps and calls whose
targets are too far away for their relative offsets to be encoded
in the instructions themselves can be bounced via veneers in the
module's PLT. This allows modules to be allocated in the generic
vmalloc area after the dedicated module memory area has been
exhausted.
When running with address space randomization (KASLR), the module
region itself may be too far away for ordinary relative jumps and
calls, and so in that case, module PLTs are required and cannot be
disabled.
Specific errata workaround(s) might also force module PLTs to be
enabled (ARM64_ERRATUM_843419).
config ARM64_PSEUDO_NMI
bool "Support for NMI-like interrupts"
depends on BROKEN # 1556553607-46531-1-git-send-email-julien.thierry@arm.com
select CONFIG_ARM_GIC_V3
help
Adds support for mimicking Non-Maskable Interrupts through the use of
......@@ -1436,6 +1452,17 @@ config ARM64_PSEUDO_NMI
If unsure, say N
if ARM64_PSEUDO_NMI
config ARM64_DEBUG_PRIORITY_MASKING
bool "Debug interrupt priority masking"
help
This adds runtime checks to functions enabling/disabling
interrupts when using priority masking. The additional checks verify
the validity of ICC_PMR_EL1 when calling concerned functions.
If unsure, say N
endif
config RELOCATABLE
bool
help
......
......@@ -68,6 +68,7 @@ CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
CONFIG_XEN=y
CONFIG_COMPAT=y
CONFIG_RANDOMIZE_BASE=y
CONFIG_HIBERNATION=y
CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
CONFIG_ARM_CPUIDLE=y
......
......@@ -38,6 +38,9 @@
(!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \
(unsigned long)(entry) + (entry)->header.length > (end))
#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \
spe_interrupt) + sizeof(u16))
/* Basic configuration for ACPI */
#ifdef CONFIG_ACPI
pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
......
......@@ -152,7 +152,9 @@ static inline bool gic_prio_masking_enabled(void)
static inline void gic_pmr_mask_irqs(void)
{
BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF);
BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF |
GIC_PRIO_PSR_I_SET));
BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON);
gic_write_pmr(GIC_PRIO_IRQOFF);
}
......
......@@ -80,12 +80,15 @@ static inline u32 cache_type_cwg(void)
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
static inline int cache_line_size(void)
static inline int cache_line_size_of_cpu(void)
{
u32 cwg = cache_type_cwg();
return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
}
int cache_line_size(void);
/*
* Read the effective value of CTR_EL0.
*
......
......@@ -176,4 +176,7 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
int set_memory_valid(unsigned long addr, int numpages, int enable);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
#endif
......@@ -614,6 +614,12 @@ static inline bool system_uses_irq_prio_masking(void)
cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING);
}
static inline bool system_has_prio_mask_debugging(void)
{
return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) &&
system_uses_irq_prio_masking();
}
#define ARM64_SSBD_UNKNOWN -1
#define ARM64_SSBD_FORCE_DISABLE 0
#define ARM64_SSBD_KERNEL 1
......
......@@ -7,6 +7,7 @@
#include <linux/irqflags.h>
#include <asm/arch_gicv3.h>
#include <asm/cpufeature.h>
#define DAIF_PROCCTX 0
......@@ -16,11 +17,20 @@
/* mask/save/unmask/restore all exceptions, including interrupts. */
static inline void local_daif_mask(void)
{
WARN_ON(system_has_prio_mask_debugging() &&
(read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF |
GIC_PRIO_PSR_I_SET)));
asm volatile(
"msr daifset, #0xf // local_daif_mask\n"
:
:
: "memory");
/* Don't really care for a dsb here, we don't intend to enable IRQs */
if (system_uses_irq_prio_masking())
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
trace_hardirqs_off();
}
......@@ -32,7 +42,7 @@ static inline unsigned long local_daif_save(void)
if (system_uses_irq_prio_masking()) {
/* If IRQs are masked with PMR, reflect it in the flags */
if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF)
if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON)
flags |= PSR_I_BIT;
}
......@@ -45,39 +55,50 @@ static inline void local_daif_restore(unsigned long flags)
{
bool irq_disabled = flags & PSR_I_BIT;
WARN_ON(system_has_prio_mask_debugging() &&
!(read_sysreg(daif) & PSR_I_BIT));
if (!irq_disabled) {
trace_hardirqs_on();
if (system_uses_irq_prio_masking())
arch_local_irq_enable();
} else if (!(flags & PSR_A_BIT)) {
/*
* If interrupts are disabled but we can take
* asynchronous errors, we can take NMIs
*/
if (system_uses_irq_prio_masking()) {
flags &= ~PSR_I_BIT;
gic_write_pmr(GIC_PRIO_IRQON);
dsb(sy);
}
} else if (system_uses_irq_prio_masking()) {
u64 pmr;
if (!(flags & PSR_A_BIT)) {
/*
* There has been concern that the write to daif
* might be reordered before this write to PMR.
* From the ARM ARM DDI 0487D.a, section D1.7.1
* "Accessing PSTATE fields":
* Writes to the PSTATE fields have side-effects on
* various aspects of the PE operation. All of these
* side-effects are guaranteed:
* - Not to be visible to earlier instructions in
* the execution stream.
* - To be visible to later instructions in the
* execution stream
*
* Also, writes to PMR are self-synchronizing, so no
* interrupts with a lower priority than PMR is signaled
* to the PE after the write.
*
* So we don't need additional synchronization here.
* If interrupts are disabled but we can take
* asynchronous errors, we can take NMIs
*/
arch_local_irq_disable();
flags &= ~PSR_I_BIT;
pmr = GIC_PRIO_IRQOFF;
} else {
pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET;
}
/*
* There has been concern that the write to daif
* might be reordered before this write to PMR.
* From the ARM ARM DDI 0487D.a, section D1.7.1
* "Accessing PSTATE fields":
* Writes to the PSTATE fields have side-effects on
* various aspects of the PE operation. All of these
* side-effects are guaranteed:
* - Not to be visible to earlier instructions in
* the execution stream.
* - To be visible to later instructions in the
* execution stream
*
* Also, writes to PMR are self-synchronizing, so no
* interrupts with a lower priority than PMR is signaled
* to the PE after the write.
*
* So we don't need additional synchronization here.
*/
gic_write_pmr(pmr);
}
write_sysreg(flags, daif);
......
......@@ -37,8 +37,6 @@ struct task_struct;
extern void fpsimd_save_state(struct user_fpsimd_state *state);
extern void fpsimd_load_state(struct user_fpsimd_state *state);
extern void fpsimd_save(void);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
......@@ -52,8 +50,7 @@ extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
void *sve_state, unsigned int sve_vl);
extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_flush_cpu_state(void);
extern void sve_flush_cpu_state(void);
extern void fpsimd_save_and_flush_cpu_state(void);
/* Maximum VL that SVE VL-agnostic software can transparently support */
#define SVE_VL_ARCH_MAX 0x100
......
......@@ -84,6 +84,8 @@
#define KERNEL_HWCAP_SVEBITPERM __khwcap2_feature(SVEBITPERM)
#define KERNEL_HWCAP_SVESHA3 __khwcap2_feature(SVESHA3)
#define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4)
#define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2)
#define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT)
/*
* This yields a mask that user programs can use to figure out what
......
......@@ -29,6 +29,12 @@
*/
static inline void arch_local_irq_enable(void)
{
if (system_has_prio_mask_debugging()) {
u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
}
asm volatile(ALTERNATIVE(
"msr daifclr, #2 // arch_local_irq_enable\n"
"nop",
......@@ -42,6 +48,12 @@ static inline void arch_local_irq_enable(void)
static inline void arch_local_irq_disable(void)
{
if (system_has_prio_mask_debugging()) {
u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
}
asm volatile(ALTERNATIVE(
"msr daifset, #2 // arch_local_irq_disable",
__msr_s(SYS_ICC_PMR_EL1, "%0"),
......@@ -56,43 +68,46 @@ static inline void arch_local_irq_disable(void)
*/
static inline unsigned long arch_local_save_flags(void)
{
unsigned long daif_bits;
unsigned long flags;
daif_bits = read_sysreg(daif);
/*
* The asm is logically equivalent to:
*
* if (system_uses_irq_prio_masking())
* flags = (daif_bits & PSR_I_BIT) ?
* GIC_PRIO_IRQOFF :
* read_sysreg_s(SYS_ICC_PMR_EL1);
* else
* flags = daif_bits;
*/
asm volatile(ALTERNATIVE(
"mov %0, %1\n"
"nop\n"
"nop",
__mrs_s("%0", SYS_ICC_PMR_EL1)
"ands %1, %1, " __stringify(PSR_I_BIT) "\n"
"csel %0, %0, %2, eq",
ARM64_HAS_IRQ_PRIO_MASKING)
: "=&r" (flags), "+r" (daif_bits)
: "r" ((unsigned long) GIC_PRIO_IRQOFF)
"mrs %0, daif",
__mrs_s("%0", SYS_ICC_PMR_EL1),
ARM64_HAS_IRQ_PRIO_MASKING)
: "=&r" (flags)
:
: "memory");
return flags;
}
static inline int arch_irqs_disabled_flags(unsigned long flags)
{
int res;
asm volatile(ALTERNATIVE(
"and %w0, %w1, #" __stringify(PSR_I_BIT),
"eor %w0, %w1, #" __stringify(GIC_PRIO_IRQON),
ARM64_HAS_IRQ_PRIO_MASKING)
: "=&r" (res)
: "r" ((int) flags)
: "memory");
return res;
}
static inline unsigned long arch_local_irq_save(void)
{
unsigned long flags;
flags = arch_local_save_flags();
arch_local_irq_disable();
/*
* There are too many states with IRQs disabled, just keep the current
* state if interrupts are already disabled/masked.
*/
if (!arch_irqs_disabled_flags(flags))
arch_local_irq_disable();
return flags;
}
......@@ -108,26 +123,10 @@ static inline void arch_local_irq_restore(unsigned long flags)
__msr_s(SYS_ICC_PMR_EL1, "%0")
"dsb sy",
ARM64_HAS_IRQ_PRIO_MASKING)
: "+r" (flags)
:
: "r" (flags)
: "memory");
}
static inline int arch_irqs_disabled_flags(unsigned long flags)
{
int res;
asm volatile(ALTERNATIVE(
"and %w0, %w1, #" __stringify(PSR_I_BIT) "\n"
"nop",
"cmp %w1, #" __stringify(GIC_PRIO_IRQOFF) "\n"
"cset %w0, ls",
ARM64_HAS_IRQ_PRIO_MASKING)
: "=&r" (res)
: "r" ((int) flags)
: "memory");
return res;
}
#endif
#endif
......@@ -597,11 +597,12 @@ static inline void kvm_arm_vhe_guest_enter(void)
* will not signal the CPU of interrupts of lower priority, and the
* only way to get out will be via guest exceptions.
* Naturally, we want to avoid this.
*
* local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
* dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
*/
if (system_uses_irq_prio_masking()) {
gic_write_pmr(GIC_PRIO_IRQON);
if (system_uses_irq_prio_masking())
dsb(sy);
}
}
static inline void kvm_arm_vhe_guest_exit(void)
......
......@@ -115,7 +115,6 @@
* Level 2 descriptor (PMD).
*/
#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0)
#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
......@@ -142,8 +141,8 @@
/*
* Level 3 descriptor (PTE).
*/
#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0)
#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
......
......@@ -13,7 +13,6 @@
/*
* Software defined PTE bits definition.
*/
#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
......
......@@ -235,29 +235,42 @@ extern void __sync_icache_dcache(pte_t pteval);
*
* PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
*/
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
pte_t pte)
{
pte_t old_pte;
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
if (!IS_ENABLED(CONFIG_DEBUG_VM))
return;
old_pte = READ_ONCE(*ptep);
if (!pte_valid(old_pte) || !pte_valid(pte))
return;
if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1)
return;
/*
* If the existing pte is valid, check for potential race with
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
* Check for potential race with hardware updates of the pte
* (ptep_set_access_flags safely changes valid ptes without going
* through an invalid entry).
*/
old_pte = READ_ONCE(*ptep);
if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&
(mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
VM_WARN_ONCE(!pte_young(pte),
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(old_pte), pte_val(pte));
VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
"%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(old_pte), pte_val(pte));
}
VM_WARN_ONCE(!pte_young(pte),
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(old_pte), pte_val(pte));
VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
"%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(old_pte), pte_val(pte));
}
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
__check_racy_pte_update(mm, ptep, pte);
set_pte(ptep, pte);
}
......@@ -324,9 +337,14 @@ static inline pmd_t pte_pmd(pte_t pte)
return __pmd(pte_val(pte));
}
static inline pgprot_t mk_sect_prot(pgprot_t prot)
static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
{
return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT);
}
static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
{
return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT);
return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
}
#ifdef CONFIG_NUMA_BALANCING
......
......@@ -24,9 +24,15 @@
* means masking more IRQs (or at least that the same IRQs remain masked).
*
* To mask interrupts, we clear the most significant bit of PMR.
*
* Some code sections either automatically switch back to PSR.I or explicitly
* require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included
* in the the priority mask, it indicates that PSR.I should be set and
* interrupt disabling temporarily does not rely on IRQ priorities.
*/
#define GIC_PRIO_IRQON 0xf0
#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
#define GIC_PRIO_IRQON 0xc0
#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
#define GIC_PRIO_PSR_I_SET (1 << 4)
/* Additional SPSR bits not exposed in the UABI */
#define PSR_IL_BIT (1 << 20)
......
......@@ -12,9 +12,9 @@
#include <linux/preempt.h>
#include <linux/types.h>
#ifdef CONFIG_KERNEL_MODE_NEON
DECLARE_PER_CPU(bool, fpsimd_context_busy);
DECLARE_PER_CPU(bool, kernel_neon_busy);
#ifdef CONFIG_KERNEL_MODE_NEON
/*
* may_use_simd - whether it is allowable at this time to issue SIMD
......@@ -26,15 +26,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy);
static __must_check inline bool may_use_simd(void)
{
/*
* kernel_neon_busy is only set while preemption is disabled,
* fpsimd_context_busy is only set while preemption is disabled,
* and is clear whenever preemption is enabled. Since
* this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy
* this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
* cannot change under our feet -- if it's set we cannot be
* migrated, and if it's clear we cannot be migrated to a CPU
* where it is set.
*/
return !in_irq() && !irqs_disabled() && !in_nmi() &&
!this_cpu_read(kernel_neon_busy);
!this_cpu_read(fpsimd_context_busy);
}
#else /* ! CONFIG_KERNEL_MODE_NEON */
......
......@@ -549,6 +549,7 @@
/* id_aa64isar1 */
#define ID_AA64ISAR1_SB_SHIFT 36
#define ID_AA64ISAR1_FRINTTS_SHIFT 32
#define ID_AA64ISAR1_GPI_SHIFT 28
#define ID_AA64ISAR1_GPA_SHIFT 24
#define ID_AA64ISAR1_LRCPC_SHIFT 20
......
......@@ -65,6 +65,7 @@ void arch_release_task_struct(struct task_struct *tsk);
* TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace
* TIF_SYSCALL_AUDIT - syscall auditing
* TIF_SECCOMP - syscall secure computing
* TIF_SYSCALL_EMU - syscall emulation active
* TIF_SIGPENDING - signal pending
* TIF_NEED_RESCHED - rescheduling necessary
* TIF_NOTIFY_RESUME - callback before returning to user
......@@ -80,6 +81,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define TIF_SYSCALL_AUDIT 9
#define TIF_SYSCALL_TRACEPOINT 10
#define TIF_SECCOMP 11
#define TIF_SYSCALL_EMU 12
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
#define TIF_RESTORE_SIGMASK 20
......@@ -98,6 +100,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_FSCHECK (1 << TIF_FSCHECK)
#define _TIF_32BIT (1 << TIF_32BIT)
......@@ -109,7 +112,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
_TIF_NOHZ)
_TIF_NOHZ | _TIF_SYSCALL_EMU)
#define INIT_THREAD_INFO(tsk) \
{ \
......
......@@ -63,5 +63,7 @@
#define HWCAP2_SVEBITPERM (1 << 4)
#define HWCAP2_SVESHA3 (1 << 5)
#define HWCAP2_SVESM4 (1 << 6)
#define HWCAP2_FLAGM2 (1 << 7)
#define HWCAP2_FRINT (1 << 8)
#endif /* _UAPI__ASM_HWCAP_H */
......@@ -62,6 +62,9 @@
#define PSR_x 0x0000ff00 /* Extension */
#define PSR_c 0x000000ff /* Control */
/* syscall emulation path in ptrace */
#define PTRACE_SYSEMU 31
#define PTRACE_SYSEMU_SINGLESTEP 32
#ifndef __ASSEMBLY__
......
......@@ -152,10 +152,14 @@ static int __init acpi_fadt_sanity_check(void)
*/
if (table->revision < 5 ||
(table->revision == 5 && fadt->minor_revision < 1)) {
pr_err("Unsupported FADT revision %d.%d, should be 5.1+\n",
pr_err(FW_BUG "Unsupported FADT revision %d.%d, should be 5.1+\n",
table->revision, fadt->minor_revision);
ret = -EINVAL;
goto out;
if (!fadt->arm_boot_flags) {
ret = -EINVAL;
goto out;
}
pr_err("FADT has ARM boot flags set, assuming 5.1\n");
}
if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) {
......
......@@ -17,6 +17,15 @@
#define CLIDR_CTYPE(clidr, level) \
(((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
int cache_line_size(void)
{
if (coherency_max_size != 0)
return coherency_max_size;
return cache_line_size_of_cpu();
}
EXPORT_SYMBOL_GPL(cache_line_size);
static inline enum cache_type get_cache_type(int level)
{
u64 clidr;
......
......@@ -1184,14 +1184,14 @@ static struct undef_hook ssbs_emulation_hook = {
static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused)
{
static bool undef_hook_registered = false;
static DEFINE_SPINLOCK(hook_lock);
static DEFINE_RAW_SPINLOCK(hook_lock);
spin_lock(&hook_lock);
raw_spin_lock(&hook_lock);
if (!undef_hook_registered) {
register_undef_hook(&ssbs_emulation_hook);
undef_hook_registered = true;
}
spin_unlock(&hook_lock);
raw_spin_unlock(&hook_lock);
if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
......@@ -1618,6 +1618,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
......@@ -1629,6 +1630,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
......
......@@ -82,6 +82,8 @@ static const char *const hwcap_str[] = {
"svebitperm",
"svesha3",
"svesm4",
"flagm2",
"frint",
NULL
};
......
......@@ -247,6 +247,7 @@ alternative_else_nop_endif
/*
* Registers that may be useful after this macro is invoked:
*
* x20 - ICC_PMR_EL1
* x21 - aborted SP
* x22 - aborted PC
* x23 - aborted PSTATE
......@@ -424,6 +425,38 @@ tsk .req x28 // current thread_info
irq_stack_exit
.endm
#ifdef CONFIG_ARM64_PSEUDO_NMI
/*
* Set res to 0 if irqs were unmasked in interrupted context.
* Otherwise set res to non-0 value.
*/
.macro test_irqs_unmasked res:req, pmr:req
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
sub \res, \pmr, #GIC_PRIO_IRQON
alternative_else
mov \res, xzr
alternative_endif
.endm
#endif
.macro gic_prio_kentry_setup, tmp:req
#ifdef CONFIG_ARM64_PSEUDO_NMI
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON)
msr_s SYS_ICC_PMR_EL1, \tmp
alternative_else_nop_endif
#endif
.endm
.macro gic_prio_irq_setup, pmr:req, tmp:req
#ifdef CONFIG_ARM64_PSEUDO_NMI
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET
msr_s SYS_ICC_PMR_EL1, \tmp
alternative_else_nop_endif
#endif
.endm
.text
/*
......@@ -602,6 +635,7 @@ el1_dbg:
cmp x24, #ESR_ELx_EC_BRK64 // if BRK64
cinc x24, x24, eq // set bit '0'
tbz x24, #0, el1_inv // EL1 only
gic_prio_kentry_setup tmp=x3
mrs x0, far_el1
mov x2, sp // struct pt_regs
bl do_debug_exception
......@@ -619,20 +653,18 @@ ENDPROC(el1_sync)
.align 6
el1_irq:
kernel_entry 1
gic_prio_irq_setup pmr=x20, tmp=x1
enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
#ifdef CONFIG_ARM64_PSEUDO_NMI
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
ldr x20, [sp, #S_PMR_SAVE]
alternative_else
mov x20, #GIC_PRIO_IRQON
alternative_endif
cmp x20, #GIC_PRIO_IRQOFF
/* Irqs were disabled, don't trace */
b.ls 1f
test_irqs_unmasked res=x0, pmr=x20
cbz x0, 1f
bl asm_nmi_enter
1:
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
1:
#endif
irq_handler
......@@ -651,14 +683,23 @@ alternative_else_nop_endif
bl preempt_schedule_irq // irq en/disable is done inside
1:
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
#ifdef CONFIG_ARM64_PSEUDO_NMI
/*
* if IRQs were disabled when we received the interrupt, we have an NMI
* and we are not re-enabling interrupt upon eret. Skip tracing.
* When using IRQ priority masking, we can get spurious interrupts while
* PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a
* section with interrupts disabled. Skip tracing in those cases.
*/
cmp x20, #GIC_PRIO_IRQOFF
b.ls 1f
test_irqs_unmasked res=x0, pmr=x20
cbz x0, 1f
bl asm_nmi_exit
1:
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
#ifdef CONFIG_ARM64_PSEUDO_NMI
test_irqs_unmasked res=x0, pmr=x20
cbnz x0, 1f
#endif
bl trace_hardirqs_on
1:
......@@ -776,6 +817,7 @@ el0_ia:
* Instruction abort handling
*/
mrs x26, far_el1
gic_prio_kentry_setup tmp=x0
enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
......@@ -821,6 +863,7 @@ el0_sp_pc:
* Stack or PC alignment exception handling
*/
mrs x26, far_el1
gic_prio_kentry_setup tmp=x0
enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
......@@ -855,11 +898,12 @@ el0_dbg:
* Debug exception handling
*/
tbnz x24, #0, el0_inv // EL0 only
gic_prio_kentry_setup tmp=x3
mrs x0, far_el1
mov x1, x25
mov x2, sp
bl do_debug_exception
enable_daif
enable_da_f
ct_user_exit
b ret_to_user
el0_inv:
......@@ -876,7 +920,9 @@ ENDPROC(el0_sync)
el0_irq:
kernel_entry 0
el0_irq_naked:
gic_prio_irq_setup pmr=x20, tmp=x0
enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
#endif
......@@ -898,6 +944,7 @@ ENDPROC(el0_irq)
el1_error:
kernel_entry 1
mrs x1, esr_el1
gic_prio_kentry_setup tmp=x2
enable_dbg
mov x0, sp
bl do_serror
......@@ -908,10 +955,11 @@ el0_error:
kernel_entry 0
el0_error_naked:
mrs x1, esr_el1
gic_prio_kentry_setup tmp=x2
enable_dbg
mov x0, sp
bl do_serror
enable_daif
enable_da_f
ct_user_exit
b ret_to_user
ENDPROC(el0_error)
......@@ -932,6 +980,7 @@ work_pending:
*/
ret_to_user:
disable_daif
gic_prio_kentry_setup tmp=x3
ldr x1, [tsk, #TSK_TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending
......@@ -948,6 +997,7 @@ ENDPROC(ret_to_user)
*/
.align 6
el0_svc:
gic_prio_kentry_setup tmp=x1
mov x0, sp
bl el0_svc_handler
b ret_to_user
......
......@@ -82,7 +82,8 @@
* To prevent this from racing with the manipulation of the task's FPSIMD state
* from task context and thereby corrupting the state, it is necessary to
* protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
* flag with local_bh_disable() unless softirqs are already masked.
* flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
* run but prevent them to use FPSIMD.
*
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_cpu field
......@@ -145,6 +146,56 @@ extern void __percpu *efi_sve_state;
#endif /* ! CONFIG_ARM64_SVE */
DEFINE_PER_CPU(bool, fpsimd_context_busy);
EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
static void __get_cpu_fpsimd_context(void)
{
bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
WARN_ON(busy);
}
/*
* Claim ownership of the CPU FPSIMD context for use by the calling context.
*
* The caller may freely manipulate the FPSIMD context metadata until
* put_cpu_fpsimd_context() is called.
*
* The double-underscore version must only be called if you know the task
* can't be preempted.
*/
static void get_cpu_fpsimd_context(void)
{
preempt_disable();
__get_cpu_fpsimd_context();
}
static void __put_cpu_fpsimd_context(void)
{
bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
}
/*
* Release the CPU FPSIMD context.
*
* Must be called from a context in which get_cpu_fpsimd_context() was
* previously called, with no call to put_cpu_fpsimd_context() in the
* meantime.
*/
static void put_cpu_fpsimd_context(void)
{
__put_cpu_fpsimd_context();
preempt_enable();
}
static bool have_cpu_fpsimd_context(void)
{
return !preemptible() && __this_cpu_read(fpsimd_context_busy);
}
/*
* Call __sve_free() directly only if you know task can't be scheduled
* or preempted.
......@@ -215,12 +266,10 @@ static void sve_free(struct task_struct *task)
* This function should be called only when the FPSIMD/SVE state in
* thread_struct is known to be up to date, when preparing to enter
* userspace.
*
* Softirqs (and preemption) must be disabled.
*/
static void task_fpsimd_load(void)
{
WARN_ON(!in_softirq() && !irqs_disabled());
WARN_ON(!have_cpu_fpsimd_context());
if (system_supports_sve() && test_thread_flag(TIF_SVE))
sve_load_state(sve_pffr(&current->thread),
......@@ -233,16 +282,14 @@ static void task_fpsimd_load(void)
/*
* Ensure FPSIMD/SVE storage in memory for the loaded context is up to
* date with respect to the CPU registers.
*
* Softirqs (and preemption) must be disabled.
*/
void fpsimd_save(void)
static void fpsimd_save(void)
{
struct fpsimd_last_state_struct const *last =
this_cpu_ptr(&fpsimd_last_state);
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
WARN_ON(!in_softirq() && !irqs_disabled());
WARN_ON(!have_cpu_fpsimd_context());
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
......@@ -364,7 +411,8 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x)
* task->thread.sve_state.
*
* Task can be a non-runnable task, or current. In the latter case,
* softirqs (and preemption) must be disabled.
* the caller must have ownership of the cpu FPSIMD context before calling
* this function.
* task->thread.sve_state must point to at least sve_state_size(task)
* bytes of allocated kernel memory.
* task->thread.uw.fpsimd_state must be up to date before calling this
......@@ -393,7 +441,8 @@ static void fpsimd_to_sve(struct task_struct *task)
* task->thread.uw.fpsimd_state.
*
* Task can be a non-runnable task, or current. In the latter case,
* softirqs (and preemption) must be disabled.
* the caller must have ownership of the cpu FPSIMD context before calling
* this function.
* task->thread.sve_state must point to at least sve_state_size(task)
* bytes of allocated kernel memory.
* task->thread.sve_state must be up to date before calling this function.
......@@ -557,7 +606,7 @@ int sve_set_vector_length(struct task_struct *task,
* non-SVE thread.
*/
if (task == current) {
local_bh_disable();
get_cpu_fpsimd_context();
fpsimd_save();
}
......@@ -567,7 +616,7 @@ int sve_set_vector_length(struct task_struct *task,
sve_to_fpsimd(task);
if (task == current)
local_bh_enable();
put_cpu_fpsimd_context();
/*
* Force reallocation of task SVE state to the correct size
......@@ -880,7 +929,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
sve_alloc(current);
local_bh_disable();
get_cpu_fpsimd_context();
fpsimd_save();
......@@ -891,7 +940,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
if (test_and_set_thread_flag(TIF_SVE))
WARN_ON(1); /* SVE access shouldn't have trapped */
local_bh_enable();
put_cpu_fpsimd_context();
}
/*
......@@ -935,6 +984,8 @@ void fpsimd_thread_switch(struct task_struct *next)
if (!system_supports_fpsimd())
return;
__get_cpu_fpsimd_context();
/* Save unsaved fpsimd state, if any: */
fpsimd_save();
......@@ -949,6 +1000,8 @@ void fpsimd_thread_switch(struct task_struct *next)
update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
wrong_task || wrong_cpu);
__put_cpu_fpsimd_context();
}
void fpsimd_flush_thread(void)
......@@ -958,7 +1011,7 @@ void fpsimd_flush_thread(void)
if (!system_supports_fpsimd())
return;
local_bh_disable();
get_cpu_fpsimd_context();
fpsimd_flush_task_state(current);
memset(&current->thread.uw.fpsimd_state, 0,
......@@ -999,7 +1052,7 @@ void fpsimd_flush_thread(void)
current->thread.sve_vl_onexec = 0;
}
local_bh_enable();
put_cpu_fpsimd_context();
}
/*
......@@ -1011,9 +1064,9 @@ void fpsimd_preserve_current_state(void)
if (!system_supports_fpsimd())
return;
local_bh_disable();
get_cpu_fpsimd_context();
fpsimd_save();
local_bh_enable();
put_cpu_fpsimd_context();
}
/*
......@@ -1030,7 +1083,8 @@ void fpsimd_signal_preserve_current_state(void)
/*
* Associate current's FPSIMD context with this cpu
* Preemption must be disabled when calling this function.
* The caller must have ownership of the cpu FPSIMD context before calling
* this function.
*/
void fpsimd_bind_task_to_cpu(void)
{
......@@ -1076,14 +1130,14 @@ void fpsimd_restore_current_state(void)
if (!system_supports_fpsimd())
return;
local_bh_disable();
get_cpu_fpsimd_context();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
task_fpsimd_load();
fpsimd_bind_task_to_cpu();
}
local_bh_enable();
put_cpu_fpsimd_context();
}
/*
......@@ -1096,7 +1150,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
if (!system_supports_fpsimd())
return;
local_bh_disable();
get_cpu_fpsimd_context();
current->thread.uw.fpsimd_state = *state;
if (system_supports_sve() && test_thread_flag(TIF_SVE))
......@@ -1107,7 +1161,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
clear_thread_flag(TIF_FOREIGN_FPSTATE);
local_bh_enable();
put_cpu_fpsimd_context();
}
/*
......@@ -1133,18 +1187,29 @@ void fpsimd_flush_task_state(struct task_struct *t)
/*
* Invalidate any task's FPSIMD state that is present on this cpu.
* This function must be called with softirqs disabled.
* The FPSIMD context should be acquired with get_cpu_fpsimd_context()
* before calling this function.
*/
void fpsimd_flush_cpu_state(void)
static void fpsimd_flush_cpu_state(void)
{
__this_cpu_write(fpsimd_last_state.st, NULL);
set_thread_flag(TIF_FOREIGN_FPSTATE);
}
#ifdef CONFIG_KERNEL_MODE_NEON
/*
* Save the FPSIMD state to memory and invalidate cpu view.
* This function must be called with preemption disabled.
*/
void fpsimd_save_and_flush_cpu_state(void)
{
WARN_ON(preemptible());
__get_cpu_fpsimd_context();
fpsimd_save();
fpsimd_flush_cpu_state();
__put_cpu_fpsimd_context();
}
DEFINE_PER_CPU(bool, kernel_neon_busy);
EXPORT_PER_CPU_SYMBOL(kernel_neon_busy);
#ifdef CONFIG_KERNEL_MODE_NEON
/*
* Kernel-side NEON support functions
......@@ -1170,19 +1235,13 @@ void kernel_neon_begin(void)
BUG_ON(!may_use_simd());
local_bh_disable();
__this_cpu_write(kernel_neon_busy, true);
get_cpu_fpsimd_context();
/* Save unsaved fpsimd state, if any: */
fpsimd_save();
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();
preempt_disable();
local_bh_enable();
}
EXPORT_SYMBOL(kernel_neon_begin);
......@@ -1197,15 +1256,10 @@ EXPORT_SYMBOL(kernel_neon_begin);
*/
void kernel_neon_end(void)
{
bool busy;
if (!system_supports_fpsimd())
return;
busy = __this_cpu_xchg(kernel_neon_busy, false);
WARN_ON(!busy); /* No matching kernel_neon_begin()? */
preempt_enable();
put_cpu_fpsimd_context();
}
EXPORT_SYMBOL(kernel_neon_end);
......@@ -1297,8 +1351,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
{
switch (cmd) {
case CPU_PM_ENTER:
fpsimd_save();
fpsimd_flush_cpu_state();
fpsimd_save_and_flush_cpu_state();
break;
case CPU_PM_EXIT:
break;
......
......@@ -16,8 +16,10 @@
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/irqchip.h>
#include <linux/kprobes.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
unsigned long irq_err_count;
......@@ -64,4 +66,28 @@ void __init init_IRQ(void)
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
if (system_uses_irq_prio_masking()) {
/*
* Now that we have a stack for our IRQ handler, set
* the PMR/PSR pair to a consistent state.
*/
WARN_ON(read_sysreg(daif) & PSR_A_BIT);
local_daif_restore(DAIF_PROCCTX_NOIRQ);
}
}
/*
* Stubs to make nmi_enter/exit() code callable from ASM
*/
asmlinkage void notrace asm_nmi_enter(void)
{
nmi_enter();
}
NOKPROBE_SYMBOL(asm_nmi_enter);
asmlinkage void notrace asm_nmi_exit(void)
{
nmi_exit();
}
NOKPROBE_SYMBOL(asm_nmi_exit);
......@@ -34,7 +34,7 @@ void *module_alloc(unsigned long size)
module_alloc_end = MODULES_END;
p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
module_alloc_end, gfp_mask, PAGE_KERNEL_EXEC, 0,
module_alloc_end, gfp_mask, PAGE_KERNEL, 0,
NUMA_NO_NODE, __builtin_return_address(0));
if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
......@@ -50,7 +50,7 @@ void *module_alloc(unsigned long size)
*/
p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
module_alloc_base + SZ_2G, GFP_KERNEL,
PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
if (p && (kasan_module_alloc(p, size) < 0)) {
......
......@@ -122,8 +122,10 @@ void *alloc_insn_page(void)
void *page;
page = vmalloc_exec(PAGE_SIZE);
if (page)
if (page) {
set_memory_ro((unsigned long)page, 1);
set_vm_flush_reset_perms(page);
}
return page;
}
......
......@@ -83,7 +83,7 @@ static void __cpu_do_idle_irqprio(void)
* be raised.
*/
pmr = gic_read_pmr();
gic_write_pmr(GIC_PRIO_IRQON);
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
__cpu_do_idle();
......
......@@ -1808,8 +1808,12 @@ static void tracehook_report_syscall(struct pt_regs *regs,
int syscall_trace_enter(struct pt_regs *regs)
{
if (test_thread_flag(TIF_SYSCALL_TRACE))
if (test_thread_flag(TIF_SYSCALL_TRACE) ||
test_thread_flag(TIF_SYSCALL_EMU)) {
tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
if (!in_syscall(regs) || test_thread_flag(TIF_SYSCALL_EMU))
return -1;
}
/* Do the secure computing after ptrace; failures should be fast. */
if (secure_computing(NULL) == -1)
......
......@@ -27,7 +27,7 @@
* aff0 = mpidr_masked & 0xff;
* aff1 = mpidr_masked & 0xff00;
* aff2 = mpidr_masked & 0xff0000;
* aff2 = mpidr_masked & 0xff00000000;
* aff3 = mpidr_masked & 0xff00000000;
* dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3);
*}
* Input registers: rs0, rs1, rs2, rs3, mpidr, mask
......
......@@ -181,11 +181,7 @@ static void init_gic_priority_masking(void)
WARN_ON(!(cpuflags & PSR_I_BIT));
gic_write_pmr(GIC_PRIO_IRQOFF);
/* We can only unmask PSR.I if we can take aborts */
if (!(cpuflags & PSR_A_BIT))
write_sysreg(cpuflags & ~PSR_I_BIT, daif);
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
}
/*
......@@ -834,18 +830,23 @@ void arch_irq_work_raise(void)
}
#endif
/*
* ipi_cpu_stop - handle IPI from smp_send_stop()
*/
static void ipi_cpu_stop(unsigned int cpu)
static void local_cpu_stop(void)
{
set_cpu_online(cpu, false);
set_cpu_online(smp_processor_id(), false);
local_daif_mask();
sdei_mask_local_cpu();
cpu_park_loop();
}
while (1)
cpu_relax();
/*
* We need to implement panic_smp_self_stop() for parallel panic() calls, so
* that cpu_online_mask gets correctly updated and smp_send_stop() can skip
* CPUs that have already stopped themselves.
*/
void panic_smp_self_stop(void)
{
local_cpu_stop();
}
#ifdef CONFIG_KEXEC_CORE
......@@ -898,7 +899,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
case IPI_CPU_STOP:
irq_enter();
ipi_cpu_stop(cpu);
local_cpu_stop();
irq_exit();
break;
......
......@@ -55,16 +55,19 @@ static void dump_backtrace_entry(unsigned long where)
printk(" %pS\n", (void *)where);
}
static void __dump_instr(const char *lvl, struct pt_regs *regs)
static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
{
unsigned long addr = instruction_pointer(regs);
char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
int i;
if (user_mode(regs))
return;
for (i = -4; i < 1; i++) {
unsigned int val, bad;
bad = get_user(val, &((u32 *)addr)[i]);
bad = aarch64_insn_read(&((u32 *)addr)[i], &val);
if (!bad)
p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
......@@ -73,19 +76,8 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs)
break;
}
}
printk("%sCode: %s\n", lvl, str);
}
static void dump_instr(const char *lvl, struct pt_regs *regs)
{
if (!user_mode(regs)) {
mm_segment_t fs = get_fs();
set_fs(KERNEL_DS);
__dump_instr(lvl, regs);
set_fs(fs);
} else {
__dump_instr(lvl, regs);
}
printk("%sCode: %s\n", lvl, str);
}
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
......@@ -171,8 +163,7 @@ static int __die(const char *str, int err, struct pt_regs *regs)
print_modules();
show_regs(regs);
if (!user_mode(regs))
dump_instr(KERN_EMERG, regs);
dump_kernel_instr(KERN_EMERG, regs);
return ret;
}
......
......@@ -112,9 +112,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1];
/* Clean guest FP state to memory and invalidate cpu view */
fpsimd_save();
fpsimd_flush_cpu_state();
fpsimd_save_and_flush_cpu_state();
if (guest_has_sve)
*guest_zcr = read_sysreg_s(SYS_ZCR_EL12);
......
......@@ -604,7 +604,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
* Naturally, we want to avoid this.
*/
if (system_uses_irq_prio_masking()) {
gic_write_pmr(GIC_PRIO_IRQON);
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
dsb(sy);
}
......
......@@ -80,10 +80,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
static int __init arm64_dma_init(void)
{
WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
TAINT_CPU_OUT_OF_SPEC,
"ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
ARCH_DMA_MINALIGN, cache_line_size());
return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
}
arch_initcall(arm64_dma_init);
......@@ -461,6 +457,14 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
{
int cls = cache_line_size_of_cpu();
WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN,
TAINT_CPU_OUT_OF_SPEC,
"%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
dev_driver_string(dev), dev_name(dev),
ARCH_DMA_MINALIGN, cls);
dev->dma_coherent = coherent;
__iommu_setup_dma_ops(dev, dma_base, size, iommu);
......
......@@ -384,40 +384,31 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
#define VM_FAULT_BADACCESS 0x020000
static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
unsigned int mm_flags, unsigned long vm_flags,
struct task_struct *tsk)
unsigned int mm_flags, unsigned long vm_flags)
{
struct vm_area_struct *vma;
vm_fault_t fault;
struct vm_area_struct *vma = find_vma(mm, addr);
vma = find_vma(mm, addr);
fault = VM_FAULT_BADMAP;
if (unlikely(!vma))
goto out;
if (unlikely(vma->vm_start > addr))
goto check_stack;
return VM_FAULT_BADMAP;
/*
* Ok, we have a good vm_area for this memory access, so we can handle
* it.
*/
good_area:
if (unlikely(vma->vm_start > addr)) {
if (!(vma->vm_flags & VM_GROWSDOWN))
return VM_FAULT_BADMAP;
if (expand_stack(vma, addr))
return VM_FAULT_BADMAP;
}
/*
* Check that the permissions on the VMA allow for the fault which
* occurred.
*/
if (!(vma->vm_flags & vm_flags)) {
fault = VM_FAULT_BADACCESS;
goto out;
}
if (!(vma->vm_flags & vm_flags))
return VM_FAULT_BADACCESS;
return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
check_stack:
if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
goto good_area;
out:
return fault;
}
static bool is_el0_instruction_abort(unsigned int esr)
......@@ -425,12 +416,20 @@ static bool is_el0_instruction_abort(unsigned int esr)
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
}
/*
* Note: not valid for EL1 DC IVAC, but we never use that such that it
* should fault. EL0 cannot issue DC IVAC (undef).
*/
static bool is_write_abort(unsigned int esr)
{
return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
}
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
const struct fault_info *inf;
struct task_struct *tsk;
struct mm_struct *mm;
struct mm_struct *mm = current->mm;
vm_fault_t fault, major = 0;
unsigned long vm_flags = VM_READ | VM_WRITE;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
......@@ -438,9 +437,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
if (notify_page_fault(regs, esr))
return 0;
tsk = current;
mm = tsk->mm;
/*
* If we're in an interrupt or have no user context, we must not take
* the fault.
......@@ -453,7 +449,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
if (is_el0_instruction_abort(esr)) {
vm_flags = VM_EXEC;
} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
mm_flags |= FAULT_FLAG_INSTRUCTION;
} else if (is_write_abort(esr)) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}
......@@ -492,12 +489,14 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
*/
might_sleep();
#ifdef CONFIG_DEBUG_VM
if (!user_mode(regs) && !search_exception_tables(regs->pc))
if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
up_read(&mm->mmap_sem);
goto no_context;
}
#endif
}
fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
fault = __do_page_fault(mm, addr, mm_flags, vm_flags);
major |= fault & VM_FAULT_MAJOR;
if (fault & VM_FAULT_RETRY) {
......@@ -537,11 +536,11 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
* that point.
*/
if (major) {
tsk->maj_flt++;
current->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
addr);
} else {
tsk->min_flt++;
current->min_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
addr);
}
......
......@@ -228,7 +228,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
if (sz == PUD_SIZE) {
ptep = (pte_t *)pudp;
} else if (sz == (PAGE_SIZE * CONT_PTES)) {
} else if (sz == (CONT_PTE_SIZE)) {
pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
......@@ -246,7 +246,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
ptep = huge_pmd_share(mm, addr, pudp);
else
ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
} else if (sz == (PMD_SIZE * CONT_PMDS)) {
} else if (sz == (CONT_PMD_SIZE)) {
pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
return (pte_t *)pmdp;
......@@ -454,9 +454,9 @@ static int __init hugetlbpage_init(void)
#ifdef CONFIG_ARM64_4K_PAGES
add_huge_page_size(PUD_SIZE);
#endif
add_huge_page_size(PMD_SIZE * CONT_PMDS);
add_huge_page_size(CONT_PMD_SIZE);
add_huge_page_size(PMD_SIZE);
add_huge_page_size(PAGE_SIZE * CONT_PTES);
add_huge_page_size(CONT_PTE_SIZE);
return 0;
}
......@@ -470,9 +470,9 @@ static __init int setup_hugepagesz(char *opt)
#ifdef CONFIG_ARM64_4K_PAGES
case PUD_SIZE:
#endif
case PMD_SIZE * CONT_PMDS:
case CONT_PMD_SIZE:
case PMD_SIZE:
case PAGE_SIZE * CONT_PTES:
case CONT_PTE_SIZE:
add_huge_page_size(ps);
return 1;
}
......
......@@ -180,8 +180,9 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
if (IS_ENABLED(CONFIG_ZONE_DMA32))
max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys());
#ifdef CONFIG_ZONE_DMA32
max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys());
#endif
max_zone_pfns[ZONE_NORMAL] = max;
free_area_init_nodes(max_zone_pfns);
......
......@@ -765,7 +765,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
return 0;
}
#endif /* CONFIG_ARM64_64K_PAGES */
#endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */
void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{
......@@ -960,32 +960,28 @@ int __init arch_ioremap_pmd_supported(void)
int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot);
pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot));
/* Only allow permission changes for now */
if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
pud_val(new_pud)))
return 0;
BUG_ON(phys & ~PUD_MASK);
VM_BUG_ON(phys & ~PUD_MASK);
set_pud(pudp, new_pud);
return 1;
}
int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot);
pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot));
/* Only allow permission changes for now */
if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
pmd_val(new_pmd)))
return 0;
BUG_ON(phys & ~PMD_MASK);
VM_BUG_ON(phys & ~PMD_MASK);
set_pmd(pmdp, new_pmd);
return 1;
}
......
......@@ -151,17 +151,48 @@ int set_memory_valid(unsigned long addr, int numpages, int enable)
__pgprot(PTE_VALID));
}
#ifdef CONFIG_DEBUG_PAGEALLOC
int set_direct_map_invalid_noflush(struct page *page)
{
struct page_change_data data = {
.set_mask = __pgprot(0),
.clear_mask = __pgprot(PTE_VALID),
};
if (!rodata_full)
return 0;
return apply_to_page_range(&init_mm,
(unsigned long)page_address(page),
PAGE_SIZE, change_page_range, &data);
}
int set_direct_map_default_noflush(struct page *page)
{
struct page_change_data data = {
.set_mask = __pgprot(PTE_VALID | PTE_WRITE),
.clear_mask = __pgprot(PTE_RDONLY),
};
if (!rodata_full)
return 0;
return apply_to_page_range(&init_mm,
(unsigned long)page_address(page),
PAGE_SIZE, change_page_range, &data);
}
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
if (!debug_pagealloc_enabled() && !rodata_full)
return;
set_memory_valid((unsigned long)page_address(page), numpages, enable);
}
#ifdef CONFIG_HIBERNATION
/*
* When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function
* is used to determine if a linear map page has been marked as not-valid by
* CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit.
* This is based on kern_addr_valid(), which almost does what we need.
* This function is used to determine if a linear map page has been marked as
* not-valid. Walk the page table and check the PTE_VALID bit. This is based
* on kern_addr_valid(), which almost does what we need.
*
* Because this is only called on the kernel linear map, p?d_sect() implies
* p?d_present(). When debug_pagealloc is enabled, sections mappings are
......@@ -175,6 +206,9 @@ bool kernel_page_present(struct page *page)
pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
if (!debug_pagealloc_enabled() && !rodata_full)
return true;
pgdp = pgd_offset_k(addr);
if (pgd_none(READ_ONCE(*pgdp)))
return false;
......@@ -196,5 +230,3 @@ bool kernel_page_present(struct page *page)
ptep = pte_offset_kernel(pmdp, addr);
return pte_valid(READ_ONCE(*ptep));
}
#endif /* CONFIG_HIBERNATION */
#endif /* CONFIG_DEBUG_PAGEALLOC */
......@@ -970,7 +970,7 @@ void *bpf_jit_alloc_exec(unsigned long size)
{
return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
BPF_JIT_REGION_END, GFP_KERNEL,
PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
......
......@@ -2521,7 +2521,6 @@ void ptrace_disable(struct task_struct *child)
{
/* make sure the single step bit is not set. */
user_disable_single_step(child);
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
}
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
......
......@@ -72,23 +72,18 @@ static long syscall_trace_enter(struct pt_regs *regs)
struct thread_info *ti = current_thread_info();
unsigned long ret = 0;
bool emulated = false;
u32 work;
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
BUG_ON(regs != task_pt_regs(current));
work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
work = READ_ONCE(ti->flags);
if (unlikely(work & _TIF_SYSCALL_EMU))
emulated = true;
if ((emulated || (work & _TIF_SYSCALL_TRACE)) &&
tracehook_report_syscall_entry(regs))
return -1L;
if (emulated)
return -1L;
if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
ret = tracehook_report_syscall_entry(regs);
if (ret || (work & _TIF_SYSCALL_EMU))
return -1L;
}
#ifdef CONFIG_SECCOMP
/*
......
......@@ -747,9 +747,6 @@ static int ioperm_get(struct task_struct *target,
void ptrace_disable(struct task_struct *child)
{
user_disable_single_step(child);
#ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
}
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
......
......@@ -432,17 +432,40 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table,
}
}
static bool flag_identical(struct acpi_table_header *table_hdr,
struct acpi_pptt_processor *cpu)
{
struct acpi_pptt_processor *next;
/* heterogeneous machines must use PPTT revision > 1 */
if (table_hdr->revision < 2)
return false;
/* Locate the last node in the tree with IDENTICAL set */
if (cpu->flags & ACPI_PPTT_ACPI_IDENTICAL) {
next = fetch_pptt_node(table_hdr, cpu->parent);
if (!(next && next->flags & ACPI_PPTT_ACPI_IDENTICAL))
return true;
}
return false;
}
/* Passing level values greater than this will result in search termination */
#define PPTT_ABORT_PACKAGE 0xFF
static struct acpi_pptt_processor *acpi_find_processor_package_id(struct acpi_table_header *table_hdr,
struct acpi_pptt_processor *cpu,
int level, int flag)
static struct acpi_pptt_processor *acpi_find_processor_tag(struct acpi_table_header *table_hdr,
struct acpi_pptt_processor *cpu,
int level, int flag)
{
struct acpi_pptt_processor *prev_node;
while (cpu && level) {
if (cpu->flags & flag)
/* special case the identical flag to find last identical */
if (flag == ACPI_PPTT_ACPI_IDENTICAL) {
if (flag_identical(table_hdr, cpu))
break;
} else if (cpu->flags & flag)
break;
pr_debug("level %d\n", level);
prev_node = fetch_pptt_node(table_hdr, cpu->parent);
......@@ -480,8 +503,8 @@ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table,
cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
if (cpu_node) {
cpu_node = acpi_find_processor_package_id(table, cpu_node,
level, flag);
cpu_node = acpi_find_processor_tag(table, cpu_node,
level, flag);
/*
* As per specification if the processor structure represents
* an actual processor, then ACPI processor ID must be valid.
......@@ -660,3 +683,29 @@ int find_acpi_cpu_topology_package(unsigned int cpu)
return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
ACPI_PPTT_PHYSICAL_PACKAGE);
}
/**
* find_acpi_cpu_topology_hetero_id() - Get a core architecture tag
* @cpu: Kernel logical CPU number
*
* Determine a unique heterogeneous tag for the given CPU. CPUs with the same
* implementation should have matching tags.
*
* The returned tag can be used to group peers with identical implementation.
*
* The search terminates when a level is found with the identical implementation
* flag set or we reach a root node.
*
* Due to limitations in the PPTT data structure, there may be rare situations
* where two cores in a heterogeneous machine may be identical, but won't have
* the same tag.
*
* Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found.
* Otherwise returns a value which represents a group of identical cores
* similar to this CPU.
*/
int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
{
return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
ACPI_PPTT_ACPI_IDENTICAL);
}
......@@ -213,6 +213,8 @@ int __weak cache_setup_acpi(unsigned int cpu)
return -ENOTSUPP;
}
unsigned int coherency_max_size;
static int cache_shared_cpu_map_setup(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
......@@ -251,6 +253,9 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
}
}
/* record the maximum cache line size */
if (this_leaf->coherency_line_size > coherency_max_size)
coherency_max_size = this_leaf->coherency_line_size;
}
return 0;
......
......@@ -461,8 +461,12 @@ static void gic_deactivate_unhandled(u32 irqnr)
static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
{
bool irqs_enabled = interrupts_enabled(regs);
int err;
if (irqs_enabled)
nmi_enter();
if (static_branch_likely(&supports_deactivate_key))
gic_write_eoir(irqnr);
/*
......@@ -474,6 +478,9 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
err = handle_domain_nmi(gic_data.domain, irqnr, regs);
if (err)
gic_deactivate_unhandled(irqnr);
if (irqs_enabled)
nmi_exit();
}
static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
......
......@@ -71,6 +71,14 @@ config ARM_DSU_PMU
system, control logic. The PMU allows counting various events related
to DSU.
config FSL_IMX8_DDR_PMU
tristate "Freescale i.MX8 DDR perf monitor"
depends on ARCH_MXC
help
Provides support for the DDR performance monitor in i.MX8, which
can give information about memory throughput and other related
events.
config HISI_PMU
bool "HiSilicon SoC PMU"
depends on ARM64 && ACPI
......
......@@ -5,6 +5,7 @@ obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
obj-$(CONFIG_HISI_PMU) += hisilicon/
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
......
......@@ -71,6 +71,76 @@ static void arm_pmu_acpi_unregister_irq(int cpu)
acpi_unregister_gsi(gsi);
}
#if IS_ENABLED(CONFIG_ARM_SPE_PMU)
static struct resource spe_resources[] = {
{
/* irq */
.flags = IORESOURCE_IRQ,
}
};
static struct platform_device spe_dev = {
.name = ARMV8_SPE_PDEV_NAME,
.id = -1,
.resource = spe_resources,
.num_resources = ARRAY_SIZE(spe_resources)
};
/*
* For lack of a better place, hook the normal PMU MADT walk
* and create a SPE device if we detect a recent MADT with
* a homogeneous PPI mapping.
*/
static void arm_spe_acpi_register_device(void)
{
int cpu, hetid, irq, ret;
bool first = true;
u16 gsi = 0;
/*
* Sanity check all the GICC tables for the same interrupt number.
* For now, we only support homogeneous ACPI/SPE machines.
*/
for_each_possible_cpu(cpu) {
struct acpi_madt_generic_interrupt *gicc;
gicc = acpi_cpu_get_madt_gicc(cpu);
if (gicc->header.length < ACPI_MADT_GICC_SPE)
return;
if (first) {
gsi = gicc->spe_interrupt;
if (!gsi)
return;
hetid = find_acpi_cpu_topology_hetero_id(cpu);
first = false;
} else if ((gsi != gicc->spe_interrupt) ||
(hetid != find_acpi_cpu_topology_hetero_id(cpu))) {
pr_warn("ACPI: SPE must be homogeneous\n");
return;
}
}
irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE,
ACPI_ACTIVE_HIGH);
if (irq < 0) {
pr_warn("ACPI: SPE Unable to register interrupt: %d\n", gsi);
return;
}
spe_resources[0].start = irq;
ret = platform_device_register(&spe_dev);
if (ret < 0) {
pr_warn("ACPI: SPE: Unable to register device\n");
acpi_unregister_gsi(gsi);
}
}
#else
static inline void arm_spe_acpi_register_device(void)
{
}
#endif /* CONFIG_ARM_SPE_PMU */
static int arm_pmu_acpi_parse_irqs(void)
{
int irq, cpu, irq_cpu, err;
......@@ -276,6 +346,8 @@ static int arm_pmu_acpi_init(void)
if (acpi_disabled)
return 0;
arm_spe_acpi_register_device();
ret = arm_pmu_acpi_parse_irqs();
if (ret)
return ret;
......
......@@ -27,6 +27,7 @@
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
#include <linux/slab.h>
......@@ -1157,7 +1158,13 @@ static const struct of_device_id arm_spe_pmu_of_match[] = {
};
MODULE_DEVICE_TABLE(of, arm_spe_pmu_of_match);
static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev)
static const struct platform_device_id arm_spe_match[] = {
{ ARMV8_SPE_PDEV_NAME, 0},
{ }
};
MODULE_DEVICE_TABLE(platform, arm_spe_match);
static int arm_spe_pmu_device_probe(struct platform_device *pdev)
{
int ret;
struct arm_spe_pmu *spe_pmu;
......@@ -1217,11 +1224,12 @@ static int arm_spe_pmu_device_remove(struct platform_device *pdev)
}
static struct platform_driver arm_spe_pmu_driver = {
.id_table = arm_spe_match,
.driver = {
.name = DRVNAME,
.of_match_table = of_match_ptr(arm_spe_pmu_of_match),
},
.probe = arm_spe_pmu_device_dt_probe,
.probe = arm_spe_pmu_device_probe,
.remove = arm_spe_pmu_device_remove,
};
......
This diff is collapsed.
......@@ -1303,6 +1303,7 @@ static inline int lpit_read_residency_count_address(u64 *address)
#ifdef CONFIG_ACPI_PPTT
int find_acpi_cpu_topology(unsigned int cpu, int level);
int find_acpi_cpu_topology_package(unsigned int cpu);
int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
#else
static inline int find_acpi_cpu_topology(unsigned int cpu, int level)
......@@ -1313,6 +1314,10 @@ static inline int find_acpi_cpu_topology_package(unsigned int cpu)
{
return -EINVAL;
}
static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
{
return -EINVAL;
}
static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
{
return -EINVAL;
......
......@@ -17,6 +17,8 @@ enum cache_type {
CACHE_TYPE_UNIFIED = BIT(2),
};
extern unsigned int coherency_max_size;
/**
* struct cacheinfo - represent a cache leaf node
* @id: This cache's id. It is unique among caches with the same (type, level).
......
......@@ -171,4 +171,6 @@ void armpmu_free_irq(int irq, int cpu);
#endif /* CONFIG_ARM_PMU */
#define ARMV8_SPE_PDEV_NAME "arm,spe-v1"
#endif /* __ARM_PMU_H__ */
......@@ -680,6 +680,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
* @hwirq: The HW irq number to convert to a logical one
* @regs: Register file coming from the low-level handling code
*
* This function must be called from an NMI context.
*
* Returns: 0 on success, or -EINVAL if conversion has failed
*/
int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
......@@ -689,7 +691,10 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
unsigned int irq;
int ret = 0;
nmi_enter();
/*
* NMI context needs to be setup earlier in order to deal with tracing.
*/
WARN_ON(!in_nmi());
irq = irq_find_mapping(domain, hwirq);
......@@ -702,7 +707,6 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
else
ret = -EINVAL;
nmi_exit();
set_irq_regs(old_regs);
return ret;
}
......
......@@ -116,6 +116,9 @@ void __ptrace_unlink(struct task_struct *child)
BUG_ON(!child->ptrace);
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
child->parent = child->real_parent;
list_del_init(&child->ptrace_entry);
......
......@@ -2128,17 +2128,6 @@ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
int flush_dmap = 0;
int i;
/*
* The below block can be removed when all architectures that have
* direct map permissions also have set_direct_map_() implementations.
* This is concerned with resetting the direct map any an vm alias with
* execute permissions, without leaving a RW+X window.
*/
if (flush_reset && !IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
set_memory_nx((unsigned long)area->addr, area->nr_pages);
set_memory_rw((unsigned long)area->addr, area->nr_pages);
}
remove_vm_area(area->addr);
/* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment