Commit 4b1967c9 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Will Deacon:
 "The critical one here is a fix for fpsimd register corruption across
  signals which was introduced by the SVE support code (the register
  files overlap), but the others are worth having as well.

  Summary:

   - Fix FP register corruption when SVE is not available or in use

   - Fix out-of-tree module build failure when CONFIG_ARM64_MODULE_PLTS=y

   - Missing 'const' generating errors with LTO builds

   - Remove unsupported events from Cortex-A73 PMU description

   - Removal of stale and incorrect comments"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: context: Fix comments and remove pointless smp_wmb()
  arm64: cpu_ops: Add missing 'const' qualifiers
  arm64: perf: remove unsupported events for Cortex-A73
  arm64: fpsimd: Fix failure to restore FPSIMD state after signals
  arm64: pgd: Mark pgd_cache as __ro_after_init
  arm64: ftrace: emit ftrace-mod.o contents through code
  arm64: module-plts: factor out PLT generation code for ftrace
  arm64: mm: cleanup stale AIVIVT references
parents a0651c7f 3a33c760
......@@ -83,9 +83,6 @@ endif
ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o
endif
endif
# Default value
......
......@@ -38,7 +38,7 @@
*
* See Documentation/cachetlb.txt for more information. Please note that
* the implementation assumes non-aliasing VIPT D-cache and (aliasing)
* VIPT or ASID-tagged VIVT I-cache.
* VIPT I-cache.
*
* flush_cache_mm(mm)
*
......
......@@ -32,7 +32,7 @@ struct mod_arch_specific {
struct mod_plt_sec init;
/* for CONFIG_DYNAMIC_FTRACE */
void *ftrace_trampoline;
struct plt_entry *ftrace_trampoline;
};
#endif
......@@ -45,4 +45,48 @@ extern u64 module_alloc_base;
#define module_alloc_base ((u64)_etext - MODULES_VSIZE)
#endif
struct plt_entry {
/*
* A program that conforms to the AArch64 Procedure Call Standard
* (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
* IP1 (x17) may be inserted at any branch instruction that is
* exposed to a relocation that supports long branches. Since that
* is exactly what we are dealing with here, we are free to use x16
* as a scratch register in the PLT veneers.
*/
__le32 mov0; /* movn x16, #0x.... */
__le32 mov1; /* movk x16, #0x...., lsl #16 */
__le32 mov2; /* movk x16, #0x...., lsl #32 */
__le32 br; /* br x16 */
};
static inline struct plt_entry get_plt_entry(u64 val)
{
/*
* MOVK/MOVN/MOVZ opcode:
* +--------+------------+--------+-----------+-------------+---------+
* | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
* +--------+------------+--------+-----------+-------------+---------+
*
* Rd := 0x10 (x16)
* hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
* opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
* sf := 1 (64-bit variant)
*/
return (struct plt_entry){
cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5),
cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
cpu_to_le32(0xd61f0200)
};
}
static inline bool plt_entries_equal(const struct plt_entry *a,
const struct plt_entry *b)
{
return a->mov0 == b->mov0 &&
a->mov1 == b->mov1 &&
a->mov2 == b->mov2;
}
#endif /* __ASM_MODULE_H */
......@@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds
ifeq ($(CONFIG_DEBUG_EFI),y)
AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
endif
# will be included by each individual module but not by the core kernel itself
extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o
......@@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops;
const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
&smp_spin_table_ops,
&cpu_psci_ops,
NULL,
};
static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
&acpi_parking_protocol_ops,
#endif
......@@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
static const struct cpu_operations * __init cpu_get_ops(const char *name)
{
const struct cpu_operations **ops;
const struct cpu_operations *const *ops;
ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
......
......@@ -1026,10 +1026,10 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
local_bh_disable();
if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
current->thread.fpsimd_state = *state;
if (system_supports_sve() && test_thread_flag(TIF_SVE))
fpsimd_to_sve(current);
}
task_fpsimd_load();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
......
/*
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.section ".text.ftrace_trampoline", "ax"
.align 3
0: .quad 0
__ftrace_trampoline:
ldr x16, 0b
br x16
ENDPROC(__ftrace_trampoline)
......@@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
if (offset < -SZ_128M || offset >= SZ_128M) {
#ifdef CONFIG_ARM64_MODULE_PLTS
unsigned long *trampoline;
struct plt_entry trampoline;
struct module *mod;
/*
......@@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* is added in the future, but for now, the pr_err() below
* deals with a theoretical issue only.
*/
trampoline = (unsigned long *)mod->arch.ftrace_trampoline;
if (trampoline[0] != addr) {
if (trampoline[0] != 0) {
trampoline = get_plt_entry(addr);
if (!plt_entries_equal(mod->arch.ftrace_trampoline,
&trampoline)) {
if (!plt_entries_equal(mod->arch.ftrace_trampoline,
&(struct plt_entry){})) {
pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
return -EINVAL;
}
/* point the trampoline to our ftrace entry point */
module_disable_ro(mod);
trampoline[0] = addr;
*mod->arch.ftrace_trampoline = trampoline;
module_enable_ro(mod, true);
/* update trampoline before patching in the branch */
smp_wmb();
}
addr = (unsigned long)&trampoline[1];
addr = (unsigned long)(void *)mod->arch.ftrace_trampoline;
#else /* CONFIG_ARM64_MODULE_PLTS */
return -EINVAL;
#endif /* CONFIG_ARM64_MODULE_PLTS */
......
......@@ -11,21 +11,6 @@
#include <linux/module.h>
#include <linux/sort.h>
struct plt_entry {
/*
* A program that conforms to the AArch64 Procedure Call Standard
* (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
* IP1 (x17) may be inserted at any branch instruction that is
* exposed to a relocation that supports long branches. Since that
* is exactly what we are dealing with here, we are free to use x16
* as a scratch register in the PLT veneers.
*/
__le32 mov0; /* movn x16, #0x.... */
__le32 mov1; /* movk x16, #0x...., lsl #16 */
__le32 mov2; /* movk x16, #0x...., lsl #32 */
__le32 br; /* br x16 */
};
static bool in_init(const struct module *mod, void *loc)
{
return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
......@@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
int i = pltsec->plt_num_entries;
u64 val = sym->st_value + rela->r_addend;
/*
* MOVK/MOVN/MOVZ opcode:
* +--------+------------+--------+-----------+-------------+---------+
* | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
* +--------+------------+--------+-----------+-------------+---------+
*
* Rd := 0x10 (x16)
* hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
* opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
* sf := 1 (64-bit variant)
*/
plt[i] = (struct plt_entry){
cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5),
cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
cpu_to_le32(0xd61f0200)
};
plt[i] = get_plt_entry(val);
/*
* Check if the entry we just created is a duplicate. Given that the
* relocations are sorted, this will be the last entry we allocated.
* (if one exists).
*/
if (i > 0 &&
plt[i].mov0 == plt[i - 1].mov0 &&
plt[i].mov1 == plt[i - 1].mov1 &&
plt[i].mov2 == plt[i - 1].mov2)
if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
return (u64)&plt[i - 1];
pltsec->plt_num_entries++;
......@@ -154,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
unsigned long core_plts = 0;
unsigned long init_plts = 0;
Elf64_Sym *syms = NULL;
Elf_Shdr *tramp = NULL;
int i;
/*
......@@ -165,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
mod->arch.core.plt = sechdrs + i;
else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
mod->arch.init.plt = sechdrs + i;
else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
!strcmp(secstrings + sechdrs[i].sh_name,
".text.ftrace_trampoline"))
tramp = sechdrs + i;
else if (sechdrs[i].sh_type == SHT_SYMTAB)
syms = (Elf64_Sym *)sechdrs[i].sh_addr;
}
......@@ -215,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
mod->arch.init.plt_num_entries = 0;
mod->arch.init.plt_max_entries = init_plts;
if (tramp) {
tramp->sh_type = SHT_NOBITS;
tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
tramp->sh_addralign = __alignof__(struct plt_entry);
tramp->sh_size = sizeof(struct plt_entry);
}
return 0;
}
SECTIONS {
.plt (NOLOAD) : { BYTE(0) }
.init.plt (NOLOAD) : { BYTE(0) }
.text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
}
......@@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
};
static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
......
......@@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu)
set_reserved_asid_bits();
/*
* Ensure the generation bump is observed before we xchg the
* active_asids.
*/
smp_wmb();
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
/*
......@@ -117,7 +111,10 @@ static void flush_context(unsigned int cpu)
per_cpu(reserved_asids, i) = asid;
}
/* Queue a TLB invalidate and flush the I-cache if necessary. */
/*
* Queue a TLB invalidation for each CPU to perform on next
* context-switch
*/
cpumask_setall(&tlb_flush_pending);
}
......@@ -202,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
asid = atomic64_read(&mm->context.id);
/*
* The memory ordering here is subtle. We rely on the control
* dependency between the generation read and the update of
* active_asids to ensure that we are synchronised with a
* parallel rollover (i.e. this pairs with the smp_wmb() in
* flush_context).
* The memory ordering here is subtle.
* If our ASID matches the current generation, then we update
* our active_asids entry with a relaxed xchg. Racing with a
* concurrent rollover means that either:
*
* - We get a zero back from the xchg and end up waiting on the
* lock. Taking the lock synchronises with the rollover and so
* we are forced to see the updated generation.
*
* - We get a valid ASID back from the xchg, which means the
* relaxed xchg in flush_context will treat us as reserved
* because atomic RmWs are totally ordered for a given location.
*/
if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
&& atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
......
......@@ -26,7 +26,7 @@
#include <asm/page.h>
#include <asm/tlbflush.h>
static struct kmem_cache *pgd_cache;
static struct kmem_cache *pgd_cache __ro_after_init;
pgd_t *pgd_alloc(struct mm_struct *mm)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment