Commit 43aa0a19 authored by Linus Torvalds

Merge tag 'objtool-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull objtool updates from Thomas Gleixner:

 - Improve retpoline code patching by separating it from alternatives,
   which reduces the memory footprint and allows better optimizations
   in the actual runtime patching.

 - Add proper retpoline support for x86/BPF

 - Address noinstr warnings in x86/kvm, lockdep and paravirtualization
   code

 - Add support to handle pv_ops indirect calls in the noinstr analysis

 - Classify symbols upfront and cache the result to avoid redundant
   str*cmp() invocations.

 - Add a CFI hash to reduce memory consumption which also reduces
   runtime on an allyesconfig by ~50%

 - Adjust XEN code to make objtool handling more robust and as a side
   effect to prevent text fragmentation due to placement of the
   hypercall page.

* tag 'objtool-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (41 commits)
  bpf,x86: Respect X86_FEATURE_RETPOLINE*
  bpf,x86: Simplify computing label offsets
  x86,bugs: Unconditionally allow spectre_v2=retpoline,amd
  x86/alternative: Add debug prints to apply_retpolines()
  x86/alternative: Try inline spectre_v2=retpoline,amd
  x86/alternative: Handle Jcc __x86_indirect_thunk_\reg
  x86/alternative: Implement .retpoline_sites support
  x86/retpoline: Create a retpoline thunk array
  x86/retpoline: Move the retpoline thunk declarations to nospec-branch.h
  x86/asm: Fixup odd GEN-for-each-reg.h usage
  x86/asm: Fix register order
  x86/retpoline: Remove unused replacement symbols
  objtool,x86: Replace alternatives with .retpoline_sites
  objtool: Shrink struct instruction
  objtool: Explicitly avoid self modifying code in .altinstr_replacement
  objtool: Classify symbols
  objtool: Support pv_ops indirect calls for noinstr
  x86/xen: Rework the xen_{cpu,irq,mmu}_ops arrays
  x86/xen: Mark xen_force_evtchn_callback() noinstr
  x86/xen: Make irq_disable() noinstr
  ...
parents 595b28fb 87c87ecd
......@@ -421,6 +421,10 @@ void __init check_bugs(void)
os_check_bugs();
}
/* Empty stub: this definition performs no retpoline patching. */
void apply_retpolines(s32 *start, s32 *end)
{
}
/* Empty stub: this definition performs no alternatives patching. */
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* These are in machine order; things rely on that.
*/
#ifdef CONFIG_64BIT
GEN(rax)
GEN(rbx)
GEN(rcx)
GEN(rdx)
GEN(rbx)
GEN(rsp)
GEN(rbp)
GEN(rsi)
GEN(rdi)
GEN(rbp)
GEN(r8)
GEN(r9)
GEN(r10)
......@@ -16,10 +21,11 @@ GEN(r14)
GEN(r15)
#else
GEN(eax)
GEN(ebx)
GEN(ecx)
GEN(edx)
GEN(ebx)
GEN(esp)
GEN(ebp)
GEN(esi)
GEN(edi)
GEN(ebp)
#endif
......@@ -75,6 +75,7 @@ extern int alternatives_patched;
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void apply_retpolines(s32 *start, s32 *end);
struct module;
......
......@@ -17,21 +17,3 @@
extern void cmpxchg8b_emu(void);
#endif
#ifdef CONFIG_RETPOLINE
#undef GEN
#define GEN(reg) \
extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
#include <asm/GEN-for-each-reg.h>
#undef GEN
#define GEN(reg) \
extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
#include <asm/GEN-for-each-reg.h>
#undef GEN
#define GEN(reg) \
extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
#include <asm/GEN-for-each-reg.h>
#endif /* CONFIG_RETPOLINE */
......@@ -5,12 +5,15 @@
#include <linux/static_key.h>
#include <linux/objtool.h>
#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#define RETPOLINE_THUNK_SIZE 32
/*
* Fill the CPU return stack buffer.
*
......@@ -118,6 +121,16 @@
".popsection\n\t"
#ifdef CONFIG_RETPOLINE
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN
extern retpoline_thunk_t __x86_indirect_thunk_array[];
#ifdef CONFIG_X86_64
/*
......@@ -303,63 +316,4 @@ static inline void mds_idle_clear_cpu_buffers(void)
#endif /* __ASSEMBLY__ */
/*
* Below is used in the eBPF JIT compiler and emits the byte sequence
* for the following assembly:
*
* With retpolines configured:
*
* callq do_rop
* spec_trap:
* pause
* lfence
* jmp spec_trap
* do_rop:
* mov %rcx,(%rsp) for x86_64
* mov %edx,(%esp) for x86_32
* retq
*
* Without retpolines configured:
*
* jmp *%rcx for x86_64
* jmp *%edx for x86_32
*/
#ifdef CONFIG_RETPOLINE
# ifdef CONFIG_X86_64
# define RETPOLINE_RCX_BPF_JIT_SIZE 17
# define RETPOLINE_RCX_BPF_JIT() \
do { \
EMIT1_off32(0xE8, 7); /* callq do_rop */ \
/* spec_trap: */ \
EMIT2(0xF3, 0x90); /* pause */ \
EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
/* do_rop: */ \
EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \
EMIT1(0xC3); /* retq */ \
} while (0)
# else /* !CONFIG_X86_64 */
# define RETPOLINE_EDX_BPF_JIT() \
do { \
EMIT1_off32(0xE8, 7); /* call do_rop */ \
/* spec_trap: */ \
EMIT2(0xF3, 0x90); /* pause */ \
EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
/* do_rop: */ \
EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
EMIT1(0xC3); /* ret */ \
} while (0)
# endif
#else /* !CONFIG_RETPOLINE */
# ifdef CONFIG_X86_64
# define RETPOLINE_RCX_BPF_JIT_SIZE 2
# define RETPOLINE_RCX_BPF_JIT() \
EMIT2(0xFF, 0xE1); /* jmp *%rcx */
# else /* !CONFIG_X86_64 */
# define RETPOLINE_EDX_BPF_JIT() \
EMIT2(0xFF, 0xE2) /* jmp *%edx */
# endif
#endif
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
......@@ -52,11 +52,11 @@ void __init paravirt_set_cap(void);
/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
pv_ops.cpu.io_delay();
PVOP_VCALL0(cpu.io_delay);
#ifdef REALLY_SLOW_IO
pv_ops.cpu.io_delay();
pv_ops.cpu.io_delay();
pv_ops.cpu.io_delay();
PVOP_VCALL0(cpu.io_delay);
PVOP_VCALL0(cpu.io_delay);
PVOP_VCALL0(cpu.io_delay);
#endif
}
......@@ -113,12 +113,12 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
/*
* These special macros can be used to get or set a debugging register
*/
static inline unsigned long paravirt_get_debugreg(int reg)
static __always_inline unsigned long paravirt_get_debugreg(int reg)
{
return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg);
}
#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
static inline void set_debugreg(unsigned long val, int reg)
static __always_inline void set_debugreg(unsigned long val, int reg)
{
PVOP_VCALL2(cpu.set_debugreg, reg, val);
}
......@@ -133,14 +133,14 @@ static inline void write_cr0(unsigned long x)
PVOP_VCALL1(cpu.write_cr0, x);
}
static inline unsigned long read_cr2(void)
static __always_inline unsigned long read_cr2(void)
{
return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
"mov %%cr2, %%rax;",
ALT_NOT(X86_FEATURE_XENPV));
}
static inline void write_cr2(unsigned long x)
static __always_inline void write_cr2(unsigned long x)
{
PVOP_VCALL1(mmu.write_cr2, x);
}
......@@ -653,10 +653,10 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
* functions.
*/
#define PV_THUNK_NAME(func) "__raw_callee_save_" #func
#define PV_CALLEE_SAVE_REGS_THUNK(func) \
#define __PV_CALLEE_SAVE_REGS_THUNK(func, section) \
extern typeof(func) __raw_callee_save_##func; \
\
asm(".pushsection .text;" \
asm(".pushsection " section ", \"ax\";" \
".globl " PV_THUNK_NAME(func) ";" \
".type " PV_THUNK_NAME(func) ", @function;" \
PV_THUNK_NAME(func) ":" \
......@@ -669,6 +669,9 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \
".popsection")
#define PV_CALLEE_SAVE_REGS_THUNK(func) \
__PV_CALLEE_SAVE_REGS_THUNK(func, ".text")
/* Get a reference to a callee-save function */
#define PV_CALLEE_SAVE(func) \
((struct paravirt_callee_save) { __raw_callee_save_##func })
......@@ -678,23 +681,23 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
((struct paravirt_callee_save) { func })
#ifdef CONFIG_PARAVIRT_XXL
static inline notrace unsigned long arch_local_save_flags(void)
static __always_inline unsigned long arch_local_save_flags(void)
{
return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;",
ALT_NOT(X86_FEATURE_XENPV));
}
static inline notrace void arch_local_irq_disable(void)
static __always_inline void arch_local_irq_disable(void)
{
PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV));
}
static inline notrace void arch_local_irq_enable(void)
static __always_inline void arch_local_irq_enable(void)
{
PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV));
}
static inline notrace unsigned long arch_local_irq_save(void)
static __always_inline unsigned long arch_local_irq_save(void)
{
unsigned long f;
......
......@@ -181,7 +181,7 @@ static inline bool any_64bit_mode(struct pt_regs *regs)
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp
static inline bool ip_within_syscall_gap(struct pt_regs *regs)
static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs)
{
bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack);
......
......@@ -308,13 +308,13 @@ HYPERVISOR_platform_op(struct xen_platform_op *op)
return _hypercall1(int, platform_op, op);
}
static inline int
static __always_inline int
HYPERVISOR_set_debugreg(int reg, unsigned long value)
{
return _hypercall2(int, set_debugreg, reg, value);
}
static inline unsigned long
static __always_inline unsigned long
HYPERVISOR_get_debugreg(int reg)
{
return _hypercall1(unsigned long, get_debugreg, reg);
......@@ -358,7 +358,7 @@ HYPERVISOR_event_channel_op(int cmd, void *arg)
return _hypercall2(int, event_channel_op, cmd, arg);
}
static inline int
static __always_inline int
HYPERVISOR_xen_version(int cmd, void *arg)
{
return _hypercall2(int, xen_version, cmd, arg);
......
......@@ -29,6 +29,7 @@
#include <asm/io.h>
#include <asm/fixmap.h>
#include <asm/paravirt.h>
#include <asm/asm-prototypes.h>
int __read_mostly alternatives_patched;
......@@ -113,6 +114,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
}
}
extern s32 __retpoline_sites[], __retpoline_sites_end[];
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void text_poke_early(void *addr, const void *opcode, size_t len);
......@@ -221,7 +223,7 @@ static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
* "noinline" to cause control flow change and thus invalidate I$ and
* cause refetch after modification.
*/
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
{
struct insn insn;
int i = 0;
......@@ -239,11 +241,11 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins
* optimized.
*/
if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
i += optimize_nops_range(instr, a->instrlen, i);
i += optimize_nops_range(instr, len, i);
else
i += insn.length;
if (i >= a->instrlen)
if (i >= len)
return;
}
}
......@@ -331,10 +333,185 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
text_poke_early(instr, insn_buff, insn_buff_sz);
next:
optimize_nops(a, instr);
optimize_nops(instr, a->instrlen);
}
}
#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)
/*
 * Encode an indirect "CALL *%\reg" or "JMP *%\reg" into @bytes.
 *
 * @op:    CALL_INSN_OPCODE selects CALL r/m, JMP32_INSN_OPCODE selects
 *         JMP r/m; any other value warns once and fails.
 * @reg:   register number; 8 and above is encoded with a REX.B prefix.
 * @bytes: output buffer; receives two bytes, or three with the prefix.
 *
 * Returns the number of bytes written, or -1 for an unsupported @op.
 */
static int emit_indirect(int op, int reg, u8 *bytes)
{
	u8 *out = bytes;
	u8 modrm;

	if (op == CALL_INSN_OPCODE) {
		modrm = 0x10;		/* Reg = 2; CALL r/m */
	} else if (op == JMP32_INSN_OPCODE) {
		modrm = 0x20;		/* Reg = 4; JMP r/m */
	} else {
		WARN_ON_ONCE(1);
		return -1;
	}

	/* Registers 8 and up need REX.B; the low three bits go in ModRM. */
	if (reg >= 8) {
		*out++ = 0x41;		/* REX.B prefix */
		reg -= 8;
	}

	modrm |= 0xc0;			/* Mod = 3 */
	modrm += reg;

	*out++ = 0xff;			/* opcode */
	*out++ = modrm;

	return out - bytes;
}
/*
* Rewrite the compiler generated retpoline thunk calls.
*
* For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate
* indirect instructions, avoiding the extra indirection.
*
* For example, convert:
*
* CALL __x86_indirect_thunk_\reg
*
* into:
*
* CALL *%\reg
*
* It also tries to inline spectre_v2=retpoline,amd when size permits.
*
* @addr:  address of the instruction to rewrite
* @insn:  decoded form of the instruction at @addr
* @bytes: buffer that receives the replacement bytes
*
* Returns the number of bytes written to @bytes, or -1 when the site is
* left alone: the branch target is not slot 0..15 of the thunk array,
* X86_FEATURE_RETPOLINE is enabled without X86_FEATURE_RETPOLINE_AMD, or
* emit_indirect() rejects the opcode. The replacement is padded with
* single-byte NOPs up to insn->length; if it is already longer than the
* original instruction no padding is added and the returned length
* exceeds insn->length.
*/
static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
{
retpoline_thunk_t *target;
int reg, ret, i = 0;
u8 op, cc;
/* Branch destination: end of the instruction plus its relative immediate. */
target = addr + insn->length + insn->immediate.value;
/* Index of the thunk within the array; used below as the register number. */
reg = target - __x86_indirect_thunk_array;
if (WARN_ON_ONCE(reg & ~0xf))
return -1;
/* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
BUG_ON(reg == 4);
/* Retpolines enabled and not the AMD variant: keep calling the thunk. */
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
!cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD))
return -1;
op = insn->opcode.bytes[0];
/*
* Convert:
*
* Jcc.d32 __x86_indirect_thunk_\reg
*
* into:
*
* Jncc.d8 1f
* [ LFENCE ]
* JMP *%\reg
* [ NOP ]
* 1:
*/
/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
cc = insn->opcode.bytes[1] & 0xf;
cc ^= 1; /* invert condition */
bytes[i++] = 0x70 + cc; /* Jcc.d8 */
bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */
/* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */
op = JMP32_INSN_OPCODE;
}
/*
* For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE.
*/
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
bytes[i++] = 0x0f;
bytes[i++] = 0xae;
bytes[i++] = 0xe8; /* LFENCE */
}
ret = emit_indirect(op, reg, bytes + i);
if (ret < 0)
return ret;
i += ret;
/* Fill whatever is left of the original instruction length with NOPs. */
for (; i < insn->length;)
bytes[i++] = BYTES_NOP1;
return i;
}
/*
* Patch every retpoline call site listed in [@start, @end).
*
* The site list is generated by 'objtool --retpoline'. Each s32 entry is
* an offset relative to the entry's own location; adding it to the entry's
* address yields the address of the instruction to patch.
*
* Only CALL, JMP.d32 and Jcc.d32 (0x0f 0x80..0x8f) instructions are
* accepted. Any other opcode, or a decode failure, triggers a one-time
* warning and the site is skipped. A site is rewritten only when
* patch_retpoline() returns exactly insn.length bytes.
*/
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
/* Self-relative entry: site address = entry address + entry value. */
void *addr = (void *)s + *s;
struct insn insn;
int len, ret;
u8 bytes[16];
u8 op1, op2;
ret = insn_decode_kernel(&insn, addr);
if (WARN_ON_ONCE(ret < 0))
continue;
op1 = insn.opcode.bytes[0];
op2 = insn.opcode.bytes[1];
switch (op1) {
case CALL_INSN_OPCODE:
case JMP32_INSN_OPCODE:
break;
case 0x0f: /* escape */
if (op2 >= 0x80 && op2 <= 0x8f)
break;
fallthrough;
default:
WARN_ON_ONCE(1);
continue;
}
DPRINTK("retpoline at: %pS (%px) len: %d to: %pS",
addr, addr, insn.length,
addr + insn.length + insn.immediate.value);
len = patch_retpoline(addr, &insn, bytes);
/* Patch only when the replacement exactly fills the original slot. */
if (len == insn.length) {
optimize_nops(bytes, len);
DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
text_poke_early(addr, bytes, len);
}
}
}
#else /* !CONFIG_RETPOLINE || !CONFIG_STACK_VALIDATION */
/* Empty stub used when the patching code above is compiled out. */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
#ifdef CONFIG_SMP
static void alternatives_smp_lock(const s32 *start, const s32 *end,
u8 *text, u8 *text_end)
......@@ -642,6 +819,12 @@ void __init alternative_instructions(void)
*/
apply_paravirt(__parainstructions, __parainstructions_end);
/*
* Rewrite the retpolines, must be done before alternatives since
* those can rewrite the retpoline thunks.
*/
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
/*
* Then patch alternatives, such that those paravirt calls that are in
* alternatives can be overwritten by their immediate fragments.
......
......@@ -882,13 +882,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON &&
boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
return SPECTRE_V2_CMD_AUTO;
}
spec_v2_print_cond(mitigation_options[i].option,
mitigation_options[i].secure);
return cmd;
......
......@@ -7,9 +7,11 @@
/*
* unsigned long native_save_fl(void)
*/
.pushsection .noinstr.text, "ax"
SYM_FUNC_START(native_save_fl)
pushf
pop %_ASM_AX
ret
SYM_FUNC_END(native_save_fl)
.popsection
EXPORT_SYMBOL(native_save_fl)
......@@ -251,7 +251,8 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
*para = NULL, *orc = NULL, *orc_ip = NULL;
*para = NULL, *orc = NULL, *orc_ip = NULL,
*retpolines = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
......@@ -267,8 +268,14 @@ int module_finalize(const Elf_Ehdr *hdr,
orc = s;
if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
orc_ip = s;
if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
retpolines = s;
}
if (retpolines) {
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
}
if (alt) {
/* patch .altinstructions */
void *aseg = (void *)alt->sh_addr;
......
......@@ -218,6 +218,36 @@ void paravirt_end_context_switch(struct task_struct *next)
if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
arch_enter_lazy_mmu_mode();
}
/*
* noinstr wrappers around the native_*() helpers. Each one only forwards
* its arguments (and return value, where there is one) to the helper of
* the same name without the pv_ prefix. The pv_ops initialiser in this
* file uses these wrappers for read_cr2/write_cr2,
* get_debugreg/set_debugreg and irq_enable/irq_disable.
*/
static noinstr unsigned long pv_native_read_cr2(void)
{
return native_read_cr2();
}
static noinstr void pv_native_write_cr2(unsigned long val)
{
native_write_cr2(val);
}
static noinstr unsigned long pv_native_get_debugreg(int regno)
{
return native_get_debugreg(regno);
}
static noinstr void pv_native_set_debugreg(int regno, unsigned long val)
{
native_set_debugreg(regno, val);
}
static noinstr void pv_native_irq_enable(void)
{
native_irq_enable();
}
static noinstr void pv_native_irq_disable(void)
{
native_irq_disable();
}
#endif
enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
......@@ -244,8 +274,8 @@ struct paravirt_patch_template pv_ops = {
#ifdef CONFIG_PARAVIRT_XXL
.cpu.cpuid = native_cpuid,
.cpu.get_debugreg = native_get_debugreg,
.cpu.set_debugreg = native_set_debugreg,
.cpu.get_debugreg = pv_native_get_debugreg,
.cpu.set_debugreg = pv_native_set_debugreg,
.cpu.read_cr0 = native_read_cr0,
.cpu.write_cr0 = native_write_cr0,
.cpu.write_cr4 = native_write_cr4,
......@@ -281,8 +311,8 @@ struct paravirt_patch_template pv_ops = {
/* Irq ops. */
.irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
.irq.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
.irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
.irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable),
.irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable),
.irq.safe_halt = native_safe_halt,
.irq.halt = native_halt,
#endif /* CONFIG_PARAVIRT_XXL */
......@@ -298,8 +328,8 @@ struct paravirt_patch_template pv_ops = {
.mmu.exit_mmap = paravirt_nop,
#ifdef CONFIG_PARAVIRT_XXL
.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2),
.mmu.write_cr2 = native_write_cr2,
.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
.mmu.write_cr2 = pv_native_write_cr2,
.mmu.read_cr3 = __native_read_cr3,
.mmu.write_cr3 = native_write_cr3,
......@@ -371,9 +401,6 @@ struct paravirt_patch_template pv_ops = {
};
#ifdef CONFIG_PARAVIRT_XXL
/* At this point, native_get/set_debugreg has real function entries */
NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
void (*paravirt_iret)(void) = native_iret;
......
......@@ -64,7 +64,7 @@ static bool sev_es_negotiate_protocol(void)
static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
ghcb->save.sw_exit_code = 0;
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
__builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
static bool vc_decoding_needed(unsigned long exit_code)
......
......@@ -272,6 +272,20 @@ SECTIONS
__parainstructions_end = .;
}
#ifdef CONFIG_RETPOLINE
/*
* List of instructions that call/jmp/jcc to retpoline thunks
* __x86_indirect_thunk_*(). These instructions can be patched along
* with alternatives, after which the section can be freed.
*/
. = ALIGN(8);
.retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) {
__retpoline_sites = .;
*(.retpoline_sites)
__retpoline_sites_end = .;
}
#endif
/*
* struct alt_inst entries. From the header (alternative.h):
* "Alternative instructions for different CPU types or capabilities"
......
......@@ -218,12 +218,12 @@ DECLARE_PER_CPU(struct svm_cpu_data *, svm_data);
void recalc_intercepts(struct vcpu_svm *svm);
static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
{
return container_of(kvm, struct kvm_svm, kvm);
}
static inline bool sev_guest(struct kvm *kvm)
static __always_inline bool sev_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
......@@ -234,7 +234,7 @@ static inline bool sev_guest(struct kvm *kvm)
#endif
}
static inline bool sev_es_guest(struct kvm *kvm)
static __always_inline bool sev_es_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
......@@ -271,7 +271,7 @@ static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
}
static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_svm, vcpu);
}
......
......@@ -56,12 +56,12 @@ static inline void invlpga(unsigned long addr, u32 asid)
* VMSAVE, VMLOAD, etc... is still controlled by the effective address size,
* hence 'unsigned long' instead of 'hpa_t'.
*/
static inline void vmsave(unsigned long pa)
static __always_inline void vmsave(unsigned long pa)
{
svm_asm1(vmsave, "a" (pa), "memory");
}
static inline void vmload(unsigned long pa)
static __always_inline void vmload(unsigned long pa)
{
svm_asm1(vmload, "a" (pa), "memory");
}
......
......@@ -93,7 +93,7 @@ static __always_inline int get_evmcs_offset(unsigned long field,
return evmcs_field->offset;
}
static inline void evmcs_write64(unsigned long field, u64 value)
static __always_inline void evmcs_write64(unsigned long field, u64 value)
{
u16 clean_field;
int offset = get_evmcs_offset(field, &clean_field);
......@@ -183,7 +183,7 @@ static inline void evmcs_load(u64 phys_addr)
__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
#else /* !IS_ENABLED(CONFIG_HYPERV) */
static inline void evmcs_write64(unsigned long field, u64 value) {}
static __always_inline void evmcs_write64(unsigned long field, u64 value) {}
static inline void evmcs_write32(unsigned long field, u32 value) {}
static inline void evmcs_write16(unsigned long field, u16 value) {}
static inline u64 evmcs_read64(unsigned long field) { return 0; }
......
......@@ -28,46 +28,14 @@
.macro THUNK reg
.align 32
SYM_FUNC_START(__x86_indirect_thunk_\reg)
.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
__stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
SYM_FUNC_END(__x86_indirect_thunk_\reg)
.endm
/*
* This generates .altinstr_replacement symbols for use by objtool. They,
* however, must not actually live in .altinstr_replacement since that will be
* discarded after init, but module alternatives will also reference these
* symbols.
*
* Their names matches the "__x86_indirect_" prefix to mark them as retpolines.
*/
.macro ALT_THUNK reg
.align 1
SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
ANNOTATE_RETPOLINE_SAFE
1: call *%\reg
2: .skip 5-(2b-1b), 0x90
SYM_FUNC_END(__x86_indirect_alt_call_\reg)
STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg)
SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
ANNOTATE_RETPOLINE_SAFE
1: jmp *%\reg
2: .skip 5-(2b-1b), 0x90
SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg)
.endm
/*
......@@ -85,22 +53,16 @@ STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg)
#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
#undef GEN
.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)
#define GEN(reg) THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
#define GEN(reg) ALT_THUNK reg
#include <asm/GEN-for-each-reg.h>
.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array)
#undef GEN
#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
#include <asm/GEN-for-each-reg.h>
......@@ -15,7 +15,6 @@
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/text-patching.h>
#include <asm/asm-prototypes.h>
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
......@@ -225,6 +224,14 @@ static void jit_fill_hole(void *area, unsigned int size)
struct jit_context {
int cleanup_addr; /* Epilogue code offset */
/*
* Program specific offsets of labels in the code; these rely on the
* JIT doing at least 2 passes, recording the position on the first
* pass, only to generate the correct offset on the second pass.
*/
int tail_call_direct_label;
int tail_call_indirect_label;
};
/* Maximum number of bytes emitted while JITing one eBPF insn */
......@@ -380,20 +387,23 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
}
static int get_pop_bytes(bool *callee_regs_used)
#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
{
int bytes = 0;
u8 *prog = *pprog;
if (callee_regs_used[3])
bytes += 2;
if (callee_regs_used[2])
bytes += 2;
if (callee_regs_used[1])
bytes += 2;
if (callee_regs_used[0])
bytes += 1;
#ifdef CONFIG_RETPOLINE
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
EMIT_LFENCE();
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
} else
#endif
EMIT2(0xFF, 0xE0 + reg);
return bytes;
*pprog = prog;
}
/*
......@@ -411,29 +421,12 @@ static int get_pop_bytes(bool *callee_regs_used)
* out:
*/
static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
u32 stack_depth)
u32 stack_depth, u8 *ip,
struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
int pop_bytes = 0;
int off1 = 42;
int off2 = 31;
int off3 = 9;
/* count the additional bytes used for popping callee regs from stack
* that need to be taken into account for each of the offsets that
* are used for bailing out of the tail call
*/
pop_bytes = get_pop_bytes(callee_regs_used);
off1 += pop_bytes;
off2 += pop_bytes;
off3 += pop_bytes;
if (stack_depth) {
off1 += 7;
off2 += 7;
off3 += 7;
}
u8 *prog = *pprog, *start = *pprog;
int offset;
/*
* rdi - pointer to ctx
......@@ -448,8 +441,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
EMIT2(X86_JBE, offset); /* jbe out */
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
......@@ -457,8 +451,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
......@@ -471,12 +466,11 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
* goto out;
*/
EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
EMIT2(X86_JE, OFFSET3); /* je out */
*pprog = prog;
pop_callee_regs(pprog, callee_regs_used);
prog = *pprog;
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
EMIT2(X86_JE, offset); /* je out */
pop_callee_regs(&prog, callee_regs_used);
EMIT1(0x58); /* pop rax */
if (stack_depth)
......@@ -493,41 +487,21 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
* rdi == ctx (1st arg)
* rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
*/
RETPOLINE_RCX_BPF_JIT();
emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
/* out: */
ctx->tail_call_indirect_label = prog - start;
*pprog = prog;
}
static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
u8 **pprog, int addr, u8 *image,
bool *callee_regs_used, u32 stack_depth)
u8 **pprog, u8 *ip,
bool *callee_regs_used, u32 stack_depth,
struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
int pop_bytes = 0;
int off1 = 20;
int poke_off;
/* count the additional bytes used for popping callee regs to stack
* that need to be taken into account for jump offset that is used for
* bailing out from of the tail call when limit is reached
*/
pop_bytes = get_pop_bytes(callee_regs_used);
off1 += pop_bytes;
/*
* total bytes for:
* - nop5/ jmpq $off
* - pop callee regs
* - sub rsp, $val if depth > 0
* - pop rax
*/
poke_off = X86_PATCH_SIZE + pop_bytes + 1;
if (stack_depth) {
poke_off += 7;
off1 += 7;
}
u8 *prog = *pprog, *start = *pprog;
int offset;
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
......@@ -535,28 +509,30 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
EMIT2(X86_JA, off1); /* ja out */
offset = ctx->tail_call_direct_label - (prog + 2 - start);
EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
poke->tailcall_bypass = ip + (prog - start);
poke->adj_off = X86_TAIL_CALL_OFFSET;
poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
poke->tailcall_bypass);
*pprog = prog;
pop_callee_regs(pprog, callee_regs_used);
prog = *pprog;
pop_callee_regs(&prog, callee_regs_used);
EMIT1(0x58); /* pop rax */
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
/* out: */
ctx->tail_call_direct_label = prog - start;
*pprog = prog;
}
......@@ -1222,8 +1198,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
/* speculation barrier */
case BPF_ST | BPF_NOSPEC:
if (boot_cpu_has(X86_FEATURE_XMM2))
/* Emit 'lfence' */
EMIT3(0x0F, 0xAE, 0xE8);
EMIT_LFENCE();
break;
/* ST: *(u8*)(dst_reg + off) = imm */
......@@ -1412,13 +1387,16 @@ st: if (is_imm8(insn->off))
case BPF_JMP | BPF_TAIL_CALL:
if (imm32)
emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
&prog, addrs[i], image,
&prog, image + addrs[i - 1],
callee_regs_used,
bpf_prog->aux->stack_depth);
bpf_prog->aux->stack_depth,
ctx);
else
emit_bpf_tail_call_indirect(&prog,
callee_regs_used,
bpf_prog->aux->stack_depth);
bpf_prog->aux->stack_depth,
image + addrs[i - 1],
ctx);
break;
/* cond jump */
......@@ -2124,24 +2102,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
return ret;
}
/*
 * Emit the final jump of the dispatcher at *pprog, targeting the address
 * held in rdx.
 *
 * With CONFIG_RETPOLINE the jump goes to the __x86_indirect_thunk_rdx thunk
 * via emit_jump(), using the current output position as the instruction
 * address. Without it, the two bytes FF E2 are emitted directly.
 *
 * *pprog is updated to the new output position. Returns the emit_jump()
 * result when CONFIG_RETPOLINE is set, otherwise 0.
 */
static int emit_fallback_jump(u8 **pprog)
{
u8 *prog = *pprog;
int err = 0;
#ifdef CONFIG_RETPOLINE
/* Note that this assumes the compiler uses external
* thunks for indirect calls. Both clang and GCC use the same
* naming convention for external thunks.
*/
err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
#else
EMIT2(0xFF, 0xE2); /* jmp rdx */
#endif
/* Publish the advanced output pointer back to the caller. */
*pprog = prog;
return err;
}
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
{
u8 *jg_reloc, *prog = *pprog;
......@@ -2163,9 +2123,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
if (err)
return err;
err = emit_fallback_jump(&prog); /* jmp thunk/indirect */
if (err)
return err;
emit_indirect_jump(&prog, 2 /* rdx */, prog);
*pprog = prog;
return 0;
......
......@@ -15,6 +15,7 @@
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/asm-prototypes.h>
#include <linux/bpf.h>
/*
......@@ -1267,6 +1268,21 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
*pprog = prog;
}
/*
 * Emit a jump to the address held in edx at *pprog.
 *
 * @pprog: in/out pointer to the current output position.
 * @ip:    address the emitted instruction will have; only used to compute
 *         the relative displacement in the CONFIG_RETPOLINE case.
 *
 * Returns cnt, which starts at 0 here.
 * NOTE(review): the EMIT* macros are not visible in this hunk; presumably
 * they advance both prog and cnt by the number of bytes emitted, so the
 * return value is the emitted length - confirm against the macro definitions.
 */
static int emit_jmp_edx(u8 **pprog, u8 *ip)
{
u8 *prog = *pprog;
int cnt = 0;
#ifdef CONFIG_RETPOLINE
/*
 * Opcode E9 with a 32-bit displacement to __x86_indirect_thunk_edx. The
 * displacement is taken relative to ip + 5, i.e. the end of this
 * 5-byte instruction.
 */
EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
#else
/* Bytes FF E2: indirect jump through edx. */
EMIT2(0xFF, 0xE2);
#endif
*pprog = prog;
return cnt;
}
/*
* Generate the following code:
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
......@@ -1280,7 +1296,7 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
* goto *(prog->bpf_func + prologue_size);
* out:
*/
static void emit_bpf_tail_call(u8 **pprog)
static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
{
u8 *prog = *pprog;
int cnt = 0;
......@@ -1362,7 +1378,7 @@ static void emit_bpf_tail_call(u8 **pprog)
* eax == ctx (1st arg)
* edx == prog->bpf_func + prologue_size
*/
RETPOLINE_EDX_BPF_JIT();
cnt += emit_jmp_edx(&prog, ip + cnt);
if (jmp_label1 == -1)
jmp_label1 = cnt;
......@@ -2122,7 +2138,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
}
case BPF_JMP | BPF_TAIL_CALL:
emit_bpf_tail_call(&prog);
emit_bpf_tail_call(&prog, image + addrs[i - 1]);
break;
/* cond jump */
......
......@@ -283,12 +283,12 @@ static void __init xen_init_capabilities(void)
}
}
static void xen_set_debugreg(int reg, unsigned long val)
static noinstr void xen_set_debugreg(int reg, unsigned long val)
{
HYPERVISOR_set_debugreg(reg, val);
}
static unsigned long xen_get_debugreg(int reg)
static noinstr unsigned long xen_get_debugreg(int reg)
{
return HYPERVISOR_get_debugreg(reg);
}
......@@ -1025,7 +1025,8 @@ static const struct pv_info xen_info __initconst = {
.name = "Xen",
};
static const struct pv_cpu_ops xen_cpu_ops __initconst = {
static const typeof(pv_ops) xen_cpu_ops __initconst = {
.cpu = {
.cpuid = xen_cpuid,
.set_debugreg = xen_set_debugreg,
......@@ -1071,6 +1072,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.start_context_switch = paravirt_start_context_switch,
.end_context_switch = xen_end_context_switch,
},
};
static void xen_restart(char *msg)
......@@ -1211,7 +1213,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_ops.cpu = xen_cpu_ops;
pv_ops.cpu = xen_cpu_ops.cpu;
paravirt_iret = xen_iret;
xen_init_irq_ops();
......
......@@ -19,12 +19,12 @@
* callback mask. We do this in a very simple manner, by making a call
* down into Xen. The pending flag will be checked by Xen on return.
*/
void xen_force_evtchn_callback(void)
noinstr void xen_force_evtchn_callback(void)
{
(void)HYPERVISOR_xen_version(0, NULL);
}
asmlinkage __visible unsigned long xen_save_fl(void)
asmlinkage __visible noinstr unsigned long xen_save_fl(void)
{
struct vcpu_info *vcpu;
unsigned long flags;
......@@ -40,9 +40,9 @@ asmlinkage __visible unsigned long xen_save_fl(void)
*/
return (-flags) & X86_EFLAGS_IF;
}
PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl);
__PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl, ".noinstr.text");
asmlinkage __visible void xen_irq_disable(void)
asmlinkage __visible noinstr void xen_irq_disable(void)
{
/* There's a one instruction preempt window here. We need to
make sure we don't switch CPUs between getting the vcpu
......@@ -51,9 +51,9 @@ asmlinkage __visible void xen_irq_disable(void)
this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
__PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable, ".noinstr.text");
asmlinkage __visible void xen_irq_enable(void)
asmlinkage __visible noinstr void xen_irq_enable(void)
{
struct vcpu_info *vcpu;
......@@ -76,7 +76,7 @@ asmlinkage __visible void xen_irq_enable(void)
preempt_enable();
}
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
__PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable, ".noinstr.text");
static void xen_safe_halt(void)
{
......@@ -94,17 +94,20 @@ static void xen_halt(void)
xen_safe_halt();
}
static const struct pv_irq_ops xen_irq_ops __initconst = {
static const typeof(pv_ops) xen_irq_ops __initconst = {
.irq = {
.save_fl = PV_CALLEE_SAVE(xen_save_fl),
.irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
.irq_enable = PV_CALLEE_SAVE(xen_irq_enable),
.safe_halt = xen_safe_halt,
.halt = xen_halt,
},
};
void __init xen_init_irq_ops(void)
{
pv_ops.irq = xen_irq_ops;
pv_ops.irq = xen_irq_ops.irq;
x86_init.irqs.intr_init = xen_init_IRQ;
}
......@@ -1204,7 +1204,8 @@ static void __init xen_pagetable_init(void)
xen_remap_memory();
xen_setup_mfn_list_list();
}
static void xen_write_cr2(unsigned long cr2)
static noinstr void xen_write_cr2(unsigned long cr2)
{
this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
}
......@@ -2078,7 +2079,8 @@ static void xen_leave_lazy_mmu(void)
preempt_enable();
}
static const struct pv_mmu_ops xen_mmu_ops __initconst = {
static const typeof(pv_ops) xen_mmu_ops __initconst = {
.mmu = {
.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
.write_cr2 = xen_write_cr2,
......@@ -2139,6 +2141,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
},
.set_fixmap = xen_set_fixmap,
},
};
void __init xen_init_mmu_ops(void)
......@@ -2146,7 +2149,7 @@ void __init xen_init_mmu_ops(void)
x86_init.paging.pagetable_init = xen_pagetable_init;
x86_init.hyper.init_after_bootmem = xen_after_bootmem;
pv_ops.mmu = xen_mmu_ops;
pv_ops.mmu = xen_mmu_ops.mmu;
memset(dummy_mapping, 0xff, PAGE_SIZE);
}
......
......@@ -21,6 +21,45 @@
#include <linux/init.h>
#include <linux/linkage.h>
.pushsection .noinstr.text, "ax"
/*
* Disabling events is simply a matter of making the event mask
* non-zero.
*/
SYM_FUNC_START(xen_irq_disable_direct)
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
ret
SYM_FUNC_END(xen_irq_disable_direct)
/*
* Force an event check by making a hypercall, but preserve regs
* before making the call.
*/
SYM_FUNC_START(check_events)
FRAME_BEGIN
push %rax
push %rcx
push %rdx
push %rsi
push %rdi
push %r8
push %r9
push %r10
push %r11
call xen_force_evtchn_callback
pop %r11
pop %r10
pop %r9
pop %r8
pop %rdi
pop %rsi
pop %rdx
pop %rcx
pop %rax
FRAME_END
ret
SYM_FUNC_END(check_events)
/*
* Enable events. This clears the event mask and tests the pending
* event status with one and operation. If there are pending events,
......@@ -47,16 +86,6 @@ SYM_FUNC_START(xen_irq_enable_direct)
ret
SYM_FUNC_END(xen_irq_enable_direct)
/*
* Disabling events is simply a matter of making the event mask
* non-zero.
*/
SYM_FUNC_START(xen_irq_disable_direct)
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
ret
SYM_FUNC_END(xen_irq_disable_direct)
/*
* (xen_)save_fl is used to get the current interrupt enable status.
* Callers expect the status to be in X86_EFLAGS_IF, and other bits
......@@ -73,35 +102,6 @@ SYM_FUNC_START(xen_save_fl_direct)
ret
SYM_FUNC_END(xen_save_fl_direct)
/*
* Force an event check by making a hypercall, but preserve regs
* before making the call.
*/
SYM_FUNC_START(check_events)
FRAME_BEGIN
push %rax
push %rcx
push %rdx
push %rsi
push %rdi
push %r8
push %r9
push %r10
push %r11
call xen_force_evtchn_callback
pop %r11
pop %r10
pop %r9
pop %r8
pop %rdi
pop %rsi
pop %rdx
pop %rcx
pop %rax
FRAME_END
ret
SYM_FUNC_END(check_events)
SYM_FUNC_START(xen_read_cr2)
FRAME_BEGIN
_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
......@@ -116,6 +116,7 @@ SYM_FUNC_START(xen_read_cr2_direct)
FRAME_END
ret
SYM_FUNC_END(xen_read_cr2_direct);
.popsection
.macro xen_pv_trap name
SYM_CODE_START(xen_\name)
......
......@@ -20,6 +20,23 @@
#include <xen/interface/xen-mca.h>
#include <asm/xen/interface.h>
.pushsection .noinstr.text, "ax"
.balign PAGE_SIZE
SYM_CODE_START(hypercall_page)
.rept (PAGE_SIZE / 32)
UNWIND_HINT_FUNC
.skip 31, 0x90
ret
.endr
#define HYPERCALL(n) \
.equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
.type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
#include <asm/xen-hypercalls.h>
#undef HYPERCALL
SYM_CODE_END(hypercall_page)
.popsection
#ifdef CONFIG_XEN_PV
__INIT
SYM_CODE_START(startup_xen)
......@@ -64,23 +81,6 @@ SYM_CODE_END(asm_cpu_bringup_and_idle)
#endif
#endif
.pushsection .text
.balign PAGE_SIZE
SYM_CODE_START(hypercall_page)
.rept (PAGE_SIZE / 32)
UNWIND_HINT_FUNC
.skip 31, 0x90
ret
.endr
#define HYPERCALL(n) \
.equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
.type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
#include <asm/xen-hypercalls.h>
#undef HYPERCALL
SYM_CODE_END(hypercall_page)
.popsection
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
......
......@@ -105,7 +105,7 @@ static inline void user_exit_irqoff(void) { }
static inline enum ctx_state exception_enter(void) { return 0; }
static inline void exception_exit(enum ctx_state prev_ctx) { }
static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
static inline bool context_tracking_guest_enter(void) { return false; }
static __always_inline bool context_tracking_guest_enter(void) { return false; }
static inline void context_tracking_guest_exit(void) { }
#endif /* !CONFIG_CONTEXT_TRACKING */
......
......@@ -888,7 +888,7 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return NULL;
hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
hlist_for_each_entry_rcu_notrace(class, hash_head, hash_entry) {
if (class->key == key) {
/*
* Huh! same key, different name? Did someone trample
......
......@@ -458,7 +458,7 @@ config STACK_VALIDATION
config VMLINUX_VALIDATION
bool
depends on STACK_VALIDATION && DEBUG_ENTRY && !PARAVIRT
depends on STACK_VALIDATION && DEBUG_ENTRY
default y
config VMLINUX_MAP
......
......@@ -20,6 +20,7 @@
#include <objtool/arch.h>
#include <objtool/warn.h>
#include <objtool/endianness.h>
#include <objtool/builtin.h>
#include <arch/elf.h>
static int is_x86_64(const struct elf *elf)
......@@ -102,12 +103,13 @@ unsigned long arch_jump_destination(struct instruction *insn)
#define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg))
#define rm_is_reg(reg) (mod_is_reg() && modrm_rm == (reg))
int arch_decode_instruction(const struct elf *elf, const struct section *sec,
int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
unsigned long *immediate,
struct list_head *ops_list)
{
const struct elf *elf = file->elf;
struct insn insn;
int x86_64, ret;
unsigned char op1, op2,
......@@ -544,6 +546,36 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
*type = INSN_RETURN;
break;
case 0xc7: /* mov imm, r/m */
if (!noinstr)
break;
if (insn.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) {
struct reloc *immr, *disp;
struct symbol *func;
int idx;
immr = find_reloc_by_dest(elf, (void *)sec, offset+3);
disp = find_reloc_by_dest(elf, (void *)sec, offset+7);
if (!immr || strcmp(immr->sym->name, "pv_ops"))
break;
idx = (immr->addend + 8) / sizeof(void *);
func = disp->sym;
if (disp->sym->type == STT_SECTION)
func = find_symbol_by_offset(disp->sym->sec, disp->addend);
if (!func) {
WARN("no func for pv_ops[]");
return -1;
}
objtool_pv_add(file, idx, func);
}
break;
case 0xcf: /* iret */
/*
* Handle sync_core(), which has an IRET to self.
......@@ -659,154 +691,52 @@ const char *arch_nop_insn(int len)
return nops[len-1];
}
/* asm/alternative.h ? */
#define ALTINSTR_FLAG_INV (1 << 15)
#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV)
struct alt_instr {
s32 instr_offset; /* original instruction */
s32 repl_offset; /* offset to replacement instruction */
u16 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction */
} __packed;
/*
 * Append one struct alt_instr entry to the .altinstructions section,
 * creating the section if it does not exist yet.
 *
 * @elf:      ELF object being modified.
 * @orig:     instruction the entry's instr_offset relocation points at.
 * @sym:      symbol the entry's repl_offset relocation points at.
 * @cpuid:    value stored in the entry's cpuid field.
 * @orig_len: value stored in instrlen.
 * @repl_len: value stored in replacementlen.
 *
 * Returns 0 on success, -1 on any failure (a warning is printed).
 */
static int elf_add_alternative(struct elf *elf,
struct instruction *orig, struct symbol *sym,
int cpuid, u8 orig_len, u8 repl_len)
{
const int size = sizeof(struct alt_instr);
struct alt_instr *alt;
struct section *sec;
Elf_Scn *s;
/* Reuse an existing .altinstructions section, otherwise create one. */
sec = find_section_by_name(elf, ".altinstructions");
if (!sec) {
sec = elf_create_section(elf, ".altinstructions",
SHF_ALLOC, 0, 0);
if (!sec) {
WARN_ELF("elf_create_section");
return -1;
}
}
s = elf_getscn(elf->elf, sec->idx);
if (!s) {
WARN_ELF("elf_getscn");
return -1;
}
/* Add a fresh data chunk to the section for the new entry. */
sec->data = elf_newdata(s);
if (!sec->data) {
WARN_ELF("elf_newdata");
return -1;
}
sec->data->d_size = size;
sec->data->d_align = 1;
/* The entry itself lives in a zeroed heap buffer owned by the data chunk. */
alt = sec->data->d_buf = malloc(size);
if (!sec->data->d_buf) {
perror("malloc");
return -1;
}
memset(sec->data->d_buf, 0, size);
/* Relocation for alt_instr::instr_offset, placed at the current section end. */
if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size,
R_X86_64_PC32, orig->sec, orig->offset)) {
WARN("elf_create_reloc: alt_instr::instr_offset");
return -1;
}
/* Relocation for alt_instr::repl_offset, 4 bytes after instr_offset (s32). */
if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4,
R_X86_64_PC32, sym, 0)) {
WARN("elf_create_reloc: alt_instr::repl_offset");
return -1;
}
alt->cpuid = bswap_if_needed(cpuid);
alt->instrlen = orig_len;
alt->replacementlen = repl_len;
/* Grow the section only after both relocations used the old size as offset. */
sec->sh.sh_size += size;
sec->changed = true;
return 0;
}
#define X86_FEATURE_RETPOLINE ( 7*32+12)
#define BYTE_RET 0xC3
int arch_rewrite_retpolines(struct objtool_file *file)
const char *arch_ret_insn(int len)
{
struct instruction *insn;
struct reloc *reloc;
struct symbol *sym;
char name[32] = "";
list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
if (insn->type != INSN_JUMP_DYNAMIC &&
insn->type != INSN_CALL_DYNAMIC)
continue;
if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
continue;
reloc = insn->reloc;
sprintf(name, "__x86_indirect_alt_%s_%s",
insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
reloc->sym->name + 21);
sym = find_symbol_by_name(file->elf, name);
if (!sym) {
sym = elf_create_undef_symbol(file->elf, name);
if (!sym) {
WARN("elf_create_undef_symbol");
return -1;
}
}
static const char ret[5][5] = {
{ BYTE_RET },
{ BYTE_RET, BYTES_NOP1 },
{ BYTE_RET, BYTES_NOP2 },
{ BYTE_RET, BYTES_NOP3 },
{ BYTE_RET, BYTES_NOP4 },
};
if (elf_add_alternative(file->elf, insn, sym,
ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) {
WARN("elf_add_alternative");
return -1;
}
if (len < 1 || len > 5) {
WARN("invalid RET size: %d\n", len);
return NULL;
}
return 0;
return ret[len-1];
}
int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
int arch_decode_hint_reg(u8 sp_reg, int *base)
{
struct cfi_reg *cfa = &insn->cfi.cfa;
switch (sp_reg) {
case ORC_REG_UNDEFINED:
cfa->base = CFI_UNDEFINED;
*base = CFI_UNDEFINED;
break;
case ORC_REG_SP:
cfa->base = CFI_SP;
*base = CFI_SP;
break;
case ORC_REG_BP:
cfa->base = CFI_BP;
*base = CFI_BP;
break;
case ORC_REG_SP_INDIRECT:
cfa->base = CFI_SP_INDIRECT;
*base = CFI_SP_INDIRECT;
break;
case ORC_REG_R10:
cfa->base = CFI_R10;
*base = CFI_R10;
break;
case ORC_REG_R13:
cfa->base = CFI_R13;
*base = CFI_R13;
break;
case ORC_REG_DI:
cfa->base = CFI_DI;
*base = CFI_DI;
break;
case ORC_REG_DX:
cfa->base = CFI_DX;
*base = CFI_DX;
break;
default:
return -1;
......
This diff is collapsed.
......@@ -741,90 +741,6 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
return len;
}
struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
{
struct section *symtab, *symtab_shndx;
struct symbol *sym;
Elf_Data *data;
Elf_Scn *s;
sym = malloc(sizeof(*sym));
if (!sym) {
perror("malloc");
return NULL;
}
memset(sym, 0, sizeof(*sym));
sym->name = strdup(name);
sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
if (sym->sym.st_name == -1)
return NULL;
sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
// st_other 0
// st_shndx 0
// st_value 0
// st_size 0
symtab = find_section_by_name(elf, ".symtab");
if (!symtab) {
WARN("can't find .symtab");
return NULL;
}
s = elf_getscn(elf->elf, symtab->idx);
if (!s) {
WARN_ELF("elf_getscn");
return NULL;
}
data = elf_newdata(s);
if (!data) {
WARN_ELF("elf_newdata");
return NULL;
}
data->d_buf = &sym->sym;
data->d_size = sizeof(sym->sym);
data->d_align = 1;
data->d_type = ELF_T_SYM;
sym->idx = symtab->sh.sh_size / sizeof(sym->sym);
symtab->sh.sh_size += data->d_size;
symtab->changed = true;
symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
if (symtab_shndx) {
s = elf_getscn(elf->elf, symtab_shndx->idx);
if (!s) {
WARN_ELF("elf_getscn");
return NULL;
}
data = elf_newdata(s);
if (!data) {
WARN_ELF("elf_newdata");
return NULL;
}
data->d_buf = &sym->sym.st_size; /* conveniently 0 */
data->d_size = sizeof(Elf32_Word);
data->d_align = 4;
data->d_type = ELF_T_WORD;
symtab_shndx->sh.sh_size += 4;
symtab_shndx->changed = true;
}
sym->sec = find_section_by_index(elf, 0);
elf_add_symbol(elf, sym);
return sym;
}
struct section *elf_create_section(struct elf *elf, const char *name,
unsigned int sh_flags, size_t entsize, int nr)
{
......
......@@ -69,7 +69,7 @@ struct instruction;
void arch_initial_func_cfi_state(struct cfi_init_state *state);
int arch_decode_instruction(const struct elf *elf, const struct section *sec,
int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
unsigned long *immediate,
......@@ -82,8 +82,9 @@ unsigned long arch_jump_destination(struct instruction *insn);
unsigned long arch_dest_reloc_offset(int addend);
const char *arch_nop_insn(int len);
const char *arch_ret_insn(int len);
int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
int arch_decode_hint_reg(u8 sp_reg, int *base);
bool arch_is_retpoline(struct symbol *sym);
......
......@@ -7,6 +7,7 @@
#define _OBJTOOL_CFI_H
#include <arch/cfi_regs.h>
#include <linux/list.h>
#define CFI_UNDEFINED -1
#define CFI_CFA -2
......@@ -24,6 +25,7 @@ struct cfi_init_state {
};
struct cfi_state {
struct hlist_node hash; /* must be first, cficmp() */
struct cfi_reg regs[CFI_NUM_REGS];
struct cfi_reg vals[CFI_NUM_REGS];
struct cfi_reg cfa;
......
......@@ -40,7 +40,6 @@ struct instruction {
struct list_head list;
struct hlist_node hash;
struct list_head call_node;
struct list_head mcount_loc_node;
struct section *sec;
unsigned long offset;
unsigned int len;
......@@ -60,7 +59,7 @@ struct instruction {
struct list_head alts;
struct symbol *func;
struct list_head stack_ops;
struct cfi_state cfi;
struct cfi_state *cfi;
};
static inline bool is_static_jump(struct instruction *insn)
......
......@@ -54,8 +54,12 @@ struct symbol {
unsigned long offset;
unsigned int len;
struct symbol *pfunc, *cfunc, *alias;
bool uaccess_safe;
bool static_call_tramp;
u8 uaccess_safe : 1;
u8 static_call_tramp : 1;
u8 retpoline_thunk : 1;
u8 fentry : 1;
u8 kcov : 1;
struct list_head pv_target;
};
struct reloc {
......@@ -140,7 +144,6 @@ int elf_write_insn(struct elf *elf, struct section *sec,
unsigned long offset, unsigned int len,
const char *insn);
int elf_write_reloc(struct elf *elf, struct reloc *reloc);
struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name);
int elf_write(struct elf *elf);
void elf_close(struct elf *elf);
......
......@@ -14,6 +14,11 @@
#define __weak __attribute__((weak))
struct pv_state {
bool clean;
struct list_head targets;
};
struct objtool_file {
struct elf *elf;
struct list_head insn_list;
......@@ -25,10 +30,14 @@ struct objtool_file {
unsigned long jl_short, jl_long;
unsigned long jl_nop_short, jl_nop_long;
struct pv_state *pv_ops;
};
struct objtool_file *objtool_open_read(const char *_objname);
void objtool_pv_add(struct objtool_file *file, int idx, struct symbol *func);
int check(struct objtool_file *file);
int orc_dump(const char *objname);
int orc_create(struct objtool_file *file);
......
......@@ -135,6 +135,28 @@ struct objtool_file *objtool_open_read(const char *_objname)
return &file;
}
/*
 * Record @func as a possible target of the pv_ops slot @idx.
 *
 * Does nothing unless the noinstr option is set. Warns and bails out when
 * the file has no pv_ops table. On success the slot is marked not clean and
 * @func is linked into the slot's target list.
 */
void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
{
	const char *name;

	if (!noinstr)
		return;

	if (!f->pv_ops) {
		WARN("paravirt confusion");
		return;
	}

	/*
	 * These functions will be patched into native code,
	 * see paravirt_patch().
	 */
	name = func->name;
	if (strcmp(name, "_paravirt_nop") == 0 ||
	    strcmp(name, "_paravirt_ident_64") == 0)
		return;

	f->pv_ops[idx].clean = false;
	list_add(&func->pv_target, &f->pv_ops[idx].targets);
}
static void cmd_usage(void)
{
unsigned int i, longest = 0;
......
......@@ -13,13 +13,19 @@
#include <objtool/warn.h>
#include <objtool/endianness.h>
static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi)
static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi,
struct instruction *insn)
{
struct instruction *insn = container_of(cfi, struct instruction, cfi);
struct cfi_reg *bp = &cfi->regs[CFI_BP];
memset(orc, 0, sizeof(*orc));
if (!cfi) {
orc->end = 0;
orc->sp_reg = ORC_REG_UNDEFINED;
return 0;
}
orc->end = cfi->end;
if (cfi->cfa.base == CFI_UNDEFINED) {
......@@ -162,7 +168,7 @@ int orc_create(struct objtool_file *file)
int i;
if (!alt_group) {
if (init_orc_entry(&orc, &insn->cfi))
if (init_orc_entry(&orc, insn->cfi, insn))
return -1;
if (!memcmp(&prev_orc, &orc, sizeof(orc)))
continue;
......@@ -186,7 +192,8 @@ int orc_create(struct objtool_file *file)
struct cfi_state *cfi = alt_group->cfi[i];
if (!cfi)
continue;
if (init_orc_entry(&orc, cfi))
/* errors are reported on the original insn */
if (init_orc_entry(&orc, cfi, insn))
return -1;
if (!memcmp(&prev_orc, &orc, sizeof(orc)))
continue;
......
......@@ -109,14 +109,6 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
return -1;
}
/*
* Skip retpoline .altinstr_replacement... we already rewrite the
* instructions for retpolines anyway, see arch_is_retpoline()
* usage in add_{call,jump}_destinations().
*/
if (arch_is_retpoline(new_reloc->sym))
return 1;
reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off);
/* _ASM_EXTABLE_EX hack */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment