Commit c3d6324f authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Alexei Starovoitov

x86/alternatives: Teach text_poke_bp() to emulate instructions

In preparation for static_call and variable size jump_label support,
teach text_poke_bp() to emulate instructions, namely:

  JMP32, JMP8, CALL, NOP2, NOP_ATOMIC5, INT3

The current text_poke_bp() takes a @handler argument which is used as
a jump target when the temporary INT3 is hit by a different CPU.

When patching CALL instructions, this doesn't work because we'd miss
the PUSH of the return address. Instead, teach poke_int3_handler() to
emulate an instruction, typically the instruction we're patching in.

This fits almost all text_poke_bp() users, except
arch_unoptimize_kprobe() which restores random text, and for that site
we have to build an explicit emulate instruction.
Tested-by: default avatarAlexei Starovoitov <ast@kernel.org>
Tested-by: default avatarSteven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: default avatarMasami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: default avatarDaniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: default avatarAlexei Starovoitov <ast@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20191111132457.529086974@infradead.org
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
(cherry picked from commit 8c7eebc10687af45ac8e40ad1bac0cf7893dba9f)
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent 808c9f7e
...@@ -26,10 +26,11 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, ...@@ -26,10 +26,11 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define POKE_MAX_OPCODE_SIZE 5 #define POKE_MAX_OPCODE_SIZE 5
struct text_poke_loc { struct text_poke_loc {
void *detour;
void *addr; void *addr;
size_t len; int len;
const char opcode[POKE_MAX_OPCODE_SIZE]; s32 rel32;
u8 opcode;
const u8 text[POKE_MAX_OPCODE_SIZE];
}; };
extern void text_poke_early(void *addr, const void *opcode, size_t len); extern void text_poke_early(void *addr, const void *opcode, size_t len);
...@@ -51,8 +52,10 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len); ...@@ -51,8 +52,10 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs); extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries); extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
const void *opcode, size_t len, const void *emulate);
extern int after_bootmem; extern int after_bootmem;
extern __ro_after_init struct mm_struct *poking_mm; extern __ro_after_init struct mm_struct *poking_mm;
extern __ro_after_init unsigned long poking_addr; extern __ro_after_init unsigned long poking_addr;
...@@ -63,8 +66,17 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) ...@@ -63,8 +66,17 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
regs->ip = ip; regs->ip = ip;
} }
#define INT3_INSN_SIZE 1 #define INT3_INSN_SIZE 1
#define CALL_INSN_SIZE 5 #define INT3_INSN_OPCODE 0xCC
#define CALL_INSN_SIZE 5
#define CALL_INSN_OPCODE 0xE8
#define JMP32_INSN_SIZE 5
#define JMP32_INSN_OPCODE 0xE9
#define JMP8_INSN_SIZE 2
#define JMP8_INSN_OPCODE 0xEB
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{ {
......
...@@ -956,16 +956,15 @@ NOKPROBE_SYMBOL(patch_cmp); ...@@ -956,16 +956,15 @@ NOKPROBE_SYMBOL(patch_cmp);
int poke_int3_handler(struct pt_regs *regs) int poke_int3_handler(struct pt_regs *regs)
{ {
struct text_poke_loc *tp; struct text_poke_loc *tp;
unsigned char int3 = 0xcc;
void *ip; void *ip;
/* /*
* Having observed our INT3 instruction, we now must observe * Having observed our INT3 instruction, we now must observe
* bp_patching.nr_entries. * bp_patching.nr_entries.
* *
* nr_entries != 0 INT3 * nr_entries != 0 INT3
* WMB RMB * WMB RMB
* write INT3 if (nr_entries) * write INT3 if (nr_entries)
* *
* Idem for other elements in bp_patching. * Idem for other elements in bp_patching.
*/ */
...@@ -978,9 +977,9 @@ int poke_int3_handler(struct pt_regs *regs) ...@@ -978,9 +977,9 @@ int poke_int3_handler(struct pt_regs *regs)
return 0; return 0;
/* /*
* Discount the sizeof(int3). See text_poke_bp_batch(). * Discount the INT3. See text_poke_bp_batch().
*/ */
ip = (void *) regs->ip - sizeof(int3); ip = (void *) regs->ip - INT3_INSN_SIZE;
/* /*
* Skip the binary search if there is a single member in the vector. * Skip the binary search if there is a single member in the vector.
...@@ -997,8 +996,28 @@ int poke_int3_handler(struct pt_regs *regs) ...@@ -997,8 +996,28 @@ int poke_int3_handler(struct pt_regs *regs)
return 0; return 0;
} }
/* set up the specified breakpoint detour */ ip += tp->len;
regs->ip = (unsigned long) tp->detour;
switch (tp->opcode) {
case INT3_INSN_OPCODE:
/*
* Someone poked an explicit INT3, they'll want to handle it,
* do not consume.
*/
return 0;
case CALL_INSN_OPCODE:
int3_emulate_call(regs, (long)ip + tp->rel32);
break;
case JMP32_INSN_OPCODE:
case JMP8_INSN_OPCODE:
int3_emulate_jmp(regs, (long)ip + tp->rel32);
break;
default:
BUG();
}
return 1; return 1;
} }
...@@ -1014,7 +1033,7 @@ NOKPROBE_SYMBOL(poke_int3_handler); ...@@ -1014,7 +1033,7 @@ NOKPROBE_SYMBOL(poke_int3_handler);
* synchronization using int3 breakpoint. * synchronization using int3 breakpoint.
* *
* The way it is done: * The way it is done:
* - For each entry in the vector: * - For each entry in the vector:
* - add a int3 trap to the address that will be patched * - add a int3 trap to the address that will be patched
* - sync cores * - sync cores
* - For each entry in the vector: * - For each entry in the vector:
...@@ -1027,9 +1046,9 @@ NOKPROBE_SYMBOL(poke_int3_handler); ...@@ -1027,9 +1046,9 @@ NOKPROBE_SYMBOL(poke_int3_handler);
*/ */
void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{ {
int patched_all_but_first = 0; unsigned char int3 = INT3_INSN_OPCODE;
unsigned char int3 = 0xcc;
unsigned int i; unsigned int i;
int do_sync;
lockdep_assert_held(&text_mutex); lockdep_assert_held(&text_mutex);
...@@ -1053,16 +1072,16 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) ...@@ -1053,16 +1072,16 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
/* /*
* Second step: update all but the first byte of the patched range. * Second step: update all but the first byte of the patched range.
*/ */
for (i = 0; i < nr_entries; i++) { for (do_sync = 0, i = 0; i < nr_entries; i++) {
if (tp[i].len - sizeof(int3) > 0) { if (tp[i].len - sizeof(int3) > 0) {
text_poke((char *)tp[i].addr + sizeof(int3), text_poke((char *)tp[i].addr + sizeof(int3),
(const char *)tp[i].opcode + sizeof(int3), (const char *)tp[i].text + sizeof(int3),
tp[i].len - sizeof(int3)); tp[i].len - sizeof(int3));
patched_all_but_first++; do_sync++;
} }
} }
if (patched_all_but_first) { if (do_sync) {
/* /*
* According to Intel, this core syncing is very likely * According to Intel, this core syncing is very likely
* not necessary and we'd be safe even without it. But * not necessary and we'd be safe even without it. But
...@@ -1075,10 +1094,17 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) ...@@ -1075,10 +1094,17 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* Third step: replace the first byte (int3) by the first byte of * Third step: replace the first byte (int3) by the first byte of
* replacing opcode. * replacing opcode.
*/ */
for (i = 0; i < nr_entries; i++) for (do_sync = 0, i = 0; i < nr_entries; i++) {
text_poke(tp[i].addr, tp[i].opcode, sizeof(int3)); if (tp[i].text[0] == INT3_INSN_OPCODE)
continue;
text_poke(tp[i].addr, tp[i].text, sizeof(int3));
do_sync++;
}
if (do_sync)
on_each_cpu(do_sync_core, NULL, 1);
on_each_cpu(do_sync_core, NULL, 1);
/* /*
* sync_core() implies an smp_mb() and orders this store against * sync_core() implies an smp_mb() and orders this store against
* the writing of the new instruction. * the writing of the new instruction.
...@@ -1087,6 +1113,60 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) ...@@ -1087,6 +1113,60 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
bp_patching.nr_entries = 0; bp_patching.nr_entries = 0;
} }
/*
 * Initialize a text_poke_loc entry for batched INT3-based patching.
 *
 * @tp:      entry to fill in
 * @addr:    kernel text address that will be patched
 * @opcode:  bytes to write at @addr; if NULL, tp->text is assumed to
 *           already hold the replacement bytes
 * @len:     length of the replacement instruction in bytes
 * @emulate: instruction for poke_int3_handler() to emulate while the
 *           INT3 is live; if NULL, the patched-in @opcode is emulated
 *
 * Decodes @emulate and records its opcode byte and (for CALL/JMP) its
 * rel32 displacement in @tp so the INT3 handler can emulate it without
 * needing a separate detour address. NOP2/NOP5 are rewritten as
 * JMP8/JMP32 with a zero displacement, which has the same effect as
 * falling through. Any other instruction is a hard BUG().
 */
void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
const void *opcode, size_t len, const void *emulate)
{
struct insn insn;
/* NULL @opcode means tp->text was pre-filled by the caller. */
if (!opcode)
opcode = (void *)tp->text;
else
memcpy((void *)tp->text, opcode, len);
/* By default, emulate the instruction being patched in. */
if (!emulate)
emulate = opcode;
kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
insn_get_length(&insn);
/* The emulated instruction must decode and match the patch length. */
BUG_ON(!insn_complete(&insn));
BUG_ON(len != insn.length);
tp->addr = addr;
tp->len = len;
tp->opcode = insn.opcode.bytes[0];
switch (tp->opcode) {
case INT3_INSN_OPCODE:
/* Explicit INT3: nothing to emulate, handler won't consume it. */
break;
case CALL_INSN_OPCODE:
case JMP32_INSN_OPCODE:
case JMP8_INSN_OPCODE:
/* Relative branch: stash the displacement for the handler. */
tp->rel32 = insn.immediate.value;
break;
default: /* assume NOP */
switch (len) {
case 2: /* NOP2 -- emulate as JMP8+0 */
BUG_ON(memcmp(emulate, ideal_nops[len], len));
tp->opcode = JMP8_INSN_OPCODE;
tp->rel32 = 0;
break;
case 5: /* NOP5 -- emulate as JMP32+0 */
BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
tp->opcode = JMP32_INSN_OPCODE;
tp->rel32 = 0;
break;
default: /* unknown instruction */
BUG();
}
break;
}
}
/** /**
* text_poke_bp() -- update instructions on live kernel on SMP * text_poke_bp() -- update instructions on live kernel on SMP
* @addr: address to patch * @addr: address to patch
...@@ -1098,20 +1178,10 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) ...@@ -1098,20 +1178,10 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* dynamically allocated memory. This function should be used when it is * dynamically allocated memory. This function should be used when it is
* not possible to allocate memory. * not possible to allocate memory.
*/ */
void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{ {
struct text_poke_loc tp = { struct text_poke_loc tp;
.detour = handler,
.addr = addr,
.len = len,
};
if (len > POKE_MAX_OPCODE_SIZE) {
WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
return;
}
memcpy((void *)tp.opcode, opcode, len);
text_poke_loc_init(&tp, addr, opcode, len, emulate);
text_poke_bp_batch(&tp, 1); text_poke_bp_batch(&tp, 1);
} }
...@@ -89,8 +89,7 @@ static void __ref __jump_label_transform(struct jump_entry *entry, ...@@ -89,8 +89,7 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
return; return;
} }
text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);
(void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
} }
void arch_jump_label_transform(struct jump_entry *entry, void arch_jump_label_transform(struct jump_entry *entry,
...@@ -147,11 +146,9 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, ...@@ -147,11 +146,9 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
} }
__jump_label_set_jump_code(entry, type, __jump_label_set_jump_code(entry, type,
(union jump_code_union *) &tp->opcode, 0); (union jump_code_union *)&tp->text, 0);
tp->addr = entry_code; text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);
tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
tp->len = JUMP_LABEL_NOP_SIZE;
tp_vec_nr++; tp_vec_nr++;
......
...@@ -437,8 +437,7 @@ void arch_optimize_kprobes(struct list_head *oplist) ...@@ -437,8 +437,7 @@ void arch_optimize_kprobes(struct list_head *oplist)
insn_buff[0] = RELATIVEJUMP_OPCODE; insn_buff[0] = RELATIVEJUMP_OPCODE;
*(s32 *)(&insn_buff[1]) = rel; *(s32 *)(&insn_buff[1]) = rel;
text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);
op->optinsn.insn);
list_del_init(&op->list); list_del_init(&op->list);
} }
...@@ -448,12 +447,18 @@ void arch_optimize_kprobes(struct list_head *oplist) ...@@ -448,12 +447,18 @@ void arch_optimize_kprobes(struct list_head *oplist)
void arch_unoptimize_kprobe(struct optimized_kprobe *op) void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{ {
u8 insn_buff[RELATIVEJUMP_SIZE]; u8 insn_buff[RELATIVEJUMP_SIZE];
u8 emulate_buff[RELATIVEJUMP_SIZE];
/* Set int3 to first byte for kprobes */ /* Set int3 to first byte for kprobes */
insn_buff[0] = BREAKPOINT_INSTRUCTION; insn_buff[0] = BREAKPOINT_INSTRUCTION;
memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
emulate_buff[0] = RELATIVEJUMP_OPCODE;
*(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -
((long)op->kp.addr + RELATIVEJUMP_SIZE));
text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
op->optinsn.insn); emulate_buff);
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment