Commit 553a8f2f authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'bpf-override-return'

Josef Bacik says:

====================
This is the same as v8, just rebased onto the bpf tree.

v8->v9:
- rebased onto the bpf tree.

v7->v8:
- removed the _ASM_KPROBE_ERROR_INJECT since it was not needed.

v6->v7:
- moved the opt-in macro to bpf.h out of kprobes.h.

v5->v6:
- add BPF_ALLOW_ERROR_INJECTION() tagging for functions that will support this
  feature.  This way only functions that opt-in will be allowed to be
  overridden.
- added a btrfs patch to allow error injection for open_ctree() so that the bpf
  sample actually works.

v4->v5:
- disallow kprobe_override programs from being put in the prog map array so we
  don't tail call into something we didn't check.  This allows us to make the
  normal path still fast without a bunch of percpu operations.

v3->v4:
- fix a build error found by kbuild test bot (I didn't wait long enough
  apparently.)
- Added a warning message as per Daniels suggestion.

v2->v3:
- added a ->kprobe_override flag to bpf_prog.
- added some sanity checks to disallow attaching bpf progs that have
  ->kprobe_override set that aren't for ftrace kprobes.
- added the trace_kprobe_ftrace helper to check if the trace_event_call is a
  ftrace kprobe.
- renamed bpf_kprobe_state to bpf_kprobe_override, fixed it so we only read this
  value in the kprobe path, and thus only write to it if we're overriding or
  clearing the override.

v1->v2:
- moved things around to make sure that bpf_override_return could really only be
  used for an ftrace kprobe.
- killed the special return values from trace_call_bpf.
- renamed pc_modified to bpf_kprobe_state so bpf_override_return could tell if
  it was being called from an ftrace kprobe context.
- reworked the logic in kprobe_perf_func to take advantage of bpf_kprobe_state.
- updated the test as per Alexei's review.

- Original message -

A lot of our error paths are not well tested because we have no good way of
injecting errors generically.  Some subystems (block, memory) have ways to
inject errors, but they are random so it's hard to get reproduceable results.

With BPF we can add determinism to our error injection.  We can use kprobes and
other things to verify we are injecting errors at the exact case we are trying
to test.  This patch gives us the tool to actual do the error injection part.
It is very simple, we just set the return value of the pt_regs we're given to
whatever we provide, and then override the PC with a dummy function that simply
returns.

Right now this only works on x86, but it would be simple enough to expand to
other architectures.  Thanks,

Josef
====================

In patch "bpf: add a bpf_override_function helper" Alexei moved
"ifdef CONFIG_BPF_KPROBE_OVERRIDE" few lines to fail program loading
when kprobe_override is not available instead of failing at run-time.
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents a23967c1 023f46c5
...@@ -196,6 +196,9 @@ config HAVE_OPTPROBES ...@@ -196,6 +196,9 @@ config HAVE_OPTPROBES
config HAVE_KPROBES_ON_FTRACE config HAVE_KPROBES_ON_FTRACE
bool bool
config HAVE_KPROBE_OVERRIDE
bool
config HAVE_NMI config HAVE_NMI
bool bool
......
...@@ -154,6 +154,7 @@ config X86 ...@@ -154,6 +154,7 @@ config X86
select HAVE_KERNEL_XZ select HAVE_KERNEL_XZ
select HAVE_KPROBES select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE select HAVE_KPROBES_ON_FTRACE
select HAVE_KPROBE_OVERRIDE
select HAVE_KRETPROBES select HAVE_KRETPROBES
select HAVE_KVM select HAVE_KVM
select HAVE_LIVEPATCH if X86_64 select HAVE_LIVEPATCH if X86_64
......
...@@ -67,6 +67,10 @@ extern const int kretprobe_blacklist_size; ...@@ -67,6 +67,10 @@ extern const int kretprobe_blacklist_size;
void arch_remove_kprobe(struct kprobe *p); void arch_remove_kprobe(struct kprobe *p);
asmlinkage void kretprobe_trampoline(void); asmlinkage void kretprobe_trampoline(void);
#ifdef CONFIG_KPROBES_ON_FTRACE
extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs);
#endif
/* Architecture specific copy of original instruction*/ /* Architecture specific copy of original instruction*/
struct arch_specific_insn { struct arch_specific_insn {
/* copy of the original instruction */ /* copy of the original instruction */
......
...@@ -109,6 +109,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) ...@@ -109,6 +109,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
return regs->ax; return regs->ax;
} }
static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
{
regs->ax = rc;
}
/* /*
* user_mode(regs) determines whether a register set came from user * user_mode(regs) determines whether a register set came from user
* mode. On x86_32, this is true if V8086 mode was enabled OR if the * mode. On x86_32, this is true if V8086 mode was enabled OR if the
......
...@@ -97,3 +97,17 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p) ...@@ -97,3 +97,17 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p)
p->ainsn.boostable = false; p->ainsn.boostable = false;
return 0; return 0;
} }
asmlinkage void override_func(void);
asm(
".type override_func, @function\n"
"override_func:\n"
" ret\n"
".size override_func, .-override_func\n"
);
void arch_ftrace_kprobe_override_function(struct pt_regs *regs)
{
regs->ip = (unsigned long)&override_func;
}
NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function);
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <linux/ratelimit.h> #include <linux/ratelimit.h>
#include <linux/uuid.h> #include <linux/uuid.h>
#include <linux/semaphore.h> #include <linux/semaphore.h>
#include <linux/bpf.h>
#include <asm/unaligned.h> #include <asm/unaligned.h>
#include "ctree.h" #include "ctree.h"
#include "disk-io.h" #include "disk-io.h"
...@@ -3123,6 +3124,7 @@ int open_ctree(struct super_block *sb, ...@@ -3123,6 +3124,7 @@ int open_ctree(struct super_block *sb,
goto fail_block_groups; goto fail_block_groups;
goto retry_root_backup; goto retry_root_backup;
} }
BPF_ALLOW_ERROR_INJECTION(open_ctree);
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{ {
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/math64.h> #include <linux/math64.h>
#include <linux/ratelimit.h> #include <linux/ratelimit.h>
#include <linux/bpf.h>
#include "ctree.h" #include "ctree.h"
#include "free-space-cache.h" #include "free-space-cache.h"
#include "transaction.h" #include "transaction.h"
...@@ -332,6 +333,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, ...@@ -332,6 +333,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
return 0; return 0;
} }
BPF_ALLOW_ERROR_INJECTION(io_ctl_init);
static void io_ctl_free(struct btrfs_io_ctl *io_ctl) static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{ {
......
...@@ -136,6 +136,15 @@ ...@@ -136,6 +136,15 @@
#define KPROBE_BLACKLIST() #define KPROBE_BLACKLIST()
#endif #endif
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
#define ERROR_INJECT_LIST() . = ALIGN(8); \
VMLINUX_SYMBOL(__start_kprobe_error_inject_list) = .; \
KEEP(*(_kprobe_error_inject_list)) \
VMLINUX_SYMBOL(__stop_kprobe_error_inject_list) = .;
#else
#define ERROR_INJECT_LIST()
#endif
#ifdef CONFIG_EVENT_TRACING #ifdef CONFIG_EVENT_TRACING
#define FTRACE_EVENTS() . = ALIGN(8); \ #define FTRACE_EVENTS() . = ALIGN(8); \
VMLINUX_SYMBOL(__start_ftrace_events) = .; \ VMLINUX_SYMBOL(__start_ftrace_events) = .; \
...@@ -564,6 +573,7 @@ ...@@ -564,6 +573,7 @@
FTRACE_EVENTS() \ FTRACE_EVENTS() \
TRACE_SYSCALLS() \ TRACE_SYSCALLS() \
KPROBE_BLACKLIST() \ KPROBE_BLACKLIST() \
ERROR_INJECT_LIST() \
MEM_DISCARD(init.rodata) \ MEM_DISCARD(init.rodata) \
CLK_OF_TABLES() \ CLK_OF_TABLES() \
RESERVEDMEM_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \
......
...@@ -580,4 +580,15 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto; ...@@ -580,4 +580,15 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
void bpf_user_rnd_init_once(void); void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
#define BPF_ALLOW_ERROR_INJECTION(fname) \
static unsigned long __used \
__attribute__((__section__("_kprobe_error_inject_list"))) \
_eil_addr_##fname = (unsigned long)fname;
#else
#define BPF_ALLOW_ERROR_INJECTION(fname)
#endif
#endif
#endif /* _LINUX_BPF_H */ #endif /* _LINUX_BPF_H */
...@@ -458,7 +458,8 @@ struct bpf_prog { ...@@ -458,7 +458,8 @@ struct bpf_prog {
locked:1, /* Program image locked? */ locked:1, /* Program image locked? */
gpl_compatible:1, /* Is filter GPL compatible? */ gpl_compatible:1, /* Is filter GPL compatible? */
cb_access:1, /* Is control block accessed? */ cb_access:1, /* Is control block accessed? */
dst_needed:1; /* Do we need dst entry? */ dst_needed:1, /* Do we need dst entry? */
kprobe_override:1; /* Do we override a kprobe? */
enum bpf_prog_type type; /* Type of BPF program */ enum bpf_prog_type type; /* Type of BPF program */
u32 len; /* Number of filter blocks */ u32 len; /* Number of filter blocks */
u32 jited_len; /* Size of jited insns in bytes */ u32 jited_len; /* Size of jited insns in bytes */
......
...@@ -271,6 +271,7 @@ extern bool arch_kprobe_on_func_entry(unsigned long offset); ...@@ -271,6 +271,7 @@ extern bool arch_kprobe_on_func_entry(unsigned long offset);
extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
extern bool within_kprobe_blacklist(unsigned long addr); extern bool within_kprobe_blacklist(unsigned long addr);
extern bool within_kprobe_error_injection_list(unsigned long addr);
struct kprobe_insn_cache { struct kprobe_insn_cache {
struct mutex mutex; struct mutex mutex;
......
...@@ -475,6 +475,11 @@ struct module { ...@@ -475,6 +475,11 @@ struct module {
ctor_fn_t *ctors; ctor_fn_t *ctors;
unsigned int num_ctors; unsigned int num_ctors;
#endif #endif
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
unsigned int num_kprobe_ei_funcs;
unsigned long *kprobe_ei_funcs;
#endif
} ____cacheline_aligned __randomize_layout; } ____cacheline_aligned __randomize_layout;
#ifndef MODULE_ARCH_INIT #ifndef MODULE_ARCH_INIT
#define MODULE_ARCH_INIT {} #define MODULE_ARCH_INIT {}
......
...@@ -528,6 +528,7 @@ do { \ ...@@ -528,6 +528,7 @@ do { \
struct perf_event; struct perf_event;
DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
DECLARE_PER_CPU(int, bpf_kprobe_override);
extern int perf_trace_init(struct perf_event *event); extern int perf_trace_init(struct perf_event *event);
extern void perf_trace_destroy(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event);
......
...@@ -677,6 +677,10 @@ union bpf_attr { ...@@ -677,6 +677,10 @@ union bpf_attr {
* @buf: buf to fill * @buf: buf to fill
* @buf_size: size of the buf * @buf_size: size of the buf
* Return : 0 on success or negative error code * Return : 0 on success or negative error code
*
* int bpf_override_return(pt_regs, rc)
* @pt_regs: pointer to struct pt_regs
* @rc: the return value to set
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -736,7 +740,8 @@ union bpf_attr { ...@@ -736,7 +740,8 @@ union bpf_attr {
FN(xdp_adjust_meta), \ FN(xdp_adjust_meta), \
FN(perf_event_read_value), \ FN(perf_event_read_value), \
FN(perf_prog_read_value), \ FN(perf_prog_read_value), \
FN(getsockopt), FN(getsockopt), \
FN(override_return),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
...@@ -1320,6 +1320,9 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) ...@@ -1320,6 +1320,9 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
bool bpf_prog_array_compatible(struct bpf_array *array, bool bpf_prog_array_compatible(struct bpf_array *array,
const struct bpf_prog *fp) const struct bpf_prog *fp)
{ {
if (fp->kprobe_override)
return false;
if (!array->owner_prog_type) { if (!array->owner_prog_type) {
/* There's no owner yet where we could check for /* There's no owner yet where we could check for
* compatibility. * compatibility.
......
...@@ -4413,6 +4413,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) ...@@ -4413,6 +4413,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
prog->dst_needed = 1; prog->dst_needed = 1;
if (insn->imm == BPF_FUNC_get_prandom_u32) if (insn->imm == BPF_FUNC_get_prandom_u32)
bpf_user_rnd_init_once(); bpf_user_rnd_init_once();
if (insn->imm == BPF_FUNC_override_return)
prog->kprobe_override = 1;
if (insn->imm == BPF_FUNC_tail_call) { if (insn->imm == BPF_FUNC_tail_call) {
/* If we tail call into other programs, we /* If we tail call into other programs, we
* cannot make any assumptions since they can * cannot make any assumptions since they can
......
...@@ -8080,6 +8080,13 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) ...@@ -8080,6 +8080,13 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
return -EINVAL; return -EINVAL;
} }
/* Kprobe override only works for kprobes, not uprobes. */
if (prog->kprobe_override &&
!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
bpf_prog_put(prog);
return -EINVAL;
}
if (is_tracepoint || is_syscall_tp) { if (is_tracepoint || is_syscall_tp) {
int off = trace_event_get_offsets(event->tp_event); int off = trace_event_get_offsets(event->tp_event);
......
...@@ -83,6 +83,16 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) ...@@ -83,6 +83,16 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
return &(kretprobe_table_locks[hash].lock); return &(kretprobe_table_locks[hash].lock);
} }
/* List of symbols that can be overriden for error injection. */
static LIST_HEAD(kprobe_error_injection_list);
static DEFINE_MUTEX(kprobe_ei_mutex);
struct kprobe_ei_entry {
struct list_head list;
unsigned long start_addr;
unsigned long end_addr;
void *priv;
};
/* Blacklist -- list of struct kprobe_blacklist_entry */ /* Blacklist -- list of struct kprobe_blacklist_entry */
static LIST_HEAD(kprobe_blacklist); static LIST_HEAD(kprobe_blacklist);
...@@ -1394,6 +1404,17 @@ bool within_kprobe_blacklist(unsigned long addr) ...@@ -1394,6 +1404,17 @@ bool within_kprobe_blacklist(unsigned long addr)
return false; return false;
} }
bool within_kprobe_error_injection_list(unsigned long addr)
{
struct kprobe_ei_entry *ent;
list_for_each_entry(ent, &kprobe_error_injection_list, list) {
if (addr >= ent->start_addr && addr < ent->end_addr)
return true;
}
return false;
}
/* /*
* If we have a symbol_name argument, look it up and add the offset field * If we have a symbol_name argument, look it up and add the offset field
* to it. This way, we can specify a relative address to a symbol. * to it. This way, we can specify a relative address to a symbol.
...@@ -2168,6 +2189,86 @@ static int __init populate_kprobe_blacklist(unsigned long *start, ...@@ -2168,6 +2189,86 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
return 0; return 0;
} }
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
/* Markers of the _kprobe_error_inject_list section */
extern unsigned long __start_kprobe_error_inject_list[];
extern unsigned long __stop_kprobe_error_inject_list[];
/*
* Lookup and populate the kprobe_error_injection_list.
*
* For safety reasons we only allow certain functions to be overriden with
* bpf_error_injection, so we need to populate the list of the symbols that have
* been marked as safe for overriding.
*/
static void populate_kprobe_error_injection_list(unsigned long *start,
unsigned long *end,
void *priv)
{
unsigned long *iter;
struct kprobe_ei_entry *ent;
unsigned long entry, offset = 0, size = 0;
mutex_lock(&kprobe_ei_mutex);
for (iter = start; iter < end; iter++) {
entry = arch_deref_entry_point((void *)*iter);
if (!kernel_text_address(entry) ||
!kallsyms_lookup_size_offset(entry, &size, &offset)) {
pr_err("Failed to find error inject entry at %p\n",
(void *)entry);
continue;
}
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
break;
ent->start_addr = entry;
ent->end_addr = entry + size;
ent->priv = priv;
INIT_LIST_HEAD(&ent->list);
list_add_tail(&ent->list, &kprobe_error_injection_list);
}
mutex_unlock(&kprobe_ei_mutex);
}
static void __init populate_kernel_kprobe_ei_list(void)
{
populate_kprobe_error_injection_list(__start_kprobe_error_inject_list,
__stop_kprobe_error_inject_list,
NULL);
}
static void module_load_kprobe_ei_list(struct module *mod)
{
if (!mod->num_kprobe_ei_funcs)
return;
populate_kprobe_error_injection_list(mod->kprobe_ei_funcs,
mod->kprobe_ei_funcs +
mod->num_kprobe_ei_funcs, mod);
}
static void module_unload_kprobe_ei_list(struct module *mod)
{
struct kprobe_ei_entry *ent, *n;
if (!mod->num_kprobe_ei_funcs)
return;
mutex_lock(&kprobe_ei_mutex);
list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) {
if (ent->priv == mod) {
list_del_init(&ent->list);
kfree(ent);
}
}
mutex_unlock(&kprobe_ei_mutex);
}
#else
static inline void __init populate_kernel_kprobe_ei_list(void) {}
static inline void module_load_kprobe_ei_list(struct module *m) {}
static inline void module_unload_kprobe_ei_list(struct module *m) {}
#endif
/* Module notifier call back, checking kprobes on the module */ /* Module notifier call back, checking kprobes on the module */
static int kprobes_module_callback(struct notifier_block *nb, static int kprobes_module_callback(struct notifier_block *nb,
unsigned long val, void *data) unsigned long val, void *data)
...@@ -2178,6 +2279,11 @@ static int kprobes_module_callback(struct notifier_block *nb, ...@@ -2178,6 +2279,11 @@ static int kprobes_module_callback(struct notifier_block *nb,
unsigned int i; unsigned int i;
int checkcore = (val == MODULE_STATE_GOING); int checkcore = (val == MODULE_STATE_GOING);
if (val == MODULE_STATE_COMING)
module_load_kprobe_ei_list(mod);
else if (val == MODULE_STATE_GOING)
module_unload_kprobe_ei_list(mod);
if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
return NOTIFY_DONE; return NOTIFY_DONE;
...@@ -2240,6 +2346,8 @@ static int __init init_kprobes(void) ...@@ -2240,6 +2346,8 @@ static int __init init_kprobes(void)
pr_err("Please take care of using kprobes.\n"); pr_err("Please take care of using kprobes.\n");
} }
populate_kernel_kprobe_ei_list();
if (kretprobe_blacklist_size) { if (kretprobe_blacklist_size) {
/* lookup the function address from its name */ /* lookup the function address from its name */
for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
...@@ -2407,6 +2515,56 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = { ...@@ -2407,6 +2515,56 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
.release = seq_release, .release = seq_release,
}; };
/*
* kprobes/error_injection_list -- shows which functions can be overriden for
* error injection.
* */
static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos)
{
mutex_lock(&kprobe_ei_mutex);
return seq_list_start(&kprobe_error_injection_list, *pos);
}
static void kprobe_ei_seq_stop(struct seq_file *m, void *v)
{
mutex_unlock(&kprobe_ei_mutex);
}
static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
return seq_list_next(v, &kprobe_error_injection_list, pos);
}
static int kprobe_ei_seq_show(struct seq_file *m, void *v)
{
char buffer[KSYM_SYMBOL_LEN];
struct kprobe_ei_entry *ent =
list_entry(v, struct kprobe_ei_entry, list);
sprint_symbol(buffer, ent->start_addr);
seq_printf(m, "%s\n", buffer);
return 0;
}
static const struct seq_operations kprobe_ei_seq_ops = {
.start = kprobe_ei_seq_start,
.next = kprobe_ei_seq_next,
.stop = kprobe_ei_seq_stop,
.show = kprobe_ei_seq_show,
};
static int kprobe_ei_open(struct inode *inode, struct file *filp)
{
return seq_open(filp, &kprobe_ei_seq_ops);
}
static const struct file_operations debugfs_kprobe_ei_ops = {
.open = kprobe_ei_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
static void arm_all_kprobes(void) static void arm_all_kprobes(void)
{ {
struct hlist_head *head; struct hlist_head *head;
...@@ -2548,6 +2706,11 @@ static int __init debugfs_kprobe_init(void) ...@@ -2548,6 +2706,11 @@ static int __init debugfs_kprobe_init(void)
if (!file) if (!file)
goto error; goto error;
file = debugfs_create_file("error_injection_list", 0444, dir, NULL,
&debugfs_kprobe_ei_ops);
if (!file)
goto error;
return 0; return 0;
error: error:
......
...@@ -3118,7 +3118,11 @@ static int find_module_sections(struct module *mod, struct load_info *info) ...@@ -3118,7 +3118,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->ftrace_callsites), sizeof(*mod->ftrace_callsites),
&mod->num_ftrace_callsites); &mod->num_ftrace_callsites);
#endif #endif
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list",
sizeof(*mod->kprobe_ei_funcs),
&mod->num_kprobe_ei_funcs);
#endif
mod->extable = section_objs(info, "__ex_table", mod->extable = section_objs(info, "__ex_table",
sizeof(*mod->extable), &mod->num_exentries); sizeof(*mod->extable), &mod->num_exentries);
......
...@@ -529,6 +529,17 @@ config FUNCTION_PROFILER ...@@ -529,6 +529,17 @@ config FUNCTION_PROFILER
If in doubt, say N. If in doubt, say N.
config BPF_KPROBE_OVERRIDE
bool "Enable BPF programs to override a kprobed function"
depends on BPF_EVENTS
depends on KPROBES_ON_FTRACE
depends on HAVE_KPROBE_OVERRIDE
depends on DYNAMIC_FTRACE_WITH_REGS
default n
help
Allows BPF to override the execution of a probed function and
set a different return value. This is used for error injection.
config FTRACE_MCOUNT_RECORD config FTRACE_MCOUNT_RECORD
def_bool y def_bool y
depends on DYNAMIC_FTRACE depends on DYNAMIC_FTRACE
......
...@@ -13,6 +13,10 @@ ...@@ -13,6 +13,10 @@
#include <linux/filter.h> #include <linux/filter.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/kprobes.h>
#include <asm/kprobes.h>
#include "trace_probe.h"
#include "trace.h" #include "trace.h"
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
...@@ -76,6 +80,24 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) ...@@ -76,6 +80,24 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
} }
EXPORT_SYMBOL_GPL(trace_call_bpf); EXPORT_SYMBOL_GPL(trace_call_bpf);
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
__this_cpu_write(bpf_kprobe_override, 1);
regs_set_return_value(regs, rc);
arch_ftrace_kprobe_override_function(regs);
return 0;
}
static const struct bpf_func_proto bpf_override_return_proto = {
.func = bpf_override_return,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
};
#endif
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{ {
int ret; int ret;
...@@ -551,6 +573,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func ...@@ -551,6 +573,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_get_stackid_proto; return &bpf_get_stackid_proto;
case BPF_FUNC_perf_event_read_value: case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto; return &bpf_perf_event_read_value_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
case BPF_FUNC_override_return:
return &bpf_override_return_proto;
#endif
default: default:
return tracing_func_proto(func_id); return tracing_func_proto(func_id);
} }
...@@ -768,6 +794,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event, ...@@ -768,6 +794,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
struct bpf_prog_array *new_array; struct bpf_prog_array *new_array;
int ret = -EEXIST; int ret = -EEXIST;
/*
* Kprobe override only works for ftrace based kprobes, and only if they
* are on the opt-in list.
*/
if (prog->kprobe_override &&
(!trace_kprobe_ftrace(event->tp_event) ||
!trace_kprobe_error_injectable(event->tp_event)))
return -EINVAL;
mutex_lock(&bpf_event_mutex); mutex_lock(&bpf_event_mutex);
if (event->prog) if (event->prog)
......
...@@ -42,6 +42,7 @@ struct trace_kprobe { ...@@ -42,6 +42,7 @@ struct trace_kprobe {
(offsetof(struct trace_kprobe, tp.args) + \ (offsetof(struct trace_kprobe, tp.args) + \
(sizeof(struct probe_arg) * (n))) (sizeof(struct probe_arg) * (n)))
DEFINE_PER_CPU(int, bpf_kprobe_override);
static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{ {
...@@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) ...@@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
return nhit; return nhit;
} }
int trace_kprobe_ftrace(struct trace_event_call *call)
{
struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
return kprobe_ftrace(&tk->rp.kp);
}
int trace_kprobe_error_injectable(struct trace_event_call *call)
{
struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
unsigned long addr;
if (tk->symbol) {
addr = (unsigned long)
kallsyms_lookup_name(trace_kprobe_symbol(tk));
addr += tk->rp.kp.offset;
} else {
addr = (unsigned long)tk->rp.kp.addr;
}
return within_kprobe_error_injection_list(addr);
}
static int register_kprobe_event(struct trace_kprobe *tk); static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk);
...@@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call) ...@@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call)
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
/* Kprobe profile handler */ /* Kprobe profile handler */
static void static int
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{ {
struct trace_event_call *call = &tk->tp.call; struct trace_event_call *call = &tk->tp.call;
...@@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) ...@@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
int size, __size, dsize; int size, __size, dsize;
int rctx; int rctx;
if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) if (bpf_prog_array_valid(call)) {
return; int ret;
ret = trace_call_bpf(call, regs);
/*
* We need to check and see if we modified the pc of the
* pt_regs, and if so clear the kprobe and return 1 so that we
* don't do the instruction skipping. Also reset our state so
* we are clean the next pass through.
*/
if (__this_cpu_read(bpf_kprobe_override)) {
__this_cpu_write(bpf_kprobe_override, 0);
reset_current_kprobe();
return 1;
}
if (!ret)
return 0;
}
head = this_cpu_ptr(call->perf_events); head = this_cpu_ptr(call->perf_events);
if (hlist_empty(head)) if (hlist_empty(head))
return; return 0;
dsize = __get_data_size(&tk->tp, regs); dsize = __get_data_size(&tk->tp, regs);
__size = sizeof(*entry) + tk->tp.size + dsize; __size = sizeof(*entry) + tk->tp.size + dsize;
...@@ -1193,13 +1232,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) ...@@ -1193,13 +1232,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
entry = perf_trace_buf_alloc(size, NULL, &rctx); entry = perf_trace_buf_alloc(size, NULL, &rctx);
if (!entry) if (!entry)
return; return 0;
entry->ip = (unsigned long)tk->rp.kp.addr; entry->ip = (unsigned long)tk->rp.kp.addr;
memset(&entry[1], 0, dsize); memset(&entry[1], 0, dsize);
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL); head, NULL);
return 0;
} }
NOKPROBE_SYMBOL(kprobe_perf_func); NOKPROBE_SYMBOL(kprobe_perf_func);
...@@ -1275,6 +1315,7 @@ static int kprobe_register(struct trace_event_call *event, ...@@ -1275,6 +1315,7 @@ static int kprobe_register(struct trace_event_call *event,
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{ {
struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
int ret = 0;
raw_cpu_inc(*tk->nhit); raw_cpu_inc(*tk->nhit);
...@@ -1282,9 +1323,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) ...@@ -1282,9 +1323,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
kprobe_trace_func(tk, regs); kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
if (tk->tp.flags & TP_FLAG_PROFILE) if (tk->tp.flags & TP_FLAG_PROFILE)
kprobe_perf_func(tk, regs); ret = kprobe_perf_func(tk, regs);
#endif #endif
return 0; /* We don't tweek kernel, so just return 0 */ return ret;
} }
NOKPROBE_SYMBOL(kprobe_dispatcher); NOKPROBE_SYMBOL(kprobe_dispatcher);
......
...@@ -252,6 +252,8 @@ struct symbol_cache; ...@@ -252,6 +252,8 @@ struct symbol_cache;
unsigned long update_symbol_cache(struct symbol_cache *sc); unsigned long update_symbol_cache(struct symbol_cache *sc);
void free_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc);
struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
int trace_kprobe_ftrace(struct trace_event_call *call);
int trace_kprobe_error_injectable(struct trace_event_call *call);
#else #else
/* uprobes do not support symbol fetch methods */ /* uprobes do not support symbol fetch methods */
#define fetch_symbol_u8 NULL #define fetch_symbol_u8 NULL
...@@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset) ...@@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset)
{ {
return NULL; return NULL;
} }
static inline int trace_kprobe_ftrace(struct trace_event_call *call)
{
return 0;
}
static inline int trace_kprobe_error_injectable(struct trace_event_call *call)
{
return 0;
}
#endif /* CONFIG_KPROBE_EVENTS */ #endif /* CONFIG_KPROBE_EVENTS */
struct probe_arg { struct probe_arg {
......
...@@ -12,6 +12,7 @@ hostprogs-y += tracex3 ...@@ -12,6 +12,7 @@ hostprogs-y += tracex3
hostprogs-y += tracex4 hostprogs-y += tracex4
hostprogs-y += tracex5 hostprogs-y += tracex5
hostprogs-y += tracex6 hostprogs-y += tracex6
hostprogs-y += tracex7
hostprogs-y += test_probe_write_user hostprogs-y += test_probe_write_user
hostprogs-y += trace_output hostprogs-y += trace_output
hostprogs-y += lathist hostprogs-y += lathist
...@@ -58,6 +59,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o ...@@ -58,6 +59,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o
tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o
tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o
tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o
tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o
load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
...@@ -101,6 +103,7 @@ always += tracex3_kern.o ...@@ -101,6 +103,7 @@ always += tracex3_kern.o
always += tracex4_kern.o always += tracex4_kern.o
always += tracex5_kern.o always += tracex5_kern.o
always += tracex6_kern.o always += tracex6_kern.o
always += tracex7_kern.o
always += sock_flags_kern.o always += sock_flags_kern.o
always += test_probe_write_user_kern.o always += test_probe_write_user_kern.o
always += trace_output_kern.o always += trace_output_kern.o
...@@ -155,6 +158,7 @@ HOSTLOADLIBES_tracex3 += -lelf ...@@ -155,6 +158,7 @@ HOSTLOADLIBES_tracex3 += -lelf
HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex4 += -lelf -lrt
HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex5 += -lelf
HOSTLOADLIBES_tracex6 += -lelf HOSTLOADLIBES_tracex6 += -lelf
HOSTLOADLIBES_tracex7 += -lelf
HOSTLOADLIBES_test_cgrp2_sock2 += -lelf HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
HOSTLOADLIBES_load_sock_ops += -lelf HOSTLOADLIBES_load_sock_ops += -lelf
HOSTLOADLIBES_test_probe_write_user += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf
......
#!/bin/bash
rm -f testfile.img
dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
DEVICE=$(losetup --show -f testfile.img)
mkfs.btrfs -f $DEVICE
mkdir tmpmnt
./tracex7 $DEVICE
if [ $? -eq 0 ]
then
echo "SUCCESS!"
else
echo "FAILED!"
fi
losetup -d $DEVICE
#include <uapi/linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
#include "bpf_helpers.h"
SEC("kprobe/open_ctree")
int bpf_prog1(struct pt_regs *ctx)
{
unsigned long rc = -12;
bpf_override_return(ctx, rc);
return 0;
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
#define _GNU_SOURCE
#include <stdio.h>
#include <linux/bpf.h>
#include <unistd.h>
#include "libbpf.h"
#include "bpf_load.h"
int main(int argc, char **argv)
{
FILE *f;
char filename[256];
char command[256];
int ret;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
}
snprintf(command, 256, "mount %s tmpmnt/", argv[1]);
f = popen(command, "r");
ret = pclose(f);
return ret ? 0 : 1;
}
...@@ -677,6 +677,10 @@ union bpf_attr { ...@@ -677,6 +677,10 @@ union bpf_attr {
* @buf: buf to fill * @buf: buf to fill
* @buf_size: size of the buf * @buf_size: size of the buf
* Return : 0 on success or negative error code * Return : 0 on success or negative error code
*
* int bpf_override_return(pt_regs, rc)
* @pt_regs: pointer to struct pt_regs
* @rc: the return value to set
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -736,7 +740,8 @@ union bpf_attr { ...@@ -736,7 +740,8 @@ union bpf_attr {
FN(xdp_adjust_meta), \ FN(xdp_adjust_meta), \
FN(perf_event_read_value), \ FN(perf_event_read_value), \
FN(perf_prog_read_value), \ FN(perf_prog_read_value), \
FN(getsockopt), FN(getsockopt), \
FN(override_return),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
...@@ -82,7 +82,8 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, ...@@ -82,7 +82,8 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
unsigned int buf_size) = unsigned int buf_size) =
(void *) BPF_FUNC_perf_prog_read_value; (void *) BPF_FUNC_perf_prog_read_value;
static int (*bpf_override_return)(void *ctx, unsigned long rc) =
(void *) BPF_FUNC_override_return;
/* llvm builtin functions that eBPF C program may use to /* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions * emit BPF_LD_ABS and BPF_LD_IND instructions
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment