Commit f6ef5658 authored by Daniel Borkmann's avatar Daniel Borkmann

Merge branch 'bpf-raw-tracepoints'

Alexei Starovoitov says:

====================
v7->v8:
- moved 'u32 num_args' from 'struct tracepoint' into 'struct bpf_raw_event_map'
  that increases memory overhead, but can be optimized/compressed later.
  Now it's zero changes in tracepoint.[ch]

v6->v7:
- adopted Steven's bpf_raw_tp_map section approach to find tracepoint
  and corresponding bpf probe function instead of kallsyms approach.
  dropped kernel_tracepoint_find_by_name() patch

v5->v6:
- avoid changing semantics of for_each_kernel_tracepoint() function, instead
  introduce kernel_tracepoint_find_by_name() helper

v4->v5:
- adopted Daniel's fancy REPEAT macro in bpf_trace.c in patch 6

v3->v4:
- adopted Linus's CAST_TO_U64 macro to cast any integer, pointer, or small
  struct to u64. That nicely reduced the size of patch 1

v2->v3:
- with Linus's suggestion introduced generic COUNT_ARGS and CONCATENATE macros
  (or rather moved them from apparmor)
  that cleaned up patch 6
- added patch 4 to refactor trace_iwlwifi_dev_ucode_error() from 17 args to 4
  Now any tracepoint with >12 args will have build error

v1->v2:
- simplified api by combing bpf_raw_tp_open(name) + bpf_attach(prog_fd) into
  bpf_raw_tp_open(name, prog_fd) as suggested by Daniel.
  That simplifies bpf_detach as well which is now simple close() of fd.
- fixed memory leak in error path which was spotted by Daniel.
- fixed bpf_get_stackid(), bpf_perf_event_output() called from raw tracepoints
- added more tests
- fixed allyesconfig build caught by buildbot

v1:
This patch set is a different way to address the pressing need to access
task_struct pointers in sched tracepoints from bpf programs.

The first approach simply added these pointers to sched tracepoints:
https://lkml.org/lkml/2017/12/14/753
which Peter nacked.
Few options were discussed and eventually the discussion converged on
doing bpf specific tracepoint_probe_register() probe functions.
Details here:
https://lkml.org/lkml/2017/12/20/929

Patch 1 is kernel wide cleanup of pass-struct-by-value into
pass-struct-by-reference into tracepoints.

Patches 2 and 3 are minor cleanups to address allyesconfig build

Patch 4 refactor trace_iwlwifi_dev_ucode_error from 17 to 4 args

Patch 5 introduces COUNT_ARGS macro

Patch 6 introduces BPF_RAW_TRACEPOINT api.
the auto-cleanup and multiple concurrent users are must have
features of tracing api. For bpf raw tracepoints it looks like:
  // load bpf prog with BPF_PROG_TYPE_RAW_TRACEPOINT type
  prog_fd = bpf_prog_load(...);

  // receive anon_inode fd for given bpf_raw_tracepoint
  // and attach bpf program to it
  raw_tp_fd = bpf_raw_tracepoint_open("xdp_exception", prog_fd);

Ctrl-C of tracing daemon or cmdline tool will automatically
detach bpf program, unload it and unregister tracepoint probe.
More details in patch 6.

Patch 7 - trivial support in libbpf
Patches 8, 9 - user space tests

samples/bpf/test_overhead performance on 1 cpu:

tracepoint    base  kprobe+bpf tracepoint+bpf raw_tracepoint+bpf
task_rename   1.1M   769K        947K            1.0M
urandom_read  789K   697K        750K            755K
====================
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents 6f5c39fa 3bbe0869
......@@ -1153,7 +1153,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
cinfo.sdma_ring_size = fd->cq->nentries;
cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;
trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, &cinfo);
if (copy_to_user((void __user *)arg, &cinfo, len))
return -EFAULT;
......
......@@ -106,7 +106,7 @@ TRACE_EVENT(hfi1_uctxtdata,
TRACE_EVENT(hfi1_ctxt_info,
TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt,
unsigned int subctxt,
struct hfi1_ctxt_info cinfo),
struct hfi1_ctxt_info *cinfo),
TP_ARGS(dd, ctxt, subctxt, cinfo),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(unsigned int, ctxt)
......@@ -120,11 +120,11 @@ TRACE_EVENT(hfi1_ctxt_info,
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->egrtids = cinfo.egrtids;
__entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
__entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
__entry->sdma_ring_size = cinfo.sdma_ring_size;
__entry->rcvegr_size = cinfo.rcvegr_size;
__entry->egrtids = cinfo->egrtids;
__entry->rcvhdrq_cnt = cinfo->rcvhdrq_cnt;
__entry->rcvhdrq_size = cinfo->rcvhdrq_entsize;
__entry->sdma_ring_size = cinfo->sdma_ring_size;
__entry->rcvegr_size = cinfo->rcvegr_size;
),
TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
__get_str(dev),
......
......@@ -1651,12 +1651,7 @@ static void iwl_dump_nic_error_log(struct iwl_priv *priv)
priv->status, table.valid);
}
trace_iwlwifi_dev_ucode_error(trans->dev, table.error_id, table.tsf_low,
table.data1, table.data2, table.line,
table.blink2, table.ilink1, table.ilink2,
table.bcon_time, table.gp1, table.gp2,
table.gp3, table.ucode_ver, table.hw_ver,
0, table.brd_ver);
trace_iwlwifi_dev_ucode_error(trans->dev, &table, 0, table.brd_ver);
IWL_ERR(priv, "0x%08X | %-28s\n", table.error_id,
desc_lookup(table.error_id));
IWL_ERR(priv, "0x%08X | uPc\n", table.pc);
......
......@@ -126,14 +126,11 @@ TRACE_EVENT(iwlwifi_dev_tx,
__entry->framelen, __entry->skbaddr)
);
struct iwl_error_event_table;
TRACE_EVENT(iwlwifi_dev_ucode_error,
TP_PROTO(const struct device *dev, u32 desc, u32 tsf_low,
u32 data1, u32 data2, u32 line, u32 blink2, u32 ilink1,
u32 ilink2, u32 bcon_time, u32 gp1, u32 gp2, u32 rev_type,
u32 major, u32 minor, u32 hw_ver, u32 brd_ver),
TP_ARGS(dev, desc, tsf_low, data1, data2, line,
blink2, ilink1, ilink2, bcon_time, gp1, gp2,
rev_type, major, minor, hw_ver, brd_ver),
TP_PROTO(const struct device *dev, const struct iwl_error_event_table *table,
u32 hw_ver, u32 brd_ver),
TP_ARGS(dev, table, hw_ver, brd_ver),
TP_STRUCT__entry(
DEV_ENTRY
__field(u32, desc)
......@@ -155,20 +152,20 @@ TRACE_EVENT(iwlwifi_dev_ucode_error,
),
TP_fast_assign(
DEV_ASSIGN;
__entry->desc = desc;
__entry->tsf_low = tsf_low;
__entry->data1 = data1;
__entry->data2 = data2;
__entry->line = line;
__entry->blink2 = blink2;
__entry->ilink1 = ilink1;
__entry->ilink2 = ilink2;
__entry->bcon_time = bcon_time;
__entry->gp1 = gp1;
__entry->gp2 = gp2;
__entry->rev_type = rev_type;
__entry->major = major;
__entry->minor = minor;
__entry->desc = table->error_id;
__entry->tsf_low = table->tsf_low;
__entry->data1 = table->data1;
__entry->data2 = table->data2;
__entry->line = table->line;
__entry->blink2 = table->blink2;
__entry->ilink1 = table->ilink1;
__entry->ilink2 = table->ilink2;
__entry->bcon_time = table->bcon_time;
__entry->gp1 = table->gp1;
__entry->gp2 = table->gp2;
__entry->rev_type = table->gp3;
__entry->major = table->ucode_ver;
__entry->minor = table->hw_ver;
__entry->hw_ver = hw_ver;
__entry->brd_ver = brd_ver;
),
......
......@@ -30,6 +30,7 @@
#ifndef __CHECKER__
#include "iwl-trans.h"
#include "dvm/commands.h"
#define CREATE_TRACE_POINTS
#include "iwl-devtrace.h"
......
......@@ -549,12 +549,7 @@ static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u32 base)
IWL_ERR(mvm, "Loaded firmware version: %s\n", mvm->fw->fw_version);
trace_iwlwifi_dev_ucode_error(trans->dev, table.error_id, table.tsf_low,
table.data1, table.data2, table.data3,
table.blink2, table.ilink1,
table.ilink2, table.bcon_time, table.gp1,
table.gp2, table.fw_rev_type, table.major,
table.minor, table.hw_ver, table.brd_ver);
trace_iwlwifi_dev_ucode_error(trans->dev, &table, table.hw_ver, table.brd_ver);
IWL_ERR(mvm, "0x%08X | %-28s\n", table.error_id,
desc_lookup(table.error_id));
IWL_ERR(mvm, "0x%08X | trm_hw_status0\n", table.trm_hw_status0);
......
......@@ -34,7 +34,7 @@
#define REG_PR_FMT "%04x=%08x"
#define REG_PR_ARG __entry->reg, __entry->val
DECLARE_EVENT_CLASS(dev_reg_evt,
DECLARE_EVENT_CLASS(dev_reg_evtu,
TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val),
TP_ARGS(dev, reg, val),
TP_STRUCT__entry(
......@@ -51,12 +51,12 @@ DECLARE_EVENT_CLASS(dev_reg_evt,
)
);
DEFINE_EVENT(dev_reg_evt, reg_read,
DEFINE_EVENT(dev_reg_evtu, reg_read,
TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val),
TP_ARGS(dev, reg, val)
);
DEFINE_EVENT(dev_reg_evt, reg_write,
DEFINE_EVENT(dev_reg_evtu, reg_write,
TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val),
TP_ARGS(dev, reg, val)
);
......
......@@ -178,6 +178,15 @@
#define TRACE_SYSCALLS()
#endif
#ifdef CONFIG_BPF_EVENTS
#define BPF_RAW_TP() STRUCT_ALIGN(); \
VMLINUX_SYMBOL(__start__bpf_raw_tp) = .; \
KEEP(*(__bpf_raw_tp_map)) \
VMLINUX_SYMBOL(__stop__bpf_raw_tp) = .;
#else
#define BPF_RAW_TP()
#endif
#ifdef CONFIG_SERIAL_EARLYCON
#define EARLYCON_TABLE() STRUCT_ALIGN(); \
VMLINUX_SYMBOL(__earlycon_table) = .; \
......@@ -249,6 +258,7 @@
LIKELY_PROFILE() \
BRANCH_PROFILE() \
TRACE_PRINTKS() \
BPF_RAW_TP() \
TRACEPOINT_STR()
/*
......
......@@ -19,6 +19,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
......
......@@ -919,6 +919,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
#define swap(a, b) \
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
/* This counts to 12. Any more, it will return 13th argument. */
#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#define __CONCAT(a, b) a ## b
#define CONCATENATE(a, b) __CONCAT(a, b)
/**
* container_of - cast a member of a structure out to the containing structure
* @ptr: the pointer to the member.
......
......@@ -468,6 +468,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
void perf_event_detach_bpf_prog(struct perf_event *event);
int perf_event_query_prog_array(struct perf_event *event, void __user *info);
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
......@@ -487,6 +490,18 @@ perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
return -EOPNOTSUPP;
}
static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *p)
{
return -EOPNOTSUPP;
}
static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *p)
{
return -EOPNOTSUPP;
}
static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
{
return NULL;
}
#endif
enum {
......@@ -546,6 +561,33 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
void perf_trace_buf_update(void *record, u16 type);
void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3);
void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4);
void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5);
void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6);
void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
u64 arg8);
void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
u64 arg8, u64 arg9);
void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
u64 arg8, u64 arg9, u64 arg10);
void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
u64 arg8, u64 arg9, u64 arg10, u64 arg11);
void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
struct trace_event_call *call, u64 count,
struct pt_regs *regs, struct hlist_head *head,
......
......@@ -35,4 +35,10 @@ struct tracepoint {
struct tracepoint_func __rcu *funcs;
};
struct bpf_raw_event_map {
struct tracepoint *tp;
void *bpf_func;
u32 num_args;
} __aligned(32);
#endif
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM_VAR
#ifdef CONFIG_BPF_EVENTS
#undef __entry
#define __entry entry
#undef __get_dynamic_array
#define __get_dynamic_array(field) \
((void *)__entry + (__entry->__data_loc_##field & 0xffff))
#undef __get_dynamic_array_len
#define __get_dynamic_array_len(field) \
((__entry->__data_loc_##field >> 16) & 0xffff)
#undef __get_str
#define __get_str(field) ((char *)__get_dynamic_array(field))
#undef __get_bitmask
#define __get_bitmask(field) (char *)__get_dynamic_array(field)
#undef __perf_count
#define __perf_count(c) (c)
#undef __perf_task
#define __perf_task(t) (t)
/* cast any integer, pointer, or small struct to u64 */
#define UINTTYPE(size) \
__typeof__(__builtin_choose_expr(size == 1, (u8)1, \
__builtin_choose_expr(size == 2, (u16)2, \
__builtin_choose_expr(size == 4, (u32)3, \
__builtin_choose_expr(size == 8, (u64)4, \
(void)5)))))
#define __CAST_TO_U64(x) ({ \
typeof(x) __src = (x); \
UINTTYPE(sizeof(x)) __dst; \
memcpy(&__dst, &__src, sizeof(__dst)); \
(u64)__dst; })
#define __CAST1(a,...) __CAST_TO_U64(a)
#define __CAST2(a,...) __CAST_TO_U64(a), __CAST1(__VA_ARGS__)
#define __CAST3(a,...) __CAST_TO_U64(a), __CAST2(__VA_ARGS__)
#define __CAST4(a,...) __CAST_TO_U64(a), __CAST3(__VA_ARGS__)
#define __CAST5(a,...) __CAST_TO_U64(a), __CAST4(__VA_ARGS__)
#define __CAST6(a,...) __CAST_TO_U64(a), __CAST5(__VA_ARGS__)
#define __CAST7(a,...) __CAST_TO_U64(a), __CAST6(__VA_ARGS__)
#define __CAST8(a,...) __CAST_TO_U64(a), __CAST7(__VA_ARGS__)
#define __CAST9(a,...) __CAST_TO_U64(a), __CAST8(__VA_ARGS__)
#define __CAST10(a,...) __CAST_TO_U64(a), __CAST9(__VA_ARGS__)
#define __CAST11(a,...) __CAST_TO_U64(a), __CAST10(__VA_ARGS__)
#define __CAST12(a,...) __CAST_TO_U64(a), __CAST11(__VA_ARGS__)
/* tracepoints with more than 12 arguments will hit build error */
#define CAST_TO_U64(...) CONCATENATE(__CAST, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
static notrace void \
__bpf_trace_##call(void *__data, proto) \
{ \
struct bpf_prog *prog = __data; \
CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(prog, CAST_TO_U64(args)); \
}
/*
* This part is compiled out, it is only here as a build time check
* to make sure that if the tracepoint handling changes, the
* bpf probe will fail to compile unless it too is updated.
*/
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, call, proto, args) \
static inline void bpf_test_probe_##call(void) \
{ \
check_trace_callback_type_##call(__bpf_trace_##template); \
} \
static struct bpf_raw_event_map __used \
__attribute__((section("__bpf_raw_tp_map"))) \
__bpf_trace_tp_map_##call = { \
.tp = &__tracepoint_##call, \
.bpf_func = (void *)__bpf_trace_##template, \
.num_args = COUNT_ARGS(args), \
};
#undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
#endif /* CONFIG_BPF_EVENTS */
......@@ -95,6 +95,7 @@
#ifdef TRACEPOINTS_ENABLED
#include <trace/trace_events.h>
#include <trace/perf.h>
#include <trace/bpf_probe.h>
#endif
#undef TRACE_EVENT
......
......@@ -491,7 +491,7 @@ DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node,
TRACE_EVENT(f2fs_truncate_partial_nodes,
TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err),
TP_PROTO(struct inode *inode, nid_t *nid, int depth, int err),
TP_ARGS(inode, nid, depth, err),
......
......@@ -94,6 +94,7 @@ enum bpf_cmd {
BPF_MAP_GET_FD_BY_ID,
BPF_OBJ_GET_INFO_BY_FD,
BPF_PROG_QUERY,
BPF_RAW_TRACEPOINT_OPEN,
};
enum bpf_map_type {
......@@ -134,6 +135,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_CGROUP_DEVICE,
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT,
};
enum bpf_attach_type {
......@@ -344,6 +346,11 @@ union bpf_attr {
__aligned_u64 prog_ids;
__u32 prog_cnt;
} query;
struct {
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
} __attribute__((aligned(8)));
/* BPF helper function descriptions:
......@@ -1152,4 +1159,8 @@ struct bpf_cgroup_dev_ctx {
__u32 minor;
};
struct bpf_raw_tracepoint_args {
__u64 args[0];
};
#endif /* _UAPI__LINUX_BPF_H__ */
......@@ -1315,6 +1315,81 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags);
}
struct bpf_raw_tracepoint {
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
};
static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
{
struct bpf_raw_tracepoint *raw_tp = filp->private_data;
if (raw_tp->prog) {
bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
bpf_prog_put(raw_tp->prog);
}
kfree(raw_tp);
return 0;
}
static const struct file_operations bpf_raw_tp_fops = {
.release = bpf_raw_tracepoint_release,
.read = bpf_dummy_read,
.write = bpf_dummy_write,
};
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
{
struct bpf_raw_tracepoint *raw_tp;
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
char tp_name[128];
int tp_fd, err;
if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
sizeof(tp_name) - 1) < 0)
return -EFAULT;
tp_name[sizeof(tp_name) - 1] = 0;
btp = bpf_find_raw_tracepoint(tp_name);
if (!btp)
return -ENOENT;
raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
if (!raw_tp)
return -ENOMEM;
raw_tp->btp = btp;
prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
BPF_PROG_TYPE_RAW_TRACEPOINT);
if (IS_ERR(prog)) {
err = PTR_ERR(prog);
goto out_free_tp;
}
err = bpf_probe_register(raw_tp->btp, prog);
if (err)
goto out_put_prog;
raw_tp->prog = prog;
tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
O_CLOEXEC);
if (tp_fd < 0) {
bpf_probe_unregister(raw_tp->btp, prog);
err = tp_fd;
goto out_put_prog;
}
return tp_fd;
out_put_prog:
bpf_prog_put(prog);
out_free_tp:
kfree(raw_tp);
return err;
}
#ifdef CONFIG_CGROUP_BPF
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
......@@ -1925,6 +2000,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_OBJ_GET_INFO_BY_FD:
err = bpf_obj_get_info_by_fd(&attr, uattr);
break;
case BPF_RAW_TRACEPOINT_OPEN:
err = bpf_raw_tracepoint_open(&attr);
break;
default:
err = -EINVAL;
break;
......
......@@ -735,6 +735,86 @@ static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
}
}
/*
* bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
* to avoid potential recursive reuse issue when/if tracepoints are added
* inside bpf_*_event_output and/or bpf_get_stack_id
*/
static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags, void *, data, u64, size)
{
struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
perf_fetch_caller_regs(regs);
return ____bpf_perf_event_output(regs, map, flags, data, size);
}
static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
.func = bpf_perf_event_output_raw_tp,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
{
struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
perf_fetch_caller_regs(regs);
/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
flags, 0, 0);
}
static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
.func = bpf_get_stackid_raw_tp,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *raw_tp_prog_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
return &bpf_perf_event_output_proto_raw_tp;
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_raw_tp;
default:
return tracing_func_proto(func_id);
}
}
static bool raw_tp_prog_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
/* largest tracepoint in the kernel has 12 args */
if (off < 0 || off >= sizeof(__u64) * 12)
return false;
if (type != BPF_READ)
return false;
if (off % size != 0)
return false;
return true;
}
const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
.get_func_proto = raw_tp_prog_func_proto,
.is_valid_access = raw_tp_prog_is_valid_access,
};
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
};
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
......@@ -908,3 +988,106 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
return ret;
}
extern struct bpf_raw_event_map __start__bpf_raw_tp[];
extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
{
struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
for (; btp < __stop__bpf_raw_tp; btp++) {
if (!strcmp(btp->tp->name, name))
return btp;
}
return NULL;
}
static __always_inline
void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
{
rcu_read_lock();
preempt_disable();
(void) BPF_PROG_RUN(prog, args);
preempt_enable();
rcu_read_unlock();
}
#define UNPACK(...) __VA_ARGS__
#define REPEAT_1(FN, DL, X, ...) FN(X)
#define REPEAT_2(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
#define REPEAT_3(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
#define REPEAT_4(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
#define REPEAT_5(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
#define REPEAT_6(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
#define REPEAT_7(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
#define REPEAT_8(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
#define REPEAT_9(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
#define REPEAT_10(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
#define REPEAT_11(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
#define REPEAT_12(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
#define REPEAT(X, FN, DL, ...) REPEAT_##X(FN, DL, __VA_ARGS__)
#define SARG(X) u64 arg##X
#define COPY(X) args[X] = arg##X
#define __DL_COM (,)
#define __DL_SEM (;)
#define __SEQ_0_11 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
#define BPF_TRACE_DEFN_x(x) \
void bpf_trace_run##x(struct bpf_prog *prog, \
REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \
{ \
u64 args[x]; \
REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \
__bpf_trace_run(prog, args); \
} \
EXPORT_SYMBOL_GPL(bpf_trace_run##x)
BPF_TRACE_DEFN_x(1);
BPF_TRACE_DEFN_x(2);
BPF_TRACE_DEFN_x(3);
BPF_TRACE_DEFN_x(4);
BPF_TRACE_DEFN_x(5);
BPF_TRACE_DEFN_x(6);
BPF_TRACE_DEFN_x(7);
BPF_TRACE_DEFN_x(8);
BPF_TRACE_DEFN_x(9);
BPF_TRACE_DEFN_x(10);
BPF_TRACE_DEFN_x(11);
BPF_TRACE_DEFN_x(12);
static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
struct tracepoint *tp = btp->tp;
/*
* check that program doesn't access arguments beyond what's
* available in this tracepoint
*/
if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
return -EINVAL;
return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
}
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
int err;
mutex_lock(&bpf_event_mutex);
err = __bpf_probe_register(btp, prog);
mutex_unlock(&bpf_event_mutex);
return err;
}
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
int err;
mutex_lock(&bpf_event_mutex);
err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
mutex_unlock(&bpf_event_mutex);
return err;
}
......@@ -33,7 +33,7 @@
/* Tracing for driver callbacks */
DECLARE_EVENT_CLASS(local_only_evt,
DECLARE_EVENT_CLASS(local_only_evt4,
TP_PROTO(struct ieee802154_local *local),
TP_ARGS(local),
TP_STRUCT__entry(
......@@ -45,7 +45,7 @@ DECLARE_EVENT_CLASS(local_only_evt,
TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
);
DEFINE_EVENT(local_only_evt, 802154_drv_return_void,
DEFINE_EVENT(local_only_evt4, 802154_drv_return_void,
TP_PROTO(struct ieee802154_local *local),
TP_ARGS(local)
);
......@@ -65,12 +65,12 @@ TRACE_EVENT(802154_drv_return_int,
__entry->ret)
);
DEFINE_EVENT(local_only_evt, 802154_drv_start,
DEFINE_EVENT(local_only_evt4, 802154_drv_start,
TP_PROTO(struct ieee802154_local *local),
TP_ARGS(local)
);
DEFINE_EVENT(local_only_evt, 802154_drv_stop,
DEFINE_EVENT(local_only_evt4, 802154_drv_stop,
TP_PROTO(struct ieee802154_local *local),
TP_ARGS(local)
);
......
......@@ -3137,7 +3137,7 @@ TRACE_EVENT(rdev_start_radar_detection,
TRACE_EVENT(rdev_set_mcast_rate,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
int mcast_rate[NUM_NL80211_BANDS]),
int *mcast_rate),
TP_ARGS(wiphy, netdev, mcast_rate),
TP_STRUCT__entry(
WIPHY_ENTRY
......
......@@ -119,6 +119,7 @@ always += offwaketime_kern.o
always += spintest_kern.o
always += map_perf_test_kern.o
always += test_overhead_tp_kern.o
always += test_overhead_raw_tp_kern.o
always += test_overhead_kprobe_kern.o
always += parse_varlen.o parse_simple.o parse_ldabs.o
always += test_cgrp2_tc_kern.o
......
......@@ -61,6 +61,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0;
bool is_xdp = strncmp(event, "xdp", 3) == 0;
bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
......@@ -85,6 +86,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_KPROBE;
} else if (is_tracepoint) {
prog_type = BPF_PROG_TYPE_TRACEPOINT;
} else if (is_raw_tracepoint) {
prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT;
} else if (is_xdp) {
prog_type = BPF_PROG_TYPE_XDP;
} else if (is_perf_event) {
......@@ -131,6 +134,16 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return populate_prog_array(event, fd);
}
if (is_raw_tracepoint) {
efd = bpf_raw_tracepoint_open(event + 15, fd);
if (efd < 0) {
printf("tracepoint %s %s\n", event + 15, strerror(errno));
return -1;
}
event_fd[prog_cnt - 1] = efd;
return 0;
}
if (is_kprobe || is_kretprobe) {
if (is_kprobe)
event += 7;
......@@ -587,6 +600,7 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
if (memcmp(shname, "kprobe/", 7) == 0 ||
memcmp(shname, "kretprobe/", 10) == 0 ||
memcmp(shname, "tracepoint/", 11) == 0 ||
memcmp(shname, "raw_tracepoint/", 15) == 0 ||
memcmp(shname, "xdp", 3) == 0 ||
memcmp(shname, "perf_event", 10) == 0 ||
memcmp(shname, "socket", 6) == 0 ||
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
SEC("raw_tracepoint/task_rename")
int prog(struct bpf_raw_tracepoint_args *ctx)
{
return 0;
}
SEC("raw_tracepoint/urandom_read")
int prog2(struct bpf_raw_tracepoint_args *ctx)
{
return 0;
}
char _license[] SEC("license") = "GPL";
......@@ -158,5 +158,17 @@ int main(int argc, char **argv)
unload_progs();
}
if (test_flags & 0xC0) {
snprintf(filename, sizeof(filename),
"%s_raw_tp_kern.o", argv[0]);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
}
printf("w/RAW_TRACEPOINT\n");
run_perf_test(num_cpu, test_flags >> 6);
unload_progs();
}
return 0;
}
......@@ -43,15 +43,10 @@ struct aa_buffers {
DECLARE_PER_CPU(struct aa_buffers, aa_buffers);
#define COUNT_ARGS(X...) COUNT_ARGS_HELPER(, ##X, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#define COUNT_ARGS_HELPER(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, X...) n
#define CONCAT(X, Y) X ## Y
#define CONCAT_AFTER(X, Y) CONCAT(X, Y)
#define ASSIGN(FN, X, N) ((X) = FN(N))
#define EVAL1(FN, X) ASSIGN(FN, X, 0) /*X = FN(0)*/
#define EVAL2(FN, X, Y...) do { ASSIGN(FN, X, 1); EVAL1(FN, Y); } while (0)
#define EVAL(FN, X...) CONCAT_AFTER(EVAL, COUNT_ARGS(X))(FN, X)
#define EVAL(FN, X...) CONCATENATE(EVAL, COUNT_ARGS(X))(FN, X)
#define for_each_cpu_buffer(I) for ((I) = 0; (I) < MAX_PATH_BUFFERS; (I)++)
......
......@@ -14,7 +14,7 @@
#include <linux/tracepoint.h>
TRACE_EVENT(in_packet,
TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 cip_header[2], unsigned int payload_length, unsigned int index),
TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 *cip_header, unsigned int payload_length, unsigned int index),
TP_ARGS(s, cycles, cip_header, payload_length, index),
TP_STRUCT__entry(
__field(unsigned int, second)
......
......@@ -94,6 +94,7 @@ enum bpf_cmd {
BPF_MAP_GET_FD_BY_ID,
BPF_OBJ_GET_INFO_BY_FD,
BPF_PROG_QUERY,
BPF_RAW_TRACEPOINT_OPEN,
};
enum bpf_map_type {
......@@ -134,6 +135,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_CGROUP_DEVICE,
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT,
};
enum bpf_attach_type {
......@@ -344,6 +346,11 @@ union bpf_attr {
__aligned_u64 prog_ids;
__u32 prog_cnt;
} query;
struct {
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
} __attribute__((aligned(8)));
/* BPF helper function descriptions:
......@@ -1151,4 +1158,8 @@ struct bpf_cgroup_dev_ctx {
__u32 minor;
};
struct bpf_raw_tracepoint_args {
__u64 args[0];
};
#endif /* _UAPI__LINUX_BPF_H__ */
......@@ -428,6 +428,17 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
return err;
}
int bpf_raw_tracepoint_open(const char *name, int prog_fd)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
attr.raw_tracepoint.name = ptr_to_u64(name);
attr.raw_tracepoint.prog_fd = prog_fd;
return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}
int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
{
struct sockaddr_nl sa;
......
......@@ -79,4 +79,5 @@ int bpf_map_get_fd_by_id(__u32 id);
int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
__u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
int bpf_raw_tracepoint_open(const char *name, int prog_fd);
#endif
......@@ -877,7 +877,7 @@ static void test_stacktrace_map()
err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
goto out;
return;
/* Get the ID for the sched/sched_switch tracepoint */
snprintf(buf, sizeof(buf),
......@@ -888,8 +888,7 @@ static void test_stacktrace_map()
bytes = read(efd, buf, sizeof(buf));
close(efd);
if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
"read", "bytes %d errno %d\n", bytes, errno))
if (bytes <= 0 || bytes >= sizeof(buf))
goto close_prog;
/* Open the perf event and attach bpf progrram */
......@@ -906,29 +905,24 @@ static void test_stacktrace_map()
goto close_prog;
err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
err, errno))
goto close_pmu;
if (err)
goto disable_pmu;
err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
err, errno))
if (err)
goto disable_pmu;
/* find map fds */
control_map_fd = bpf_find_map(__func__, obj, "control_map");
if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
"err %d errno %d\n", err, errno))
if (control_map_fd < 0)
goto disable_pmu;
stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
"err %d errno %d\n", err, errno))
if (stackid_hmap_fd < 0)
goto disable_pmu;
stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
err, errno))
if (stackmap_fd < 0)
goto disable_pmu;
/* give some time for bpf program run */
......@@ -945,24 +939,78 @@ static void test_stacktrace_map()
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
goto disable_pmu;
goto disable_pmu_noerr;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
; /* fall through */
goto disable_pmu_noerr;
goto disable_pmu_noerr;
disable_pmu:
error_cnt++;
disable_pmu_noerr:
ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
close_pmu:
close(pmu_fd);
close_prog:
bpf_object__close(obj);
}
out:
static void test_stacktrace_map_raw_tp()
{
int control_map_fd, stackid_hmap_fd, stackmap_fd;
const char *file = "./test_stacktrace_map.o";
int efd, err, prog_fd;
__u32 key, val, duration = 0;
struct bpf_object *obj;
err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
efd = bpf_raw_tracepoint_open("sched_switch", prog_fd);
if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
goto close_prog;
/* find map fds */
control_map_fd = bpf_find_map(__func__, obj, "control_map");
if (control_map_fd < 0)
goto close_prog;
stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
if (stackid_hmap_fd < 0)
goto close_prog;
stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
if (stackmap_fd < 0)
goto close_prog;
/* give some time for bpf program run */
sleep(1);
/* disable stack trace collection */
key = 0;
val = 1;
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
* in stackmap, and vise versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
goto close_prog;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
goto close_prog;
goto close_prog_noerr;
close_prog:
error_cnt++;
close_prog_noerr:
bpf_object__close(obj);
}
static int extract_build_id(char *build_id, size_t size)
......@@ -1138,6 +1186,7 @@ int main(void)
test_tp_attach_query();
test_stacktrace_map();
test_stacktrace_build_id();
test_stacktrace_map_raw_tp();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment