eprobes: Remove redundant event type information

Currently, the event probes save the type of the event they are attached
to when recording the event. For example:

  # echo 'e:switch sched/sched_switch prev_state=$prev_state prev_prio=$prev_prio next_pid=$next_pid next_prio=$next_prio' > dynamic_events
  # cat events/eprobes/switch/format

 name: switch
 ID: 1717
 format:
        field:unsigned short common_type;       offset:0;       size:2; signed:0;
        field:unsigned char common_flags;       offset:2;       size:1; signed:0;
        field:unsigned char common_preempt_count;       offset:3;       size:1; signed:0;
        field:int common_pid;   offset:4;       size:4; signed:1;

        field:unsigned int __probe_type;        offset:8;       size:4; signed:0;
        field:u64 prev_state;   offset:12;      size:8; signed:0;
        field:u64 prev_prio;    offset:20;      size:8; signed:0;
        field:u64 next_pid;     offset:28;      size:8; signed:0;
        field:u64 next_prio;    offset:36;      size:8; signed:0;

 print fmt: "(%u) prev_state=0x%Lx prev_prio=0x%Lx next_pid=0x%Lx next_prio=0x%Lx", REC->__probe_type, REC->prev_state, REC->prev_prio, REC->next_pid, REC->next_prio

The __probe_type adds 4 bytes to every event.

One of the reasons for creating eprobes is to limit what is traced in an
event to be able to limit what is written into the ring buffer. Having
this redundant 4 bytes to every event takes away from this.

The event that is recorded can be retrieved from the event probe itself,
that is available when the trace is happening. For user space tools, it
could simply read the dynamic_event file to find the event they are for.
So there is really no reason to write this information into the ring
buffer for every event.

Link: https://lkml.kernel.org/r/20220218190057.2f5a19a8@gandalf.local.homeAcked-by: default avatarMasami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: default avatarJoel Fernandes <joel@joelfernandes.org>
Signed-off-by: default avatarSteven Rostedt (Google) <rostedt@goodmis.org>
parent 302e9edd
......@@ -136,7 +136,6 @@ struct kprobe_trace_entry_head {
struct eprobe_trace_entry_head {
struct trace_entry ent;
unsigned int type;
};
struct kretprobe_trace_entry_head {
......
......@@ -242,7 +242,6 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
static int eprobe_event_define_fields(struct trace_event_call *event_call)
{
int ret;
struct eprobe_trace_entry_head field;
struct trace_probe *tp;
......@@ -250,8 +249,6 @@ static int eprobe_event_define_fields(struct trace_event_call *event_call)
if (WARN_ON_ONCE(!tp))
return -ENOENT;
DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0);
return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
}
......@@ -270,7 +267,9 @@ print_eprobe_event(struct trace_iterator *iter, int flags,
struct trace_event_call *pevent;
struct trace_event *probed_event;
struct trace_seq *s = &iter->seq;
struct trace_eprobe *ep;
struct trace_probe *tp;
unsigned int type;
field = (struct eprobe_trace_entry_head *)iter->ent;
tp = trace_probe_primary_from_call(
......@@ -278,15 +277,18 @@ print_eprobe_event(struct trace_iterator *iter, int flags,
if (WARN_ON_ONCE(!tp))
goto out;
ep = container_of(tp, struct trace_eprobe, tp);
type = ep->event->event.type;
trace_seq_printf(s, "%s: (", trace_probe_name(tp));
probed_event = ftrace_find_event(field->type);
probed_event = ftrace_find_event(type);
if (probed_event) {
pevent = container_of(probed_event, struct trace_event_call, event);
trace_seq_printf(s, "%s.%s", pevent->class->system,
trace_event_name(pevent));
} else {
trace_seq_printf(s, "%u", field->type);
trace_seq_printf(s, "%u", type);
}
trace_seq_putc(s, ')');
......@@ -498,10 +500,6 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec)
return;
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
if (edata->ep->event)
entry->type = edata->ep->event->event.type;
else
entry->type = 0;
store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);
trace_event_buffer_commit(&fbuffer);
......
......@@ -871,15 +871,15 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
switch (ptype) {
case PROBE_PRINT_NORMAL:
fmt = "(%lx)";
arg = "REC->" FIELD_STRING_IP;
arg = ", REC->" FIELD_STRING_IP;
break;
case PROBE_PRINT_RETURN:
fmt = "(%lx <- %lx)";
arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
arg = ", REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
break;
case PROBE_PRINT_EVENT:
fmt = "(%u)";
arg = "REC->" FIELD_STRING_TYPE;
fmt = "";
arg = "";
break;
default:
WARN_ON_ONCE(1);
......@@ -903,7 +903,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
parg->type->fmt);
}
pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", arg);
for (i = 0; i < tp->nr_args; i++) {
parg = tp->args + i;
......
......@@ -38,7 +38,6 @@
#define FIELD_STRING_IP "__probe_ip"
#define FIELD_STRING_RETIP "__probe_ret_ip"
#define FIELD_STRING_FUNC "__probe_func"
#define FIELD_STRING_TYPE "__probe_type"
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment