Commit 8b3b1bb3 authored by Namhyung Kim, committed by Arnaldo Carvalho de Melo

perf record offcpu: Constify control data for BPF

The control knobs set before loading BPF programs should be declared as
'const volatile' so that they can be optimized by the BPF core.
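
A minimal sketch of the pattern (not part of this commit; the 'example'
skeleton and file names are made up, and the single has_task knob only
mirrors one of the knobs touched below): a 'const volatile' global lands
in the BPF object's .rodata, user space fills it in between skeleton
open and load, and the verifier can then prune branches that are dead
for the chosen value.

  /* example.bpf.c (hypothetical) */
  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>

  /* control knob: fixed before load, hence declared 'const volatile' */
  const volatile int has_task = 0;

  SEC("tp_btf/sched_switch")
  int on_switch(u64 *ctx)
  {
  	if (!has_task)
  		return 0;	/* with has_task == 0, the code below is dead */

  	/* ... task filtering would go here ... */
  	return 0;
  }

  char LICENSE[] SEC("license") = "Dual BSD/GPL";

  /* example.c (hypothetical), using the header generated by
   * 'bpftool gen skeleton example.bpf.o > example.skel.h' */
  #include "example.skel.h"

  int main(void)
  {
  	struct example_bpf *skel = example_bpf__open();

  	if (!skel)
  		return 1;

  	/* .rodata is writable only between open() and load() */
  	skel->rodata->has_task = 1;

  	if (example_bpf__load(skel) || example_bpf__attach(skel)) {
  		example_bpf__destroy(skel);
  		return 1;
  	}

  	/* ... run the workload ... */

  	example_bpf__destroy(skel);
  	return 0;
  }

The 'volatile' keeps clang from folding the initializer at BPF compile
time; the 'const' places the variable in the object's .rodata, which is
frozen at load time so the verifier can treat its value as a known
constant.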

Committer testing:

  root@x1:~# perf record --off-cpu
  ^C[ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 1.807 MB perf.data (5645 samples) ]

  root@x1:~# perf evlist
  cpu_atom/cycles/P
  cpu_core/cycles/P
  offcpu-time
  dummy:u
  root@x1:~# perf evlist -v
  cpu_atom/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0xa00000000, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
  cpu_core/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x400000000, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
  offcpu-time: type: 1 (software), size: 136, config: 0xa (PERF_COUNT_SW_BPF_OUTPUT), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, freq: 1, sample_id_all: 1
  dummy:u: type: 1 (software), size: 136, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|IDENTIFIER, read_format: ID|LOST, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1
  root@x1:~# perf trace -e bpf --max-events 5 perf record --off-cpu
       0.000 ( 0.015 ms): :2949124/2949124 bpf(cmd: 36, uattr: 0x7ffefc6dbe30, size: 8)          = -1 EOPNOTSUPP (Operation not supported)
       0.031 ( 0.115 ms): :2949124/2949124 bpf(cmd: PROG_LOAD, uattr: 0x7ffefc6dbb60, size: 148) = 14
       0.159 ( 0.037 ms): :2949124/2949124 bpf(cmd: PROG_LOAD, uattr: 0x7ffefc6dbc20, size: 148) = 14
      23.868 ( 0.144 ms): perf/2949124 bpf(cmd: PROG_LOAD, uattr: 0x7ffefc6dbad0, size: 148)     = 14
      24.027 ( 0.014 ms): perf/2949124 bpf(uattr: 0x7ffefc6dbc80, size: 80)                      = 14
  root@x1:~#
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240902200515.2103769-6-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 4afdc00c
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -73,14 +73,12 @@ static void off_cpu_start(void *arg)
 	struct evlist *evlist = arg;
 
 	/* update task filter for the given workload */
-	if (!skel->bss->has_cpu && !skel->bss->has_task &&
+	if (skel->rodata->has_task && skel->rodata->uses_tgid &&
 	    perf_thread_map__pid(evlist->core.threads, 0) != -1) {
 		int fd;
 		u32 pid;
 		u8 val = 1;
 
-		skel->bss->has_task = 1;
-		skel->bss->uses_tgid = 1;
 		fd = bpf_map__fd(skel->maps.task_filter);
 		pid = perf_thread_map__pid(evlist->core.threads, 0);
 		bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
@@ -148,6 +146,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
 	if (target->cpu_list) {
 		ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
 		bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+		skel->rodata->has_cpu = 1;
 	}
 
 	if (target->pid) {
@@ -173,11 +172,16 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
 			ntasks = MAX_PROC;
 
 		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+		skel->rodata->has_task = 1;
+		skel->rodata->uses_tgid = 1;
 	} else if (target__has_task(target)) {
 		ntasks = perf_thread_map__nr(evlist->core.threads);
 		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+		skel->rodata->has_task = 1;
 	} else if (target__none(target)) {
 		bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC);
+		skel->rodata->has_task = 1;
+		skel->rodata->uses_tgid = 1;
 	}
 
 	if (evlist__first(evlist)->cgrp) {
@@ -186,6 +190,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
 
 		if (!cgroup_is_v2("perf_event"))
 			skel->rodata->uses_cgroup_v1 = true;
+		skel->rodata->has_cgroup = 1;
 	}
 
 	if (opts->record_cgroup) {
@@ -208,7 +213,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
 		u32 cpu;
 		u8 val = 1;
 
-		skel->bss->has_cpu = 1;
 		fd = bpf_map__fd(skel->maps.cpu_filter);
 
 		for (i = 0; i < ncpus; i++) {
@@ -220,8 +224,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
 	if (target->pid) {
 		u8 val = 1;
 
-		skel->bss->has_task = 1;
-		skel->bss->uses_tgid = 1;
 		fd = bpf_map__fd(skel->maps.task_filter);
 
 		strlist__for_each_entry(pos, pid_slist) {
@@ -240,7 +242,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
 		u32 pid;
 		u8 val = 1;
 
-		skel->bss->has_task = 1;
 		fd = bpf_map__fd(skel->maps.task_filter);
 
 		for (i = 0; i < ntasks; i++) {
@@ -253,7 +254,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
 		struct evsel *evsel;
 		u8 val = 1;
 
-		skel->bss->has_cgroup = 1;
 		fd = bpf_map__fd(skel->maps.cgroup_filter);
 
 		evlist__for_each_entry(evlist, evsel) {
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -85,10 +85,11 @@ struct task_struct___old {
 } __attribute__((preserve_access_index));
 
 int enabled = 0;
-int has_cpu = 0;
-int has_task = 0;
-int has_cgroup = 0;
-int uses_tgid = 0;
+
+const volatile int has_cpu = 0;
+const volatile int has_task = 0;
+const volatile int has_cgroup = 0;
+const volatile int uses_tgid = 0;
 
 const volatile bool has_prev_state = false;
 const volatile bool needs_cgroup = false;