Commit 4afdc00c authored by Namhyung Kim's avatar Namhyung Kim Committed by Arnaldo Carvalho de Melo

perf lock contention: Constify control data for BPF

The control knobs set before loading BPF programs should be declared as
'const volatile' so that they can be optimized by the BPF core.

Committer testing:

  root@x1:~# perf lock contention --use-bpf
   contended   total wait     max wait     avg wait         type   caller

           5     31.57 us     14.93 us      6.31 us        mutex   btrfs_delayed_update_inode+0x43
           1     16.91 us     16.91 us     16.91 us      rwsem:R   btrfs_tree_read_lock_nested+0x1b
           1     15.13 us     15.13 us     15.13 us     spinlock   btrfs_getattr+0xd1
           1      6.65 us      6.65 us      6.65 us      rwsem:R   btrfs_tree_read_lock_nested+0x1b
           1      4.34 us      4.34 us      4.34 us     spinlock   process_one_work+0x1a9
  root@x1:~#
  root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf
       0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8)          = -1 EOPNOTSUPP (Operation not supported)
       0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16
       0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16
      26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148)     = 16
      26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80)                      = 16
      87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40)       = 16
      87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40)       = 16
      88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40)       = 16
      88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148)     = 17
      88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40)       = 16
  root@x1:~#
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240902200515.2103769-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 066fd840
...@@ -46,14 +46,22 @@ int lock_contention_prepare(struct lock_contention *con) ...@@ -46,14 +46,22 @@ int lock_contention_prepare(struct lock_contention *con)
else else
bpf_map__set_max_entries(skel->maps.stacks, 1); bpf_map__set_max_entries(skel->maps.stacks, 1);
if (target__has_cpu(target)) if (target__has_cpu(target)) {
skel->rodata->has_cpu = 1;
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
if (target__has_task(target)) }
if (target__has_task(target)) {
skel->rodata->has_task = 1;
ntasks = perf_thread_map__nr(evlist->core.threads); ntasks = perf_thread_map__nr(evlist->core.threads);
if (con->filters->nr_types) }
if (con->filters->nr_types) {
skel->rodata->has_type = 1;
ntypes = con->filters->nr_types; ntypes = con->filters->nr_types;
if (con->filters->nr_cgrps) }
if (con->filters->nr_cgrps) {
skel->rodata->has_cgroup = 1;
ncgrps = con->filters->nr_cgrps; ncgrps = con->filters->nr_cgrps;
}
/* resolve lock name filters to addr */ /* resolve lock name filters to addr */
if (con->filters->nr_syms) { if (con->filters->nr_syms) {
...@@ -82,6 +90,7 @@ int lock_contention_prepare(struct lock_contention *con) ...@@ -82,6 +90,7 @@ int lock_contention_prepare(struct lock_contention *con)
con->filters->addrs = addrs; con->filters->addrs = addrs;
} }
naddrs = con->filters->nr_addrs; naddrs = con->filters->nr_addrs;
skel->rodata->has_addr = 1;
} }
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
...@@ -90,6 +99,16 @@ int lock_contention_prepare(struct lock_contention *con) ...@@ -90,6 +99,16 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_map__set_max_entries(skel->maps.addr_filter, naddrs); bpf_map__set_max_entries(skel->maps.addr_filter, naddrs);
bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps); bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
skel->rodata->stack_skip = con->stack_skip;
skel->rodata->aggr_mode = con->aggr_mode;
skel->rodata->needs_callstack = con->save_callstack;
skel->rodata->lock_owner = con->owner;
if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) {
if (cgroup_is_v2("perf_event"))
skel->rodata->use_cgroup_v2 = 1;
}
if (lock_contention_bpf__load(skel) < 0) { if (lock_contention_bpf__load(skel) < 0) {
pr_err("Failed to load lock-contention BPF skeleton\n"); pr_err("Failed to load lock-contention BPF skeleton\n");
return -1; return -1;
...@@ -99,7 +118,6 @@ int lock_contention_prepare(struct lock_contention *con) ...@@ -99,7 +118,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 cpu; u32 cpu;
u8 val = 1; u8 val = 1;
skel->bss->has_cpu = 1;
fd = bpf_map__fd(skel->maps.cpu_filter); fd = bpf_map__fd(skel->maps.cpu_filter);
for (i = 0; i < ncpus; i++) { for (i = 0; i < ncpus; i++) {
...@@ -112,7 +130,6 @@ int lock_contention_prepare(struct lock_contention *con) ...@@ -112,7 +130,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 pid; u32 pid;
u8 val = 1; u8 val = 1;
skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter); fd = bpf_map__fd(skel->maps.task_filter);
for (i = 0; i < ntasks; i++) { for (i = 0; i < ntasks; i++) {
...@@ -125,7 +142,6 @@ int lock_contention_prepare(struct lock_contention *con) ...@@ -125,7 +142,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 pid = evlist->workload.pid; u32 pid = evlist->workload.pid;
u8 val = 1; u8 val = 1;
skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter); fd = bpf_map__fd(skel->maps.task_filter);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY); bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
} }
...@@ -133,7 +149,6 @@ int lock_contention_prepare(struct lock_contention *con) ...@@ -133,7 +149,6 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_types) { if (con->filters->nr_types) {
u8 val = 1; u8 val = 1;
skel->bss->has_type = 1;
fd = bpf_map__fd(skel->maps.type_filter); fd = bpf_map__fd(skel->maps.type_filter);
for (i = 0; i < con->filters->nr_types; i++) for (i = 0; i < con->filters->nr_types; i++)
...@@ -143,7 +158,6 @@ int lock_contention_prepare(struct lock_contention *con) ...@@ -143,7 +158,6 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_addrs) { if (con->filters->nr_addrs) {
u8 val = 1; u8 val = 1;
skel->bss->has_addr = 1;
fd = bpf_map__fd(skel->maps.addr_filter); fd = bpf_map__fd(skel->maps.addr_filter);
for (i = 0; i < con->filters->nr_addrs; i++) for (i = 0; i < con->filters->nr_addrs; i++)
...@@ -153,25 +167,14 @@ int lock_contention_prepare(struct lock_contention *con) ...@@ -153,25 +167,14 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_cgrps) { if (con->filters->nr_cgrps) {
u8 val = 1; u8 val = 1;
skel->bss->has_cgroup = 1;
fd = bpf_map__fd(skel->maps.cgroup_filter); fd = bpf_map__fd(skel->maps.cgroup_filter);
for (i = 0; i < con->filters->nr_cgrps; i++) for (i = 0; i < con->filters->nr_cgrps; i++)
bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY); bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY);
} }
/* these don't work well if in the rodata section */ if (con->aggr_mode == LOCK_AGGR_CGROUP)
skel->bss->stack_skip = con->stack_skip;
skel->bss->aggr_mode = con->aggr_mode;
skel->bss->needs_callstack = con->save_callstack;
skel->bss->lock_owner = con->owner;
if (con->aggr_mode == LOCK_AGGR_CGROUP) {
if (cgroup_is_v2("perf_event"))
skel->bss->use_cgroup_v2 = 1;
read_all_cgroups(&con->cgroups); read_all_cgroups(&con->cgroups);
}
bpf_program__set_autoload(skel->progs.collect_lock_syms, false); bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
......
...@@ -117,21 +117,22 @@ struct mm_struct___new { ...@@ -117,21 +117,22 @@ struct mm_struct___new {
} __attribute__((preserve_access_index)); } __attribute__((preserve_access_index));
/* control flags */ /* control flags */
int enabled; const volatile int has_cpu;
int has_cpu; const volatile int has_task;
int has_task; const volatile int has_type;
int has_type; const volatile int has_addr;
int has_addr; const volatile int has_cgroup;
int has_cgroup; const volatile int needs_callstack;
int needs_callstack; const volatile int stack_skip;
int stack_skip; const volatile int lock_owner;
int lock_owner; const volatile int use_cgroup_v2;
int use_cgroup_v2;
int perf_subsys_id = -1;
/* determine the key of lock stat */ /* determine the key of lock stat */
int aggr_mode; const volatile int aggr_mode;
int enabled;
int perf_subsys_id = -1;
__u64 end_ts; __u64 end_ts;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment