Commit 8c2accc8 authored by Ingo Molnar

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  - Allow BPF scriptlets to specify arguments to be fetched using
    DWARF info, using a prologue generated at compile/build time (He Kuang, Wang Nan)

  - Allow attaching BPF scriptlets to module symbols (Wang Nan)

  - Allow attaching BPF scriptlets to userspace code using uprobe (Wang Nan)

  - BPF programs can now specify 'perf probe' tunables via their section names,
    separating key=val values using semicolons (Wang Nan)

Testing some of these new BPF features:

  Use case: get callchains when receiving SSL packets, filter them in the
            kernel, at an arbitrary place.

  # cat ssl.bpf.c
  #define SEC(NAME) __attribute__((section(NAME), used))

  struct pt_regs;

  SEC("func=__inet_lookup_established hnum")
  int func(struct pt_regs *ctx, int err, unsigned short port)
  {
          return err == 0 && port == 443;
  }

  char _license[] SEC("license") = "GPL";
  int  _version   SEC("version") = LINUX_VERSION_CODE;
  #
  # perf record -a -g -e ssl.bpf.c
  ^C[ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.787 MB perf.data (3 samples) ]
  # perf script | head -30
  swapper     0 [000] 58783.268118: perf_bpf_probe:func: (ffffffff816a0f60) hnum=0x1bb
	 8a0f61 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux)
	 896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux)
	 8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux)
	 855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
	 8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
	 8572a8 process_backlog (/lib/modules/4.3.0+/build/vmlinux)
	 856b11 net_rx_action (/lib/modules/4.3.0+/build/vmlinux)
	 2a284b __do_softirq (/lib/modules/4.3.0+/build/vmlinux)
	 2a2ba3 irq_exit (/lib/modules/4.3.0+/build/vmlinux)
	 96b7a4 do_IRQ (/lib/modules/4.3.0+/build/vmlinux)
	 969807 ret_from_intr (/lib/modules/4.3.0+/build/vmlinux)
	 2dede5 cpu_startup_entry (/lib/modules/4.3.0+/build/vmlinux)
	 95d5bc rest_init (/lib/modules/4.3.0+/build/vmlinux)
	1163ffa start_kernel ([kernel.vmlinux].init.text)
	11634d7 x86_64_start_reservations ([kernel.vmlinux].init.text)
	1163623 x86_64_start_kernel ([kernel.vmlinux].init.text)

  qemu-system-x86  9178 [003] 58785.792417: perf_bpf_probe:func: (ffffffff816a0f60) hnum=0x1bb
	 8a0f61 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux)
	 896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux)
	 8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux)
	 855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
	 8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
	 856660 netif_receive_skb_internal (/lib/modules/4.3.0+/build/vmlinux)
	 8566ec netif_receive_skb_sk (/lib/modules/4.3.0+/build/vmlinux)
	   430a br_handle_frame_finish ([bridge])
	   48bc br_handle_frame ([bridge])
	 855f44 __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
	 8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
  #

    Use the various 'perf probe' options to list functions and to see what
    variables can be collected at any given point. Experiment first by collecting
    without a filter, then add a filter, and use it together with 'perf trace' or
    'perf top', with or without callchains. If it explodes, please tell us!

  - Introduce a new callchain mode: "folded", that will list per line
    representations of all callchains for a given histogram entry, facilitating
    'perf report' output processing by other tools, such as Brendan Gregg's
    flamegraph tools (Namhyung Kim)

  E.g:

 # perf report | grep -v ^# | head
    18.37%     0.00%  swapper  [kernel.kallsyms]   [k] cpu_startup_entry
                    |
                    ---cpu_startup_entry
                       |
                       |--12.07%--start_secondary
                       |
                        --6.30%--rest_init
                                  start_kernel
                                  x86_64_start_reservations
                                  x86_64_start_kernel
  #

 Becomes, in "folded" mode:

 # perf report -g folded | grep -v ^# | head -5
     18.37%     0.00%  swapper [kernel.kallsyms]   [k] cpu_startup_entry
   12.07% cpu_startup_entry;start_secondary
    6.30% cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
     16.90%     0.00%  swapper [kernel.kallsyms]   [k] call_cpuidle
   11.23% call_cpuidle;cpu_startup_entry;start_secondary
    5.67% call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
     16.90%     0.00%  swapper [kernel.kallsyms]   [k] cpuidle_enter
   11.23% cpuidle_enter;call_cpuidle;cpu_startup_entry;start_secondary
    5.67% cpuidle_enter;call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
     15.12%     0.00%  swapper [kernel.kallsyms]   [k] cpuidle_enter_state
  #

   The user can also select one of "count", "period" or "percent" as the first column.

Infrastructure changes:

  - Fix multiple leaks found with Valgrind and a refcount
    debugger (Masami Hiramatsu)

  - Add further 'perf test' entries for BPF and LLVM (Wang Nan)

  - Improve 'perf test' to support subtests, so that the series of tests
    performed in the LLVM and BPF main tests appear in the default 'perf test'
    output (Wang Nan)

  - Move memdup() from tools/perf to tools/lib/string.c (Arnaldo Carvalho de Melo)

  - Adopt strtobool() from the kernel into tools/lib/ (Wang Nan)

  - Fix selftests_install tools/ Makefile rule (Kevin Hilman)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 90eec103 2c6caff2
......@@ -96,7 +96,7 @@ cgroup_install firewire_install hv_install lguest_install perf_install usb_insta
$(call descend,$(@:_install=),install)
selftests_install:
$(call descend,testing/$(@:_clean=),install)
$(call descend,testing/$(@:_install=),install)
turbostat_install x86_energy_perf_policy_install:
$(call descend,power/x86/$(@:_install=),install)
......
#ifndef _TOOLS_LINUX_STRING_H_
#define _TOOLS_LINUX_STRING_H_
#include <linux/types.h> /* for size_t */
/*
* memdup - return a malloc()ed copy of the first len bytes at src,
* or NULL when the allocation fails.
*/
void *memdup(const void *src, size_t len);
/*
* strtobool - set *res from the first character of s: 'y', 'Y' or '1'
* give true, 'n', 'N' or '0' give false. Returns 0 on a match and
* -EINVAL otherwise.
*/
int strtobool(const char *s, bool *res);
#endif /* _TOOLS_LINUX_STRING_H_ */
......@@ -152,7 +152,11 @@ struct bpf_program {
} *reloc_desc;
int nr_reloc;
int fd;
struct {
int nr;
int *fds;
} instances;
bpf_program_prep_t preprocessor;
struct bpf_object *obj;
void *priv;
......@@ -206,10 +210,25 @@ struct bpf_object {
/*
* bpf_program__unload - release the file descriptors held by a program.
*
* Does nothing for a NULL prog. Otherwise closes prog->fd and, when
* instances.nr is positive, every entry of instances.fds. Any other
* instances.nr value except -1 (including 0) is reported as an internal
* error. Finally instances.nr is reset to -1 and instances.fds is
* released through zfree().
*
* NOTE(review): zclose()/zfree() are defined outside this view; presumably
* they close/free and then reset the variable - confirm.
*/
static void bpf_program__unload(struct bpf_program *prog)
{
int i;
/* Nothing to release without a program. */
if (!prog)
return;
zclose(prog->fd);
/*
* If the object is opened but the program was never loaded,
* it is possible that prog->instances.nr == -1.
*/
if (prog->instances.nr > 0) {
for (i = 0; i < prog->instances.nr; i++)
zclose(prog->instances.fds[i]);
} else if (prog->instances.nr != -1) {
pr_warning("Internal error: instances.nr is %d\n",
prog->instances.nr);
}
/* Put the instance bookkeeping back into the "never loaded" state. */
prog->instances.nr = -1;
zfree(&prog->instances.fds);
}
static void bpf_program__exit(struct bpf_program *prog)
......@@ -260,7 +279,8 @@ bpf_program__init(void *data, size_t size, char *name, int idx,
memcpy(prog->insns, data,
prog->insns_cnt * sizeof(struct bpf_insn));
prog->idx = idx;
prog->fd = -1;
prog->instances.fds = NULL;
prog->instances.nr = -1;
return 0;
errout:
......@@ -860,13 +880,73 @@ static int
bpf_program__load(struct bpf_program *prog,
char *license, u32 kern_version)
{
int err, fd;
int err = 0, fd, i;
if (prog->instances.nr < 0 || !prog->instances.fds) {
if (prog->preprocessor) {
pr_warning("Internal error: can't load program '%s'\n",
prog->section_name);
return -LIBBPF_ERRNO__INTERNAL;
}
prog->instances.fds = malloc(sizeof(int));
if (!prog->instances.fds) {
pr_warning("Not enough memory for BPF fds\n");
return -ENOMEM;
}
prog->instances.nr = 1;
prog->instances.fds[0] = -1;
}
if (!prog->preprocessor) {
if (prog->instances.nr != 1) {
pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
prog->section_name, prog->instances.nr);
}
err = load_program(prog->insns, prog->insns_cnt,
license, kern_version, &fd);
if (!err)
prog->fd = fd;
prog->instances.fds[0] = fd;
goto out;
}
for (i = 0; i < prog->instances.nr; i++) {
struct bpf_prog_prep_result result;
bpf_program_prep_t preprocessor = prog->preprocessor;
bzero(&result, sizeof(result));
err = preprocessor(prog, i, prog->insns,
prog->insns_cnt, &result);
if (err) {
pr_warning("Preprocessing the %dth instance of program '%s' failed\n",
i, prog->section_name);
goto out;
}
if (!result.new_insn_ptr || !result.new_insn_cnt) {
pr_debug("Skip loading the %dth instance of program '%s'\n",
i, prog->section_name);
prog->instances.fds[i] = -1;
if (result.pfd)
*result.pfd = -1;
continue;
}
err = load_program(result.new_insn_ptr,
result.new_insn_cnt,
license, kern_version, &fd);
if (err) {
pr_warning("Loading the %dth instance of program '%s' failed\n",
i, prog->section_name);
goto out;
}
if (result.pfd)
*result.pfd = fd;
prog->instances.fds[i] = fd;
}
out:
if (err)
pr_warning("failed to load program '%s'\n",
prog->section_name);
......@@ -1121,5 +1201,53 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
/*
 * bpf_program__fd - get the fd of a program that has a single instance.
 * @prog: program to query.
 *
 * Equivalent to bpf_program__nth_fd(prog, 0): returns the fd of instance 0,
 * or the negative error code bpf_program__nth_fd() reports when that
 * instance does not exist or holds no valid fd.
 */
int bpf_program__fd(struct bpf_program *prog)
{
	/*
	 * Instance 0 is the only instance when no preprocessor is set, so
	 * delegate instead of reading a separate fd field.
	 */
	return bpf_program__nth_fd(prog, 0);
}
/*
 * bpf_program__set_prep - attach a preprocessor to a not-yet-loaded program.
 * @prog: program to configure.
 * @nr_instances: number of instances to reserve fd slots for; must be > 0.
 * @prep: preprocessor callback; must not be NULL.
 *
 * Returns 0 on success, -EINVAL on bad arguments or when the program
 * already has instance state, and -ENOMEM when the fd array cannot be
 * allocated. On success every fd slot holds -1.
 */
int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
			  bpf_program_prep_t prep)
{
	int *fds;
	int k;

	/* Both a callback and a positive instance count are mandatory. */
	if (!prep || nr_instances <= 0)
		return -EINVAL;

	/* An fd array or a positive count means instances were already set up. */
	if (prog->instances.fds || prog->instances.nr > 0) {
		pr_warning("Can't set pre-processor after loading\n");
		return -EINVAL;
	}

	fds = malloc(nr_instances * sizeof(*fds));
	if (!fds) {
		pr_warning("alloc memory failed for fds\n");
		return -ENOMEM;
	}

	/* Every slot starts out as "no fd". */
	for (k = 0; k < nr_instances; k++)
		fds[k] = -1;

	prog->preprocessor = prep;
	prog->instances.fds = fds;
	prog->instances.nr = nr_instances;

	return 0;
}
/*
 * bpf_program__nth_fd - get the fd stored for the n-th instance of a program.
 * @prog: program to query.
 * @n: zero-based instance index.
 *
 * Returns the stored fd, -EINVAL when n is outside [0, instances.nr), or
 * -ENOENT when the slot holds a negative (invalid) fd. Both failure cases
 * emit a warning.
 */
int bpf_program__nth_fd(struct bpf_program *prog, int n)
{
	int instance_fd;

	if (n < 0 || n >= prog->instances.nr) {
		pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
			   n, prog->section_name, prog->instances.nr);
		return -EINVAL;
	}

	instance_fd = prog->instances.fds[n];
	if (instance_fd >= 0)
		return instance_fd;

	pr_warning("%dth instance of program '%s' is invalid\n",
		   n, prog->section_name);
	return -ENOENT;
}
......@@ -88,6 +88,70 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
int bpf_program__fd(struct bpf_program *prog);
struct bpf_insn;
/*
* Libbpf allows callers to adjust BPF programs before they are loaded
* into the kernel. One program in an object file can be transformed into
* multiple variants to be attached to different code.
*
* bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
* are the APIs for this purpose.
*
* - bpf_program_prep_t:
* It defines the 'preprocessor', which is a caller-defined function
* passed to libbpf through bpf_program__set_prep(), and will be
* called before the program is loaded. The preprocessor should adjust
* the program once for each instance, according to the instance
* number passed to it.
*
* - bpf_program__set_prep:
* Attaches a preprocessor to a BPF program. The number of instances
* that should be created is also passed through this function.
*
* - bpf_program__nth_fd:
* After the program is loaded, gets the resulting fd of each
* instance from the bpf program.
*
* If bpf_program__set_prep() is not used, the program will be loaded
* without adjustment during bpf_object__load(). The program has only
* one instance. In this case bpf_program__fd(prog) is equal to
* bpf_program__nth_fd(prog, 0).
*/
/*
* Result of one preprocessor invocation, filled in by the preprocessor
* for the instance it was asked to generate.
*/
struct bpf_prog_prep_result {
/*
* If not NULL, load new instruction array.
* If set to NULL, don't load this instance.
* The loader also skips the instance when new_insn_cnt is 0.
*/
struct bpf_insn *new_insn_ptr;
/* Number of instructions in new_insn_ptr. */
int new_insn_cnt;
/*
* If not NULL, result fd is set to it: the loaded fd on success,
* or -1 when the instance is skipped.
*/
int *pfd;
};
/*
* Parameters of bpf_program_prep_t:
* - prog: The bpf_program being loaded.
* - n: Index of instance being generated.
* - insns: BPF instructions array.
* - insns_cnt: Number of instructions in insns.
* - res: Output parameter, result of transformation.
*
* Return value:
* - Zero: pre-processing success.
* - Non-zero: pre-processing error, stop loading.
*/
typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
struct bpf_insn *insns, int insns_cnt,
struct bpf_prog_prep_result *res);
/*
* Attach a preprocessor and reserve fd slots for the given number of
* instances. Returns 0 on success, -EINVAL for a non-positive instance
* count, a NULL prep or a program that already has instances, and
* -ENOMEM when the fd array cannot be allocated.
*/
int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
bpf_program_prep_t prep);
/*
* Return the fd of the n-th instance, -EINVAL when n is out of range,
* or -ENOENT when that instance has no valid fd.
*/
int bpf_program__nth_fd(struct bpf_program *prog, int n);
/*
* We don't need __attribute__((packed)) now since it is
* unnecessary for 'bpf_map_def' because they are all aligned.
......
/*
* linux/tools/lib/string.c
*
* Copied from linux/lib/string.c, where it is:
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* More specifically, the first copied function was strtobool, which
* was introduced by:
*
* d0f1fed29e6e ("Add a strtobool function matching semantics of existing in kernel equivalents")
* Author: Jonathan Cameron <jic23@cam.ac.uk>
*/
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <linux/string.h>
/**
 * memdup - duplicate region of memory
 *
 * @src: memory region to duplicate
 * @len: memory region length
 *
 * Return: a newly malloc()ed copy of the region that the caller must
 * free(), or NULL when the allocation fails. For a zero @len the result
 * is whatever malloc(0) returns and @src is never read.
 */
void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);

	/*
	 * Skip the copy for empty regions: passing a NULL or otherwise
	 * invalid @src to memcpy() is undefined even when @len is 0.
	 */
	if (p && len)
		memcpy(p, src, len);

	return p;
}
/**
 * strtobool - convert common user inputs into boolean values
 * @s: input string
 * @res: result
 *
 * This routine returns 0 iff the first character is one of 'Yy1Nn0'.
 * Otherwise it will return -EINVAL, which is also the result for a NULL
 * @s. Value pointed to by res is updated upon finding a match and is left
 * untouched otherwise.
 */
int strtobool(const char *s, bool *res)
{
	/* A NULL string cannot match anything; reject it instead of crashing. */
	if (!s)
		return -EINVAL;

	switch (s[0]) {
	case 'y':
	case 'Y':
	case '1':
		*res = true;
		break;
	case 'n':
	case 'N':
	case '0':
		*res = false;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
......@@ -170,17 +170,18 @@ OPTIONS
Dump raw trace in ASCII.
-g::
--call-graph=<print_type,threshold[,print_limit],order,sort_key,branch>::
--call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
Display call chains using type, min percent threshold, print limit,
call order, sort key and branch. Note that ordering of parameters is not
fixed so any parameter can be given in an arbitrary order. One exception
is the print_limit which should be preceded by threshold.
call order, sort key, optional branch and value. Note that ordering of
parameters is not fixed so any parameter can be given in an arbitrary order.
One exception is the print_limit which should be preceded by threshold.
print_type can be either:
- flat: single column, linear exposure of call chains.
- graph: use a graph tree, displaying absolute overhead rates. (default)
- fractal: like graph, but displays relative rates. Each branch of
the tree is considered as a new profiled object.
- folded: call chains are displayed in a line, separated by semicolons
- none: disable call chain display.
threshold is a percentage value which specifies a minimum percent to be
......@@ -204,6 +205,11 @@ OPTIONS
- branch: include last branch information in callgraph when available.
Usually more convenient to use --branch-history for this.
value can be:
- percent: display overhead percent (default)
- period: display event period
- count: display event count
--children::
Accumulate callchain of children to parent entry so that then can
show up in the output. The output will have a new "Children" column
......
......@@ -22,6 +22,7 @@ tools/lib/api
tools/lib/bpf
tools/lib/hweight.c
tools/lib/rbtree.c
tools/lib/string.c
tools/lib/symbol/kallsyms.c
tools/lib/symbol/kallsyms.h
tools/lib/util/find_next_bit.c
......@@ -50,6 +51,7 @@ tools/include/linux/log2.h
tools/include/linux/poison.h
tools/include/linux/rbtree.h
tools/include/linux/rbtree_augmented.h
tools/include/linux/string.h
tools/include/linux/types.h
tools/include/linux/err.h
include/asm-generic/bitops/arch_hweight.h
......
......@@ -2,10 +2,10 @@
#define ARCH_TESTS_H
/* Tests */
int test__rdpmc(void);
int test__perf_time_to_tsc(void);
int test__insn_x86(void);
int test__intel_cqm_count_nmi_context(void);
int test__rdpmc(int subtest);
int test__perf_time_to_tsc(int subtest);
int test__insn_x86(int subtest);
int test__intel_cqm_count_nmi_context(int subtest);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
......
......@@ -171,7 +171,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64)
* verbose (-v) option to see all the instructions and whether or not they
* decoded successfully.
*/
int test__insn_x86(void)
int test__insn_x86(int subtest __maybe_unused)
{
int ret = 0;
......
......@@ -33,7 +33,7 @@ static pid_t spawn(void)
* the last read counter value to avoid triggering a WARN_ON_ONCE() in
* smp_call_function_many() caused by sending IPIs from NMI context.
*/
int test__intel_cqm_count_nmi_context(void)
int test__intel_cqm_count_nmi_context(int subtest __maybe_unused)
{
struct perf_evlist *evlist = NULL;
struct perf_evsel *evsel = NULL;
......
......@@ -35,7 +35,7 @@
* %0 is returned, otherwise %-1 is returned. If TSC conversion is not
* supported then the test passes but " (not supported)" is printed.
*/
int test__perf_time_to_tsc(void)
int test__perf_time_to_tsc(int subtest __maybe_unused)
{
struct record_opts opts = {
.mmap_pages = UINT_MAX,
......
......@@ -149,7 +149,7 @@ static int __test__rdpmc(void)
return 0;
}
int test__rdpmc(void)
int test__rdpmc(int subtest __maybe_unused)
{
int status = 0;
int wret = 0;
......
......@@ -5,6 +5,7 @@ libperf-y += kvm-stat.o
libperf-y += perf_regs.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
......
......@@ -625,7 +625,7 @@ parse_percent_limit(const struct option *opt, const char *str,
return 0;
}
#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function"
#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
CALLCHAIN_REPORT_HELP
......@@ -708,7 +708,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
"Only display entries with parent-match"),
OPT_CALLBACK_DEFAULT('g', "call-graph", &report,
"print_type,threshold[,print_limit],order,sort_key[,branch]",
"print_type,threshold[,print_limit],order,sort_key[,branch],value",
report_callchain_help, &report_parse_callchain_opt,
callchain_default_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
......
......@@ -318,6 +318,18 @@ ifndef NO_LIBELF
CFLAGS += -DHAVE_LIBBPF_SUPPORT
$(call detected,CONFIG_LIBBPF)
endif
ifndef NO_DWARF
ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
CFLAGS += -DHAVE_BPF_PROLOGUE
$(call detected,CONFIG_BPF_PROLOGUE)
else
msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset());
endif
else
msg := $(warning DWARF support is off, BPF prologue is disabled);
endif
endif # NO_LIBBPF
endif # NO_LIBELF
......
llvm-src-base.c
llvm-src-kbuild.c
llvm-src-prologue.c
......@@ -31,7 +31,7 @@ perf-y += sample-parsing.o
perf-y += parse-no-sample-id-all.o
perf-y += kmod-path.o
perf-y += thread-map.o
perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o
perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o
perf-y += bpf.o
perf-y += topology.o
......@@ -49,6 +49,13 @@ $(OUTPUT)tests/llvm-src-kbuild.c: tests/bpf-script-test-kbuild.c
$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
$(Q)echo ';' >> $@
$(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c
$(call rule_mkdir)
$(Q)echo '#include <tests/llvm.h>' > $@
$(Q)echo 'const char test_llvm__bpf_test_prologue_prog[] =' >> $@
$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
$(Q)echo ';' >> $@
ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
endif
......
......@@ -153,7 +153,7 @@ static int run_dir(const char *d, const char *perf)
return system(cmd);
}
int test__attr(void)
int test__attr(int subtest __maybe_unused)
{
struct stat st;
char path_perf[PATH_MAX];
......
......@@ -111,7 +111,7 @@ static long long bp_count(int fd)
return count;
}
int test__bp_signal(void)
int test__bp_signal(int subtest __maybe_unused)
{
struct sigaction sa;
long long count1, count2;
......
......@@ -58,7 +58,7 @@ static long long bp_count(int fd)
#define EXECUTIONS 10000
#define THRESHOLD 100
int test__bp_signal_overflow(void)
int test__bp_signal_overflow(int subtest __maybe_unused)
{
struct perf_event_attr pe;
struct sigaction sa;
......
/*
* bpf-script-test-prologue.c
* Test BPF prologue
*/
#ifndef LINUX_VERSION_CODE
# error Need LINUX_VERSION_CODE
# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
#endif
#define SEC(NAME) __attribute__((section(NAME), used))
#include <uapi/linux/fs.h>
#define FMODE_READ 0x1
#define FMODE_WRITE 0x2
static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *) 6;
/*
 * Filter for the null_lseek probe: the section name asks for
 * file->f_mode, offset and orig to be passed in as extra arguments.
 *
 * Returns 1 only when err is zero, the file is not opened for writing,
 * the offset is even and orig is not SEEK_CUR; returns 0 otherwise.
 * NOTE(review): err is presumably non-zero when fetching the arguments
 * failed - confirm against the prologue generator.
 */
SEC("func=null_lseek file->f_mode offset orig")
int bpf_func__null_lseek(void *ctx, int err, unsigned long f_mode,
			 unsigned long offset, unsigned long orig)
{
	if (err || (f_mode & FMODE_WRITE) || (offset & 1) || orig == SEEK_CUR)
		return 0;

	return 1;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
......@@ -19,6 +19,29 @@ static int epoll_pwait_loop(void)
return 0;
}
#ifdef HAVE_BPF_PROLOGUE
/*
 * llseek_loop - workload for the BPF prologue test.
 *
 * Opens /dev/null once read-only and once read-write, then issues two
 * lseek() calls per iteration for NR_ITERS iterations, alternating the
 * descriptor and switching between SEEK_SET and SEEK_CUR every two
 * iterations.
 *
 * Returns 0 on success, or -1 when either open() fails.
 */
static int llseek_loop(void)
{
	int fds[2], i;

	fds[0] = open("/dev/null", O_RDONLY);
	fds[1] = open("/dev/null", O_RDWR);

	if (fds[0] < 0 || fds[1] < 0) {
		/* Don't leak whichever descriptor did open successfully. */
		if (fds[0] >= 0)
			close(fds[0]);
		if (fds[1] >= 0)
			close(fds[1]);
		return -1;
	}

	for (i = 0; i < NR_ITERS; i++) {
		lseek(fds[i % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
		lseek(fds[(i + 1) % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
	}

	close(fds[0]);
	close(fds[1]);
	return 0;
}
#endif
static struct {
enum test_llvm__testcase prog_id;
const char *desc;
......@@ -37,6 +60,17 @@ static struct {
&epoll_pwait_loop,
(NR_ITERS + 1) / 2,
},
#ifdef HAVE_BPF_PROLOGUE
{
LLVM_TESTCASE_BPF_PROLOGUE,
"Test BPF prologue generation",
"[bpf_prologue_test]",
"fix kbuild first",
"check your vmlinux setting?",
&llseek_loop,
(NR_ITERS + 1) / 4,
},
#endif
};
static int do_test(struct bpf_object *obj, int (*func)(void),
......@@ -68,7 +102,6 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj);
if (err || list_empty(&parse_evlist.list)) {
pr_debug("Failed to add events selected by BPF\n");
if (!err)
return TEST_FAIL;
}
......@@ -123,8 +156,10 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
}
}
if (count != expect)
if (count != expect) {
pr_debug("BPF filter result incorrect\n");
goto out_delete_evlist;
}
ret = TEST_OK;
......@@ -146,7 +181,7 @@ prepare_bpf(void *obj_buf, size_t obj_buf_sz, const char *name)
return obj;
}
static int __test__bpf(int index)
static int __test__bpf(int idx)
{
int ret;
void *obj_buf;
......@@ -154,54 +189,72 @@ static int __test__bpf(int index)
struct bpf_object *obj;
ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
bpf_testcase_table[index].prog_id,
bpf_testcase_table[idx].prog_id,
true);
if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
pr_debug("Unable to get BPF object, %s\n",
bpf_testcase_table[index].msg_compile_fail);
if (index == 0)
bpf_testcase_table[idx].msg_compile_fail);
if (idx == 0)
return TEST_SKIP;
else
return TEST_FAIL;
}
obj = prepare_bpf(obj_buf, obj_buf_sz,
bpf_testcase_table[index].name);
bpf_testcase_table[idx].name);
if (!obj) {
ret = TEST_FAIL;
goto out;
}
ret = do_test(obj,
bpf_testcase_table[index].target_func,
bpf_testcase_table[index].expect_result);
bpf_testcase_table[idx].target_func,
bpf_testcase_table[idx].expect_result);
out:
bpf__clear();
return ret;
}
int test__bpf(void)
/* Number of BPF subtests, i.e. the number of entries in bpf_testcase_table. */
int test__bpf_subtest_get_nr(void)
{
	const int nr_subtests = (int)ARRAY_SIZE(bpf_testcase_table);

	return nr_subtests;
}
/*
 * Description of the i-th BPF subtest, or NULL when i does not index an
 * entry of bpf_testcase_table.
 */
const char *test__bpf_subtest_get_desc(int i)
{
	const int nr_subtests = (int)ARRAY_SIZE(bpf_testcase_table);

	return (i >= 0 && i < nr_subtests) ? bpf_testcase_table[i].desc : NULL;
}
int test__bpf(int i)
{
unsigned int i;
int err;
if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
return TEST_FAIL;
if (geteuid() != 0) {
pr_debug("Only root can run BPF test\n");
return TEST_SKIP;
}
for (i = 0; i < ARRAY_SIZE(bpf_testcase_table); i++) {
err = __test__bpf(i);
if (err != TEST_OK)
return err;
}
return TEST_OK;
}
#else
int test__bpf(void)
/*
* Fallback variant (the #else branch): reports that there are no BPF
* subtests.
*/
int test__bpf_subtest_get_nr(void)
{
return 0;
}
/*
* Fallback variant (the #else branch): there is no subtest table here,
* so no description is available for any index.
*/
const char *test__bpf_subtest_get_desc(int i __maybe_unused)
{
return NULL;
}
int test__bpf(int i __maybe_unused)
{
pr_debug("Skip BPF test because BPF support is not compiled\n");
return TEST_SKIP;
......
......@@ -160,6 +160,11 @@ static struct test generic_tests[] = {
{
.desc = "Test LLVM searching and compiling",
.func = test__llvm,
.subtest = {
.skip_if_fail = true,
.get_nr = test__llvm_subtest_get_nr,
.get_desc = test__llvm_subtest_get_desc,
},
},
{
.desc = "Test topology in session",
......@@ -168,6 +173,11 @@ static struct test generic_tests[] = {
{
.desc = "Test BPF filter",
.func = test__bpf,
.subtest = {
.skip_if_fail = true,
.get_nr = test__bpf_subtest_get_nr,
.get_desc = test__bpf_subtest_get_desc,
},
},
{
.func = NULL,
......@@ -203,7 +213,7 @@ static bool perf_test__matches(struct test *test, int curr, int argc, const char
return false;
}
static int run_test(struct test *test)
static int run_test(struct test *test, int subtest)
{
int status, err = -1, child = fork();
char sbuf[STRERR_BUFSIZE];
......@@ -216,7 +226,19 @@ static int run_test(struct test *test)
if (!child) {
pr_debug("test child forked, pid %d\n", getpid());
err = test->func();
if (!verbose) {
int nullfd = open("/dev/null", O_WRONLY);
if (nullfd >= 0) {
close(STDERR_FILENO);
close(STDOUT_FILENO);
dup2(nullfd, STDOUT_FILENO);
dup2(STDOUT_FILENO, STDERR_FILENO);
close(nullfd);
}
}
err = test->func(subtest);
exit(err);
}
......@@ -237,6 +259,40 @@ static int run_test(struct test *test)
for (j = 0; j < ARRAY_SIZE(tests); j++) \
for (t = &tests[j][0]; t->func; t++)
/*
 * test_and_print - run one test (or one subtest) and print its verdict.
 * @t: test to run.
 * @force_skip: when true the test is not run and is reported as skipped.
 * @subtest: subtest index handed to run_test(); only printed when the
 *           test provides a subtest.get_nr callback.
 *
 * Returns the status from run_test(), or TEST_SKIP when force_skip is set.
 */
static int test_and_print(struct test *t, bool force_skip, int subtest)
{
	int result;

	if (force_skip) {
		pr_debug("\n--- force skipped ---\n");
		result = TEST_SKIP;
	} else {
		pr_debug("\n--- start ---\n");
		result = run_test(t, subtest);
		pr_debug("---- end ----\n");
	}

	if (t->subtest.get_nr)
		pr_debug("%s subtest %d:", t->desc, subtest);
	else
		pr_debug("%s:", t->desc);

	/* Anything that is neither Ok nor Skip is reported as a failure. */
	if (result == TEST_OK)
		pr_info(" Ok\n");
	else if (result == TEST_SKIP)
		color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
	else
		color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");

	return result;
}
static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
{
struct test *t;
......@@ -264,21 +320,43 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
continue;
}
pr_debug("\n--- start ---\n");
err = run_test(t);
pr_debug("---- end ----\n%s:", t->desc);
if (!t->subtest.get_nr) {
test_and_print(t, false, -1);
} else {
int subn = t->subtest.get_nr();
/*
* minus 2 to align with normal testcases.
* For subtest we print additional '.x' in number.
* for example:
*
* 35: Test LLVM searching and compiling :
* 35.1: Basic BPF llvm compiling test : Ok
*/
int subw = width > 2 ? width - 2 : width;
bool skip = false;
int subi;
switch (err) {
case TEST_OK:
pr_info(" Ok\n");
break;
case TEST_SKIP:
color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
break;
case TEST_FAIL:
default:
color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
break;
if (subn <= 0) {
color_fprintf(stderr, PERF_COLOR_YELLOW,
" Skip (not compiled in)\n");
continue;
}
pr_info("\n");
for (subi = 0; subi < subn; subi++) {
int len = strlen(t->subtest.get_desc(subi));
if (subw < len)
subw = len;
}
for (subi = 0; subi < subn; subi++) {
pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
t->subtest.get_desc(subi));
err = test_and_print(t, skip, subi);
if (err != TEST_OK && t->subtest.skip_if_fail)
skip = true;
}
}
}
......
......@@ -601,7 +601,7 @@ static int do_test_code_reading(bool try_kcore)
return err;
}
int test__code_reading(void)
int test__code_reading(int subtest __maybe_unused)
{
int ret;
......
......@@ -110,7 +110,7 @@ static int dso__data_fd(struct dso *dso, struct machine *machine)
return fd;
}
int test__dso_data(void)
int test__dso_data(int subtest __maybe_unused)
{
struct machine machine;
struct dso *dso;
......@@ -245,7 +245,7 @@ static int set_fd_limit(int n)
return setrlimit(RLIMIT_NOFILE, &rlim);
}
int test__dso_data_cache(void)
int test__dso_data_cache(int subtest __maybe_unused)
{
struct machine machine;
long nr_end, nr = open_files_cnt();
......@@ -302,7 +302,7 @@ int test__dso_data_cache(void)
return 0;
}
int test__dso_data_reopen(void)
int test__dso_data_reopen(int subtest __maybe_unused)
{
struct machine machine;
long nr_end, nr = open_files_cnt();
......
......@@ -142,7 +142,7 @@ static int krava_1(struct thread *thread)
return krava_2(thread);
}
int test__dwarf_unwind(void)
int test__dwarf_unwind(int subtest __maybe_unused)
{
struct machines machines;
struct machine *machine;
......
......@@ -95,7 +95,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names)
#define perf_evsel__name_array_test(names) \
__perf_evsel__name_array_test(names, ARRAY_SIZE(names))
int test__perf_evsel__roundtrip_name_test(void)
int test__perf_evsel__roundtrip_name_test(int subtest __maybe_unused)
{
int err = 0, ret = 0;
......
......@@ -32,7 +32,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
return ret;
}
int test__perf_evsel__tp_sched_test(void)
int test__perf_evsel__tp_sched_test(int subtest __maybe_unused)
{
struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch");
int ret = 0;
......
......@@ -25,7 +25,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE
return printed + fdarray__fprintf(fda, fp);
}
int test__fdarray__filter(void)
int test__fdarray__filter(int subtest __maybe_unused)
{
int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
struct fdarray *fda = fdarray__new(5, 5);
......@@ -103,7 +103,7 @@ int test__fdarray__filter(void)
return err;
}
int test__fdarray__add(void)
int test__fdarray__add(int subtest __maybe_unused)
{
int err = TEST_FAIL;
struct fdarray *fda = fdarray__new(2, 2);
......
......@@ -686,7 +686,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
return err;
}
int test__hists_cumulate(void)
int test__hists_cumulate(int subtest __maybe_unused)
{
int err = TEST_FAIL;
struct machines machines;
......
......@@ -104,7 +104,7 @@ static int add_hist_entries(struct perf_evlist *evlist,
return TEST_FAIL;
}
int test__hists_filter(void)
int test__hists_filter(int subtest __maybe_unused)
{
int err = TEST_FAIL;
struct machines machines;
......
......@@ -274,7 +274,7 @@ static int validate_link(struct hists *leader, struct hists *other)
return __validate_link(leader, 0) || __validate_link(other, 1);
}
int test__hists_link(void)
int test__hists_link(int subtest __maybe_unused)
{
int err = -1;
struct hists *hists, *first_hists;
......
......@@ -576,7 +576,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine)
return err;
}
int test__hists_output(void)
int test__hists_output(int subtest __maybe_unused)
{
int err = TEST_FAIL;
struct machines machines;
......
......@@ -49,7 +49,7 @@ static int find_comm(struct perf_evlist *evlist, const char *comm)
* when an event is disabled but a dummy software event is not disabled. If the
* test passes %0 is returned, otherwise %-1 is returned.
*/
int test__keep_tracking(void)
int test__keep_tracking(int subtest __maybe_unused)
{
struct record_opts opts = {
.mmap_pages = UINT_MAX,
......
......@@ -49,7 +49,7 @@ static int test_is_kernel_module(const char *path, int cpumode, bool expect)
#define M(path, c, e) \
TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
int test__kmod_path__parse(void)
int test__kmod_path__parse(int subtest __maybe_unused)
{
/* path alloc_name alloc_ext kmod comp name ext */
T("/xxxx/xxxx/x-x.ko", true , true , true, false, "[x_x]", NULL);
......
......@@ -44,13 +44,17 @@ static struct {
.source = test_llvm__bpf_test_kbuild_prog,
.desc = "Test kbuild searching",
},
[LLVM_TESTCASE_BPF_PROLOGUE] = {
.source = test_llvm__bpf_test_prologue_prog,
.desc = "Compile source for BPF prologue generation test",
},
};
int
test_llvm__fetch_bpf_obj(void **p_obj_buf,
size_t *p_obj_buf_sz,
enum test_llvm__testcase index,
enum test_llvm__testcase idx,
bool force)
{
const char *source;
......@@ -59,11 +63,11 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf,
char *tmpl_new = NULL, *clang_opt_new = NULL;
int err, old_verbose, ret = TEST_FAIL;
if (index >= __LLVM_TESTCASE_MAX)
if (idx >= __LLVM_TESTCASE_MAX)
return TEST_FAIL;
source = bpf_source_table[index].source;
desc = bpf_source_table[index].desc;
source = bpf_source_table[idx].source;
desc = bpf_source_table[idx].desc;
perf_config(perf_config_cb, NULL);
......@@ -127,44 +131,39 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf,
return ret;
}
int test__llvm(void)
int test__llvm(int subtest)
{
enum test_llvm__testcase i;
for (i = 0; i < __LLVM_TESTCASE_MAX; i++) {
int ret;
void *obj_buf = NULL;
size_t obj_buf_sz = 0;
if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
return TEST_FAIL;
ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
i, false);
subtest, false);
if (ret == TEST_OK) {
ret = test__bpf_parsing(obj_buf, obj_buf_sz);
if (ret != TEST_OK)
if (ret != TEST_OK) {
pr_debug("Failed to parse test case '%s'\n",
bpf_source_table[i].desc);
bpf_source_table[subtest].desc);
}
}
free(obj_buf);
switch (ret) {
case TEST_SKIP:
return TEST_SKIP;
case TEST_OK:
break;
default:
/*
* Test 0 is the basic LLVM test. If test 0
* fail, the basic LLVM support not functional
* so the whole test should fail. If other test
* case fail, it can be fixed by adjusting
* config so don't report error.
*/
if (i == 0)
return TEST_FAIL;
else
return TEST_SKIP;
}
}
return TEST_OK;
return ret;
}
/*
 * Number of LLVM sub-testcases, i.e. the count of valid
 * enum test_llvm__testcase indices.
 */
int test__llvm_subtest_get_nr(void)
{
	const int nr_subtests = __LLVM_TESTCASE_MAX;

	return nr_subtests;
}
/*
 * Look up the human readable description of an LLVM sub-testcase.
 *
 * Returns the .desc string stored in bpf_source_table[] for @subtest,
 * or NULL when @subtest is not a valid index.
 */
const char *test__llvm_subtest_get_desc(int subtest)
{
	if (subtest >= 0 && subtest < __LLVM_TESTCASE_MAX)
		return bpf_source_table[subtest].desc;

	return NULL;
}
......@@ -6,10 +6,12 @@
extern const char test_llvm__bpf_base_prog[];
extern const char test_llvm__bpf_test_kbuild_prog[];
extern const char test_llvm__bpf_test_prologue_prog[];
enum test_llvm__testcase {
LLVM_TESTCASE_BASE,
LLVM_TESTCASE_KBUILD,
LLVM_TESTCASE_BPF_PROLOGUE,
__LLVM_TESTCASE_MAX,
};
......
......@@ -16,7 +16,7 @@
* Then it checks if the number of syscalls reported as perf events by
* the kernel corresponds to the number of syscalls made.
*/
int test__basic_mmap(void)
int test__basic_mmap(int subtest __maybe_unused)
{
int err = -1;
union perf_event *event;
......
......@@ -221,7 +221,7 @@ static int mmap_events(synth_cb synth)
*
* by using all thread objects.
*/
int test__mmap_thread_lookup(void)
int test__mmap_thread_lookup(int subtest __maybe_unused)
{
/* perf_event__synthesize_threads synthesize */
TEST_ASSERT_VAL("failed with sythesizing all",
......
......@@ -7,7 +7,7 @@
#include "debug.h"
#include "stat.h"
int test__openat_syscall_event_on_all_cpus(void)
int test__openat_syscall_event_on_all_cpus(int subtest __maybe_unused)
{
int err = -1, fd, cpu;
struct cpu_map *cpus;
......
......@@ -6,7 +6,7 @@
#include "tests.h"
#include "debug.h"
int test__syscall_openat_tp_fields(void)
int test__syscall_openat_tp_fields(int subtest __maybe_unused)
{
struct record_opts opts = {
.target = {
......
......@@ -5,7 +5,7 @@
#include "debug.h"
#include "tests.h"
int test__openat_syscall_event(void)
int test__openat_syscall_event(int subtest __maybe_unused)
{
int err = -1, fd;
struct perf_evsel *evsel;
......
......@@ -1765,7 +1765,7 @@ static void debug_warn(const char *warn, va_list params)
fprintf(stderr, " Warning: %s\n", msg);
}
int test__parse_events(void)
int test__parse_events(int subtest __maybe_unused)
{
int ret1, ret2 = 0;
......
......@@ -67,7 +67,7 @@ struct test_attr_event {
*
* Return: %0 on success, %-1 if the test fails.
*/
int test__parse_no_sample_id_all(void)
int test__parse_no_sample_id_all(int subtest __maybe_unused)
{
int err;
......
......@@ -32,7 +32,7 @@ static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
return cpu;
}
int test__PERF_RECORD(void)
int test__PERF_RECORD(int subtest __maybe_unused)
{
struct record_opts opts = {
.target = {
......
......@@ -133,7 +133,7 @@ static struct list_head *test_terms_list(void)
return &terms;
}
int test__pmu(void)
int test__pmu(int subtest __maybe_unused)
{
char *format = test_format_dir_get();
LIST_HEAD(formats);
......
......@@ -4,11 +4,12 @@
#include <stdio.h>
#include <stdlib.h>
#include <linux/compiler.h>
#include "tests.h"
extern int verbose;
int test__python_use(void)
int test__python_use(int subtest __maybe_unused)
{
char *cmd;
int ret;
......
......@@ -290,7 +290,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
* checks sample format bits separately and together. If the test passes %0 is
* returned, otherwise %-1 is returned.
*/
int test__sample_parsing(void)
int test__sample_parsing(int subtest __maybe_unused)
{
const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15};
u64 sample_type;
......
......@@ -122,7 +122,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
return err;
}
int test__sw_clock_freq(void)
int test__sw_clock_freq(int subtest __maybe_unused)
{
int ret;
......
......@@ -305,7 +305,7 @@ static int process_events(struct perf_evlist *evlist,
* evsel->system_wide and evsel->tracking flags (respectively) with other events
* sometimes enabled or disabled.
*/
int test__switch_tracking(void)
int test__switch_tracking(int subtest __maybe_unused)
{
const char *sched_switch = "sched:sched_switch";
struct switch_tracking switch_tracking = { .tids = NULL, };
......
......@@ -31,7 +31,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
* if the number of exit event reported by the kernel is 1 or not
* in order to check the kernel returns correct number of event.
*/
int test__task_exit(void)
int test__task_exit(int subtest __maybe_unused)
{
int err = -1;
union perf_event *event;
......
#ifndef TESTS_H
#define TESTS_H
#include <stdbool.h>
#define TEST_ASSERT_VAL(text, cond) \
do { \
if (!(cond)) { \
......@@ -26,48 +28,57 @@ enum {
struct test {
const char *desc;
int (*func)(void);
int (*func)(int subtest);
struct {
bool skip_if_fail;
int (*get_nr)(void);
const char *(*get_desc)(int subtest);
} subtest;
};
/* Tests */
int test__vmlinux_matches_kallsyms(void);
int test__openat_syscall_event(void);
int test__openat_syscall_event_on_all_cpus(void);
int test__basic_mmap(void);
int test__PERF_RECORD(void);
int test__perf_evsel__roundtrip_name_test(void);
int test__perf_evsel__tp_sched_test(void);
int test__syscall_openat_tp_fields(void);
int test__pmu(void);
int test__attr(void);
int test__dso_data(void);
int test__dso_data_cache(void);
int test__dso_data_reopen(void);
int test__parse_events(void);
int test__hists_link(void);
int test__python_use(void);
int test__bp_signal(void);
int test__bp_signal_overflow(void);
int test__task_exit(void);
int test__sw_clock_freq(void);
int test__code_reading(void);
int test__sample_parsing(void);
int test__keep_tracking(void);
int test__parse_no_sample_id_all(void);
int test__dwarf_unwind(void);
int test__hists_filter(void);
int test__mmap_thread_lookup(void);
int test__thread_mg_share(void);
int test__hists_output(void);
int test__hists_cumulate(void);
int test__switch_tracking(void);
int test__fdarray__filter(void);
int test__fdarray__add(void);
int test__kmod_path__parse(void);
int test__thread_map(void);
int test__llvm(void);
int test__bpf(void);
int test_session_topology(void);
int test__vmlinux_matches_kallsyms(int subtest);
int test__openat_syscall_event(int subtest);
int test__openat_syscall_event_on_all_cpus(int subtest);
int test__basic_mmap(int subtest);
int test__PERF_RECORD(int subtest);
int test__perf_evsel__roundtrip_name_test(int subtest);
int test__perf_evsel__tp_sched_test(int subtest);
int test__syscall_openat_tp_fields(int subtest);
int test__pmu(int subtest);
int test__attr(int subtest);
int test__dso_data(int subtest);
int test__dso_data_cache(int subtest);
int test__dso_data_reopen(int subtest);
int test__parse_events(int subtest);
int test__hists_link(int subtest);
int test__python_use(int subtest);
int test__bp_signal(int subtest);
int test__bp_signal_overflow(int subtest);
int test__task_exit(int subtest);
int test__sw_clock_freq(int subtest);
int test__code_reading(int subtest);
int test__sample_parsing(int subtest);
int test__keep_tracking(int subtest);
int test__parse_no_sample_id_all(int subtest);
int test__dwarf_unwind(int subtest);
int test__hists_filter(int subtest);
int test__mmap_thread_lookup(int subtest);
int test__thread_mg_share(int subtest);
int test__hists_output(int subtest);
int test__hists_cumulate(int subtest);
int test__switch_tracking(int subtest);
int test__fdarray__filter(int subtest);
int test__fdarray__add(int subtest);
int test__kmod_path__parse(int subtest);
int test__thread_map(int subtest);
int test__llvm(int subtest);
const char *test__llvm_subtest_get_desc(int subtest);
int test__llvm_subtest_get_nr(void);
int test__bpf(int subtest);
const char *test__bpf_subtest_get_desc(int subtest);
int test__bpf_subtest_get_nr(void);
int test_session_topology(int subtest);
#if defined(__arm__) || defined(__aarch64__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
......
......@@ -4,7 +4,7 @@
#include "thread_map.h"
#include "debug.h"
int test__thread_map(void)
int test__thread_map(int subtest __maybe_unused)
{
struct thread_map *map;
......
......@@ -4,7 +4,7 @@
#include "map.h"
#include "debug.h"
int test__thread_mg_share(void)
int test__thread_mg_share(int subtest __maybe_unused)
{
struct machines machines;
struct machine *machine;
......
......@@ -84,7 +84,7 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
return 0;
}
int test_session_topology(void)
int test_session_topology(int subtest __maybe_unused)
{
char path[PATH_MAX];
struct cpu_map *map;
......
......@@ -18,7 +18,7 @@ static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
int test__vmlinux_matches_kallsyms(void)
int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
{
int err = -1;
struct rb_node *nd;
......
......@@ -178,12 +178,51 @@ static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
return n;
}
static int callchain_node__count_flat_rows(struct callchain_node *node)
{
struct callchain_list *chain;
char folded_sign = 0;
int n = 0;
list_for_each_entry(chain, &node->parent_val, list) {
if (!folded_sign) {
/* only check first chain list entry */
folded_sign = callchain_list__folded(chain);
if (folded_sign == '+')
return 1;
}
n++;
}
list_for_each_entry(chain, &node->val, list) {
if (!folded_sign) {
/* node->parent_val list might be empty */
folded_sign = callchain_list__folded(chain);
if (folded_sign == '+')
return 1;
}
n++;
}
return n;
}
/*
 * Row count of a callchain node in folded mode: always one, whatever
 * the node contains.
 */
static int callchain_node__count_folded_rows(struct callchain_node *node __maybe_unused)
{
	const int rows_per_node = 1;

	return rows_per_node;
}
static int callchain_node__count_rows(struct callchain_node *node)
{
struct callchain_list *chain;
bool unfolded = false;
int n = 0;
if (callchain_param.mode == CHAIN_FLAT)
return callchain_node__count_flat_rows(node);
else if (callchain_param.mode == CHAIN_FOLDED)
return callchain_node__count_folded_rows(node);
list_for_each_entry(chain, &node->val, list) {
++n;
unfolded = chain->unfolded;
......@@ -263,7 +302,7 @@ static void callchain_node__init_have_children(struct callchain_node *node,
chain = list_entry(node->val.next, struct callchain_list, list);
chain->has_children = has_sibling;
if (!list_empty(&node->val)) {
if (node->val.next != node->val.prev) {
chain = list_entry(node->val.prev, struct callchain_list, list);
chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
}
......@@ -279,6 +318,9 @@ static void callchain__init_have_children(struct rb_root *root)
for (nd = rb_first(root); nd; nd = rb_next(nd)) {
struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
callchain_node__init_have_children(node, has_sibling);
if (callchain_param.mode == CHAIN_FLAT ||
callchain_param.mode == CHAIN_FOLDED)
callchain_node__make_parent_list(node);
}
}
......@@ -574,16 +616,53 @@ static bool hist_browser__check_dump_full(struct hist_browser *browser __maybe_u
#define LEVEL_OFFSET_STEP 3
static int hist_browser__show_callchain(struct hist_browser *browser,
struct rb_root *root, int level,
/*
 * Print a single callchain entry through @print.
 *
 * @browser:      browser being drawn; its show_dso flag selects the name format.
 * @node:         callchain node the entry belongs to (source of the value text).
 * @chain:        the entry to print.
 * @row:          row passed on to @print.
 * @total:        total used to format the node's value.
 * @need_percent: prefix the symbol name with the node's formatted value.
 * @offset:       offset passed on to @print.
 * @arg:          print state; while arg->row_offset is non-zero the entry
 *                is skipped and the offset is decremented instead.
 *
 * Returns the number of rows printed: 0 when the entry was skipped,
 * 1 otherwise.
 */
static int hist_browser__show_callchain_list(struct hist_browser *browser,
					     struct callchain_node *node,
					     struct callchain_list *chain,
					     unsigned short row, u64 total,
					     bool need_percent, int offset,
					     print_callchain_entry_fn print,
					     struct callchain_print_arg *arg)
{
	char bf[1024], *alloc_str = NULL;
	const char *str;

	if (arg->row_offset != 0) {
		arg->row_offset--;
		return 0;
	}

	str = callchain_list__sym_name(chain, bf, sizeof(bf),
				       browser->show_dso);

	if (need_percent) {
		char buf[64];

		callchain_node__scnprintf_value(node, buf, sizeof(buf),
						total);

		if (asprintf(&alloc_str, "%s %s", buf, str) < 0) {
			/*
			 * asprintf() leaves the pointer undefined on
			 * failure; reset it so the free() below is safe.
			 */
			alloc_str = NULL;
			str = "Not enough memory!";
		} else {
			str = alloc_str;
		}
	}

	print(browser, chain, str, offset, row, arg);

	free(alloc_str);
	return 1;
}
static int hist_browser__show_callchain_flat(struct hist_browser *browser,
struct rb_root *root,
unsigned short row, u64 total,
print_callchain_entry_fn print,
struct callchain_print_arg *arg,
check_output_full_fn is_output_full)
{
struct rb_node *node;
int first_row = row, offset = level * LEVEL_OFFSET_STEP;
u64 new_total;
int first_row = row, offset = LEVEL_OFFSET_STEP;
bool need_percent;
node = rb_first(root);
......@@ -592,15 +671,35 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
while (node) {
struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
struct rb_node *next = rb_next(node);
u64 cumul = callchain_cumul_hits(child);
struct callchain_list *chain;
char folded_sign = ' ';
int first = true;
int extra_offset = 0;
list_for_each_entry(chain, &child->parent_val, list) {
bool was_first = first;
if (first)
first = false;
else if (need_percent)
extra_offset = LEVEL_OFFSET_STEP;
folded_sign = callchain_list__folded(chain);
row += hist_browser__show_callchain_list(browser, child,
chain, row, total,
was_first && need_percent,
offset + extra_offset,
print, arg);
if (is_output_full(browser, row))
goto out;
if (folded_sign == '+')
goto next;
}
list_for_each_entry(chain, &child->val, list) {
char bf[1024], *alloc_str;
const char *str;
bool was_first = first;
if (first)
......@@ -609,31 +708,181 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
extra_offset = LEVEL_OFFSET_STEP;
folded_sign = callchain_list__folded(chain);
if (arg->row_offset != 0) {
arg->row_offset--;
goto do_next;
row += hist_browser__show_callchain_list(browser, child,
chain, row, total,
was_first && need_percent,
offset + extra_offset,
print, arg);
if (is_output_full(browser, row))
goto out;
if (folded_sign == '+')
break;
}
alloc_str = NULL;
next:
if (is_output_full(browser, row))
break;
node = next;
}
out:
return row - first_row;
}
/*
 * Extend a folded (single line) callchain string by one entry.
 *
 * @browser:   browser being drawn; its show_dso flag selects the name format.
 * @chain:     the entry whose symbol name is appended.
 * @value_str: optional value text; used as a prefix only when @old_str
 *             is NULL, i.e. for the first entry.
 * @old_str:   heap string built so far, or NULL for the first entry.
 *
 * Subsequent entries are joined with symbol_conf.field_sep, or ";" when
 * no separator is configured.
 *
 * Returns a newly allocated string, or NULL if the allocation failed.
 * On success @old_str is freed, since the returned string supersedes it;
 * without this every append would leak the previous string.  On failure
 * @old_str is left untouched and still belongs to the caller.
 */
static char *hist_browser__folded_callchain_str(struct hist_browser *browser,
						struct callchain_list *chain,
						char *value_str, char *old_str)
{
	char bf[1024];
	const char *str;
	char *new;
	int len;

	str = callchain_list__sym_name(chain, bf, sizeof(bf),
				       browser->show_dso);

	if (old_str)
		len = asprintf(&new, "%s%s%s", old_str,
			       symbol_conf.field_sep ?: ";", str);
	else if (value_str)
		len = asprintf(&new, "%s %s", value_str, str);
	else
		len = asprintf(&new, "%s", str);

	/* the pointer is undefined after a failed asprintf(); do not return it */
	if (len < 0)
		return NULL;

	free(old_str);
	return new;
}
if (was_first && need_percent) {
double percent = cumul * 100.0 / total;
static int hist_browser__show_callchain_folded(struct hist_browser *browser,
struct rb_root *root,
unsigned short row, u64 total,
print_callchain_entry_fn print,
struct callchain_print_arg *arg,
check_output_full_fn is_output_full)
{
struct rb_node *node;
int first_row = row, offset = LEVEL_OFFSET_STEP;
bool need_percent;
if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0)
str = "Not enough memory!";
else
str = alloc_str;
node = rb_first(root);
need_percent = node && rb_next(node);
while (node) {
struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
struct rb_node *next = rb_next(node);
struct callchain_list *chain, *first_chain = NULL;
int first = true;
char *value_str = NULL, *value_str_alloc = NULL;
char *chain_str = NULL, *chain_str_alloc = NULL;
if (arg->row_offset != 0) {
arg->row_offset--;
goto next;
}
print(browser, chain, str, offset + extra_offset, row, arg);
if (need_percent) {
char buf[64];
free(alloc_str);
callchain_node__scnprintf_value(child, buf, sizeof(buf), total);
if (asprintf(&value_str, "%s", buf) < 0) {
value_str = (char *)"<...>";
goto do_print;
}
value_str_alloc = value_str;
}
list_for_each_entry(chain, &child->parent_val, list) {
chain_str = hist_browser__folded_callchain_str(browser,
chain, value_str, chain_str);
if (first) {
first = false;
first_chain = chain;
}
if (chain_str == NULL) {
chain_str = (char *)"Not enough memory!";
goto do_print;
}
chain_str_alloc = chain_str;
}
list_for_each_entry(chain, &child->val, list) {
chain_str = hist_browser__folded_callchain_str(browser,
chain, value_str, chain_str);
if (first) {
first = false;
first_chain = chain;
}
if (chain_str == NULL) {
chain_str = (char *)"Not enough memory!";
goto do_print;
}
chain_str_alloc = chain_str;
}
do_print:
print(browser, first_chain, chain_str, offset, row++, arg);
free(value_str_alloc);
free(chain_str_alloc);
next:
if (is_output_full(browser, row))
break;
node = next;
}
return row - first_row;
}
if (is_output_full(browser, ++row))
static int hist_browser__show_callchain(struct hist_browser *browser,
struct rb_root *root, int level,
unsigned short row, u64 total,
print_callchain_entry_fn print,
struct callchain_print_arg *arg,
check_output_full_fn is_output_full)
{
struct rb_node *node;
int first_row = row, offset = level * LEVEL_OFFSET_STEP;
u64 new_total;
bool need_percent;
node = rb_first(root);
need_percent = node && rb_next(node);
while (node) {
struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
struct rb_node *next = rb_next(node);
struct callchain_list *chain;
char folded_sign = ' ';
int first = true;
int extra_offset = 0;
list_for_each_entry(chain, &child->val, list) {
bool was_first = first;
if (first)
first = false;
else if (need_percent)
extra_offset = LEVEL_OFFSET_STEP;
folded_sign = callchain_list__folded(chain);
row += hist_browser__show_callchain_list(browser, child,
chain, row, total,
was_first && need_percent,
offset + extra_offset,
print, arg);
if (is_output_full(browser, row))
goto out;
do_next:
if (folded_sign == '+')
break;
}
......@@ -844,10 +1093,22 @@ static int hist_browser__show_entry(struct hist_browser *browser,
total = entry->stat.period;
}
if (callchain_param.mode == CHAIN_FLAT) {
printed += hist_browser__show_callchain_flat(browser,
&entry->sorted_chain, row, total,
hist_browser__show_callchain_entry, &arg,
hist_browser__check_output_full);
} else if (callchain_param.mode == CHAIN_FOLDED) {
printed += hist_browser__show_callchain_folded(browser,
&entry->sorted_chain, row, total,
hist_browser__show_callchain_entry, &arg,
hist_browser__check_output_full);
} else {
printed += hist_browser__show_callchain(browser,
&entry->sorted_chain, 1, row, total,
hist_browser__show_callchain_entry, &arg,
hist_browser__check_output_full);
}
if (arg.is_current_entry)
browser->he_selection = entry;
......
......@@ -89,7 +89,7 @@ void perf_gtk__init_hpp(void)
perf_gtk__hpp_color_overhead_acc;
}
static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
static void perf_gtk__add_callchain_flat(struct rb_root *root, GtkTreeStore *store,
GtkTreeIter *parent, int col, u64 total)
{
struct rb_node *nd;
......@@ -100,13 +100,132 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
struct callchain_list *chain;
GtkTreeIter iter, new_parent;
bool need_new_parent;
double percent;
u64 hits, child_total;
node = rb_entry(nd, struct callchain_node, rb_node);
hits = callchain_cumul_hits(node);
percent = 100.0 * hits / total;
new_parent = *parent;
need_new_parent = !has_single_node;
callchain_node__make_parent_list(node);
list_for_each_entry(chain, &node->parent_val, list) {
char buf[128];
gtk_tree_store_append(store, &iter, &new_parent);
callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
gtk_tree_store_set(store, &iter, 0, buf, -1);
callchain_list__sym_name(chain, buf, sizeof(buf), false);
gtk_tree_store_set(store, &iter, col, buf, -1);
if (need_new_parent) {
/*
* Only show the top-most symbol in a callchain
* if it's not the only callchain.
*/
new_parent = iter;
need_new_parent = false;
}
}
list_for_each_entry(chain, &node->val, list) {
char buf[128];
gtk_tree_store_append(store, &iter, &new_parent);
callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
gtk_tree_store_set(store, &iter, 0, buf, -1);
callchain_list__sym_name(chain, buf, sizeof(buf), false);
gtk_tree_store_set(store, &iter, col, buf, -1);
if (need_new_parent) {
/*
* Only show the top-most symbol in a callchain
* if it's not the only callchain.
*/
new_parent = iter;
need_new_parent = false;
}
}
}
}
/*
 * Add callchains to the GTK tree store in folded form: one row per
 * callchain node, holding the node's formatted value in column 0 and
 * the whole chain (parent entries first, then the node's own entries)
 * joined into a single string in column @col.
 *
 * Entries are joined with symbol_conf.field_sep, or "; " when no
 * separator is configured.  If building a string fails the function
 * stops adding rows and returns.
 */
static void perf_gtk__add_callchain_folded(struct rb_root *root, GtkTreeStore *store,
					   GtkTreeIter *parent, int col, u64 total)
{
	struct rb_node *nd;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct callchain_node *node;
		struct callchain_list *chain;
		GtkTreeIter iter;
		char buf[64];
		/* str stays NULL if the node has no entries at all */
		char *str = NULL, *str_alloc = NULL;
		bool first = true;

		node = rb_entry(nd, struct callchain_node, rb_node);

		callchain_node__make_parent_list(node);

		list_for_each_entry(chain, &node->parent_val, list) {
			char name[1024];

			callchain_list__sym_name(chain, name, sizeof(name), false);

			if (asprintf(&str, "%s%s%s",
				     first ? "" : str_alloc,
				     first ? "" : symbol_conf.field_sep ?: "; ",
				     name) < 0) {
				/* do not leak the string built so far */
				free(str_alloc);
				return;
			}

			first = false;
			free(str_alloc);
			str_alloc = str;
		}

		list_for_each_entry(chain, &node->val, list) {
			char name[1024];

			callchain_list__sym_name(chain, name, sizeof(name), false);

			if (asprintf(&str, "%s%s%s",
				     first ? "" : str_alloc,
				     first ? "" : symbol_conf.field_sep ?: "; ",
				     name) < 0) {
				/* do not leak the string built so far */
				free(str_alloc);
				return;
			}

			first = false;
			free(str_alloc);
			str_alloc = str;
		}

		gtk_tree_store_append(store, &iter, parent);

		callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
		gtk_tree_store_set(store, &iter, 0, buf, -1);

		/* never hand an unset pointer to GTK */
		gtk_tree_store_set(store, &iter, col, str ?: "", -1);

		free(str_alloc);
	}
}
static void perf_gtk__add_callchain_graph(struct rb_root *root, GtkTreeStore *store,
GtkTreeIter *parent, int col, u64 total)
{
struct rb_node *nd;
bool has_single_node = (rb_first(root) == rb_last(root));
for (nd = rb_first(root); nd; nd = rb_next(nd)) {
struct callchain_node *node;
struct callchain_list *chain;
GtkTreeIter iter, new_parent;
bool need_new_parent;
u64 child_total;
node = rb_entry(nd, struct callchain_node, rb_node);
new_parent = *parent;
need_new_parent = !has_single_node && (node->val_nr > 1);
......@@ -116,7 +235,7 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
gtk_tree_store_append(store, &iter, &new_parent);
scnprintf(buf, sizeof(buf), "%5.2f%%", percent);
callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
gtk_tree_store_set(store, &iter, 0, buf, -1);
callchain_list__sym_name(chain, buf, sizeof(buf), false);
......@@ -138,11 +257,22 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
child_total = total;
/* Now 'iter' contains info of the last callchain_list */
perf_gtk__add_callchain(&node->rb_root, store, &iter, col,
perf_gtk__add_callchain_graph(&node->rb_root, store, &iter, col,
child_total);
}
}
/*
 * Add the callchains under @root to the tree store, using the layout
 * selected by callchain_param.mode: flat, folded, or (for every other
 * mode) the graph layout.
 */
static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
				    GtkTreeIter *parent, int col, u64 total)
{
	if (callchain_param.mode == CHAIN_FLAT) {
		perf_gtk__add_callchain_flat(root, store, parent, col, total);
		return;
	}

	if (callchain_param.mode == CHAIN_FOLDED) {
		perf_gtk__add_callchain_folded(root, store, parent, col, total);
		return;
	}

	perf_gtk__add_callchain_graph(root, store, parent, col, total);
}
static void on_row_activated(GtkTreeView *view, GtkTreePath *path,
GtkTreeViewColumn *col __maybe_unused,
gpointer user_data __maybe_unused)
......
......@@ -34,10 +34,10 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
return ret;
}
static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
struct callchain_list *chain,
int depth, int depth_mask, int period,
u64 total_samples, u64 hits,
int left_margin)
u64 total_samples, int left_margin)
{
int i;
size_t ret = 0;
......@@ -50,10 +50,9 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
else
ret += fprintf(fp, " ");
if (!period && i == depth - 1) {
double percent;
percent = hits * 100.0 / total_samples;
ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent);
ret += fprintf(fp, "--");
ret += callchain_node__fprintf_value(node, fp, total_samples);
ret += fprintf(fp, "--");
} else
ret += fprintf(fp, "%s", " ");
}
......@@ -82,13 +81,14 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
int depth_mask, int left_margin)
{
struct rb_node *node, *next;
struct callchain_node *child;
struct callchain_node *child = NULL;
struct callchain_list *chain;
int new_depth_mask = depth_mask;
u64 remaining;
size_t ret = 0;
int i;
uint entries_printed = 0;
int cumul_count = 0;
remaining = total_samples;
......@@ -100,6 +100,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
child = rb_entry(node, struct callchain_node, rb_node);
cumul = callchain_cumul_hits(child);
remaining -= cumul;
cumul_count += callchain_cumul_counts(child);
/*
* The depth mask manages the output of pipes that show
......@@ -120,10 +121,9 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
left_margin);
i = 0;
list_for_each_entry(chain, &child->val, list) {
ret += ipchain__fprintf_graph(fp, chain, depth,
ret += ipchain__fprintf_graph(fp, child, chain, depth,
new_depth_mask, i++,
total_samples,
cumul,
left_margin);
}
......@@ -143,14 +143,23 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
if (callchain_param.mode == CHAIN_GRAPH_REL &&
remaining && remaining != total_samples) {
struct callchain_node rem_node = {
.hit = remaining,
};
if (!rem_sq_bracket)
return ret;
if (callchain_param.value == CCVAL_COUNT && child && child->parent) {
rem_node.count = child->parent->children_count - cumul_count;
if (rem_node.count <= 0)
return ret;
}
new_depth_mask &= ~(1 << (depth - 1));
ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
ret += ipchain__fprintf_graph(fp, &rem_node, &rem_hits, depth,
new_depth_mask, 0, total_samples,
remaining, left_margin);
left_margin);
}
return ret;
......@@ -243,12 +252,11 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
struct rb_node *rb_node = rb_first(tree);
while (rb_node) {
double percent;
chain = rb_entry(rb_node, struct callchain_node, rb_node);
percent = chain->hit * 100.0 / total_samples;
ret = percent_color_fprintf(fp, " %6.2f%%\n", percent);
ret += fprintf(fp, " ");
ret += callchain_node__fprintf_value(chain, fp, total_samples);
ret += fprintf(fp, "\n");
ret += __callchain__fprintf_flat(fp, chain, total_samples);
ret += fprintf(fp, "\n");
if (++entries_printed == callchain_param.print_limit)
......@@ -260,6 +268,57 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
return ret;
}
/*
 * Print the chain leading to @node on a single line: the ancestors are
 * printed first (by recursing on node->parent), then the node's own
 * entries.  Entries whose ip is >= PERF_CONTEXT_MAX are skipped.
 * Names are joined with symbol_conf.field_sep, or ";" when none is set;
 * no separator is emitted before the very first name.
 *
 * Returns the accumulated fprintf() results; 0 for a NULL @node.
 */
static size_t __callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
{
	struct callchain_list *entry;
	const char *delim;
	char name[1024];
	size_t printed;
	bool need_delim;

	if (node == NULL)
		return 0;

	delim = symbol_conf.field_sep ?: ";";

	printed = __callchain__fprintf_folded(fp, node->parent);

	/* a separator is needed as soon as anything has been printed */
	need_delim = (printed != 0);

	list_for_each_entry(entry, &node->val, list) {
		if (entry->ip >= PERF_CONTEXT_MAX)
			continue;

		printed += fprintf(fp, "%s%s", need_delim ? delim : "",
				   callchain_list__sym_name(entry, name,
							    sizeof(name), false));
		need_delim = true;
	}

	return printed;
}
/*
 * Print every callchain of @tree in folded form, one per line:
 * "<value> <entry><sep><entry>...".  Stops once the number of printed
 * chains equals callchain_param.print_limit.
 *
 * Returns the accumulated fprintf() results.
 */
static size_t callchain__fprintf_folded(FILE *fp, struct rb_root *tree,
					u64 total_samples)
{
	struct rb_node *nd;
	u32 nr_printed = 0;
	size_t printed = 0;

	for (nd = rb_first(tree); nd; nd = rb_next(nd)) {
		struct callchain_node *cnode;

		cnode = rb_entry(nd, struct callchain_node, rb_node);

		printed += callchain_node__fprintf_value(cnode, fp, total_samples);
		printed += fprintf(fp, " ");
		printed += __callchain__fprintf_folded(fp, cnode);
		printed += fprintf(fp, "\n");

		nr_printed++;
		if (nr_printed == callchain_param.print_limit)
			break;
	}

	return printed;
}
static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
u64 total_samples, int left_margin,
FILE *fp)
......@@ -278,6 +337,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
case CHAIN_FLAT:
return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples);
break;
case CHAIN_FOLDED:
return callchain__fprintf_folded(fp, &he->sorted_chain, total_samples);
break;
case CHAIN_NONE:
break;
default:
......
......@@ -21,6 +21,7 @@ libperf-y += parse-events.o
libperf-y += perf_regs.o
libperf-y += path.o
libperf-y += rbtree.o
libperf-y += libstring.o
libperf-y += bitmap.o
libperf-y += hweight.o
libperf-y += run-command.o
......@@ -88,6 +89,7 @@ libperf-y += parse-branch-options.o
libperf-y += parse-regs-options.o
libperf-$(CONFIG_LIBBPF) += bpf-loader.o
libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
libperf-$(CONFIG_LIBELF) += symbol-elf.o
libperf-$(CONFIG_LIBELF) += probe-file.o
libperf-$(CONFIG_LIBELF) += probe-event.o
......@@ -138,6 +140,7 @@ $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_parse-events.o += -Wno-redundant-decls
......@@ -153,6 +156,10 @@ $(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
$(OUTPUT)util/libstring.o: ../lib/string.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
......@@ -5,11 +5,15 @@
* Copyright (C) 2015 Huawei Inc.
*/
#include <linux/bpf.h>
#include <bpf/libbpf.h>
#include <linux/err.h>
#include <linux/string.h>
#include "perf.h"
#include "debug.h"
#include "bpf-loader.h"
#include "bpf-prologue.h"
#include "llvm-utils.h"
#include "probe-event.h"
#include "probe-finder.h" // for MAX_PROBES
#include "llvm-utils.h"
......@@ -32,6 +36,10 @@ DEFINE_PRINT_FN(debug, 1)
/*
 * Private data of a BPF program.  bpf_prog_priv__clear() releases it:
 * it cleans up pev, frees insns_buf and type_mapping, then frees the
 * structure itself.
 */
struct bpf_prog_priv {
/* probe event belonging to this program */
struct perf_probe_event pev;
/* NOTE(review): presumably set when a prologue must be generated - confirm */
bool need_prologue;
/* heap buffer of BPF instructions, owned by this structure */
struct bpf_insn *insns_buf;
/* NOTE(review): looks like the number of distinct prologue types - confirm */
int nr_types;
/* heap array, owned by this structure; NOTE(review): indexing scheme not visible here */
int *type_mapping;
};
static bool libbpf_initialized;
......@@ -106,9 +114,178 @@ bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused,
struct bpf_prog_priv *priv = _priv;
cleanup_perf_probe_events(&priv->pev, 1);
zfree(&priv->insns_buf);
zfree(&priv->type_mapping);
free(priv);
}
/*
 * "exec" config term: mark the probe event as a uprobe and use a copy
 * of @value as its target.
 *
 * Returns 0 on success, -ENOMEM if the copy could not be allocated.
 */
static int
config__exec(const char *value, struct perf_probe_event *pev)
{
	char *target = strdup(value);

	pev->uprobes = true;
	pev->target = target;

	return target ? 0 : -ENOMEM;
}
/*
 * "module" config term: mark the probe event as not being a uprobe and
 * use a copy of @value as its target.
 *
 * Returns 0 on success, -ENOMEM if the copy could not be allocated.
 */
static int
config__module(const char *value, struct perf_probe_event *pev)
{
	char *target = strdup(value);

	pev->uprobes = false;
	pev->target = target;

	return target ? 0 : -ENOMEM;
}
/*
 * Parse @value as a boolean with strtobool() and store it in *@pbool,
 * negated when @invert is true.
 *
 * Returns 0 on success, -EINVAL if @pbool is NULL, or the error
 * reported by strtobool(); *@pbool is only written on success.
 */
static int
config__bool(const char *value,
	     bool *pbool, bool invert)
{
	bool parsed;
	int ret;

	if (pbool == NULL)
		return -EINVAL;

	ret = strtobool(value, &parsed);
	if (ret)
		return ret;

	/* parsed XOR invert */
	*pbool = (parsed != invert);
	return 0;
}
/*
 * Handle the "inlines=<bool>" config term. The flag stored in
 * probe_conf is 'no_inlines', so the parsed value is inverted.
 */
static int
config__inlines(const char *value,
		struct perf_probe_event *pev __maybe_unused)
{
	bool *no_inlines = &probe_conf.no_inlines;

	return config__bool(value, no_inlines, true);
}
/*
 * Handle the "force=<bool>" config term by storing the parsed value,
 * without inversion, in probe_conf.force_add.
 */
static int
config__force(const char *value,
	      struct perf_probe_event *pev __maybe_unused)
{
	bool *force_add = &probe_conf.force_add;

	return config__bool(value, force_add, false);
}
/*
 * Table of "key=value" terms accepted in a BPF program's config string.
 * do_config() looks a key up here and calls the matching handler; the
 * usage/desc strings are printed as a hint when the key is unknown.
 */
static struct {
/* Term name, compared with strcmp() against the text before '='. */
const char *key;
/* Usage text shown in the hint. */
const char *usage;
/* One-line description shown next to the usage text. */
const char *desc;
/* Handler called with the text after '=' and the probe event. */
int (*func)(const char *, struct perf_probe_event *);
} bpf_config_terms[] = {
{
.key = "exec",
.usage = "exec=<full path of file>",
.desc = "Set uprobe target",
.func = config__exec,
},
{
.key = "module",
.usage = "module=<module name> ",
.desc = "Set kprobe module",
.func = config__module,
},
{
.key = "inlines",
.usage = "inlines=[yes|no] ",
.desc = "Probe at inline symbol",
.func = config__inlines,
},
{
.key = "force",
.usage = "force=[yes|no] ",
.desc = "Forcibly add events with existing name",
.func = config__force,
},
};
/*
 * Apply a single "key=value" config term to 'pev'.
 *
 * Returns the handler's result when 'key' is found in bpf_config_terms.
 * Otherwise logs the list of valid terms and returns
 * -BPF_LOADER_ERRNO__CONFIG_TERM.
 */
static int
do_config(const char *key, const char *value,
	  struct perf_probe_event *pev)
{
	const unsigned int nr_terms = ARRAY_SIZE(bpf_config_terms);
	unsigned int idx;

	pr_debug("config bpf program: %s=%s\n", key, value);

	for (idx = 0; idx < nr_terms; idx++) {
		if (!strcmp(key, bpf_config_terms[idx].key))
			return bpf_config_terms[idx].func(value, pev);
	}

	/* Unknown key: report it together with a hint listing valid terms. */
	pr_debug("BPF: ERROR: invalid config option in object: %s=%s\n",
		 key, value);

	pr_debug("\nHint: Currently valid options are:\n");
	for (idx = 0; idx < nr_terms; idx++) {
		pr_debug("\t%s:\t%s\n", bpf_config_terms[idx].usage,
			 bpf_config_terms[idx].desc);
	}
	pr_debug("\n");

	return -BPF_LOADER_ERRNO__CONFIG_TERM;
}
/*
 * Consume the leading ';'-terminated "key=value" terms of 'config_str',
 * applying each one to 'pev' through do_config().
 *
 * A segment without '=' only triggers a warning and is skipped.
 *
 * Returns a pointer into 'config_str' just past the last ';' (the part
 * left for the probe command parser), or an ERR_PTR() on allocation
 * failure or when do_config() rejects a term.
 */
static const char *
parse_config_kvpair(const char *config_str, struct perf_probe_event *pev)
{
	const char *rest = NULL;
	char *copy, *cur, *semi;
	int ret = 0;

	/* Work on a private copy: separators are overwritten with NULs. */
	copy = strdup(config_str);
	if (copy == NULL) {
		pr_debug("No enough memory: dup config_str failed\n");
		return ERR_PTR(-ENOMEM);
	}

	for (cur = copy; (semi = strchr(cur, ';')) != NULL; cur = semi + 1) {
		char *eq;

		*semi = '\0';
		eq = strchr(cur, '=');
		if (eq == NULL) {
			pr_warning("WARNING: invalid config in BPF object: %s\n",
				   cur);
			pr_warning("\tShould be 'key=value'.\n");
			continue;
		}

		*eq = '\0';
		ret = do_config(cur, eq + 1, pev);
		if (ret)
			break;
	}

	/* Translate the offset reached in the copy back to the caller's string. */
	if (ret == 0)
		rest = config_str + (cur - copy);

	free(copy);
	return ret ? ERR_PTR(ret) : rest;
}
/*
 * Parse a whole BPF program config string: first the "key=value;"
 * terms, then the remainder as a perf probe command.
 *
 * Returns 0 on success, the error from parse_config_kvpair(), or
 * -BPF_LOADER_ERRNO__CONFIG when the probe command is invalid.
 */
static int
parse_config(const char *config_str, struct perf_probe_event *pev)
{
	const char *probe_str;

	probe_str = parse_config_kvpair(config_str, pev);
	if (IS_ERR(probe_str))
		return PTR_ERR(probe_str);

	if (parse_perf_probe_command(probe_str, pev) >= 0)
		return 0;

	pr_debug("bpf: '%s' is not a valid config string\n",
		 config_str);
	/* parse failed, don't need clear pev. */
	return -BPF_LOADER_ERRNO__CONFIG;
}
static int
config_bpf_program(struct bpf_program *prog)
{
......@@ -117,6 +294,10 @@ config_bpf_program(struct bpf_program *prog)
const char *config_str;
int err;
/* Initialize per-program probing setting */
probe_conf.no_inlines = false;
probe_conf.force_add = false;
config_str = bpf_program__title(prog, false);
if (IS_ERR(config_str)) {
pr_debug("bpf: unable to get title for program\n");
......@@ -131,13 +312,9 @@ config_bpf_program(struct bpf_program *prog)
pev = &priv->pev;
pr_debug("bpf: config program '%s'\n", config_str);
err = parse_perf_probe_command(config_str, pev);
if (err < 0) {
pr_debug("bpf: '%s' is not a valid config string\n",
config_str);
err = -BPF_LOADER_ERRNO__CONFIG;
err = parse_config(config_str, pev);
if (err)
goto errout;
}
if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
pr_debug("bpf: '%s': group for event is set and not '%s'.\n",
......@@ -197,6 +374,220 @@ static int bpf__prepare_probe(void)
return err;
}
/*
 * Pre-processor hooked into a bpf_program: builds instance 'n' of the
 * program by generating the prologue for prologue type 'n' and
 * appending the original instructions to it.
 *
 * On success 'res' describes the new instruction array (held in
 * priv->insns_buf) and 0 is returned. On failure a negative error code
 * is returned.
 */
static int
preproc_gen_prologue(struct bpf_program *prog, int n,
		     struct bpf_insn *orig_insns, int orig_insns_cnt,
		     struct bpf_prog_prep_result *res)
{
	struct probe_trace_event *tev;
	struct perf_probe_event *pev;
	struct bpf_prog_priv *priv;
	struct bpf_insn *buf;
	size_t prologue_cnt = 0;
	int i, err;

	err = bpf_program__get_private(prog, (void **)&priv);
	if (err || !priv)
		goto errout;

	pev = &priv->pev;

	if (n < 0 || n >= priv->nr_types)
		goto errout;

	/*
	 * insns_buf holds BPF_MAXINSNS instructions. Reject counts that
	 * would make the space computation below wrap around and let the
	 * memcpy() overrun the buffer.
	 */
	if (orig_insns_cnt < 0 || orig_insns_cnt > BPF_MAXINSNS)
		goto errout;

	/* Find a tev belongs to that type */
	for (i = 0; i < pev->ntevs; i++) {
		if (priv->type_mapping[i] == n)
			break;
	}

	if (i >= pev->ntevs) {
		pr_debug("Internal error: prologue type %d not found\n", n);
		return -BPF_LOADER_ERRNO__PROLOGUE;
	}

	tev = &pev->tevs[i];

	buf = priv->insns_buf;
	err = bpf__gen_prologue(tev->args, tev->nargs,
				buf, &prologue_cnt,
				BPF_MAXINSNS - orig_insns_cnt);
	if (err) {
		const char *title;

		/*
		 * The title may come back as NULL or as an ERR_PTR();
		 * never hand either of them to "%s".
		 */
		title = bpf_program__title(prog, false);
		if (!title || IS_ERR(title))
			title = "[unknown]";

		pr_debug("Failed to generate prologue for program %s\n",
			 title);
		return err;
	}

	/* The original program follows the generated prologue. */
	memcpy(&buf[prologue_cnt], orig_insns,
	       sizeof(struct bpf_insn) * orig_insns_cnt);

	res->new_insn_ptr = buf;
	res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
	res->pfd = NULL;
	return 0;

errout:
	pr_debug("Internal error in preproc_gen_prologue\n");
	return -BPF_LOADER_ERRNO__PROLOGUE;
}
/*
 * qsort() comparator for pointers to probe_trace_event, ordering events
 * by their argument lists: argument count first, then, per argument,
 * the value string and the chain of reference offsets.
 *
 * Two events compare equal (0) only when they have the same number of
 * arguments, each with the same value and the same offset chain.
 *
 * compare_tev_args is reflexive, transitive and antisymmetric.
 * I can prove it, but this margin is too narrow to contain the proof.
 */
static int compare_tev_args(const void *ptev1, const void *ptev2)
{
	int i, ret;
	const struct probe_trace_event *tev1 =
		*(const struct probe_trace_event **)ptev1;
	const struct probe_trace_event *tev2 =
		*(const struct probe_trace_event **)ptev2;

	ret = tev2->nargs - tev1->nargs;
	if (ret)
		return ret;

	for (i = 0; i < tev1->nargs; i++) {
		struct probe_trace_arg *arg1, *arg2;
		struct probe_trace_arg_ref *ref1, *ref2;

		arg1 = &tev1->args[i];
		arg2 = &tev2->args[i];

		ret = strcmp(arg1->value, arg2->value);
		if (ret)
			return ret;

		ref1 = arg1->ref;
		ref2 = arg2->ref;

		while (ref1 && ref2) {
			/*
			 * Offsets are long: compare them directly rather
			 * than squeezing their difference into an int,
			 * which could truncate to 0 or flip the sign.
			 */
			if (ref1->offset != ref2->offset)
				return ref2->offset > ref1->offset ? 1 : -1;

			ref1 = ref1->next;
			ref2 = ref2->next;
		}

		/* One chain is a strict prefix of the other. */
		if (ref1 || ref2)
			return ref2 ? 1 : -1;
	}

	return 0;
}
/*
 * Give every tev of 'pev' a prologue type number.
 *
 * 'mapping' must have one slot per tev; mapping[i] receives the type of
 * pev->tevs[i]. Tevs that compare equal under compare_tev_args() share
 * a type. *nr_types receives the number of distinct types.
 *
 * Returns 0 on success or -ENOMEM.
 */
static int map_prologue(struct perf_probe_event *pev, int *mapping,
			int *nr_types)
{
	struct probe_trace_event **sorted;
	int idx, cur_type = 0;

	sorted = malloc(sizeof(*sorted) * pev->ntevs);
	if (sorted == NULL) {
		pr_debug("No ehough memory: alloc ptevs failed\n");
		return -ENOMEM;
	}

	pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs);

	/* Sort pointers, not the tevs, so the original indices stay recoverable. */
	for (idx = 0; idx < pev->ntevs; idx++)
		sorted[idx] = &pev->tevs[idx];

	qsort(sorted, pev->ntevs, sizeof(*sorted), compare_tev_args);

	for (idx = 0; idx < pev->ntevs; idx++) {
		int slot = sorted[idx] - pev->tevs;

		/* A new type starts wherever a tev differs from its predecessor. */
		if (idx > 0 &&
		    compare_tev_args(&sorted[idx], &sorted[idx - 1]) != 0)
			cur_type++;

		mapping[slot] = cur_type;
		pr_debug("mapping[%d]=%d\n", slot, cur_type);
	}

	free(sorted);
	*nr_types = cur_type + 1;

	return 0;
}
/*
 * Decide whether 'prog' needs a prologue and, if so, allocate the
 * per-program buffers, compute the prologue type of every tev and
 * register preproc_gen_prologue() as the program's pre-processor.
 *
 * Returns 0 on success or a negative error code.
 */
static int hook_load_preprocessor(struct bpf_program *prog)
{
	struct bpf_prog_priv *priv;
	struct perf_probe_event *pev;
	bool wanted = false;
	int ret, idx;

	ret = bpf_program__get_private(prog, (void **)&priv);
	if (ret || !priv) {
		pr_debug("Internal error when hook preprocessor\n");
		return -BPF_LOADER_ERRNO__INTERNAL;
	}

	pev = &priv->pev;

	/* A prologue is only needed when some tev has arguments to fetch. */
	for (idx = 0; idx < pev->ntevs && !wanted; idx++)
		wanted = pev->tevs[idx].nargs > 0;

	priv->need_prologue = wanted;
	if (!wanted)
		return 0;

	priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS);
	if (priv->insns_buf == NULL) {
		pr_debug("No enough memory: alloc insns_buf failed\n");
		return -ENOMEM;
	}

	priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
	if (priv->type_mapping == NULL) {
		pr_debug("No enough memory: alloc type_mapping failed\n");
		return -ENOMEM;
	}
	/* All bytes 0xff makes every slot read as -1 until it is mapped. */
	memset(priv->type_mapping, -1,
	       sizeof(int) * pev->ntevs);

	ret = map_prologue(pev, priv->type_mapping, &priv->nr_types);
	if (ret)
		return ret;

	return bpf_program__set_prep(prog, priv->nr_types,
				     preproc_gen_prologue);
}
int bpf__probe(struct bpf_object *obj)
{
int err = 0;
......@@ -231,6 +622,18 @@ int bpf__probe(struct bpf_object *obj)
pr_debug("bpf_probe: failed to apply perf probe events");
goto out;
}
/*
* After probing, let's consider prologue, which
* adds program fetcher to BPF programs.
*
* hook_load_preprocessor() hooks a pre-processor
* into the bpf_program, letting it generate the
* prologue dynamically during loading.
*/
err = hook_load_preprocessor(prog);
if (err)
goto out;
}
out:
return err < 0 ? err : 0;
......@@ -314,7 +717,14 @@ int bpf__foreach_tev(struct bpf_object *obj,
for (i = 0; i < pev->ntevs; i++) {
tev = &pev->tevs[i];
if (priv->need_prologue) {
int type = priv->type_mapping[i];
fd = bpf_program__nth_fd(prog, type);
} else {
fd = bpf_program__fd(prog);
}
if (fd < 0) {
pr_debug("bpf: failed to get file descriptor\n");
return fd;
......@@ -340,6 +750,10 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = {
[ERRCODE_OFFSET(EVENTNAME)] = "No event name found in config string",
[ERRCODE_OFFSET(INTERNAL)] = "BPF loader internal error",
[ERRCODE_OFFSET(COMPILE)] = "Error when compiling BPF scriptlet",
[ERRCODE_OFFSET(CONFIG_TERM)] = "Invalid config term in config string",
[ERRCODE_OFFSET(PROLOGUE)] = "Failed to generate prologue",
[ERRCODE_OFFSET(PROLOGUE2BIG)] = "Prologue too big for program",
[ERRCODE_OFFSET(PROLOGUEOOB)] = "Offset out of bound for prologue",
};
static int
......@@ -420,7 +834,11 @@ int bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
int err, char *buf, size_t size)
{
bpf__strerror_head(err, buf, size);
bpf__strerror_entry(EEXIST, "Probe point exist. Try use 'perf probe -d \"*\"'");
case BPF_LOADER_ERRNO__CONFIG_TERM: {
scnprintf(buf, size, "%s (add -v to see detail)", emsg);
break;
}
bpf__strerror_entry(EEXIST, "Probe point exist. Try 'perf probe -d \"*\"' and set 'force=yes'");
bpf__strerror_entry(EACCES, "You need to be root");
bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0");
bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file");
......
......@@ -20,6 +20,10 @@ enum bpf_loader_errno {
BPF_LOADER_ERRNO__EVENTNAME, /* Event name is missing */
BPF_LOADER_ERRNO__INTERNAL, /* BPF loader internal error */
BPF_LOADER_ERRNO__COMPILE, /* Error when compiling BPF scriptlet */
BPF_LOADER_ERRNO__CONFIG_TERM, /* Invalid config term in config string */
BPF_LOADER_ERRNO__PROLOGUE, /* Failed to generate prologue */
BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */
BPF_LOADER_ERRNO__PROLOGUEOOB, /* Offset out of bound for prologue */
__BPF_LOADER_ERRNO__END,
};
......
/*
* bpf-prologue.c
*
* Copyright (C) 2015 He Kuang <hekuang@huawei.com>
* Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
* Copyright (C) 2015 Huawei Inc.
*/
#include <bpf/libbpf.h>
#include "perf.h"
#include "debug.h"
#include "bpf-loader.h"
#include "bpf-prologue.h"
#include "probe-finder.h"
#include <dwarf-regs.h>
#include <linux/filter.h>
#define BPF_REG_SIZE 8
#define JMP_TO_ERROR_CODE -1
#define JMP_TO_SUCCESS_CODE -2
#define JMP_TO_USER_CODE -3
/* Write cursor over the instruction buffer the prologue is emitted into. */
struct bpf_insn_pos {
/* First instruction of the buffer. */
struct bpf_insn *begin;
/* One past the last usable instruction of the buffer. */
struct bpf_insn *end;
/* Next instruction to write; append_insn() sets it to NULL on overflow. */
struct bpf_insn *pos;
};
static inline int
pos_get_cnt(struct bpf_insn_pos *pos)
{
return pos->pos - pos->begin;
}
static int
append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos)
{
if (!pos->pos)
return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
if (pos->pos + 1 >= pos->end) {
pr_err("bpf prologue: prologue too long\n");
pos->pos = NULL;
return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
}
*(pos->pos)++ = new_insn;
return 0;
}
/*
 * Verify that the cursor is still valid: not NULL and strictly before
 * the end of the buffer.
 *
 * Returns 0 when valid, -BPF_LOADER_ERRNO__PROLOGUE2BIG otherwise.
 */
static int
check_pos(struct bpf_insn_pos *pos)
{
	if (pos->pos != NULL && pos->pos < pos->end)
		return 0;

	return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
}
/* Give it a shorter name */
#define ins(i, p) append_insn((i), (p))
/*
 * Given a register name (in 'reg'), emit the instruction that loads
 * that register's saved value into eBPF register 'target_reg':
 *   'ldd target_reg, offset(ctx_reg)'
 * where ctx_reg must already hold the pointer to 'struct pt_regs' and
 * offset is what regs_query_register_offset() reports for 'reg'.
 *
 * Returns 0 on success, the negative value from
 * regs_query_register_offset(), or the check_pos() error.
 */
static int
gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg,
		     const char *reg, int target_reg)
{
	int ctx_off;

	ctx_off = regs_query_register_offset(reg);
	if (ctx_off >= 0) {
		ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, ctx_off), pos);
		return check_pos(pos);
	}

	pr_err("bpf: prologue: failed to get register %s\n",
	       reg);
	return ctx_off;
}
/*
 * Emit a call to BPF_FUNC_probe_read.
 *
 * src_base_addr_reg holds the base address to read from and
 * dst_addr_reg holds the destination address (on the stack). The
 * emitted code performs:
 *
 *   *[dst_addr_reg] = *([src_base_addr_reg] + offset)
 *
 * BPF_FUNC_probe_read takes:
 *   ARG1: ptr to stack (dest)
 *   ARG2: size (8)
 *   ARG3: unsafe ptr (src)
 *
 * Returns the result of check_pos() after all instructions are emitted.
 */
static int
gen_read_mem(struct bpf_insn_pos *pos,
	     int src_base_addr_reg,
	     int dst_addr_reg,
	     long offset)
{
	const int need_src_mov = (src_base_addr_reg != BPF_REG_ARG3);
	const int need_dst_mov = (dst_addr_reg != BPF_REG_ARG1);

	/* arg3 = source base address (already in place when registers match) */
	if (need_src_mov)
		ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos);

	/* arg3 += offset; nothing to emit for a zero offset */
	if (offset != 0)
		ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos);

	/* arg2 = number of bytes to read */
	ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos);

	/* arg1 = destination address */
	if (need_dst_mov)
		ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);

	ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos);

	/*
	 * A non-zero r0 means the read failed: jump to the error handling
	 * code. The offset is a placeholder that is relocated later to the
	 * start of that code.
	 */
	ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE),
	    pos);

	return check_pos(pos);
}
/*
 * Fast path: every argument is a bare register. Load each one straight
 * from the context into the argument registers (r3 - r5).
 *
 * BPF_REG_1 must already hold the pointer to 'struct pt_regs'.
 *
 * Returns 0 on success or the first error encountered.
 */
static int
gen_prologue_fastpath(struct bpf_insn_pos *pos,
		      struct probe_trace_arg *args, int nargs)
{
	int idx;

	for (idx = 0; idx < nargs; idx++) {
		int ret = gen_ldx_reg_from_ctx(pos, BPF_REG_1,
					       args[idx].value,
					       BPF_PROLOGUE_START_ARG_REG + idx);

		if (ret)
			return ret;
	}

	return check_pos(pos);
}
/*
* Slow path:
* At least one argument has the form of 'offset($rx)'.
*
* The following code first stores them onto the stack, then loads all of
* them into r3 - r5.
* Before final loading, the final result should be:
*
* low address
* BPF_REG_FP - 24 ARG3
* BPF_REG_FP - 16 ARG2
* BPF_REG_FP - 8 ARG1
* BPF_REG_FP
* high address
*
* For each argument (described as: offn(...off2(off1(reg)))),
* generates following code:
*
* r7 <- fp
* r7 <- r7 - stack_offset // Ideal code should initialize r7 using
* // fp before generating args. However,
* // eBPF won't regard r7 as stack pointer
* // if it is generated by minus 8 from
* // another stack pointer except fp.
* // This is why we have to set r7
* // to fp for each variable.
* r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx()
* (r7) <- r3 // skip following instructions for bare reg
* r3 <- r3 + off1 . // skip if off1 == 0
* r2 <- 8 \
* r1 <- r7 |-> generated by gen_read_mem()
* call probe_read /
* jnei r0, 0, err ./
* r3 <- (r7)
* r3 <- r3 + off2 . // skip if off2 == 0
* r2 <- 8 \ // r2 may be broken by probe_read, so set again
* r1 <- r7 |-> generated by gen_read_mem()
* call probe_read /
* jnei r0, 0, err ./
* ...
*/
static int
gen_prologue_slowpath(struct bpf_insn_pos *pos,
struct probe_trace_arg *args, int nargs)
{
int err, i;
/*
 * Return values of ins() are not checked one by one: append_insn()
 * sets the cursor to NULL on overflow, which the final check_pos()
 * (or the one inside a helper) reports.
 */
for (i = 0; i < nargs; i++) {
struct probe_trace_arg *arg = &args[i];
const char *reg = arg->value;
struct probe_trace_arg_ref *ref = NULL;
/* Stack slot of argument i: fp - 8, fp - 16, ... */
int stack_offset = (i + 1) * -8;
pr_debug("prologue: fetch arg %d, base reg is %s\n",
i, reg);
/* value of base register is stored into ARG3 */
err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg,
BPF_REG_ARG3);
if (err) {
pr_err("prologue: failed to get offset of register %s\n",
reg);
goto errout;
}
/* Make r7 the stack pointer. */
ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos);
/* r7 += stack_offset (stack_offset is negative) */
ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos);
/*
 * Store r3 (base register) onto stack
 * Ensure fp[offset] is set.
 * fp is the only valid base register when storing
 * into stack. We are not allowed to use r7 as base
 * register here.
 */
ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3,
stack_offset), pos);
/* Follow the chain of dereferences, one probe_read call per level. */
ref = arg->ref;
while (ref) {
pr_debug("prologue: arg %d: offset %ld\n",
i, ref->offset);
err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
ref->offset);
if (err) {
pr_err("prologue: failed to generate probe_read function call\n");
goto errout;
}
ref = ref->next;
/*
 * Load previous result into ARG3. Use
 * BPF_REG_FP instead of r7 because verifier
 * allows FP based addressing only.
 */
if (ref)
ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3,
BPF_REG_FP, stack_offset), pos);
}
}
/* Final pass: read to registers */
for (i = 0; i < nargs; i++)
ins(BPF_LDX_MEM(BPF_DW, BPF_PROLOGUE_START_ARG_REG + i,
BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos);
/* Jump offset is a placeholder, resolved later by prologue_relocate(). */
ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos);
return check_pos(pos);
errout:
return err;
}
/*
 * Resolve the placeholder jump offsets (JMP_TO_*) of all emitted jump
 * instructions into real relative offsets to 'error_code',
 * 'success_code' or 'user_code'.
 *
 * Returns 0 on success, -BPF_LOADER_ERRNO__PROLOGUE2BIG when the cursor
 * is invalid, or -BPF_LOADER_ERRNO__PROLOGUE on an unknown placeholder.
 */
static int
prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code,
		  struct bpf_insn *success_code, struct bpf_insn *user_code)
{
	struct bpf_insn *cur;

	if (check_pos(pos))
		return -BPF_LOADER_ERRNO__PROLOGUE2BIG;

	for (cur = pos->begin; cur < pos->pos; cur++) {
		struct bpf_insn *dest;
		u8 insn_class = BPF_CLASS(cur->code);
		u8 insn_op;

		/* Only jumps need relocation... */
		if (insn_class != BPF_JMP)
			continue;

		/* ...and calls share the jump class but carry no offset. */
		insn_op = BPF_OP(cur->code);
		if (insn_op == BPF_CALL)
			continue;

		if (cur->off == JMP_TO_ERROR_CODE) {
			dest = error_code;
		} else if (cur->off == JMP_TO_SUCCESS_CODE) {
			dest = success_code;
		} else if (cur->off == JMP_TO_USER_CODE) {
			dest = user_code;
		} else {
			pr_err("bpf prologue: internal error: relocation failed\n");
			return -BPF_LOADER_ERRNO__PROLOGUE;
		}

		/* Jump offsets are relative to the following instruction. */
		cur->off = dest - (cur + 1);
	}

	return 0;
}
/*
 * Generate a BPF prologue that fetches probe arguments.
 *
 * @args:      arguments to fetch
 * @nargs:     number of entries in @args; entries beyond
 *             BPF_PROLOGUE_MAX_ARGS are dropped with a warning
 * @new_prog:  buffer receiving the generated instructions
 * @new_cnt:   receives the number of instructions generated
 * @cnt_space: capacity of @new_prog in instructions (capped at
 *             BPF_MAXINSNS)
 *
 * The generated code leaves a fetch result in
 * BPF_PROLOGUE_FETCH_RESULT_REG (0 on success, 1 on failure) and the
 * argument values in the registers starting at
 * BPF_PROLOGUE_START_ARG_REG.
 *
 * Returns 0 on success and a negative error code otherwise; *new_cnt is
 * only written on success.
 */
int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
		      struct bpf_insn *new_prog, size_t *new_cnt,
		      size_t cnt_space)
{
	struct bpf_insn *success_code = NULL;
	struct bpf_insn *error_code = NULL;
	struct bpf_insn *user_code = NULL;
	struct bpf_insn_pos pos;
	bool fastpath = true;
	int err = 0, i;

	if (!new_prog || !new_cnt)
		return -EINVAL;

	if (cnt_space > BPF_MAXINSNS)
		cnt_space = BPF_MAXINSNS;

	pos.begin = new_prog;
	pos.end = new_prog + cnt_space;
	pos.pos = new_prog;

	if (!nargs) {
		ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0),
		    &pos);

		/*
		 * Keep the error code: jumping to errout with err == 0
		 * would report success while leaving *new_cnt unset.
		 */
		err = check_pos(&pos);
		if (err)
			goto errout;

		*new_cnt = pos_get_cnt(&pos);
		return 0;
	}

	if (nargs > BPF_PROLOGUE_MAX_ARGS) {
		pr_warning("bpf: prologue: %d arguments are dropped\n",
			   nargs - BPF_PROLOGUE_MAX_ARGS);
		nargs = BPF_PROLOGUE_MAX_ARGS;
	}

	/* First pass: validation */
	for (i = 0; i < nargs; i++) {
		struct probe_trace_arg_ref *ref = args[i].ref;

		if (args[i].value[0] == '@') {
			/* TODO: fetch global variable */
			pr_err("bpf: prologue: global %s%+ld not support\n",
				args[i].value, ref ? ref->offset : 0);
			return -ENOTSUP;
		}

		while (ref) {
			/* fastpath is true if all args has ref == NULL */
			fastpath = false;

			/*
			 * Instruction encodes immediate value using
			 * s32, ref->offset is long. On systems which
			 * can't fill long in s32, refuse to process if
			 * ref->offset too large (or small).
			 */
#ifdef __LP64__
#define OFFSET_MAX ((1LL << 31) - 1)
#define OFFSET_MIN ((1LL << 31) * -1)
			if (ref->offset > OFFSET_MAX ||
					ref->offset < OFFSET_MIN) {
				pr_err("bpf: prologue: offset out of bound: %ld\n",
				       ref->offset);
				return -BPF_LOADER_ERRNO__PROLOGUEOOB;
			}
#endif
			ref = ref->next;
		}
	}
	pr_debug("prologue: pass validation\n");

	if (fastpath) {
		/* If all variables are registers... */
		pr_debug("prologue: fast path\n");
		err = gen_prologue_fastpath(&pos, args, nargs);
		if (err)
			goto errout;
	} else {
		pr_debug("prologue: slow path\n");

		/* Initialization: move ctx to a callee saved register. */
		ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos);

		err = gen_prologue_slowpath(&pos, args, nargs);
		if (err)
			goto errout;
		/*
		 * start of ERROR_CODE (only slow path needs error code)
		 *   mov r2 <- 1  // r2 is error number
		 *   mov r3 <- 0  // r3, r4... should be touched or
		 *                // verifier would complain
		 *   mov r4 <- 0
		 *   ...
		 *   goto usercode
		 */
		error_code = pos.pos;
		ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1),
		    &pos);

		for (i = 0; i < nargs; i++)
			ins(BPF_ALU64_IMM(BPF_MOV,
					  BPF_PROLOGUE_START_ARG_REG + i,
					  0),
			    &pos);
		ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE),
				&pos);
	}

	/*
	 * start of SUCCESS_CODE:
	 *   mov r2 <- 0
	 *   goto usercode  // skip
	 */
	success_code = pos.pos;
	ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos);

	/*
	 * start of USER_CODE:
	 *   Restore ctx to r1
	 */
	user_code = pos.pos;
	if (!fastpath) {
		/*
		 * Only slow path needs restoring of ctx. In fast path,
		 * register are loaded directly from r1.
		 */
		ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos);
		err = prologue_relocate(&pos, error_code, success_code,
					user_code);
		if (err)
			goto errout;
	}

	err = check_pos(&pos);
	if (err)
		goto errout;

	*new_cnt = pos_get_cnt(&pos);
	return 0;
errout:
	return err;
}
/*
* Copyright (C) 2015, He Kuang <hekuang@huawei.com>
* Copyright (C) 2015, Huawei Inc.
*/
#ifndef __BPF_PROLOGUE_H
#define __BPF_PROLOGUE_H
#include <linux/compiler.h>
#include <linux/filter.h>
#include "probe-event.h"
#define BPF_PROLOGUE_MAX_ARGS 3
#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2
/*
 * bpf__gen_prologue() is only implemented when HAVE_BPF_PROLOGUE is
 * defined; otherwise an inline stub reports that it is unsupported.
 */
#ifdef HAVE_BPF_PROLOGUE
int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
struct bpf_insn *new_prog, size_t *new_cnt,
size_t cnt_space);
#else
/*
 * Stub: returns -EINVAL when new_cnt is NULL, otherwise stores 0 in
 * *new_cnt and returns -ENOTSUP. No instruction is generated.
 */
static inline int
bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused,
int nargs __maybe_unused,
struct bpf_insn *new_prog __maybe_unused,
size_t *new_cnt,
size_t cnt_space __maybe_unused)
{
if (!new_cnt)
return -EINVAL;
*new_cnt = 0;
return -ENOTSUP;
}
#endif
#endif /* __BPF_PROLOGUE_H */
......@@ -44,6 +44,10 @@ static int parse_callchain_mode(const char *value)
callchain_param.mode = CHAIN_GRAPH_REL;
return 0;
}
if (!strncmp(value, "folded", strlen(value))) {
callchain_param.mode = CHAIN_FOLDED;
return 0;
}
return -1;
}
......@@ -79,6 +83,23 @@ static int parse_callchain_sort_key(const char *value)
return -1;
}
/*
 * Parse the call graph value type ("percent", "period" or "count") and
 * store it in callchain_param.value. As the comparison is limited to
 * strlen(value) characters, any prefix of a keyword is accepted; the
 * first matching keyword in the order above wins.
 *
 * Returns 0 on a match, -1 otherwise.
 */
static int parse_callchain_value(const char *value)
{
	static const struct {
		const char *name;
		int val;
	} choices[] = {
		{ "percent", CCVAL_PERCENT },
		{ "period",  CCVAL_PERIOD },
		{ "count",   CCVAL_COUNT },
	};
	const size_t len = strlen(value);
	size_t idx;

	for (idx = 0; idx < sizeof(choices) / sizeof(choices[0]); idx++) {
		if (strncmp(value, choices[idx].name, len) == 0) {
			callchain_param.value = choices[idx].val;
			return 0;
		}
	}

	return -1;
}
static int
__parse_callchain_report_opt(const char *arg, bool allow_record_opt)
{
......@@ -102,7 +123,8 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
if (!parse_callchain_mode(tok) ||
!parse_callchain_order(tok) ||
!parse_callchain_sort_key(tok)) {
!parse_callchain_sort_key(tok) ||
!parse_callchain_value(tok)) {
/* parsing ok - move on to the next */
try_stack_size = false;
goto next;
......@@ -218,6 +240,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
switch (mode) {
case CHAIN_FLAT:
case CHAIN_FOLDED:
if (rnode->hit < chain->hit)
p = &(*p)->rb_left;
else
......@@ -338,6 +361,7 @@ int callchain_register_param(struct callchain_param *param)
param->sort = sort_chain_graph_rel;
break;
case CHAIN_FLAT:
case CHAIN_FOLDED:
param->sort = sort_chain_flat;
break;
case CHAIN_NONE:
......@@ -363,6 +387,7 @@ create_child(struct callchain_node *parent, bool inherit_children)
}
new->parent = parent;
INIT_LIST_HEAD(&new->val);
INIT_LIST_HEAD(&new->parent_val);
if (inherit_children) {
struct rb_node *n;
......@@ -431,6 +456,8 @@ add_child(struct callchain_node *parent,
new->children_hit = 0;
new->hit = period;
new->children_count = 0;
new->count = 1;
return new;
}
......@@ -478,6 +505,9 @@ split_add_child(struct callchain_node *parent,
parent->children_hit = callchain_cumul_hits(new);
new->val_nr = parent->val_nr - idx_local;
parent->val_nr = idx_local;
new->count = parent->count;
new->children_count = parent->children_count;
parent->children_count = callchain_cumul_counts(new);
/* create a new child for the new branch if any */
if (idx_total < cursor->nr) {
......@@ -488,6 +518,8 @@ split_add_child(struct callchain_node *parent,
parent->hit = 0;
parent->children_hit += period;
parent->count = 0;
parent->children_count += 1;
node = callchain_cursor_current(cursor);
new = add_child(parent, cursor, period);
......@@ -510,6 +542,7 @@ split_add_child(struct callchain_node *parent,
rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
} else {
parent->hit = period;
parent->count = 1;
}
}
......@@ -556,6 +589,7 @@ append_chain_children(struct callchain_node *root,
inc_children_hit:
root->children_hit += period;
root->children_count++;
}
static int
......@@ -608,6 +642,7 @@ append_chain(struct callchain_node *root,
/* we match 100% of the path, increment the hit */
if (matches == root->val_nr && cursor->pos == cursor->nr) {
root->hit += period;
root->count++;
return 0;
}
......@@ -799,12 +834,72 @@ char *callchain_list__sym_name(struct callchain_list *cl,
return bf;
}
/*
 * Format the value of 'node' into 'bf' according to
 * callchain_param.value: the period, the sample count, or (by default)
 * the period as a percentage of 'total'.
 *
 * In folded mode only the node's own hit/count are used; otherwise the
 * cumulative values (self + children) are used.
 *
 * Returns 'bf'.
 */
char *callchain_node__scnprintf_value(struct callchain_node *node,
				      char *bf, size_t bfsize, u64 total)
{
	const bool folded = (callchain_param.mode == CHAIN_FOLDED);
	u64 period = folded ? node->hit : callchain_cumul_hits(node);
	unsigned count = folded ? node->count : callchain_cumul_counts(node);

	if (callchain_param.value == CCVAL_PERIOD) {
		scnprintf(bf, bfsize, "%"PRIu64, period);
	} else if (callchain_param.value == CCVAL_COUNT) {
		scnprintf(bf, bfsize, "%u", count);
	} else {
		/* CCVAL_PERCENT and anything unexpected */
		double percent = 0.0;

		if (total)
			percent = period * 100.0 / total;
		scnprintf(bf, bfsize, "%.2f%%", percent);
	}

	return bf;
}
/*
 * Print the value of 'node' to 'fp' according to callchain_param.value:
 * the period, the sample count, or (by default) the period as a
 * percentage of 'total', printed through percent_color_fprintf().
 *
 * In folded mode only the node's own hit/count are used; otherwise the
 * cumulative values (self + children) are used.
 *
 * Returns whatever the underlying print function returns.
 */
int callchain_node__fprintf_value(struct callchain_node *node,
				  FILE *fp, u64 total)
{
	const bool folded = (callchain_param.mode == CHAIN_FOLDED);
	u64 period = folded ? node->hit : callchain_cumul_hits(node);
	unsigned count = folded ? node->count : callchain_cumul_counts(node);
	double percent = 0.0;

	if (callchain_param.value == CCVAL_PERIOD)
		return fprintf(fp, "%"PRIu64, period);

	if (callchain_param.value == CCVAL_COUNT)
		return fprintf(fp, "%u", count);

	/* CCVAL_PERCENT and anything unexpected */
	if (total)
		percent = period * 100.0 / total;
	return percent_color_fprintf(fp, "%.2f%%", percent);
}
static void free_callchain_node(struct callchain_node *node)
{
struct callchain_list *list, *tmp;
struct callchain_node *child;
struct rb_node *n;
list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
list_del(&list->list);
free(list);
}
list_for_each_entry_safe(list, tmp, &node->val, list) {
list_del(&list->list);
free(list);
......@@ -828,3 +923,41 @@ void free_callchain(struct callchain_root *root)
free_callchain_node(&root->node);
}
/*
 * Fill node->parent_val with copies of the callchain entries of all
 * ancestors of 'node'.
 *
 * Returns 0 on success. On allocation failure every copy made so far
 * is freed, node->parent_val is left untouched and -ENOMEM is returned.
 */
int callchain_node__make_parent_list(struct callchain_node *node)
{
struct callchain_node *parent = node->parent;
struct callchain_list *chain, *new;
LIST_HEAD(head);
/*
 * Walk from the direct parent upwards, copying each ancestor's
 * entries last-to-first onto the temporary list 'head'.
 */
while (parent) {
list_for_each_entry_reverse(chain, &parent->val, list) {
new = malloc(sizeof(*new));
if (new == NULL)
goto out;
*new = *chain;
new->has_children = false;
list_add_tail(&new->list, &head);
}
parent = parent->parent;
}
/*
 * 'head' was built in reverse; moving its entries last-to-first onto
 * the tail of parent_val undoes that reversal.
 */
list_for_each_entry_safe_reverse(chain, new, &head, list)
list_move_tail(&chain->list, &node->parent_val);
if (!list_empty(&node->parent_val)) {
/* First parent entry is flagged when 'node' has a neighbour in its rb tree. */
chain = list_first_entry(&node->parent_val, struct callchain_list, list);
chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node);
/* The node's own first entry no longer carries the flag. */
chain = list_first_entry(&node->val, struct callchain_list, list);
chain->has_children = false;
}
return 0;
out:
/* Allocation failed: release the copies collected so far. */
list_for_each_entry_safe(chain, new, &head, list) {
list_del(&chain->list);
free(chain);
}
return -ENOMEM;
}
......@@ -24,12 +24,13 @@
#define CALLCHAIN_RECORD_HELP CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP
#define CALLCHAIN_REPORT_HELP \
HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|none)\n" \
HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|folded|none)\n" \
HELP_PAD "threshold:\tminimum call graph inclusion threshold (<percent>)\n" \
HELP_PAD "print_limit:\tmaximum number of call graph entry (<number>)\n" \
HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \
HELP_PAD "sort_key:\tcall graph sort key (function|address)\n" \
HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n"
HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" \
HELP_PAD "value:\t\tcall graph value (percent|period|count)\n"
enum perf_call_graph_mode {
CALLCHAIN_NONE,
......@@ -43,7 +44,8 @@ enum chain_mode {
CHAIN_NONE,
CHAIN_FLAT,
CHAIN_GRAPH_ABS,
CHAIN_GRAPH_REL
CHAIN_GRAPH_REL,
CHAIN_FOLDED,
};
enum chain_order {
......@@ -54,11 +56,14 @@ enum chain_order {
struct callchain_node {
struct callchain_node *parent;
struct list_head val;
struct list_head parent_val;
struct rb_node rb_node_in; /* to insert nodes in an rbtree */
struct rb_node rb_node; /* to sort nodes in an output tree */
struct rb_root rb_root_in; /* input tree of children */
struct rb_root rb_root; /* sorted output tree of children */
unsigned int val_nr;
unsigned int count;
unsigned int children_count;
u64 hit;
u64 children_hit;
};
......@@ -78,6 +83,12 @@ enum chain_key {
CCKEY_ADDRESS
};
enum chain_value {
CCVAL_PERCENT,
CCVAL_PERIOD,
CCVAL_COUNT,
};
struct callchain_param {
bool enabled;
enum perf_call_graph_mode record_mode;
......@@ -90,6 +101,7 @@ struct callchain_param {
bool order_set;
enum chain_key key;
bool branch_callstack;
enum chain_value value;
};
extern struct callchain_param callchain_param;
......@@ -144,6 +156,11 @@ static inline u64 callchain_cumul_hits(struct callchain_node *node)
return node->hit + node->children_hit;
}
/* Cumulative sample count of a node: its own count plus its children's. */
static inline unsigned callchain_cumul_counts(struct callchain_node *node)
{
	unsigned total = node->count;

	total += node->children_count;
	return total;
}
int callchain_register_param(struct callchain_param *param);
int callchain_append(struct callchain_root *root,
struct callchain_cursor *cursor,
......@@ -229,7 +246,12 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
char *callchain_list__sym_name(struct callchain_list *cl,
char *bf, size_t bfsize, bool show_dso);
char *callchain_node__scnprintf_value(struct callchain_node *node,
char *bf, size_t bfsize, u64 total);
int callchain_node__fprintf_value(struct callchain_node *node,
FILE *fp, u64 total);
void free_callchain(struct callchain_root *root);
int callchain_node__make_parent_list(struct callchain_node *node);
#endif /* __PERF_CALLCHAIN_H */
......@@ -1243,6 +1243,8 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
if (dso != NULL) {
__dsos__add(dsos, dso);
dso__set_basename(dso);
/* Put dso here because __dsos_add already got it */
dso__put(dso);
}
return dso;
}
......
......@@ -9,17 +9,17 @@
static const char *argv_exec_path;
static const char *argv0_path;
const char *system_path(const char *path)
char *system_path(const char *path)
{
static const char *prefix = PREFIX;
struct strbuf d = STRBUF_INIT;
if (is_absolute_path(path))
return path;
return strdup(path);
strbuf_addf(&d, "%s/%s", prefix, path);
path = strbuf_detach(&d, NULL);
return path;
return (char *)path;
}
const char *perf_extract_argv0_path(const char *argv0)
......@@ -52,17 +52,16 @@ void perf_set_argv_exec_path(const char *exec_path)
/* Returns the highest-priority, location to look for perf programs. */
const char *perf_exec_path(void)
char *perf_exec_path(void)
{
const char *env;
char *env;
if (argv_exec_path)
return argv_exec_path;
return strdup(argv_exec_path);
env = getenv(EXEC_PATH_ENVIRONMENT);
if (env && *env) {
return env;
}
if (env && *env)
return strdup(env);
return system_path(PERF_EXEC_PATH);
}
......@@ -83,9 +82,11 @@ void setup_path(void)
{
const char *old_path = getenv("PATH");
struct strbuf new_path = STRBUF_INIT;
char *tmp = perf_exec_path();
add_path(&new_path, perf_exec_path());
add_path(&new_path, tmp);
add_path(&new_path, argv0_path);
free(tmp);
if (old_path)
strbuf_addstr(&new_path, old_path);
......
......@@ -3,10 +3,11 @@
extern void perf_set_argv_exec_path(const char *exec_path);
extern const char *perf_extract_argv0_path(const char *path);
extern const char *perf_exec_path(void);
extern void setup_path(void);
extern int execv_perf_cmd(const char **argv); /* NULL terminated */
extern int execl_perf_cmd(const char *cmd, ...);
extern const char *system_path(const char *path);
/* perf_exec_path and system_path return malloc'd string, caller must free it */
extern char *perf_exec_path(void);
extern char *system_path(const char *path);
#endif /* __PERF_EXEC_CMD_H */
......@@ -159,7 +159,7 @@ void load_command_list(const char *prefix,
struct cmdnames *other_cmds)
{
const char *env_path = getenv("PATH");
const char *exec_path = perf_exec_path();
char *exec_path = perf_exec_path();
if (exec_path) {
list_commands_in_dir(main_cmds, exec_path, prefix);
......@@ -187,6 +187,7 @@ void load_command_list(const char *prefix,
sizeof(*other_cmds->names), cmdname_compare);
uniq(other_cmds);
}
free(exec_path);
exclude_cmds(other_cmds, main_cmds);
}
......@@ -203,13 +204,14 @@ void list_commands(const char *title, struct cmdnames *main_cmds,
longest = other_cmds->names[i]->len;
if (main_cmds->cnt) {
const char *exec_path = perf_exec_path();
char *exec_path = perf_exec_path();
printf("available %s in '%s'\n", title, exec_path);
printf("----------------");
mput_char('-', strlen(title) + strlen(exec_path));
putchar('\n');
pretty_print_string_list(main_cmds, longest);
putchar('\n');
free(exec_path);
}
if (other_cmds->cnt) {
......
#include <string.h>
void *memdup(const void *src, size_t len);
......@@ -122,6 +122,7 @@ void machine__delete_threads(struct machine *machine)
void machine__exit(struct machine *machine)
{
machine__destroy_kernel_maps(machine);
map_groups__exit(&machine->kmaps);
dsos__exit(&machine->dsos);
machine__exit_vdso(machine);
......@@ -564,7 +565,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
const char *filename)
{
struct map *map = NULL;
struct dso *dso;
struct dso *dso = NULL;
struct kmod_path m;
if (kmod_path__parse_name(&m, filename))
......@@ -585,7 +586,11 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
map_groups__insert(&machine->kmaps, map);
/* Put the map here because map_groups__insert already got it */
map__put(map);
out:
/* put the dso here, corresponding to machine__findnew_module_dso */
dso__put(dso);
free(m.name);
return map;
}
......@@ -788,6 +793,7 @@ void machine__destroy_kernel_maps(struct machine *machine)
kmap->ref_reloc_sym = NULL;
}
map__put(machine->vmlinux_maps[type]);
machine->vmlinux_maps[type] = NULL;
}
}
......@@ -1084,11 +1090,14 @@ int machine__create_kernel_maps(struct machine *machine)
struct dso *kernel = machine__get_kernel(machine);
const char *name;
u64 addr = machine__get_running_kernel_start(machine, &name);
if (!addr)
int ret;
if (!addr || kernel == NULL)
return -1;
if (kernel == NULL ||
__machine__create_kernel_maps(machine, kernel) < 0)
ret = __machine__create_kernel_maps(machine, kernel);
dso__put(kernel);
if (ret < 0)
return -1;
if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
......
......@@ -2326,8 +2326,11 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
goto out;
if (!allow_suffix) {
pr_warning("Error: event \"%s\" already exists. "
"(Use -f to force duplicates.)\n", buf);
pr_warning("Error: event \"%s\" already exists.\n"
" Hint: Remove existing event by 'perf probe -d'\n"
" or force duplicates by 'perf probe -f'\n"
" or set 'force=yes' in BPF source.\n",
buf);
ret = -EEXIST;
goto out;
}
......
......@@ -683,20 +683,23 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1);
if (ret <= 0 || nops == 0) {
pf->fb_ops = NULL;
ret = 0;
#if _ELFUTILS_PREREQ(0, 142)
} else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
pf->cfi != NULL) {
Dwarf_Frame *frame;
Dwarf_Frame *frame = NULL;
if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
pr_warning("Failed to get call frame on 0x%jx\n",
(uintmax_t)pf->addr);
return -ENOENT;
ret = -ENOENT;
}
free(frame);
#endif
}
/* Call finder's callback handler */
if (ret >= 0)
ret = pf->callback(sc_die, pf);
/* *pf->fb_ops will be cached in libdw. Don't free it. */
......
......@@ -342,22 +342,6 @@ char *rtrim(char *s)
return s;
}
/**
* memdup - duplicate region of memory
* @src: memory region to duplicate
* @len: memory region length
*/
void *memdup(const void *src, size_t len)
{
void *p;
p = malloc(len);
if (p)
memcpy(p, src, len);
return p;
}
char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
{
/*
......
......@@ -1042,6 +1042,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
}
curr_dso->symtab_type = dso->symtab_type;
map_groups__insert(kmaps, curr_map);
/* kmaps already got it */
map__put(curr_map);
dsos__add(&map->groups->machine->dsos, curr_dso);
dso__set_loaded(curr_dso, map->type);
} else
......
......@@ -21,7 +21,8 @@ struct callchain_param callchain_param = {
.mode = CHAIN_GRAPH_ABS,
.min_percent = 0.5,
.order = ORDER_CALLEE,
.key = CCKEY_FUNCTION
.key = CCKEY_FUNCTION,
.value = CCVAL_PERCENT,
};
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment