Commit 6a6b12e2 authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo-20161114' of...

Merge tag 'perf-core-for-mingo-20161114' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

 - Allow querying and setting .perfconfig variables (Taeung Song)

 - Show branch information in callchains (predicted, TSX aborts, loop
   iterations, etc) (Jin Yao)

Infrastructure changes:

 - Support kbuild's CFLAGS_REMOVE_ in tools/build (Jiri Olsa)

 - Plug building jvmti to the main perf Makefile (Jiri Olsa)

Documentation changes:

 - Update Intel PT documentation about context switch events (Arnaldo Carvalho de Melo)

 - Fix 'perf record --call-graph dwarf' help/config in builds not linking
   with an unwind library, mentioning that it is a possible record option (Rabin Vincent)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 91a79e5f fef51ecd
......@@ -89,7 +89,9 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)), \
# - per target C flags
# - per object C flags
# - BUILD_STR macro to allow '-D"$(variable)"' constructs
c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
c_flags_1 = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
c_flags_2 = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(c_flags_1))
c_flags = $(filter-out $(CFLAGS_REMOVE_$(obj)), $(c_flags_2))
cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj))
###
......
......@@ -135,8 +135,10 @@ CFLAGS
It's possible to alter the standard object C flags in the following way:
CFLAGS_perf.o += '...' - alters CFLAGS for perf.o object
CFLAGS_gtk += '...' - alters CFLAGS for gtk build object
CFLAGS_perf.o += '...' - adds CFLAGS for perf.o object
CFLAGS_gtk += '...' - adds CFLAGS for gtk build object
CFLAGS_REMOVE_perf.o += '...' - removes CFLAGS for perf.o object
CFLAGS_REMOVE_gtk += '...' - removes CFLAGS for gtk build object
These C flags changes have the scope of the Build makefile they are defined in.
......
......@@ -47,7 +47,8 @@ FILES= \
test-bpf.bin \
test-get_cpuid.bin \
test-sdt.bin \
test-cxx.bin
test-cxx.bin \
test-jvmti.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
......@@ -225,6 +226,9 @@ $(OUTPUT)test-sdt.bin:
$(OUTPUT)test-cxx.bin:
$(BUILDXX) -std=gnu++11
$(OUTPUT)test-jvmti.bin:
$(BUILD)
-include $(OUTPUT)*.d
###############################
......
#include <jvmti.h>
#include <jvmticmlr.h>
/*
 * Feature-detection probe: declares one unused object of each type below
 * so that compilation fails when the included headers do not provide them.
 * The program itself does nothing and always returns 0.
 */
int main(void)
{
/* __attribute__((unused)) keeps the never-read locals from raising warnings */
JavaVM jvm __attribute__((unused));
jvmtiEventCallbacks cb __attribute__((unused));
jvmtiCapabilities caps __attribute__((unused));
jvmtiJlocationFormat format __attribute__((unused));
jvmtiEnv jvmti __attribute__((unused));
return 0;
}
......@@ -550,6 +550,18 @@ Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
have memory limits imposed upon them. That affects what buffer sizes they can
have as outlined above.
The v4.2 kernel introduced support for a context switch metadata event,
PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
are scheduled out and in, just not by whom, which is left for the
PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context,
which in turn requires CAP_SYS_ADMIN.
Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
switches") commit, that introduces these metadata events for further info.
When working with kernels < v4.2, the following considerations must be taken,
as the sched:sched_switch tracepoints will be used to receive such information:
Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
not permitted to use tracepoints which means there is insufficient side-band
information to decode Intel PT in per-cpu mode, and potentially workload-only
......@@ -564,8 +576,11 @@ sched_switch tracepoint
-----------------------
The sched_switch tracepoint is used to provide side-band data for Intel PT
decoding. sched_switch events are automatically added. e.g. the second event
shown below
decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't
available.
The sched_switch events are automatically added. e.g. the second event shown
below:
$ perf record -vv -e intel_pt//u uname
------------------------------------------------------------
......
......@@ -8,6 +8,8 @@ perf-config - Get and set variables in a configuration file.
SYNOPSIS
--------
[verse]
'perf config' [<file-option>] [section.name[=value] ...]
or
'perf config' [<file-option>] -l | --list
DESCRIPTION
......@@ -118,6 +120,39 @@ Given a $HOME/.perfconfig like this:
children = true
group = true
You can hide the source code in the annotate feature by setting the config to true with
% perf config annotate.hide_src_code=true
If you want to add or modify several config items, you can do like
% perf config ui.show-headers=false kmem.default=slab
To modify the sort order of report functionality in user config file(i.e. `~/.perfconfig`), do
% perf config --user report.sort-order=srcline
To change colors of selected line to other foreground and background colors
in system config file (i.e. `$(sysconf)/perfconfig`), do
% perf config --system colors.selected=yellow,green
To query the record mode of call graph, do
% perf config call-graph.record-mode
If you want to know multiple config key/value pairs, you can do like
% perf config report.queue-size call-graph.order report.children
To query the config value of sort order of call graph in user config file (i.e. `~/.perfconfig`), do
% perf config --user call-graph.sort-order
To query the config value of buildid directory in system config file (i.e. `$(sysconf)/perfconfig`), do
% perf config --system buildid.dir
Variables
~~~~~~~~~
......
......@@ -758,6 +758,31 @@ ifndef NO_AUXTRACE
endif
endif
# Locate the root of the Java installation (JDIR) unless the jvmti agent
# has been disabled with NO_JVMTI.
ifndef NO_JVMTI
# First choice: update-java-alternatives; JDIR is the third field of the
# first line it lists.
ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
else
# Otherwise: alternatives; JDIR is the fifth space-separated field of the
# last line of its output, with the /jre/bin/java part stripped.
ifneq (,$(wildcard /usr/sbin/alternatives))
JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
endif
endif
# Neither tool was found and JDIR was not supplied: warn and disable jvmti
# instead of failing the build.
ifndef JDIR
$(warning No alternatives command found, you need to set JDIR= to point to the root of your Java directory)
NO_JVMTI := 1
endif
endif
# With JDIR known, run the jvmti feature check with the JDK include
# directories added to its CFLAGS.
ifndef NO_JVMTI
FEATURE_CHECK_CFLAGS-jvmti := -I$(JDIR)/include -I$(JDIR)/include/linux
$(call feature_check,jvmti)
ifeq ($(feature-jvmti), 1)
# NOTE(review): detected_var is defined elsewhere; presumably it records
# JDIR for the sub-builds -- confirm.
$(call detected_var,JDIR)
else
$(warning No openjdk development package found, please install JDK package)
NO_JVMTI := 1
endif
endif
# Among the variables below, these:
# perfexecdir
# template_dir
......@@ -850,6 +875,7 @@ ifeq ($(VF),1)
$(call print_var,sysconfdir)
$(call print_var,LIBUNWIND_DIR)
$(call print_var,LIBDW_DIR)
$(call print_var,JDIR)
ifeq ($(dwarf-post-unwind),1)
$(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))
......
......@@ -86,6 +86,8 @@ include ../scripts/utilities.mak
#
# Define FEATURES_DUMP to provide features detection dump file
# and bypass the feature detection
#
# Define NO_JVMTI if you do not want jvmti agent built
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
......@@ -283,6 +285,12 @@ ifndef NO_PERF_READ_VDSOX32
PROGRAMS += $(OUTPUT)perf-read-vdsox32
endif
LIBJVMTI = libperf-jvmti.so
ifndef NO_JVMTI
PROGRAMS += $(OUTPUT)$(LIBJVMTI)
endif
# what 'all' will build and 'install' will install, in perfexecdir
ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
......@@ -551,6 +559,16 @@ $(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c
$(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
endif
ifndef NO_JVMTI
LIBJVMTI_IN := $(OUTPUT)jvmti/jvmti-in.o
$(LIBJVMTI_IN): FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti
$(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN)
$(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $< -lelf -lrt
endif
$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
LIBPERF_IN := $(OUTPUT)libperf-in.o
......@@ -687,6 +705,10 @@ endif
ifndef NO_PERF_READ_VDSOX32
$(call QUIET_INSTALL, perf-read-vdsox32) \
$(INSTALL) $(OUTPUT)perf-read-vdsox32 '$(DESTDIR_SQ)$(bindir_SQ)';
endif
ifndef NO_JVMTI
$(call QUIET_INSTALL, $(LIBJVMTI)) \
$(INSTALL) $(OUTPUT)$(LIBJVMTI) '$(DESTDIR_SQ)$(libdir_SQ)';
endif
$(call QUIET_INSTALL, libexec) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
......@@ -754,7 +776,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
$(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
$(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
......
......@@ -17,7 +17,7 @@
static bool use_system_config, use_user_config;
static const char * const config_usage[] = {
"perf config [<file-option>] [options]",
"perf config [<file-option>] [options] [section.name[=value] ...]",
NULL
};
......@@ -33,6 +33,73 @@ static struct option config_options[] = {
OPT_END()
};
/*
 * Store "var = value" in @set and rewrite @file_name from the resulting set.
 *
 * The new value is merged into @set before the file is opened for writing,
 * so a variable that cannot be collected leaves the existing file untouched
 * instead of truncated.  Unless the system config file is the one being
 * written (use_system_config), sections and items flagged as coming from
 * the system config are left out of the output.
 *
 * Returns 0 on success, -1 when @set is NULL, the value could not be
 * collected, or the file could not be opened or completely written.
 */
static int set_config(struct perf_config_set *set, const char *file_name,
		      const char *var, const char *value)
{
	struct perf_config_section *section = NULL;
	struct perf_config_item *item = NULL;
	const char *first_line = "# this file is auto-generated.";
	FILE *fp;

	if (set == NULL)
		return -1;

	/* Merge first: fopen(..., "w") below truncates the file. */
	if (perf_config_set__collect(set, file_name, var, value) < 0)
		return -1;

	fp = fopen(file_name, "w");
	if (!fp)
		return -1;

	fprintf(fp, "%s\n", first_line);

	/* overwrite config variables */
	perf_config_items__for_each_entry(&set->sections, section) {
		if (!use_system_config && section->from_system_config)
			continue;
		fprintf(fp, "[%s]\n", section->name);

		perf_config_items__for_each_entry(&section->items, item) {
			/*
			 * Filter on the item's own origin: a section can mix
			 * items from the user and the system config file.
			 */
			if (!use_system_config && item->from_system_config)
				continue;
			if (item->value)
				fprintf(fp, "\t%s = %s\n",
					item->name, item->value);
		}
	}

	/* fclose() flushes buffered data; a failure means a short file. */
	if (fclose(fp) != 0)
		return -1;

	return 0;
}
/*
 * Print "var=value" for the config variable named by @var, given as
 * "section.name".
 *
 * A section is considered only when @var starts with the section name and
 * the very next character is the '.' separator; the remainder of @var is
 * then compared against the item names of that section.  Nothing is printed
 * when no item with a value matches.
 *
 * Returns -1 when @set is NULL, 0 otherwise (whether or not a value was
 * printed).
 */
static int show_spec_config(struct perf_config_set *set, const char *var)
{
	struct perf_config_section *section;
	struct perf_config_item *item;

	if (set == NULL)
		return -1;

	perf_config_items__for_each_entry(&set->sections, section) {
		size_t section_len;
		const char *name;

		if (prefixcmp(var, section->name) != 0)
			continue;

		/*
		 * A bare prefix match is not enough: without the separator
		 * check, "abc.x" would be looked up in section "a" and the
		 * character after the prefix silently skipped.
		 */
		section_len = strlen(section->name);
		if (var[section_len] != '.')
			continue;
		name = var + section_len + 1;

		perf_config_items__for_each_entry(&section->items, item) {
			if (strcmp(name, item->name) != 0)
				continue;
			if (item->value) {
				printf("%s=%s\n", var, item->value);
				return 0;
			}
		}
	}

	return 0;
}
static int show_config(struct perf_config_set *set)
{
struct perf_config_section *section;
......@@ -52,9 +119,44 @@ static int show_config(struct perf_config_set *set)
return 0;
}
/*
 * Split a command line argument of the form "section.name[=value]".
 *
 * On success *var points at the NUL-terminated "section.name" part inside
 * @arg (which is modified in place when a value is present) and *value
 * points at the text following the first '=', or is NULL when @arg has
 * no '='.
 *
 * Returns 0 on success, or -1 after reporting through pr_err() when the
 * section name, the variable name or the value is missing.
 */
static int parse_config_arg(char *arg, char **var, char **value)
{
	char *assign = strchr(arg, '=');
	const char *dot = strchr(arg, '.');

	/*
	 * Only a dot in the key separates section and variable name; a dot
	 * that first shows up in the value ("name=1.5") does not count.
	 */
	if (assign != NULL && dot != NULL && dot > assign)
		dot = NULL;

	if (dot == NULL || dot == arg) {
		pr_err("The config variable does not contain a section name: %s\n", arg);
		return -1;
	}
	/* Nothing between the dot and the end of the key ("sec." or "sec.=v"). */
	if (dot[1] == '\0' || dot + 1 == assign) {
		pr_err("The config variable does not contain a variable name: %s\n", arg);
		return -1;
	}

	if (assign == NULL) {
		*var = arg;
		*value = NULL;
		return 0;
	}

	if (assign[1] == '\0') {
		pr_err("The config variable does not contain a value: %s\n", arg);
		return -1;
	}

	/* Cut the argument in two at the first '='. */
	*assign = '\0';
	*var = arg;
	*value = assign + 1;

	return 0;
}
int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
{
int ret = 0;
int i, ret = 0;
struct perf_config_set *set;
char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
......@@ -100,7 +202,36 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
}
break;
default:
usage_with_options(config_usage, config_options);
if (argc) {
for (i = 0; argv[i]; i++) {
char *var, *value;
char *arg = strdup(argv[i]);
if (!arg) {
pr_err("%s: strdup failed\n", __func__);
ret = -1;
break;
}
if (parse_config_arg(arg, &var, &value) < 0) {
free(arg);
ret = -1;
break;
}
if (value == NULL)
ret = show_spec_config(set, var);
else {
const char *config_filename = config_exclusive_filename;
if (!config_exclusive_filename)
config_filename = user_config;
ret = set_config(set, config_filename, var, value);
}
free(arg);
}
} else
usage_with_options(config_usage, config_options);
}
perf_config_set__delete(set);
......
......@@ -911,6 +911,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
if (itrace_synth_opts.last_branch)
has_br_stack = true;
if (has_br_stack && branch_call_mode)
symbol_conf.show_branchflag_count = true;
/*
* Branch mode is a tristate:
* -1 means default, so decide based on the file having branch data.
......
# Objects that make up the "jvmti" build target.
jvmti-y += libjvmti.o
jvmti-y += jvmti_agent.o
# Extra C flags for this target: position-independent code plus the include
# directories under JDIR.
CFLAGS_jvmti = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux
# Warning options filtered out of the C flags for this target only.
CFLAGS_REMOVE_jvmti = -Wmissing-declarations
CFLAGS_REMOVE_jvmti += -Wstrict-prototypes
CFLAGS_REMOVE_jvmti += -Wextra
CFLAGS_REMOVE_jvmti += -Wwrite-strings
ARCH=$(shell uname -m)
ifeq ($(ARCH), x86_64)
JARCH=amd64
endif
ifeq ($(ARCH), armv7l)
JARCH=armhf
endif
ifeq ($(ARCH), armv6l)
JARCH=armhf
endif
ifeq ($(ARCH), aarch64)
JARCH=aarch64
endif
ifeq ($(ARCH), ppc64)
JARCH=powerpc
endif
ifeq ($(ARCH), ppc64le)
JARCH=powerpc
endif
DESTDIR=/usr/local
VERSION=1
REVISION=0
AGE=0
LN=ln -sf
RM=rm
SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE)
VLIBJVMTI=libjvmti.so.$(VERSION)
SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI)
SOLIBEXT=so
# Locate the root of the Java installation (JDIR) with whichever
# alternatives tool is installed.
# The following works at least on fedora 23, you may need the next
# line for other distros.
ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
else
ifneq (,$(wildcard /usr/sbin/alternatives))
JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
endif
endif
# Stop early with a clear message when JDIR is unknown or jvmti.h is not
# present under it.
ifndef JDIR
$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory)
else
ifeq (,$(wildcard $(JDIR)/include/jvmti.h))
$(error the openjdk development package appears to be missing, install and try again)
endif
endif
$(info Using Java from $(JDIR))
# -lrt required in 32-bit mode for clock_gettime()
LIBS=-lelf -lrt
INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux
TARGETS=$(SLIBJVMTI)
SRCS=libjvmti.c jvmti_agent.c
OBJS=$(SRCS:.c=.o)
SOBJS=$(OBJS:.o=.lo)
OPT=-O2 -g -Werror -Wall
CFLAGS=$(INCDIR) $(OPT)
all: $(TARGETS)
.c.o:
$(CC) $(CFLAGS) -c $*.c
.c.lo:
$(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo
$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h
$(SLIBJVMTI): $(SOBJS)
$(CC) $(CFLAGS) $(SLDFLAGS) -o $@ $(SOBJS) $(LIBS)
$(LN) $@ libjvmti.$(SOLIBEXT)
clean:
$(RM) -f *.o *.so.* *.so *.lo
install:
-mkdir -p $(DESTDIR)/lib
install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/
(cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI))
(cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT))
ldconfig
.SUFFIXES: .c .S .o .lo
......@@ -106,7 +106,7 @@ make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
make_minimal += NO_LIBCRYPTO=1 NO_SDT=1
make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1
# $(run) contains all available tests
run := make_pure
......
......@@ -738,6 +738,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
struct callchain_print_arg *arg)
{
char bf[1024], *alloc_str;
char buf[64], *alloc_str2;
const char *str;
if (arg->row_offset != 0) {
......@@ -746,12 +747,26 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
}
alloc_str = NULL;
alloc_str2 = NULL;
str = callchain_list__sym_name(chain, bf, sizeof(bf),
browser->show_dso);
if (need_percent) {
char buf[64];
if (symbol_conf.show_branchflag_count) {
if (need_percent)
callchain_list_counts__printf_value(node, chain, NULL,
buf, sizeof(buf));
else
callchain_list_counts__printf_value(NULL, chain, NULL,
buf, sizeof(buf));
if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
str = "Not enough memory!";
else
str = alloc_str2;
}
if (need_percent) {
callchain_node__scnprintf_value(node, buf, sizeof(buf),
total);
......@@ -764,6 +779,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
print(browser, chain, str, offset, row, arg);
free(alloc_str);
free(alloc_str2);
return 1;
}
......
......@@ -41,7 +41,9 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
{
int i;
size_t ret = 0;
char bf[1024];
char bf[1024], *alloc_str = NULL;
char buf[64];
const char *str;
ret += callchain__fprintf_left_margin(fp, left_margin);
for (i = 0; i < depth; i++) {
......@@ -56,8 +58,26 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
} else
ret += fprintf(fp, "%s", " ");
}
fputs(callchain_list__sym_name(chain, bf, sizeof(bf), false), fp);
str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
if (symbol_conf.show_branchflag_count) {
if (!period)
callchain_list_counts__printf_value(node, chain, NULL,
buf, sizeof(buf));
else
callchain_list_counts__printf_value(NULL, chain, NULL,
buf, sizeof(buf));
if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
str = "Not enough memory!";
else
str = alloc_str;
}
fputs(str, fp);
fputc('\n', fp);
free(alloc_str);
return ret;
}
......@@ -219,8 +239,15 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
} else
ret += callchain__fprintf_left_margin(fp, left_margin);
ret += fprintf(fp, "%s\n", callchain_list__sym_name(chain, bf, sizeof(bf),
false));
ret += fprintf(fp, "%s",
callchain_list__sym_name(chain, bf,
sizeof(bf),
false));
if (symbol_conf.show_branchflag_count)
ret += callchain_list_counts__printf_value(
NULL, chain, fp, NULL, 0);
ret += fprintf(fp, "\n");
if (++entries_printed == callchain_param.print_limit)
break;
......
......@@ -193,7 +193,6 @@ int perf_callchain_config(const char *var, const char *value)
if (!strcmp(var, "record-mode"))
return parse_callchain_record_opt(value, &callchain_param);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
if (!strcmp(var, "dump-size")) {
unsigned long size = 0;
int ret;
......@@ -203,7 +202,6 @@ int perf_callchain_config(const char *var, const char *value)
return ret;
}
#endif
if (!strcmp(var, "print-type"))
return parse_callchain_mode(value);
if (!strcmp(var, "order"))
......@@ -440,6 +438,21 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
call->ip = cursor_node->ip;
call->ms.sym = cursor_node->sym;
call->ms.map = cursor_node->map;
if (cursor_node->branch) {
call->branch_count = 1;
if (cursor_node->branch_flags.predicted)
call->predicted_count = 1;
if (cursor_node->branch_flags.abort)
call->abort_count = 1;
call->cycles_count = cursor_node->branch_flags.cycles;
call->iter_count = cursor_node->nr_loop_iter;
call->samples_count = cursor_node->samples;
}
list_add_tail(&call->list, &node->val);
callchain_cursor_advance(cursor);
......@@ -499,8 +512,23 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
right = node->ip;
}
if (left == right)
if (left == right) {
if (node->branch) {
cnode->branch_count++;
if (node->branch_flags.predicted)
cnode->predicted_count++;
if (node->branch_flags.abort)
cnode->abort_count++;
cnode->cycles_count += node->branch_flags.cycles;
cnode->iter_count += node->nr_loop_iter;
cnode->samples_count += node->samples;
}
return MATCH_EQ;
}
return left > right ? MATCH_GT : MATCH_LT;
}
......@@ -730,7 +758,8 @@ merge_chain_branch(struct callchain_cursor *cursor,
list_for_each_entry_safe(list, next_list, &src->val, list) {
callchain_cursor_append(cursor, list->ip,
list->ms.map, list->ms.sym);
list->ms.map, list->ms.sym,
false, NULL, 0, 0);
list_del(&list->list);
free(list);
}
......@@ -767,7 +796,9 @@ int callchain_merge(struct callchain_cursor *cursor,
}
int callchain_cursor_append(struct callchain_cursor *cursor,
u64 ip, struct map *map, struct symbol *sym)
u64 ip, struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags,
int nr_loop_iter, int samples)
{
struct callchain_cursor_node *node = *cursor->last;
......@@ -782,6 +813,13 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
node->ip = ip;
node->map = map;
node->sym = sym;
node->branch = branch;
node->nr_loop_iter = nr_loop_iter;
node->samples = samples;
if (flags)
memcpy(&node->branch_flags, flags,
sizeof(struct branch_flags));
cursor->nr++;
......@@ -939,6 +977,163 @@ int callchain_node__fprintf_value(struct callchain_node *node,
return 0;
}
/*
 * Add the branch statistics of every entry on @node's value list to the
 * caller's running totals.  Any of the output pointers may be NULL, in
 * which case that statistic is not reported.
 */
static void callchain_counts_value(struct callchain_node *node,
				   u64 *branch_count, u64 *predicted_count,
				   u64 *abort_count, u64 *cycles_count)
{
	u64 nr_branch = 0, nr_predicted = 0, nr_abort = 0, nr_cycles = 0;
	struct callchain_list *entry;

	/* Sum locally first, then fold into whichever outputs were given. */
	list_for_each_entry(entry, &node->val, list) {
		nr_branch += entry->branch_count;
		nr_predicted += entry->predicted_count;
		nr_abort += entry->abort_count;
		nr_cycles += entry->cycles_count;
	}

	if (branch_count != NULL)
		*branch_count += nr_branch;
	if (predicted_count != NULL)
		*predicted_count += nr_predicted;
	if (abort_count != NULL)
		*abort_count += nr_abort;
	if (cycles_count != NULL)
		*cycles_count += nr_cycles;
}
/*
 * Walk every child in @node's rb_root_in tree, recursing into each child's
 * own children before adding the child's counts to the running totals.
 * The counts of @node itself are not added.  Always returns 0.
 */
static int callchain_node_branch_counts_cumul(struct callchain_node *node,
					      u64 *branch_count,
					      u64 *predicted_count,
					      u64 *abort_count,
					      u64 *cycles_count)
{
	struct rb_node *pos;

	for (pos = rb_first(&node->rb_root_in); pos; pos = rb_next(pos)) {
		struct callchain_node *child;

		child = rb_entry(pos, struct callchain_node, rb_node_in);

		/* descendants first, then the child itself */
		callchain_node_branch_counts_cumul(child, branch_count,
						   predicted_count,
						   abort_count,
						   cycles_count);
		callchain_counts_value(child, branch_count,
				       predicted_count, abort_count,
				       cycles_count);
	}

	return 0;
}
/*
 * Compute the cumulative branch statistics of all nodes below @root.
 *
 * Each non-NULL output is reset to zero and then receives the total for
 * its statistic; NULL outputs are skipped.  Returns the result of the
 * cumulative walk.
 */
int callchain_branch_counts(struct callchain_root *root,
			    u64 *branch_count, u64 *predicted_count,
			    u64 *abort_count, u64 *cycles_count)
{
	u64 *totals[] = {
		branch_count, predicted_count, abort_count, cycles_count,
	};
	unsigned int i;

	/* start every requested total from zero */
	for (i = 0; i < sizeof(totals) / sizeof(totals[0]); i++) {
		if (totals[i] != NULL)
			*totals[i] = 0;
	}

	return callchain_node_branch_counts_cumul(&root->node,
						  branch_count,
						  predicted_count,
						  abort_count,
						  cycles_count);
}
/*
 * Format the branch statistics of one callchain entry as a parenthesized
 * suffix, e.g. " (predicted:50.0%, cycles:3, iterations:2)".
 *
 * The text is written to @fp when it is non-NULL, otherwise into @bf
 * (at most @bfsize bytes).  With no branches recorded the suffix is
 * " (calltrace)".  The predicted percentage is left out when it is 100%
 * and there were no aborts; the abort count is shown only when non-zero;
 * the iteration average is shown only when both @iter_count and
 * @samples_count are non-zero.
 *
 * Returns whatever fprintf()/scnprintf() returned for the emitted text.
 */
static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
				   u64 branch_count, u64 predicted_count,
				   u64 abort_count, u64 cycles_count,
				   u64 iter_count, u64 samples_count)
{
	double predicted_percent;
	/* ", iterations:" plus up to 20 digits and the terminating NUL */
	char iter_str[40];
	const char *str = "";
	u64 cycles;

	if (branch_count == 0) {
		if (fp)
			return fprintf(fp, " (calltrace)");

		return scnprintf(bf, bfsize, " (calltrace)");
	}

	/* All counters are unsigned, hence PRIu64 rather than PRId64. */
	if (iter_count && samples_count) {
		scnprintf(iter_str, sizeof(iter_str),
			  ", iterations:%" PRIu64 "",
			  iter_count / samples_count);
		str = iter_str;
	}

	predicted_percent = predicted_count * 100.0 / branch_count;
	cycles = cycles_count / branch_count;

	if ((predicted_percent >= 100.0) && (abort_count == 0)) {
		if (fp)
			return fprintf(fp, " (cycles:%" PRIu64 "%s)",
				       cycles, str);

		return scnprintf(bf, bfsize, " (cycles:%" PRIu64 "%s)",
				 cycles, str);
	}

	if ((predicted_percent < 100.0) && (abort_count == 0)) {
		if (fp)
			return fprintf(fp,
				" (predicted:%.1f%%, cycles:%" PRIu64 "%s)",
				predicted_percent, cycles, str);

		return scnprintf(bf, bfsize,
			" (predicted:%.1f%%, cycles:%" PRIu64 "%s)",
			predicted_percent, cycles, str);
	}

	if (fp)
		return fprintf(fp,
			" (predicted:%.1f%%, abort:%" PRIu64 ", cycles:%" PRIu64 "%s)",
			predicted_percent, abort_count, cycles, str);

	return scnprintf(bf, bfsize,
		" (predicted:%.1f%%, abort:%" PRIu64 ", cycles:%" PRIu64 "%s)",
		predicted_percent, abort_count, cycles, str);
}
/*
 * Print the branch statistics of @clist to @fp, or into @bf/@bfsize when
 * @fp is NULL.
 *
 * Branch, predicted, abort and cycle counts come from @clist alone.  The
 * iteration and sample counts are summed over every entry of @node's value
 * list when @node is given, and are zero otherwise.
 *
 * Returns the value returned by callchain_counts_printf().
 */
int callchain_list_counts__printf_value(struct callchain_node *node,
					struct callchain_list *clist,
					FILE *fp, char *bf, int bfsize)
{
	u64 total_iters = 0, total_samples = 0;

	if (node != NULL) {
		struct callchain_list *entry;

		list_for_each_entry(entry, &node->val, list) {
			total_iters += entry->iter_count;
			total_samples += entry->samples_count;
		}
	}

	return callchain_counts_printf(fp, bf, bfsize,
				       clist->branch_count,
				       clist->predicted_count,
				       clist->abort_count,
				       clist->cycles_count,
				       total_iters, total_samples);
}
static void free_callchain_node(struct callchain_node *node)
{
struct callchain_list *list, *tmp;
......
......@@ -11,11 +11,7 @@
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n"
#ifdef HAVE_DWARF_UNWIND_SUPPORT
# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n"
#else
# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|lbr)\n"
#endif
#define RECORD_SIZE_HELP \
HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording (<bytes>)\n" \
......@@ -115,6 +111,12 @@ struct callchain_list {
bool unfolded;
bool has_children;
};
u64 branch_count;
u64 predicted_count;
u64 abort_count;
u64 cycles_count;
u64 iter_count;
u64 samples_count;
char *srcline;
struct list_head list;
};
......@@ -129,6 +131,10 @@ struct callchain_cursor_node {
u64 ip;
struct map *map;
struct symbol *sym;
bool branch;
struct branch_flags branch_flags;
int nr_loop_iter;
int samples;
struct callchain_cursor_node *next;
};
......@@ -183,7 +189,9 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
}
int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
struct map *map, struct symbol *sym);
struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags,
int nr_loop_iter, int samples);
/* Close a cursor writing session. Initialize for the reader */
static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
......@@ -261,8 +269,16 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
int callchain_node__fprintf_value(struct callchain_node *node,
FILE *fp, u64 total);
int callchain_list_counts__printf_value(struct callchain_node *node,
struct callchain_list *clist,
FILE *fp, char *bf, int bfsize);
void free_callchain(struct callchain_root *root);
void decay_callchain(struct callchain_root *root);
int callchain_node__make_parent_list(struct callchain_node *node);
int callchain_branch_counts(struct callchain_root *root,
u64 *branch_count, u64 *predicted_count,
u64 *abort_count, u64 *cycles_count);
#endif /* __PERF_CALLCHAIN_H */
......@@ -594,6 +594,19 @@ static int collect_config(const char *var, const char *value,
goto out_free;
}
/* perf_config_set can contain both user and system config items,
* so we need to know where each value came from.
* This classification is needed when a particular config file
* is overwritten by the setting feature, i.e. set_config().
*/
if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) {
section->from_system_config = true;
item->from_system_config = true;
} else {
section->from_system_config = false;
item->from_system_config = false;
}
ret = set_value(item, value);
return ret;
......@@ -602,6 +615,13 @@ static int collect_config(const char *var, const char *value,
return -1;
}
/*
 * Add or update the variable @var with @value in @set, recording
 * @file_name as the config file it belongs to.
 *
 * Returns the result of collect_config().
 */
int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
			     const char *var, const char *value)
{
	int ret;

	/* collect_config() learns the originating file through this variable */
	config_file_name = file_name;
	ret = collect_config(var, value, set);

	return ret;
}
static int perf_config_set__init(struct perf_config_set *set)
{
int ret = -1;
......
......@@ -7,12 +7,14 @@
struct perf_config_item {
char *name;
char *value;
bool from_system_config;
struct list_head node;
};
struct perf_config_section {
char *name;
struct list_head items;
bool from_system_config;
struct list_head node;
};
......@@ -33,6 +35,8 @@ const char *perf_etc_perfconfig(void);
struct perf_config_set *perf_config_set__new(void);
void perf_config_set__delete(struct perf_config_set *set);
int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
const char *var, const char *value);
void perf_config__init(void);
void perf_config__exit(void);
void perf_config__refresh(void);
......
......@@ -1616,7 +1616,11 @@ static int add_callchain_ip(struct thread *thread,
struct symbol **parent,
struct addr_location *root_al,
u8 *cpumode,
u64 ip)
u64 ip,
bool branch,
struct branch_flags *flags,
int nr_loop_iter,
int samples)
{
struct addr_location al;
......@@ -1668,7 +1672,8 @@ static int add_callchain_ip(struct thread *thread,
if (symbol_conf.hide_unresolved && al.sym == NULL)
return 0;
return callchain_cursor_append(cursor, al.addr, al.map, al.sym);
return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
branch, flags, nr_loop_iter, samples);
}
struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
......@@ -1757,7 +1762,9 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
/* LBR only affects the user callchain */
if (i != chain_nr) {
struct branch_stack *lbr_stack = sample->branch_stack;
int lbr_nr = lbr_stack->nr, j;
int lbr_nr = lbr_stack->nr, j, k;
bool branch;
struct branch_flags *flags;
/*
* LBR callstack can only get user call chain.
* The mix_chain_nr is kernel call chain
......@@ -1772,23 +1779,41 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
for (j = 0; j < mix_chain_nr; j++) {
int err;
branch = false;
flags = NULL;
if (callchain_param.order == ORDER_CALLEE) {
if (j < i + 1)
ip = chain->ips[j];
else if (j > i + 1)
ip = lbr_stack->entries[j - i - 2].from;
else
else if (j > i + 1) {
k = j - i - 2;
ip = lbr_stack->entries[k].from;
branch = true;
flags = &lbr_stack->entries[k].flags;
} else {
ip = lbr_stack->entries[0].to;
branch = true;
flags = &lbr_stack->entries[0].flags;
}
} else {
if (j < lbr_nr)
ip = lbr_stack->entries[lbr_nr - j - 1].from;
if (j < lbr_nr) {
k = lbr_nr - j - 1;
ip = lbr_stack->entries[k].from;
branch = true;
flags = &lbr_stack->entries[k].flags;
}
else if (j > lbr_nr)
ip = chain->ips[i + 1 - (j - lbr_nr)];
else
else {
ip = lbr_stack->entries[0].to;
branch = true;
flags = &lbr_stack->entries[0].flags;
}
}
err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
branch, flags, 0, 0);
if (err)
return (err < 0) ? err : 0;
}
......@@ -1813,6 +1838,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int i, j, err, nr_entries;
int skip_idx = -1;
int first_call = 0;
int nr_loop_iter;
if (perf_evsel__has_branch_callstack(evsel)) {
err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
......@@ -1868,14 +1894,37 @@ static int thread__resolve_callchain_sample(struct thread *thread,
be[i] = branch->entries[branch->nr - i - 1];
}
nr_loop_iter = nr;
nr = remove_loops(be, nr);
/*
* Get the number of iterations.
* It's only approximation, but good enough in practice.
*/
if (nr_loop_iter > nr)
nr_loop_iter = nr_loop_iter - nr + 1;
else
nr_loop_iter = 0;
for (i = 0; i < nr; i++) {
err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].to);
if (i == nr - 1)
err = add_callchain_ip(thread, cursor, parent,
root_al,
NULL, be[i].to,
true, &be[i].flags,
nr_loop_iter, 1);
else
err = add_callchain_ip(thread, cursor, parent,
root_al,
NULL, be[i].to,
true, &be[i].flags,
0, 0);
if (!err)
err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].from);
NULL, be[i].from,
true, &be[i].flags,
0, 0);
if (err == -EINVAL)
break;
if (err)
......@@ -1903,7 +1952,9 @@ static int thread__resolve_callchain_sample(struct thread *thread,
if (ip < PERF_CONTEXT_MAX)
++nr_entries;
err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
false, NULL, 0, 0);
if (err)
return (err < 0) ? err : 0;
......@@ -1919,7 +1970,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
if (symbol_conf.hide_unresolved && entry->sym == NULL)
return 0;
return callchain_cursor_append(cursor, entry->ip,
entry->map, entry->sym);
entry->map, entry->sym,
false, NULL, 0, 0);
}
static int thread__resolve_callchain_unwind(struct thread *thread,
......
......@@ -100,6 +100,7 @@ struct symbol_conf {
show_total_period,
use_callchain,
cumulate_callchain,
show_branchflag_count,
exclude_other,
show_cpu_utilization,
initialized,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment