Commit 43f4e627 authored by Ingo Molnar

Merge tag 'perf-core-for-mingo-5.1-20190214' of...

Merge tag 'perf-core-for-mingo-5.1-20190214' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

perf list:

  Jiri Olsa:

  - Display metric expressions for --details option

perf record:

  Alexey Budankov:

  - Implement --affinity=node|cpu option, leftover, the other patches
    in this kit were already applied.

perf trace:

  Arnaldo Carvalho de Melo:

  - Fix segfaults due to not properly handling negative file descriptor syscall args.

  - Fix segfault related to the 'waitid' 'options' prefix showing logic.

  - Filter out 'gnome-terminal*' if it is a parent of 'perf trace', to reduce the
    syscall feedback loop in system wide sessions.

BPF:

  Song Liu:

  - Silence "Couldn't synthesize bpf events" warning for EPERM.

Build system:

  Arnaldo Carvalho de Melo:

  - Fix the test-all.c feature detection fast path that was broken for
    quite a while leading to longer build times.

Event parsing:

  Jiri Olsa:

  - Fix legacy events symbol separator parsing

cs-etm:

  Mathieu Poirier:

  - Fix some error path return errors and plug some memory leaks.

  - Add proper header file for symbols

  - Remove unused structure fields.

  - Modularize auxtrace_buffer fetch, decoder and packet processing loop.

Vendor events:

  Paul Clarke:

  - Add assorted metrics for the Power8 and Power9 architectures.

perf report:

  Thomas Richter:

  - Add s390 diagnostic sampling descriptor size
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 02106f88 44ec8396
...@@ -53,10 +53,6 @@ FEATURE_TESTS_BASIC := \ ...@@ -53,10 +53,6 @@ FEATURE_TESTS_BASIC := \
libslang \ libslang \
libcrypto \ libcrypto \
libunwind \ libunwind \
libunwind-x86 \
libunwind-x86_64 \
libunwind-arm \
libunwind-aarch64 \
pthread-attr-setaffinity-np \ pthread-attr-setaffinity-np \
pthread-barrier \ pthread-barrier \
reallocarray \ reallocarray \
...@@ -70,7 +66,6 @@ FEATURE_TESTS_BASIC := \ ...@@ -70,7 +66,6 @@ FEATURE_TESTS_BASIC := \
sched_getcpu \ sched_getcpu \
sdt \ sdt \
setns \ setns \
libopencsd \
libaio libaio
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
...@@ -84,6 +79,11 @@ FEATURE_TESTS_EXTRA := \ ...@@ -84,6 +79,11 @@ FEATURE_TESTS_EXTRA := \
libbabeltrace \ libbabeltrace \
libbfd-liberty \ libbfd-liberty \
libbfd-liberty-z \ libbfd-liberty-z \
libopencsd \
libunwind-x86 \
libunwind-x86_64 \
libunwind-arm \
libunwind-aarch64 \
libunwind-debug-frame \ libunwind-debug-frame \
libunwind-debug-frame-arm \ libunwind-debug-frame-arm \
libunwind-debug-frame-aarch64 \ libunwind-debug-frame-aarch64 \
......
...@@ -170,14 +170,14 @@ ...@@ -170,14 +170,14 @@
# include "test-setns.c" # include "test-setns.c"
#undef main #undef main
#define main main_test_libopencsd
# include "test-libopencsd.c"
#undef main
#define main main_test_libaio #define main main_test_libaio
# include "test-libaio.c" # include "test-libaio.c"
#undef main #undef main
#define main main_test_reallocarray
# include "test-reallocarray.c"
#undef main
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
main_test_libpython(); main_test_libpython();
...@@ -217,8 +217,8 @@ int main(int argc, char *argv[]) ...@@ -217,8 +217,8 @@ int main(int argc, char *argv[])
main_test_sched_getcpu(); main_test_sched_getcpu();
main_test_sdt(); main_test_sdt();
main_test_setns(); main_test_setns();
main_test_libopencsd();
main_test_libaio(); main_test_libaio();
main_test_reallocarray();
return 0; return 0;
} }
...@@ -8,3 +8,4 @@ int main(void) ...@@ -8,3 +8,4 @@ int main(void)
free(get_current_dir_name()); free(get_current_dir_name());
return 0; return 0;
} }
#undef _GNU_SOURCE
...@@ -7,3 +7,4 @@ int main(void) ...@@ -7,3 +7,4 @@ int main(void)
return 0; return 0;
} }
#undef _GNU_SOURCE
...@@ -6,3 +6,5 @@ int main(void) ...@@ -6,3 +6,5 @@ int main(void)
{ {
return !!reallocarray(NULL, 1, 1); return !!reallocarray(NULL, 1, 1);
} }
#undef _GNU_SOURCE
...@@ -8,3 +8,5 @@ int main(void) ...@@ -8,3 +8,5 @@ int main(void)
{ {
return sched_getcpu(); return sched_getcpu();
} }
#undef _GNU_SOURCE
...@@ -5,3 +5,4 @@ int main(void) ...@@ -5,3 +5,4 @@ int main(void)
{ {
return setns(0, 0); return setns(0, 0);
} }
#undef _GNU_SOURCE
...@@ -46,10 +46,10 @@ CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_ ...@@ -46,10 +46,10 @@ CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_
CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))" CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))"
CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)" CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)"
libperf-y += util/ perf-y += util/
libperf-y += arch/ perf-y += arch/
libperf-y += ui/ perf-y += ui/
libperf-y += scripts/ perf-y += scripts/
libperf-$(CONFIG_TRACE) += trace/beauty/ perf-$(CONFIG_TRACE) += trace/beauty/
gtk-y += ui/gtk/ gtk-y += ui/gtk/
...@@ -454,6 +454,11 @@ Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default: ...@@ -454,6 +454,11 @@ Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default:
Asynchronous mode is supported only when linking Perf tool with libc library Asynchronous mode is supported only when linking Perf tool with libc library
providing implementation for Posix AIO API. providing implementation for Posix AIO API.
--affinity=mode::
Set affinity mask of trace reading thread according to the policy defined by 'mode' value:
node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
cpu - thread affinity mask is set to cpu of the processed mmap buffer
--all-kernel:: --all-kernel::
Configure all used events to run in kernel space. Configure all used events to run in kernel space.
......
...@@ -109,6 +109,13 @@ FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) ...@@ -109,6 +109,13 @@ FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
FEATURE_CHECK_LDFLAGS-libunwind-arm = -lunwind -lunwind-arm
FEATURE_CHECK_LDFLAGS-libunwind-aarch64 = -lunwind -lunwind-aarch64
FEATURE_CHECK_LDFLAGS-libunwind-x86 = -lunwind -llzma -lunwind-x86
FEATURE_CHECK_LDFLAGS-libunwind-x86_64 = -lunwind -llzma -lunwind-x86_64
FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
ifdef CSINCLUDES ifdef CSINCLUDES
LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
endif endif
...@@ -218,6 +225,8 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) ...@@ -218,6 +225,8 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_LDFLAGS-libaio = -lrt
CFLAGS += -fno-omit-frame-pointer CFLAGS += -fno-omit-frame-pointer
CFLAGS += -ggdb3 CFLAGS += -ggdb3
CFLAGS += -funwind-tables CFLAGS += -funwind-tables
...@@ -386,7 +395,8 @@ ifeq ($(feature-setns), 1) ...@@ -386,7 +395,8 @@ ifeq ($(feature-setns), 1)
$(call detected,CONFIG_SETNS) $(call detected,CONFIG_SETNS)
endif endif
ifndef NO_CORESIGHT ifdef CORESIGHT
$(call feature_check,libopencsd)
ifeq ($(feature-libopencsd), 1) ifeq ($(feature-libopencsd), 1)
CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS)
LDFLAGS += $(LIBOPENCSD_LDFLAGS) LDFLAGS += $(LIBOPENCSD_LDFLAGS)
...@@ -482,6 +492,7 @@ endif ...@@ -482,6 +492,7 @@ endif
ifndef NO_LIBUNWIND ifndef NO_LIBUNWIND
have_libunwind := have_libunwind :=
$(call feature_check,libunwind-x86)
ifeq ($(feature-libunwind-x86), 1) ifeq ($(feature-libunwind-x86), 1)
$(call detected,CONFIG_LIBUNWIND_X86) $(call detected,CONFIG_LIBUNWIND_X86)
CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT
...@@ -490,6 +501,7 @@ ifndef NO_LIBUNWIND ...@@ -490,6 +501,7 @@ ifndef NO_LIBUNWIND
have_libunwind = 1 have_libunwind = 1
endif endif
$(call feature_check,libunwind-aarch64)
ifeq ($(feature-libunwind-aarch64), 1) ifeq ($(feature-libunwind-aarch64), 1)
$(call detected,CONFIG_LIBUNWIND_AARCH64) $(call detected,CONFIG_LIBUNWIND_AARCH64)
CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT
......
...@@ -102,7 +102,7 @@ include ../scripts/utilities.mak ...@@ -102,7 +102,7 @@ include ../scripts/utilities.mak
# When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if # When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if
# llvm-config is not in $PATH. # llvm-config is not in $PATH.
# #
# Define NO_CORESIGHT if you do not want support for CoreSight trace decoding. # Define CORESIGHT if you DO WANT support for CoreSight trace decoding.
# #
# Define NO_AIO if you do not want support of Posix AIO based trace # Define NO_AIO if you do not want support of Posix AIO based trace
# streaming for record mode. Currently Posix AIO trace streaming is # streaming for record mode. Currently Posix AIO trace streaming is
...@@ -344,9 +344,9 @@ endif ...@@ -344,9 +344,9 @@ endif
export PERL_PATH export PERL_PATH
LIB_FILE=$(OUTPUT)libperf.a LIBPERF_A=$(OUTPUT)libperf.a
PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) PERFLIBS = $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD)
ifndef NO_LIBBPF ifndef NO_LIBBPF
PERFLIBS += $(LIBBPF) PERFLIBS += $(LIBBPF)
endif endif
...@@ -549,6 +549,8 @@ JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o ...@@ -549,6 +549,8 @@ JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o
PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o
LIBPERF_IN := $(OUTPUT)libperf-in.o
export JEVENTS export JEVENTS
build := -f $(srctree)/tools/build/Makefile.build dir=. obj build := -f $(srctree)/tools/build/Makefile.build dir=. obj
...@@ -565,9 +567,12 @@ $(JEVENTS): $(JEVENTS_IN) ...@@ -565,9 +567,12 @@ $(JEVENTS): $(JEVENTS_IN)
$(PMU_EVENTS_IN): $(JEVENTS) FORCE $(PMU_EVENTS_IN): $(JEVENTS) FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) $(LIBPERF_IN): prepare FORCE
$(Q)$(MAKE) $(build)=libperf
$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
$(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ $(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBS) -o $@
$(GTK_IN): FORCE $(GTK_IN): FORCE
$(Q)$(MAKE) $(build)=gtk $(Q)$(MAKE) $(build)=gtk
...@@ -683,12 +688,7 @@ endif ...@@ -683,12 +688,7 @@ endif
$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
LIBPERF_IN := $(OUTPUT)libperf-in.o $(LIBPERF_A): $(LIBPERF_IN)
$(LIBPERF_IN): prepare FORCE
$(Q)$(MAKE) $(build)=libperf
$(LIB_FILE): $(LIBPERF_IN)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS)
LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)'
...@@ -910,7 +910,7 @@ python-clean: ...@@ -910,7 +910,7 @@ python-clean:
$(python-clean) $(python-clean)
clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected $(Q)$(RM) $(OUTPUT).config-detected
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
......
libperf-y += common.o perf-y += common.o
libperf-y += $(SRCARCH)/ perf-y += $(SRCARCH)/
libperf-y += util/ perf-y += util/
libperf-$(CONFIG_DWARF_UNWIND) += tests/ perf-$(CONFIG_DWARF_UNWIND) += tests/
libperf-y += regs_load.o perf-y += regs_load.o
libperf-y += dwarf-unwind.o perf-y += dwarf-unwind.o
libperf-y += vectors-page.o perf-y += vectors-page.o
libperf-y += arch-tests.o perf-y += arch-tests.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o perf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o
libperf-y += util/ perf-y += util/
libperf-$(CONFIG_DWARF_UNWIND) += tests/ perf-$(CONFIG_DWARF_UNWIND) += tests/
libperf-y += regs_load.o perf-y += regs_load.o
libperf-y += dwarf-unwind.o perf-y += dwarf-unwind.o
libperf-y += arch-tests.o perf-y += arch-tests.o
libperf-y += header.o perf-y += header.o
libperf-y += sym-handling.o perf-y += sym-handling.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
../../arm/util/auxtrace.o \ ../../arm/util/auxtrace.o \
../../arm/util/cs-etm.o \ ../../arm/util/cs-etm.o \
arm-spe.o arm-spe.o
libperf-y += util/ perf-y += util/
libperf-y += header.o perf-y += header.o
libperf-y += util/ perf-y += util/
libperf-y += tests/ perf-y += tests/
libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o perf-$(CONFIG_DWARF_UNWIND) += regs_load.o
libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
libperf-y += arch-tests.o perf-y += arch-tests.o
libperf-y += header.o perf-y += header.o
libperf-y += sym-handling.o perf-y += sym-handling.o
libperf-y += kvm-stat.o perf-y += kvm-stat.o
libperf-y += perf_regs.o perf-y += perf_regs.o
libperf-y += mem-events.o perf-y += mem-events.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_DWARF) += skip-callchain-idx.o perf-$(CONFIG_DWARF) += skip-callchain-idx.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-y += util/ perf-y += util/
libperf-y += header.o perf-y += header.o
libperf-y += kvm-stat.o perf-y += kvm-stat.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-y += machine.o perf-y += machine.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o perf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-y += util/ perf-y += util/
libperf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-y += util/ perf-y += util/
libperf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-y += util/ perf-y += util/
libperf-y += tests/ perf-y += tests/
libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o perf-$(CONFIG_DWARF_UNWIND) += regs_load.o
libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
libperf-y += arch-tests.o perf-y += arch-tests.o
libperf-y += rdpmc.o perf-y += rdpmc.o
libperf-y += perf-time-to-tsc.o perf-y += perf-time-to-tsc.o
libperf-$(CONFIG_AUXTRACE) += insn-x86.o perf-$(CONFIG_AUXTRACE) += insn-x86.o
libperf-$(CONFIG_X86_64) += bp-modify.o perf-$(CONFIG_X86_64) += bp-modify.o
libperf-y += header.o perf-y += header.o
libperf-y += tsc.o perf-y += tsc.o
libperf-y += pmu.o perf-y += pmu.o
libperf-y += kvm-stat.o perf-y += kvm-stat.o
libperf-y += perf_regs.o perf-y += perf_regs.o
libperf-y += group.o perf-y += group.o
libperf-y += machine.o perf-y += machine.o
libperf-y += event.o perf-y += event.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o perf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt.o perf-$(CONFIG_AUXTRACE) += intel-pt.o
libperf-$(CONFIG_AUXTRACE) += intel-bts.o perf-$(CONFIG_AUXTRACE) += intel-bts.o
libperf-y += util/ perf-y += util/
libperf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o
...@@ -82,9 +82,9 @@ int cmd_list(int argc, const char **argv) ...@@ -82,9 +82,9 @@ int cmd_list(int argc, const char **argv)
else if (strcmp(argv[i], "sdt") == 0) else if (strcmp(argv[i], "sdt") == 0)
print_sdt_events(NULL, NULL, raw_dump); print_sdt_events(NULL, NULL, raw_dump);
else if (strcmp(argv[i], "metric") == 0) else if (strcmp(argv[i], "metric") == 0)
metricgroup__print(true, false, NULL, raw_dump); metricgroup__print(true, false, NULL, raw_dump, details_flag);
else if (strcmp(argv[i], "metricgroup") == 0) else if (strcmp(argv[i], "metricgroup") == 0)
metricgroup__print(false, true, NULL, raw_dump); metricgroup__print(false, true, NULL, raw_dump, details_flag);
else if ((sep = strchr(argv[i], ':')) != NULL) { else if ((sep = strchr(argv[i], ':')) != NULL) {
int sep_idx; int sep_idx;
...@@ -102,7 +102,7 @@ int cmd_list(int argc, const char **argv) ...@@ -102,7 +102,7 @@ int cmd_list(int argc, const char **argv)
s[sep_idx] = '\0'; s[sep_idx] = '\0';
print_tracepoint_events(s, s + sep_idx + 1, raw_dump); print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
print_sdt_events(s, s + sep_idx + 1, raw_dump); print_sdt_events(s, s + sep_idx + 1, raw_dump);
metricgroup__print(true, true, s, raw_dump); metricgroup__print(true, true, s, raw_dump, details_flag);
free(s); free(s);
} else { } else {
if (asprintf(&s, "*%s*", argv[i]) < 0) { if (asprintf(&s, "*%s*", argv[i]) < 0) {
...@@ -119,7 +119,7 @@ int cmd_list(int argc, const char **argv) ...@@ -119,7 +119,7 @@ int cmd_list(int argc, const char **argv)
details_flag); details_flag);
print_tracepoint_events(NULL, s, raw_dump); print_tracepoint_events(NULL, s, raw_dump);
print_sdt_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump);
metricgroup__print(true, true, NULL, raw_dump); metricgroup__print(true, true, NULL, raw_dump, details_flag);
free(s); free(s);
} }
} }
......
...@@ -1656,6 +1656,21 @@ static int parse_clockid(const struct option *opt, const char *str, int unset) ...@@ -1656,6 +1656,21 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
return -1; return -1;
} }
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
struct record_opts *opts = (struct record_opts *)opt->value;
if (unset || !str)
return 0;
if (!strcasecmp(str, "node"))
opts->affinity = PERF_AFFINITY_NODE;
else if (!strcasecmp(str, "cpu"))
opts->affinity = PERF_AFFINITY_CPU;
return 0;
}
static int record__parse_mmap_pages(const struct option *opt, static int record__parse_mmap_pages(const struct option *opt,
const char *str, const char *str,
int unset __maybe_unused) int unset __maybe_unused)
...@@ -1964,6 +1979,9 @@ static struct option __record_options[] = { ...@@ -1964,6 +1979,9 @@ static struct option __record_options[] = {
&nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
record__aio_parse), record__aio_parse),
#endif #endif
OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
"Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
record__parse_affinity),
OPT_END() OPT_END()
}; };
......
...@@ -1041,6 +1041,9 @@ static const size_t trace__entry_str_size = 2048; ...@@ -1041,6 +1041,9 @@ static const size_t trace__entry_str_size = 2048;
static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd) static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
{ {
if (fd < 0)
return NULL;
if (fd > ttrace->files.max) { if (fd > ttrace->files.max) {
struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file)); struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
...@@ -2768,7 +2771,8 @@ static int trace__set_filter_loop_pids(struct trace *trace) ...@@ -2768,7 +2771,8 @@ static int trace__set_filter_loop_pids(struct trace *trace)
if (parent == NULL) if (parent == NULL)
break; break;
if (!strcmp(thread__comm_str(parent), "sshd")) { if (!strcmp(thread__comm_str(parent), "sshd") ||
strstarts(thread__comm_str(parent), "gnome-terminal")) {
pids[nr++] = parent->tid; pids[nr++] = parent->tid;
break; break;
} }
......
This source diff could not be displayed because it is too large. You can view the blob instead.
[
{
"MetricExpr": "PM_BR_MPRED_CMPL / PM_BR_PRED * 100",
"MetricGroup": "branch_prediction",
"MetricName": "br_misprediction_percent"
},
{
"BriefDescription": "Count cache branch misprediction per instruction",
"MetricExpr": "PM_BR_MPRED_CCACHE / PM_RUN_INST_CMPL * 100",
"MetricGroup": "branch_prediction",
"MetricName": "ccache_mispredict_rate_percent"
},
{
"BriefDescription": "Count cache branch misprediction",
"MetricExpr": "PM_BR_MPRED_CCACHE / PM_BR_PRED_CCACHE * 100",
"MetricGroup": "branch_prediction",
"MetricName": "ccache_misprediction_percent"
},
{
"BriefDescription": "Link stack branch misprediction",
"MetricExpr": "PM_BR_MPRED_LSTACK / PM_RUN_INST_CMPL * 100",
"MetricGroup": "branch_prediction",
"MetricName": "lstack_mispredict_rate_percent"
},
{
"BriefDescription": "Link stack branch misprediction",
"MetricExpr": "PM_BR_MPRED_LSTACK/ PM_BR_PRED_LSTACK * 100",
"MetricGroup": "branch_prediction",
"MetricName": "lstack_misprediction_percent"
},
{
"BriefDescription": "% Branches Taken",
"MetricExpr": "PM_BR_TAKEN_CMPL * 100 / PM_BRU_FIN",
"MetricGroup": "branch_prediction",
"MetricName": "taken_branches_percent"
},
{
"BriefDescription": "Completion stall due to a Branch Unit",
"MetricExpr": "PM_CMPLU_STALL_BRU/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "bru_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish",
"MetricExpr": "PM_CMPLU_STALL_CRYPTO/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "crypto_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest",
"MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dcache_miss_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish.",
"MetricExpr": "PM_CMPLU_STALL_DFLONG/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dflong_stall_cpi"
},
{
"BriefDescription": "Stalls due to short latency decimal floating ops.",
"MetricExpr": "(PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_DFLONG)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dfu_other_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish.",
"MetricExpr": "PM_CMPLU_STALL_DFU/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dfu_stall_cpi"
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved off node memory/cache",
"MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_REMOTE)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_distant_stall_cpi"
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)",
"MetricExpr": "PM_CMPLU_STALL_DMISS_L21_L31/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_l21_l31_stall_cpi"
},
{
"BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict",
"MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_l2l3_conflict_stall_cpi"
},
{
"BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 without conflict",
"MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_l2l3_noconflict_stall_cpi"
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3",
"MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_l2l3_stall_cpi"
},
{
"BriefDescription": "Completion stall due to cache miss resolving missed the L3",
"MetricExpr": "PM_CMPLU_STALL_DMISS_L3MISS/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_l3miss_stall_cpi"
},
{
"BriefDescription": "Completion stall due to cache miss that resolves in local memory",
"MetricExpr": "PM_CMPLU_STALL_DMISS_LMEM/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_lmem_stall_cpi"
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved outside of local memory",
"MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_non_local_stall_cpi"
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)",
"MetricExpr": "PM_CMPLU_STALL_DMISS_REMOTE/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_remote_stall_cpi"
},
{
"BriefDescription": "Stalls due to short latency double precision ops.",
"MetricExpr": "(PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DPLONG)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dp_other_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.",
"MetricExpr": "PM_CMPLU_STALL_DP/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dp_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.",
"MetricExpr": "PM_CMPLU_STALL_DPLONG/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dplong_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction is an EIEIO waiting for response from L2",
"MetricExpr": "PM_CMPLU_STALL_EIEIO/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "eieio_stall_cpi"
},
{
"BriefDescription": "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full",
"MetricExpr": "PM_CMPLU_STALL_EMQ_FULL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "emq_full_stall_cpi"
},
{
"MetricExpr": "(PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "emq_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load or store that suffered a translation miss",
"MetricExpr": "PM_CMPLU_STALL_ERAT_MISS/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "erat_miss_stall_cpi"
},
{
"BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete",
"MetricExpr": "PM_CMPLU_STALL_EXCEPTION/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "exception_stall_cpi"
},
{
"BriefDescription": "Completion stall due to execution units for other reasons.",
"MetricExpr": "(PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_PM - PM_CMPLU_STALL_CRYPTO - PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "exec_unit_other_stall_cpi"
},
{
"BriefDescription": "Completion stall due to execution units (FXU/VSU/CRU)",
"MetricExpr": "PM_CMPLU_STALL_EXEC_UNIT/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "exec_unit_stall_cpi"
},
{
"BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion",
"MetricExpr": "PM_CMPLU_STALL_FLUSH_ANY_THREAD/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "flush_any_thread_stall_cpi"
},
{
"BriefDescription": "Completion stall due to a long latency scalar fixed point instruction (division, square root)",
"MetricExpr": "PM_CMPLU_STALL_FXLONG/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "fxlong_stall_cpi"
},
{
"BriefDescription": "Stalls due to short latency integer ops",
"MetricExpr": "(PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_FXLONG)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "fxu_other_stall_cpi"
},
{
"BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes",
"MetricExpr": "PM_CMPLU_STALL_FXU/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "fxu_stall_cpi"
},
{
"MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "issue_hold_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied",
"MetricExpr": "PM_CMPLU_STALL_LARX/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "larx_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data",
"MetricExpr": "PM_CMPLU_STALL_LHS/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lhs_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full",
"MetricExpr": "PM_CMPLU_STALL_LMQ_FULL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lmq_full_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish",
"MetricExpr": "PM_CMPLU_STALL_LOAD_FINISH/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "load_finish_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load that was held in LSAQ because the LRQ was full",
"MetricExpr": "PM_CMPLU_STALL_LRQ_FULL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lrq_full_stall_cpi"
},
{
"BriefDescription": "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others",
"MetricExpr": "PM_CMPLU_STALL_LRQ_OTHER/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lrq_other_stall_cpi"
},
{
"MetricExpr": "(PM_CMPLU_STALL_LMQ_FULL + PM_CMPLU_STALL_ST_FWD + PM_CMPLU_STALL_LHS + PM_CMPLU_STALL_LSU_MFSPR + PM_CMPLU_STALL_LARX + PM_CMPLU_STALL_LRQ_OTHER)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lrq_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch",
"MetricExpr": "PM_CMPLU_STALL_LSAQ_ARB/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsaq_arb_stall_cpi"
},
{
"MetricExpr": "(PM_CMPLU_STALL_LRQ_FULL + PM_CMPLU_STALL_SRQ_FULL + PM_CMPLU_STALL_LSAQ_ARB)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsaq_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish",
"MetricExpr": "PM_CMPLU_STALL_LSU_FIN/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsu_fin_stall_cpi"
},
{
"BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete",
"MetricExpr": "PM_CMPLU_STALL_LSU_FLUSH_NEXT/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsu_flush_next_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned",
"MetricExpr": "PM_CMPLU_STALL_LSU_MFSPR/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsu_mfspr_stall_cpi"
},
{
"BriefDescription": "Completion LSU stall for other reasons",
"MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_LSU_FIN - PM_CMPLU_STALL_STORE_FINISH - PM_CMPLU_STALL_STORE_DATA - PM_CMPLU_STALL_EIEIO - PM_CMPLU_STALL_STCX - PM_CMPLU_STALL_SLB - PM_CMPLU_STALL_TEND - PM_CMPLU_STALL_PASTE - PM_CMPLU_STALL_TLBIE - PM_CMPLU_STALL_STORE_PIPE_ARB - PM_CMPLU_STALL_STORE_FIN_ARB - PM_CMPLU_STALL_LOAD_FINISH + PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_LMQ_FULL - PM_CMPLU_STALL_ST_FWD - PM_CMPLU_STALL_LHS - PM_CMPLU_STALL_LSU_MFSPR - PM_CMPLU_STALL_LARX - PM_CMPLU_STALL_LRQ_OTHER + PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL - PM_CMPLU_STALL_LRQ_FULL - PM_CMPLU_STALL_SRQ_FULL - PM_CMPLU_STALL_LSAQ_ARB) / PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsu_other_stall_cpi"
},
{
"BriefDescription": "Completion stall by LSU instruction",
"MetricExpr": "PM_CMPLU_STALL_LSU/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsu_stall_cpi"
},
{
"BriefDescription": "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)",
"MetricExpr": "PM_CMPLU_STALL_MTFPSCR/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "mtfpscr_stall_cpi"
},
{
"BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT",
"MetricExpr": "PM_CMPLU_STALL_NESTED_TBEGIN/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "nested_tbegin_stall_cpi"
},
{
"BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay",
"MetricExpr": "PM_CMPLU_STALL_NESTED_TEND/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "nested_tend_stall_cpi"
},
{
"BriefDescription": "Number of cycles the ICT has no itags assigned to this thread",
"MetricExpr": "PM_ICT_NOSLOT_CYC/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "nothing_dispatched_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was one that must finish at dispatch.",
"MetricExpr": "PM_CMPLU_STALL_NTC_DISP_FIN/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "ntc_disp_fin_stall_cpi"
},
{
"BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. This event is used to account for cycles in which work is being completed in the CPI stack",
"MetricExpr": "PM_NTC_FIN/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "ntc_fin_cpi"
},
{
"BriefDescription": "Completion stall due to ntc flush",
"MetricExpr": "PM_CMPLU_STALL_NTC_FLUSH/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "ntc_flush_stall_cpi"
},
{
"BriefDescription": "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)",
"MetricExpr": "PM_NTC_ISSUE_HELD_ARB/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "ntc_issue_held_arb_cpi"
},
{
"BriefDescription": "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it",
"MetricExpr": "PM_NTC_ISSUE_HELD_DARQ_FULL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "ntc_issue_held_darq_full_cpi"
},
{
"BriefDescription": "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU",
"MetricExpr": "PM_NTC_ISSUE_HELD_OTHER/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "ntc_issue_held_other_cpi"
},
{
"BriefDescription": "Cycles unaccounted for.",
"MetricExpr": "(PM_RUN_CYC - PM_1PLUS_PPC_CMPL - PM_CMPLU_STALL_THRD - PM_CMPLU_STALL - PM_ICT_NOSLOT_CYC)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "other_cpi"
},
{
"BriefDescription": "Completion stall for other reasons",
"MetricExpr": "PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "other_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a paste waiting for response from L2",
"MetricExpr": "PM_CMPLU_STALL_PASTE/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "paste_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish.",
"MetricExpr": "PM_CMPLU_STALL_PM/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "pm_stall_cpi"
},
{
"BriefDescription": "Run cycles per run instruction",
"MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "run_cpi"
},
{
"BriefDescription": "Run_cycles",
"MetricExpr": "PM_RUN_CYC/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "run_cyc_cpi"
},
{
"MetricExpr": "(PM_CMPLU_STALL_FXU + PM_CMPLU_STALL_DP + PM_CMPLU_STALL_DFU + PM_CMPLU_STALL_PM + PM_CMPLU_STALL_CRYPTO)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "scalar_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was awaiting L2 response for an SLB",
"MetricExpr": "PM_CMPLU_STALL_SLB/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "slb_stall_cpi"
},
{
"BriefDescription": "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC",
"MetricExpr": "PM_CMPLU_STALL_SPEC_FINISH/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "spec_finish_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full",
"MetricExpr": "PM_CMPLU_STALL_SRQ_FULL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "srq_full_stall_cpi"
},
{
"MetricExpr": "(PM_CMPLU_STALL_STORE_DATA + PM_CMPLU_STALL_EIEIO + PM_CMPLU_STALL_STCX + PM_CMPLU_STALL_SLB + PM_CMPLU_STALL_TEND + PM_CMPLU_STALL_PASTE + PM_CMPLU_STALL_TLBIE + PM_CMPLU_STALL_STORE_PIPE_ARB + PM_CMPLU_STALL_STORE_FIN_ARB)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "srq_stall_cpi"
},
{
"BriefDescription": "Completion stall due to store forward",
"MetricExpr": "PM_CMPLU_STALL_ST_FWD/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "st_fwd_stall_cpi"
},
{
"BriefDescription": "Nothing completed and ICT not empty",
"MetricExpr": "PM_CMPLU_STALL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a stcx waiting for response from L2",
"MetricExpr": "PM_CMPLU_STALL_STCX/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "stcx_stall_cpi"
},
{
"BriefDescription": "Finish stall because the next to finish instruction was a store waiting on data",
"MetricExpr": "PM_CMPLU_STALL_STORE_DATA/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "store_data_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe",
"MetricExpr": "PM_CMPLU_STALL_STORE_FIN_ARB/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "store_fin_arb_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish",
"MetricExpr": "PM_CMPLU_STALL_STORE_FINISH/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "store_finish_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject. This means the instruction is ready to relaunch and tried once but lost arbitration",
"MetricExpr": "PM_CMPLU_STALL_STORE_PIPE_ARB/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "store_pipe_arb_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a tend instruction awaiting response from L2",
"MetricExpr": "PM_CMPLU_STALL_TEND/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "tend_stall_cpi"
},
{
"BriefDescription": "Completion Stalled because the thread was blocked",
"MetricExpr": "PM_CMPLU_STALL_THRD/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "thread_block_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a tlbie waiting for response from L2",
"MetricExpr": "PM_CMPLU_STALL_TLBIE/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "tlbie_stall_cpi"
},
{
"BriefDescription": "Vector stalls due to small latency double precision ops",
"MetricExpr": "(PM_CMPLU_STALL_VDP - PM_CMPLU_STALL_VDPLONG)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vdp_other_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish.",
"MetricExpr": "PM_CMPLU_STALL_VDP/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vdp_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.",
"MetricExpr": "PM_CMPLU_STALL_VDPLONG/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vdplong_stall_cpi"
},
{
"MetricExpr": "(PM_CMPLU_STALL_VFXU + PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vector_stall_cpi"
},
{
"BriefDescription": "Completion stall due to a long latency vector fixed point instruction (division, square root)",
"MetricExpr": "PM_CMPLU_STALL_VFXLONG/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vfxlong_stall_cpi"
},
{
"BriefDescription": "Vector stalls due to small latency integer ops",
"MetricExpr": "(PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VFXLONG)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vfxu_other_stall_cpi"
},
{
"BriefDescription": "Finish stall due to a vector fixed point instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes",
"MetricExpr": "PM_CMPLU_STALL_VFXU/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vfxu_stall_cpi"
},
{
"BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Modified) per Inst",
"MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_dl2l3_mod_rate_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Shared) per Inst",
"MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_dl2l3_shr_rate_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Distant Memory per Inst",
"MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_dmem_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L2, other core per Inst",
"MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l21_mod_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L2, other core per Inst",
"MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l21_shr_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from L2 per Inst",
"MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l2_miss_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from L2 per Inst",
"MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l2_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L3 M state, other core per Inst",
"MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l31_mod_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L3 S tate, other core per Inst",
"MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l31_shr_rate_percent"
},
{
"BriefDescription": "% of DL1 Reloads that came from the L3 and were brought into the L3 by a prefetch, per instruction completed",
"MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l3_mepf_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from L3 per Inst",
"MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l3_miss_rate_percent"
},
{
"BriefDescription": "% of DL1 Reloads from L3 per Inst",
"MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l3_rate_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Local Memory per Inst",
"MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_lmem_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
"MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_rl2l3_mod_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
"MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_rl2l3_shr_rate_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Remote Memory per Inst",
"MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_rmem_rate_percent"
},
{
"BriefDescription": "Percentage of L1 demand load misses per run instruction",
"MetricExpr": "PM_LD_MISS_L1 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "l1_ld_miss_rate_percent"
},
{
"BriefDescription": "% of DL1 misses that result in a cache reload",
"MetricExpr": "PM_L1_DCACHE_RELOAD_VALID * 100 / PM_LD_MISS_L1",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_miss_reloads_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Modified)",
"MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_dl2l3_mod_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Shared)",
"MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_dl2l3_shr_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Distant Memory",
"MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_dmem_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L2, other core",
"MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l21_mod_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L2, other core",
"MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l21_shr_percent"
},
{
"BriefDescription": "% of DL1 Reloads from sources beyond the local L2",
"MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l2_miss_percent"
},
{
"BriefDescription": "% of DL1 reloads from L2",
"MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l2_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L3, other core",
"MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l31_mod_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L3, other core",
"MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l31_shr_percent"
},
{
"BriefDescription": "% of DL1 Reloads that came from L3 and were brought into the L3 by a prefetch",
"MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l3_mepf_percent"
},
{
"BriefDescription": "% of DL1 Reloads from sources beyond the local L3",
"MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l3_miss_percent"
},
{
"BriefDescription": "% of DL1 Reloads from L3",
"MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l3_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Local Memory",
"MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_lmem_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Modified)",
"MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_rl2l3_mod_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Shared)",
"MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_rl2l3_shr_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Remote Memory",
"MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_rmem_percent"
},
{
"BriefDescription": "estimate of dl2l3 distant MOD miss rates with measured DL2L3 MOD latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_DL2L3_MOD * PM_MRK_DATA_FROM_DL2L3_MOD_CYC / PM_MRK_DATA_FROM_DL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "dl2l3_mod_cpi_percent"
},
{
"BriefDescription": "estimate of dl2l3 distant SHR miss rates with measured DL2L3 SHR latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_DL2L3_SHR * PM_MRK_DATA_FROM_DL2L3_SHR_CYC / PM_MRK_DATA_FROM_DL2L3_SHR / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "dl2l3_shr_cpi_percent"
},
{
"BriefDescription": "estimate of distant L4 miss rates with measured DL4 latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_DL4 * PM_MRK_DATA_FROM_DL4_CYC / PM_MRK_DATA_FROM_DL4 / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "dl4_cpi_percent"
},
{
"BriefDescription": "estimate of distant memory miss rates with measured DMEM latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_DMEM * PM_MRK_DATA_FROM_DMEM_CYC / PM_MRK_DATA_FROM_DMEM / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "dmem_cpi_percent"
},
{
"BriefDescription": "estimate of dl21 MOD miss rates with measured L21 MOD latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_L21_MOD * PM_MRK_DATA_FROM_L21_MOD_CYC / PM_MRK_DATA_FROM_L21_MOD / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "l21_mod_cpi_percent"
},
{
"BriefDescription": "estimate of dl21 SHR miss rates with measured L21 SHR latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_L21_SHR * PM_MRK_DATA_FROM_L21_SHR_CYC / PM_MRK_DATA_FROM_L21_SHR / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "l21_shr_cpi_percent"
},
{
"BriefDescription": "estimate of dl2 miss rates with measured L2 latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_L2 * PM_MRK_DATA_FROM_L2_CYC / PM_MRK_DATA_FROM_L2 / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "l2_cpi_percent"
},
{
"BriefDescription": "estimate of dl31 MOD miss rates with measured L31 MOD latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_L31_MOD * PM_MRK_DATA_FROM_L31_MOD_CYC / PM_MRK_DATA_FROM_L31_MOD / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "l31_mod_cpi_percent"
},
{
"BriefDescription": "estimate of dl31 SHR miss rates with measured L31 SHR latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_L31_SHR * PM_MRK_DATA_FROM_L31_SHR_CYC / PM_MRK_DATA_FROM_L31_SHR / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "l31_shr_cpi_percent"
},
{
"BriefDescription": "estimate of dl3 miss rates with measured L3 latency as a % of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_L3 * PM_MRK_DATA_FROM_L3_CYC / PM_MRK_DATA_FROM_L3 / PM_CMPLU_STALL_DCACHE_MISS * 100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "l3_cpi_percent"
},
{
"BriefDescription": "estimate of Local memory miss rates with measured LMEM latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_LMEM * PM_MRK_DATA_FROM_LMEM_CYC / PM_MRK_DATA_FROM_LMEM / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "lmem_cpi_percent"
},
{
"BriefDescription": "estimate of dl2l3 remote MOD miss rates with measured RL2L3 MOD latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_RL2L3_MOD * PM_MRK_DATA_FROM_RL2L3_MOD_CYC / PM_MRK_DATA_FROM_RL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "rl2l3_mod_cpi_percent"
},
{
"BriefDescription": "estimate of dl2l3 shared miss rates with measured RL2L3 SHR latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_RL2L3_SHR * PM_MRK_DATA_FROM_RL2L3_SHR_CYC / PM_MRK_DATA_FROM_RL2L3_SHR / PM_CMPLU_STALL_DCACHE_MISS * 100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "rl2l3_shr_cpi_percent"
},
{
"BriefDescription": "estimate of remote L4 miss rates with measured RL4 latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_RL4 * PM_MRK_DATA_FROM_RL4_CYC / PM_MRK_DATA_FROM_RL4 / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "rl4_cpi_percent"
},
{
"BriefDescription": "estimate of remote memory miss rates with measured RMEM latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_RMEM * PM_MRK_DATA_FROM_RMEM_CYC / PM_MRK_DATA_FROM_RMEM / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "rmem_cpi_percent"
},
{
"BriefDescription": "Branch Mispredict flushes per instruction",
"MetricExpr": "PM_FLUSH_MPRED / PM_RUN_INST_CMPL * 100",
"MetricGroup": "general",
"MetricName": "br_mpred_flush_rate_percent"
},
{
"BriefDescription": "Cycles per instruction",
"MetricExpr": "PM_CYC / PM_INST_CMPL",
"MetricGroup": "general",
"MetricName": "cpi"
},
{
"BriefDescription": "GCT empty cycles",
"MetricExpr": "(PM_FLUSH_DISP / PM_RUN_INST_CMPL) * 100",
"MetricGroup": "general",
"MetricName": "disp_flush_rate_percent"
},
{
"BriefDescription": "% DTLB miss rate per inst",
"MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL *100",
"MetricGroup": "general",
"MetricName": "dtlb_miss_rate_percent"
},
{
"BriefDescription": "Flush rate (%)",
"MetricExpr": "PM_FLUSH * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "flush_rate_percent"
},
{
"BriefDescription": "Instructions per cycles",
"MetricExpr": "PM_INST_CMPL / PM_CYC",
"MetricGroup": "general",
"MetricName": "ipc"
},
{
"BriefDescription": "% ITLB miss rate per inst",
"MetricExpr": "PM_ITLB_MISS / PM_RUN_INST_CMPL *100",
"MetricGroup": "general",
"MetricName": "itlb_miss_rate_percent"
},
{
"BriefDescription": "Percentage of L1 load misses per L1 load ref",
"MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100",
"MetricGroup": "general",
"MetricName": "l1_ld_miss_ratio_percent"
},
{
"BriefDescription": "Percentage of L1 store misses per run instruction",
"MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l1_st_miss_rate_percent"
},
{
"BriefDescription": "Percentage of L1 store misses per L1 store ref",
"MetricExpr": "PM_ST_MISS_L1 / PM_ST_FIN * 100",
"MetricGroup": "general",
"MetricName": "l1_st_miss_ratio_percent"
},
{
"BriefDescription": "L2 Instruction Miss Rate (per instruction)(%)",
"MetricExpr": "PM_INST_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l2_inst_miss_rate_percent"
},
{
"BriefDescription": "L2 dmand Load Miss Rate (per run instruction)(%)",
"MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l2_ld_miss_rate_percent"
},
{
"BriefDescription": "L2 PTEG Miss Rate (per run instruction)(%)",
"MetricExpr": "PM_DPTEG_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l2_pteg_miss_rate_percent"
},
{
"BriefDescription": "L3 Instruction Miss Rate (per instruction)(%)",
"MetricExpr": "PM_INST_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l3_inst_miss_rate_percent"
},
{
"BriefDescription": "L3 demand Load Miss Rate (per run instruction)(%)",
"MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l3_ld_miss_rate_percent"
},
{
"BriefDescription": "L3 PTEG Miss Rate (per run instruction)(%)",
"MetricExpr": "PM_DPTEG_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l3_pteg_miss_rate_percent"
},
{
"BriefDescription": "Run cycles per cycle",
"MetricExpr": "PM_RUN_CYC / PM_CYC*100",
"MetricGroup": "general",
"MetricName": "run_cycles_percent"
},
{
"BriefDescription": "Instruction dispatch-to-completion ratio",
"MetricExpr": "PM_INST_DISP / PM_INST_CMPL",
"MetricGroup": "general",
"MetricName": "speculation"
},
{
"BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified) per Inst",
"MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_dl2l3_mod_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared) per Inst",
"MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_dl2l3_shr_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant L4 per Inst",
"MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_dl4_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant Memory per Inst",
"MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_dmem_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L2, other core per Inst",
"MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_l21_mod_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L2, other core per Inst",
"MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_l21_shr_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from L2 per Inst",
"MetricExpr": "PM_INST_FROM_L2 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_l2_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L3, other core per Inst",
"MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_l31_mod_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L3 other core per Inst",
"MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_l31_shr_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from L3 per Inst",
"MetricExpr": "PM_INST_FROM_L3 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_l3_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Local L4 per Inst",
"MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_ll4_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Local Memory per Inst",
"MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_lmem_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified) per Inst",
"MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_rl2l3_mod_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared) per Inst",
"MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_rl2l3_shr_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote L4 per Inst",
"MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_rl4_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote Memory per Inst",
"MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_rmem_rate_percent"
},
{
"BriefDescription": "Instruction Cache Miss Rate (Per run Instruction)(%)",
"MetricExpr": "PM_L1_ICACHE_MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "l1_inst_miss_rate_percent"
},
{
"BriefDescription": "Icache Fetchs per Icache Miss",
"MetricExpr": "(PM_L1_ICACHE_MISS - PM_IC_PREF_WRITE) / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "icache_miss_reload"
},
{
"BriefDescription": "% of ICache reloads due to prefetch",
"MetricExpr": "PM_IC_PREF_WRITE * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "icache_pref_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified)",
"MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_dl2l3_mod_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared)",
"MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_dl2l3_shr_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant L4",
"MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_dl4_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant Memory",
"MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_dmem_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L2, other core",
"MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_l21_mod_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L2, other core",
"MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_l21_shr_percent"
},
{
"BriefDescription": "% of ICache reloads from L2",
"MetricExpr": "PM_INST_FROM_L2 * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_l2_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L3, other core",
"MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_l31_mod_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L3, other core",
"MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_l31_shr_percent"
},
{
"BriefDescription": "% of ICache reloads from L3",
"MetricExpr": "PM_INST_FROM_L3 * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_l3_percent"
},
{
"BriefDescription": "% of ICache reloads from Local L4",
"MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_ll4_percent"
},
{
"BriefDescription": "% of ICache reloads from Local Memory",
"MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_lmem_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified)",
"MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_rl2l3_mod_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared)",
"MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_rl2l3_shr_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote L4",
"MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_rl4_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote Memory",
"MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_rmem_percent"
},
{
"BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)",
"MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_co_m_rd_util"
},
{
"BriefDescription": "L2 dcache invalidates per run inst (per core)",
"MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_dc_inv_rate_percent"
},
{
"BriefDescription": "Demand load misses as a % of L2 LD dispatches (per thread)",
"MetricExpr": "PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2) * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_dem_ld_disp_percent"
},
{
"BriefDescription": "L2 Icache invalidates per run inst (per core)",
"MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_ic_inv_rate_percent"
},
{
"BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)",
"MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_inst_miss_ratio_percent"
},
{
"BriefDescription": "Average number of cycles between L2 Load hits",
"MetricExpr": "(PM_L2_LD_HIT / PM_RUN_CYC) / 2",
"MetricGroup": "l2_stats",
"MetricName": "l2_ld_hit_frequency"
},
{
"BriefDescription": "Average number of cycles between L2 Load misses",
"MetricExpr": "(PM_L2_LD_MISS / PM_RUN_CYC) / 2",
"MetricGroup": "l2_stats",
"MetricName": "l2_ld_miss_frequency"
},
{
"BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)",
"MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_ld_miss_ratio_percent"
},
{
"BriefDescription": "% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt)",
"MetricExpr": "((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_ld_rd_util"
},
{
"BriefDescription": "L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks",
"MetricExpr": "((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ PM_RUN_CYC * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_ldmiss_wr_util"
},
{
"BriefDescription": "L2 local pump prediction success",
"MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_local_pred_correct_percent"
},
{
"BriefDescription": "L2 COs that were in M,Me,Mu state as a % of all L2 COs",
"MetricExpr": "PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_mod_co_percent"
},
{
"BriefDescription": "% of L2 Load RC dispatch atampts that failed because of address collisions and cclass conflicts",
"MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_rc_ld_disp_addr_fail_percent"
},
{
"BriefDescription": "% of L2 Load RC dispatch attempts that failed",
"MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ PM_L2_RCLD_DISP * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_rc_ld_disp_fail_percent"
},
{
"BriefDescription": "% of L2 Store RC dispatch atampts that failed because of address collisions and cclass conflicts",
"MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_rc_st_disp_addr_fail_percent"
},
{
"BriefDescription": "% of L2 Store RC dispatch attempts that failed",
"MetricExpr": "(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ PM_L2_RCST_DISP * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_rc_st_disp_fail_percent"
},
{
"BriefDescription": "L2 Cache Read Utilization (per core)",
"MetricExpr": "(((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100) + (((PM_L2_RCST_DISP/2)*4)/PM_RUN_CYC * 100) + (((PM_L2_CASTOUT_MOD/2)*4)/PM_RUN_CYC * 100)",
"MetricGroup": "l2_stats",
"MetricName": "l2_rd_util_percent"
},
{
"BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs",
"MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_shr_co_percent"
},
{
"BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)",
"MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_st_miss_ratio_percent"
},
{
"BriefDescription": "% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt)",
"MetricExpr": "((PM_L2_RCST_DISP/2)*4) / PM_RUN_CYC * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_st_rd_util"
},
{
"BriefDescription": "L2 stores that require a cache write (4 pclks per disp attempt) % of pclks",
"MetricExpr": "((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_st_wr_util"
},
{
"BriefDescription": "L2 Cache Write Utilization (per core)",
"MetricExpr": "((((PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4) / PM_RUN_CYC * 100) + (((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100)",
"MetricGroup": "l2_stats",
"MetricName": "l2_wr_util_percent"
},
{
"BriefDescription": "Average number of cycles between L3 Load hits",
"MetricExpr": "(PM_L3_LD_HIT / PM_RUN_CYC) / 2",
"MetricGroup": "l3_stats",
"MetricName": "l3_ld_hit_frequency"
},
{
"BriefDescription": "Average number of cycles between L3 Load misses",
"MetricExpr": "(PM_L3_LD_MISS / PM_RUN_CYC) / 2",
"MetricGroup": "l3_stats",
"MetricName": "l3_ld_miss_frequency"
},
{
"BriefDescription": "Average number of Write-in machines used. 1 of 8 WI machines is sampled every L3 cycle",
"MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8",
"MetricGroup": "l3_stats",
"MetricName": "l3_wi_usage"
},
{
"BriefDescription": "Average icache miss latency",
"MetricExpr": "PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ",
"MetricGroup": "latency",
"MetricName": "average_il1_miss_latency"
},
{
"BriefDescription": "Marked L2L3 remote Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD",
"MetricGroup": "latency",
"MetricName": "dl2l3_mod_latency"
},
{
"BriefDescription": "Marked L2L3 distant Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR",
"MetricGroup": "latency",
"MetricName": "dl2l3_shr_latency"
},
{
"BriefDescription": "Distant L4 average load latency",
"MetricExpr": "PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4",
"MetricGroup": "latency",
"MetricName": "dl4_latency"
},
{
"BriefDescription": "Marked Dmem Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM",
"MetricGroup": "latency",
"MetricName": "dmem_latency"
},
{
"BriefDescription": "average L1 miss latency using marked events",
"MetricExpr": "PM_MRK_LD_MISS_L1_CYC / PM_MRK_LD_MISS_L1",
"MetricGroup": "latency",
"MetricName": "estimated_dl1miss_latency"
},
{
"BriefDescription": "Marked L21 Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD",
"MetricGroup": "latency",
"MetricName": "l21_mod_latency"
},
{
"BriefDescription": "Marked L21 Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR",
"MetricGroup": "latency",
"MetricName": "l21_shr_latency"
},
{
"BriefDescription": "Marked L2 Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2",
"MetricGroup": "latency",
"MetricName": "l2_latency"
},
{
"BriefDescription": "Marked L31 Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD",
"MetricGroup": "latency",
"MetricName": "l31_mod_latency"
},
{
"BriefDescription": "Marked L31 Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR",
"MetricGroup": "latency",
"MetricName": "l31_shr_latency"
},
{
"BriefDescription": "Marked L3 Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3",
"MetricGroup": "latency",
"MetricName": "l3_latency"
},
{
"BriefDescription": "Local L4 average load latency",
"MetricExpr": "PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4",
"MetricGroup": "latency",
"MetricName": "ll4_latency"
},
{
"BriefDescription": "Marked Lmem Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM",
"MetricGroup": "latency",
"MetricName": "lmem_latency"
},
{
"BriefDescription": "Marked L2L3 remote Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD",
"MetricGroup": "latency",
"MetricName": "rl2l3_mod_latency"
},
{
"BriefDescription": "Marked L2L3 remote Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR",
"MetricGroup": "latency",
"MetricName": "rl2l3_shr_latency"
},
{
"BriefDescription": "Remote L4 average load latency",
"MetricExpr": "PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4",
"MetricGroup": "latency",
"MetricName": "rl4_latency"
},
{
"BriefDescription": "Marked Rmem Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM",
"MetricGroup": "latency",
"MetricName": "rmem_latency"
},
{
"BriefDescription": "ERAT miss reject ratio",
"MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "lsu_rejects",
"MetricName": "erat_reject_rate_percent"
},
{
"BriefDescription": "LHS reject ratio",
"MetricExpr": "PM_LSU_REJECT_LHS *100/ PM_RUN_INST_CMPL",
"MetricGroup": "lsu_rejects",
"MetricName": "lhs_reject_rate_percent"
},
{
"BriefDescription": "ERAT miss reject ratio",
"MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "lsu_rejects",
"MetricName": "lmq_full_reject_rate_percent"
},
{
"BriefDescription": "ERAT miss reject ratio",
"MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_LD_REF_L1",
"MetricGroup": "lsu_rejects",
"MetricName": "lmq_full_reject_ratio_percent"
},
{
"BriefDescription": "L4 locality(%)",
"MetricExpr": "PM_DATA_FROM_LL4 * 100 / (PM_DATA_FROM_LL4 + PM_DATA_FROM_RL4 + PM_DATA_FROM_DL4)",
"MetricGroup": "memory",
"MetricName": "l4_locality"
},
{
"BriefDescription": "Ratio of reloads from local L4 to distant L4",
"MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_DL4",
"MetricGroup": "memory",
"MetricName": "ld_ll4_per_ld_dmem"
},
{
"BriefDescription": "Ratio of reloads from local L4 to remote+distant L4",
"MetricExpr": "PM_DATA_FROM_LL4 / (PM_DATA_FROM_DL4 + PM_DATA_FROM_RL4)",
"MetricGroup": "memory",
"MetricName": "ld_ll4_per_ld_mem"
},
{
"BriefDescription": "Ratio of reloads from local L4 to remote L4",
"MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_RL4",
"MetricGroup": "memory",
"MetricName": "ld_ll4_per_ld_rl4"
},
{
"BriefDescription": "Number of loads from local memory per loads from distant memory",
"MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_DMEM",
"MetricGroup": "memory",
"MetricName": "ld_lmem_per_ld_dmem"
},
{
"BriefDescription": "Number of loads from local memory per loads from remote and distant memory",
"MetricExpr": "PM_DATA_FROM_LMEM / (PM_DATA_FROM_DMEM + PM_DATA_FROM_RMEM)",
"MetricGroup": "memory",
"MetricName": "ld_lmem_per_ld_mem"
},
{
"BriefDescription": "Number of loads from local memory per loads from remote memory",
"MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM",
"MetricGroup": "memory",
"MetricName": "ld_lmem_per_ld_rmem"
},
{
"BriefDescription": "Number of loads from remote memory per loads from distant memory",
"MetricExpr": "PM_DATA_FROM_RMEM / PM_DATA_FROM_DMEM",
"MetricGroup": "memory",
"MetricName": "ld_rmem_per_ld_dmem"
},
{
"BriefDescription": "Memory locality",
"MetricExpr": "PM_DATA_FROM_LMEM * 100/ (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM + PM_DATA_FROM_DMEM)",
"MetricGroup": "memory",
"MetricName": "mem_locality_percent"
},
{
"BriefDescription": "L1 Prefetches issued by the prefetch machine per instruction (per thread)",
"MetricExpr": "PM_L1_PREF / PM_RUN_INST_CMPL * 100",
"MetricGroup": "prefetch",
"MetricName": "l1_prefetch_rate_percent"
},
{
"BriefDescription": "DERAT Miss Rate (per run instruction)(%)",
"MetricExpr": "PM_LSU_DERAT_MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "derat_miss_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified) per inst",
"MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_dl2l3_mod_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared) per inst",
"MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_dl2l3_shr_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant L4 per inst",
"MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_dl4_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant Memory per inst",
"MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_dmem_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L2, other core per inst",
"MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_l21_mod_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L2, other core per inst",
"MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_l21_shr_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from L2 per inst",
"MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_l2_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L3, other core per inst",
"MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_l31_mod_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L3, other core per inst",
"MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_l31_shr_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from L3 per inst",
"MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_l3_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Local L4 per inst",
"MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_ll4_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Local Memory per inst",
"MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_lmem_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified) per inst",
"MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_rl2l3_mod_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared) per inst",
"MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_rl2l3_shr_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote L4 per inst",
"MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_rl4_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote Memory per inst",
"MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_rmem_rate_percent"
},
{
"BriefDescription": "% of DERAT misses that result in an ERAT reload",
"MetricExpr": "PM_DTLB_MISS * 100 / PM_LSU_DERAT_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "derat_miss_reload_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified)",
"MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_dl2l3_mod_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared)",
"MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_dl2l3_shr_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant L4",
"MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_dl4_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant Memory",
"MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_dmem_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L2, other core",
"MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_l21_mod_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L2, other core",
"MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_l21_shr_percent"
},
{
"BriefDescription": "% of DERAT reloads from L2",
"MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_l2_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L3, other core",
"MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_l31_mod_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L3, other core",
"MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_l31_shr_percent"
},
{
"BriefDescription": "% of DERAT reloads from L3",
"MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_l3_percent"
},
{
"BriefDescription": "% of DERAT reloads from Local L4",
"MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_ll4_percent"
},
{
"BriefDescription": "% of DERAT reloads from Local Memory",
"MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_lmem_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified)",
"MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_rl2l3_mod_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared)",
"MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_rl2l3_shr_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote L4",
"MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_rl4_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote Memory",
"MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_rmem_percent"
},
{
"BriefDescription": "% DERAT miss rate for 4K page per inst",
"MetricExpr": "PM_DERAT_MISS_4K * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "translation",
"MetricName": "derat_4k_miss_rate_percent"
},
{
"BriefDescription": "DERAT miss ratio for 4K page",
"MetricExpr": "PM_DERAT_MISS_4K / PM_LSU_DERAT_MISS",
"MetricGroup": "translation",
"MetricName": "derat_4k_miss_ratio"
},
{
"BriefDescription": "% DERAT miss ratio for 64K page per inst",
"MetricExpr": "PM_DERAT_MISS_64K * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "translation",
"MetricName": "derat_64k_miss_rate_percent"
},
{
"BriefDescription": "DERAT miss ratio for 64K page",
"MetricExpr": "PM_DERAT_MISS_64K / PM_LSU_DERAT_MISS",
"MetricGroup": "translation",
"MetricName": "derat_64k_miss_ratio"
},
{
"BriefDescription": "DERAT miss ratio",
"MetricExpr": "PM_LSU_DERAT_MISS / PM_LSU_DERAT_MISS",
"MetricGroup": "translation",
"MetricName": "derat_miss_ratio"
},
{
"BriefDescription": "% DSLB_Miss_Rate per inst",
"MetricExpr": "PM_DSLB_MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "translation",
"MetricName": "dslb_miss_rate_percent"
},
{
"BriefDescription": "% ISLB miss rate per inst",
"MetricExpr": "PM_ISLB_MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "translation",
"MetricName": "islb_miss_rate_percent"
},
{
"BriefDescription": "ANY_SYNC_STALL_CPI",
"MetricExpr": "PM_CMPLU_STALL_ANY_SYNC / PM_RUN_INST_CMPL",
"MetricName": "any_sync_stall_cpi"
},
{
"BriefDescription": "Avg. more than 1 instructions completed",
"MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL",
"MetricName": "average_completed_instruction_set_size"
},
{
"BriefDescription": "% Branches per instruction",
"MetricExpr": "PM_BRU_FIN / PM_RUN_INST_CMPL",
"MetricName": "branches_per_inst"
},
{
"BriefDescription": "Cycles in which at least one instruction completes in this thread",
"MetricExpr": "PM_1PLUS_PPC_CMPL/PM_RUN_INST_CMPL",
"MetricName": "completion_cpi"
},
{
"BriefDescription": "cycles",
"MetricExpr": "PM_RUN_CYC",
"MetricName": "custom_secs"
},
{
"BriefDescription": "Percentage Cycles atleast one instruction dispatched",
"MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100",
"MetricName": "cycles_atleast_one_inst_dispatched_percent"
},
{
"BriefDescription": "Cycles per instruction group",
"MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL",
"MetricName": "cycles_per_completed_instructions_set"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Distant L4",
"MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricName": "dl1_reload_from_dl4_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Distant L4 per Inst",
"MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_RUN_INST_CMPL",
"MetricName": "dl1_reload_from_dl4_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
"MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL",
"MetricName": "dl1_reload_from_l31_rate_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Local L4",
"MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricName": "dl1_reload_from_ll4_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Local L4 per Inst",
"MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
"MetricName": "dl1_reload_from_ll4_rate_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Remote L4",
"MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricName": "dl1_reload_from_rl4_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Remote Memory per Inst",
"MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_RUN_INST_CMPL",
"MetricName": "dl1_reload_from_rl4_rate_percent"
},
{
"BriefDescription": "Rate of DERAT reloads from L2",
"MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
"MetricName": "dpteg_from_l2_rate_percent"
},
{
"BriefDescription": "Rate of DERAT reloads from L3",
"MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL",
"MetricName": "dpteg_from_l3_rate_percent"
},
{
"BriefDescription": "Cycles in which the oldest instruction is finished and ready to complete for waiting to get through the completion pipe",
"MetricExpr": "PM_NTC_ALL_FIN / PM_RUN_INST_CMPL",
"MetricName": "finish_to_cmpl_cpi"
},
{
"BriefDescription": "Total Fixed point operations",
"MetricExpr": "PM_FXU_FIN/PM_RUN_INST_CMPL",
"MetricName": "fixed_per_inst"
},
{
"BriefDescription": "All FXU Busy",
"MetricExpr": "PM_FXU_BUSY / PM_CYC",
"MetricName": "fxu_all_busy"
},
{
"BriefDescription": "All FXU Idle",
"MetricExpr": "PM_FXU_IDLE / PM_CYC",
"MetricName": "fxu_all_idle"
},
{
"BriefDescription": "Ict empty for this thread due to branch mispred",
"MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_br_mpred_cpi"
},
{
"BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred",
"MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_br_mpred_icmiss_cpi"
},
{
"BriefDescription": "ICT other stalls",
"MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_cyc_other_cpi"
},
{
"BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason",
"MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_disp_held_cpi"
},
{
"BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF",
"MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_disp_held_hb_full_cpi"
},
{
"BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full",
"MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_disp_held_issq_cpi"
},
{
"BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI",
"MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_disp_held_other_cpi"
},
{
"BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch",
"MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_disp_held_sync_cpi"
},
{
"BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch",
"MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_disp_held_tbegin_cpi"
},
{
"BriefDescription": "ICT_NOSLOT_IC_L2_CPI",
"MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_ic_l2_cpi"
},
{
"BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3",
"MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_ic_l3_cpi"
},
{
"BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache",
"MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_ic_l3miss_cpi"
},
{
"BriefDescription": "Ict empty for this thread due to Icache Miss",
"MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_ic_miss_cpi"
},
{
"BriefDescription": "Rate of IERAT reloads from L2",
"MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
"MetricName": "ipteg_from_l2_rate_percent"
},
{
"BriefDescription": "Rate of IERAT reloads from L3",
"MetricExpr": "PM_IPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL",
"MetricName": "ipteg_from_l3_rate_percent"
},
{
"BriefDescription": "Rate of IERAT reloads from local memory",
"MetricExpr": "PM_IPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
"MetricName": "ipteg_from_ll4_rate_percent"
},
{
"BriefDescription": "Rate of IERAT reloads from local memory",
"MetricExpr": "PM_IPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
"MetricName": "ipteg_from_lmem_rate_percent"
},
{
"BriefDescription": "Average number of Castout machines used. 1 of 16 CO machines is sampled every L2 cycle",
"MetricExpr": "PM_CO_USAGE / PM_RUN_CYC * 16",
"MetricName": "l2_co_usage"
},
{
"BriefDescription": "Percent of instruction reads out of all L2 commands",
"MetricExpr": "PM_ISIDE_DISP * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)",
"MetricName": "l2_instr_commands_percent"
},
{
"BriefDescription": "Percent of loads out of all L2 commands",
"MetricExpr": "PM_L2_LD * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)",
"MetricName": "l2_ld_commands_percent"
},
{
"BriefDescription": "Rate of L2 store dispatches that failed per core",
"MetricExpr": "100 * (PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2 / PM_RUN_INST_CMPL",
"MetricName": "l2_rc_st_disp_fail_rate_percent"
},
{
"BriefDescription": "Average number of Read/Claim machines used. 1 of 16 RC machines is sampled every L2 cycle",
"MetricExpr": "PM_RC_USAGE / PM_RUN_CYC * 16",
"MetricName": "l2_rc_usage"
},
{
"BriefDescription": "Average number of Snoop machines used. 1 of 8 SN machines is sampled every L2 cycle",
"MetricExpr": "PM_SN_USAGE / PM_RUN_CYC * 8",
"MetricName": "l2_sn_usage"
},
{
"BriefDescription": "Percent of stores out of all L2 commands",
"MetricExpr": "PM_L2_ST * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)",
"MetricName": "l2_st_commands_percent"
},
{
"BriefDescription": "Rate of L2 store dispatches that failed per core",
"MetricExpr": "100 * (PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2 / PM_RUN_INST_CMPL",
"MetricName": "l2_st_disp_fail_rate_percent"
},
{
"BriefDescription": "Rate of L2 dispatches per core",
"MetricExpr": "100 * PM_L2_RCST_DISP/2 / PM_RUN_INST_CMPL",
"MetricName": "l2_st_disp_rate_percent"
},
{
"BriefDescription": "Marked L31 Load latency",
"MetricExpr": "(PM_MRK_DATA_FROM_L31_SHR_CYC + PM_MRK_DATA_FROM_L31_MOD_CYC) / (PM_MRK_DATA_FROM_L31_SHR + PM_MRK_DATA_FROM_L31_MOD)",
"MetricName": "l31_latency"
},
{
"BriefDescription": "PCT instruction loads",
"MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL",
"MetricName": "loads_per_inst"
},
{
"BriefDescription": "Cycles stalled by D-Cache Misses",
"MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL",
"MetricName": "lsu_stall_dcache_miss_cpi"
},
{
"BriefDescription": "Completion stall because a different thread was using the completion pipe",
"MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_EXCEPTION - PM_CMPLU_STALL_ANY_SYNC - PM_CMPLU_STALL_SYNC_PMU_INT - PM_CMPLU_STALL_SPEC_FINISH - PM_CMPLU_STALL_FLUSH_ANY_THREAD - PM_CMPLU_STALL_LSU_FLUSH_NEXT - PM_CMPLU_STALL_NESTED_TBEGIN - PM_CMPLU_STALL_NESTED_TEND - PM_CMPLU_STALL_MTFPSCR)/PM_RUN_INST_CMPL",
"MetricName": "other_thread_cmpl_stall"
},
{
"BriefDescription": "PCT instruction stores",
"MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL",
"MetricName": "stores_per_inst"
},
{
"BriefDescription": "ANY_SYNC_STALL_CPI",
"MetricExpr": "PM_CMPLU_STALL_SYNC_PMU_INT / PM_RUN_INST_CMPL",
"MetricName": "sync_pmu_int_stall_cpi"
}
]
libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ perf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/
libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ perf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/
libperf-y += Context.o perf-y += Context.o
CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes
CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef
......
libperf-y += Context.o perf-y += Context.o
CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs
...@@ -1330,6 +1330,26 @@ static int test__checkevent_complex_name(struct perf_evlist *evlist) ...@@ -1330,6 +1330,26 @@ static int test__checkevent_complex_name(struct perf_evlist *evlist)
return 0; return 0;
} }
/*
 * Check the event parsed from "cycles//u": a hardware cpu-cycles event
 * restricted to user space (i.e. with exclude_kernel set).
 */
static int test__sym_event_slash(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	TEST_ASSERT_VAL("wrong type",
			first->attr.type == PERF_TYPE_HARDWARE);
	TEST_ASSERT_VAL("wrong config",
			first->attr.config == PERF_COUNT_HW_CPU_CYCLES);
	TEST_ASSERT_VAL("wrong exclude_kernel", first->attr.exclude_kernel);
	return 0;
}
/*
 * Check the event parsed from "cycles:k": a hardware cpu-cycles event
 * restricted to kernel space (i.e. with exclude_user set).
 */
static int test__sym_event_dc(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	TEST_ASSERT_VAL("wrong type",
			first->attr.type == PERF_TYPE_HARDWARE);
	TEST_ASSERT_VAL("wrong config",
			first->attr.config == PERF_COUNT_HW_CPU_CYCLES);
	TEST_ASSERT_VAL("wrong exclude_user", first->attr.exclude_user);
	return 0;
}
static int count_tracepoints(void) static int count_tracepoints(void)
{ {
struct dirent *events_ent; struct dirent *events_ent;
...@@ -1670,6 +1690,16 @@ static struct evlist_test test__events[] = { ...@@ -1670,6 +1690,16 @@ static struct evlist_test test__events[] = {
.name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk", .name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk",
.check = test__checkevent_complex_name, .check = test__checkevent_complex_name,
.id = 53 .id = 53
},
{
.name = "cycles//u",
.check = test__sym_event_slash,
.id = 54,
},
{
.name = "cycles:k",
.check = test__sym_event_dc,
.id = 55,
} }
}; };
......
libperf-y += clone.o perf-y += clone.o
libperf-y += fcntl.o perf-y += fcntl.o
libperf-y += flock.o perf-y += flock.o
ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
libperf-y += ioctl.o perf-y += ioctl.o
endif endif
libperf-y += kcmp.o perf-y += kcmp.o
libperf-y += mount_flags.o perf-y += mount_flags.o
libperf-y += pkey_alloc.o perf-y += pkey_alloc.o
libperf-y += arch_prctl.o perf-y += arch_prctl.o
libperf-y += prctl.o perf-y += prctl.o
libperf-y += renameat.o perf-y += renameat.o
libperf-y += sockaddr.o perf-y += sockaddr.o
libperf-y += socket.o perf-y += socket.o
libperf-y += statx.o perf-y += statx.o
...@@ -175,7 +175,7 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size, boo ...@@ -175,7 +175,7 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size, boo
size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg) size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg)
{ {
unsigned long cmd = arg->val; unsigned long cmd = arg->val;
unsigned int fd = syscall_arg__val(arg, 0); int fd = syscall_arg__val(arg, 0);
struct file *file = thread__files_entry(arg->thread, fd); struct file *file = thread__files_entry(arg->thread, fd);
if (file != NULL) { if (file != NULL) {
......
...@@ -11,7 +11,7 @@ static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size, ...@@ -11,7 +11,7 @@ static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size,
#define P_OPTION(n) \ #define P_OPTION(n) \
if (options & W##n) { \ if (options & W##n) { \
printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : #n); \ printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
options &= ~W##n; \ options &= ~W##n; \
} }
......
libperf-y += setup.o perf-y += setup.o
libperf-y += helpline.o perf-y += helpline.o
libperf-y += progress.o perf-y += progress.o
libperf-y += util.o perf-y += util.o
libperf-y += hist.o perf-y += hist.o
libperf-y += stdio/hist.o perf-y += stdio/hist.o
CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))"
libperf-$(CONFIG_SLANG) += browser.o perf-$(CONFIG_SLANG) += browser.o
libperf-$(CONFIG_SLANG) += browsers/ perf-$(CONFIG_SLANG) += browsers/
libperf-$(CONFIG_SLANG) += tui/ perf-$(CONFIG_SLANG) += tui/
CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST
libperf-y += annotate.o perf-y += annotate.o
libperf-y += hists.o perf-y += hists.o
libperf-y += map.o perf-y += map.o
libperf-y += scripts.o perf-y += scripts.o
libperf-y += header.o perf-y += header.o
CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST
CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST
......
libperf-y += setup.o perf-y += setup.o
libperf-y += util.o perf-y += util.o
libperf-y += helpline.o perf-y += helpline.o
libperf-y += progress.o perf-y += progress.o
libperf-y += annotate.o perf-y += annotate.o
libperf-y += block-range.o perf-y += block-range.o
libperf-y += build-id.o perf-y += build-id.o
libperf-y += config.o perf-y += config.o
libperf-y += ctype.o perf-y += ctype.o
libperf-y += db-export.o perf-y += db-export.o
libperf-y += env.o perf-y += env.o
libperf-y += event.o perf-y += event.o
libperf-y += evlist.o perf-y += evlist.o
libperf-y += evsel.o perf-y += evsel.o
libperf-y += evsel_fprintf.o perf-y += evsel_fprintf.o
libperf-y += find_bit.o perf-y += find_bit.o
libperf-y += get_current_dir_name.o perf-y += get_current_dir_name.o
libperf-y += kallsyms.o perf-y += kallsyms.o
libperf-y += levenshtein.o perf-y += levenshtein.o
libperf-y += llvm-utils.o perf-y += llvm-utils.o
libperf-y += mmap.o perf-y += mmap.o
libperf-y += memswap.o perf-y += memswap.o
libperf-y += parse-events.o perf-y += parse-events.o
libperf-y += perf_regs.o perf-y += perf_regs.o
libperf-y += path.o perf-y += path.o
libperf-y += print_binary.o perf-y += print_binary.o
libperf-y += rbtree.o perf-y += rbtree.o
libperf-y += libstring.o perf-y += libstring.o
libperf-y += bitmap.o perf-y += bitmap.o
libperf-y += hweight.o perf-y += hweight.o
libperf-y += smt.o perf-y += smt.o
libperf-y += strbuf.o perf-y += strbuf.o
libperf-y += string.o perf-y += string.o
libperf-y += strlist.o perf-y += strlist.o
libperf-y += strfilter.o perf-y += strfilter.o
libperf-y += top.o perf-y += top.o
libperf-y += usage.o perf-y += usage.o
libperf-y += dso.o perf-y += dso.o
libperf-y += symbol.o perf-y += symbol.o
libperf-y += symbol_fprintf.o perf-y += symbol_fprintf.o
libperf-y += color.o perf-y += color.o
libperf-y += color_config.o perf-y += color_config.o
libperf-y += metricgroup.o perf-y += metricgroup.o
libperf-y += header.o perf-y += header.o
libperf-y += callchain.o perf-y += callchain.o
libperf-y += values.o perf-y += values.o
libperf-y += debug.o perf-y += debug.o
libperf-y += machine.o perf-y += machine.o
libperf-y += map.o perf-y += map.o
libperf-y += pstack.o perf-y += pstack.o
libperf-y += session.o perf-y += session.o
libperf-y += sample-raw.o perf-y += sample-raw.o
libperf-y += s390-sample-raw.o perf-y += s390-sample-raw.o
libperf-$(CONFIG_TRACE) += syscalltbl.o perf-$(CONFIG_TRACE) += syscalltbl.o
libperf-y += ordered-events.o perf-y += ordered-events.o
libperf-y += namespaces.o perf-y += namespaces.o
libperf-y += comm.o perf-y += comm.o
libperf-y += thread.o perf-y += thread.o
libperf-y += thread_map.o perf-y += thread_map.o
libperf-y += trace-event-parse.o perf-y += trace-event-parse.o
libperf-y += parse-events-flex.o perf-y += parse-events-flex.o
libperf-y += parse-events-bison.o perf-y += parse-events-bison.o
libperf-y += pmu.o perf-y += pmu.o
libperf-y += pmu-flex.o perf-y += pmu-flex.o
libperf-y += pmu-bison.o perf-y += pmu-bison.o
libperf-y += trace-event-read.o perf-y += trace-event-read.o
libperf-y += trace-event-info.o perf-y += trace-event-info.o
libperf-y += trace-event-scripting.o perf-y += trace-event-scripting.o
libperf-y += trace-event.o perf-y += trace-event.o
libperf-y += svghelper.o perf-y += svghelper.o
libperf-y += sort.o perf-y += sort.o
libperf-y += hist.o perf-y += hist.o
libperf-y += util.o perf-y += util.o
libperf-y += xyarray.o perf-y += xyarray.o
libperf-y += cpumap.o perf-y += cpumap.o
libperf-y += cgroup.o perf-y += cgroup.o
libperf-y += target.o perf-y += target.o
libperf-y += rblist.o perf-y += rblist.o
libperf-y += intlist.o perf-y += intlist.o
libperf-y += vdso.o perf-y += vdso.o
libperf-y += counts.o perf-y += counts.o
libperf-y += stat.o perf-y += stat.o
libperf-y += stat-shadow.o perf-y += stat-shadow.o
libperf-y += stat-display.o perf-y += stat-display.o
libperf-y += record.o perf-y += record.o
libperf-y += srcline.o perf-y += srcline.o
libperf-y += srccode.o perf-y += srccode.o
libperf-y += data.o perf-y += data.o
libperf-y += tsc.o perf-y += tsc.o
libperf-y += cloexec.o perf-y += cloexec.o
libperf-y += call-path.o perf-y += call-path.o
libperf-y += rwsem.o perf-y += rwsem.o
libperf-y += thread-stack.o perf-y += thread-stack.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o perf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
libperf-$(CONFIG_AUXTRACE) += intel-pt.o perf-$(CONFIG_AUXTRACE) += intel-pt.o
libperf-$(CONFIG_AUXTRACE) += intel-bts.o perf-$(CONFIG_AUXTRACE) += intel-bts.o
libperf-$(CONFIG_AUXTRACE) += arm-spe.o perf-$(CONFIG_AUXTRACE) += arm-spe.o
libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
libperf-$(CONFIG_AUXTRACE) += s390-cpumsf.o perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
ifdef CONFIG_LIBOPENCSD ifdef CONFIG_LIBOPENCSD
libperf-$(CONFIG_AUXTRACE) += cs-etm.o perf-$(CONFIG_AUXTRACE) += cs-etm.o
libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
endif endif
libperf-y += parse-branch-options.o perf-y += parse-branch-options.o
libperf-y += dump-insn.o perf-y += dump-insn.o
libperf-y += parse-regs-options.o perf-y += parse-regs-options.o
libperf-y += term.o perf-y += term.o
libperf-y += help-unknown-cmd.o perf-y += help-unknown-cmd.o
libperf-y += mem-events.o perf-y += mem-events.o
libperf-y += vsprintf.o perf-y += vsprintf.o
libperf-y += units.o perf-y += units.o
libperf-y += time-utils.o perf-y += time-utils.o
libperf-y += expr-bison.o perf-y += expr-bison.o
libperf-y += branch.o perf-y += branch.o
libperf-y += mem2node.o perf-y += mem2node.o
libperf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf-loader.o
libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
libperf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += symbol-elf.o
libperf-$(CONFIG_LIBELF) += probe-file.o perf-$(CONFIG_LIBELF) += probe-file.o
libperf-$(CONFIG_LIBELF) += probe-event.o perf-$(CONFIG_LIBELF) += probe-event.o
ifndef CONFIG_LIBELF ifndef CONFIG_LIBELF
libperf-y += symbol-minimal.o perf-y += symbol-minimal.o
endif endif
ifndef CONFIG_SETNS ifndef CONFIG_SETNS
libperf-y += setns.o perf-y += setns.o
endif endif
libperf-$(CONFIG_DWARF) += probe-finder.o perf-$(CONFIG_DWARF) += probe-finder.o
libperf-$(CONFIG_DWARF) += dwarf-aux.o perf-$(CONFIG_DWARF) += dwarf-aux.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
libperf-y += scripting-engines/ perf-y += scripting-engines/
libperf-$(CONFIG_ZLIB) += zlib.o perf-$(CONFIG_ZLIB) += zlib.o
libperf-$(CONFIG_LZMA) += lzma.o perf-$(CONFIG_LZMA) += lzma.o
libperf-y += demangle-java.o perf-y += demangle-java.o
libperf-y += demangle-rust.o perf-y += demangle-rust.o
ifdef CONFIG_JITDUMP ifdef CONFIG_JITDUMP
libperf-$(CONFIG_LIBELF) += jitdump.o perf-$(CONFIG_LIBELF) += jitdump.o
libperf-$(CONFIG_LIBELF) += genelf.o perf-$(CONFIG_LIBELF) += genelf.o
libperf-$(CONFIG_DWARF) += genelf_debug.o perf-$(CONFIG_DWARF) += genelf_debug.o
endif endif
libperf-y += perf-hooks.o perf-y += perf-hooks.o
libperf-$(CONFIG_LIBBPF) += bpf-event.o perf-$(CONFIG_LIBBPF) += bpf-event.o
libperf-$(CONFIG_CXX) += c++/ perf-$(CONFIG_CXX) += c++/
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
......
...@@ -236,8 +236,8 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool, ...@@ -236,8 +236,8 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool,
pr_debug("%s: can't get next program: %s%s", pr_debug("%s: can't get next program: %s%s",
__func__, strerror(errno), __func__, strerror(errno),
errno == EINVAL ? " -- kernel too old?" : ""); errno == EINVAL ? " -- kernel too old?" : "");
/* don't report error on old kernel */ /* don't report error on old kernel or EPERM */
err = (errno == EINVAL) ? 0 : -1; err = (errno == EINVAL || errno == EPERM) ? 0 : -1;
break; break;
} }
fd = bpf_prog_get_fd_by_id(id); fd = bpf_prog_get_fd_by_id(id);
......
libperf-$(CONFIG_CLANGLLVM) += clang.o perf-$(CONFIG_CLANGLLVM) += clang.o
libperf-$(CONFIG_CLANGLLVM) += clang-test.o perf-$(CONFIG_CLANGLLVM) += clang-test.o
libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o perf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o
...@@ -15,13 +15,6 @@ ...@@ -15,13 +15,6 @@
struct cs_etm_decoder; struct cs_etm_decoder;
struct cs_etm_buffer {
const unsigned char *buf;
size_t len;
u64 offset;
u64 ref_timestamp;
};
enum cs_etm_sample_type { enum cs_etm_sample_type {
CS_ETM_EMPTY, CS_ETM_EMPTY,
CS_ETM_RANGE, CS_ETM_RANGE,
...@@ -105,9 +98,10 @@ enum { ...@@ -105,9 +98,10 @@ enum {
CS_ETM_PROTO_PTM, CS_ETM_PROTO_PTM,
}; };
enum { enum cs_etm_decoder_operation {
CS_ETM_OPERATION_PRINT = 1, CS_ETM_OPERATION_PRINT = 1,
CS_ETM_OPERATION_DECODE, CS_ETM_OPERATION_DECODE,
CS_ETM_OPERATION_MAX,
}; };
int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "machine.h" #include "machine.h"
#include "map.h" #include "map.h"
#include "perf.h" #include "perf.h"
#include "symbol.h"
#include "thread.h" #include "thread.h"
#include "thread_map.h" #include "thread_map.h"
#include "thread-stack.h" #include "thread-stack.h"
...@@ -64,13 +65,10 @@ struct cs_etm_queue { ...@@ -64,13 +65,10 @@ struct cs_etm_queue {
struct thread *thread; struct thread *thread;
struct cs_etm_decoder *decoder; struct cs_etm_decoder *decoder;
struct auxtrace_buffer *buffer; struct auxtrace_buffer *buffer;
const struct cs_etm_state *state;
union perf_event *event_buf; union perf_event *event_buf;
unsigned int queue_nr; unsigned int queue_nr;
pid_t pid, tid; pid_t pid, tid;
int cpu; int cpu;
u64 time;
u64 timestamp;
u64 offset; u64 offset;
u64 period_instructions; u64 period_instructions;
struct branch_stack *last_branch; struct branch_stack *last_branch;
...@@ -78,11 +76,13 @@ struct cs_etm_queue { ...@@ -78,11 +76,13 @@ struct cs_etm_queue {
size_t last_branch_pos; size_t last_branch_pos;
struct cs_etm_packet *prev_packet; struct cs_etm_packet *prev_packet;
struct cs_etm_packet *packet; struct cs_etm_packet *packet;
const unsigned char *buf;
size_t buf_len, buf_used;
}; };
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
pid_t tid, u64 time_); pid_t tid);
/* PTMs ETMIDR [11:8] set to b0011 */ /* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300 #define ETMIDR_PTM_VERSION 0x00000300
...@@ -138,10 +138,83 @@ static void cs_etm__packet_dump(const char *pkt_string) ...@@ -138,10 +138,83 @@ static void cs_etm__packet_dump(const char *pkt_string)
fflush(stdout); fflush(stdout);
} }
/*
 * Fill in the ETMv3/PTM decoder trace parameters for CPU @idx from the
 * per-cpu metadata recorded in the perf.data file.
 */
static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
					  struct cs_etm_auxtrace *etm, int idx,
					  u32 etmidr)
{
	u64 *cpu_metadata = etm->metadata[idx];

	/* ETMIDR distinguishes ETMv3.x from PTM on v7 parts. */
	t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
	t_params[idx].etmv3.reg_ctrl = cpu_metadata[CS_ETM_ETMCR];
	t_params[idx].etmv3.reg_trc_id = cpu_metadata[CS_ETM_ETMTRACEIDR];
}
/*
 * Fill in the ETMv4 decoder trace parameters for CPU @idx from the
 * per-cpu metadata recorded in the perf.data file.
 */
static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
					  struct cs_etm_auxtrace *etm, int idx)
{
	u64 *cpu_metadata = etm->metadata[idx];

	t_params[idx].protocol = CS_ETM_PROTO_ETMV4i;
	t_params[idx].etmv4.reg_idr0 = cpu_metadata[CS_ETMV4_TRCIDR0];
	t_params[idx].etmv4.reg_idr1 = cpu_metadata[CS_ETMV4_TRCIDR1];
	t_params[idx].etmv4.reg_idr2 = cpu_metadata[CS_ETMV4_TRCIDR2];
	t_params[idx].etmv4.reg_idr8 = cpu_metadata[CS_ETMV4_TRCIDR8];
	t_params[idx].etmv4.reg_configr = cpu_metadata[CS_ETMV4_TRCCONFIGR];
	t_params[idx].etmv4.reg_traceidr = cpu_metadata[CS_ETMV4_TRCTRACEIDR];
}
/*
 * Initialise one set of decoder trace parameters per CPU, dispatching on
 * the per-cpu magic value to the ETMv3 or ETMv4 helper.
 *
 * Return 0 on success, -EINVAL if a CPU advertises an unknown magic.
 */
static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
				     struct cs_etm_auxtrace *etm)
{
	int i;

	for (i = 0; i < etm->num_cpu; i++) {
		u64 magic = etm->metadata[i][CS_ETM_MAGIC];

		switch (magic) {
		case __perf_cs_etmv3_magic:
			cs_etm__set_trace_param_etmv3(t_params, etm, i,
					etm->metadata[i][CS_ETM_ETMIDR]);
			break;
		case __perf_cs_etmv4_magic:
			cs_etm__set_trace_param_etmv4(t_params, etm, i);
			break;
		default:
			/* Unknown trace protocol: refuse to decode. */
			return -EINVAL;
		}
	}

	return 0;
}
/*
 * Initialise the common decoder parameters for @mode (print or decode).
 *
 * @etmq may be NULL for the PRINT operation, where no queue context is
 * needed by the packet printer.
 *
 * Return 0 on success, -EINVAL if @mode is out of range.
 */
static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
				       struct cs_etm_queue *etmq,
				       enum cs_etm_decoder_operation mode)
{
	if (mode >= CS_ETM_OPERATION_MAX)
		return -EINVAL;

	d_params->packet_printer = cs_etm__packet_dump;
	d_params->operation = mode;
	d_params->data = etmq;
	/* Trace data comes through the formatted, frame-aligned path. */
	d_params->formatted = true;
	d_params->fsyncs = false;
	d_params->hsyncs = false;
	d_params->frame_aligned = true;

	return 0;
}
static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
struct auxtrace_buffer *buffer) struct auxtrace_buffer *buffer)
{ {
int i, ret; int ret;
const char *color = PERF_COLOR_BLUE; const char *color = PERF_COLOR_BLUE;
struct cs_etm_decoder_params d_params; struct cs_etm_decoder_params d_params;
struct cs_etm_trace_params *t_params; struct cs_etm_trace_params *t_params;
...@@ -155,48 +228,22 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, ...@@ -155,48 +228,22 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
/* Use metadata to fill in trace parameters for trace decoder */ /* Use metadata to fill in trace parameters for trace decoder */
t_params = zalloc(sizeof(*t_params) * etm->num_cpu); t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
for (i = 0; i < etm->num_cpu; i++) {
if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { if (!t_params)
u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; return;
t_params[i].protocol = if (cs_etm__init_trace_params(t_params, etm))
cs_etm__get_v7_protocol_version(etmidr); goto out_free;
t_params[i].etmv3.reg_ctrl =
etm->metadata[i][CS_ETM_ETMCR];
t_params[i].etmv3.reg_trc_id =
etm->metadata[i][CS_ETM_ETMTRACEIDR];
} else if (etm->metadata[i][CS_ETM_MAGIC] ==
__perf_cs_etmv4_magic) {
t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
t_params[i].etmv4.reg_idr0 =
etm->metadata[i][CS_ETMV4_TRCIDR0];
t_params[i].etmv4.reg_idr1 =
etm->metadata[i][CS_ETMV4_TRCIDR1];
t_params[i].etmv4.reg_idr2 =
etm->metadata[i][CS_ETMV4_TRCIDR2];
t_params[i].etmv4.reg_idr8 =
etm->metadata[i][CS_ETMV4_TRCIDR8];
t_params[i].etmv4.reg_configr =
etm->metadata[i][CS_ETMV4_TRCCONFIGR];
t_params[i].etmv4.reg_traceidr =
etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
}
}
/* Set decoder parameters to simply print the trace packets */ /* Set decoder parameters to simply print the trace packets */
d_params.packet_printer = cs_etm__packet_dump; if (cs_etm__init_decoder_params(&d_params, NULL,
d_params.operation = CS_ETM_OPERATION_PRINT; CS_ETM_OPERATION_PRINT))
d_params.formatted = true; goto out_free;
d_params.fsyncs = false;
d_params.hsyncs = false;
d_params.frame_aligned = true;
decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
zfree(&t_params);
if (!decoder) if (!decoder)
return; goto out_free;
do { do {
size_t consumed; size_t consumed;
...@@ -211,6 +258,9 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, ...@@ -211,6 +258,9 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
} while (buffer_used < buffer->size); } while (buffer_used < buffer->size);
cs_etm_decoder__free(decoder); cs_etm_decoder__free(decoder);
out_free:
zfree(&t_params);
} }
static int cs_etm__flush_events(struct perf_session *session, static int cs_etm__flush_events(struct perf_session *session,
...@@ -234,7 +284,7 @@ static int cs_etm__flush_events(struct perf_session *session, ...@@ -234,7 +284,7 @@ static int cs_etm__flush_events(struct perf_session *session,
if (ret < 0) if (ret < 0)
return ret; return ret;
return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1); return cs_etm__process_timeless_queues(etm, -1);
} }
static void cs_etm__free_queue(void *priv) static void cs_etm__free_queue(void *priv)
...@@ -326,7 +376,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, ...@@ -326,7 +376,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
struct addr_location al; struct addr_location al;
if (!etmq) if (!etmq)
return -1; return 0;
machine = etmq->etm->machine; machine = etmq->etm->machine;
cpumode = cs_etm__cpu_mode(etmq, address); cpumode = cs_etm__cpu_mode(etmq, address);
...@@ -334,7 +384,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, ...@@ -334,7 +384,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
thread = etmq->thread; thread = etmq->thread;
if (!thread) { if (!thread) {
if (cpumode != PERF_RECORD_MISC_KERNEL) if (cpumode != PERF_RECORD_MISC_KERNEL)
return -EINVAL; return 0;
thread = etmq->etm->unknown_thread; thread = etmq->etm->unknown_thread;
} }
...@@ -357,12 +407,10 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, ...@@ -357,12 +407,10 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
return len; return len;
} }
static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
unsigned int queue_nr)
{ {
int i;
struct cs_etm_decoder_params d_params; struct cs_etm_decoder_params d_params;
struct cs_etm_trace_params *t_params; struct cs_etm_trace_params *t_params = NULL;
struct cs_etm_queue *etmq; struct cs_etm_queue *etmq;
size_t szp = sizeof(struct cs_etm_packet); size_t szp = sizeof(struct cs_etm_packet);
...@@ -397,59 +445,22 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, ...@@ -397,59 +445,22 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
if (!etmq->event_buf) if (!etmq->event_buf)
goto out_free; goto out_free;
etmq->etm = etm;
etmq->queue_nr = queue_nr;
etmq->pid = -1;
etmq->tid = -1;
etmq->cpu = -1;
/* Use metadata to fill in trace parameters for trace decoder */ /* Use metadata to fill in trace parameters for trace decoder */
t_params = zalloc(sizeof(*t_params) * etm->num_cpu); t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
if (!t_params) if (!t_params)
goto out_free; goto out_free;
for (i = 0; i < etm->num_cpu; i++) { if (cs_etm__init_trace_params(t_params, etm))
if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { goto out_free;
u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR];
t_params[i].protocol =
cs_etm__get_v7_protocol_version(etmidr);
t_params[i].etmv3.reg_ctrl =
etm->metadata[i][CS_ETM_ETMCR];
t_params[i].etmv3.reg_trc_id =
etm->metadata[i][CS_ETM_ETMTRACEIDR];
} else if (etm->metadata[i][CS_ETM_MAGIC] ==
__perf_cs_etmv4_magic) {
t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
t_params[i].etmv4.reg_idr0 =
etm->metadata[i][CS_ETMV4_TRCIDR0];
t_params[i].etmv4.reg_idr1 =
etm->metadata[i][CS_ETMV4_TRCIDR1];
t_params[i].etmv4.reg_idr2 =
etm->metadata[i][CS_ETMV4_TRCIDR2];
t_params[i].etmv4.reg_idr8 =
etm->metadata[i][CS_ETMV4_TRCIDR8];
t_params[i].etmv4.reg_configr =
etm->metadata[i][CS_ETMV4_TRCCONFIGR];
t_params[i].etmv4.reg_traceidr =
etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
}
}
/* Set decoder parameters to simply print the trace packets */ /* Set decoder parameters to decode trace packets */
d_params.packet_printer = cs_etm__packet_dump; if (cs_etm__init_decoder_params(&d_params, etmq,
d_params.operation = CS_ETM_OPERATION_DECODE; CS_ETM_OPERATION_DECODE))
d_params.formatted = true; goto out_free;
d_params.fsyncs = false;
d_params.hsyncs = false;
d_params.frame_aligned = true;
d_params.data = etmq;
etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
zfree(&t_params);
if (!etmq->decoder) if (!etmq->decoder)
goto out_free; goto out_free;
...@@ -462,14 +473,13 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, ...@@ -462,14 +473,13 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
cs_etm__mem_access)) cs_etm__mem_access))
goto out_free_decoder; goto out_free_decoder;
etmq->offset = 0; zfree(&t_params);
etmq->period_instructions = 0;
return etmq; return etmq;
out_free_decoder: out_free_decoder:
cs_etm_decoder__free(etmq->decoder); cs_etm_decoder__free(etmq->decoder);
out_free: out_free:
zfree(&t_params);
zfree(&etmq->event_buf); zfree(&etmq->event_buf);
zfree(&etmq->last_branch); zfree(&etmq->last_branch);
zfree(&etmq->last_branch_rb); zfree(&etmq->last_branch_rb);
...@@ -484,24 +494,30 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, ...@@ -484,24 +494,30 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
struct auxtrace_queue *queue, struct auxtrace_queue *queue,
unsigned int queue_nr) unsigned int queue_nr)
{ {
int ret = 0;
struct cs_etm_queue *etmq = queue->priv; struct cs_etm_queue *etmq = queue->priv;
if (list_empty(&queue->head) || etmq) if (list_empty(&queue->head) || etmq)
return 0; goto out;
etmq = cs_etm__alloc_queue(etm, queue_nr); etmq = cs_etm__alloc_queue(etm);
if (!etmq) if (!etmq) {
return -ENOMEM; ret = -ENOMEM;
goto out;
}
queue->priv = etmq; queue->priv = etmq;
etmq->etm = etm;
if (queue->cpu != -1) etmq->queue_nr = queue_nr;
etmq->cpu = queue->cpu; etmq->cpu = queue->cpu;
etmq->tid = queue->tid; etmq->tid = queue->tid;
etmq->pid = -1;
etmq->offset = 0;
etmq->period_instructions = 0;
return 0; out:
return ret;
} }
static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
...@@ -509,6 +525,9 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) ...@@ -509,6 +525,9 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
unsigned int i; unsigned int i;
int ret; int ret;
if (!etm->kernel_start)
etm->kernel_start = machine__kernel_start(etm->machine);
for (i = 0; i < etm->queues.nr_queues; i++) { for (i = 0; i < etm->queues.nr_queues; i++) {
ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
if (ret) if (ret)
...@@ -666,7 +685,7 @@ static int cs_etm__inject_event(union perf_event *event, ...@@ -666,7 +685,7 @@ static int cs_etm__inject_event(union perf_event *event,
static int static int
cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) cs_etm__get_trace(struct cs_etm_queue *etmq)
{ {
struct auxtrace_buffer *aux_buffer = etmq->buffer; struct auxtrace_buffer *aux_buffer = etmq->buffer;
struct auxtrace_buffer *old_buffer = aux_buffer; struct auxtrace_buffer *old_buffer = aux_buffer;
...@@ -680,7 +699,7 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) ...@@ -680,7 +699,7 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
if (!aux_buffer) { if (!aux_buffer) {
if (old_buffer) if (old_buffer)
auxtrace_buffer__drop_data(old_buffer); auxtrace_buffer__drop_data(old_buffer);
buff->len = 0; etmq->buf_len = 0;
return 0; return 0;
} }
...@@ -700,13 +719,11 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) ...@@ -700,13 +719,11 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
if (old_buffer) if (old_buffer)
auxtrace_buffer__drop_data(old_buffer); auxtrace_buffer__drop_data(old_buffer);
buff->offset = aux_buffer->offset; etmq->buf_used = 0;
buff->len = aux_buffer->size; etmq->buf_len = aux_buffer->size;
buff->buf = aux_buffer->data; etmq->buf = aux_buffer->data;
buff->ref_timestamp = aux_buffer->reference; return etmq->buf_len;
return buff->len;
} }
static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
...@@ -1135,6 +1152,32 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq) ...@@ -1135,6 +1152,32 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq)
return 0; return 0;
} }
/*
* cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
* if need be.
* Returns: < 0 if error
* = 0 if no more auxtrace_buffer to read
* > 0 if the current buffer isn't empty yet
*/
static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
{
int ret;
if (!etmq->buf_len) {
ret = cs_etm__get_trace(etmq);
if (ret <= 0)
return ret;
/*
* We cannot assume consecutive blocks in the data file
* are contiguous, reset the decoder to force re-sync.
*/
ret = cs_etm_decoder__reset(etmq->decoder);
if (ret)
return ret;
}
return etmq->buf_len;
}
static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
struct cs_etm_packet *packet, struct cs_etm_packet *packet,
...@@ -1474,105 +1517,124 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) ...@@ -1474,105 +1517,124 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
return 0; return 0;
} }
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
{
int ret = 0;
size_t processed = 0;
/*
* Packets are decoded and added to the decoder's packet queue
* until the decoder packet processing callback has requested that
* processing stops or there is nothing left in the buffer. Normal
* operations that stop processing are a timestamp packet or a full
* decoder buffer queue.
*/
ret = cs_etm_decoder__process_data_block(etmq->decoder,
etmq->offset,
&etmq->buf[etmq->buf_used],
etmq->buf_len,
&processed);
if (ret)
goto out;
etmq->offset += processed;
etmq->buf_used += processed;
etmq->buf_len -= processed;
out:
return ret;
}
static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq)
{
int ret;
/* Process each packet in this chunk */
while (1) {
ret = cs_etm_decoder__get_packet(etmq->decoder,
etmq->packet);
if (ret <= 0)
/*
* Stop processing this chunk on
* end of data or error
*/
break;
/*
* Since packet addresses are swapped in packet
* handling within below switch() statements,
* thus setting sample flags must be called
* prior to switch() statement to use address
* information before packets swapping.
*/
ret = cs_etm__set_sample_flags(etmq);
if (ret < 0)
break;
switch (etmq->packet->sample_type) {
case CS_ETM_RANGE:
/*
* If the packet contains an instruction
* range, generate instruction sequence
* events.
*/
cs_etm__sample(etmq);
break;
case CS_ETM_EXCEPTION:
case CS_ETM_EXCEPTION_RET:
/*
* If the exception packet is coming,
* make sure the previous instruction
* range packet to be handled properly.
*/
cs_etm__exception(etmq);
break;
case CS_ETM_DISCONTINUITY:
/*
* Discontinuity in trace, flush
* previous branch stack
*/
cs_etm__flush(etmq);
break;
case CS_ETM_EMPTY:
/*
* Should not receive empty packet,
* report error.
*/
pr_err("CS ETM Trace: empty packet\n");
return -EINVAL;
default:
break;
}
}
return ret;
}
static int cs_etm__run_decoder(struct cs_etm_queue *etmq) static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{ {
struct cs_etm_auxtrace *etm = etmq->etm;
struct cs_etm_buffer buffer;
size_t buffer_used, processed;
int err = 0; int err = 0;
if (!etm->kernel_start)
etm->kernel_start = machine__kernel_start(etm->machine);
/* Go through each buffer in the queue and decode them one by one */ /* Go through each buffer in the queue and decode them one by one */
while (1) { while (1) {
buffer_used = 0; err = cs_etm__get_data_block(etmq);
memset(&buffer, 0, sizeof(buffer));
err = cs_etm__get_trace(&buffer, etmq);
if (err <= 0) if (err <= 0)
return err; return err;
/*
* We cannot assume consecutive blocks in the data file are
* contiguous, reset the decoder to force re-sync.
*/
err = cs_etm_decoder__reset(etmq->decoder);
if (err != 0)
return err;
/* Run trace decoder until buffer consumed or end of trace */ /* Run trace decoder until buffer consumed or end of trace */
do { do {
processed = 0; err = cs_etm__decode_data_block(etmq);
err = cs_etm_decoder__process_data_block(
etmq->decoder,
etmq->offset,
&buffer.buf[buffer_used],
buffer.len - buffer_used,
&processed);
if (err) if (err)
return err; return err;
etmq->offset += processed; /*
buffer_used += processed; * Process each packet in this chunk, nothing to do if
* an error occurs other than hoping the next one will
* be better.
*/
err = cs_etm__process_decoder_queue(etmq);
/* Process each packet in this chunk */ } while (etmq->buf_len);
while (1) {
err = cs_etm_decoder__get_packet(etmq->decoder,
etmq->packet);
if (err <= 0)
/*
* Stop processing this chunk on
* end of data or error
*/
break;
/*
* Since packet addresses are swapped in packet
* handling within below switch() statements,
* thus setting sample flags must be called
* prior to switch() statement to use address
* information before packets swapping.
*/
err = cs_etm__set_sample_flags(etmq);
if (err < 0)
break;
switch (etmq->packet->sample_type) {
case CS_ETM_RANGE:
/*
* If the packet contains an instruction
* range, generate instruction sequence
* events.
*/
cs_etm__sample(etmq);
break;
case CS_ETM_EXCEPTION:
case CS_ETM_EXCEPTION_RET:
/*
* If the exception packet is coming,
* make sure the previous instruction
* range packet to be handled properly.
*/
cs_etm__exception(etmq);
break;
case CS_ETM_DISCONTINUITY:
/*
* Discontinuity in trace, flush
* previous branch stack
*/
cs_etm__flush(etmq);
break;
case CS_ETM_EMPTY:
/*
* Should not receive empty packet,
* report error.
*/
pr_err("CS ETM Trace: empty packet\n");
return -EINVAL;
default:
break;
}
}
} while (buffer.len > buffer_used);
if (err == 0) if (err == 0)
/* Flush any remaining branch stack entries */ /* Flush any remaining branch stack entries */
...@@ -1583,7 +1645,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) ...@@ -1583,7 +1645,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
} }
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
pid_t tid, u64 time_) pid_t tid)
{ {
unsigned int i; unsigned int i;
struct auxtrace_queues *queues = &etm->queues; struct auxtrace_queues *queues = &etm->queues;
...@@ -1593,7 +1655,6 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, ...@@ -1593,7 +1655,6 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
struct cs_etm_queue *etmq = queue->priv; struct cs_etm_queue *etmq = queue->priv;
if (etmq && ((tid == -1) || (etmq->tid == tid))) { if (etmq && ((tid == -1) || (etmq->tid == tid))) {
etmq->time = time_;
cs_etm__set_pid_tid_cpu(etm, queue); cs_etm__set_pid_tid_cpu(etm, queue);
cs_etm__run_decoder(etmq); cs_etm__run_decoder(etmq);
} }
...@@ -1637,8 +1698,7 @@ static int cs_etm__process_event(struct perf_session *session, ...@@ -1637,8 +1698,7 @@ static int cs_etm__process_event(struct perf_session *session,
if (event->header.type == PERF_RECORD_EXIT) if (event->header.type == PERF_RECORD_EXIT)
return cs_etm__process_timeless_queues(etm, return cs_etm__process_timeless_queues(etm,
event->fork.tid, event->fork.tid);
sample->time);
return 0; return 0;
} }
......
...@@ -105,8 +105,8 @@ struct intlist *traceid_list; ...@@ -105,8 +105,8 @@ struct intlist *traceid_list;
#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) #define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
static const u64 __perf_cs_etmv3_magic = 0x3030303030303030ULL; #define __perf_cs_etmv3_magic 0x3030303030303030ULL
static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL; #define __perf_cs_etmv4_magic 0x4040404040404040ULL
#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))
#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
......
...@@ -563,7 +563,6 @@ static int write_cmdline(struct feat_fd *ff, ...@@ -563,7 +563,6 @@ static int write_cmdline(struct feat_fd *ff,
"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list" "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list"
struct cpu_topo { struct cpu_topo {
u32 cpu_nr;
u32 core_sib; u32 core_sib;
u32 thread_sib; u32 thread_sib;
char **core_siblings; char **core_siblings;
...@@ -679,7 +678,6 @@ static struct cpu_topo *build_cpu_topology(void) ...@@ -679,7 +678,6 @@ static struct cpu_topo *build_cpu_topology(void)
goto out_free; goto out_free;
tp = addr; tp = addr;
tp->cpu_nr = nr;
addr += sizeof(*tp); addr += sizeof(*tp);
tp->core_siblings = addr; tp->core_siblings = addr;
addr += sz; addr += sz;
...@@ -1042,11 +1040,9 @@ static int write_cpuid(struct feat_fd *ff, ...@@ -1042,11 +1040,9 @@ static int write_cpuid(struct feat_fd *ff,
int ret; int ret;
ret = get_cpuid(buffer, sizeof(buffer)); ret = get_cpuid(buffer, sizeof(buffer));
if (!ret) if (ret)
goto write_it; return -1;
return -1;
write_it:
return do_write_string(ff, buffer); return do_write_string(ff, buffer);
} }
......
libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o perf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk
inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt
......
...@@ -270,7 +270,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) ...@@ -270,7 +270,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
} }
void metricgroup__print(bool metrics, bool metricgroups, char *filter, void metricgroup__print(bool metrics, bool metricgroups, char *filter,
bool raw) bool raw, bool details)
{ {
struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_events_map *map = perf_pmu__find_map(NULL);
struct pmu_event *pe; struct pmu_event *pe;
...@@ -329,6 +329,12 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, ...@@ -329,6 +329,12 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
if (asprintf(&s, "%s\n%*s%s]", if (asprintf(&s, "%s\n%*s%s]",
pe->metric_name, 8, "[", pe->desc) < 0) pe->metric_name, 8, "[", pe->desc) < 0)
return; return;
if (details) {
if (asprintf(&s, "%s\n%*s%s]",
s, 8, "[", pe->metric_expr) < 0)
return;
}
} }
if (!s) if (!s)
......
...@@ -27,6 +27,7 @@ int metricgroup__parse_groups(const struct option *opt, ...@@ -27,6 +27,7 @@ int metricgroup__parse_groups(const struct option *opt,
const char *str, const char *str,
struct rblist *metric_events); struct rblist *metric_events);
void metricgroup__print(bool metrics, bool groups, char *filter, bool raw); void metricgroup__print(bool metrics, bool groups, char *filter,
bool raw, bool details);
bool metricgroup__has_metric(const char *metric); bool metricgroup__has_metric(const char *metric);
#endif #endif
...@@ -2540,7 +2540,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, ...@@ -2540,7 +2540,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag,
print_sdt_events(NULL, NULL, name_only); print_sdt_events(NULL, NULL, name_only);
metricgroup__print(true, true, NULL, name_only); metricgroup__print(true, true, NULL, name_only, details_flag);
} }
int parse_events__is_hardcoded_term(struct parse_events_term *term) int parse_events__is_hardcoded_term(struct parse_events_term *term)
......
...@@ -311,7 +311,7 @@ value_sym '/' event_config '/' ...@@ -311,7 +311,7 @@ value_sym '/' event_config '/'
$$ = list; $$ = list;
} }
| |
value_sym sep_slash_dc value_sym sep_slash_slash_dc
{ {
struct list_head *list; struct list_head *list;
int type = $1 >> 16; int type = $1 >> 16;
...@@ -702,7 +702,7 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE ...@@ -702,7 +702,7 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE
sep_dc: ':' | sep_dc: ':' |
sep_slash_dc: '/' | ':' | sep_slash_slash_dc: '/' '/' | ':' |
%% %%
......
...@@ -352,6 +352,11 @@ static bool s390_cpumsf_validate(int machine_type, ...@@ -352,6 +352,11 @@ static bool s390_cpumsf_validate(int machine_type,
*dsdes = 85; *dsdes = 85;
*bsdes = 32; *bsdes = 32;
break; break;
case 2964:
case 2965:
*dsdes = 112;
*bsdes = 32;
break;
default: default:
/* Illegal trailer entry */ /* Illegal trailer entry */
return false; return false;
......
libperf-$(CONFIG_LIBPERL) += trace-event-perl.o perf-$(CONFIG_LIBPERL) += trace-event-perl.o
libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment