Commit 43f4e627 authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo-5.1-20190214' of...

Merge tag 'perf-core-for-mingo-5.1-20190214' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

perf list:

  Jiri Olsa:

  - Display metric expressions for --details option

perf record:

  Alexey Budankov:

  - Implement the --affinity=node|cpu option; this is a leftover patch, as the
    other patches in this kit were already applied.

perf trace:

  Arnaldo Carvalho de Melo:

  - Fix segfaults due to not properly handling negative file descriptor syscall args.

  - Fix segfault related to the 'waitid' 'options' prefix showing logic.

  - Filter out 'gnome-terminal*' if it is a parent of 'perf trace', to reduce the
    syscall feedback loop in system wide sessions.

BPF:

  Song Liu:

  - Silence "Couldn't synthesize bpf events" warning for EPERM.

Build system:

  Arnaldo Carvalho de Melo:

  - Fix the test-all.c feature detection fast path that was broken for
    quite a while leading to longer build times.

Event parsing:

  Jiri Olsa:

  - Fix legacy events symbol separator parsing

cs-etm:

  Mathieu Poirier:

  - Fix some error path return errors and plug some memory leaks.

  - Add proper header file for symbols

  - Remove unused structure fields.

  - Modularize auxtrace_buffer fetch, decoder and packet processing loop.

Vendor events:

  Paul Clarke:

  - Add assorted metrics for the Power8 and Power9 architectures.

perf report:

  Thomas Richter:

  - Add s390 diagnostic sampling descriptor size
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 02106f88 44ec8396
......@@ -53,10 +53,6 @@ FEATURE_TESTS_BASIC := \
libslang \
libcrypto \
libunwind \
libunwind-x86 \
libunwind-x86_64 \
libunwind-arm \
libunwind-aarch64 \
pthread-attr-setaffinity-np \
pthread-barrier \
reallocarray \
......@@ -70,7 +66,6 @@ FEATURE_TESTS_BASIC := \
sched_getcpu \
sdt \
setns \
libopencsd \
libaio
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
......@@ -84,6 +79,11 @@ FEATURE_TESTS_EXTRA := \
libbabeltrace \
libbfd-liberty \
libbfd-liberty-z \
libopencsd \
libunwind-x86 \
libunwind-x86_64 \
libunwind-arm \
libunwind-aarch64 \
libunwind-debug-frame \
libunwind-debug-frame-arm \
libunwind-debug-frame-aarch64 \
......
......@@ -170,14 +170,14 @@
# include "test-setns.c"
#undef main
#define main main_test_libopencsd
# include "test-libopencsd.c"
#undef main
#define main main_test_libaio
# include "test-libaio.c"
#undef main
#define main main_test_reallocarray
# include "test-reallocarray.c"
#undef main
int main(int argc, char *argv[])
{
main_test_libpython();
......@@ -217,8 +217,8 @@ int main(int argc, char *argv[])
main_test_sched_getcpu();
main_test_sdt();
main_test_setns();
main_test_libopencsd();
main_test_libaio();
main_test_reallocarray();
return 0;
}
......@@ -8,3 +8,4 @@ int main(void)
free(get_current_dir_name());
return 0;
}
#undef _GNU_SOURCE
......@@ -7,3 +7,4 @@ int main(void)
return 0;
}
#undef _GNU_SOURCE
......@@ -6,3 +6,5 @@ int main(void)
{
return !!reallocarray(NULL, 1, 1);
}
#undef _GNU_SOURCE
......@@ -8,3 +8,5 @@ int main(void)
{
return sched_getcpu();
}
#undef _GNU_SOURCE
......@@ -5,3 +5,4 @@ int main(void)
{
return setns(0, 0);
}
#undef _GNU_SOURCE
......@@ -46,10 +46,10 @@ CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_
CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))"
CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)"
libperf-y += util/
libperf-y += arch/
libperf-y += ui/
libperf-y += scripts/
libperf-$(CONFIG_TRACE) += trace/beauty/
perf-y += util/
perf-y += arch/
perf-y += ui/
perf-y += scripts/
perf-$(CONFIG_TRACE) += trace/beauty/
gtk-y += ui/gtk/
......@@ -454,6 +454,11 @@ Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default:
Asynchronous mode is supported only when linking Perf tool with libc library
providing implementation for Posix AIO API.
--affinity=mode::
Set affinity mask of trace reading thread according to the policy defined by 'mode' value:
node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
cpu - thread affinity mask is set to cpu of the processed mmap buffer
--all-kernel::
Configure all used events to run in kernel space.
......
......@@ -109,6 +109,13 @@ FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
FEATURE_CHECK_LDFLAGS-libunwind-arm = -lunwind -lunwind-arm
FEATURE_CHECK_LDFLAGS-libunwind-aarch64 = -lunwind -lunwind-aarch64
FEATURE_CHECK_LDFLAGS-libunwind-x86 = -lunwind -llzma -lunwind-x86
FEATURE_CHECK_LDFLAGS-libunwind-x86_64 = -lunwind -llzma -lunwind-x86_64
FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
ifdef CSINCLUDES
LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
endif
......@@ -218,6 +225,8 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_LDFLAGS-libaio = -lrt
CFLAGS += -fno-omit-frame-pointer
CFLAGS += -ggdb3
CFLAGS += -funwind-tables
......@@ -386,7 +395,8 @@ ifeq ($(feature-setns), 1)
$(call detected,CONFIG_SETNS)
endif
ifndef NO_CORESIGHT
ifdef CORESIGHT
$(call feature_check,libopencsd)
ifeq ($(feature-libopencsd), 1)
CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS)
LDFLAGS += $(LIBOPENCSD_LDFLAGS)
......@@ -482,6 +492,7 @@ endif
ifndef NO_LIBUNWIND
have_libunwind :=
$(call feature_check,libunwind-x86)
ifeq ($(feature-libunwind-x86), 1)
$(call detected,CONFIG_LIBUNWIND_X86)
CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT
......@@ -490,6 +501,7 @@ ifndef NO_LIBUNWIND
have_libunwind = 1
endif
$(call feature_check,libunwind-aarch64)
ifeq ($(feature-libunwind-aarch64), 1)
$(call detected,CONFIG_LIBUNWIND_AARCH64)
CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT
......
......@@ -102,7 +102,7 @@ include ../scripts/utilities.mak
# When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if
# llvm-config is not in $PATH.
#
# Define NO_CORESIGHT if you do not want support for CoreSight trace decoding.
# Define CORESIGHT if you DO WANT support for CoreSight trace decoding.
#
# Define NO_AIO if you do not want support of Posix AIO based trace
# streaming for record mode. Currently Posix AIO trace streaming is
......@@ -344,9 +344,9 @@ endif
export PERL_PATH
LIB_FILE=$(OUTPUT)libperf.a
LIBPERF_A=$(OUTPUT)libperf.a
PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD)
PERFLIBS = $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD)
ifndef NO_LIBBPF
PERFLIBS += $(LIBBPF)
endif
......@@ -549,6 +549,8 @@ JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o
PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o
LIBPERF_IN := $(OUTPUT)libperf-in.o
export JEVENTS
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
......@@ -565,9 +567,12 @@ $(JEVENTS): $(JEVENTS_IN)
$(PMU_EVENTS_IN): $(JEVENTS) FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
$(LIBPERF_IN): prepare FORCE
$(Q)$(MAKE) $(build)=libperf
$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
$(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@
$(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBS) -o $@
$(GTK_IN): FORCE
$(Q)$(MAKE) $(build)=gtk
......@@ -683,12 +688,7 @@ endif
$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
LIBPERF_IN := $(OUTPUT)libperf-in.o
$(LIBPERF_IN): prepare FORCE
$(Q)$(MAKE) $(build)=libperf
$(LIB_FILE): $(LIBPERF_IN)
$(LIBPERF_A): $(LIBPERF_IN)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS)
LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)'
......@@ -910,7 +910,7 @@ python-clean:
$(python-clean)
clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
......
libperf-y += common.o
libperf-y += $(SRCARCH)/
perf-y += common.o
perf-y += $(SRCARCH)/
libperf-y += util/
libperf-$(CONFIG_DWARF_UNWIND) += tests/
perf-y += util/
perf-$(CONFIG_DWARF_UNWIND) += tests/
libperf-y += regs_load.o
libperf-y += dwarf-unwind.o
libperf-y += vectors-page.o
perf-y += regs_load.o
perf-y += dwarf-unwind.o
perf-y += vectors-page.o
libperf-y += arch-tests.o
perf-y += arch-tests.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o
perf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o
libperf-y += util/
libperf-$(CONFIG_DWARF_UNWIND) += tests/
perf-y += util/
perf-$(CONFIG_DWARF_UNWIND) += tests/
libperf-y += regs_load.o
libperf-y += dwarf-unwind.o
perf-y += regs_load.o
perf-y += dwarf-unwind.o
libperf-y += arch-tests.o
perf-y += arch-tests.o
libperf-y += header.o
libperf-y += sym-handling.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-y += header.o
perf-y += sym-handling.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
../../arm/util/auxtrace.o \
../../arm/util/cs-etm.o \
arm-spe.o
libperf-y += util/
perf-y += util/
libperf-y += header.o
perf-y += header.o
libperf-y += util/
libperf-y += tests/
perf-y += util/
perf-y += tests/
libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o
libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-$(CONFIG_DWARF_UNWIND) += regs_load.o
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
libperf-y += arch-tests.o
perf-y += arch-tests.o
libperf-y += header.o
libperf-y += sym-handling.o
libperf-y += kvm-stat.o
libperf-y += perf_regs.o
libperf-y += mem-events.o
perf-y += header.o
perf-y += sym-handling.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
perf-y += mem-events.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_DWARF) += skip-callchain-idx.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-y += util/
perf-y += util/
libperf-y += header.o
libperf-y += kvm-stat.o
perf-y += header.o
perf-y += kvm-stat.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-y += machine.o
perf-y += machine.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
perf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-y += util/
perf-y += util/
libperf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-y += util/
perf-y += util/
libperf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-y += util/
libperf-y += tests/
perf-y += util/
perf-y += tests/
libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o
libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-$(CONFIG_DWARF_UNWIND) += regs_load.o
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
libperf-y += arch-tests.o
libperf-y += rdpmc.o
libperf-y += perf-time-to-tsc.o
libperf-$(CONFIG_AUXTRACE) += insn-x86.o
libperf-$(CONFIG_X86_64) += bp-modify.o
perf-y += arch-tests.o
perf-y += rdpmc.o
perf-y += perf-time-to-tsc.o
perf-$(CONFIG_AUXTRACE) += insn-x86.o
perf-$(CONFIG_X86_64) += bp-modify.o
libperf-y += header.o
libperf-y += tsc.o
libperf-y += pmu.o
libperf-y += kvm-stat.o
libperf-y += perf_regs.o
libperf-y += group.o
libperf-y += machine.o
libperf-y += event.o
perf-y += header.o
perf-y += tsc.o
perf-y += pmu.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
perf-y += group.o
perf-y += machine.o
perf-y += event.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
libperf-$(CONFIG_AUXTRACE) += intel-bts.o
perf-$(CONFIG_AUXTRACE) += auxtrace.o
perf-$(CONFIG_AUXTRACE) += intel-pt.o
perf-$(CONFIG_AUXTRACE) += intel-bts.o
libperf-y += util/
perf-y += util/
libperf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
......@@ -82,9 +82,9 @@ int cmd_list(int argc, const char **argv)
else if (strcmp(argv[i], "sdt") == 0)
print_sdt_events(NULL, NULL, raw_dump);
else if (strcmp(argv[i], "metric") == 0)
metricgroup__print(true, false, NULL, raw_dump);
metricgroup__print(true, false, NULL, raw_dump, details_flag);
else if (strcmp(argv[i], "metricgroup") == 0)
metricgroup__print(false, true, NULL, raw_dump);
metricgroup__print(false, true, NULL, raw_dump, details_flag);
else if ((sep = strchr(argv[i], ':')) != NULL) {
int sep_idx;
......@@ -102,7 +102,7 @@ int cmd_list(int argc, const char **argv)
s[sep_idx] = '\0';
print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
print_sdt_events(s, s + sep_idx + 1, raw_dump);
metricgroup__print(true, true, s, raw_dump);
metricgroup__print(true, true, s, raw_dump, details_flag);
free(s);
} else {
if (asprintf(&s, "*%s*", argv[i]) < 0) {
......@@ -119,7 +119,7 @@ int cmd_list(int argc, const char **argv)
details_flag);
print_tracepoint_events(NULL, s, raw_dump);
print_sdt_events(NULL, s, raw_dump);
metricgroup__print(true, true, NULL, raw_dump);
metricgroup__print(true, true, NULL, raw_dump, details_flag);
free(s);
}
}
......
......@@ -1656,6 +1656,21 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
return -1;
}
/*
 * Option callback for --affinity=node|cpu.
 *
 * Stores the requested reader-thread affinity policy into record_opts:
 * "node" selects PERF_AFFINITY_NODE, "cpu" selects PERF_AFFINITY_CPU
 * (case-insensitive). Unrecognized values and --no-affinity leave the
 * current setting untouched. Always returns 0 (option accepted).
 */
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (!unset && str) {
		if (strcasecmp(str, "node") == 0)
			opts->affinity = PERF_AFFINITY_NODE;
		else if (strcasecmp(str, "cpu") == 0)
			opts->affinity = PERF_AFFINITY_CPU;
	}

	return 0;
}
static int record__parse_mmap_pages(const struct option *opt,
const char *str,
int unset __maybe_unused)
......@@ -1964,6 +1979,9 @@ static struct option __record_options[] = {
&nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
record__aio_parse),
#endif
OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
"Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
record__parse_affinity),
OPT_END()
};
......
......@@ -1041,6 +1041,9 @@ static const size_t trace__entry_str_size = 2048;
static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
{
if (fd < 0)
return NULL;
if (fd > ttrace->files.max) {
struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
......@@ -2768,7 +2771,8 @@ static int trace__set_filter_loop_pids(struct trace *trace)
if (parent == NULL)
break;
if (!strcmp(thread__comm_str(parent), "sshd")) {
if (!strcmp(thread__comm_str(parent), "sshd") ||
strstarts(thread__comm_str(parent), "gnome-terminal")) {
pids[nr++] = parent->tid;
break;
}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
[
{
"MetricExpr": "PM_BR_MPRED_CMPL / PM_BR_PRED * 100",
"MetricGroup": "branch_prediction",
"MetricName": "br_misprediction_percent"
},
{
"BriefDescription": "Count cache branch misprediction per instruction",
"MetricExpr": "PM_BR_MPRED_CCACHE / PM_RUN_INST_CMPL * 100",
"MetricGroup": "branch_prediction",
"MetricName": "ccache_mispredict_rate_percent"
},
{
"BriefDescription": "Count cache branch misprediction",
"MetricExpr": "PM_BR_MPRED_CCACHE / PM_BR_PRED_CCACHE * 100",
"MetricGroup": "branch_prediction",
"MetricName": "ccache_misprediction_percent"
},
{
"BriefDescription": "Link stack branch misprediction",
"MetricExpr": "PM_BR_MPRED_LSTACK / PM_RUN_INST_CMPL * 100",
"MetricGroup": "branch_prediction",
"MetricName": "lstack_mispredict_rate_percent"
},
{
"BriefDescription": "Link stack branch misprediction",
"MetricExpr": "PM_BR_MPRED_LSTACK/ PM_BR_PRED_LSTACK * 100",
"MetricGroup": "branch_prediction",
"MetricName": "lstack_misprediction_percent"
},
{
"BriefDescription": "% Branches Taken",
"MetricExpr": "PM_BR_TAKEN_CMPL * 100 / PM_BRU_FIN",
"MetricGroup": "branch_prediction",
"MetricName": "taken_branches_percent"
},
{
"BriefDescription": "Completion stall due to a Branch Unit",
"MetricExpr": "PM_CMPLU_STALL_BRU/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "bru_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish",
"MetricExpr": "PM_CMPLU_STALL_CRYPTO/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "crypto_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest",
"MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dcache_miss_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish.",
"MetricExpr": "PM_CMPLU_STALL_DFLONG/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dflong_stall_cpi"
},
{
"BriefDescription": "Stalls due to short latency decimal floating ops.",
"MetricExpr": "(PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_DFLONG)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dfu_other_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish.",
"MetricExpr": "PM_CMPLU_STALL_DFU/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dfu_stall_cpi"
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved off node memory/cache",
"MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_REMOTE)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_distant_stall_cpi"
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)",
"MetricExpr": "PM_CMPLU_STALL_DMISS_L21_L31/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_l21_l31_stall_cpi"
},
{
"BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict",
"MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_l2l3_conflict_stall_cpi"
},
{
"BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 without conflict",
"MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_l2l3_noconflict_stall_cpi"
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3",
"MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_l2l3_stall_cpi"
},
{
"BriefDescription": "Completion stall due to cache miss resolving missed the L3",
"MetricExpr": "PM_CMPLU_STALL_DMISS_L3MISS/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_l3miss_stall_cpi"
},
{
"BriefDescription": "Completion stall due to cache miss that resolves in local memory",
"MetricExpr": "PM_CMPLU_STALL_DMISS_LMEM/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_lmem_stall_cpi"
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved outside of local memory",
"MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_non_local_stall_cpi"
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)",
"MetricExpr": "PM_CMPLU_STALL_DMISS_REMOTE/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_remote_stall_cpi"
},
{
"BriefDescription": "Stalls due to short latency double precision ops.",
"MetricExpr": "(PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DPLONG)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dp_other_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.",
"MetricExpr": "PM_CMPLU_STALL_DP/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dp_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.",
"MetricExpr": "PM_CMPLU_STALL_DPLONG/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "dplong_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction is an EIEIO waiting for response from L2",
"MetricExpr": "PM_CMPLU_STALL_EIEIO/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "eieio_stall_cpi"
},
{
"BriefDescription": "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full",
"MetricExpr": "PM_CMPLU_STALL_EMQ_FULL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "emq_full_stall_cpi"
},
{
"MetricExpr": "(PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "emq_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load or store that suffered a translation miss",
"MetricExpr": "PM_CMPLU_STALL_ERAT_MISS/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "erat_miss_stall_cpi"
},
{
"BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete",
"MetricExpr": "PM_CMPLU_STALL_EXCEPTION/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "exception_stall_cpi"
},
{
"BriefDescription": "Completion stall due to execution units for other reasons.",
"MetricExpr": "(PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_PM - PM_CMPLU_STALL_CRYPTO - PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "exec_unit_other_stall_cpi"
},
{
"BriefDescription": "Completion stall due to execution units (FXU/VSU/CRU)",
"MetricExpr": "PM_CMPLU_STALL_EXEC_UNIT/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "exec_unit_stall_cpi"
},
{
"BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion",
"MetricExpr": "PM_CMPLU_STALL_FLUSH_ANY_THREAD/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "flush_any_thread_stall_cpi"
},
{
"BriefDescription": "Completion stall due to a long latency scalar fixed point instruction (division, square root)",
"MetricExpr": "PM_CMPLU_STALL_FXLONG/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "fxlong_stall_cpi"
},
{
"BriefDescription": "Stalls due to short latency integer ops",
"MetricExpr": "(PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_FXLONG)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "fxu_other_stall_cpi"
},
{
"BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes",
"MetricExpr": "PM_CMPLU_STALL_FXU/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "fxu_stall_cpi"
},
{
"MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "issue_hold_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied",
"MetricExpr": "PM_CMPLU_STALL_LARX/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "larx_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data",
"MetricExpr": "PM_CMPLU_STALL_LHS/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lhs_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full",
"MetricExpr": "PM_CMPLU_STALL_LMQ_FULL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lmq_full_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish",
"MetricExpr": "PM_CMPLU_STALL_LOAD_FINISH/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "load_finish_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load that was held in LSAQ because the LRQ was full",
"MetricExpr": "PM_CMPLU_STALL_LRQ_FULL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lrq_full_stall_cpi"
},
{
"BriefDescription": "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others",
"MetricExpr": "PM_CMPLU_STALL_LRQ_OTHER/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lrq_other_stall_cpi"
},
{
"MetricExpr": "(PM_CMPLU_STALL_LMQ_FULL + PM_CMPLU_STALL_ST_FWD + PM_CMPLU_STALL_LHS + PM_CMPLU_STALL_LSU_MFSPR + PM_CMPLU_STALL_LARX + PM_CMPLU_STALL_LRQ_OTHER)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lrq_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch",
"MetricExpr": "PM_CMPLU_STALL_LSAQ_ARB/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsaq_arb_stall_cpi"
},
{
"MetricExpr": "(PM_CMPLU_STALL_LRQ_FULL + PM_CMPLU_STALL_SRQ_FULL + PM_CMPLU_STALL_LSAQ_ARB)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsaq_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish",
"MetricExpr": "PM_CMPLU_STALL_LSU_FIN/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsu_fin_stall_cpi"
},
{
"BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete",
"MetricExpr": "PM_CMPLU_STALL_LSU_FLUSH_NEXT/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsu_flush_next_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned",
"MetricExpr": "PM_CMPLU_STALL_LSU_MFSPR/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsu_mfspr_stall_cpi"
},
{
"BriefDescription": "Completion LSU stall for other reasons",
"MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_LSU_FIN - PM_CMPLU_STALL_STORE_FINISH - PM_CMPLU_STALL_STORE_DATA - PM_CMPLU_STALL_EIEIO - PM_CMPLU_STALL_STCX - PM_CMPLU_STALL_SLB - PM_CMPLU_STALL_TEND - PM_CMPLU_STALL_PASTE - PM_CMPLU_STALL_TLBIE - PM_CMPLU_STALL_STORE_PIPE_ARB - PM_CMPLU_STALL_STORE_FIN_ARB - PM_CMPLU_STALL_LOAD_FINISH + PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_LMQ_FULL - PM_CMPLU_STALL_ST_FWD - PM_CMPLU_STALL_LHS - PM_CMPLU_STALL_LSU_MFSPR - PM_CMPLU_STALL_LARX - PM_CMPLU_STALL_LRQ_OTHER + PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL - PM_CMPLU_STALL_LRQ_FULL - PM_CMPLU_STALL_SRQ_FULL - PM_CMPLU_STALL_LSAQ_ARB) / PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsu_other_stall_cpi"
},
{
"BriefDescription": "Completion stall by LSU instruction",
"MetricExpr": "PM_CMPLU_STALL_LSU/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsu_stall_cpi"
},
{
"BriefDescription": "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)",
"MetricExpr": "PM_CMPLU_STALL_MTFPSCR/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "mtfpscr_stall_cpi"
},
{
"BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT",
"MetricExpr": "PM_CMPLU_STALL_NESTED_TBEGIN/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "nested_tbegin_stall_cpi"
},
{
"BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay",
"MetricExpr": "PM_CMPLU_STALL_NESTED_TEND/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "nested_tend_stall_cpi"
},
{
"BriefDescription": "Number of cycles the ICT has no itags assigned to this thread",
"MetricExpr": "PM_ICT_NOSLOT_CYC/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "nothing_dispatched_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was one that must finish at dispatch.",
"MetricExpr": "PM_CMPLU_STALL_NTC_DISP_FIN/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "ntc_disp_fin_stall_cpi"
},
{
"BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. This event is used to account for cycles in which work is being completed in the CPI stack",
"MetricExpr": "PM_NTC_FIN/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "ntc_fin_cpi"
},
{
"BriefDescription": "Completion stall due to ntc flush",
"MetricExpr": "PM_CMPLU_STALL_NTC_FLUSH/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "ntc_flush_stall_cpi"
},
{
"BriefDescription": "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)",
"MetricExpr": "PM_NTC_ISSUE_HELD_ARB/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "ntc_issue_held_arb_cpi"
},
{
"BriefDescription": "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it",
"MetricExpr": "PM_NTC_ISSUE_HELD_DARQ_FULL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "ntc_issue_held_darq_full_cpi"
},
{
"BriefDescription": "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU",
"MetricExpr": "PM_NTC_ISSUE_HELD_OTHER/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "ntc_issue_held_other_cpi"
},
{
"BriefDescription": "Cycles unaccounted for.",
"MetricExpr": "(PM_RUN_CYC - PM_1PLUS_PPC_CMPL - PM_CMPLU_STALL_THRD - PM_CMPLU_STALL - PM_ICT_NOSLOT_CYC)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "other_cpi"
},
{
"BriefDescription": "Completion stall for other reasons",
        "MetricExpr": "(PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "other_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a paste waiting for response from L2",
"MetricExpr": "PM_CMPLU_STALL_PASTE/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "paste_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish.",
"MetricExpr": "PM_CMPLU_STALL_PM/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "pm_stall_cpi"
},
{
"BriefDescription": "Run cycles per run instruction",
"MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "run_cpi"
},
{
"BriefDescription": "Run_cycles",
"MetricExpr": "PM_RUN_CYC/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "run_cyc_cpi"
},
{
"MetricExpr": "(PM_CMPLU_STALL_FXU + PM_CMPLU_STALL_DP + PM_CMPLU_STALL_DFU + PM_CMPLU_STALL_PM + PM_CMPLU_STALL_CRYPTO)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "scalar_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was awaiting L2 response for an SLB",
"MetricExpr": "PM_CMPLU_STALL_SLB/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "slb_stall_cpi"
},
{
"BriefDescription": "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC",
"MetricExpr": "PM_CMPLU_STALL_SPEC_FINISH/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "spec_finish_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full",
"MetricExpr": "PM_CMPLU_STALL_SRQ_FULL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "srq_full_stall_cpi"
},
{
"MetricExpr": "(PM_CMPLU_STALL_STORE_DATA + PM_CMPLU_STALL_EIEIO + PM_CMPLU_STALL_STCX + PM_CMPLU_STALL_SLB + PM_CMPLU_STALL_TEND + PM_CMPLU_STALL_PASTE + PM_CMPLU_STALL_TLBIE + PM_CMPLU_STALL_STORE_PIPE_ARB + PM_CMPLU_STALL_STORE_FIN_ARB)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "srq_stall_cpi"
},
{
"BriefDescription": "Completion stall due to store forward",
"MetricExpr": "PM_CMPLU_STALL_ST_FWD/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "st_fwd_stall_cpi"
},
{
"BriefDescription": "Nothing completed and ICT not empty",
"MetricExpr": "PM_CMPLU_STALL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a stcx waiting for response from L2",
"MetricExpr": "PM_CMPLU_STALL_STCX/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "stcx_stall_cpi"
},
{
"BriefDescription": "Finish stall because the next to finish instruction was a store waiting on data",
"MetricExpr": "PM_CMPLU_STALL_STORE_DATA/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "store_data_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe",
"MetricExpr": "PM_CMPLU_STALL_STORE_FIN_ARB/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "store_fin_arb_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish",
"MetricExpr": "PM_CMPLU_STALL_STORE_FINISH/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "store_finish_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject. This means the instruction is ready to relaunch and tried once but lost arbitration",
"MetricExpr": "PM_CMPLU_STALL_STORE_PIPE_ARB/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "store_pipe_arb_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a tend instruction awaiting response from L2",
"MetricExpr": "PM_CMPLU_STALL_TEND/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "tend_stall_cpi"
},
{
"BriefDescription": "Completion Stalled because the thread was blocked",
"MetricExpr": "PM_CMPLU_STALL_THRD/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "thread_block_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a tlbie waiting for response from L2",
"MetricExpr": "PM_CMPLU_STALL_TLBIE/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "tlbie_stall_cpi"
},
{
"BriefDescription": "Vector stalls due to small latency double precision ops",
"MetricExpr": "(PM_CMPLU_STALL_VDP - PM_CMPLU_STALL_VDPLONG)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vdp_other_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish.",
"MetricExpr": "PM_CMPLU_STALL_VDP/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vdp_stall_cpi"
},
{
"BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.",
"MetricExpr": "PM_CMPLU_STALL_VDPLONG/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vdplong_stall_cpi"
},
{
"MetricExpr": "(PM_CMPLU_STALL_VFXU + PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vector_stall_cpi"
},
{
"BriefDescription": "Completion stall due to a long latency vector fixed point instruction (division, square root)",
"MetricExpr": "PM_CMPLU_STALL_VFXLONG/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vfxlong_stall_cpi"
},
{
"BriefDescription": "Vector stalls due to small latency integer ops",
"MetricExpr": "(PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VFXLONG)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vfxu_other_stall_cpi"
},
{
"BriefDescription": "Finish stall due to a vector fixed point instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes",
"MetricExpr": "PM_CMPLU_STALL_VFXU/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "vfxu_stall_cpi"
},
{
"BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Modified) per Inst",
"MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_dl2l3_mod_rate_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Shared) per Inst",
"MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_dl2l3_shr_rate_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Distant Memory per Inst",
"MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_dmem_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L2, other core per Inst",
"MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l21_mod_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L2, other core per Inst",
"MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l21_shr_rate_percent"
},
{
        "BriefDescription": "% of DL1 reloads from sources beyond the local L2 per Inst",
"MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l2_miss_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from L2 per Inst",
"MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l2_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L3 M state, other core per Inst",
"MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l31_mod_rate_percent"
},
{
        "BriefDescription": "% of DL1 reloads from Private L3 S state, other core per Inst",
"MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l31_shr_rate_percent"
},
{
"BriefDescription": "% of DL1 Reloads that came from the L3 and were brought into the L3 by a prefetch, per instruction completed",
"MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l3_mepf_rate_percent"
},
{
        "BriefDescription": "% of DL1 reloads from sources beyond the local L3 per Inst",
"MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l3_miss_rate_percent"
},
{
"BriefDescription": "% of DL1 Reloads from L3 per Inst",
"MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_l3_rate_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Local Memory per Inst",
"MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_lmem_rate_percent"
},
{
        "BriefDescription": "% of DL1 reloads from Remote L2 or L3 (Modified) per Inst",
"MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_rl2l3_mod_rate_percent"
},
{
        "BriefDescription": "% of DL1 reloads from Remote L2 or L3 (Shared) per Inst",
"MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_rl2l3_shr_rate_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Remote Memory per Inst",
"MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "dl1_reload_from_rmem_rate_percent"
},
{
"BriefDescription": "Percentage of L1 demand load misses per run instruction",
"MetricExpr": "PM_LD_MISS_L1 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "dl1_reloads_percent_per_inst",
"MetricName": "l1_ld_miss_rate_percent"
},
{
"BriefDescription": "% of DL1 misses that result in a cache reload",
"MetricExpr": "PM_L1_DCACHE_RELOAD_VALID * 100 / PM_LD_MISS_L1",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_miss_reloads_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Modified)",
"MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_dl2l3_mod_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Shared)",
"MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_dl2l3_shr_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Distant Memory",
"MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_dmem_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L2, other core",
"MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l21_mod_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L2, other core",
"MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l21_shr_percent"
},
{
"BriefDescription": "% of DL1 Reloads from sources beyond the local L2",
"MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l2_miss_percent"
},
{
"BriefDescription": "% of DL1 reloads from L2",
"MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l2_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L3, other core",
"MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l31_mod_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L3, other core",
"MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l31_shr_percent"
},
{
"BriefDescription": "% of DL1 Reloads that came from L3 and were brought into the L3 by a prefetch",
"MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l3_mepf_percent"
},
{
"BriefDescription": "% of DL1 Reloads from sources beyond the local L3",
"MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l3_miss_percent"
},
{
"BriefDescription": "% of DL1 Reloads from L3",
"MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_l3_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Local Memory",
"MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_lmem_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Modified)",
"MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_rl2l3_mod_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Shared)",
"MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_rl2l3_shr_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Remote Memory",
"MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricGroup": "dl1_reloads_percent_per_ref",
"MetricName": "dl1_reload_from_rmem_percent"
},
{
"BriefDescription": "estimate of dl2l3 distant MOD miss rates with measured DL2L3 MOD latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_DL2L3_MOD * PM_MRK_DATA_FROM_DL2L3_MOD_CYC / PM_MRK_DATA_FROM_DL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "dl2l3_mod_cpi_percent"
},
{
"BriefDescription": "estimate of dl2l3 distant SHR miss rates with measured DL2L3 SHR latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_DL2L3_SHR * PM_MRK_DATA_FROM_DL2L3_SHR_CYC / PM_MRK_DATA_FROM_DL2L3_SHR / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "dl2l3_shr_cpi_percent"
},
{
"BriefDescription": "estimate of distant L4 miss rates with measured DL4 latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_DL4 * PM_MRK_DATA_FROM_DL4_CYC / PM_MRK_DATA_FROM_DL4 / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "dl4_cpi_percent"
},
{
"BriefDescription": "estimate of distant memory miss rates with measured DMEM latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_DMEM * PM_MRK_DATA_FROM_DMEM_CYC / PM_MRK_DATA_FROM_DMEM / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "dmem_cpi_percent"
},
{
"BriefDescription": "estimate of dl21 MOD miss rates with measured L21 MOD latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_L21_MOD * PM_MRK_DATA_FROM_L21_MOD_CYC / PM_MRK_DATA_FROM_L21_MOD / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "l21_mod_cpi_percent"
},
{
"BriefDescription": "estimate of dl21 SHR miss rates with measured L21 SHR latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_L21_SHR * PM_MRK_DATA_FROM_L21_SHR_CYC / PM_MRK_DATA_FROM_L21_SHR / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "l21_shr_cpi_percent"
},
{
"BriefDescription": "estimate of dl2 miss rates with measured L2 latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_L2 * PM_MRK_DATA_FROM_L2_CYC / PM_MRK_DATA_FROM_L2 / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "l2_cpi_percent"
},
{
"BriefDescription": "estimate of dl31 MOD miss rates with measured L31 MOD latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_L31_MOD * PM_MRK_DATA_FROM_L31_MOD_CYC / PM_MRK_DATA_FROM_L31_MOD / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "l31_mod_cpi_percent"
},
{
"BriefDescription": "estimate of dl31 SHR miss rates with measured L31 SHR latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_L31_SHR * PM_MRK_DATA_FROM_L31_SHR_CYC / PM_MRK_DATA_FROM_L31_SHR / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "l31_shr_cpi_percent"
},
{
"BriefDescription": "estimate of dl3 miss rates with measured L3 latency as a % of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_L3 * PM_MRK_DATA_FROM_L3_CYC / PM_MRK_DATA_FROM_L3 / PM_CMPLU_STALL_DCACHE_MISS * 100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "l3_cpi_percent"
},
{
"BriefDescription": "estimate of Local memory miss rates with measured LMEM latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_LMEM * PM_MRK_DATA_FROM_LMEM_CYC / PM_MRK_DATA_FROM_LMEM / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "lmem_cpi_percent"
},
{
"BriefDescription": "estimate of dl2l3 remote MOD miss rates with measured RL2L3 MOD latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_RL2L3_MOD * PM_MRK_DATA_FROM_RL2L3_MOD_CYC / PM_MRK_DATA_FROM_RL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "rl2l3_mod_cpi_percent"
},
{
"BriefDescription": "estimate of dl2l3 shared miss rates with measured RL2L3 SHR latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_RL2L3_SHR * PM_MRK_DATA_FROM_RL2L3_SHR_CYC / PM_MRK_DATA_FROM_RL2L3_SHR / PM_CMPLU_STALL_DCACHE_MISS * 100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "rl2l3_shr_cpi_percent"
},
{
"BriefDescription": "estimate of remote L4 miss rates with measured RL4 latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_RL4 * PM_MRK_DATA_FROM_RL4_CYC / PM_MRK_DATA_FROM_RL4 / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "rl4_cpi_percent"
},
{
"BriefDescription": "estimate of remote memory miss rates with measured RMEM latency as a %of dcache miss cpi",
"MetricExpr": "PM_DATA_FROM_RMEM * PM_MRK_DATA_FROM_RMEM_CYC / PM_MRK_DATA_FROM_RMEM / PM_CMPLU_STALL_DCACHE_MISS *100",
"MetricGroup": "estimated_dcache_miss_cpi",
"MetricName": "rmem_cpi_percent"
},
{
"BriefDescription": "Branch Mispredict flushes per instruction",
"MetricExpr": "PM_FLUSH_MPRED / PM_RUN_INST_CMPL * 100",
"MetricGroup": "general",
"MetricName": "br_mpred_flush_rate_percent"
},
{
"BriefDescription": "Cycles per instruction",
"MetricExpr": "PM_CYC / PM_INST_CMPL",
"MetricGroup": "general",
"MetricName": "cpi"
},
{
        "BriefDescription": "Percentage of dispatch flushes per run instruction",
"MetricExpr": "(PM_FLUSH_DISP / PM_RUN_INST_CMPL) * 100",
"MetricGroup": "general",
"MetricName": "disp_flush_rate_percent"
},
{
"BriefDescription": "% DTLB miss rate per inst",
"MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL *100",
"MetricGroup": "general",
"MetricName": "dtlb_miss_rate_percent"
},
{
"BriefDescription": "Flush rate (%)",
"MetricExpr": "PM_FLUSH * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "flush_rate_percent"
},
{
"BriefDescription": "Instructions per cycles",
"MetricExpr": "PM_INST_CMPL / PM_CYC",
"MetricGroup": "general",
"MetricName": "ipc"
},
{
"BriefDescription": "% ITLB miss rate per inst",
"MetricExpr": "PM_ITLB_MISS / PM_RUN_INST_CMPL *100",
"MetricGroup": "general",
"MetricName": "itlb_miss_rate_percent"
},
{
"BriefDescription": "Percentage of L1 load misses per L1 load ref",
"MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100",
"MetricGroup": "general",
"MetricName": "l1_ld_miss_ratio_percent"
},
{
"BriefDescription": "Percentage of L1 store misses per run instruction",
"MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l1_st_miss_rate_percent"
},
{
"BriefDescription": "Percentage of L1 store misses per L1 store ref",
"MetricExpr": "PM_ST_MISS_L1 / PM_ST_FIN * 100",
"MetricGroup": "general",
"MetricName": "l1_st_miss_ratio_percent"
},
{
"BriefDescription": "L2 Instruction Miss Rate (per instruction)(%)",
"MetricExpr": "PM_INST_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l2_inst_miss_rate_percent"
},
{
        "BriefDescription": "L2 demand Load Miss Rate (per run instruction)(%)",
"MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l2_ld_miss_rate_percent"
},
{
"BriefDescription": "L2 PTEG Miss Rate (per run instruction)(%)",
"MetricExpr": "PM_DPTEG_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l2_pteg_miss_rate_percent"
},
{
"BriefDescription": "L3 Instruction Miss Rate (per instruction)(%)",
"MetricExpr": "PM_INST_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l3_inst_miss_rate_percent"
},
{
"BriefDescription": "L3 demand Load Miss Rate (per run instruction)(%)",
"MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l3_ld_miss_rate_percent"
},
{
"BriefDescription": "L3 PTEG Miss Rate (per run instruction)(%)",
"MetricExpr": "PM_DPTEG_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "general",
"MetricName": "l3_pteg_miss_rate_percent"
},
{
"BriefDescription": "Run cycles per cycle",
"MetricExpr": "PM_RUN_CYC / PM_CYC*100",
"MetricGroup": "general",
"MetricName": "run_cycles_percent"
},
{
"BriefDescription": "Instruction dispatch-to-completion ratio",
"MetricExpr": "PM_INST_DISP / PM_INST_CMPL",
"MetricGroup": "general",
"MetricName": "speculation"
},
{
"BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified) per Inst",
"MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_dl2l3_mod_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared) per Inst",
"MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_dl2l3_shr_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant L4 per Inst",
"MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_dl4_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant Memory per Inst",
"MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_dmem_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L2, other core per Inst",
"MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_l21_mod_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L2, other core per Inst",
"MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_l21_shr_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from L2 per Inst",
"MetricExpr": "PM_INST_FROM_L2 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_l2_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L3, other core per Inst",
"MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_l31_mod_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L3 other core per Inst",
"MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_l31_shr_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from L3 per Inst",
"MetricExpr": "PM_INST_FROM_L3 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_l3_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Local L4 per Inst",
"MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_ll4_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Local Memory per Inst",
"MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_lmem_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified) per Inst",
"MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_rl2l3_mod_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared) per Inst",
"MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_rl2l3_shr_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote L4 per Inst",
"MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_rl4_rate_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote Memory per Inst",
"MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "inst_from_rmem_rate_percent"
},
{
"BriefDescription": "Instruction Cache Miss Rate (Per run Instruction)(%)",
"MetricExpr": "PM_L1_ICACHE_MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "instruction_misses_percent_per_inst",
"MetricName": "l1_inst_miss_rate_percent"
},
{
"BriefDescription": "Icache Fetchs per Icache Miss",
"MetricExpr": "(PM_L1_ICACHE_MISS - PM_IC_PREF_WRITE) / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "icache_miss_reload"
},
{
"BriefDescription": "% of ICache reloads due to prefetch",
"MetricExpr": "PM_IC_PREF_WRITE * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "icache_pref_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified)",
"MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_dl2l3_mod_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared)",
"MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_dl2l3_shr_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant L4",
"MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_dl4_percent"
},
{
"BriefDescription": "% of ICache reloads from Distant Memory",
"MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_dmem_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L2, other core",
"MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_l21_mod_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L2, other core",
"MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_l21_shr_percent"
},
{
"BriefDescription": "% of ICache reloads from L2",
"MetricExpr": "PM_INST_FROM_L2 * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_l2_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L3, other core",
"MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_l31_mod_percent"
},
{
"BriefDescription": "% of ICache reloads from Private L3, other core",
"MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_l31_shr_percent"
},
{
"BriefDescription": "% of ICache reloads from L3",
"MetricExpr": "PM_INST_FROM_L3 * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_l3_percent"
},
{
"BriefDescription": "% of ICache reloads from Local L4",
"MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_ll4_percent"
},
{
"BriefDescription": "% of ICache reloads from Local Memory",
"MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_lmem_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified)",
"MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_rl2l3_mod_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared)",
"MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_rl2l3_shr_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote L4",
"MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_rl4_percent"
},
{
"BriefDescription": "% of ICache reloads from Remote Memory",
"MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_L1_ICACHE_MISS",
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_rmem_percent"
},
{
"BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)",
"MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_co_m_rd_util"
},
{
"BriefDescription": "L2 dcache invalidates per run inst (per core)",
"MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_dc_inv_rate_percent"
},
{
"BriefDescription": "Demand load misses as a % of L2 LD dispatches (per thread)",
"MetricExpr": "PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2) * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_dem_ld_disp_percent"
},
{
"BriefDescription": "L2 Icache invalidates per run inst (per core)",
"MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_ic_inv_rate_percent"
},
{
"BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)",
"MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_inst_miss_ratio_percent"
},
{
"BriefDescription": "Average number of cycles between L2 Load hits",
"MetricExpr": "(PM_L2_LD_HIT / PM_RUN_CYC) / 2",
"MetricGroup": "l2_stats",
"MetricName": "l2_ld_hit_frequency"
},
{
"BriefDescription": "Average number of cycles between L2 Load misses",
"MetricExpr": "(PM_L2_LD_MISS / PM_RUN_CYC) / 2",
"MetricGroup": "l2_stats",
"MetricName": "l2_ld_miss_frequency"
},
{
"BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)",
"MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_ld_miss_ratio_percent"
},
{
"BriefDescription": "% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt)",
"MetricExpr": "((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_ld_rd_util"
},
{
"BriefDescription": "L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks",
"MetricExpr": "((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ PM_RUN_CYC * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_ldmiss_wr_util"
},
{
"BriefDescription": "L2 local pump prediction success",
"MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_local_pred_correct_percent"
},
{
"BriefDescription": "L2 COs that were in M,Me,Mu state as a % of all L2 COs",
"MetricExpr": "PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_mod_co_percent"
},
{
"BriefDescription": "% of L2 Load RC dispatch atampts that failed because of address collisions and cclass conflicts",
"MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_rc_ld_disp_addr_fail_percent"
},
{
"BriefDescription": "% of L2 Load RC dispatch attempts that failed",
"MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ PM_L2_RCLD_DISP * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_rc_ld_disp_fail_percent"
},
{
"BriefDescription": "% of L2 Store RC dispatch atampts that failed because of address collisions and cclass conflicts",
"MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_rc_st_disp_addr_fail_percent"
},
{
"BriefDescription": "% of L2 Store RC dispatch attempts that failed",
"MetricExpr": "(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ PM_L2_RCST_DISP * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_rc_st_disp_fail_percent"
},
{
"BriefDescription": "L2 Cache Read Utilization (per core)",
"MetricExpr": "(((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100) + (((PM_L2_RCST_DISP/2)*4)/PM_RUN_CYC * 100) + (((PM_L2_CASTOUT_MOD/2)*4)/PM_RUN_CYC * 100)",
"MetricGroup": "l2_stats",
"MetricName": "l2_rd_util_percent"
},
{
"BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs",
"MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_shr_co_percent"
},
{
"BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)",
"MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_st_miss_ratio_percent"
},
{
"BriefDescription": "% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt)",
"MetricExpr": "((PM_L2_RCST_DISP/2)*4) / PM_RUN_CYC * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_st_rd_util"
},
{
"BriefDescription": "L2 stores that require a cache write (4 pclks per disp attempt) % of pclks",
"MetricExpr": "((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_st_wr_util"
},
{
"BriefDescription": "L2 Cache Write Utilization (per core)",
"MetricExpr": "((((PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4) / PM_RUN_CYC * 100) + (((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100)",
"MetricGroup": "l2_stats",
"MetricName": "l2_wr_util_percent"
},
{
"BriefDescription": "Average number of cycles between L3 Load hits",
"MetricExpr": "(PM_L3_LD_HIT / PM_RUN_CYC) / 2",
"MetricGroup": "l3_stats",
"MetricName": "l3_ld_hit_frequency"
},
{
"BriefDescription": "Average number of cycles between L3 Load misses",
"MetricExpr": "(PM_L3_LD_MISS / PM_RUN_CYC) / 2",
"MetricGroup": "l3_stats",
"MetricName": "l3_ld_miss_frequency"
},
{
"BriefDescription": "Average number of Write-in machines used. 1 of 8 WI machines is sampled every L3 cycle",
"MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8",
"MetricGroup": "l3_stats",
"MetricName": "l3_wi_usage"
},
{
"BriefDescription": "Average icache miss latency",
"MetricExpr": "PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ",
"MetricGroup": "latency",
"MetricName": "average_il1_miss_latency"
},
{
"BriefDescription": "Marked L2L3 remote Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD",
"MetricGroup": "latency",
"MetricName": "dl2l3_mod_latency"
},
{
"BriefDescription": "Marked L2L3 distant Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR",
"MetricGroup": "latency",
"MetricName": "dl2l3_shr_latency"
},
{
"BriefDescription": "Distant L4 average load latency",
"MetricExpr": "PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4",
"MetricGroup": "latency",
"MetricName": "dl4_latency"
},
{
"BriefDescription": "Marked Dmem Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM",
"MetricGroup": "latency",
"MetricName": "dmem_latency"
},
{
"BriefDescription": "average L1 miss latency using marked events",
"MetricExpr": "PM_MRK_LD_MISS_L1_CYC / PM_MRK_LD_MISS_L1",
"MetricGroup": "latency",
"MetricName": "estimated_dl1miss_latency"
},
{
"BriefDescription": "Marked L21 Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD",
"MetricGroup": "latency",
"MetricName": "l21_mod_latency"
},
{
"BriefDescription": "Marked L21 Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR",
"MetricGroup": "latency",
"MetricName": "l21_shr_latency"
},
{
"BriefDescription": "Marked L2 Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2",
"MetricGroup": "latency",
"MetricName": "l2_latency"
},
{
"BriefDescription": "Marked L31 Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD",
"MetricGroup": "latency",
"MetricName": "l31_mod_latency"
},
{
"BriefDescription": "Marked L31 Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR",
"MetricGroup": "latency",
"MetricName": "l31_shr_latency"
},
{
"BriefDescription": "Marked L3 Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3",
"MetricGroup": "latency",
"MetricName": "l3_latency"
},
{
"BriefDescription": "Local L4 average load latency",
"MetricExpr": "PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4",
"MetricGroup": "latency",
"MetricName": "ll4_latency"
},
{
"BriefDescription": "Marked Lmem Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM",
"MetricGroup": "latency",
"MetricName": "lmem_latency"
},
{
"BriefDescription": "Marked L2L3 remote Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD",
"MetricGroup": "latency",
"MetricName": "rl2l3_mod_latency"
},
{
"BriefDescription": "Marked L2L3 remote Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR",
"MetricGroup": "latency",
"MetricName": "rl2l3_shr_latency"
},
{
"BriefDescription": "Remote L4 average load latency",
"MetricExpr": "PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4",
"MetricGroup": "latency",
"MetricName": "rl4_latency"
},
{
"BriefDescription": "Marked Rmem Load latency",
"MetricExpr": "PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM",
"MetricGroup": "latency",
"MetricName": "rmem_latency"
},
{
"BriefDescription": "ERAT miss reject ratio",
"MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "lsu_rejects",
"MetricName": "erat_reject_rate_percent"
},
{
"BriefDescription": "LHS reject ratio",
"MetricExpr": "PM_LSU_REJECT_LHS *100/ PM_RUN_INST_CMPL",
"MetricGroup": "lsu_rejects",
"MetricName": "lhs_reject_rate_percent"
},
{
"BriefDescription": "ERAT miss reject ratio",
"MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "lsu_rejects",
"MetricName": "lmq_full_reject_rate_percent"
},
{
"BriefDescription": "ERAT miss reject ratio",
"MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_LD_REF_L1",
"MetricGroup": "lsu_rejects",
"MetricName": "lmq_full_reject_ratio_percent"
},
{
"BriefDescription": "L4 locality(%)",
"MetricExpr": "PM_DATA_FROM_LL4 * 100 / (PM_DATA_FROM_LL4 + PM_DATA_FROM_RL4 + PM_DATA_FROM_DL4)",
"MetricGroup": "memory",
"MetricName": "l4_locality"
},
{
"BriefDescription": "Ratio of reloads from local L4 to distant L4",
"MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_DL4",
"MetricGroup": "memory",
"MetricName": "ld_ll4_per_ld_dmem"
},
{
"BriefDescription": "Ratio of reloads from local L4 to remote+distant L4",
"MetricExpr": "PM_DATA_FROM_LL4 / (PM_DATA_FROM_DL4 + PM_DATA_FROM_RL4)",
"MetricGroup": "memory",
"MetricName": "ld_ll4_per_ld_mem"
},
{
"BriefDescription": "Ratio of reloads from local L4 to remote L4",
"MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_RL4",
"MetricGroup": "memory",
"MetricName": "ld_ll4_per_ld_rl4"
},
{
"BriefDescription": "Number of loads from local memory per loads from distant memory",
"MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_DMEM",
"MetricGroup": "memory",
"MetricName": "ld_lmem_per_ld_dmem"
},
{
"BriefDescription": "Number of loads from local memory per loads from remote and distant memory",
"MetricExpr": "PM_DATA_FROM_LMEM / (PM_DATA_FROM_DMEM + PM_DATA_FROM_RMEM)",
"MetricGroup": "memory",
"MetricName": "ld_lmem_per_ld_mem"
},
{
"BriefDescription": "Number of loads from local memory per loads from remote memory",
"MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM",
"MetricGroup": "memory",
"MetricName": "ld_lmem_per_ld_rmem"
},
{
"BriefDescription": "Number of loads from remote memory per loads from distant memory",
"MetricExpr": "PM_DATA_FROM_RMEM / PM_DATA_FROM_DMEM",
"MetricGroup": "memory",
"MetricName": "ld_rmem_per_ld_dmem"
},
{
"BriefDescription": "Memory locality",
"MetricExpr": "PM_DATA_FROM_LMEM * 100/ (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM + PM_DATA_FROM_DMEM)",
"MetricGroup": "memory",
"MetricName": "mem_locality_percent"
},
{
"BriefDescription": "L1 Prefetches issued by the prefetch machine per instruction (per thread)",
"MetricExpr": "PM_L1_PREF / PM_RUN_INST_CMPL * 100",
"MetricGroup": "prefetch",
"MetricName": "l1_prefetch_rate_percent"
},
{
"BriefDescription": "DERAT Miss Rate (per run instruction)(%)",
"MetricExpr": "PM_LSU_DERAT_MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "derat_miss_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified) per inst",
"MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_dl2l3_mod_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared) per inst",
"MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_dl2l3_shr_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant L4 per inst",
"MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_dl4_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant Memory per inst",
"MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_dmem_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L2, other core per inst",
"MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_l21_mod_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L2, other core per inst",
"MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_l21_shr_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from L2 per inst",
"MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_l2_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L3, other core per inst",
"MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_l31_mod_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L3, other core per inst",
"MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_l31_shr_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from L3 per inst",
"MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_l3_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Local L4 per inst",
"MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_ll4_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Local Memory per inst",
"MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_lmem_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified) per inst",
"MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_rl2l3_mod_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared) per inst",
"MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_rl2l3_shr_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote L4 per inst",
"MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_rl4_rate_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote Memory per inst",
"MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "pteg_reloads_percent_per_inst",
"MetricName": "pteg_from_rmem_rate_percent"
},
{
"BriefDescription": "% of DERAT misses that result in an ERAT reload",
"MetricExpr": "PM_DTLB_MISS * 100 / PM_LSU_DERAT_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "derat_miss_reload_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified)",
"MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_dl2l3_mod_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared)",
"MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_dl2l3_shr_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant L4",
"MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_dl4_percent"
},
{
"BriefDescription": "% of DERAT reloads from Distant Memory",
"MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_dmem_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L2, other core",
"MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_l21_mod_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L2, other core",
"MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_l21_shr_percent"
},
{
"BriefDescription": "% of DERAT reloads from L2",
"MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_l2_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L3, other core",
"MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_l31_mod_percent"
},
{
"BriefDescription": "% of DERAT reloads from Private L3, other core",
"MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_l31_shr_percent"
},
{
"BriefDescription": "% of DERAT reloads from L3",
"MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_l3_percent"
},
{
"BriefDescription": "% of DERAT reloads from Local L4",
"MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_ll4_percent"
},
{
"BriefDescription": "% of DERAT reloads from Local Memory",
"MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_lmem_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified)",
"MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_rl2l3_mod_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared)",
"MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_rl2l3_shr_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote L4",
"MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_rl4_percent"
},
{
"BriefDescription": "% of DERAT reloads from Remote Memory",
"MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS",
"MetricGroup": "pteg_reloads_percent_per_ref",
"MetricName": "pteg_from_rmem_percent"
},
{
"BriefDescription": "% DERAT miss rate for 4K page per inst",
"MetricExpr": "PM_DERAT_MISS_4K * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "translation",
"MetricName": "derat_4k_miss_rate_percent"
},
{
"BriefDescription": "DERAT miss ratio for 4K page",
"MetricExpr": "PM_DERAT_MISS_4K / PM_LSU_DERAT_MISS",
"MetricGroup": "translation",
"MetricName": "derat_4k_miss_ratio"
},
{
"BriefDescription": "% DERAT miss ratio for 64K page per inst",
"MetricExpr": "PM_DERAT_MISS_64K * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "translation",
"MetricName": "derat_64k_miss_rate_percent"
},
{
"BriefDescription": "DERAT miss ratio for 64K page",
"MetricExpr": "PM_DERAT_MISS_64K / PM_LSU_DERAT_MISS",
"MetricGroup": "translation",
"MetricName": "derat_64k_miss_ratio"
},
{
"BriefDescription": "DERAT miss ratio",
"MetricExpr": "PM_LSU_DERAT_MISS / PM_LSU_DERAT_MISS",
"MetricGroup": "translation",
"MetricName": "derat_miss_ratio"
},
{
"BriefDescription": "% DSLB_Miss_Rate per inst",
"MetricExpr": "PM_DSLB_MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "translation",
"MetricName": "dslb_miss_rate_percent"
},
{
"BriefDescription": "% ISLB miss rate per inst",
"MetricExpr": "PM_ISLB_MISS * 100 / PM_RUN_INST_CMPL",
"MetricGroup": "translation",
"MetricName": "islb_miss_rate_percent"
},
{
"BriefDescription": "ANY_SYNC_STALL_CPI",
"MetricExpr": "PM_CMPLU_STALL_ANY_SYNC / PM_RUN_INST_CMPL",
"MetricName": "any_sync_stall_cpi"
},
{
"BriefDescription": "Avg. more than 1 instructions completed",
"MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL",
"MetricName": "average_completed_instruction_set_size"
},
{
"BriefDescription": "% Branches per instruction",
"MetricExpr": "PM_BRU_FIN / PM_RUN_INST_CMPL",
"MetricName": "branches_per_inst"
},
{
"BriefDescription": "Cycles in which at least one instruction completes in this thread",
"MetricExpr": "PM_1PLUS_PPC_CMPL/PM_RUN_INST_CMPL",
"MetricName": "completion_cpi"
},
{
"BriefDescription": "cycles",
"MetricExpr": "PM_RUN_CYC",
"MetricName": "custom_secs"
},
{
"BriefDescription": "Percentage Cycles atleast one instruction dispatched",
"MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100",
"MetricName": "cycles_atleast_one_inst_dispatched_percent"
},
{
"BriefDescription": "Cycles per instruction group",
"MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL",
"MetricName": "cycles_per_completed_instructions_set"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Distant L4",
"MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricName": "dl1_reload_from_dl4_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Distant L4 per Inst",
"MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_RUN_INST_CMPL",
"MetricName": "dl1_reload_from_dl4_rate_percent"
},
{
"BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
"MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL",
"MetricName": "dl1_reload_from_l31_rate_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Local L4",
"MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricName": "dl1_reload_from_ll4_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Local L4 per Inst",
"MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
"MetricName": "dl1_reload_from_ll4_rate_percent"
},
{
"BriefDescription": "% of DL1 dL1_Reloads from Remote L4",
"MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_L1_DCACHE_RELOAD_VALID",
"MetricName": "dl1_reload_from_rl4_percent"
},
{
"BriefDescription": "% of DL1 Reloads from Remote Memory per Inst",
"MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_RUN_INST_CMPL",
"MetricName": "dl1_reload_from_rl4_rate_percent"
},
{
"BriefDescription": "Rate of DERAT reloads from L2",
"MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
"MetricName": "dpteg_from_l2_rate_percent"
},
{
"BriefDescription": "Rate of DERAT reloads from L3",
"MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL",
"MetricName": "dpteg_from_l3_rate_percent"
},
{
"BriefDescription": "Cycles in which the oldest instruction is finished and ready to complete for waiting to get through the completion pipe",
"MetricExpr": "PM_NTC_ALL_FIN / PM_RUN_INST_CMPL",
"MetricName": "finish_to_cmpl_cpi"
},
{
"BriefDescription": "Total Fixed point operations",
"MetricExpr": "PM_FXU_FIN/PM_RUN_INST_CMPL",
"MetricName": "fixed_per_inst"
},
{
"BriefDescription": "All FXU Busy",
"MetricExpr": "PM_FXU_BUSY / PM_CYC",
"MetricName": "fxu_all_busy"
},
{
"BriefDescription": "All FXU Idle",
"MetricExpr": "PM_FXU_IDLE / PM_CYC",
"MetricName": "fxu_all_idle"
},
{
"BriefDescription": "Ict empty for this thread due to branch mispred",
"MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_br_mpred_cpi"
},
{
"BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred",
"MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_br_mpred_icmiss_cpi"
},
{
"BriefDescription": "ICT other stalls",
"MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_cyc_other_cpi"
},
{
"BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason",
"MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_disp_held_cpi"
},
{
"BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF",
"MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_disp_held_hb_full_cpi"
},
{
"BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full",
"MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_disp_held_issq_cpi"
},
{
"BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI",
"MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_disp_held_other_cpi"
},
{
"BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch",
"MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_disp_held_sync_cpi"
},
{
"BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch",
"MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_disp_held_tbegin_cpi"
},
{
"BriefDescription": "ICT_NOSLOT_IC_L2_CPI",
"MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_ic_l2_cpi"
},
{
"BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3",
"MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_ic_l3_cpi"
},
{
"BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache",
"MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_ic_l3miss_cpi"
},
{
"BriefDescription": "Ict empty for this thread due to Icache Miss",
"MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL",
"MetricName": "ict_noslot_ic_miss_cpi"
},
{
"BriefDescription": "Rate of IERAT reloads from L2",
"MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
"MetricName": "ipteg_from_l2_rate_percent"
},
{
"BriefDescription": "Rate of IERAT reloads from L3",
"MetricExpr": "PM_IPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL",
"MetricName": "ipteg_from_l3_rate_percent"
},
{
"BriefDescription": "Rate of IERAT reloads from local memory",
"MetricExpr": "PM_IPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
"MetricName": "ipteg_from_ll4_rate_percent"
},
{
"BriefDescription": "Rate of IERAT reloads from local memory",
"MetricExpr": "PM_IPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
"MetricName": "ipteg_from_lmem_rate_percent"
},
{
"BriefDescription": "Average number of Castout machines used. 1 of 16 CO machines is sampled every L2 cycle",
"MetricExpr": "PM_CO_USAGE / PM_RUN_CYC * 16",
"MetricName": "l2_co_usage"
},
{
"BriefDescription": "Percent of instruction reads out of all L2 commands",
"MetricExpr": "PM_ISIDE_DISP * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)",
"MetricName": "l2_instr_commands_percent"
},
{
"BriefDescription": "Percent of loads out of all L2 commands",
"MetricExpr": "PM_L2_LD * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)",
"MetricName": "l2_ld_commands_percent"
},
{
"BriefDescription": "Rate of L2 store dispatches that failed per core",
"MetricExpr": "100 * (PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2 / PM_RUN_INST_CMPL",
"MetricName": "l2_rc_st_disp_fail_rate_percent"
},
{
"BriefDescription": "Average number of Read/Claim machines used. 1 of 16 RC machines is sampled every L2 cycle",
"MetricExpr": "PM_RC_USAGE / PM_RUN_CYC * 16",
"MetricName": "l2_rc_usage"
},
{
"BriefDescription": "Average number of Snoop machines used. 1 of 8 SN machines is sampled every L2 cycle",
"MetricExpr": "PM_SN_USAGE / PM_RUN_CYC * 8",
"MetricName": "l2_sn_usage"
},
{
"BriefDescription": "Percent of stores out of all L2 commands",
"MetricExpr": "PM_L2_ST * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)",
"MetricName": "l2_st_commands_percent"
},
{
"BriefDescription": "Rate of L2 store dispatches that failed per core",
"MetricExpr": "100 * (PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2 / PM_RUN_INST_CMPL",
"MetricName": "l2_st_disp_fail_rate_percent"
},
{
"BriefDescription": "Rate of L2 dispatches per core",
"MetricExpr": "100 * PM_L2_RCST_DISP/2 / PM_RUN_INST_CMPL",
"MetricName": "l2_st_disp_rate_percent"
},
{
"BriefDescription": "Marked L31 Load latency",
"MetricExpr": "(PM_MRK_DATA_FROM_L31_SHR_CYC + PM_MRK_DATA_FROM_L31_MOD_CYC) / (PM_MRK_DATA_FROM_L31_SHR + PM_MRK_DATA_FROM_L31_MOD)",
"MetricName": "l31_latency"
},
{
"BriefDescription": "PCT instruction loads",
"MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL",
"MetricName": "loads_per_inst"
},
{
"BriefDescription": "Cycles stalled by D-Cache Misses",
"MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL",
"MetricName": "lsu_stall_dcache_miss_cpi"
},
{
"BriefDescription": "Completion stall because a different thread was using the completion pipe",
"MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_EXCEPTION - PM_CMPLU_STALL_ANY_SYNC - PM_CMPLU_STALL_SYNC_PMU_INT - PM_CMPLU_STALL_SPEC_FINISH - PM_CMPLU_STALL_FLUSH_ANY_THREAD - PM_CMPLU_STALL_LSU_FLUSH_NEXT - PM_CMPLU_STALL_NESTED_TBEGIN - PM_CMPLU_STALL_NESTED_TEND - PM_CMPLU_STALL_MTFPSCR)/PM_RUN_INST_CMPL",
"MetricName": "other_thread_cmpl_stall"
},
{
"BriefDescription": "PCT instruction stores",
"MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL",
"MetricName": "stores_per_inst"
},
{
"BriefDescription": "ANY_SYNC_STALL_CPI",
"MetricExpr": "PM_CMPLU_STALL_SYNC_PMU_INT / PM_RUN_INST_CMPL",
"MetricName": "sync_pmu_int_stall_cpi"
}
]
libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/
libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/
perf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/
perf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/
libperf-y += Context.o
perf-y += Context.o
CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes
CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef
......
libperf-y += Context.o
perf-y += Context.o
CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs
......@@ -1330,6 +1330,26 @@ static int test__checkevent_complex_name(struct perf_evlist *evlist)
return 0;
}
/*
 * Check parsing of the "cycles//u" slash-modifier syntax: the first
 * (only) evsel must be a hardware cycles event with kernel excluded.
 * Returns 0 on success; TEST_ASSERT_VAL bails out with -1 on mismatch.
 */
static int test__sym_event_slash(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	TEST_ASSERT_VAL("wrong type", first->attr.type == PERF_TYPE_HARDWARE);
	TEST_ASSERT_VAL("wrong config", first->attr.config == PERF_COUNT_HW_CPU_CYCLES);
	TEST_ASSERT_VAL("wrong exclude_kernel", first->attr.exclude_kernel);
	return 0;
}
/*
 * Check parsing of the "cycles:k" double-colon modifier syntax: the first
 * (only) evsel must be a hardware cycles event with user space excluded.
 * Returns 0 on success; TEST_ASSERT_VAL bails out with -1 on mismatch.
 */
static int test__sym_event_dc(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	TEST_ASSERT_VAL("wrong type", first->attr.type == PERF_TYPE_HARDWARE);
	TEST_ASSERT_VAL("wrong config", first->attr.config == PERF_COUNT_HW_CPU_CYCLES);
	TEST_ASSERT_VAL("wrong exclude_user", first->attr.exclude_user);
	return 0;
}
static int count_tracepoints(void)
{
struct dirent *events_ent;
......@@ -1670,6 +1690,16 @@ static struct evlist_test test__events[] = {
.name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk",
.check = test__checkevent_complex_name,
.id = 53
},
{
.name = "cycles//u",
.check = test__sym_event_slash,
.id = 54,
},
{
.name = "cycles:k",
.check = test__sym_event_dc,
.id = 55,
}
};
......
libperf-y += clone.o
libperf-y += fcntl.o
libperf-y += flock.o
perf-y += clone.o
perf-y += fcntl.o
perf-y += flock.o
ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
libperf-y += ioctl.o
perf-y += ioctl.o
endif
libperf-y += kcmp.o
libperf-y += mount_flags.o
libperf-y += pkey_alloc.o
libperf-y += arch_prctl.o
libperf-y += prctl.o
libperf-y += renameat.o
libperf-y += sockaddr.o
libperf-y += socket.o
libperf-y += statx.o
perf-y += kcmp.o
perf-y += mount_flags.o
perf-y += pkey_alloc.o
perf-y += arch_prctl.o
perf-y += prctl.o
perf-y += renameat.o
perf-y += sockaddr.o
perf-y += socket.o
perf-y += statx.o
......@@ -175,7 +175,7 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size, boo
size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg)
{
unsigned long cmd = arg->val;
unsigned int fd = syscall_arg__val(arg, 0);
int fd = syscall_arg__val(arg, 0);
struct file *file = thread__files_entry(arg->thread, fd);
if (file != NULL) {
......
......@@ -11,7 +11,7 @@ static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size,
#define P_OPTION(n) \
if (options & W##n) { \
printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : #n); \
printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
options &= ~W##n; \
}
......
libperf-y += setup.o
libperf-y += helpline.o
libperf-y += progress.o
libperf-y += util.o
libperf-y += hist.o
libperf-y += stdio/hist.o
perf-y += setup.o
perf-y += helpline.o
perf-y += progress.o
perf-y += util.o
perf-y += hist.o
perf-y += stdio/hist.o
CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))"
libperf-$(CONFIG_SLANG) += browser.o
libperf-$(CONFIG_SLANG) += browsers/
libperf-$(CONFIG_SLANG) += tui/
perf-$(CONFIG_SLANG) += browser.o
perf-$(CONFIG_SLANG) += browsers/
perf-$(CONFIG_SLANG) += tui/
CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST
libperf-y += annotate.o
libperf-y += hists.o
libperf-y += map.o
libperf-y += scripts.o
libperf-y += header.o
perf-y += annotate.o
perf-y += hists.o
perf-y += map.o
perf-y += scripts.o
perf-y += header.o
CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST
CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST
......
libperf-y += setup.o
libperf-y += util.o
libperf-y += helpline.o
libperf-y += progress.o
perf-y += setup.o
perf-y += util.o
perf-y += helpline.o
perf-y += progress.o
libperf-y += annotate.o
libperf-y += block-range.o
libperf-y += build-id.o
libperf-y += config.o
libperf-y += ctype.o
libperf-y += db-export.o
libperf-y += env.o
libperf-y += event.o
libperf-y += evlist.o
libperf-y += evsel.o
libperf-y += evsel_fprintf.o
libperf-y += find_bit.o
libperf-y += get_current_dir_name.o
libperf-y += kallsyms.o
libperf-y += levenshtein.o
libperf-y += llvm-utils.o
libperf-y += mmap.o
libperf-y += memswap.o
libperf-y += parse-events.o
libperf-y += perf_regs.o
libperf-y += path.o
libperf-y += print_binary.o
libperf-y += rbtree.o
libperf-y += libstring.o
libperf-y += bitmap.o
libperf-y += hweight.o
libperf-y += smt.o
libperf-y += strbuf.o
libperf-y += string.o
libperf-y += strlist.o
libperf-y += strfilter.o
libperf-y += top.o
libperf-y += usage.o
libperf-y += dso.o
libperf-y += symbol.o
libperf-y += symbol_fprintf.o
libperf-y += color.o
libperf-y += color_config.o
libperf-y += metricgroup.o
libperf-y += header.o
libperf-y += callchain.o
libperf-y += values.o
libperf-y += debug.o
libperf-y += machine.o
libperf-y += map.o
libperf-y += pstack.o
libperf-y += session.o
libperf-y += sample-raw.o
libperf-y += s390-sample-raw.o
libperf-$(CONFIG_TRACE) += syscalltbl.o
libperf-y += ordered-events.o
libperf-y += namespaces.o
libperf-y += comm.o
libperf-y += thread.o
libperf-y += thread_map.o
libperf-y += trace-event-parse.o
libperf-y += parse-events-flex.o
libperf-y += parse-events-bison.o
libperf-y += pmu.o
libperf-y += pmu-flex.o
libperf-y += pmu-bison.o
libperf-y += trace-event-read.o
libperf-y += trace-event-info.o
libperf-y += trace-event-scripting.o
libperf-y += trace-event.o
libperf-y += svghelper.o
libperf-y += sort.o
libperf-y += hist.o
libperf-y += util.o
libperf-y += xyarray.o
libperf-y += cpumap.o
libperf-y += cgroup.o
libperf-y += target.o
libperf-y += rblist.o
libperf-y += intlist.o
libperf-y += vdso.o
libperf-y += counts.o
libperf-y += stat.o
libperf-y += stat-shadow.o
libperf-y += stat-display.o
libperf-y += record.o
libperf-y += srcline.o
libperf-y += srccode.o
libperf-y += data.o
libperf-y += tsc.o
libperf-y += cloexec.o
libperf-y += call-path.o
libperf-y += rwsem.o
libperf-y += thread-stack.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
libperf-$(CONFIG_AUXTRACE) += intel-bts.o
libperf-$(CONFIG_AUXTRACE) += arm-spe.o
libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
libperf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
perf-y += annotate.o
perf-y += block-range.o
perf-y += build-id.o
perf-y += config.o
perf-y += ctype.o
perf-y += db-export.o
perf-y += env.o
perf-y += event.o
perf-y += evlist.o
perf-y += evsel.o
perf-y += evsel_fprintf.o
perf-y += find_bit.o
perf-y += get_current_dir_name.o
perf-y += kallsyms.o
perf-y += levenshtein.o
perf-y += llvm-utils.o
perf-y += mmap.o
perf-y += memswap.o
perf-y += parse-events.o
perf-y += perf_regs.o
perf-y += path.o
perf-y += print_binary.o
perf-y += rbtree.o
perf-y += libstring.o
perf-y += bitmap.o
perf-y += hweight.o
perf-y += smt.o
perf-y += strbuf.o
perf-y += string.o
perf-y += strlist.o
perf-y += strfilter.o
perf-y += top.o
perf-y += usage.o
perf-y += dso.o
perf-y += symbol.o
perf-y += symbol_fprintf.o
perf-y += color.o
perf-y += color_config.o
perf-y += metricgroup.o
perf-y += header.o
perf-y += callchain.o
perf-y += values.o
perf-y += debug.o
perf-y += machine.o
perf-y += map.o
perf-y += pstack.o
perf-y += session.o
perf-y += sample-raw.o
perf-y += s390-sample-raw.o
perf-$(CONFIG_TRACE) += syscalltbl.o
perf-y += ordered-events.o
perf-y += namespaces.o
perf-y += comm.o
perf-y += thread.o
perf-y += thread_map.o
perf-y += trace-event-parse.o
perf-y += parse-events-flex.o
perf-y += parse-events-bison.o
perf-y += pmu.o
perf-y += pmu-flex.o
perf-y += pmu-bison.o
perf-y += trace-event-read.o
perf-y += trace-event-info.o
perf-y += trace-event-scripting.o
perf-y += trace-event.o
perf-y += svghelper.o
perf-y += sort.o
perf-y += hist.o
perf-y += util.o
perf-y += xyarray.o
perf-y += cpumap.o
perf-y += cgroup.o
perf-y += target.o
perf-y += rblist.o
perf-y += intlist.o
perf-y += vdso.o
perf-y += counts.o
perf-y += stat.o
perf-y += stat-shadow.o
perf-y += stat-display.o
perf-y += record.o
perf-y += srcline.o
perf-y += srccode.o
perf-y += data.o
perf-y += tsc.o
perf-y += cloexec.o
perf-y += call-path.o
perf-y += rwsem.o
perf-y += thread-stack.o
perf-$(CONFIG_AUXTRACE) += auxtrace.o
perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
perf-$(CONFIG_AUXTRACE) += intel-pt.o
perf-$(CONFIG_AUXTRACE) += intel-bts.o
perf-$(CONFIG_AUXTRACE) += arm-spe.o
perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
ifdef CONFIG_LIBOPENCSD
libperf-$(CONFIG_AUXTRACE) += cs-etm.o
libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
perf-$(CONFIG_AUXTRACE) += cs-etm.o
perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
endif
libperf-y += parse-branch-options.o
libperf-y += dump-insn.o
libperf-y += parse-regs-options.o
libperf-y += term.o
libperf-y += help-unknown-cmd.o
libperf-y += mem-events.o
libperf-y += vsprintf.o
libperf-y += units.o
libperf-y += time-utils.o
libperf-y += expr-bison.o
libperf-y += branch.o
libperf-y += mem2node.o
libperf-$(CONFIG_LIBBPF) += bpf-loader.o
libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
libperf-$(CONFIG_LIBELF) += symbol-elf.o
libperf-$(CONFIG_LIBELF) += probe-file.o
libperf-$(CONFIG_LIBELF) += probe-event.o
perf-y += parse-branch-options.o
perf-y += dump-insn.o
perf-y += parse-regs-options.o
perf-y += term.o
perf-y += help-unknown-cmd.o
perf-y += mem-events.o
perf-y += vsprintf.o
perf-y += units.o
perf-y += time-utils.o
perf-y += expr-bison.o
perf-y += branch.o
perf-y += mem2node.o
perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o
perf-$(CONFIG_LIBELF) += probe-event.o
ifndef CONFIG_LIBELF
libperf-y += symbol-minimal.o
perf-y += symbol-minimal.o
endif
ifndef CONFIG_SETNS
libperf-y += setns.o
perf-y += setns.o
endif
libperf-$(CONFIG_DWARF) += probe-finder.o
libperf-$(CONFIG_DWARF) += dwarf-aux.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_DWARF) += probe-finder.o
perf-$(CONFIG_DWARF) += dwarf-aux.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
libperf-y += scripting-engines/
perf-y += scripting-engines/
libperf-$(CONFIG_ZLIB) += zlib.o
libperf-$(CONFIG_LZMA) += lzma.o
libperf-y += demangle-java.o
libperf-y += demangle-rust.o
perf-$(CONFIG_ZLIB) += zlib.o
perf-$(CONFIG_LZMA) += lzma.o
perf-y += demangle-java.o
perf-y += demangle-rust.o
ifdef CONFIG_JITDUMP
libperf-$(CONFIG_LIBELF) += jitdump.o
libperf-$(CONFIG_LIBELF) += genelf.o
libperf-$(CONFIG_DWARF) += genelf_debug.o
perf-$(CONFIG_LIBELF) += jitdump.o
perf-$(CONFIG_LIBELF) += genelf.o
perf-$(CONFIG_DWARF) += genelf_debug.o
endif
libperf-y += perf-hooks.o
perf-y += perf-hooks.o
libperf-$(CONFIG_LIBBPF) += bpf-event.o
perf-$(CONFIG_LIBBPF) += bpf-event.o
libperf-$(CONFIG_CXX) += c++/
perf-$(CONFIG_CXX) += c++/
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
......
......@@ -236,8 +236,8 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool,
pr_debug("%s: can't get next program: %s%s",
__func__, strerror(errno),
errno == EINVAL ? " -- kernel too old?" : "");
/* don't report error on old kernel */
err = (errno == EINVAL) ? 0 : -1;
/* don't report error on old kernel or EPERM */
err = (errno == EINVAL || errno == EPERM) ? 0 : -1;
break;
}
fd = bpf_prog_get_fd_by_id(id);
......
libperf-$(CONFIG_CLANGLLVM) += clang.o
libperf-$(CONFIG_CLANGLLVM) += clang-test.o
perf-$(CONFIG_CLANGLLVM) += clang.o
perf-$(CONFIG_CLANGLLVM) += clang-test.o
libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o
perf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o
......@@ -15,13 +15,6 @@
struct cs_etm_decoder;
struct cs_etm_buffer {
const unsigned char *buf;
size_t len;
u64 offset;
u64 ref_timestamp;
};
enum cs_etm_sample_type {
CS_ETM_EMPTY,
CS_ETM_RANGE,
......@@ -105,9 +98,10 @@ enum {
CS_ETM_PROTO_PTM,
};
enum {
enum cs_etm_decoder_operation {
CS_ETM_OPERATION_PRINT = 1,
CS_ETM_OPERATION_DECODE,
CS_ETM_OPERATION_MAX,
};
int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
......
......@@ -25,6 +25,7 @@
#include "machine.h"
#include "map.h"
#include "perf.h"
#include "symbol.h"
#include "thread.h"
#include "thread_map.h"
#include "thread-stack.h"
......@@ -64,13 +65,10 @@ struct cs_etm_queue {
struct thread *thread;
struct cs_etm_decoder *decoder;
struct auxtrace_buffer *buffer;
const struct cs_etm_state *state;
union perf_event *event_buf;
unsigned int queue_nr;
pid_t pid, tid;
int cpu;
u64 time;
u64 timestamp;
u64 offset;
u64 period_instructions;
struct branch_stack *last_branch;
......@@ -78,11 +76,13 @@ struct cs_etm_queue {
size_t last_branch_pos;
struct cs_etm_packet *prev_packet;
struct cs_etm_packet *packet;
const unsigned char *buf;
size_t buf_len, buf_used;
};
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
pid_t tid, u64 time_);
pid_t tid);
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300
......@@ -138,10 +138,83 @@ static void cs_etm__packet_dump(const char *pkt_string)
fflush(stdout);
}
static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
struct cs_etm_auxtrace *etm, int idx,
u32 etmidr)
{
u64 **metadata = etm->metadata;
t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR];
t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR];
}
static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
struct cs_etm_auxtrace *etm, int idx)
{
u64 **metadata = etm->metadata;
t_params[idx].protocol = CS_ETM_PROTO_ETMV4i;
t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
}
static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
struct cs_etm_auxtrace *etm)
{
int i;
u32 etmidr;
u64 architecture;
for (i = 0; i < etm->num_cpu; i++) {
architecture = etm->metadata[i][CS_ETM_MAGIC];
switch (architecture) {
case __perf_cs_etmv3_magic:
etmidr = etm->metadata[i][CS_ETM_ETMIDR];
cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr);
break;
case __perf_cs_etmv4_magic:
cs_etm__set_trace_param_etmv4(t_params, etm, i);
break;
default:
return -EINVAL;
}
}
return 0;
}
static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
struct cs_etm_queue *etmq,
enum cs_etm_decoder_operation mode)
{
int ret = -EINVAL;
if (!(mode < CS_ETM_OPERATION_MAX))
goto out;
d_params->packet_printer = cs_etm__packet_dump;
d_params->operation = mode;
d_params->data = etmq;
d_params->formatted = true;
d_params->fsyncs = false;
d_params->hsyncs = false;
d_params->frame_aligned = true;
ret = 0;
out:
return ret;
}
static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
struct auxtrace_buffer *buffer)
{
int i, ret;
int ret;
const char *color = PERF_COLOR_BLUE;
struct cs_etm_decoder_params d_params;
struct cs_etm_trace_params *t_params;
......@@ -155,48 +228,22 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
/* Use metadata to fill in trace parameters for trace decoder */
t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
for (i = 0; i < etm->num_cpu; i++) {
if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR];
t_params[i].protocol =
cs_etm__get_v7_protocol_version(etmidr);
t_params[i].etmv3.reg_ctrl =
etm->metadata[i][CS_ETM_ETMCR];
t_params[i].etmv3.reg_trc_id =
etm->metadata[i][CS_ETM_ETMTRACEIDR];
} else if (etm->metadata[i][CS_ETM_MAGIC] ==
__perf_cs_etmv4_magic) {
t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
t_params[i].etmv4.reg_idr0 =
etm->metadata[i][CS_ETMV4_TRCIDR0];
t_params[i].etmv4.reg_idr1 =
etm->metadata[i][CS_ETMV4_TRCIDR1];
t_params[i].etmv4.reg_idr2 =
etm->metadata[i][CS_ETMV4_TRCIDR2];
t_params[i].etmv4.reg_idr8 =
etm->metadata[i][CS_ETMV4_TRCIDR8];
t_params[i].etmv4.reg_configr =
etm->metadata[i][CS_ETMV4_TRCCONFIGR];
t_params[i].etmv4.reg_traceidr =
etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
}
}
if (!t_params)
return;
if (cs_etm__init_trace_params(t_params, etm))
goto out_free;
/* Set decoder parameters to simply print the trace packets */
d_params.packet_printer = cs_etm__packet_dump;
d_params.operation = CS_ETM_OPERATION_PRINT;
d_params.formatted = true;
d_params.fsyncs = false;
d_params.hsyncs = false;
d_params.frame_aligned = true;
if (cs_etm__init_decoder_params(&d_params, NULL,
CS_ETM_OPERATION_PRINT))
goto out_free;
decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
zfree(&t_params);
if (!decoder)
return;
goto out_free;
do {
size_t consumed;
......@@ -211,6 +258,9 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
} while (buffer_used < buffer->size);
cs_etm_decoder__free(decoder);
out_free:
zfree(&t_params);
}
static int cs_etm__flush_events(struct perf_session *session,
......@@ -234,7 +284,7 @@ static int cs_etm__flush_events(struct perf_session *session,
if (ret < 0)
return ret;
return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1);
return cs_etm__process_timeless_queues(etm, -1);
}
static void cs_etm__free_queue(void *priv)
......@@ -326,7 +376,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
struct addr_location al;
if (!etmq)
return -1;
return 0;
machine = etmq->etm->machine;
cpumode = cs_etm__cpu_mode(etmq, address);
......@@ -334,7 +384,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
thread = etmq->thread;
if (!thread) {
if (cpumode != PERF_RECORD_MISC_KERNEL)
return -EINVAL;
return 0;
thread = etmq->etm->unknown_thread;
}
......@@ -357,12 +407,10 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
return len;
}
static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
unsigned int queue_nr)
static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
{
int i;
struct cs_etm_decoder_params d_params;
struct cs_etm_trace_params *t_params;
struct cs_etm_trace_params *t_params = NULL;
struct cs_etm_queue *etmq;
size_t szp = sizeof(struct cs_etm_packet);
......@@ -397,59 +445,22 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
if (!etmq->event_buf)
goto out_free;
etmq->etm = etm;
etmq->queue_nr = queue_nr;
etmq->pid = -1;
etmq->tid = -1;
etmq->cpu = -1;
/* Use metadata to fill in trace parameters for trace decoder */
t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
if (!t_params)
goto out_free;
for (i = 0; i < etm->num_cpu; i++) {
if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR];
t_params[i].protocol =
cs_etm__get_v7_protocol_version(etmidr);
t_params[i].etmv3.reg_ctrl =
etm->metadata[i][CS_ETM_ETMCR];
t_params[i].etmv3.reg_trc_id =
etm->metadata[i][CS_ETM_ETMTRACEIDR];
} else if (etm->metadata[i][CS_ETM_MAGIC] ==
__perf_cs_etmv4_magic) {
t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
t_params[i].etmv4.reg_idr0 =
etm->metadata[i][CS_ETMV4_TRCIDR0];
t_params[i].etmv4.reg_idr1 =
etm->metadata[i][CS_ETMV4_TRCIDR1];
t_params[i].etmv4.reg_idr2 =
etm->metadata[i][CS_ETMV4_TRCIDR2];
t_params[i].etmv4.reg_idr8 =
etm->metadata[i][CS_ETMV4_TRCIDR8];
t_params[i].etmv4.reg_configr =
etm->metadata[i][CS_ETMV4_TRCCONFIGR];
t_params[i].etmv4.reg_traceidr =
etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
}
}
if (cs_etm__init_trace_params(t_params, etm))
goto out_free;
/* Set decoder parameters to simply print the trace packets */
d_params.packet_printer = cs_etm__packet_dump;
d_params.operation = CS_ETM_OPERATION_DECODE;
d_params.formatted = true;
d_params.fsyncs = false;
d_params.hsyncs = false;
d_params.frame_aligned = true;
d_params.data = etmq;
/* Set decoder parameters to decode trace packets */
if (cs_etm__init_decoder_params(&d_params, etmq,
CS_ETM_OPERATION_DECODE))
goto out_free;
etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
zfree(&t_params);
if (!etmq->decoder)
goto out_free;
......@@ -462,14 +473,13 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
cs_etm__mem_access))
goto out_free_decoder;
etmq->offset = 0;
etmq->period_instructions = 0;
zfree(&t_params);
return etmq;
out_free_decoder:
cs_etm_decoder__free(etmq->decoder);
out_free:
zfree(&t_params);
zfree(&etmq->event_buf);
zfree(&etmq->last_branch);
zfree(&etmq->last_branch_rb);
......@@ -484,24 +494,30 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
struct auxtrace_queue *queue,
unsigned int queue_nr)
{
int ret = 0;
struct cs_etm_queue *etmq = queue->priv;
if (list_empty(&queue->head) || etmq)
return 0;
goto out;
etmq = cs_etm__alloc_queue(etm, queue_nr);
etmq = cs_etm__alloc_queue(etm);
if (!etmq)
return -ENOMEM;
if (!etmq) {
ret = -ENOMEM;
goto out;
}
queue->priv = etmq;
if (queue->cpu != -1)
etmq->cpu = queue->cpu;
etmq->etm = etm;
etmq->queue_nr = queue_nr;
etmq->cpu = queue->cpu;
etmq->tid = queue->tid;
etmq->pid = -1;
etmq->offset = 0;
etmq->period_instructions = 0;
return 0;
out:
return ret;
}
static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
......@@ -509,6 +525,9 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
unsigned int i;
int ret;
if (!etm->kernel_start)
etm->kernel_start = machine__kernel_start(etm->machine);
for (i = 0; i < etm->queues.nr_queues; i++) {
ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
if (ret)
......@@ -666,7 +685,7 @@ static int cs_etm__inject_event(union perf_event *event,
static int
cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
cs_etm__get_trace(struct cs_etm_queue *etmq)
{
struct auxtrace_buffer *aux_buffer = etmq->buffer;
struct auxtrace_buffer *old_buffer = aux_buffer;
......@@ -680,7 +699,7 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
if (!aux_buffer) {
if (old_buffer)
auxtrace_buffer__drop_data(old_buffer);
buff->len = 0;
etmq->buf_len = 0;
return 0;
}
......@@ -700,13 +719,11 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
if (old_buffer)
auxtrace_buffer__drop_data(old_buffer);
buff->offset = aux_buffer->offset;
buff->len = aux_buffer->size;
buff->buf = aux_buffer->data;
etmq->buf_used = 0;
etmq->buf_len = aux_buffer->size;
etmq->buf = aux_buffer->data;
buff->ref_timestamp = aux_buffer->reference;
return buff->len;
return etmq->buf_len;
}
static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
......@@ -1135,6 +1152,32 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq)
return 0;
}
/*
* cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
* if need be.
* Returns: < 0 if error
* = 0 if no more auxtrace_buffer to read
* > 0 if the current buffer isn't empty yet
*/
static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
{
int ret;
if (!etmq->buf_len) {
ret = cs_etm__get_trace(etmq);
if (ret <= 0)
return ret;
/*
* We cannot assume consecutive blocks in the data file
* are contiguous, reset the decoder to force re-sync.
*/
ret = cs_etm_decoder__reset(etmq->decoder);
if (ret)
return ret;
}
return etmq->buf_len;
}
static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
struct cs_etm_packet *packet,
......@@ -1474,105 +1517,124 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
return 0;
}
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
{
int ret = 0;
size_t processed = 0;
/*
* Packets are decoded and added to the decoder's packet queue
* until the decoder packet processing callback has requested that
* processing stops or there is nothing left in the buffer. Normal
* operations that stop processing are a timestamp packet or a full
* decoder buffer queue.
*/
ret = cs_etm_decoder__process_data_block(etmq->decoder,
etmq->offset,
&etmq->buf[etmq->buf_used],
etmq->buf_len,
&processed);
if (ret)
goto out;
etmq->offset += processed;
etmq->buf_used += processed;
etmq->buf_len -= processed;
out:
return ret;
}
static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq)
{
int ret;
/* Process each packet in this chunk */
while (1) {
ret = cs_etm_decoder__get_packet(etmq->decoder,
etmq->packet);
if (ret <= 0)
/*
* Stop processing this chunk on
* end of data or error
*/
break;
/*
* Since packet addresses are swapped in packet
* handling within below switch() statements,
* thus setting sample flags must be called
* prior to switch() statement to use address
* information before packets swapping.
*/
ret = cs_etm__set_sample_flags(etmq);
if (ret < 0)
break;
switch (etmq->packet->sample_type) {
case CS_ETM_RANGE:
/*
* If the packet contains an instruction
* range, generate instruction sequence
* events.
*/
cs_etm__sample(etmq);
break;
case CS_ETM_EXCEPTION:
case CS_ETM_EXCEPTION_RET:
/*
* If the exception packet is coming,
* make sure the previous instruction
* range packet to be handled properly.
*/
cs_etm__exception(etmq);
break;
case CS_ETM_DISCONTINUITY:
/*
* Discontinuity in trace, flush
* previous branch stack
*/
cs_etm__flush(etmq);
break;
case CS_ETM_EMPTY:
/*
* Should not receive empty packet,
* report error.
*/
pr_err("CS ETM Trace: empty packet\n");
return -EINVAL;
default:
break;
}
}
return ret;
}
static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{
struct cs_etm_auxtrace *etm = etmq->etm;
struct cs_etm_buffer buffer;
size_t buffer_used, processed;
int err = 0;
if (!etm->kernel_start)
etm->kernel_start = machine__kernel_start(etm->machine);
/* Go through each buffer in the queue and decode them one by one */
while (1) {
buffer_used = 0;
memset(&buffer, 0, sizeof(buffer));
err = cs_etm__get_trace(&buffer, etmq);
err = cs_etm__get_data_block(etmq);
if (err <= 0)
return err;
/*
* We cannot assume consecutive blocks in the data file are
* contiguous, reset the decoder to force re-sync.
*/
err = cs_etm_decoder__reset(etmq->decoder);
if (err != 0)
return err;
/* Run trace decoder until buffer consumed or end of trace */
do {
processed = 0;
err = cs_etm_decoder__process_data_block(
etmq->decoder,
etmq->offset,
&buffer.buf[buffer_used],
buffer.len - buffer_used,
&processed);
err = cs_etm__decode_data_block(etmq);
if (err)
return err;
etmq->offset += processed;
buffer_used += processed;
/*
* Process each packet in this chunk, nothing to do if
* an error occurs other than hoping the next one will
* be better.
*/
err = cs_etm__process_decoder_queue(etmq);
/* Process each packet in this chunk */
while (1) {
err = cs_etm_decoder__get_packet(etmq->decoder,
etmq->packet);
if (err <= 0)
/*
* Stop processing this chunk on
* end of data or error
*/
break;
/*
* Since packet addresses are swapped in packet
* handling within below switch() statements,
* thus setting sample flags must be called
* prior to switch() statement to use address
* information before packets swapping.
*/
err = cs_etm__set_sample_flags(etmq);
if (err < 0)
break;
switch (etmq->packet->sample_type) {
case CS_ETM_RANGE:
/*
* If the packet contains an instruction
* range, generate instruction sequence
* events.
*/
cs_etm__sample(etmq);
break;
case CS_ETM_EXCEPTION:
case CS_ETM_EXCEPTION_RET:
/*
* If the exception packet is coming,
* make sure the previous instruction
* range packet to be handled properly.
*/
cs_etm__exception(etmq);
break;
case CS_ETM_DISCONTINUITY:
/*
* Discontinuity in trace, flush
* previous branch stack
*/
cs_etm__flush(etmq);
break;
case CS_ETM_EMPTY:
/*
* Should not receive empty packet,
* report error.
*/
pr_err("CS ETM Trace: empty packet\n");
return -EINVAL;
default:
break;
}
}
} while (buffer.len > buffer_used);
} while (etmq->buf_len);
if (err == 0)
/* Flush any remaining branch stack entries */
......@@ -1583,7 +1645,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
}
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
pid_t tid, u64 time_)
pid_t tid)
{
unsigned int i;
struct auxtrace_queues *queues = &etm->queues;
......@@ -1593,7 +1655,6 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
struct cs_etm_queue *etmq = queue->priv;
if (etmq && ((tid == -1) || (etmq->tid == tid))) {
etmq->time = time_;
cs_etm__set_pid_tid_cpu(etm, queue);
cs_etm__run_decoder(etmq);
}
......@@ -1637,8 +1698,7 @@ static int cs_etm__process_event(struct perf_session *session,
if (event->header.type == PERF_RECORD_EXIT)
return cs_etm__process_timeless_queues(etm,
event->fork.tid,
sample->time);
event->fork.tid);
return 0;
}
......
......@@ -105,8 +105,8 @@ struct intlist *traceid_list;
#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
static const u64 __perf_cs_etmv3_magic = 0x3030303030303030ULL;
static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL;
#define __perf_cs_etmv3_magic 0x3030303030303030ULL
#define __perf_cs_etmv4_magic 0x4040404040404040ULL
#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))
#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
......
......@@ -563,7 +563,6 @@ static int write_cmdline(struct feat_fd *ff,
"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list"
struct cpu_topo {
u32 cpu_nr;
u32 core_sib;
u32 thread_sib;
char **core_siblings;
......@@ -679,7 +678,6 @@ static struct cpu_topo *build_cpu_topology(void)
goto out_free;
tp = addr;
tp->cpu_nr = nr;
addr += sizeof(*tp);
tp->core_siblings = addr;
addr += sz;
......@@ -1042,11 +1040,9 @@ static int write_cpuid(struct feat_fd *ff,
int ret;
ret = get_cpuid(buffer, sizeof(buffer));
if (!ret)
goto write_it;
if (ret)
return -1;
return -1;
write_it:
return do_write_string(ff, buffer);
}
......
libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
perf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk
inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt
......
......@@ -270,7 +270,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
}
void metricgroup__print(bool metrics, bool metricgroups, char *filter,
bool raw)
bool raw, bool details)
{
struct pmu_events_map *map = perf_pmu__find_map(NULL);
struct pmu_event *pe;
......@@ -329,6 +329,12 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
if (asprintf(&s, "%s\n%*s%s]",
pe->metric_name, 8, "[", pe->desc) < 0)
return;
if (details) {
if (asprintf(&s, "%s\n%*s%s]",
s, 8, "[", pe->metric_expr) < 0)
return;
}
}
if (!s)
......
......@@ -27,6 +27,7 @@ int metricgroup__parse_groups(const struct option *opt,
const char *str,
struct rblist *metric_events);
void metricgroup__print(bool metrics, bool groups, char *filter, bool raw);
void metricgroup__print(bool metrics, bool groups, char *filter,
bool raw, bool details);
bool metricgroup__has_metric(const char *metric);
#endif
......@@ -2540,7 +2540,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag,
print_sdt_events(NULL, NULL, name_only);
metricgroup__print(true, true, NULL, name_only);
metricgroup__print(true, true, NULL, name_only, details_flag);
}
int parse_events__is_hardcoded_term(struct parse_events_term *term)
......
......@@ -311,7 +311,7 @@ value_sym '/' event_config '/'
$$ = list;
}
|
value_sym sep_slash_dc
value_sym sep_slash_slash_dc
{
struct list_head *list;
int type = $1 >> 16;
......@@ -702,7 +702,7 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE
sep_dc: ':' |
sep_slash_dc: '/' | ':' |
sep_slash_slash_dc: '/' '/' | ':' |
%%
......
......@@ -352,6 +352,11 @@ static bool s390_cpumsf_validate(int machine_type,
*dsdes = 85;
*bsdes = 32;
break;
case 2964:
case 2965:
*dsdes = 112;
*bsdes = 32;
break;
default:
/* Illegal trailer entry */
return false;
......
libperf-$(CONFIG_LIBPERL) += trace-event-perl.o
libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o
perf-$(CONFIG_LIBPERL) += trace-event-perl.o
perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment