Commit a3664a74 authored by Ingo Molnar

Merge tag 'perf-core-for-mingo-5.3-20190621' of...

Merge tag 'perf-core-for-mingo-5.3-20190621' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

perf trace:

  Arnaldo Carvalho de Melo:

  - Fix exclusion of not available syscall names from selector list.

  - Fixup pointer arithmetic when consuming augmented syscall args.

Intel PT:

  Adrian Hunter:

  - Add support for decoding PEBS via PT packets. See:

      https://software.intel.com/en-us/articles/intel-sdm
      May 2019 version: Vol. 3B 18.5.5.2 PEBS output to Intel® Processor Trace

  for more details about it.

ARM64:

  John Garry:

  - Fix uncore PMU alias list for ARM64

  Raphael Gault:

  - Compile tests unconditionally.

cs-etm:

  Mathieu Poirier:

  - Optimize option setup for CPU-wide sessions.

build:

  Florian Fainelli:

  - Don't hardcode host include path for libslang, fixing up building with it
    in cross build environments.

  Arnaldo Carvalho de Melo:

  - Check if gettid() is available before providing helper, fixing the build
    when using the latest glibc version, where a helper for gettid() is finally
    present.

  - Fix building with libslang in systems where it is located in slang/slang.h.

  - Fix fast path test for zstd library.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 3ce5aceb 3469fa84
......@@ -36,6 +36,7 @@ FEATURE_TESTS_BASIC := \
fortify-source \
sync-compare-and-swap \
get_current_dir_name \
gettid \
glibc \
gtk2 \
gtk2-infobar \
......@@ -52,6 +53,7 @@ FEATURE_TESTS_BASIC := \
libpython \
libpython-version \
libslang \
libslang-include-subdir \
libcrypto \
libunwind \
pthread-attr-setaffinity-np \
......@@ -113,7 +115,6 @@ FEATURE_DISPLAY ?= \
numa_num_possible_cpus \
libperl \
libpython \
libslang \
libcrypto \
libunwind \
libdw-dwarf-unwind \
......
......@@ -31,6 +31,7 @@ FILES= \
test-libpython.bin \
test-libpython-version.bin \
test-libslang.bin \
test-libslang-include-subdir.bin \
test-libcrypto.bin \
test-libunwind.bin \
test-libunwind-debug-frame.bin \
......@@ -54,6 +55,7 @@ FILES= \
test-get_cpuid.bin \
test-sdt.bin \
test-cxx.bin \
test-gettid.bin \
test-jvmti.bin \
test-jvmti-cmlr.bin \
test-sched_getcpu.bin \
......@@ -181,7 +183,10 @@ $(OUTPUT)test-libaudit.bin:
$(BUILD) -laudit
$(OUTPUT)test-libslang.bin:
$(BUILD) -I/usr/include/slang -lslang
$(BUILD) -lslang
$(OUTPUT)test-libslang-include-subdir.bin:
$(BUILD) -lslang
$(OUTPUT)test-libcrypto.bin:
$(BUILD) -lcrypto
......@@ -267,6 +272,9 @@ $(OUTPUT)test-sdt.bin:
$(OUTPUT)test-cxx.bin:
$(BUILDXX) -std=gnu++11
$(OUTPUT)test-gettid.bin:
$(BUILD)
$(OUTPUT)test-jvmti.bin:
$(BUILD)
......
......@@ -38,6 +38,10 @@
# include "test-get_current_dir_name.c"
#undef main
#define main main_test_gettid
# include "test-gettid.c"
#undef main
#define main main_test_glibc
# include "test-glibc.c"
#undef main
......@@ -182,7 +186,7 @@
# include "test-disassembler-four-args.c"
#undef main
#define main main_test_zstd
#define main main_test_libzstd
# include "test-libzstd.c"
#undef main
......@@ -195,6 +199,7 @@ int main(int argc, char *argv[])
main_test_libelf();
main_test_libelf_mmap();
main_test_get_current_dir_name();
main_test_gettid();
main_test_glibc();
main_test_dwarf();
main_test_dwarf_getlocations();
......
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
int main(void)
......
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
/*
 * Build-time feature probe for gettid().
 *
 * The probe succeeds only if <unistd.h> declares gettid() when _GNU_SOURCE
 * is defined and the call links. The build then sets HAVE_GETTID so that
 * code carrying its own gettid() helper can drop it.
 */
#define _GNU_SOURCE
#include <unistd.h>
int main(void)
{
/* The value is irrelevant; only compiling and linking matters. */
return gettid();
}
/*
 * This file is also #included into the combined feature test with main
 * renamed; presumably _GNU_SOURCE is undefined here so that it does not
 * leak into the probes included after it.
 */
#undef _GNU_SOURCE
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
int main(void)
......
// SPDX-License-Identifier: GPL-2.0
/*
 * Build-time feature probe: succeeds when the S-Lang header is installed
 * as <slang/slang.h> (in an include subdirectory) rather than <slang.h>.
 * On success the build defines HAVE_SLANG_INCLUDE_SUBDIR, which selects
 * the <slang/slang.h> include path in the UI code.
 */
#include <slang/slang.h>
int main(void)
{
/* Reference a library symbol so the probe also checks linking with -lslang. */
return SLsmg_init_smg();
}
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <sched.h>
......
......@@ -332,6 +332,10 @@ ifeq ($(feature-get_current_dir_name), 1)
CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME
endif
ifeq ($(feature-gettid), 1)
CFLAGS += -DHAVE_GETTID
endif
ifdef NO_LIBELF
NO_DWARF := 1
NO_DEMANGLE := 1
......@@ -640,11 +644,15 @@ endif
ifndef NO_SLANG
ifneq ($(feature-libslang), 1)
msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev);
NO_SLANG := 1
else
ifneq ($(feature-libslang-include-subdir), 1)
msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev);
NO_SLANG := 1
else
CFLAGS += -DHAVE_SLANG_INCLUDE_SUBDIR
endif
endif
ifndef NO_SLANG
# Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
CFLAGS += -I/usr/include/slang
CFLAGS += -DHAVE_SLANG_SUPPORT
EXTLIBS += -lslang
$(call detected,CONFIG_SLANG)
......
......@@ -162,20 +162,19 @@ static int cs_etm_set_option(struct auxtrace_record *itr,
!cpu_map__has(online_cpus, i))
continue;
switch (option) {
case ETM_OPT_CTXTID:
if (option & ETM_OPT_CTXTID) {
err = cs_etm_set_context_id(itr, evsel, i);
if (err)
goto out;
break;
case ETM_OPT_TS:
}
if (option & ETM_OPT_TS) {
err = cs_etm_set_timestamp(itr, evsel, i);
if (err)
goto out;
break;
default:
goto out;
}
if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS))
/* Nothing else is currently supported */
goto out;
}
err = 0;
......@@ -398,11 +397,8 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
if (!cpu_map__empty(cpus)) {
perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
err = cs_etm_set_option(itr, cs_etm_evsel, ETM_OPT_CTXTID);
if (err)
goto out;
err = cs_etm_set_option(itr, cs_etm_evsel, ETM_OPT_TS);
err = cs_etm_set_option(itr, cs_etm_evsel,
ETM_OPT_CTXTID | ETM_OPT_TS);
if (err)
goto out;
}
......
perf-y += util/
perf-$(CONFIG_DWARF_UNWIND) += tests/
perf-y += tests/
perf-y += regs_load.o
perf-y += dwarf-unwind.o
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-y += arch-tests.o
......@@ -9,6 +9,7 @@ struct test;
int test__rdpmc(struct test *test __maybe_unused, int subtest);
int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
int test__insn_x86(struct test *test __maybe_unused, int subtest);
int test__intel_pt_pkt_decoder(struct test *test, int subtest);
int test__bp_modify(struct test *test, int subtest);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
......
......@@ -4,5 +4,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-y += arch-tests.o
perf-y += rdpmc.o
perf-y += perf-time-to-tsc.o
perf-$(CONFIG_AUXTRACE) += insn-x86.o
perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
perf-$(CONFIG_X86_64) += bp-modify.o
......@@ -23,6 +23,10 @@ struct test arch_tests[] = {
.desc = "x86 instruction decoder - new instructions",
.func = test__insn_x86,
},
{
.desc = "Intel PT packet decoder",
.func = test__intel_pt_pkt_decoder,
},
#endif
#if defined(__x86_64__)
{
......
This diff is collapsed.
......@@ -1239,7 +1239,7 @@ static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, c
*/
int consumed = sizeof(*augmented_arg) + augmented_arg->size;
arg->augmented.args += consumed;
arg->augmented.args = ((void *)arg->augmented.args) + consumed;
arg->augmented.size -= consumed;
return printed;
......@@ -1527,13 +1527,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
static int trace__validate_ev_qualifier(struct trace *trace)
{
int err = 0, i;
int err = 0;
bool printed_invalid_prefix = false;
size_t nr_allocated;
struct str_node *pos;
size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);
trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
trace->ev_qualifier_ids.entries = malloc(nr_allocated *
sizeof(trace->ev_qualifier_ids.entries[0]));
if (trace->ev_qualifier_ids.entries == NULL) {
......@@ -1543,9 +1542,6 @@ static int trace__validate_ev_qualifier(struct trace *trace)
goto out;
}
nr_allocated = trace->ev_qualifier_ids.nr;
i = 0;
strlist__for_each_entry(pos, trace->ev_qualifier) {
const char *sc = pos->s;
int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
......@@ -1566,7 +1562,7 @@ static int trace__validate_ev_qualifier(struct trace *trace)
continue;
}
matches:
trace->ev_qualifier_ids.entries[i++] = id;
trace->ev_qualifier_ids.entries[nr_used++] = id;
if (match_next == -1)
continue;
......@@ -1574,7 +1570,7 @@ static int trace__validate_ev_qualifier(struct trace *trace)
id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
if (id < 0)
break;
if (nr_allocated == trace->ev_qualifier_ids.nr) {
if (nr_allocated == nr_used) {
void *entries;
nr_allocated += 8;
......@@ -1587,11 +1583,11 @@ static int trace__validate_ev_qualifier(struct trace *trace)
}
trace->ev_qualifier_ids.entries = entries;
}
trace->ev_qualifier_ids.nr++;
trace->ev_qualifier_ids.entries[i++] = id;
trace->ev_qualifier_ids.entries[nr_used++] = id;
}
}
trace->ev_qualifier_ids.nr = nr_used;
out:
if (printed_invalid_prefix)
pr_debug("\n");
......
......@@ -45,10 +45,12 @@
static char jit_path[PATH_MAX];
static void *marker_addr;
#ifndef HAVE_GETTID
/*
 * Fallback used when the build did not detect a gettid() in the C library
 * (HAVE_GETTID undefined): obtain the thread ID through the raw system call.
 */
static inline pid_t gettid(void)
{
	long tid = syscall(__NR_gettid);

	return (pid_t)tid;
}
#endif
static int get_e_machine(struct jitheader *hdr)
{
......
# SPDX-License-Identifier: GPL-2.0
perf-y += builtin-test.o
perf-y += parse-events.o
perf-y += dso-data.o
......
// SPDX-License-Identifier: GPL-2.0
/*
* Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
* 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
......
// SPDX-License-Identifier: GPL-2.0
/*
* bpf-script-example.c
* Test basic LLVM building
......
// SPDX-License-Identifier: GPL-2.0
/*
* bpf-script-test-kbuild.c
* Test include from kernel header
......
// SPDX-License-Identifier: GPL-2.0
/*
* bpf-script-test-prologue.c
* Test BPF prologue
......
// SPDX-License-Identifier: GPL-2.0
/*
* bpf-script-test-relocation.c
* Test BPF loader checking relocation
......
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <stdio.h>
#include <sys/epoll.h>
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
#include <linux/kernel.h>
#include "tests.h"
......
// SPDX-License-Identifier: GPL-2.0
#include "util/mem-events.h"
#include "util/symbol.h"
#include "linux/perf_event.h"
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
#include <linux/bitmap.h>
#include "cpumap.h"
......
# SPDX-License-Identifier: GPL-2.0
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
skip_if_no_perf_probe() {
......
#!/bin/sh
# Add vfs_getname probe to get syscall args filenames
#
# SPDX-License-Identifier: GPL-2.0
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
. $(dirname $0)/lib/probe.sh
......
......@@ -7,6 +7,7 @@
# This needs no debuginfo package, all is done using the libc ELF symtab
# and the CFI info in the binaries.
# SPDX-License-Identifier: GPL-2.0
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
. $(dirname $0)/lib/probe.sh
......
......@@ -6,6 +6,7 @@
# checks that that was captured by the vfs_getname probe in the generated
# perf.data file, with the temp file name as the pathname argument.
# SPDX-License-Identifier: GPL-2.0
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
. $(dirname $0)/lib/probe.sh
......
#!/bin/sh
# Zstd perf.data compression/decompression
# SPDX-License-Identifier: GPL-2.0
trace_file=$(mktemp /tmp/perf.data.XXX)
perf_tool=perf
......
......@@ -7,6 +7,7 @@
# that already handles "probe:vfs_getname" if present, and used in the
# "open" syscall "filename" argument beautifier.
# SPDX-License-Identifier: GPL-2.0
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
. $(dirname $0)/lib/probe.sh
......
......@@ -10,7 +10,12 @@
#ifndef HAVE_LONG_LONG
#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG
#endif
#ifdef HAVE_SLANG_INCLUDE_SUBDIR
#include <slang/slang.h>
#else
#include <slang.h>
#endif
#if SLANG_VERSION < 20104
#define slsmg_printf(msg, args...) \
......
......@@ -589,6 +589,9 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
{
char bf[128];
if (!evsel)
goto out_unknown;
if (evsel->name)
return evsel->name;
......@@ -628,7 +631,10 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
evsel->name = strdup(bf);
return evsel->name ?: "unknown";
if (evsel->name)
return evsel->name;
out_unknown:
return "unknown";
}
const char *perf_evsel__group_name(struct perf_evsel *evsel)
......
......@@ -133,6 +133,10 @@ struct intel_pt_decoder {
int mtc_shift;
struct intel_pt_stack stack;
enum intel_pt_pkt_state pkt_state;
enum intel_pt_pkt_ctx pkt_ctx;
enum intel_pt_pkt_ctx prev_pkt_ctx;
enum intel_pt_blk_type blk_type;
int blk_type_pos;
struct intel_pt_pkt packet;
struct intel_pt_pkt tnt;
int pkt_step;
......@@ -166,6 +170,7 @@ struct intel_pt_decoder {
bool set_fup_mwait;
bool set_fup_pwre;
bool set_fup_exstop;
bool set_fup_bep;
bool sample_cyc;
unsigned int fup_tx_flags;
unsigned int tx_flags;
......@@ -559,7 +564,8 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
memcpy(buf + len, decoder->buf, n);
len += n;
ret = intel_pt_get_packet(buf, len, &decoder->packet);
decoder->prev_pkt_ctx = decoder->pkt_ctx;
ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx);
if (ret < (int)old_len) {
decoder->next_buf = decoder->buf;
decoder->next_len = decoder->len;
......@@ -594,6 +600,7 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
{
struct intel_pt_pkt_info pkt_info;
const unsigned char *buf = decoder->buf;
enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx;
size_t len = decoder->len;
int ret;
......@@ -612,7 +619,8 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
if (!len)
return INTEL_PT_NEED_MORE_BYTES;
ret = intel_pt_get_packet(buf, len, &pkt_info.packet);
ret = intel_pt_get_packet(buf, len, &pkt_info.packet,
&pkt_ctx);
if (!ret)
return INTEL_PT_NEED_MORE_BYTES;
if (ret < 0)
......@@ -687,6 +695,10 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_MNT:
case INTEL_PT_PTWRITE:
case INTEL_PT_PTWRITE_IP:
case INTEL_PT_BBP:
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
return 0;
case INTEL_PT_MTC:
......@@ -878,8 +890,9 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
return ret;
}
decoder->prev_pkt_ctx = decoder->pkt_ctx;
ret = intel_pt_get_packet(decoder->buf, decoder->len,
&decoder->packet);
&decoder->packet, &decoder->pkt_ctx);
if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 &&
decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
ret = intel_pt_get_split_packet(decoder);
......@@ -1117,6 +1130,14 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
decoder->state.to_ip = 0;
ret = true;
}
if (decoder->set_fup_bep) {
decoder->set_fup_bep = false;
decoder->state.type |= INTEL_PT_BLK_ITEMS;
decoder->state.type &= ~INTEL_PT_BRANCH;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
ret = true;
}
return ret;
}
......@@ -1602,6 +1623,46 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
intel_pt_log_to("Setting timestamp", decoder->timestamp);
}
/*
 * Handle a BBP (block begin) packet.
 *
 * If the packet preceding this BBP was decoded outside of block context, the
 * accumulated item masks and the 32-bit flag are cleared first. The block
 * type is taken from the packet payload and translated to an array position.
 * For the GP registers block, the packet's count field is recorded as
 * 'is_32_bit'. An unknown or repeated block type is only logged.
 */
static void intel_pt_bbp(struct intel_pt_decoder *decoder)
{
	int pos;

	if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) {
		memset(decoder->state.items.mask, 0,
		       sizeof(decoder->state.items.mask));
		decoder->state.items.is_32_bit = false;
	}

	decoder->blk_type = decoder->packet.payload;
	pos = intel_pt_blk_type_pos(decoder->blk_type);
	decoder->blk_type_pos = pos;

	if (decoder->blk_type == INTEL_PT_GP_REGS)
		decoder->state.items.is_32_bit = decoder->packet.count;

	if (pos < 0) {
		intel_pt_log("WARNING: Unknown block type %u\n",
			     decoder->blk_type);
		return;
	}

	/* A non-zero mask means items of this block type were already seen */
	if (decoder->state.items.mask[pos])
		intel_pt_log("WARNING: Duplicate block type %u\n",
			     decoder->blk_type);
}
/*
 * Handle a BIP (block item) packet.
 *
 * The packet's count field is the item ID and its payload is the item value.
 * The value is stored in the slot for the current block type (as set by the
 * last BBP packet) and the corresponding bit is set in that block's mask.
 *
 * Items for an unknown block type, or with an out-of-range ID, are logged and
 * dropped. A repeated item is logged and its value overwritten.
 */
static void intel_pt_bip(struct intel_pt_decoder *decoder)
{
	uint32_t id = decoder->packet.count;
	int pos = decoder->blk_type_pos;
	uint32_t bit;

	if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) {
		intel_pt_log("WARNING: Unknown block item %u type %d\n",
			     id, decoder->blk_type);
		return;
	}

	/*
	 * Form the mask bit only after 'id' has been validated, and as an
	 * unsigned shift: 'id' can be 31, and shifting a signed 1 into the
	 * sign bit is undefined behaviour.
	 */
	bit = 1U << id;

	if (decoder->state.items.mask[pos] & bit) {
		intel_pt_log("WARNING: Duplicate block item %u type %d\n",
			     id, decoder->blk_type);
	}

	decoder->state.items.mask[pos] |= bit;
	decoder->state.items.val[pos][id] = decoder->packet.payload;
}
/* Walk PSB+ packets when already in sync. */
static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
{
......@@ -1633,6 +1694,10 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
case INTEL_PT_BBP:
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
decoder->have_tma = false;
intel_pt_log("ERROR: Unexpected packet\n");
err = -EAGAIN;
......@@ -1726,6 +1791,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
case INTEL_PT_BBP:
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
intel_pt_log("ERROR: Missing TIP after FUP\n");
decoder->pkt_state = INTEL_PT_STATE_ERR3;
decoder->pkt_step = 0;
......@@ -2047,6 +2116,33 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
decoder->state.pwrx_payload = decoder->packet.payload;
return 0;
case INTEL_PT_BBP:
intel_pt_bbp(decoder);
break;
case INTEL_PT_BIP:
intel_pt_bip(decoder);
break;
case INTEL_PT_BEP:
decoder->state.type = INTEL_PT_BLK_ITEMS;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
return 0;
case INTEL_PT_BEP_IP:
err = intel_pt_get_next_packet(decoder);
if (err)
return err;
if (decoder->packet.type == INTEL_PT_FUP) {
decoder->set_fup_bep = true;
no_tip = true;
} else {
intel_pt_log_at("ERROR: Missing FUP after BEP",
decoder->pos);
}
goto next;
default:
return intel_pt_bug(decoder);
}
......@@ -2085,6 +2181,10 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
case INTEL_PT_BBP:
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
intel_pt_log("ERROR: Unexpected packet\n");
err = -ENOENT;
goto out;
......@@ -2291,6 +2391,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
case INTEL_PT_BBP:
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
default:
break;
}
......@@ -2306,6 +2410,7 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
decoder->set_fup_mwait = false;
decoder->set_fup_pwre = false;
decoder->set_fup_exstop = false;
decoder->set_fup_bep = false;
if (!decoder->branch_enable) {
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
......@@ -2641,11 +2746,12 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
size_t *rem)
{
enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
struct intel_pt_pkt packet;
int ret;
while (len) {
ret = intel_pt_get_packet(buf, len, &packet);
ret = intel_pt_get_packet(buf, len, &packet, &ctx);
if (ret <= 0)
return false;
if (packet.type == INTEL_PT_TSC) {
......
......@@ -30,6 +30,7 @@ enum intel_pt_sample_type {
INTEL_PT_CBR_CHG = 1 << 8,
INTEL_PT_TRACE_BEGIN = 1 << 9,
INTEL_PT_TRACE_END = 1 << 10,
INTEL_PT_BLK_ITEMS = 1 << 11,
};
enum intel_pt_period_type {
......@@ -61,6 +62,141 @@ enum intel_pt_param_flags {
INTEL_PT_FUP_WITH_NLIP = 1 << 0,
};
/*
 * Block type numbers, as carried in the payload of a BBP packet.
 * INTEL_PT_BLK_TYPE_MAX is one more than the highest known type number.
 */
enum intel_pt_blk_type {
	INTEL_PT_GP_REGS	= 1,
	INTEL_PT_PEBS_BASIC	= 4,
	INTEL_PT_PEBS_MEM	= 5,
	INTEL_PT_LBR_0		= 8,
	INTEL_PT_LBR_1		= 9,
	INTEL_PT_LBR_2		= 10,
	INTEL_PT_XMM		= 16,
	INTEL_PT_BLK_TYPE_MAX
};

/*
 * The block type numbers are not sequential but here they are given sequential
 * positions to avoid wasting space for array placement.
 */
enum intel_pt_blk_type_pos {
	INTEL_PT_GP_REGS_POS,
	INTEL_PT_PEBS_BASIC_POS,
	INTEL_PT_PEBS_MEM_POS,
	INTEL_PT_LBR_0_POS,
	INTEL_PT_LBR_1_POS,
	INTEL_PT_LBR_2_POS,
	INTEL_PT_XMM_POS,
	INTEL_PT_BLK_TYPE_CNT
};

/*
 * Get the array position for a block type.
 *
 * Returns the matching enum intel_pt_blk_type_pos value, or -1 if blk_type is
 * not one of the known block types (including any value outside the range
 * 0 .. INTEL_PT_BLK_TYPE_MAX - 1).
 */
static inline int intel_pt_blk_type_pos(enum intel_pt_blk_type blk_type)
{
	/*
	 * Entries hold "position + 1" so that the zero left in unlisted slots
	 * by the designated initializer means "unknown". The table is static
	 * so it is built once rather than on every call.
	 */
#define BLK_TYPE(bt) [INTEL_PT_##bt] = INTEL_PT_##bt##_POS + 1
	static const int map[INTEL_PT_BLK_TYPE_MAX] = {
		BLK_TYPE(GP_REGS),
		BLK_TYPE(PEBS_BASIC),
		BLK_TYPE(PEBS_MEM),
		BLK_TYPE(LBR_0),
		BLK_TYPE(LBR_1),
		BLK_TYPE(LBR_2),
		BLK_TYPE(XMM),
	};
#undef BLK_TYPE
	/*
	 * Compare as unsigned so the range check does not depend on whether
	 * the compiler chose a signed or unsigned type for the enum.
	 */
	unsigned int idx = (unsigned int)blk_type;

	return idx < (unsigned int)INTEL_PT_BLK_TYPE_MAX ? map[idx] - 1 : -1;
}
/*
 * Number of item slots per block type. It matches the width of the 32-bit
 * per-type presence mask below.
 */
#define INTEL_PT_BLK_ITEM_ID_CNT 32
/*
 * Use unions so that the block items can be accessed by name or by array index.
 * There is an array of 32-bit masks for each block type, which indicate which
 * values are present. Then arrays of 32 64-bit values for each block type.
 *
 * Both unions are laid out in enum intel_pt_blk_type_pos order: GP registers,
 * PEBS basic, PEBS memory, LBR 0-2, XMM.
 *
 * NOTE(review): the named 'has_*' bit-fields line up with the mask bits only
 * if the compiler allocates bit-fields starting from the least significant
 * bit of each 32-bit unit - confirm for any non-x86 build host.
 */
struct intel_pt_blk_items {
union {
/* Presence masks indexed by block position; bit N set means item N was seen */
uint32_t mask[INTEL_PT_BLK_TYPE_CNT];
struct {
/* GP registers block: 18 named items, padded to 32 bits */
uint32_t has_rflags:1;
uint32_t has_rip:1;
uint32_t has_rax:1;
uint32_t has_rcx:1;
uint32_t has_rdx:1;
uint32_t has_rbx:1;
uint32_t has_rsp:1;
uint32_t has_rbp:1;
uint32_t has_rsi:1;
uint32_t has_rdi:1;
uint32_t has_r8:1;
uint32_t has_r9:1;
uint32_t has_r10:1;
uint32_t has_r11:1;
uint32_t has_r12:1;
uint32_t has_r13:1;
uint32_t has_r14:1;
uint32_t has_r15:1;
uint32_t has_unused_0:14;
/* PEBS basic block: 3 named items, padded to 32 bits */
uint32_t has_ip:1;
uint32_t has_applicable_counters:1;
uint32_t has_timestamp:1;
uint32_t has_unused_1:29;
/* PEBS memory block: 4 named items, padded to 32 bits */
uint32_t has_mem_access_address:1;
uint32_t has_mem_aux_info:1;
uint32_t has_mem_access_latency:1;
uint32_t has_tsx_aux_info:1;
uint32_t has_unused_2:28;
/* LBR and XMM blocks: whole 32-bit masks, one bit per item */
uint32_t has_lbr_0;
uint32_t has_lbr_1;
uint32_t has_lbr_2;
uint32_t has_xmm;
};
};
union {
/* Item values indexed by [block position][item ID] */
uint64_t val[INTEL_PT_BLK_TYPE_CNT][INTEL_PT_BLK_ITEM_ID_CNT];
struct {
/* GP registers block, in the same order as the has_* bits above */
struct {
uint64_t rflags;
uint64_t rip;
uint64_t rax;
uint64_t rcx;
uint64_t rdx;
uint64_t rbx;
uint64_t rsp;
uint64_t rbp;
uint64_t rsi;
uint64_t rdi;
uint64_t r8;
uint64_t r9;
uint64_t r10;
uint64_t r11;
uint64_t r12;
uint64_t r13;
uint64_t r14;
uint64_t r15;
/* Pad the block out to INTEL_PT_BLK_ITEM_ID_CNT slots */
uint64_t unused_0[INTEL_PT_BLK_ITEM_ID_CNT - 18];
};
/* PEBS basic block */
struct {
uint64_t ip;
uint64_t applicable_counters;
uint64_t timestamp;
uint64_t unused_1[INTEL_PT_BLK_ITEM_ID_CNT - 3];
};
/* PEBS memory block */
struct {
uint64_t mem_access_address;
uint64_t mem_aux_info;
uint64_t mem_access_latency;
uint64_t tsx_aux_info;
uint64_t unused_2[INTEL_PT_BLK_ITEM_ID_CNT - 4];
};
/* LBR and XMM blocks use all item slots */
uint64_t lbr_0[INTEL_PT_BLK_ITEM_ID_CNT];
uint64_t lbr_1[INTEL_PT_BLK_ITEM_ID_CNT];
uint64_t lbr_2[INTEL_PT_BLK_ITEM_ID_CNT];
uint64_t xmm[INTEL_PT_BLK_ITEM_ID_CNT];
};
};
/*
 * Copied from the BBP packet's size flag when a GP registers block begins;
 * non-zero there selects 4-byte rather than 8-byte block items.
 */
bool is_32_bit;
};
struct intel_pt_state {
enum intel_pt_sample_type type;
int err;
......@@ -81,6 +217,7 @@ struct intel_pt_state {
enum intel_pt_insn_op insn_op;
int insn_len;
char insn[INTEL_PT_INSN_BUF_SZ];
struct intel_pt_blk_items items;
};
struct intel_pt_insn;
......
......@@ -62,6 +62,10 @@ static const char * const packet_name[] = {
[INTEL_PT_MWAIT] = "MWAIT",
[INTEL_PT_PWRE] = "PWRE",
[INTEL_PT_PWRX] = "PWRX",
[INTEL_PT_BBP] = "BBP",
[INTEL_PT_BIP] = "BIP",
[INTEL_PT_BEP] = "BEP",
[INTEL_PT_BEP_IP] = "BEP",
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
......@@ -280,6 +284,55 @@ static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
return 7;
}
/*
 * Decode a BBP packet (3 bytes). From the third byte, bit 7 is stored in
 * 'count' and the low 5 bits in 'payload'.
 *
 * Returns the packet length, or INTEL_PT_NEED_MORE_BYTES if fewer than
 * 3 bytes are available.
 */
static int intel_pt_get_bbp(const unsigned char *buf, size_t len,
			    struct intel_pt_pkt *packet)
{
	unsigned char info;

	if (len < 3)
		return INTEL_PT_NEED_MORE_BYTES;

	info = buf[2];

	packet->type = INTEL_PT_BBP;
	packet->count = info >> 7;
	packet->payload = info & 0x1f;

	return 3;
}
/*
 * Decode a BIP packet with a 4-byte value (5 bytes in total). The upper 5
 * bits of the first byte are stored in 'count'; the following 4 bytes are
 * copied into 'payload' with memcpy_le64().
 *
 * Returns the packet length, or INTEL_PT_NEED_MORE_BYTES if fewer than
 * 5 bytes are available.
 */
static int intel_pt_get_bip_4(const unsigned char *buf, size_t len,
			      struct intel_pt_pkt *packet)
{
	const unsigned char *value = buf + 1;

	if (len < 5)
		return INTEL_PT_NEED_MORE_BYTES;

	packet->type = INTEL_PT_BIP;
	packet->count = buf[0] >> 3;
	memcpy_le64(&packet->payload, value, 4);

	return 5;
}
/*
 * Decode a BIP packet with an 8-byte value (9 bytes in total). The upper 5
 * bits of the first byte are stored in 'count'; the following 8 bytes are
 * copied into 'payload' with memcpy_le64().
 *
 * Returns the packet length, or INTEL_PT_NEED_MORE_BYTES if fewer than
 * 9 bytes are available.
 */
static int intel_pt_get_bip_8(const unsigned char *buf, size_t len,
			      struct intel_pt_pkt *packet)
{
	const unsigned char *value = buf + 1;

	if (len < 9)
		return INTEL_PT_NEED_MORE_BYTES;

	packet->type = INTEL_PT_BIP;
	packet->count = buf[0] >> 3;
	memcpy_le64(&packet->payload, value, 8);

	return 9;
}
/*
 * Decode a BEP packet (2 bytes); only the packet type is recorded.
 *
 * Returns the packet length, or INTEL_PT_NEED_MORE_BYTES if fewer than
 * 2 bytes are available.
 */
static int intel_pt_get_bep(size_t len, struct intel_pt_pkt *packet)
{
	if (len >= 2) {
		packet->type = INTEL_PT_BEP;
		return 2;
	}

	return INTEL_PT_NEED_MORE_BYTES;
}
/*
 * Decode a BEP packet of the "with IP" form (2 bytes); only the packet type
 * is recorded.
 *
 * Returns the packet length, or INTEL_PT_NEED_MORE_BYTES if fewer than
 * 2 bytes are available.
 */
static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet)
{
	if (len >= 2) {
		packet->type = INTEL_PT_BEP_IP;
		return 2;
	}

	return INTEL_PT_NEED_MORE_BYTES;
}
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
......@@ -320,6 +373,12 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
return intel_pt_get_pwre(buf, len, packet);
case 0xA2: /* PWRX */
return intel_pt_get_pwrx(buf, len, packet);
case 0x63: /* BBP */
return intel_pt_get_bbp(buf, len, packet);
case 0x33: /* BEP no IP */
return intel_pt_get_bep(len, packet);
case 0xb3: /* BEP with IP */
return intel_pt_get_bep_ip(len, packet);
default:
return INTEL_PT_BAD_PACKET;
}
......@@ -468,7 +527,8 @@ static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
}
static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
struct intel_pt_pkt *packet,
enum intel_pt_pkt_ctx ctx)
{
unsigned int byte;
......@@ -478,6 +538,22 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
return INTEL_PT_NEED_MORE_BYTES;
byte = buf[0];
switch (ctx) {
case INTEL_PT_NO_CTX:
break;
case INTEL_PT_BLK_4_CTX:
if ((byte & 0x7) == 4)
return intel_pt_get_bip_4(buf, len, packet);
break;
case INTEL_PT_BLK_8_CTX:
if ((byte & 0x7) == 4)
return intel_pt_get_bip_8(buf, len, packet);
break;
default:
break;
};
if (!(byte & BIT(0))) {
if (byte == 0)
return intel_pt_get_pad(packet);
......@@ -516,15 +592,65 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
}
}
/*
 * Update the packet decoding context after 'packet' has been decoded.
 *
 * A BBP packet enters block context: 4-byte items when its count field is
 * non-zero, otherwise 8-byte items. One group of packet types leaves the
 * context unchanged, another resets it to INTEL_PT_NO_CTX. Packet types not
 * listed leave *ctx untouched.
 */
void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
			  enum intel_pt_pkt_ctx *ctx)
{
	switch (packet->type) {
	/* Start of a block: the item size is selected by the BBP packet */
	case INTEL_PT_BBP:
		*ctx = packet->count ? INTEL_PT_BLK_4_CTX : INTEL_PT_BLK_8_CTX;
		return;

	/* Packets that end any block context */
	case INTEL_PT_TNT:
	case INTEL_PT_TIP:
	case INTEL_PT_TIP_PGD:
	case INTEL_PT_TIP_PGE:
	case INTEL_PT_MODE_EXEC:
	case INTEL_PT_MODE_TSX:
	case INTEL_PT_PIP:
	case INTEL_PT_OVF:
	case INTEL_PT_VMCS:
	case INTEL_PT_TRACESTOP:
	case INTEL_PT_PSB:
	case INTEL_PT_PSBEND:
	case INTEL_PT_PTWRITE:
	case INTEL_PT_PTWRITE_IP:
	case INTEL_PT_MWAIT:
	case INTEL_PT_BEP:
	case INTEL_PT_BEP_IP:
		*ctx = INTEL_PT_NO_CTX;
		return;

	/* Packets that leave the context as it is */
	case INTEL_PT_BAD:
	case INTEL_PT_PAD:
	case INTEL_PT_TSC:
	case INTEL_PT_TMA:
	case INTEL_PT_MTC:
	case INTEL_PT_FUP:
	case INTEL_PT_CYC:
	case INTEL_PT_CBR:
	case INTEL_PT_MNT:
	case INTEL_PT_EXSTOP:
	case INTEL_PT_EXSTOP_IP:
	case INTEL_PT_PWRE:
	case INTEL_PT_PWRX:
	case INTEL_PT_BIP:
	default:
		return;
	}
}
int intel_pt_get_packet(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
struct intel_pt_pkt *packet, enum intel_pt_pkt_ctx *ctx)
{
int ret;
ret = intel_pt_do_get_packet(buf, len, packet);
ret = intel_pt_do_get_packet(buf, len, packet, *ctx);
if (ret > 0) {
while (ret < 8 && len > (size_t)ret && !buf[ret])
ret += 1;
intel_pt_upd_pkt_ctx(packet, ctx);
}
return ret;
}
......@@ -602,8 +728,10 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
case INTEL_PT_PTWRITE_IP:
return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
case INTEL_PT_BEP:
case INTEL_PT_EXSTOP:
return snprintf(buf, buf_len, "%s IP:0", name);
case INTEL_PT_BEP_IP:
case INTEL_PT_EXSTOP_IP:
return snprintf(buf, buf_len, "%s IP:1", name);
case INTEL_PT_MWAIT:
......@@ -621,6 +749,12 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
(unsigned int)((payload >> 4) & 0xf),
(unsigned int)(payload & 0xf),
(unsigned int)((payload >> 8) & 0xf));
case INTEL_PT_BBP:
return snprintf(buf, buf_len, "%s SZ %s-byte Type 0x%llx",
name, packet->count ? "4" : "8", payload);
case INTEL_PT_BIP:
return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx",
name, packet->count, payload);
default:
break;
}
......
......@@ -50,6 +50,10 @@ enum intel_pt_pkt_type {
INTEL_PT_MWAIT,
INTEL_PT_PWRE,
INTEL_PT_PWRX,
INTEL_PT_BBP,
INTEL_PT_BIP,
INTEL_PT_BEP,
INTEL_PT_BEP_IP,
};
struct intel_pt_pkt {
......@@ -58,10 +62,25 @@ struct intel_pt_pkt {
uint64_t payload;
};
/*
 * Decoding of BIP packets conflicts with single-byte TNT packets. Since BIP
 * packets only occur in the context of a block (i.e. between BBP and BEP), that
 * context must be recorded and passed to the packet decoder.
 *
 * The context is entered by a BBP packet, whose size flag selects between the
 * 4-byte and 8-byte variants, and is maintained by intel_pt_upd_pkt_ctx().
 * Callers starting to decode a buffer begin with INTEL_PT_NO_CTX.
 */
enum intel_pt_pkt_ctx {
INTEL_PT_NO_CTX, /* BIP packets are invalid */
INTEL_PT_BLK_4_CTX, /* 4-byte BIP packets */
INTEL_PT_BLK_8_CTX, /* 8-byte BIP packets */
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
int intel_pt_get_packet(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet);
struct intel_pt_pkt *packet,
enum intel_pt_pkt_ctx *ctx);
void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
enum intel_pt_pkt_ctx *ctx);
int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
......
......@@ -35,6 +35,8 @@
#include "config.h"
#include "time-utils.h"
#include "../arch/x86/include/uapi/asm/perf_regs.h"
#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
......@@ -101,6 +103,9 @@ struct intel_pt {
u64 pwrx_id;
u64 cbr_id;
bool sample_pebs;
struct perf_evsel *pebs_evsel;
u64 tsc_bit;
u64 mtc_bit;
u64 mtc_freq_bits;
......@@ -177,13 +182,14 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
int ret, pkt_len, i;
char desc[INTEL_PT_PKT_DESC_MAX];
const char *color = PERF_COLOR_BLUE;
enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
color_fprintf(stdout, color,
". ... Intel Processor Trace data: size %zu bytes\n",
len);
while (len) {
ret = intel_pt_get_packet(buf, len, &packet);
ret = intel_pt_get_packet(buf, len, &packet, &ctx);
if (ret > 0)
pkt_len = ret;
else
......@@ -1178,28 +1184,37 @@ static inline bool intel_pt_skip_event(struct intel_pt *pt)
pt->num_events++ < pt->synth_opts.initial_skip;
}
/*
 * Fill in the parts of a synthesized sample that come straight from the
 * queue: pid, tid, cpu and the instruction bytes. Also initialise the event
 * header as a PERF_RECORD_SAMPLE whose size is just the header.
 */
static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
{
	/* Identity of the context being decoded */
	sample->pid = ptq->pid;
	sample->tid = ptq->tid;
	sample->cpu = ptq->cpu;

	/* Instruction bytes recorded on the queue */
	sample->insn_len = ptq->insn_len;
	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.size = sizeof(struct perf_event_header);
}
static void intel_pt_prep_b_sample(struct intel_pt *pt,
struct intel_pt_queue *ptq,
union perf_event *event,
struct perf_sample *sample)
{
intel_pt_prep_a_sample(ptq, event, sample);
if (!pt->timeless_decoding)
sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
sample->ip = ptq->state->from_ip;
sample->cpumode = intel_pt_cpumode(pt, sample->ip);
sample->pid = ptq->pid;
sample->tid = ptq->tid;
sample->addr = ptq->state->to_ip;
sample->period = 1;
sample->cpu = ptq->cpu;
sample->flags = ptq->flags;
sample->insn_len = ptq->insn_len;
memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = sample->cpumode;
event->sample.header.size = sizeof(struct perf_event_header);
}
static int intel_pt_inject_event(union perf_event *event,
......@@ -1534,6 +1549,261 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
pt->pwr_events_sample_type);
}
/*
 * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer to
 * intel_pt_add_gp_regs().
 *
 * The table is indexed by perf register number (PERF_REG_X86_*). Entries not
 * listed here are left at 0 by the designated initializer, i.e. "not present".
 * The values follow the item order of the GP registers block: rflags, rip,
 * rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8 ... r15.
 */
static const int pebs_gp_regs[] = {
[PERF_REG_X86_FLAGS] = 1,
[PERF_REG_X86_IP] = 2,
[PERF_REG_X86_AX] = 3,
[PERF_REG_X86_CX] = 4,
[PERF_REG_X86_DX] = 5,
[PERF_REG_X86_BX] = 6,
[PERF_REG_X86_SP] = 7,
[PERF_REG_X86_BP] = 8,
[PERF_REG_X86_SI] = 9,
[PERF_REG_X86_DI] = 10,
[PERF_REG_X86_R8] = 11,
[PERF_REG_X86_R9] = 12,
[PERF_REG_X86_R10] = 13,
[PERF_REG_X86_R11] = 14,
[PERF_REG_X86_R12] = 15,
[PERF_REG_X86_R13] = 16,
[PERF_REG_X86_R14] = 17,
[PERF_REG_X86_R15] = 18,
};
/*
 * Append general purpose register values from the GP registers block to the
 * output array at 'pos', in ascending perf register number order.
 *
 * A register is added only if it was both requested ('regs_mask') and present
 * in the block; each added register also has its bit set in intr_regs->mask.
 *
 * Returns 'pos' advanced past the values written.
 */
static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
				 const struct intel_pt_blk_items *items,
				 u64 regs_mask)
{
	const u64 *src = items->val[INTEL_PT_GP_REGS_POS];
	u32 present = items->mask[INTEL_PT_GP_REGS_POS];
	u32 reg_bit = 1;
	int reg;

	for (reg = 0; reg < PERF_REG_X86_64_MAX; reg++, reg_bit <<= 1) {
		/* Index into the PEBS gp_regs array, or -1 if there is none */
		int idx = pebs_gp_regs[reg] - 1;

		if (idx < 0)
			continue;

		/* Skip registers the block did not provide */
		if (!(present & 1 << idx))
			continue;

		/* Skip registers that were not requested */
		if (!(regs_mask & reg_bit))
			continue;

		intr_regs->mask |= reg_bit;
		*pos++ = src[idx];
	}

	return pos;
}
/* Fallback for headers that do not define the first XMM register number */
#ifndef PERF_REG_X86_XMM0
#define PERF_REG_X86_XMM0 32
#endif

/*
 * Append XMM register values from PEBS block items to a register dump.
 *
 * @intr_regs: register dump whose 'mask' is updated, at bit positions
 *             PERF_REG_X86_XMM0 and up, with the values that are appended
 * @pos:       where to write the next register value
 * @items:     decoded block items; values come from 'xmm' and their presence
 *             bits from 'has_xmm'
 * @regs_mask: perf register numbers that were requested
 *
 * If there are any XMM registers, then there should be all of them.
 * Nevertheless, only values that were both requested and provided are
 * appended.
 */
static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
			     const struct intel_pt_blk_items *items,
			     u64 regs_mask)
{
	/* Bit N set means items->xmm[N] is both present and wanted */
	u32 selected = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
	const u64 *src;

	intr_regs->mask |= (u64)selected << PERF_REG_X86_XMM0;

	for (src = items->xmm; selected; src++) {
		if (selected & 1)
			*pos++ = *src;
		selected >>= 1;
	}
}
/* Fields of the LBR info value that are decoded by intel_pt_lbr_flags() */
#define LBR_INFO_MISPRED	(1ULL << 63)
#define LBR_INFO_IN_TX		(1ULL << 62)
#define LBR_INFO_ABORT		(1ULL << 61)
#define LBR_INFO_CYCLES		0xffff

/*
 * Convert an LBR info value into perf branch flags, returned as the u64 that
 * overlays a struct branch_flags. Flags not derived from 'info' are zero.
 *
 * Refer kernel's intel_pmu_store_pebs_lbrs()
 */
static u64 intel_pt_lbr_flags(u64 info)
{
	union {
		struct branch_flags flags;
		u64 result;
	} u;
	int mispredicted = (info & LBR_INFO_MISPRED) != 0;

	/* Start from all-zero so that unset flags read back as 0 */
	u.result = 0;

	/* 'mispred' and 'predicted' are always opposites */
	u.flags.mispred = mispredicted;
	u.flags.predicted = !mispredicted;
	u.flags.in_tx = (info & LBR_INFO_IN_TX) != 0;
	u.flags.abort = (info & LBR_INFO_ABORT) != 0;
	u.flags.cycles = info & LBR_INFO_CYCLES;

	return u.result;
}
/*
 * Fill a branch stack from the LBR block items.
 *
 * @br_stack: destination; 'nr' is reset and then counts the entries written
 * @items:    decoded block items; positions INTEL_PT_LBR_0_POS through
 *            INTEL_PT_LBR_2_POS are scanned
 *
 * The items are consumed in groups of three values with three matching
 * presence bits. A group is copied only when all three bits are set; the
 * third value is converted by intel_pt_lbr_flags().
 *
 * Output is written as consecutive u64 values starting at entries[0].from.
 * NOTE(review): this relies on struct branch_entry being exactly three u64
 * sized members and on the caller providing enough entries - confirm.
 */
static void intel_pt_add_lbrs(struct branch_stack *br_stack,
			      const struct intel_pt_blk_items *items)
{
	u64 *out = &br_stack->entries[0].from;
	int blk;

	br_stack->nr = 0;

	for (blk = INTEL_PT_LBR_0_POS; blk <= INTEL_PT_LBR_2_POS; blk++) {
		const u64 *lbr = items->val[blk];
		u32 present = items->mask[blk];

		while (present) {
			/* Skip groups that are only partially present */
			if ((present & 7) == 7) {
				out[0] = lbr[0];
				out[1] = lbr[1];
				out[2] = intel_pt_lbr_flags(lbr[2]);
				out += 3;
				br_stack->nr += 1;
			}
			present >>= 3;
			lbr += 3;
		}
	}
}
/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3)
static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
{
const struct intel_pt_blk_items *items = &ptq->state->items;
struct perf_sample sample = { .ip = 0, };
union perf_event *event = ptq->event_buf;
struct intel_pt *pt = ptq->pt;
struct perf_evsel *evsel = pt->pebs_evsel;
u64 sample_type = evsel->attr.sample_type;
u64 id = evsel->id[0];
u8 cpumode;
if (intel_pt_skip_event(pt))
return 0;
intel_pt_prep_a_sample(ptq, event, &sample);
sample.id = id;
sample.stream_id = id;
if (!evsel->attr.freq)
sample.period = evsel->attr.sample_period;
/* No support for non-zero CS base */
if (items->has_ip)
sample.ip = items->ip;
else if (items->has_rip)
sample.ip = items->rip;
else
sample.ip = ptq->state->from_ip;
/* No support for guest mode at this time */
cpumode = sample.ip < ptq->pt->kernel_start ?
PERF_RECORD_MISC_USER :
PERF_RECORD_MISC_KERNEL;
event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;
sample.cpumode = cpumode;
if (sample_type & PERF_SAMPLE_TIME) {
u64 timestamp = 0;
if (items->has_timestamp)
timestamp = items->timestamp;
else if (!pt->timeless_decoding)
timestamp = ptq->timestamp;
if (timestamp)
sample.time = tsc_to_perf_time(timestamp, &pt->tc);
}
if (sample_type & PERF_SAMPLE_CALLCHAIN &&
pt->synth_opts.callchain) {
thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
pt->synth_opts.callchain_sz, sample.ip,
pt->kernel_start);
sample.callchain = ptq->chain;
}
if (sample_type & PERF_SAMPLE_REGS_INTR &&
items->mask[INTEL_PT_GP_REGS_POS]) {
u64 regs[sizeof(sample.intr_regs.mask)];
u64 regs_mask = evsel->attr.sample_regs_intr;
u64 *pos;
sample.intr_regs.abi = items->is_32_bit ?
PERF_SAMPLE_REGS_ABI_32 :
PERF_SAMPLE_REGS_ABI_64;
sample.intr_regs.regs = regs;
pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
struct {
struct branch_stack br_stack;
struct branch_entry entries[LBRS_MAX];
} br;
if (items->mask[INTEL_PT_LBR_0_POS] ||
items->mask[INTEL_PT_LBR_1_POS] ||
items->mask[INTEL_PT_LBR_2_POS]) {
intel_pt_add_lbrs(&br.br_stack, items);
sample.branch_stack = &br.br_stack;
} else if (pt->synth_opts.last_branch) {
intel_pt_copy_last_branch_rb(ptq);
sample.branch_stack = ptq->last_branch;
} else {
br.br_stack.nr = 0;
sample.branch_stack = &br.br_stack;
}
}
if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
sample.addr = items->mem_access_address;
if (sample_type & PERF_SAMPLE_WEIGHT) {
/*
* Refer kernel's setup_pebs_adaptive_sample_data() and
* intel_hsw_weight().
*/
if (items->has_mem_access_latency)
sample.weight = items->mem_access_latency;
if (!sample.weight && items->has_tsx_aux_info) {
/* Cycles last block */
sample.weight = (u32)items->tsx_aux_info;
}
}
if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
u64 ax = items->has_rax ? items->rax : 0;
/* Refer kernel's intel_hsw_transaction() */
u64 txn = (u8)(items->tsx_aux_info >> 32);
/* For RTM XABORTs also log the abort code from AX */
if (txn & PERF_TXN_TRANSACTION && ax & 1)
txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
sample.transaction = txn;
}
return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type);
}
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
pid_t pid, pid_t tid, u64 ip, u64 timestamp)
{
......@@ -1621,6 +1891,16 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
}
/*
* Do PEBS first to allow for the possibility that the PEBS timestamp
* precedes the current timestamp.
*/
if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
err = intel_pt_synth_pebs_sample(ptq);
if (err)
return err;
}
if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
if (state->type & INTEL_PT_CBR_CHG) {
err = intel_pt_synth_cbr_sample(ptq);
......
......@@ -709,9 +709,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
{
int i;
struct pmu_events_map *map;
struct pmu_event *pe;
const char *name = pmu->name;
const char *pname;
map = perf_pmu__find_map(pmu);
if (!map)
......@@ -722,28 +720,26 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
*/
i = 0;
while (1) {
const char *cpu_name = is_arm_pmu_core(name) ? name : "cpu";
struct pmu_event *pe = &map->table[i++];
const char *pname = pe->pmu ? pe->pmu : cpu_name;
pe = &map->table[i++];
if (!pe->name) {
if (pe->metric_group || pe->metric_name)
continue;
break;
}
if (!is_arm_pmu_core(name)) {
pname = pe->pmu ? pe->pmu : "cpu";
/*
* uncore alias may be from different PMU
* with common prefix
*/
if (pmu_is_uncore(name) &&
!strncmp(pname, name, strlen(pname)))
goto new_alias;
/*
* uncore alias may be from different PMU
* with common prefix
*/
if (pmu_is_uncore(name) &&
!strncmp(pname, name, strlen(pname)))
goto new_alias;
if (strcmp(pname, name))
continue;
}
if (strcmp(pname, name))
continue;
new_alias:
/* need type casts to override 'const' */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment