Commit 8e70e840 authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo-4.13-20170621' of...

Merge tag 'perf-core-for-mingo-4.13-20170621' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

 - Add support to measure SMI cost in 'perf stat' (Kan Liang)

 - Add support for unwinding callchains in powerpc with libdw (Paolo Bonzini)

Fixes:

 - Fix message: cpu list option is -C not -c (Adrian Hunter)

 - Fix 'perf script' message: field list option is -F not -f (Adrian Hunter)

 - Intel PT fixes: (Adrian Hunter)

   o Fix missing stack clear
   o Ensure IP is zero when state is INTEL_PT_STATE_NO_IP
   o Fix last_ip usage
   o Ensure never to set 'last_ip' when packet 'count' is zero
   o Clear FUP flag on error
   o Fix transactions_sample_type

Infrastructure changes:

 - Intel PT cleanups/refactorings (Adrian Hunter)

   o Use FUP always when scanning for an IP
   o Add missing __fallthrough
   o Remove redundant initial_skip checks
   o Allow decoding with branch tracing disabled
   o Add default config for pass-through branch enable
   o Add documentation for new config terms
   o Add decoder support for ptwrite and power event packets
   o Add reserved byte to CBR packet payload
   o Add decoder support for CBR events

 - Move  find_process() to the only place that uses it, skimming some
   more fat from util.[ch] (Arnaldo Carvalho de Melo)

 - Do parameter validation earlier on fetch_kernel_version() (Arnaldo Carvalho de Melo)

 - Remove unused _ALL_SOURCE define (Arnaldo Carvalho de Melo)

 - Add sysfs__write_int function (Kan Liang)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 007b811b 701516ae
...@@ -387,6 +387,22 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep) ...@@ -387,6 +387,22 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
return err; return err;
} }
/*
 * Write the decimal text form of @value to an already existing file.
 *
 * @filename is opened write-only; it is not created when missing.
 * Only the formatted characters are written. Writing the whole scratch
 * buffer would also push the NUL terminator and whatever uninitialized
 * stack bytes follow it into the file.
 *
 * Returns 0 when the complete string was written, -1 when the file
 * cannot be opened or the write fails or is short.
 */
int filename__write_int(const char *filename, int value)
{
	int fd = open(filename, O_WRONLY), err = -1;
	char buf[64];
	int len;

	if (fd < 0)
		return err;

	len = snprintf(buf, sizeof(buf), "%d", value);
	if (len > 0 && write(fd, buf, (size_t)len) == (ssize_t)len)
		err = 0;

	close(fd);
	return err;
}
int procfs__read_str(const char *entry, char **buf, size_t *sizep) int procfs__read_str(const char *entry, char **buf, size_t *sizep)
{ {
char path[PATH_MAX]; char path[PATH_MAX];
...@@ -480,3 +496,17 @@ int sysctl__read_int(const char *sysctl, int *value) ...@@ -480,3 +496,17 @@ int sysctl__read_int(const char *sysctl, int *value)
return filename__read_int(path, value); return filename__read_int(path, value);
} }
/*
 * Write @value to the file @entry, where @entry is a path relative to the
 * location returned by sysfs__mountpoint().
 *
 * Returns -1 when sysfs__mountpoint() returns NULL or when the combined
 * path does not fit in PATH_MAX; otherwise returns the result of
 * filename__write_int().
 */
int sysfs__write_int(const char *entry, int value)
{
	char fullpath[PATH_MAX];
	const char *mnt = sysfs__mountpoint();
	int n;

	if (mnt == NULL)
		return -1;

	n = snprintf(fullpath, sizeof(fullpath), "%s/%s", mnt, entry);
	if (n >= PATH_MAX)
		return -1;

	return filename__write_int(fullpath, value);
}
...@@ -31,6 +31,8 @@ int filename__read_int(const char *filename, int *value); ...@@ -31,6 +31,8 @@ int filename__read_int(const char *filename, int *value);
int filename__read_ull(const char *filename, unsigned long long *value); int filename__read_ull(const char *filename, unsigned long long *value);
int filename__read_str(const char *filename, char **buf, size_t *sizep); int filename__read_str(const char *filename, char **buf, size_t *sizep);
int filename__write_int(const char *filename, int value);
int procfs__read_str(const char *entry, char **buf, size_t *sizep); int procfs__read_str(const char *entry, char **buf, size_t *sizep);
int sysctl__read_int(const char *sysctl, int *value); int sysctl__read_int(const char *sysctl, int *value);
...@@ -38,4 +40,6 @@ int sysfs__read_int(const char *entry, int *value); ...@@ -38,4 +40,6 @@ int sysfs__read_int(const char *entry, int *value);
int sysfs__read_ull(const char *entry, unsigned long long *value); int sysfs__read_ull(const char *entry, unsigned long long *value);
int sysfs__read_str(const char *entry, char **buf, size_t *sizep); int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
int sysfs__read_bool(const char *entry, bool *value); int sysfs__read_bool(const char *entry, bool *value);
int sysfs__write_int(const char *entry, int value);
#endif /* __API_FS__ */ #endif /* __API_FS__ */
...@@ -364,6 +364,42 @@ cyc_thresh Specifies how frequently CYC packets are produced - see cyc ...@@ -364,6 +364,42 @@ cyc_thresh Specifies how frequently CYC packets are produced - see cyc
CYC packets are not requested by default. CYC packets are not requested by default.
pt Specifies pass-through which enables the 'branch' config term.
The default config selects 'pt' if it is available, so a user will
never need to specify this term.
branch Enable branch tracing. Branch tracing is enabled by default so to
disable branch tracing use 'branch=0'.
The default config selects 'branch' if it is available.
ptw Enable PTWRITE packets which are produced when a ptwrite instruction
is executed.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/ptwrite
which contains "1" if the feature is supported and
"0" otherwise.
fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet
provides the address of the ptwrite instruction. In the absence of
fup_on_ptw, the decoder will use the address of the previous branch
if branch tracing is enabled, otherwise the address will be zero.
Note that fup_on_ptw will work even when branch tracing is disabled.
pwr_evt Enable power events. The power events provide information about
changes to the CPU C-state.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
which contains "1" if the feature is supported and
"0" otherwise.
new snapshot option new snapshot option
------------------- -------------------
......
...@@ -239,6 +239,20 @@ taskset. ...@@ -239,6 +239,20 @@ taskset.
--no-merge:: --no-merge::
Do not merge results from same PMUs. Do not merge results from same PMUs.
--smi-cost::
Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
During the measurement, /sys/devices/cpu/freeze_on_smi will be set to
freeze core counters on SMI.
The aperf counter will not be affected by the setting.
The cost of SMI can be measured by (aperf - unhalted core cycles).
In practice, the percentage of SMI cycles is very useful for
performance-oriented analysis. --metric-only will be applied by default.
The output is SMI cycles%, which equals (aperf - unhalted core cycles) / aperf.
Users who want to get the actual value can apply --no-metric-only.
EXAMPLES EXAMPLES
-------- --------
......
...@@ -61,7 +61,7 @@ endif ...@@ -61,7 +61,7 @@ endif
# Disable it on all other architectures in case libdw unwind # Disable it on all other architectures in case libdw unwind
# support is detected in system. Add supported architectures # support is detected in system. Add supported architectures
# to the check. # to the check.
ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm)) ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc))
NO_LIBDW_DWARF_UNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1
endif endif
......
...@@ -5,4 +5,6 @@ libperf-y += perf_regs.o ...@@ -5,4 +5,6 @@ libperf-y += perf_regs.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_DWARF) += skip-callchain-idx.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
#include "../../util/event.h"
/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */
/*
 * Each row pairs a register number that is passed to
 * dwfl_thread_state_registers() (first column) with the perf register
 * index whose sampled value is stored there (second column).
 */
static const int special_regs[3][2] = {
{ 65, PERF_REG_POWERPC_LINK },
{ 101, PERF_REG_POWERPC_XER },
{ 109, PERF_REG_POWERPC_CTR },
};
/*
 * Hand the user registers captured in a perf sample to libdw for @thread.
 *
 * @arg is used as a struct unwind_info; the registers are read from
 * ui->sample->user_regs. R0..R31 are registered as registers 0..31, NIP is
 * registered as the PC, and each special_regs[] entry is registered
 * individually.
 *
 * Returns false as soon as a dwfl_thread_state_registers() call fails,
 * true otherwise.
 */
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
struct unwind_info *ui = arg;
struct regs_dump *user_regs = &ui->sample->user_regs;
Dwarf_Word dwarf_regs[32], dwarf_nip;
size_t i;
/*
 * REG(r) evaluates to the sampled value of PERF_REG_POWERPC_<r>. The
 * return value of perf_reg_value() is not checked, so the result stays 0
 * unless perf_reg_value() stores something into val.
 */
#define REG(r) ({ \
Dwarf_Word val = 0; \
perf_reg_value(&val, user_regs, PERF_REG_POWERPC_##r); \
val; \
})
dwarf_regs[0] = REG(R0);
dwarf_regs[1] = REG(R1);
dwarf_regs[2] = REG(R2);
dwarf_regs[3] = REG(R3);
dwarf_regs[4] = REG(R4);
dwarf_regs[5] = REG(R5);
dwarf_regs[6] = REG(R6);
dwarf_regs[7] = REG(R7);
dwarf_regs[8] = REG(R8);
dwarf_regs[9] = REG(R9);
dwarf_regs[10] = REG(R10);
dwarf_regs[11] = REG(R11);
dwarf_regs[12] = REG(R12);
dwarf_regs[13] = REG(R13);
dwarf_regs[14] = REG(R14);
dwarf_regs[15] = REG(R15);
dwarf_regs[16] = REG(R16);
dwarf_regs[17] = REG(R17);
dwarf_regs[18] = REG(R18);
dwarf_regs[19] = REG(R19);
dwarf_regs[20] = REG(R20);
dwarf_regs[21] = REG(R21);
dwarf_regs[22] = REG(R22);
dwarf_regs[23] = REG(R23);
dwarf_regs[24] = REG(R24);
dwarf_regs[25] = REG(R25);
dwarf_regs[26] = REG(R26);
dwarf_regs[27] = REG(R27);
dwarf_regs[28] = REG(R28);
dwarf_regs[29] = REG(R29);
dwarf_regs[30] = REG(R30);
dwarf_regs[31] = REG(R31);
/* Register all 32 general purpose registers in one call, starting at 0. */
if (!dwfl_thread_state_registers(thread, 0, 32, dwarf_regs))
return false;
dwarf_nip = REG(NIP);
dwfl_thread_state_register_pc(thread, dwarf_nip);
/* The remaining registers are not contiguous, so set them one by one. */
for (i = 0; i < ARRAY_SIZE(special_regs); i++) {
Dwarf_Word val = 0;
perf_reg_value(&val, user_regs, special_regs[i][1]);
if (!dwfl_thread_state_registers(thread,
special_regs[i][0], 1,
&val))
return false;
}
return true;
}
...@@ -192,6 +192,7 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) ...@@ -192,6 +192,7 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
int psb_cyc, psb_periods, psb_period; int psb_cyc, psb_periods, psb_period;
int pos = 0; int pos = 0;
u64 config; u64 config;
char c;
pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");
...@@ -225,6 +226,10 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) ...@@ -225,6 +226,10 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
} }
} }
if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");
pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
......
...@@ -385,7 +385,7 @@ static int perf_session__check_output_opt(struct perf_session *session) ...@@ -385,7 +385,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
*/ */
if (!evsel && output[j].user_set && !output[j].wildcard_set) { if (!evsel && output[j].user_set && !output[j].wildcard_set) {
pr_err("%s events do not exist. " pr_err("%s events do not exist. "
"Remove corresponding -f option to proceed.\n", "Remove corresponding -F option to proceed.\n",
event_type(j)); event_type(j));
return -1; return -1;
} }
......
...@@ -86,6 +86,7 @@ ...@@ -86,6 +86,7 @@
#define DEFAULT_SEPARATOR " " #define DEFAULT_SEPARATOR " "
#define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>" #define CNTR_NOT_COUNTED "<not counted>"
#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
static void print_counters(struct timespec *ts, int argc, const char **argv); static void print_counters(struct timespec *ts, int argc, const char **argv);
...@@ -122,6 +123,14 @@ static const char * topdown_attrs[] = { ...@@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
NULL, NULL,
}; };
/* Event group string used for the SMI cost measurement. */
static const char *smi_cost_attrs = "{msr/aperf/,msr/smi/,cycles}";
static struct perf_evlist *evsel_list; static struct perf_evlist *evsel_list;
static struct target target = { static struct target target = {
...@@ -137,6 +146,8 @@ static bool null_run = false; ...@@ -137,6 +146,8 @@ static bool null_run = false;
static int detailed_run = 0; static int detailed_run = 0;
static bool transaction_run; static bool transaction_run;
static bool topdown_run = false; static bool topdown_run = false;
static bool smi_cost = false;
static bool smi_reset = false;
static bool big_num = true; static bool big_num = true;
static int big_num_opt = -1; static int big_num_opt = -1;
static const char *csv_sep = NULL; static const char *csv_sep = NULL;
...@@ -1782,6 +1793,8 @@ static const struct option stat_options[] = { ...@@ -1782,6 +1793,8 @@ static const struct option stat_options[] = {
"Only print computed metrics. No raw values", enable_metric_only), "Only print computed metrics. No raw values", enable_metric_only),
OPT_BOOLEAN(0, "topdown", &topdown_run, OPT_BOOLEAN(0, "topdown", &topdown_run,
"measure topdown level 1 statistics"), "measure topdown level 1 statistics"),
OPT_BOOLEAN(0, "smi-cost", &smi_cost,
"measure SMI cost"),
OPT_END() OPT_END()
}; };
...@@ -2160,6 +2173,39 @@ static int add_default_attributes(void) ...@@ -2160,6 +2173,39 @@ static int add_default_attributes(void)
return 0; return 0;
} }
if (smi_cost) {
int smi;
if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
fprintf(stderr, "freeze_on_smi is not supported.\n");
return -1;
}
if (!smi) {
if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
fprintf(stderr, "Failed to set freeze_on_smi.\n");
return -1;
}
smi_reset = true;
}
if (pmu_have_event("msr", "aperf") &&
pmu_have_event("msr", "smi")) {
if (!force_metric_only)
metric_only = true;
err = parse_events(evsel_list, smi_cost_attrs, NULL);
} else {
fprintf(stderr, "To measure SMI cost, it needs "
"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
return -1;
}
if (err) {
fprintf(stderr, "Cannot set up SMI cost events\n");
return -1;
}
return 0;
}
if (topdown_run) { if (topdown_run) {
char *str = NULL; char *str = NULL;
bool warn = false; bool warn = false;
...@@ -2742,6 +2788,9 @@ int cmd_stat(int argc, const char **argv) ...@@ -2742,6 +2788,9 @@ int cmd_stat(int argc, const char **argv)
perf_stat__exit_aggr_mode(); perf_stat__exit_aggr_mode();
perf_evlist__free_stats(evsel_list); perf_evlist__free_stats(evsel_list);
out: out:
if (smi_cost && smi_reset)
sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
perf_evlist__delete(evsel_list); perf_evlist__delete(evsel_list);
return status; return status;
} }
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <errno.h> #include <errno.h>
#include <inttypes.h> #include <inttypes.h>
#include <linux/bitops.h> #include <linux/bitops.h>
#include <api/fs/fs.h>
#include <api/fs/tracing_path.h> #include <api/fs/tracing_path.h>
#include <traceevent/event-parse.h> #include <traceevent/event-parse.h>
#include <linux/hw_breakpoint.h> #include <linux/hw_breakpoint.h>
...@@ -19,6 +20,8 @@ ...@@ -19,6 +20,8 @@
#include <linux/err.h> #include <linux/err.h>
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <sys/resource.h> #include <sys/resource.h>
#include <sys/types.h>
#include <dirent.h>
#include "asm/bug.h" #include "asm/bug.h"
#include "callchain.h" #include "callchain.h"
#include "cgroup.h" #include "cgroup.h"
...@@ -2472,6 +2475,42 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err, ...@@ -2472,6 +2475,42 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
return false; return false;
} }
/*
 * Report whether some process listed under the procfs mount point has a
 * comm that begins with @name.
 *
 * Every directory entry of type DT_DIR (other than "." and "..") is
 * probed for a "comm" file; entries whose comm cannot be read are skipped.
 * The comparison covers only the first strlen(@name) bytes, so it is a
 * prefix match.
 *
 * Returns true on the first match, false when nothing matches or when the
 * procfs directory is unavailable.
 */
static bool find_process(const char *name)
{
	/* Looked up once; the value does not change while we scan. */
	const char *procfs = procfs__mountpoint();
	size_t len = strlen(name);
	bool found = false;
	struct dirent *d;
	DIR *dir;

	/*
	 * Never hand a NULL path to opendir(). Assumes procfs__mountpoint()
	 * can return NULL when no mount is found - TODO confirm.
	 */
	if (!procfs)
		return false;

	dir = opendir(procfs);
	if (!dir)
		return false;

	/* Walk through the directory. */
	while (!found && (d = readdir(dir)) != NULL) {
		char path[PATH_MAX];
		char *data;
		size_t size;

		if ((d->d_type != DT_DIR) ||
		    !strcmp(".", d->d_name) ||
		    !strcmp("..", d->d_name))
			continue;

		scnprintf(path, sizeof(path), "%s/%s/comm",
			  procfs, d->d_name);
		if (filename__read_str(path, &data, &size))
			continue;

		found = !strncmp(name, data, len);
		free(data);
	}

	closedir(dir);
	return found;
}
int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
int err, char *msg, size_t size) int err, char *msg, size_t size)
{ {
......
...@@ -25,11 +25,18 @@ ...@@ -25,11 +25,18 @@
#define INTEL_PT_IN_TX (1 << 0) #define INTEL_PT_IN_TX (1 << 0)
#define INTEL_PT_ABORT_TX (1 << 1) #define INTEL_PT_ABORT_TX (1 << 1)
#define INTEL_PT_ASYNC (1 << 2) #define INTEL_PT_ASYNC (1 << 2)
#define INTEL_PT_FUP_IP (1 << 3)
enum intel_pt_sample_type { enum intel_pt_sample_type {
INTEL_PT_BRANCH = 1 << 0, INTEL_PT_BRANCH = 1 << 0,
INTEL_PT_INSTRUCTION = 1 << 1, INTEL_PT_INSTRUCTION = 1 << 1,
INTEL_PT_TRANSACTION = 1 << 2, INTEL_PT_TRANSACTION = 1 << 2,
INTEL_PT_PTW = 1 << 3,
INTEL_PT_MWAIT_OP = 1 << 4,
INTEL_PT_PWR_ENTRY = 1 << 5,
INTEL_PT_EX_STOP = 1 << 6,
INTEL_PT_PWR_EXIT = 1 << 7,
INTEL_PT_CBR_CHG = 1 << 8,
}; };
enum intel_pt_period_type { enum intel_pt_period_type {
...@@ -63,6 +70,11 @@ struct intel_pt_state { ...@@ -63,6 +70,11 @@ struct intel_pt_state {
uint64_t timestamp; uint64_t timestamp;
uint64_t est_timestamp; uint64_t est_timestamp;
uint64_t trace_nr; uint64_t trace_nr;
uint64_t ptw_payload;
uint64_t mwait_payload;
uint64_t pwre_payload;
uint64_t pwrx_payload;
uint64_t cbr_payload;
uint32_t flags; uint32_t flags;
enum intel_pt_insn_op insn_op; enum intel_pt_insn_op insn_op;
int insn_len; int insn_len;
...@@ -87,6 +99,7 @@ struct intel_pt_params { ...@@ -87,6 +99,7 @@ struct intel_pt_params {
bool (*pgd_ip)(uint64_t ip, void *data); bool (*pgd_ip)(uint64_t ip, void *data);
void *data; void *data;
bool return_compression; bool return_compression;
bool branch_enable;
uint64_t period; uint64_t period;
enum intel_pt_period_type period_type; enum intel_pt_period_type period_type;
unsigned max_non_turbo_ratio; unsigned max_non_turbo_ratio;
......
...@@ -64,6 +64,13 @@ static const char * const packet_name[] = { ...@@ -64,6 +64,13 @@ static const char * const packet_name[] = {
[INTEL_PT_PIP] = "PIP", [INTEL_PT_PIP] = "PIP",
[INTEL_PT_OVF] = "OVF", [INTEL_PT_OVF] = "OVF",
[INTEL_PT_MNT] = "MNT", [INTEL_PT_MNT] = "MNT",
[INTEL_PT_PTWRITE] = "PTWRITE",
[INTEL_PT_PTWRITE_IP] = "PTWRITE",
[INTEL_PT_EXSTOP] = "EXSTOP",
[INTEL_PT_EXSTOP_IP] = "EXSTOP",
[INTEL_PT_MWAIT] = "MWAIT",
[INTEL_PT_PWRE] = "PWRE",
[INTEL_PT_PWRX] = "PWRX",
}; };
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
...@@ -123,7 +130,7 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len, ...@@ -123,7 +130,7 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
if (len < 4) if (len < 4)
return INTEL_PT_NEED_MORE_BYTES; return INTEL_PT_NEED_MORE_BYTES;
packet->type = INTEL_PT_CBR; packet->type = INTEL_PT_CBR;
packet->payload = buf[2]; packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2));
return 4; return 4;
} }
...@@ -217,12 +224,80 @@ static int intel_pt_get_3byte(const unsigned char *buf, size_t len, ...@@ -217,12 +224,80 @@ static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
} }
} }
/*
 * Decode a PTWRITE packet.
 *
 * Bits 5-6 of buf[1] select the payload size and are stored in
 * packet->count: 0 means a 4-byte payload (6-byte packet), 1 means an
 * 8-byte payload (10-byte packet). Bit 7 of buf[1] selects between
 * INTEL_PT_PTWRITE_IP and INTEL_PT_PTWRITE.
 *
 * Returns the packet length, INTEL_PT_NEED_MORE_BYTES when @len is too
 * short, or INTEL_PT_BAD_PACKET for any other size selector.
 */
static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len,
				struct intel_pt_pkt *packet)
{
	packet->count = (buf[1] >> 5) & 0x3;

	if (buf[1] & BIT(7))
		packet->type = INTEL_PT_PTWRITE_IP;
	else
		packet->type = INTEL_PT_PTWRITE;

	if (packet->count == 0) {
		if (len < 6)
			return INTEL_PT_NEED_MORE_BYTES;
		packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2));
		return 6;
	}

	if (packet->count == 1) {
		if (len < 10)
			return INTEL_PT_NEED_MORE_BYTES;
		packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
		return 10;
	}

	return INTEL_PT_BAD_PACKET;
}
/* Mark @packet as INTEL_PT_EXSTOP and report its 2-byte length. */
static int intel_pt_get_exstop(struct intel_pt_pkt *packet)
{
	const int pkt_len = 2;

	packet->type = INTEL_PT_EXSTOP;

	return pkt_len;
}
/* Mark @packet as INTEL_PT_EXSTOP_IP and report its 2-byte length. */
static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet)
{
	const int pkt_len = 2;

	packet->type = INTEL_PT_EXSTOP_IP;

	return pkt_len;
}
/*
 * Decode an MWAIT packet: the payload is the 8 bytes starting at buf + 2,
 * converted with le64_to_cpu().
 *
 * Returns 10 (the packet length), or INTEL_PT_NEED_MORE_BYTES when fewer
 * than 10 bytes are available.
 */
static int intel_pt_get_mwait(const unsigned char *buf, size_t len,
			      struct intel_pt_pkt *packet)
{
	const size_t pkt_len = 10;

	if (len < pkt_len)
		return INTEL_PT_NEED_MORE_BYTES;

	packet->type = INTEL_PT_MWAIT;
	packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));

	return (int)pkt_len;
}
/*
 * Decode a PWRE packet: 2 payload bytes starting at buf + 2 are copied
 * into packet->payload with memcpy_le64().
 *
 * Returns 4 (the packet length), or INTEL_PT_NEED_MORE_BYTES when fewer
 * than 4 bytes are available.
 */
static int intel_pt_get_pwre(const unsigned char *buf, size_t len,
			     struct intel_pt_pkt *packet)
{
	const size_t pkt_len = 4;

	if (len < pkt_len)
		return INTEL_PT_NEED_MORE_BYTES;

	packet->type = INTEL_PT_PWRE;
	memcpy_le64(&packet->payload, buf + 2, 2);

	return (int)pkt_len;
}
/*
 * Decode a PWRX packet: 5 payload bytes starting at buf + 2 are copied
 * into packet->payload with memcpy_le64().
 *
 * Returns 7 (the packet length), or INTEL_PT_NEED_MORE_BYTES when fewer
 * than 7 bytes are available.
 */
static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
			     struct intel_pt_pkt *packet)
{
	const size_t pkt_len = 7;

	if (len < pkt_len)
		return INTEL_PT_NEED_MORE_BYTES;

	packet->type = INTEL_PT_PWRX;
	memcpy_le64(&packet->payload, buf + 2, 5);

	return (int)pkt_len;
}
static int intel_pt_get_ext(const unsigned char *buf, size_t len, static int intel_pt_get_ext(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet) struct intel_pt_pkt *packet)
{ {
if (len < 2) if (len < 2)
return INTEL_PT_NEED_MORE_BYTES; return INTEL_PT_NEED_MORE_BYTES;
if ((buf[1] & 0x1f) == 0x12)
return intel_pt_get_ptwrite(buf, len, packet);
switch (buf[1]) { switch (buf[1]) {
case 0xa3: /* Long TNT */ case 0xa3: /* Long TNT */
return intel_pt_get_long_tnt(buf, len, packet); return intel_pt_get_long_tnt(buf, len, packet);
...@@ -244,6 +319,16 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len, ...@@ -244,6 +319,16 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
return intel_pt_get_tma(buf, len, packet); return intel_pt_get_tma(buf, len, packet);
case 0xC3: /* 3-byte header */ case 0xC3: /* 3-byte header */
return intel_pt_get_3byte(buf, len, packet); return intel_pt_get_3byte(buf, len, packet);
case 0x62: /* EXSTOP no IP */
return intel_pt_get_exstop(packet);
case 0xE2: /* EXSTOP with IP */
return intel_pt_get_exstop_ip(packet);
case 0xC2: /* MWAIT */
return intel_pt_get_mwait(buf, len, packet);
case 0x22: /* PWRE */
return intel_pt_get_pwre(buf, len, packet);
case 0xA2: /* PWRX */
return intel_pt_get_pwrx(buf, len, packet);
default: default:
return INTEL_PT_BAD_PACKET; return INTEL_PT_BAD_PACKET;
} }
...@@ -522,6 +607,29 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, ...@@ -522,6 +607,29 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)", ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
name, payload, nr); name, payload, nr);
return ret; return ret;
case INTEL_PT_PTWRITE:
return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
case INTEL_PT_PTWRITE_IP:
return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
case INTEL_PT_EXSTOP:
return snprintf(buf, buf_len, "%s IP:0", name);
case INTEL_PT_EXSTOP_IP:
return snprintf(buf, buf_len, "%s IP:1", name);
case INTEL_PT_MWAIT:
return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x",
name, payload, (unsigned int)(payload & 0xff),
(unsigned int)((payload >> 32) & 0x3));
case INTEL_PT_PWRE:
return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u",
name, payload, !!(payload & 0x80),
(unsigned int)((payload >> 12) & 0xf),
(unsigned int)((payload >> 8) & 0xf));
case INTEL_PT_PWRX:
return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x",
name, payload,
(unsigned int)((payload >> 4) & 0xf),
(unsigned int)(payload & 0xf),
(unsigned int)((payload >> 8) & 0xf));
default: default:
break; break;
} }
......
...@@ -52,6 +52,13 @@ enum intel_pt_pkt_type { ...@@ -52,6 +52,13 @@ enum intel_pt_pkt_type {
INTEL_PT_PIP, INTEL_PT_PIP,
INTEL_PT_OVF, INTEL_PT_OVF,
INTEL_PT_MNT, INTEL_PT_MNT,
INTEL_PT_PTWRITE,
INTEL_PT_PTWRITE_IP,
INTEL_PT_EXSTOP,
INTEL_PT_EXSTOP_IP,
INTEL_PT_MWAIT,
INTEL_PT_PWRE,
INTEL_PT_PWRX,
}; };
struct intel_pt_pkt { struct intel_pt_pkt {
......
...@@ -668,6 +668,19 @@ static bool intel_pt_return_compression(struct intel_pt *pt) ...@@ -668,6 +668,19 @@ static bool intel_pt_return_compression(struct intel_pt *pt)
return true; return true;
} }
/*
 * Decide whether branch tracing is treated as enabled for this session.
 *
 * Returns false if any evsel whose config intel_pt_get_config() can fetch
 * has bit 0 set while bit 13 (0x2000) is clear; returns true otherwise.
 * NOTE(review): presumably bit 0 is the 'pt' term and bit 13 the 'branch'
 * term - confirm against the PMU format definitions.
 */
static bool intel_pt_branch_enable(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	u64 config;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!intel_pt_get_config(pt, &evsel->attr, &config))
			continue;

		if ((config & 1) && !(config & 0x2000))
			return false;
	}

	return true;
}
static unsigned int intel_pt_mtc_period(struct intel_pt *pt) static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
{ {
struct perf_evsel *evsel; struct perf_evsel *evsel;
...@@ -799,6 +812,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, ...@@ -799,6 +812,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.walk_insn = intel_pt_walk_next_insn; params.walk_insn = intel_pt_walk_next_insn;
params.data = ptq; params.data = ptq;
params.return_compression = intel_pt_return_compression(pt); params.return_compression = intel_pt_return_compression(pt);
params.branch_enable = intel_pt_branch_enable(pt);
params.max_non_turbo_ratio = pt->max_non_turbo_ratio; params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
params.mtc_period = intel_pt_mtc_period(pt); params.mtc_period = intel_pt_mtc_period(pt);
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
...@@ -1308,18 +1322,14 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ...@@ -1308,18 +1322,14 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->have_sample = false; ptq->have_sample = false;
if (pt->sample_instructions && if (pt->sample_instructions &&
(state->type & INTEL_PT_INSTRUCTION) && (state->type & INTEL_PT_INSTRUCTION)) {
(!pt->synth_opts.initial_skip ||
pt->num_events++ >= pt->synth_opts.initial_skip)) {
err = intel_pt_synth_instruction_sample(ptq); err = intel_pt_synth_instruction_sample(ptq);
if (err) if (err)
return err; return err;
} }
if (pt->sample_transactions && if (pt->sample_transactions &&
(state->type & INTEL_PT_TRANSACTION) && (state->type & INTEL_PT_TRANSACTION)) {
(!pt->synth_opts.initial_skip ||
pt->num_events++ >= pt->synth_opts.initial_skip)) {
err = intel_pt_synth_transaction_sample(ptq); err = intel_pt_synth_transaction_sample(ptq);
if (err) if (err)
return err; return err;
...@@ -2025,6 +2035,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, ...@@ -2025,6 +2035,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
return err; return err;
} }
pt->sample_transactions = true; pt->sample_transactions = true;
pt->transactions_sample_type = attr.sample_type;
pt->transactions_id = id; pt->transactions_id = id;
id += 1; id += 1;
evlist__for_each_entry(evlist, evsel) { evlist__for_each_entry(evlist, evsel) {
......
...@@ -2035,7 +2035,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, ...@@ -2035,7 +2035,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) { if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
pr_err("File does not contain CPU events. " pr_err("File does not contain CPU events. "
"Remove -c option to proceed.\n"); "Remove -C option to proceed.\n");
return -1; return -1;
} }
} }
......
...@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; ...@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
static struct rblist runtime_saved_values; static struct rblist runtime_saved_values;
static bool have_frontend_stalled; static bool have_frontend_stalled;
...@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void) ...@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
next = rb_first(&runtime_saved_values.entries); next = rb_first(&runtime_saved_values.entries);
while (next) { while (next) {
...@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, ...@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
else if (perf_stat_evsel__is(counter, SMI_NUM))
update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
else if (perf_stat_evsel__is(counter, APERF))
update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
if (counter->collect_stat) { if (counter->collect_stat) {
struct saved_value *v = saved_value_lookup(counter, cpu, ctx, struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
...@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu) ...@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
return sanitize_val(1.0 - sum); return sanitize_val(1.0 - sum);
} }
/*
 * Print the "SMI cycles%" and "SMI#" metrics for @cpu.
 *
 * The percentage is (aperf - cycles) / aperf * 100 when at least one SMI
 * was counted and 0 otherwise; it is shown in red above 10. Nothing is
 * printed when either the cycles or the aperf average is zero.
 */
static void print_smi_cost(int cpu, struct perf_evsel *evsel,
			   struct perf_stat_output_ctx *out)
{
	int ctx = evsel_context(evsel);
	double smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
	double aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
	double cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
	const char *color;
	double cost;

	if (cycles == 0 || aperf == 0)
		return;

	cost = smi_num ? (aperf - cycles) / aperf * 100.00 : 0.0;
	color = cost > 10 ? PERF_COLOR_RED : NULL;

	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
void perf_stat__print_shadow_stats(struct perf_evsel *evsel, void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu, double avg, int cpu,
struct perf_stat_output_ctx *out) struct perf_stat_output_ctx *out)
...@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ...@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
} }
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
print_smi_cost(cpu, evsel, out);
} else { } else {
print_metric(ctxp, NULL, NULL, NULL, 0); print_metric(ctxp, NULL, NULL, NULL, 0);
} }
......
...@@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { ...@@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
ID(SMI_NUM, msr/smi/),
ID(APERF, msr/aperf/),
}; };
#undef ID #undef ID
......
...@@ -22,6 +22,8 @@ enum perf_stat_evsel_id { ...@@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED, PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES, PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES, PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
PERF_STAT_EVSEL_ID__SMI_NUM,
PERF_STAT_EVSEL_ID__APERF,
PERF_STAT_EVSEL_ID__MAX, PERF_STAT_EVSEL_ID__MAX,
}; };
......
...@@ -343,43 +343,6 @@ int perf_event_paranoid(void) ...@@ -343,43 +343,6 @@ int perf_event_paranoid(void)
return value; return value;
} }
/*
 * Report whether some process listed under the procfs mount point has a
 * comm that begins with @name.
 *
 * Every directory entry of type DT_DIR (other than "." and "..") is
 * probed for a "comm" file; entries whose comm cannot be read are skipped.
 * The comparison covers only the first strlen(@name) bytes, so it is a
 * prefix match.
 *
 * Returns true on the first match, false when nothing matches or when the
 * procfs directory is unavailable.
 */
bool find_process(const char *name)
{
	/* Looked up once; the value does not change while we scan. */
	const char *procfs = procfs__mountpoint();
	size_t len = strlen(name);
	bool found = false;
	struct dirent *d;
	DIR *dir;

	/*
	 * Never hand a NULL path to opendir(). Assumes procfs__mountpoint()
	 * can return NULL when no mount is found - TODO confirm.
	 */
	if (!procfs)
		return false;

	dir = opendir(procfs);
	if (!dir)
		return false;

	/* Walk through the directory. */
	while (!found && (d = readdir(dir)) != NULL) {
		char path[PATH_MAX];
		char *data;
		size_t size;

		if ((d->d_type != DT_DIR) ||
		    !strcmp(".", d->d_name) ||
		    !strcmp("..", d->d_name))
			continue;

		scnprintf(path, sizeof(path), "%s/%s/comm",
			  procfs, d->d_name);
		if (filename__read_str(path, &data, &size))
			continue;

		found = !strncmp(name, data, len);
		free(data);
	}

	closedir(dir);
	return found;
}
static int static int
fetch_ubuntu_kernel_version(unsigned int *puint) fetch_ubuntu_kernel_version(unsigned int *puint)
{ {
...@@ -387,8 +350,12 @@ fetch_ubuntu_kernel_version(unsigned int *puint) ...@@ -387,8 +350,12 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
size_t line_len = 0; size_t line_len = 0;
char *ptr, *line = NULL; char *ptr, *line = NULL;
int version, patchlevel, sublevel, err; int version, patchlevel, sublevel, err;
FILE *vsig = fopen("/proc/version_signature", "r"); FILE *vsig;
if (!puint)
return 0;
vsig = fopen("/proc/version_signature", "r");
if (!vsig) { if (!vsig) {
pr_debug("Open /proc/version_signature failed: %s\n", pr_debug("Open /proc/version_signature failed: %s\n",
strerror(errno)); strerror(errno));
...@@ -418,8 +385,7 @@ fetch_ubuntu_kernel_version(unsigned int *puint) ...@@ -418,8 +385,7 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
goto errout; goto errout;
} }
if (puint) *puint = (version << 16) + (patchlevel << 8) + sublevel;
*puint = (version << 16) + (patchlevel << 8) + sublevel;
err = 0; err = 0;
errout: errout:
free(line); free(line);
...@@ -446,6 +412,9 @@ fetch_kernel_version(unsigned int *puint, char *str, ...@@ -446,6 +412,9 @@ fetch_kernel_version(unsigned int *puint, char *str,
str[str_size - 1] = '\0'; str[str_size - 1] = '\0';
} }
if (!puint || int_ver_ready)
return 0;
err = sscanf(utsname.release, "%d.%d.%d", err = sscanf(utsname.release, "%d.%d.%d",
&version, &patchlevel, &sublevel); &version, &patchlevel, &sublevel);
...@@ -455,8 +424,7 @@ fetch_kernel_version(unsigned int *puint, char *str, ...@@ -455,8 +424,7 @@ fetch_kernel_version(unsigned int *puint, char *str,
return -1; return -1;
} }
if (puint && !int_ver_ready) *puint = (version << 16) + (patchlevel << 8) + sublevel;
*puint = (version << 16) + (patchlevel << 8) + sublevel;
return 0; return 0;
} }
......
#ifndef GIT_COMPAT_UTIL_H #ifndef GIT_COMPAT_UTIL_H
#define GIT_COMPAT_UTIL_H #define GIT_COMPAT_UTIL_H
#define _ALL_SOURCE 1
#define _BSD_SOURCE 1 #define _BSD_SOURCE 1
/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
#define _DEFAULT_SOURCE 1 #define _DEFAULT_SOURCE 1
...@@ -49,8 +48,6 @@ int hex2u64(const char *ptr, u64 *val); ...@@ -49,8 +48,6 @@ int hex2u64(const char *ptr, u64 *val);
extern unsigned int page_size; extern unsigned int page_size;
extern int cacheline_size; extern int cacheline_size;
bool find_process(const char *name);
int fetch_kernel_version(unsigned int *puint, int fetch_kernel_version(unsigned int *puint,
char *str, size_t str_sz); char *str, size_t str_sz);
#define KVER_VERSION(x) (((x) >> 16) & 0xff) #define KVER_VERSION(x) (((x) >> 16) & 0xff)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment