Commit a050a6d2 authored by Linus Torvalds

Merge tag 'perf-tools-fixes-for-v5.13-2021-05-24' of...

Merge tag 'perf-tools-fixes-for-v5.13-2021-05-24' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tool fixes from Arnaldo Carvalho de Melo:

 - Fix 'perf script' decoding of Intel PT traces for abort handling and
   sample instruction bytes.

 - Add missing PERF_IP_FLAG_CHARS for VM-Entry and VM-Exit to Intel PT
   'perf script' decoder.

 - Fixes for the python based Intel PT trace viewer GUI.

 - Sync UAPI copies (unwire quotactl_path, some comment fixes).

 - Fix handling of missing kernel software events, such as the recently
   added 'cgroup-switches', and add the trivial glue for it in the
   tooling side, since it was added in this merge window.

 - Add missing initialization of zstd_data in 'perf buildid-list',
   detected with valgrind's memcheck.

 - Remove needless event enable/disable when all events use BPF.

 - Fix libpfm4 support (63) test error for nested event groups.

* tag 'perf-tools-fixes-for-v5.13-2021-05-24' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf stat: Skip evlist__[enable|disable] when all events uses BPF
  perf script: Add missing PERF_IP_FLAG_CHARS for VM-Entry and VM-Exit
  perf scripts python: exported-sql-viewer.py: Fix warning display
  perf scripts python: exported-sql-viewer.py: Fix Array TypeError
  perf scripts python: exported-sql-viewer.py: Fix copy to clipboard from Top Calls by elapsed Time report
  tools headers UAPI: Sync files changed by the quotactl_path unwiring
  tools headers UAPI: Sync linux/perf_event.h with the kernel sources
  tools headers UAPI: Sync linux/fs.h with the kernel sources
  perf parse-events: Check if the software events array slots are populated
  perf tools: Add 'cgroup-switches' software event
  perf intel-pt: Remove redundant setting of ptq->insn_len
  perf intel-pt: Fix sample instruction bytes
  perf intel-pt: Fix transaction abort handling
  perf test: Fix libpfm4 support (63) test error for nested event groups
  tools arch kvm: Sync kvm headers with the kernel sources
  perf buildid-list: Initialize zstd_data
parents 1434a312 f8b61bd2
...@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr { ...@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
__u16 flags; __u16 flags;
} smm; } smm;
__u16 pad;
__u32 flags; __u32 flags;
__u64 preemption_timer_deadline; __u64 preemption_timer_deadline;
}; };
......
...@@ -185,7 +185,7 @@ struct fsxattr { ...@@ -185,7 +185,7 @@ struct fsxattr {
#define BLKROTATIONAL _IO(0x12,126) #define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127) #define BLKZEROOUT _IO(0x12,127)
/* /*
* A jump here: 130-131 are reserved for zoned block devices * A jump here: 130-136 are reserved for zoned block devices
* (see uapi/linux/blkzoned.h) * (see uapi/linux/blkzoned.h)
*/ */
......
...@@ -464,7 +464,7 @@ struct perf_event_attr { ...@@ -464,7 +464,7 @@ struct perf_event_attr {
/* /*
* User provided data if sigtrap=1, passed back to user via * User provided data if sigtrap=1, passed back to user via
* siginfo_t::si_perf, e.g. to permit user to identify the event. * siginfo_t::si_perf_data, e.g. to permit user to identify the event.
*/ */
__u64 sig_data; __u64 sig_data;
}; };
......
...@@ -108,9 +108,9 @@ displayed as follows: ...@@ -108,9 +108,9 @@ displayed as follows:
perf script --itrace=ibxwpe -F+flags perf script --itrace=ibxwpe -F+flags
The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional,
system, asynchronous, interrupt, transaction abort, trace begin, trace end, and system, asynchronous, interrupt, transaction abort, trace begin, trace end,
in transaction, respectively. in transaction, VM-entry, and VM-exit respectively.
perf script also supports higher level ways to dump instruction traces: perf script also supports higher level ways to dump instruction traces:
......
...@@ -183,14 +183,15 @@ OPTIONS ...@@ -183,14 +183,15 @@ OPTIONS
At this point usage is displayed, and perf-script exits. At this point usage is displayed, and perf-script exits.
The flags field is synthesized and may have a value when Instruction The flags field is synthesized and may have a value when Instruction
Trace decoding. The flags are "bcrosyiABEx" which stand for branch, Trace decoding. The flags are "bcrosyiABExgh" which stand for branch,
call, return, conditional, system, asynchronous, interrupt, call, return, conditional, system, asynchronous, interrupt,
transaction abort, trace begin, trace end, and in transaction, transaction abort, trace begin, trace end, in transaction, VM-Entry, and VM-Exit
respectively. Known combinations of flags are printed more nicely e.g. respectively. Known combinations of flags are printed more nicely e.g.
"call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b", "call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b",
"int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs", "int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs",
"async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB", "async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB",
"tr end" for "bE". However the "x" flag will be display separately in those "tr end" for "bE", "vmentry" for "bcg", "vmexit" for "bch".
However the "x" flag will be displayed separately in those
cases e.g. "jcc (x)" for a condition branch within a transaction. cases e.g. "jcc (x)" for a condition branch within a transaction.
The callindent field is synthesized and may have a value when The callindent field is synthesized and may have a value when
......
...@@ -357,7 +357,7 @@ ...@@ -357,7 +357,7 @@
440 n64 process_madvise sys_process_madvise 440 n64 process_madvise sys_process_madvise
441 n64 epoll_pwait2 sys_epoll_pwait2 441 n64 epoll_pwait2 sys_epoll_pwait2
442 n64 mount_setattr sys_mount_setattr 442 n64 mount_setattr sys_mount_setattr
443 n64 quotactl_path sys_quotactl_path # 443 reserved for quotactl_path
444 n64 landlock_create_ruleset sys_landlock_create_ruleset 444 n64 landlock_create_ruleset sys_landlock_create_ruleset
445 n64 landlock_add_rule sys_landlock_add_rule 445 n64 landlock_add_rule sys_landlock_add_rule
446 n64 landlock_restrict_self sys_landlock_restrict_self 446 n64 landlock_restrict_self sys_landlock_restrict_self
...@@ -522,7 +522,7 @@ ...@@ -522,7 +522,7 @@
440 common process_madvise sys_process_madvise 440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr 442 common mount_setattr sys_mount_setattr
443 common quotactl_path sys_quotactl_path # 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset 444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule 445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self 446 common landlock_restrict_self sys_landlock_restrict_self
...@@ -445,7 +445,7 @@ ...@@ -445,7 +445,7 @@
440 common process_madvise sys_process_madvise sys_process_madvise 440 common process_madvise sys_process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr sys_mount_setattr 442 common mount_setattr sys_mount_setattr sys_mount_setattr
443 common quotactl_path sys_quotactl_path sys_quotactl_path # 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
...@@ -364,7 +364,7 @@ ...@@ -364,7 +364,7 @@
440 common process_madvise sys_process_madvise 440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 441 common epoll_pwait2 sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr 442 common mount_setattr sys_mount_setattr
443 common quotactl_path sys_quotactl_path # 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset 444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule 445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self 446 common landlock_restrict_self sys_landlock_restrict_self
......
...@@ -80,6 +80,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits) ...@@ -80,6 +80,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID)) if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID))
with_hits = true; with_hits = true;
if (zstd_init(&(session->zstd_data), 0) < 0)
pr_warning("Decompression initialization failed. Reported data may be incomplete.\n");
/* /*
* in pipe-mode, the only way to get the buildids is to parse * in pipe-mode, the only way to get the buildids is to parse
* the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
......
...@@ -572,6 +572,7 @@ static int enable_counters(void) ...@@ -572,6 +572,7 @@ static int enable_counters(void)
* - we have initial delay configured * - we have initial delay configured
*/ */
if (!target__none(&target) || stat_config.initial_delay) { if (!target__none(&target) || stat_config.initial_delay) {
if (!all_counters_use_bpf)
evlist__enable(evsel_list); evlist__enable(evsel_list);
if (stat_config.initial_delay > 0) if (stat_config.initial_delay > 0)
pr_info(EVLIST_ENABLED_MSG); pr_info(EVLIST_ENABLED_MSG);
...@@ -581,13 +582,19 @@ static int enable_counters(void) ...@@ -581,13 +582,19 @@ static int enable_counters(void)
static void disable_counters(void) static void disable_counters(void)
{ {
struct evsel *counter;
/* /*
* If we don't have tracee (attaching to task or cpu), counters may * If we don't have tracee (attaching to task or cpu), counters may
* still be running. To get accurate group ratios, we must stop groups * still be running. To get accurate group ratios, we must stop groups
* from counting before reading their constituent counters. * from counting before reading their constituent counters.
*/ */
if (!target__none(&target)) if (!target__none(&target)) {
evlist__for_each_entry(evsel_list, counter)
bpf_counter__disable(counter);
if (!all_counters_use_bpf)
evlist__disable(evsel_list); evlist__disable(evsel_list);
}
} }
static volatile int workload_exec_errno; static volatile int workload_exec_errno;
......
...@@ -91,6 +91,11 @@ ...@@ -91,6 +91,11 @@
from __future__ import print_function from __future__ import print_function
import sys import sys
# Only change warnings if the python -W option was not used
if not sys.warnoptions:
import warnings
# PySide2 causes deprecation warnings, ignore them.
warnings.filterwarnings("ignore", category=DeprecationWarning)
import argparse import argparse
import weakref import weakref
import threading import threading
...@@ -125,8 +130,9 @@ if pyside_version_1: ...@@ -125,8 +130,9 @@ if pyside_version_1:
from PySide.QtGui import * from PySide.QtGui import *
from PySide.QtSql import * from PySide.QtSql import *
from decimal import * from decimal import Decimal, ROUND_HALF_UP
from ctypes import * from ctypes import CDLL, Structure, create_string_buffer, addressof, sizeof, \
c_void_p, c_bool, c_byte, c_char, c_int, c_uint, c_longlong, c_ulonglong
from multiprocessing import Process, Array, Value, Event from multiprocessing import Process, Array, Value, Event
# xrange is range in Python3 # xrange is range in Python3
...@@ -3868,7 +3874,7 @@ def CopyTableCellsToClipboard(view, as_csv=False, with_hdr=False): ...@@ -3868,7 +3874,7 @@ def CopyTableCellsToClipboard(view, as_csv=False, with_hdr=False):
if with_hdr: if with_hdr:
model = indexes[0].model() model = indexes[0].model()
for col in range(min_col, max_col + 1): for col in range(min_col, max_col + 1):
val = model.headerData(col, Qt.Horizontal) val = model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
if as_csv: if as_csv:
text += sep + ToCSValue(val) text += sep + ToCSValue(val)
sep = "," sep = ","
......
...@@ -131,8 +131,8 @@ static int test__pfm_group(void) ...@@ -131,8 +131,8 @@ static int test__pfm_group(void)
}, },
{ {
.events = "{},{instructions}", .events = "{},{instructions}",
.nr_events = 0, .nr_events = 1,
.nr_groups = 0, .nr_groups = 1,
}, },
{ {
.events = "{instructions},{instructions}", .events = "{instructions},{instructions}",
......
...@@ -100,7 +100,7 @@ enum { ...@@ -100,7 +100,7 @@ enum {
PERF_IP_FLAG_VMEXIT = 1ULL << 12, PERF_IP_FLAG_VMEXIT = 1ULL << 12,
}; };
#define PERF_IP_FLAG_CHARS "bcrosyiABEx" #define PERF_IP_FLAG_CHARS "bcrosyiABExgh"
#define PERF_BRANCH_MASK (\ #define PERF_BRANCH_MASK (\
PERF_IP_FLAG_BRANCH |\ PERF_IP_FLAG_BRANCH |\
......
...@@ -425,9 +425,6 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name) ...@@ -425,9 +425,6 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
if (affinity__setup(&affinity) < 0) if (affinity__setup(&affinity) < 0)
return; return;
evlist__for_each_entry(evlist, pos)
bpf_counter__disable(pos);
/* Disable 'immediate' events last */ /* Disable 'immediate' events last */
for (imm = 0; imm <= 1; imm++) { for (imm = 0; imm <= 1; imm++) {
evlist__for_each_cpu(evlist, i, cpu) { evlist__for_each_cpu(evlist, i, cpu) {
......
...@@ -1146,6 +1146,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) ...@@ -1146,6 +1146,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
decoder->set_fup_tx_flags = false; decoder->set_fup_tx_flags = false;
decoder->tx_flags = decoder->fup_tx_flags; decoder->tx_flags = decoder->fup_tx_flags;
decoder->state.type = INTEL_PT_TRANSACTION; decoder->state.type = INTEL_PT_TRANSACTION;
if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
decoder->state.type |= INTEL_PT_BRANCH;
decoder->state.from_ip = decoder->ip; decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0; decoder->state.to_ip = 0;
decoder->state.flags = decoder->fup_tx_flags; decoder->state.flags = decoder->fup_tx_flags;
...@@ -1220,8 +1222,10 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) ...@@ -1220,8 +1222,10 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
return 0; return 0;
if (err == -EAGAIN || if (err == -EAGAIN ||
intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) { intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
if (intel_pt_fup_event(decoder)) if (intel_pt_fup_event(decoder) && no_tip)
return 0; return 0;
return -EAGAIN; return -EAGAIN;
} }
......
...@@ -707,8 +707,10 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, ...@@ -707,8 +707,10 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
*ip += intel_pt_insn->length; *ip += intel_pt_insn->length;
if (to_ip && *ip == to_ip) if (to_ip && *ip == to_ip) {
intel_pt_insn->length = 0;
goto out_no_cache; goto out_no_cache;
}
if (*ip >= al.map->end) if (*ip >= al.map->end)
break; break;
...@@ -1198,6 +1200,7 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, ...@@ -1198,6 +1200,7 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
static void intel_pt_sample_flags(struct intel_pt_queue *ptq) static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{ {
ptq->insn_len = 0;
if (ptq->state->flags & INTEL_PT_ABORT_TX) { if (ptq->state->flags & INTEL_PT_ABORT_TX) {
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
} else if (ptq->state->flags & INTEL_PT_ASYNC) { } else if (ptq->state->flags & INTEL_PT_ASYNC) {
...@@ -1211,7 +1214,6 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) ...@@ -1211,7 +1214,6 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_ASYNC |
PERF_IP_FLAG_INTERRUPT; PERF_IP_FLAG_INTERRUPT;
ptq->insn_len = 0;
} else { } else {
if (ptq->state->from_ip) if (ptq->state->from_ip)
ptq->flags = intel_pt_insn_type(ptq->state->insn_op); ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
......
...@@ -150,6 +150,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { ...@@ -150,6 +150,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
.symbol = "bpf-output", .symbol = "bpf-output",
.alias = "", .alias = "",
}, },
[PERF_COUNT_SW_CGROUP_SWITCHES] = {
.symbol = "cgroup-switches",
.alias = "",
},
}; };
#define __PERF_EVENT_FIELD(config, name) \ #define __PERF_EVENT_FIELD(config, name) \
...@@ -2928,9 +2932,14 @@ void print_symbol_events(const char *event_glob, unsigned type, ...@@ -2928,9 +2932,14 @@ void print_symbol_events(const char *event_glob, unsigned type,
} }
for (i = 0; i < max; i++, syms++) { for (i = 0; i < max; i++, syms++) {
/*
* New attr.config still not supported here, the latest
* example was PERF_COUNT_SW_CGROUP_SWITCHES
*/
if (syms->symbol == NULL)
continue;
if (event_glob != NULL && syms->symbol != NULL && if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) ||
!(strglobmatch(syms->symbol, event_glob) ||
(syms->alias && strglobmatch(syms->alias, event_glob)))) (syms->alias && strglobmatch(syms->alias, event_glob))))
continue; continue;
......
...@@ -347,6 +347,7 @@ emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM ...@@ -347,6 +347,7 @@ emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM
dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
/* /*
* We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately. * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
......
...@@ -62,8 +62,16 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, ...@@ -62,8 +62,16 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
} }
/* no event */ /* no event */
if (*q == '\0') if (*q == '\0') {
if (*sep == '}') {
if (grp_evt < 0) {
ui__error("cannot close a non-existing event group\n");
goto error;
}
grp_evt--;
}
continue; continue;
}
memset(&attr, 0, sizeof(attr)); memset(&attr, 0, sizeof(attr));
event_attr_init(&attr); event_attr_init(&attr);
...@@ -107,6 +115,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, ...@@ -107,6 +115,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
grp_evt = -1; grp_evt = -1;
} }
} }
free(p_orig);
return 0; return 0;
error: error:
free(p_orig); free(p_orig);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment