Commit ea15483e authored by German Gomez's avatar German Gomez Committed by Arnaldo Carvalho de Melo

perf report: Add 'simd' sort field

Add 'simd' sort field to visualize SIMD ops in 'perf report'.

Rows are labeled with the SIMD ISA, and the type of predicate (if any):

  - [p] partial predicate
  - [e] empty predicate (no elements in the vector being used)

Example with Arm SPE and SVE (Scalable Vector Extension):

  #include <arm_sve.h>

  double src[1025], dst[1025];

  int main(void) {
    svfloat64_t vc = svdup_f64(1);
    for(;;)
      for(int i = 0; i < 1025; i += svcntd())
      {
        svbool_t pg = svwhilelt_b64(i, 1025);
        svfloat64_t vsrc = svld1(pg, &src[i]);
        svfloat64_t vdst = svadd_x(pg, vsrc, vc);
        svst1(pg, &dst[i], vdst);
      }
    return 0;
  }

  ... compiled using "gcc-11 -march=armv8-a+sve -O3"

Profiling on a platform that implements FEAT_SVE and FEAT_SPEv1p1:

  $ perf record -e arm_spe_0// -- ./a.out
  $ perf report --itrace=i1i -s overhead,pid,simd,sym

  Overhead      Pid:Command   Simd     Symbol
  ........  ................  .......  ......................

    53.76%    10758:program            [.] main
    46.14%    10758:program   [.] SVE  [.] main
     0.09%    10758:program   [p] SVE  [.] main

The report shows 0.09% of the sampled SVE operations use partial
predicates due to src and dst arrays not being multiples of the vector
register lengths.
Signed-off-by: default avatarGerman Gomez <german.gomez@arm.com>
Acked-by: default avatarIan Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Anshuman.Khandual@arm.com
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20230320151509.1137462-2-james.clark@arm.comSigned-off-by: default avatarJames Clark <james.clark@arm.com>
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent 03a6c16e
...@@ -117,6 +117,7 @@ OPTIONS ...@@ -117,6 +117,7 @@ OPTIONS
- addr: (Full) virtual address of the sampled instruction - addr: (Full) virtual address of the sampled instruction
- retire_lat: On X86, this reports pipeline stall of this instruction compared - retire_lat: On X86, this reports pipeline stall of this instruction compared
to the previous instruction in cycles. And currently supported only on X86 to the previous instruction in cycles. And currently supported only on X86
- simd: Flags describing a SIMD operation. "e" for empty Arm SVE predicate. "p" for partial Arm SVE predicate
By default, comm, dso and symbol keys are used. By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol) (i.e. --sort comm,dso,symbol)
......
...@@ -745,6 +745,7 @@ __hists__add_entry(struct hists *hists, ...@@ -745,6 +745,7 @@ __hists__add_entry(struct hists *hists,
.weight = sample->weight, .weight = sample->weight,
.ins_lat = sample->ins_lat, .ins_lat = sample->ins_lat,
.p_stage_cyc = sample->p_stage_cyc, .p_stage_cyc = sample->p_stage_cyc,
.simd_flags = sample->simd_flags,
}, *he = hists__findnew_entry(hists, &entry, al, sample_self); }, *he = hists__findnew_entry(hists, &entry, al, sample_self);
if (!hists->has_callchains && he && he->callchain_size != 0) if (!hists->has_callchains && he && he->callchain_size != 0)
......
...@@ -81,6 +81,7 @@ enum hist_column { ...@@ -81,6 +81,7 @@ enum hist_column {
HISTC_ADDR_FROM, HISTC_ADDR_FROM,
HISTC_ADDR_TO, HISTC_ADDR_TO,
HISTC_ADDR, HISTC_ADDR,
HISTC_SIMD,
HISTC_NR_COLS, /* Last entry */ HISTC_NR_COLS, /* Last entry */
}; };
......
...@@ -139,6 +139,52 @@ struct sort_entry sort_thread = { ...@@ -139,6 +139,52 @@ struct sort_entry sort_thread = {
.se_width_idx = HISTC_THREAD, .se_width_idx = HISTC_THREAD,
}; };
/* --sort simd */
static int64_t
sort__simd_cmp(struct hist_entry *left, struct hist_entry *right)
{
if (left->simd_flags.arch != right->simd_flags.arch)
return (int64_t) left->simd_flags.arch - right->simd_flags.arch;
return (int64_t) left->simd_flags.pred - right->simd_flags.pred;
}
static const char *hist_entry__get_simd_name(struct simd_flags *simd_flags)
{
u64 arch = simd_flags->arch;
if (arch & SIMD_OP_FLAGS_ARCH_SVE)
return "SVE";
else
return "n/a";
}
static int hist_entry__simd_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width __maybe_unused)
{
const char *name;
if (!he->simd_flags.arch)
return repsep_snprintf(bf, size, "");
name = hist_entry__get_simd_name(&he->simd_flags);
if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_EMPTY)
return repsep_snprintf(bf, size, "[e] %s", name);
else if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_PARTIAL)
return repsep_snprintf(bf, size, "[p] %s", name);
return repsep_snprintf(bf, size, "[.] %s", name);
}
struct sort_entry sort_simd = {
.se_header = "Simd ",
.se_cmp = sort__simd_cmp,
.se_snprintf = hist_entry__simd_snprintf,
.se_width_idx = HISTC_SIMD,
};
/* --sort comm */ /* --sort comm */
/* /*
...@@ -2142,6 +2188,7 @@ static struct sort_dimension common_sort_dimensions[] = { ...@@ -2142,6 +2188,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_ADDR, "addr", sort_addr), DIM(SORT_ADDR, "addr", sort_addr),
DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc), DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc),
DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc), DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc),
DIM(SORT_SIMD, "simd", sort_simd)
}; };
#undef DIM #undef DIM
......
...@@ -111,6 +111,7 @@ struct hist_entry { ...@@ -111,6 +111,7 @@ struct hist_entry {
u64 p_stage_cyc; u64 p_stage_cyc;
u8 cpumode; u8 cpumode;
u8 depth; u8 depth;
struct simd_flags simd_flags;
/* We are added by hists__add_dummy_entry. */ /* We are added by hists__add_dummy_entry. */
bool dummy; bool dummy;
...@@ -241,6 +242,7 @@ enum sort_type { ...@@ -241,6 +242,7 @@ enum sort_type {
SORT_ADDR, SORT_ADDR,
SORT_LOCAL_RETIRE_LAT, SORT_LOCAL_RETIRE_LAT,
SORT_GLOBAL_RETIRE_LAT, SORT_GLOBAL_RETIRE_LAT,
SORT_SIMD,
/* branch stack specific sort keys */ /* branch stack specific sort keys */
__SORT_BRANCH_STACK, __SORT_BRANCH_STACK,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment