Commit 62201368 authored by Ian Rogers's avatar Ian Rogers Committed by Arnaldo Carvalho de Melo

perf vendor events: Update Intel ivytown

Update to v21, the metrics are based on TMA 4.4 full.

Use script at:
https://github.com/intel/event-converter-for-linux-perf/blob/master/download_and_gen.py

to download and generate the latest events and metrics. Manually copy
the ivytown files into perf and update mapfile.csv.

Tested on a non-ivytown with 'perf test':
 10: PMU events                                                      :
 10.1: PMU event table sanity                                        : Ok
 10.2: PMU event map aliases                                         : Ok
 10.3: Parsing of PMU event table metrics                            : Ok
 10.4: Parsing of PMU event table metrics with fake PMUs             : Ok
Signed-off-by: default avatarIan Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kshipra Bopardikar <kshipra.bopardikar@intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20220727220832.2865794-16-irogers@google.comSigned-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent 80c14459
......@@ -1257,4 +1257,4 @@
"SampleAfterValue": "100003",
"UMask": "0x10"
}
]
\ No newline at end of file
]
......@@ -166,4 +166,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x1"
}
]
\ No newline at end of file
]
......@@ -312,4 +312,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x1"
}
]
\ No newline at end of file
]
......@@ -130,17 +130,11 @@
"MetricName": "FLOPc_SMT"
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
"MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
"MetricName": "ILP"
},
{
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BadSpec;BrMispredicts",
"MetricName": "IpMispredict"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
"MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
......@@ -196,6 +190,18 @@
"MetricGroup": "Summary;TmaL1",
"MetricName": "Instructions"
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
"MetricName": "Retire"
},
{
"BriefDescription": "",
"MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "Execute"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
......@@ -203,11 +209,16 @@
"MetricName": "DSB_Coverage"
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)",
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BadSpec;BrMispredicts",
"MetricName": "IpMispredict"
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
"MetricGroup": "Mem;MemoryBound;MemoryLat",
"MetricName": "Load_Miss_Real_Latency",
"PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings."
"MetricName": "Load_Miss_Real_Latency"
},
{
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
......@@ -215,24 +226,6 @@
"MetricGroup": "Mem;MemoryBound;MemoryBW",
"MetricName": "MLP"
},
{
"BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L1D_Cache_Fill_BW"
},
{
"BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L2_Cache_Fill_BW"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L3_Cache_Fill_BW"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
......@@ -264,6 +257,48 @@
"MetricGroup": "Mem;MemoryTLB_SMT",
"MetricName": "Page_Walks_Utilization_SMT"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L1D_Cache_Fill_BW"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L2_Cache_Fill_BW"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L3_Cache_Fill_BW"
},
{
"BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "(64 * L1D.REPLACEMENT / 1000000000 / duration_time)",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L1D_Cache_Fill_BW_1T"
},
{
"BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "(64 * L2_LINES_IN.ALL / 1000000000 / duration_time)",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L2_Cache_Fill_BW_1T"
},
{
"BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "(64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time)",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L3_Cache_Fill_BW_1T"
},
{
"BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "0",
"MetricGroup": "Mem;MemoryBW;Offcore",
"MetricName": "L3_Cache_Access_BW_1T"
},
{
"BriefDescription": "Average CPU Utilization",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
......@@ -280,7 +315,8 @@
"BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( ( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE ) / 1000000000 ) / duration_time",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "GFLOPs"
"MetricName": "GFLOPs",
"PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
......
......@@ -500,4 +500,4 @@
"SampleAfterValue": "100003",
"UMask": "0x1"
}
]
\ No newline at end of file
]
......@@ -41,4 +41,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x1"
}
]
\ No newline at end of file
]
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -195,4 +195,4 @@
"SampleAfterValue": "100007",
"UMask": "0x20"
}
]
\ No newline at end of file
]
......@@ -13,7 +13,7 @@ GenuineIntel-6-3F,v25,haswellx,core
GenuineIntel-6-(7D|7E|A7),v1.14,icelake,core
GenuineIntel-6-6[AC],v1.15,icelakex,core
GenuineIntel-6-3A,v22,ivybridge,core
GenuineIntel-6-3E,v19,ivytown,core
GenuineIntel-6-3E,v21,ivytown,core
GenuineIntel-6-2D,v20,jaketown,core
GenuineIntel-6-57,v9,knightslanding,core
GenuineIntel-6-85,v9,knightslanding,core
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment