Commit 8e6389f9 authored by Ian Rogers, committed by Arnaldo Carvalho de Melo

perf vendor events: Update Intel haswell

Update to v31, the metrics are based on TMA 4.4 full.

Use script at:
https://github.com/intel/event-converter-for-linux-perf/blob/master/download_and_gen.py

to download and generate the latest events and metrics. Manually copy
the haswell files into perf and update mapfile.csv.

Tested on a non-haswell with 'perf test':
 10: PMU events                                                      :
 10.1: PMU event table sanity                                        : Ok
 10.2: PMU event map aliases                                         : Ok
 10.3: Parsing of PMU event table metrics                            : Ok
 10.4: Parsing of PMU event table metrics with fake PMUs             : Ok
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kshipra Bopardikar <kshipra.bopardikar@intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20220727220832.2865794-11-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent ae54f70d
......@@ -100,4 +100,4 @@
"SampleAfterValue": "100003",
"UMask": "0x10"
}
]
\ No newline at end of file
]
......@@ -301,4 +301,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x1"
}
]
\ No newline at end of file
]
......@@ -111,17 +111,11 @@
"MetricName": "CoreIPC_SMT"
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
"MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)",
"MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
"MetricName": "ILP"
},
{
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BadSpec;BrMispredicts",
"MetricName": "IpMispredict"
},
{
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
"MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
......@@ -170,6 +164,12 @@
"MetricGroup": "Summary;TmaL1",
"MetricName": "Instructions"
},
{
"BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
"MetricName": "Retire"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
......@@ -177,11 +177,16 @@
"MetricName": "DSB_Coverage"
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)",
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BadSpec;BrMispredicts",
"MetricName": "IpMispredict"
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
"MetricGroup": "Mem;MemoryBound;MemoryLat",
"MetricName": "Load_Miss_Real_Latency",
"PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings."
"MetricName": "Load_Miss_Real_Latency"
},
{
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
......@@ -189,24 +194,6 @@
"MetricGroup": "Mem;MemoryBound;MemoryBW",
"MetricName": "MLP"
},
{
"BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L1D_Cache_Fill_BW"
},
{
"BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L2_Cache_Fill_BW"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L3_Cache_Fill_BW"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
......@@ -238,6 +225,48 @@
"MetricGroup": "Mem;MemoryTLB_SMT",
"MetricName": "Page_Walks_Utilization_SMT"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L1D_Cache_Fill_BW"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L2_Cache_Fill_BW"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L3_Cache_Fill_BW"
},
{
"BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "(64 * L1D.REPLACEMENT / 1000000000 / duration_time)",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L1D_Cache_Fill_BW_1T"
},
{
"BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "(64 * L2_LINES_IN.ALL / 1000000000 / duration_time)",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L2_Cache_Fill_BW_1T"
},
{
"BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "(64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time)",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "L3_Cache_Fill_BW_1T"
},
{
"BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "0",
"MetricGroup": "Mem;MemoryBW;Offcore",
"MetricName": "L3_Cache_Access_BW_1T"
},
{
"BriefDescription": "Average CPU Utilization",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
......
......@@ -40,4 +40,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x1"
}
]
\ No newline at end of file
]
......@@ -1035,7 +1035,6 @@
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xA1",
"EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE",
"PublicDescription": "Cycles per core when uops are exectuted in port 0.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
......@@ -1056,7 +1055,6 @@
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xA1",
"EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE",
"PublicDescription": "Cycles per core when uops are exectuted in port 1.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
......@@ -1117,7 +1115,6 @@
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xA1",
"EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE",
"PublicDescription": "Cycles per core when uops are exectuted in port 4.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
......@@ -1138,7 +1135,6 @@
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xA1",
"EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE",
"PublicDescription": "Cycles per core when uops are exectuted in port 5.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
......@@ -1159,7 +1155,6 @@
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xA1",
"EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE",
"PublicDescription": "Cycles per core when uops are exectuted in port 6.",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
......@@ -1295,11 +1290,11 @@
"BriefDescription": "Cycles with less than 10 actually retired uops.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"CounterMask": "10",
"CounterMask": "16",
"EventCode": "0xC2",
"EventName": "UOPS_RETIRED.TOTAL_CYCLES",
"Invert": "1",
"SampleAfterValue": "2000003",
"UMask": "0x1"
}
]
\ No newline at end of file
]
......@@ -19,11 +19,11 @@
"Unit": "ARB"
},
{
"BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
"BriefDescription": "Each cycle counts number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
"EventCode": "0x80",
"EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
"PerPkg": "1",
"PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
"PublicDescription": "Each cycle counts number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
"UMask": "0x01",
"Unit": "ARB"
},
......@@ -34,6 +34,7 @@
"EventCode": "0x80",
"EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
"PerPkg": "1",
"PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.\n",
"UMask": "0x01",
"Unit": "ARB"
},
......@@ -64,6 +65,6 @@
"EventName": "UNC_CLOCK.SOCKET",
"PerPkg": "1",
"PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
"Unit": "NCU"
"Unit": "CLOCK"
}
]
......@@ -481,4 +481,4 @@
"SampleAfterValue": "100003",
"UMask": "0x20"
}
]
\ No newline at end of file
]
......@@ -8,9 +8,7 @@ GenuineIntel-6-55-[56789ABCDEF],v1.16,cascadelakex,core
GenuineIntel-6-96,v1.03,elkhartlake,core
GenuineIntel-6-5[CF],v13,goldmont,core
GenuineIntel-6-7A,v1.01,goldmontplus,core
GenuineIntel-6-3C,v24,haswell,core
GenuineIntel-6-45,v24,haswell,core
GenuineIntel-6-46,v24,haswell,core
GenuineIntel-6-(3C|45|46),v31,haswell,core
GenuineIntel-6-3F,v17,haswellx,core
GenuineIntel-6-3A,v18,ivybridge,core
GenuineIntel-6-3E,v19,ivytown,core
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment