Commit 00a7f0d7 authored by Lionel Landwerlin

drm/i915/tgl: Add perf support on TGL

The design of the OA unit has been split into several units. We now
have a global unit (OAG) and a render specific unit (OAR). This leads
to some changes in how we program things. Some details:

OAR:
  - has its own set of counter registers, they are per-context
    saved/restored
  - counters are not written to the circular OA buffer
  - a snapshot of the counters can be acquired with
    MI_RECORD_PERF_COUNT, or a single counter can be read with
    MI_STORE_REGISTER_MEM.

OAG:
  - has global counters that increment across context switches
  - counters are written into the circular OA buffer (if requested)

v2: Fix checkpatch warnings on code style (Lucas)
v3: (Umesh)
  - Update register from which tail, status and head are read
  - Update logic to sample context reports
  - Update whitelist mux and b counter regs
v4: Fix a bug when updating context image for new contexts (Umesh)
v5: Squash patch enabling save/restore of counters into context image

    We want this so we can preempt performance queries and keep the
    system responsive even when long running queries are ongoing. We
    avoid doing it for all contexts.

    - use LRI to modify context control (Chris)
    - use MASKED_FIELD to program just the masked bits (Chris)
    - disable save/restore of counters on cleanup (Chris)
v6: Do not use implicit parameters (Chris)

BSpec: 28727, 30021
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Chris Wilson <chris.p.wilson@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191025193746.47155-2-umesh.nerlige.ramappa@intel.com
parent fc215230
...@@ -247,7 +247,8 @@ i915-y += \ ...@@ -247,7 +247,8 @@ i915-y += \
oa/i915_oa_cflgt2.o \ oa/i915_oa_cflgt2.o \
oa/i915_oa_cflgt3.o \ oa/i915_oa_cflgt3.o \
oa/i915_oa_cnl.o \ oa/i915_oa_cnl.o \
oa/i915_oa_icl.o oa/i915_oa_icl.o \
oa/i915_oa_tgl.o
i915-y += i915_perf.o i915-y += i915_perf.o
# Post-mortem debug and GPU hang state capture # Post-mortem debug and GPU hang state capture
......
...@@ -43,6 +43,7 @@ struct intel_engine_cs; ...@@ -43,6 +43,7 @@ struct intel_engine_cs;
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0)
#define CTX_CTRL_RS_CTX_ENABLE (1 << 1) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1)
#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2)
#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8)
#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510)
#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550)
......
This diff is collapsed.
...@@ -684,6 +684,45 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) ...@@ -684,6 +684,45 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define OABUFFER_SIZE_8M (6 << 3) #define OABUFFER_SIZE_8M (6 << 3)
#define OABUFFER_SIZE_16M (7 << 3) #define OABUFFER_SIZE_16M (7 << 3)
/*
 * Gen12 OAR unit
 *
 * Render-specific OA unit. Per the commit description, it has its own set of
 * counter registers that are saved/restored per context, and its counters are
 * not written to the circular OA buffer.
 */
#define GEN12_OAR_OACONTROL _MMIO(0x2960)
#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
#define GEN12_OAR_OACONTROL_COUNTER_ENABLE (1 << 0)
#define GEN12_OACTXCONTROL _MMIO(0x2360)
#define GEN12_OAR_OASTATUS _MMIO(0x2968)
/*
 * Gen12 OAG unit
 *
 * Global OA unit. Per the commit description, its counters increment across
 * context switches and are written into the circular OA buffer (if
 * requested).
 */
#define GEN12_OAG_OAHEADPTR _MMIO(0xdb00)
#define GEN12_OAG_OAHEADPTR_MASK 0xffffffc0
#define GEN12_OAG_OATAILPTR _MMIO(0xdb04)
#define GEN12_OAG_OATAILPTR_MASK 0xffffffc0
#define GEN12_OAG_OABUFFER _MMIO(0xdb08)
#define GEN12_OAG_OABUFFER_BUFFER_SIZE_MASK (0x7)
#define GEN12_OAG_OABUFFER_BUFFER_SIZE_SHIFT (3)
#define GEN12_OAG_OABUFFER_MEMORY_SELECT (1 << 0) /* 0: PPGTT, 1: GGTT */
#define GEN12_OAG_OAGLBCTXCTRL _MMIO(0x2b28)
#define GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT 2
#define GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE (1 << 1)
#define GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME (1 << 0)
#define GEN12_OAG_OACONTROL _MMIO(0xdaf4)
#define GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT 2
#define GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE (1 << 0)
#define GEN12_OAG_OA_DEBUG _MMIO(0xdaf8)
#define GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO (1 << 6)
#define GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS (1 << 5)
#define GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS (1 << 2)
#define GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1 << 1)
#define GEN12_OAG_OASTATUS _MMIO(0xdafc)
#define GEN12_OAG_OASTATUS_COUNTER_OVERFLOW (1 << 2)
#define GEN12_OAG_OASTATUS_BUFFER_OVERFLOW (1 << 1)
#define GEN12_OAG_OASTATUS_REPORT_LOST (1 << 0)
/* /*
* Flexible, Aggregate EU Counter Registers. * Flexible, Aggregate EU Counter Registers.
* Note: these aren't contiguous * Note: these aren't contiguous
...@@ -920,6 +959,26 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) ...@@ -920,6 +959,26 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define OAREPORTTRIG8_NOA_SELECT_6_SHIFT 24 #define OAREPORTTRIG8_NOA_SELECT_6_SHIFT 24
#define OAREPORTTRIG8_NOA_SELECT_7_SHIFT 28 #define OAREPORTTRIG8_NOA_SELECT_7_SHIFT 28
/* Same layout as OASTARTTRIGX */
#define GEN12_OAG_OASTARTTRIG1 _MMIO(0xd900)
#define GEN12_OAG_OASTARTTRIG2 _MMIO(0xd904)
#define GEN12_OAG_OASTARTTRIG3 _MMIO(0xd908)
#define GEN12_OAG_OASTARTTRIG4 _MMIO(0xd90c)
#define GEN12_OAG_OASTARTTRIG5 _MMIO(0xd910)
#define GEN12_OAG_OASTARTTRIG6 _MMIO(0xd914)
#define GEN12_OAG_OASTARTTRIG7 _MMIO(0xd918)
#define GEN12_OAG_OASTARTTRIG8 _MMIO(0xd91c)
/* Same layout as OAREPORTTRIGX */
#define GEN12_OAG_OAREPORTTRIG1 _MMIO(0xd920)
#define GEN12_OAG_OAREPORTTRIG2 _MMIO(0xd924)
#define GEN12_OAG_OAREPORTTRIG3 _MMIO(0xd928)
#define GEN12_OAG_OAREPORTTRIG4 _MMIO(0xd92c)
#define GEN12_OAG_OAREPORTTRIG5 _MMIO(0xd930)
#define GEN12_OAG_OAREPORTTRIG6 _MMIO(0xd934)
#define GEN12_OAG_OAREPORTTRIG7 _MMIO(0xd938)
#define GEN12_OAG_OAREPORTTRIG8 _MMIO(0xd93c)
/* CECX_0 */ /* CECX_0 */
#define OACEC_COMPARE_LESS_OR_EQUAL 6 #define OACEC_COMPARE_LESS_OR_EQUAL 6
#define OACEC_COMPARE_NOT_EQUAL 5 #define OACEC_COMPARE_NOT_EQUAL 5
...@@ -936,6 +995,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) ...@@ -936,6 +995,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define OACEC_SELECT_PREV (1 << 19) #define OACEC_SELECT_PREV (1 << 19)
#define OACEC_SELECT_BOOLEAN (2 << 19) #define OACEC_SELECT_BOOLEAN (2 << 19)
/* 11-bit array 0: pass-through, 1: negated */
#define GEN12_OASCEC_NEGATE_MASK 0x7ff
#define GEN12_OASCEC_NEGATE_SHIFT 21
/* CECX_1 */ /* CECX_1 */
#define OACEC_MASK_MASK 0xffff #define OACEC_MASK_MASK 0xffff
#define OACEC_CONSIDERATIONS_MASK 0xffff #define OACEC_CONSIDERATIONS_MASK 0xffff
...@@ -958,6 +1021,42 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) ...@@ -958,6 +1021,42 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define OACEC7_0 _MMIO(0x27a8) #define OACEC7_0 _MMIO(0x27a8)
#define OACEC7_1 _MMIO(0x27ac) #define OACEC7_1 _MMIO(0x27ac)
/* Same layout as CECX_Y */
#define GEN12_OAG_CEC0_0 _MMIO(0xd940)
#define GEN12_OAG_CEC0_1 _MMIO(0xd944)
#define GEN12_OAG_CEC1_0 _MMIO(0xd948)
#define GEN12_OAG_CEC1_1 _MMIO(0xd94c)
#define GEN12_OAG_CEC2_0 _MMIO(0xd950)
#define GEN12_OAG_CEC2_1 _MMIO(0xd954)
#define GEN12_OAG_CEC3_0 _MMIO(0xd958)
#define GEN12_OAG_CEC3_1 _MMIO(0xd95c)
#define GEN12_OAG_CEC4_0 _MMIO(0xd960)
#define GEN12_OAG_CEC4_1 _MMIO(0xd964)
#define GEN12_OAG_CEC5_0 _MMIO(0xd968)
#define GEN12_OAG_CEC5_1 _MMIO(0xd96c)
#define GEN12_OAG_CEC6_0 _MMIO(0xd970)
#define GEN12_OAG_CEC6_1 _MMIO(0xd974)
#define GEN12_OAG_CEC7_0 _MMIO(0xd978)
#define GEN12_OAG_CEC7_1 _MMIO(0xd97c)
/* Same layout as CECX_Y + negate 11-bit array */
#define GEN12_OAG_SCEC0_0 _MMIO(0xdc00)
#define GEN12_OAG_SCEC0_1 _MMIO(0xdc04)
#define GEN12_OAG_SCEC1_0 _MMIO(0xdc08)
#define GEN12_OAG_SCEC1_1 _MMIO(0xdc0c)
#define GEN12_OAG_SCEC2_0 _MMIO(0xdc10)
#define GEN12_OAG_SCEC2_1 _MMIO(0xdc14)
#define GEN12_OAG_SCEC3_0 _MMIO(0xdc18)
#define GEN12_OAG_SCEC3_1 _MMIO(0xdc1c)
#define GEN12_OAG_SCEC4_0 _MMIO(0xdc20)
#define GEN12_OAG_SCEC4_1 _MMIO(0xdc24)
#define GEN12_OAG_SCEC5_0 _MMIO(0xdc28)
#define GEN12_OAG_SCEC5_1 _MMIO(0xdc2c)
#define GEN12_OAG_SCEC6_0 _MMIO(0xdc30)
#define GEN12_OAG_SCEC6_1 _MMIO(0xdc34)
#define GEN12_OAG_SCEC7_0 _MMIO(0xdc38)
#define GEN12_OAG_SCEC7_1 _MMIO(0xdc3c)
/* OA perf counters */ /* OA perf counters */
#define OA_PERFCNT1_LO _MMIO(0x91B8) #define OA_PERFCNT1_LO _MMIO(0x91B8)
#define OA_PERFCNT1_HI _MMIO(0x91BC) #define OA_PERFCNT1_HI _MMIO(0x91BC)
...@@ -1038,6 +1137,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) ...@@ -1038,6 +1137,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define MICRO_BP3_COUNT_STATUS23 _MMIO(0x9838) #define MICRO_BP3_COUNT_STATUS23 _MMIO(0x9838)
#define MICRO_BP_FIRED_ARMED _MMIO(0x983C) #define MICRO_BP_FIRED_ARMED _MMIO(0x983C)
#define GEN12_OAA_DBG_REG _MMIO(0xdc44)
#define GEN12_OAG_OA_PESS _MMIO(0x2b2c)
#define GEN12_OAG_SPCTR_CNF _MMIO(0xdc40)
#define GDT_CHICKEN_BITS _MMIO(0x9840) #define GDT_CHICKEN_BITS _MMIO(0x9840)
#define GT_NOA_ENABLE 0x00000080 #define GT_NOA_ENABLE 0x00000080
......
// SPDX-License-Identifier: MIT
/*
* Copyright © 2018 Intel Corporation
*
* Autogenerated file by GPU Top : https://github.com/rib/gputop
* DO NOT EDIT manually!
*/
#include <linux/sysfs.h>
#include "i915_drv.h"
#include "i915_oa_tgl.h"
/*
 * B-counter register programming for the TGL "test_oa" configuration, as
 * { register offset, value } pairs. The table is handed to
 * test_config.b_counter_regs by i915_perf_load_test_config_tgl().
 *
 * Each offset matches one of the GEN12_OAG_* register macros; the matching
 * macro name is noted next to every entry. The values are autogenerated and
 * must not be edited by hand.
 */
static const struct i915_oa_reg b_counter_config_test_oa[] = {
{ _MMIO(0xD920), 0x00000000 }, /* GEN12_OAG_OAREPORTTRIG1 */
{ _MMIO(0xD900), 0x00000000 }, /* GEN12_OAG_OASTARTTRIG1 */
{ _MMIO(0xD904), 0xF0800000 }, /* GEN12_OAG_OASTARTTRIG2 */
{ _MMIO(0xD910), 0x00000000 }, /* GEN12_OAG_OASTARTTRIG5 */
{ _MMIO(0xD914), 0xF0800000 }, /* GEN12_OAG_OASTARTTRIG6 */
{ _MMIO(0xDC40), 0x00FF0000 }, /* GEN12_OAG_SPCTR_CNF */
{ _MMIO(0xD940), 0x00000004 }, /* GEN12_OAG_CEC0_0 */
{ _MMIO(0xD944), 0x0000FFFF }, /* GEN12_OAG_CEC0_1 */
{ _MMIO(0xDC00), 0x00000004 }, /* GEN12_OAG_SCEC0_0 */
{ _MMIO(0xDC04), 0x0000FFFF }, /* GEN12_OAG_SCEC0_1 */
{ _MMIO(0xD948), 0x00000003 }, /* GEN12_OAG_CEC1_0 */
{ _MMIO(0xD94C), 0x0000FFFF }, /* GEN12_OAG_CEC1_1 */
{ _MMIO(0xDC08), 0x00000003 }, /* GEN12_OAG_SCEC1_0 */
{ _MMIO(0xDC0C), 0x0000FFFF }, /* GEN12_OAG_SCEC1_1 */
{ _MMIO(0xD950), 0x00000007 }, /* GEN12_OAG_CEC2_0 */
{ _MMIO(0xD954), 0x0000FFFF }, /* GEN12_OAG_CEC2_1 */
{ _MMIO(0xDC10), 0x00000007 }, /* GEN12_OAG_SCEC2_0 */
{ _MMIO(0xDC14), 0x0000FFFF }, /* GEN12_OAG_SCEC2_1 */
{ _MMIO(0xD958), 0x00100002 }, /* GEN12_OAG_CEC3_0 */
{ _MMIO(0xD95C), 0x0000FFF7 }, /* GEN12_OAG_CEC3_1 */
{ _MMIO(0xDC18), 0x00100002 }, /* GEN12_OAG_SCEC3_0 */
{ _MMIO(0xDC1C), 0x0000FFF7 }, /* GEN12_OAG_SCEC3_1 */
{ _MMIO(0xD960), 0x00100002 }, /* GEN12_OAG_CEC4_0 */
{ _MMIO(0xD964), 0x0000FFCF }, /* GEN12_OAG_CEC4_1 */
{ _MMIO(0xDC20), 0x00100002 }, /* GEN12_OAG_SCEC4_0 */
{ _MMIO(0xDC24), 0x0000FFCF }, /* GEN12_OAG_SCEC4_1 */
{ _MMIO(0xD968), 0x00100082 }, /* GEN12_OAG_CEC5_0 */
{ _MMIO(0xD96C), 0x0000FFEF }, /* GEN12_OAG_CEC5_1 */
{ _MMIO(0xDC28), 0x00100082 }, /* GEN12_OAG_SCEC5_0 */
{ _MMIO(0xDC2C), 0x0000FFEF }, /* GEN12_OAG_SCEC5_1 */
{ _MMIO(0xD970), 0x001000C2 }, /* GEN12_OAG_CEC6_0 */
{ _MMIO(0xD974), 0x0000FFE7 }, /* GEN12_OAG_CEC6_1 */
{ _MMIO(0xDC30), 0x001000C2 }, /* GEN12_OAG_SCEC6_0 */
{ _MMIO(0xDC34), 0x0000FFE7 }, /* GEN12_OAG_SCEC6_1 */
{ _MMIO(0xD978), 0x00100001 }, /* GEN12_OAG_CEC7_0 */
{ _MMIO(0xD97C), 0x0000FFE7 }, /* GEN12_OAG_CEC7_1 */
{ _MMIO(0xDC38), 0x00100001 }, /* GEN12_OAG_SCEC7_0 */
{ _MMIO(0xDC3C), 0x0000FFE7 }, /* GEN12_OAG_SCEC7_1 */
};
/*
 * Flex EU register programming for the TGL "test_oa" configuration.
 * Intentionally empty: this configuration programs no flex EU registers.
 * i915_perf_load_test_config_tgl() still records the table and its
 * ARRAY_SIZE() in test_config.flex_regs / flex_regs_len.
 */
static const struct i915_oa_reg flex_eu_config_test_oa[] = {
};
/*
 * Mux register programming for the TGL "test_oa" configuration, as
 * { register offset, value } pairs. The table is handed to
 * test_config.mux_regs by i915_perf_load_test_config_tgl().
 *
 * NOTE(review): many consecutive entries target the same offset (0x9888),
 * interleaved with writes to 0x9884, so this looks like an order-dependent
 * write sequence rather than a set of independent registers - keep the entry
 * order exactly as generated. Offsets 0x0D04, 0x9884 and 0x9888 are not named
 * in the code visible here; confirm their meaning against the register
 * definitions before changing anything.
 */
static const struct i915_oa_reg mux_config_test_oa[] = {
{ _MMIO(0x0D04), 0x00000200 },
{ _MMIO(0x9840), 0x00000000 }, /* GDT_CHICKEN_BITS */
{ _MMIO(0x9884), 0x00000000 },
{ _MMIO(0x9888), 0x280E0000 },
{ _MMIO(0x9888), 0x1E0E0147 },
{ _MMIO(0x9888), 0x180E0000 },
{ _MMIO(0x9888), 0x160E0000 },
{ _MMIO(0x9888), 0x1E0F1000 },
{ _MMIO(0x9888), 0x1E104000 },
{ _MMIO(0x9888), 0x2E020100 },
{ _MMIO(0x9888), 0x2C030004 },
{ _MMIO(0x9888), 0x38003000 },
{ _MMIO(0x9888), 0x1E0A8000 },
{ _MMIO(0x9884), 0x00000003 },
{ _MMIO(0x9888), 0x49110000 },
{ _MMIO(0x9888), 0x5D101400 },
{ _MMIO(0x9888), 0x1D140020 },
{ _MMIO(0x9888), 0x1D1103A3 },
{ _MMIO(0x9888), 0x01110000 },
{ _MMIO(0x9888), 0x61111000 },
{ _MMIO(0x9888), 0x1F128000 },
{ _MMIO(0x9888), 0x17100000 },
{ _MMIO(0x9888), 0x55100630 },
{ _MMIO(0x9888), 0x57100000 },
{ _MMIO(0x9888), 0x31100000 },
{ _MMIO(0x9884), 0x00000003 },
{ _MMIO(0x9888), 0x65100002 },
{ _MMIO(0x9884), 0x00000000 },
{ _MMIO(0x9888), 0x42000001 },
};
/*
 * sysfs show callback for the "id" attribute of the test_oa configuration.
 *
 * Writes the fixed id string "1" followed by a newline into @buf and returns
 * the number of characters written (not counting the terminating NUL).
 * @kdev and @attr are not used.
 */
static ssize_t
show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
{
	const int written = sprintf(buf, "1\n");

	return written;
}
/*
 * Populate the built-in "test_oa" OA configuration for TGL.
 *
 * Fills dev_priv->perf.test_config with:
 *  - the configuration UUID and id (1, matching what show_test_oa_id()
 *    reports),
 *  - the mux, B-counter and flex EU register tables defined in this file,
 *    together with their lengths,
 *  - the sysfs attribute group (named after the UUID) containing a single
 *    read-only "id" attribute served by show_test_oa_id().
 *
 * @dev_priv: device private whose perf.test_config is initialised.
 */
void
i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv)
{
	/*
	 * Single definition of the UUID: it is both copied into the config
	 * and used as the sysfs group name, so the two cannot drift apart.
	 * Static storage keeps the pointer stored in sysfs_metric.name valid
	 * after this function returns.
	 */
	static const char test_oa_uuid[] = "80a833f0-2504-4321-8894-e9277844ce7b";

	/*
	 * strscpy() instead of the deprecated strlcpy(): it never reads past
	 * the destination size and always NUL-terminates the destination.
	 */
	strscpy(dev_priv->perf.test_config.uuid,
		test_oa_uuid,
		sizeof(dev_priv->perf.test_config.uuid));
	dev_priv->perf.test_config.id = 1;

	/* Register programming tables and their entry counts. */
	dev_priv->perf.test_config.mux_regs = mux_config_test_oa;
	dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
	dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa;
	dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
	dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa;
	dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);

	/* sysfs group named after the UUID, exposing one read-only "id" file. */
	dev_priv->perf.test_config.sysfs_metric.name = test_oa_uuid;
	dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs;
	dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr;
	dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id";
	dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444;
	dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id;
}
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2018 Intel Corporation
*
* Autogenerated file by GPU Top : https://github.com/rib/gputop
* DO NOT EDIT manually!
*/
#ifndef __I915_OA_TGL_H__
#define __I915_OA_TGL_H__
/* Forward declaration: only a pointer to the device private is used here. */
struct drm_i915_private;
/*
 * Fill dev_priv->perf.test_config with the built-in TGL test OA
 * configuration (UUID, id, register tables and sysfs "id" attribute).
 */
void i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv);
#endif /* __I915_OA_TGL_H__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment