Commit 46c507f0 authored by Matt Roper

drm/i915/gt: Always use MCR functions on multicast registers

Rather than relying on the implicit behavior of intel_uncore_*()
functions, let's always use the intel_gt_mcr_*() functions to operate on
multicast/replicated registers.

v2:
 - Add TLB invalidation registers

v3:
 - Switch more uncore operations in mmio_invalidate_full() to MCR
   operations for Xe_HP.  (Bala)

Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221014230239.1023689-10-matthew.d.roper@intel.com
parent a9e69428
...@@ -1017,6 +1017,32 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, ...@@ -1017,6 +1017,32 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
return rb; return rb;
} }
/*
* HW architecture suggests a typical invalidation time of 40us,
* with pessimistic cases up to 100us and a recommendation to
* cap at 1ms. We go a bit higher just in case.
*/
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4
/*
 * Wait for a previously requested TLB invalidation to finish.
 *
 * @gt: GT whose invalidation register is being polled
 * @rb: register/bit pair; rb.bit is passed as the mask and 0 as the
 *      expected value to the underlying wait helper
 *
 * On Xe_HP (graphics version 12.50 and later) the TLB invalidation registers
 * sit at the same MMIO offsets as before but are treated as MCR registers,
 * so the MCR-aware wait helper is used. Because they live within a GAM range,
 * the primary instance of the register rolls up the status from each unit.
 * Older platforms poll through the plain uncore helper.
 *
 * Both paths use TLB_INVAL_TIMEOUT_US / TLB_INVAL_TIMEOUT_MS as their
 * timeouts. The helper's return value is passed through unchanged; the
 * caller treats a non-zero result as "invalidation did not complete".
 */
static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
{
	/* Pre-Xe_HP: ordinary (non-MCR) register, poll via uncore. */
	if (GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50))
		return __intel_wait_for_register_fw(gt->uncore,
						    rb.reg, rb.bit, 0,
						    TLB_INVAL_TIMEOUT_US,
						    TLB_INVAL_TIMEOUT_MS,
						    NULL);

	/* Xe_HP and later: the register is multicast/replicated. */
	return intel_gt_mcr_wait_for_reg_fw(gt, rb.reg, rb.bit, 0,
					    TLB_INVAL_TIMEOUT_US,
					    TLB_INVAL_TIMEOUT_MS);
}
static void mmio_invalidate_full(struct intel_gt *gt) static void mmio_invalidate_full(struct intel_gt *gt)
{ {
static const i915_reg_t gen8_regs[] = { static const i915_reg_t gen8_regs[] = {
...@@ -1048,7 +1074,7 @@ static void mmio_invalidate_full(struct intel_gt *gt) ...@@ -1048,7 +1074,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
unsigned int num = 0; unsigned int num = 0;
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
regs = xehp_regs; regs = NULL;
num = ARRAY_SIZE(xehp_regs); num = ARRAY_SIZE(xehp_regs);
} else if (GRAPHICS_VER(i915) == 12) { } else if (GRAPHICS_VER(i915) == 12) {
regs = gen12_regs; regs = gen12_regs;
...@@ -1075,11 +1101,17 @@ static void mmio_invalidate_full(struct intel_gt *gt) ...@@ -1075,11 +1101,17 @@ static void mmio_invalidate_full(struct intel_gt *gt)
if (!intel_engine_pm_is_awake(engine)) if (!intel_engine_pm_is_awake(engine))
continue; continue;
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
if (!i915_mmio_reg_offset(rb.reg)) intel_gt_mcr_multicast_write_fw(gt,
continue; xehp_regs[engine->class],
BIT(engine->instance));
} else {
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
if (!i915_mmio_reg_offset(rb.reg))
continue;
intel_uncore_write_fw(uncore, rb.reg, rb.bit); intel_uncore_write_fw(uncore, rb.reg, rb.bit);
}
awake |= engine->mask; awake |= engine->mask;
} }
...@@ -1099,22 +1131,12 @@ static void mmio_invalidate_full(struct intel_gt *gt) ...@@ -1099,22 +1131,12 @@ static void mmio_invalidate_full(struct intel_gt *gt)
for_each_engine_masked(engine, gt, awake, tmp) { for_each_engine_masked(engine, gt, awake, tmp) {
struct reg_and_bit rb; struct reg_and_bit rb;
/*
* HW architecture suggest typical invalidation time at 40us,
* with pessimistic cases up to 100us and a recommendation to
* cap at 1ms. We go a bit higher just in case.
*/
const unsigned int timeout_us = 100;
const unsigned int timeout_ms = 4;
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0, if (wait_for_invalidate(gt, rb))
timeout_us, timeout_ms,
NULL))
drm_err_ratelimited(&gt->i915->drm, drm_err_ratelimited(&gt->i915->drm,
"%s TLB invalidation did not complete in %ums!\n", "%s TLB invalidation did not complete in %ums!\n",
engine->name, timeout_ms); engine->name, TLB_INVAL_TIMEOUT_MS);
} }
/* /*
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "intel_engine.h" #include "intel_engine.h"
#include "intel_gt.h" #include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_regs.h" #include "intel_gt_regs.h"
#include "intel_mocs.h" #include "intel_mocs.h"
#include "intel_ring.h" #include "intel_ring.h"
...@@ -609,17 +610,17 @@ static u32 l3cc_combine(u16 low, u16 high) ...@@ -609,17 +610,17 @@ static u32 l3cc_combine(u16 low, u16 high)
0; \ 0; \
i++) i++)
static void init_l3cc_table(struct intel_uncore *uncore, static void init_l3cc_table(struct intel_gt *gt,
const struct drm_i915_mocs_table *table) const struct drm_i915_mocs_table *table)
{ {
unsigned int i; unsigned int i;
u32 l3cc; u32 l3cc;
for_each_l3cc(l3cc, table, i) for_each_l3cc(l3cc, table, i)
if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50)) if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
intel_uncore_write_fw(uncore, XEHP_LNCFCMOCS(i), l3cc); intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc);
else else
intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc); intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc);
} }
void intel_mocs_init_engine(struct intel_engine_cs *engine) void intel_mocs_init_engine(struct intel_engine_cs *engine)
...@@ -639,7 +640,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) ...@@ -639,7 +640,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
init_mocs_table(engine, &table); init_mocs_table(engine, &table);
if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS) if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
init_l3cc_table(engine->uncore, &table); init_l3cc_table(engine->gt, &table);
} }
static u32 global_mocs_offset(void) static u32 global_mocs_offset(void)
...@@ -675,7 +676,7 @@ void intel_mocs_init(struct intel_gt *gt) ...@@ -675,7 +676,7 @@ void intel_mocs_init(struct intel_gt *gt)
* memory transactions including guc transactions * memory transactions including guc transactions
*/ */
if (flags & HAS_RENDER_L3CC) if (flags & HAS_RENDER_L3CC)
init_l3cc_table(gt->uncore, &table); init_l3cc_table(gt, &table);
} }
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
......
...@@ -10,12 +10,15 @@ ...@@ -10,12 +10,15 @@
*/ */
#include "gt/intel_gt.h" #include "gt/intel_gt.h"
#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h" #include "gt/intel_gt_regs.h"
#include "intel_guc_fw.h" #include "intel_guc_fw.h"
#include "i915_drv.h" #include "i915_drv.h"
static void guc_prepare_xfer(struct intel_uncore *uncore) static void guc_prepare_xfer(struct intel_gt *gt)
{ {
struct intel_uncore *uncore = gt->uncore;
u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC | u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
...@@ -35,8 +38,9 @@ static void guc_prepare_xfer(struct intel_uncore *uncore) ...@@ -35,8 +38,9 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
if (GRAPHICS_VER(uncore->i915) == 9) { if (GRAPHICS_VER(uncore->i915) == 9) {
/* DOP Clock Gating Enable for GuC clocks */ /* DOP Clock Gating Enable for GuC clocks */
intel_uncore_rmw(uncore, GEN8_MISCCPCTL, intel_gt_mcr_multicast_write(gt, GEN8_MISCCPCTL,
0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE); GEN8_DOP_CLOCK_GATE_GUC_ENABLE |
intel_gt_mcr_read_any(gt, GEN8_MISCCPCTL));
/* allows for 5us (in 10ns units) before GT can go to RC6 */ /* allows for 5us (in 10ns units) before GT can go to RC6 */
intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF); intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF);
...@@ -168,7 +172,7 @@ int intel_guc_fw_upload(struct intel_guc *guc) ...@@ -168,7 +172,7 @@ int intel_guc_fw_upload(struct intel_guc *guc)
struct intel_uncore *uncore = gt->uncore; struct intel_uncore *uncore = gt->uncore;
int ret; int ret;
guc_prepare_xfer(uncore); guc_prepare_xfer(gt);
/* /*
* Note that GuC needs the CSS header plus uKernel code to be copied * Note that GuC needs the CSS header plus uKernel code to be copied
......
...@@ -30,6 +30,8 @@ ...@@ -30,6 +30,8 @@
#include "display/skl_watermark.h" #include "display/skl_watermark.h"
#include "gt/intel_engine_regs.h" #include "gt/intel_engine_regs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h" #include "gt/intel_gt_regs.h"
#include "i915_drv.h" #include "i915_drv.h"
...@@ -4325,22 +4327,22 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, ...@@ -4325,22 +4327,22 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
u32 val; u32 val;
/* WaTempDisableDOPClkGating:bdw */ /* WaTempDisableDOPClkGating:bdw */
misccpctl = intel_uncore_read(&dev_priv->uncore, GEN8_MISCCPCTL); misccpctl = intel_gt_mcr_multicast_rmw(to_gt(dev_priv), GEN8_MISCCPCTL,
intel_uncore_write(&dev_priv->uncore, GEN8_MISCCPCTL, misccpctl & ~GEN8_DOP_CLOCK_GATE_ENABLE); GEN8_DOP_CLOCK_GATE_ENABLE, 0);
val = intel_uncore_read(&dev_priv->uncore, GEN8_L3SQCREG1); val = intel_gt_mcr_read_any(to_gt(dev_priv), GEN8_L3SQCREG1);
val &= ~L3_PRIO_CREDITS_MASK; val &= ~L3_PRIO_CREDITS_MASK;
val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits); val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
val |= L3_HIGH_PRIO_CREDITS(high_prio_credits); val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
intel_uncore_write(&dev_priv->uncore, GEN8_L3SQCREG1, val); intel_gt_mcr_multicast_write(to_gt(dev_priv), GEN8_L3SQCREG1, val);
/* /*
* Wait at least 100 clocks before re-enabling clock gating. * Wait at least 100 clocks before re-enabling clock gating.
* See the definition of L3SQCREG1 in BSpec. * See the definition of L3SQCREG1 in BSpec.
*/ */
intel_uncore_posting_read(&dev_priv->uncore, GEN8_L3SQCREG1); intel_gt_mcr_read_any(to_gt(dev_priv), GEN8_L3SQCREG1);
udelay(1); udelay(1);
intel_uncore_write(&dev_priv->uncore, GEN8_MISCCPCTL, misccpctl); intel_gt_mcr_multicast_write(to_gt(dev_priv), GEN8_MISCCPCTL, misccpctl);
} }
static void icl_init_clock_gating(struct drm_i915_private *dev_priv) static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
...@@ -4500,9 +4502,8 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv) ...@@ -4500,9 +4502,8 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
gen9_init_clock_gating(dev_priv); gen9_init_clock_gating(dev_priv);
/* WaDisableDopClockGating:skl */ /* WaDisableDopClockGating:skl */
intel_uncore_write(&dev_priv->uncore, GEN8_MISCCPCTL, intel_gt_mcr_multicast_rmw(to_gt(dev_priv), GEN8_MISCCPCTL,
intel_uncore_read(&dev_priv->uncore, GEN8_MISCCPCTL) & GEN8_DOP_CLOCK_GATE_ENABLE, 0);
~GEN8_DOP_CLOCK_GATE_ENABLE);
/* WAC6entrylatency:skl */ /* WAC6entrylatency:skl */
intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) | intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) |
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment