Commit 884c4074 authored by Chris Wilson

drm/i915/gt: Apply the CSB w/a for all

Since we expect to inline the csb_parse() routines, the w/a for stale
CSB data on Tigerlake will be pulled into process_csb(), so we might as
well reuse the logic for all platforms and hopefully avoid any strange
behaviour on Icelake that was not covered by our previous w/a.

References: d8f50531 ("drm/i915/icl: Forcibly evict stale csb entries")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200915134923.30088-3-chris@chris-wilson.co.uk
parent 233c1ae3
@@ -2496,25 +2496,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
  * bits 47-57: sw context id of the lrc the GT switched away from
  * bits 58-63: sw counter of the lrc the GT switched away from
  */
-static inline bool gen12_csb_parse(const u64 *csb)
+static inline bool gen12_csb_parse(const u64 csb)
 {
-	bool ctx_away_valid;
-	bool new_queue;
-	u64 entry;
-
-	/* HSD#22011248461 */
-	entry = READ_ONCE(*csb);
-	if (unlikely(entry == -1)) {
-		preempt_disable();
-		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
-			GEM_WARN_ON("50us CSB timeout");
-		preempt_enable();
-	}
-
-	WRITE_ONCE(*(u64 *)csb, -1);
-	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
-	new_queue =
-		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
+	bool new_queue =
+		lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
 	 * The context switch detail is not guaranteed to be 5 when a preemption
@@ -2524,7 +2510,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
 	 * would require some extra handling, but we don't support that.
 	 */
 	if (!ctx_away_valid || new_queue) {
-		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
+		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
 		return true;
 	}
 
@@ -2533,19 +2519,56 @@ static inline bool gen12_csb_parse(const u64 *csb)
 	 * context switch on an unsuccessful wait instruction since we always
 	 * use polling mode.
 	 */
-	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
+	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
 	return false;
 }
 
-static inline bool gen8_csb_parse(const u64 *csb)
+static inline bool gen8_csb_parse(const u64 csb)
+{
+	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+}
+
+static noinline u64 wa_csb_read(u64 * const csb)
 {
-	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+	u64 entry;
+
+	preempt_disable();
+	if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+		GEM_WARN_ON("50us CSB timeout");
+	preempt_enable();
+
+	return entry;
+}
+
+static inline u64 csb_read(u64 * const csb)
+{
+	u64 entry = READ_ONCE(*csb);
+
+	/*
+	 * Unfortunately, the GPU does not always serialise its write
+	 * of the CSB entries before its write of the CSB pointer, at least
+	 * from the perspective of the CPU, using what is known as a Global
+	 * Observation Point. We may read a new CSB tail pointer, but then
+	 * read the stale CSB entries, causing us to misinterpret the
+	 * context-switch events, and eventually declare the GPU hung.
+	 *
+	 * icl:HSDES#1806554093
+	 * tgl:HSDES#22011248461
+	 */
+	if (unlikely(entry == -1))
+		entry = wa_csb_read(csb);
+
+	/* Consume this entry so that we can spot its future reuse. */
+	WRITE_ONCE(*csb, -1);
+
+	/* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */
+	return entry;
 }
 
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	const u64 * const buf = execlists->csb_status;
+	u64 * const buf = execlists->csb_status;
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
@@ -2603,6 +2626,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	rmb();
 	do {
 		bool promote;
+		u64 csb;
 
 		if (++head == num_entries)
 			head = 0;
@@ -2625,15 +2649,14 @@ static void process_csb(struct intel_engine_cs *engine)
 		 * status notifier.
 		 */
 
+		csb = csb_read(buf + head);
 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
-			     head,
-			     upper_32_bits(buf[head]),
-			     lower_32_bits(buf[head]));
+			     head, upper_32_bits(csb), lower_32_bits(csb));
 
 		if (INTEL_GEN(engine->i915) >= 12)
-			promote = gen12_csb_parse(buf + head);
+			promote = gen12_csb_parse(csb);
 		else
-			promote = gen8_csb_parse(buf + head);
+			promote = gen8_csb_parse(csb);
 		if (promote) {
 			struct i915_request * const *old = execlists->active;
...
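For illustration, the read-and-consume pattern that this patch routes all CSB parsing through can be modelled outside the kernel as the standalone sketch below. This is only a model of the technique, not the driver code: READ_ONCE/WRITE_ONCE and wait_for_atomic_us() are replaced with hypothetical user-space stand-ins, and the 50us bounded wait becomes a simple retry counter.

/*
 * Minimal user-space model of the consolidated CSB read: read the 64-bit
 * entry, retry briefly if it still holds the poison value (the GPU's write
 * has not yet become visible), then re-poison the slot so a stale reuse can
 * be detected on the next pass.
 */
#include <stdint.h>
#include <stdio.h>

#define CSB_POISON (~0ull)

static uint64_t wa_csb_read(volatile uint64_t *csb)
{
	uint64_t entry;
	int retries = 50;	/* stand-in for the kernel's 50us bounded wait */

	while ((entry = *csb) == CSB_POISON && retries--)
		;		/* spin; the kernel uses wait_for_atomic_us() */
	return entry;
}

static uint64_t csb_read(volatile uint64_t *csb)
{
	uint64_t entry = *csb;

	if (entry == CSB_POISON)	/* stale: CSB write not yet visible */
		entry = wa_csb_read(csb);

	*csb = CSB_POISON;	/* consume the entry to spot future reuse */
	return entry;
}

int main(void)
{
	uint64_t slot = 0x0000000100008002ull;	/* arbitrary example event */

	printf("csb: %#llx\n", (unsigned long long)csb_read(&slot));
	printf("slot after consume: %#llx\n", (unsigned long long)slot);
	return 0;
}

In the patch itself the equivalent helpers are inlined into process_csb(), so the stale-entry workaround is applied on every platform rather than only inside gen12_csb_parse().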