Commit 7e3b8737 authored by Daniel Vetter

drm/i915: dump even more into the error_state

Chris Wilson and I have again stared at funny error states and it's
been pretty clear from the start that something was seriously amiss.
The seqnos last seen by the cpu were a few hundred behind those that
the gpu could have possibly emitted last before it died ...

Chris now tracked it down (hopefully, definite verdict's still out),
but in hindsight we'd have found the bug by simply dumping the cpu
side tracking of the ring head and tail registers.

Fix this and prevent an identical time-waster in the future.

Because the hangs always involved semaphores in one way or another,
we've tried to dump the mbox registers, but couldn't find any
inconsistencies. Still, dump them too.
Reviewed-and-wanted-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent ff240199
...@@ -721,8 +721,14 @@ static void i915_ring_error_state(struct seq_file *m, ...@@ -721,8 +721,14 @@ static void i915_ring_error_state(struct seq_file *m,
if (INTEL_INFO(dev)->gen >= 6) { if (INTEL_INFO(dev)->gen >= 6) {
seq_printf(m, " FADDR: 0x%08x\n", error->faddr[ring]); seq_printf(m, " FADDR: 0x%08x\n", error->faddr[ring]);
seq_printf(m, " FAULT_REG: 0x%08x\n", error->fault_reg[ring]); seq_printf(m, " FAULT_REG: 0x%08x\n", error->fault_reg[ring]);
seq_printf(m, " SYNC_0: 0x%08x\n",
error->semaphore_mboxes[ring][0]);
seq_printf(m, " SYNC_1: 0x%08x\n",
error->semaphore_mboxes[ring][1]);
} }
seq_printf(m, " seqno: 0x%08x\n", error->seqno[ring]); seq_printf(m, " seqno: 0x%08x\n", error->seqno[ring]);
seq_printf(m, " ring->head: 0x%08x\n", error->cpu_ring_head[ring]);
seq_printf(m, " ring->tail: 0x%08x\n", error->cpu_ring_tail[ring]);
} }
static int i915_error_state(struct seq_file *m, void *unused) static int i915_error_state(struct seq_file *m, void *unused)
......
...@@ -159,6 +159,10 @@ struct drm_i915_error_state { ...@@ -159,6 +159,10 @@ struct drm_i915_error_state {
u32 ipehr[I915_NUM_RINGS]; u32 ipehr[I915_NUM_RINGS];
u32 instdone[I915_NUM_RINGS]; u32 instdone[I915_NUM_RINGS];
u32 acthd[I915_NUM_RINGS]; u32 acthd[I915_NUM_RINGS];
u32 semaphore_mboxes[I915_NUM_RINGS][I915_NUM_RINGS - 1];
/* our own tracking of ring head and tail */
u32 cpu_ring_head[I915_NUM_RINGS];
u32 cpu_ring_tail[I915_NUM_RINGS];
u32 error; /* gen6+ */ u32 error; /* gen6+ */
u32 instpm[I915_NUM_RINGS]; u32 instpm[I915_NUM_RINGS];
u32 instps[I915_NUM_RINGS]; u32 instps[I915_NUM_RINGS];
......
...@@ -903,6 +903,10 @@ static void i915_record_ring_state(struct drm_device *dev, ...@@ -903,6 +903,10 @@ static void i915_record_ring_state(struct drm_device *dev,
if (INTEL_INFO(dev)->gen >= 6) { if (INTEL_INFO(dev)->gen >= 6) {
error->faddr[ring->id] = I915_READ(RING_DMA_FADD(ring->mmio_base)); error->faddr[ring->id] = I915_READ(RING_DMA_FADD(ring->mmio_base));
error->fault_reg[ring->id] = I915_READ(RING_FAULT_REG(ring)); error->fault_reg[ring->id] = I915_READ(RING_FAULT_REG(ring));
error->semaphore_mboxes[ring->id][0]
= I915_READ(RING_SYNC_0(ring->mmio_base));
error->semaphore_mboxes[ring->id][1]
= I915_READ(RING_SYNC_1(ring->mmio_base));
} }
if (INTEL_INFO(dev)->gen >= 4) { if (INTEL_INFO(dev)->gen >= 4) {
...@@ -925,6 +929,9 @@ static void i915_record_ring_state(struct drm_device *dev, ...@@ -925,6 +929,9 @@ static void i915_record_ring_state(struct drm_device *dev,
error->acthd[ring->id] = intel_ring_get_active_head(ring); error->acthd[ring->id] = intel_ring_get_active_head(ring);
error->head[ring->id] = I915_READ_HEAD(ring); error->head[ring->id] = I915_READ_HEAD(ring);
error->tail[ring->id] = I915_READ_TAIL(ring); error->tail[ring->id] = I915_READ_TAIL(ring);
error->cpu_ring_head[ring->id] = ring->head;
error->cpu_ring_tail[ring->id] = ring->tail;
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment