Commit 302e55d7 authored by Chris Wilson

drm/i915: Report if an unbannable context is involved in a GPU hang

Since unbannable contexts are special and are not supposed to cause GPU
hangs in the first place, make it clear when they are implicated in a
hang. In practice, most unbannable contexts are those created by igt
for the express purpose of throwing untold thousands of hangs at the GPU,
and which wish to keep doing so in order to finish the test. Normally
they are cleaned up, but it is when they, or the other unbannable kernel
contexts, stay stuck in an erroneous state that we need to worry, and so
those cases need highlighting.
Suggested-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180205094139.10671-1-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
parent 55ef72f2
...@@ -555,6 +555,7 @@ struct i915_gpu_state { ...@@ -555,6 +555,7 @@ struct i915_gpu_state {
int ban_score; int ban_score;
int active; int active;
int guilty; int guilty;
bool bannable;
} context; } context;
struct drm_i915_error_object { struct drm_i915_error_object {
......
...@@ -396,6 +396,11 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, ...@@ -396,6 +396,11 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
ee->instdone.row[slice][subslice]); ee->instdone.row[slice][subslice]);
} }
/*
 * Suffix appended to error-state lines describing a captured context:
 * an empty string when the context's bannable flag is set, otherwise the
 * literal " (unbannable)" so that such contexts stand out in the dump.
 * The returned pointer refers to a string literal; callers must not free it.
 */
static const char *bannable(const struct drm_i915_error_context *ctx)
{
	if (ctx->bannable)
		return "";

	return " (unbannable)";
}
static void error_print_request(struct drm_i915_error_state_buf *m, static void error_print_request(struct drm_i915_error_state_buf *m,
const char *prefix, const char *prefix,
const struct drm_i915_error_request *erq) const struct drm_i915_error_request *erq)
...@@ -414,9 +419,10 @@ static void error_print_context(struct drm_i915_error_state_buf *m, ...@@ -414,9 +419,10 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header, const char *header,
const struct drm_i915_error_context *ctx) const struct drm_i915_error_context *ctx)
{ {
err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n", err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n",
header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
ctx->priority, ctx->ban_score, ctx->guilty, ctx->active); ctx->priority, ctx->ban_score, bannable(ctx),
ctx->guilty, ctx->active);
} }
static void error_print_engine(struct drm_i915_error_state_buf *m, static void error_print_engine(struct drm_i915_error_state_buf *m,
...@@ -644,11 +650,12 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, ...@@ -644,11 +650,12 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
for (i = 0; i < ARRAY_SIZE(error->engine); i++) { for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
if (error->engine[i].hangcheck_stalled && if (error->engine[i].hangcheck_stalled &&
error->engine[i].context.pid) { error->engine[i].context.pid) {
err_printf(m, "Active process (on ring %s): %s [%d], score %d\n", err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
engine_name(m->i915, i), engine_name(m->i915, i),
error->engine[i].context.comm, error->engine[i].context.comm,
error->engine[i].context.pid, error->engine[i].context.pid,
error->engine[i].context.ban_score); error->engine[i].context.ban_score,
bannable(&error->engine[i].context));
} }
} }
err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Reset count: %u\n", error->reset_count);
...@@ -736,12 +743,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, ...@@ -736,12 +743,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
if (obj) { if (obj) {
err_puts(m, dev_priv->engine[i]->name); err_puts(m, dev_priv->engine[i]->name);
if (ee->context.pid) if (ee->context.pid)
err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)", err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)",
ee->context.comm, ee->context.comm,
ee->context.pid, ee->context.pid,
ee->context.handle, ee->context.handle,
ee->context.hw_id, ee->context.hw_id,
ee->context.ban_score); ee->context.ban_score,
bannable(&ee->context));
err_printf(m, " --- gtt_offset = 0x%08x %08x\n", err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
upper_32_bits(obj->gtt_offset), upper_32_bits(obj->gtt_offset),
lower_32_bits(obj->gtt_offset)); lower_32_bits(obj->gtt_offset));
...@@ -1383,6 +1391,7 @@ static void record_context(struct drm_i915_error_context *e, ...@@ -1383,6 +1391,7 @@ static void record_context(struct drm_i915_error_context *e,
e->hw_id = ctx->hw_id; e->hw_id = ctx->hw_id;
e->priority = ctx->priority; e->priority = ctx->priority;
e->ban_score = atomic_read(&ctx->ban_score); e->ban_score = atomic_read(&ctx->ban_score);
e->bannable = i915_gem_context_is_bannable(ctx);
e->guilty = atomic_read(&ctx->guilty_count); e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count); e->active = atomic_read(&ctx->active_count);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment