Commit a0e04715 authored by Chris Wilson's avatar Chris Wilson

drm/i915/gem: Make context persistence optional

Our existing behaviour is to allow contexts and their GPU requests to
persist past the point of closure until the requests are complete. This
allows clients to operate in a 'fire-and-forget' manner where they can
setup a rendering pipeline and hand it over to the display server and
immediately exit. As the rendering pipeline is kept alive until
completion, the display server (or other consumer) can use the results
in the future and present them to the user.

The compute model is a little different. They have little to no buffer
sharing between processes as their kernels tend to operate on a
continuous stream, feeding the results back to the client application.
These kernels operate for an indeterminate length of time, with many
clients wishing that the kernel was always running for as long as they
keep feeding in the data, i.e. acting like a DSP.

Not all clients want this persistent "desktop" behaviour and would prefer
that the contexts are cleaned up immediately upon closure. This ensures
that when clients are run without hangchecking (e.g. for compute kernels
of indeterminate runtime), any GPU hang or other unexpected workloads
are terminated with the process and does not continue to hog resources.

The default behaviour for new contexts is the legacy persistence mode,
as some desktop applications are dependent upon the existing behaviour.
New clients will have to opt in to immediate cleanup on context
closure. If the hangchecking modparam is disabled, so is persistent
context support -- all contexts will be terminated on closure.

We expect this behaviour change to be welcomed by compute users, who
have often been caught between a rock and a hard place. They disable
hangchecking to avoid their kernels being "unfairly" declared hung, but
have also experienced true hangs that the system was then unable to
clean up. Naturally, this leads to bug reports.

Testcase: igt/gem_ctx_persistence
Link: https://github.com/intel/compute-runtime/pull/228Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: default avatarJon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: default avatarJoonas Lahtinen <joonas.lahtinen@linux.intel.com>
Acked-by: default avatarJason Ekstrand <jason@jlekstrand.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20191029202338.8841-1-chris@chris-wilson.co.uk
parent 54516464
...@@ -438,12 +438,39 @@ static void context_close(struct i915_gem_context *ctx) ...@@ -438,12 +438,39 @@ static void context_close(struct i915_gem_context *ctx)
* case we opt to forcibly kill off all remaining requests on * case we opt to forcibly kill off all remaining requests on
* context close. * context close.
*/ */
if (!i915_modparams.enable_hangcheck) if (!i915_gem_context_is_persistent(ctx) ||
!i915_modparams.enable_hangcheck)
kill_context(ctx); kill_context(ctx);
i915_gem_context_put(ctx); i915_gem_context_put(ctx);
} }
static int __context_set_persistence(struct i915_gem_context *ctx, bool state)
{
if (i915_gem_context_is_persistent(ctx) == state)
return 0;
if (state) {
/*
* Only contexts that are short-lived [that will expire or be
* reset] are allowed to survive past termination. We require
* hangcheck to ensure that the persistent requests are healthy.
*/
if (!i915_modparams.enable_hangcheck)
return -EINVAL;
i915_gem_context_set_persistence(ctx);
} else {
/* To cancel a context we use "preempt-to-idle" */
if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
return -ENODEV;
i915_gem_context_clear_persistence(ctx);
}
return 0;
}
static struct i915_gem_context * static struct i915_gem_context *
__create_context(struct drm_i915_private *i915) __create_context(struct drm_i915_private *i915)
{ {
...@@ -478,6 +505,7 @@ __create_context(struct drm_i915_private *i915) ...@@ -478,6 +505,7 @@ __create_context(struct drm_i915_private *i915)
i915_gem_context_set_bannable(ctx); i915_gem_context_set_bannable(ctx);
i915_gem_context_set_recoverable(ctx); i915_gem_context_set_recoverable(ctx);
__context_set_persistence(ctx, true /* cgroup hook? */);
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
...@@ -634,6 +662,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) ...@@ -634,6 +662,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
return ctx; return ctx;
i915_gem_context_clear_bannable(ctx); i915_gem_context_clear_bannable(ctx);
i915_gem_context_set_persistence(ctx);
ctx->sched.priority = I915_USER_PRIORITY(prio); ctx->sched.priority = I915_USER_PRIORITY(prio);
GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
...@@ -1744,6 +1773,16 @@ get_engines(struct i915_gem_context *ctx, ...@@ -1744,6 +1773,16 @@ get_engines(struct i915_gem_context *ctx,
return err; return err;
} }
static int
set_persistence(struct i915_gem_context *ctx,
const struct drm_i915_gem_context_param *args)
{
if (args->size)
return -EINVAL;
return __context_set_persistence(ctx, args->value);
}
static int ctx_setparam(struct drm_i915_file_private *fpriv, static int ctx_setparam(struct drm_i915_file_private *fpriv,
struct i915_gem_context *ctx, struct i915_gem_context *ctx,
struct drm_i915_gem_context_param *args) struct drm_i915_gem_context_param *args)
...@@ -1821,6 +1860,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ...@@ -1821,6 +1860,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
ret = set_engines(ctx, args); ret = set_engines(ctx, args);
break; break;
case I915_CONTEXT_PARAM_PERSISTENCE:
ret = set_persistence(ctx, args);
break;
case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_BAN_PERIOD:
default: default:
ret = -EINVAL; ret = -EINVAL;
...@@ -2273,6 +2316,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ...@@ -2273,6 +2316,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
ret = get_engines(ctx, args); ret = get_engines(ctx, args);
break; break;
case I915_CONTEXT_PARAM_PERSISTENCE:
args->size = 0;
args->value = i915_gem_context_is_persistent(ctx);
break;
case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_BAN_PERIOD:
default: default:
ret = -EINVAL; ret = -EINVAL;
......
...@@ -76,6 +76,21 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c ...@@ -76,6 +76,21 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c
clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
} }
static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx)
{
return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
}
static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx)
{
set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
}
static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx)
{
clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
}
static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx) static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
{ {
return test_bit(CONTEXT_BANNED, &ctx->flags); return test_bit(CONTEXT_BANNED, &ctx->flags);
......
...@@ -137,6 +137,7 @@ struct i915_gem_context { ...@@ -137,6 +137,7 @@ struct i915_gem_context {
#define UCONTEXT_NO_ERROR_CAPTURE 1 #define UCONTEXT_NO_ERROR_CAPTURE 1
#define UCONTEXT_BANNABLE 2 #define UCONTEXT_BANNABLE 2
#define UCONTEXT_RECOVERABLE 3 #define UCONTEXT_RECOVERABLE 3
#define UCONTEXT_PERSISTENCE 4
/** /**
* @flags: small set of booleans * @flags: small set of booleans
......
...@@ -22,6 +22,8 @@ mock_context(struct drm_i915_private *i915, ...@@ -22,6 +22,8 @@ mock_context(struct drm_i915_private *i915,
INIT_LIST_HEAD(&ctx->link); INIT_LIST_HEAD(&ctx->link);
ctx->i915 = i915; ctx->i915 = i915;
i915_gem_context_set_persistence(ctx);
mutex_init(&ctx->engines_mutex); mutex_init(&ctx->engines_mutex);
e = default_engines(ctx); e = default_engines(ctx);
if (IS_ERR(e)) if (IS_ERR(e))
......
...@@ -1572,6 +1572,21 @@ struct drm_i915_gem_context_param { ...@@ -1572,6 +1572,21 @@ struct drm_i915_gem_context_param {
* i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
*/ */
#define I915_CONTEXT_PARAM_ENGINES 0xa #define I915_CONTEXT_PARAM_ENGINES 0xa
/*
* I915_CONTEXT_PARAM_PERSISTENCE:
*
* Allow the context and active rendering to survive the process until
* completion. Persistence allows fire-and-forget clients to queue up a
* bunch of work, hand the output over to a display server and then quit.
* If the context is marked as not persistent, upon closing (either via
* an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure
* or process termination), the context and any outstanding requests will be
* cancelled (and exported fences for cancelled requests marked as -EIO).
*
* By default, new contexts allow persistence.
*/
#define I915_CONTEXT_PARAM_PERSISTENCE 0xb
/* Must be kept compact -- no holes and well documented */ /* Must be kept compact -- no holes and well documented */
__u64 value; __u64 value;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment