Commit cec82816 authored by Vinay Belgaumkar's avatar Vinay Belgaumkar Committed by John Harrison

drm/i915/guc: Use context hints for GT frequency

Allow user to provide a low latency context hint. When set, KMD
sends a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more slowly.
We also disable waitboost for this context as that will interfere with
the strategy.

We need to enable the use of SLPC Compute strategy during init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new param-
I915_PARAM_HAS_CONTEXT_FREQ_HINT. This flag is true for all guc submission
enabled platforms as they use SLPC for frequency management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint

v2: Rename flags as per review suggestions (Rodrigo, Tvrtko).
Also, use flag bits in intel_context as it allows finer control for
toggling per engine if needed (Tvrtko).

v3: Minor review comments (Tvrtko)

v4: Update comment (Sushma)

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com>
Reviewed-by: default avatarRodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: default avatarIvan Briano <ivan.briano@intel.com>
Signed-off-by: default avatarVinay Belgaumkar <vinay.belgaumkar@intel.com>
Signed-off-by: default avatarJohn Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240306012759.204938-1-vinay.belgaumkar@intel.com
parent 71271280
......@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
struct i915_gem_proto_context *pc,
struct drm_i915_gem_context_param *args)
{
struct drm_i915_private *i915 = fpriv->i915;
int ret = 0;
switch (args->param) {
......@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
break;
case I915_CONTEXT_PARAM_LOW_LATENCY:
if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY);
else
ret = -EINVAL;
break;
case I915_CONTEXT_PARAM_RECOVERABLE:
if (args->size)
ret = -EINVAL;
......@@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce,
if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS))
ret = intel_context_reconfigure_sseu(ce, sseu);
if (test_bit(UCONTEXT_LOW_LATENCY, &ctx->user_flags))
__set_bit(CONTEXT_LOW_LATENCY, &ce->flags);
return ret;
}
......@@ -1630,6 +1641,9 @@ i915_gem_create_context(struct drm_i915_private *i915,
if (vm)
ctx->vm = vm;
/* Assign early so intel_context_set_gem can access these flags */
ctx->user_flags = pc->user_flags;
mutex_init(&ctx->engines_mutex);
if (pc->num_user_engines >= 0) {
i915_gem_context_set_user_engines(ctx);
......@@ -1652,8 +1666,6 @@ i915_gem_create_context(struct drm_i915_private *i915,
* is no remap info, it will be a NOP. */
ctx->remap_slice = ALL_L3_SLICES(i915);
ctx->user_flags = pc->user_flags;
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
......
......@@ -338,6 +338,7 @@ struct i915_gem_context {
#define UCONTEXT_BANNABLE 2
#define UCONTEXT_RECOVERABLE 3
#define UCONTEXT_PERSISTENCE 4
#define UCONTEXT_LOW_LATENCY 5
/**
* @flags: small set of booleans
......
......@@ -130,6 +130,7 @@ struct intel_context {
#define CONTEXT_PERMA_PIN 11
#define CONTEXT_IS_PARKING 12
#define CONTEXT_EXITING 13
#define CONTEXT_LOW_LATENCY 14
struct {
u64 timeout_us;
......
......@@ -1013,6 +1013,10 @@ void intel_rps_boost(struct i915_request *rq)
if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
return;
/* Waitboost is not needed for contexts marked with a Freq hint */
if (test_bit(CONTEXT_LOW_LATENCY, &rq->context->flags))
return;
/* Serializes with i915_request_retire() */
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
......
......@@ -207,6 +207,27 @@ struct slpc_shared_data {
u8 reserved_mode_definition[4096];
} __packed;
struct slpc_context_frequency_request {
u32 frequency_request:16;
u32 reserved:12;
u32 is_compute:1;
u32 ignore_busyness:1;
u32 is_minimum:1;
u32 is_predefined:1;
} __packed;
#define SLPC_CTX_FREQ_REQ_IS_COMPUTE REG_BIT(28)
struct slpc_optimized_strategies {
u32 compute:1;
u32 async_flip:1;
u32 media:1;
u32 vsync_flip:1;
u32 reserved:28;
} __packed;
#define SLPC_OPTIMIZED_STRATEGY_COMPUTE REG_BIT(0)
/**
* DOC: SLPC H2G MESSAGE FORMAT
*
......
......@@ -537,6 +537,20 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val)
return ret;
}
int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val)
{
struct drm_i915_private *i915 = slpc_to_i915(slpc);
intel_wakeref_t wakeref;
int ret = 0;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
ret = slpc_set_param(slpc,
SLPC_PARAM_STRATEGIES,
val);
return ret;
}
int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val)
{
struct drm_i915_private *i915 = slpc_to_i915(slpc);
......@@ -711,6 +725,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
/* Set cached media freq ratio mode */
intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
/* Enable SLPC Optimized Strategy for compute */
intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
return 0;
}
......
......@@ -45,5 +45,6 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val);
int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val);
#endif
......@@ -2645,6 +2645,7 @@ MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
MAKE_CONTEXT_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_CONTEXT_POLICY_ADD
......@@ -2664,6 +2665,7 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
struct context_policy policy;
u32 execution_quantum;
u32 preemption_timeout;
u32 slpc_ctx_freq_req = 0;
unsigned long flags;
int ret;
......@@ -2675,11 +2677,15 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
execution_quantum = engine->props.timeslice_duration_ms * 1000;
preemption_timeout = engine->props.preempt_timeout_ms * 1000;
if (ce && (ce->flags & BIT(CONTEXT_LOW_LATENCY)))
slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
__guc_context_policy_start_klv(&policy, ce->guc_id.id);
__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
__guc_context_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req);
if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
__guc_context_policy_add_preempt_to_idle(&policy, 1);
......
......@@ -155,6 +155,12 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
*/
value = 1;
break;
case I915_PARAM_HAS_CONTEXT_FREQ_HINT:
if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
value = 1;
else
value = -EINVAL;
break;
case I915_PARAM_HAS_CONTEXT_ISOLATION:
value = intel_engines_has_context_isolation(i915);
break;
......
......@@ -806,6 +806,12 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_PXP_STATUS 58
/*
* Query if kernel allows marking a context to send a Freq hint to SLPC. This
* will enable use of the strategies allowed by the SLPC algorithm.
*/
#define I915_PARAM_HAS_CONTEXT_FREQ_HINT 59
/* Must be kept compact -- no holes and well documented */
/**
......@@ -2148,6 +2154,15 @@ struct drm_i915_gem_context_param {
* -EIO: The firmware did not succeed in creating the protected context.
*/
#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
/*
* I915_CONTEXT_PARAM_LOW_LATENCY:
*
* Mark this context as a low latency workload which requires aggressive GT
* frequency scaling. Use I915_PARAM_HAS_CONTEXT_FREQ_HINT to check if the kernel
* supports this per context flag.
*/
#define I915_CONTEXT_PARAM_LOW_LATENCY 0xe
/* Must be kept compact -- no holes and well documented */
/** @value: Context parameter value to be set or queried */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment