Commit f81475bb authored by Chris Wilson

drm/i915/gt: Resubmit the virtual engine on schedule-out

Having recognised that we do not change the sibling until we schedule
out, we can then defer the decision to resubmit the virtual engine from
the unwind of the active queue to scheduling out of the virtual context.
This improves our resilience in virtual engine scheduling, and should
eliminate the rare cases of gem_exec_balance failing.

By keeping the unwind order intact on the local engine, we can preserve
data dependency ordering while doing a preempt-to-busy pass until we
have determined the new ELSP. This means that if we try to timeslice
between a virtual engine and a data-dependent ordinary request, the pair
will maintain their relative ordering and we will avoid the
resubmission, cancelling the timeslicing until further change.

The dilemma though is that we then may end up in a situation where the
'demotion' of the virtual request to an ordinary request in the engine
queue results in filling the ELSP[] with virtual requests instead of
spreading the load across the engines. To compensate for this, we mark
each virtual request and refuse to resubmit a virtual request in the
secondary ELSP slots, thus forcing subsequent virtual requests to be
scheduled out after timeslicing. By delaying the decision until we
schedule out, we will avoid unnecessary resubmission.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2079
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2098
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201224135544.1713-7-chris@chris-wilson.co.uk
parent 66e40750
......@@ -388,38 +388,23 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
__i915_request_unsubmit(rq);
/*
* Push the request back into the queue for later resubmission.
* If this request is not native to this physical engine (i.e.
* it came from a virtual source), push it back onto the virtual
* engine so that it can be moved across onto another physical
* engine as load dictates.
*/
if (likely(rq->execution_mask == engine->mask)) {
GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
if (rq_prio(rq) != prio) {
prio = rq_prio(rq);
pl = i915_sched_lookup_priolist(engine, prio);
}
GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
if (rq_prio(rq) != prio) {
prio = rq_prio(rq);
pl = i915_sched_lookup_priolist(engine, prio);
}
GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
list_move(&rq->sched.link, pl);
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
list_move(&rq->sched.link, pl);
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
/* Check in case we rollback so far we wrap [size/2] */
if (intel_ring_direction(rq->ring,
rq->tail,
rq->ring->tail + 8) > 0)
rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
/* Check in case we rollback so far we wrap [size/2] */
if (intel_ring_direction(rq->ring,
rq->tail,
rq->ring->tail + 8) > 0)
rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
active = rq;
} else {
struct intel_engine_cs *owner = rq->context->engine;
WRITE_ONCE(rq->engine, owner);
owner->submit_request(rq);
active = NULL;
}
active = rq;
}
return active;
......@@ -578,9 +563,9 @@ static inline void execlists_schedule_in(struct i915_request *rq, int idx)
GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
}
static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
static void
resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
struct intel_engine_cs *engine = rq->engine;
/* Flush concurrent rcu iterators in signal_irq_work */
......@@ -598,6 +583,30 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
cpu_relax();
}
spin_lock_irq(&engine->active.lock);
clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
WRITE_ONCE(rq->engine, &ve->base);
ve->base.submit_request(rq);
spin_unlock_irq(&engine->active.lock);
}
/*
 * kick_siblings - hand a request back to its virtual engine if appropriate
 * @rq: the request being considered; rq->engine is the engine compared against
 * @ce: a context that is the 'context' member of a struct virtual_engine
 *
 * If @rq is still in the priority queue and its execution_mask differs from
 * the mask of rq->engine, it is passed to resubmit_virtual_request().
 * Afterwards, if the virtual engine has a request set, its execlists tasklet
 * is scheduled.
 */
static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
{
/* Recover the enclosing virtual engine from its embedded context. */
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
struct intel_engine_cs *engine = rq->engine;
/*
* This engine is now too busy to run this virtual request, so
* see if we can find an alternative engine for it to execute on.
* Once a request has become bonded to this engine, we treat it the
* same as any other native request.
*/
if (i915_request_in_priority_queue(rq) &&
rq->execution_mask != engine->mask)
resubmit_virtual_request(rq, ve);
/* Only kick the virtual engine's tasklet when it has a request set. */
if (READ_ONCE(ve->request))
tasklet_hi_schedule(&ve->base.execlists.tasklet);
}
......@@ -843,6 +852,20 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
sentinel = i915_request_has_sentinel(rq);
/*
* We want virtual requests to only be in the first slot so
* that they are never stuck behind a hog and can be immediately
* transferred onto the next idle engine.
*/
if (rq->execution_mask != engine->mask &&
port != execlists->pending) {
GEM_TRACE_ERR("%s: virtual engine:%llx not in prime position[%zd]\n",
engine->name,
ce->timeline->fence_context,
port - execlists->pending);
return false;
}
/* Hold tightly onto the lock to prevent concurrent retires! */
if (!spin_trylock_irqsave(&rq->lock, flags))
continue;
......@@ -1502,6 +1525,15 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (i915_request_has_sentinel(last))
goto done;
/*
* We avoid submitting virtual requests into
* the secondary ports so that we can migrate
* the request immediately to another engine
* rather than wait for the primary request.
*/
if (rq->execution_mask != engine->mask)
goto done;
/*
* If GVT overrides us we only ever submit
* port[0], leaving port[1] empty. Note that we
......@@ -3562,7 +3594,6 @@ static void virtual_submission_tasklet(unsigned long data)
static void virtual_submit_request(struct i915_request *rq)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
struct i915_request *old;
unsigned long flags;
ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
......@@ -3573,28 +3604,27 @@ static void virtual_submit_request(struct i915_request *rq)
spin_lock_irqsave(&ve->base.active.lock, flags);
old = ve->request;
if (old) { /* background completion event from preempt-to-busy */
GEM_BUG_ON(!__i915_request_is_complete(old));
__i915_request_submit(old);
i915_request_put(old);
}
/* By the time we resubmit a request, it may be completed */
if (__i915_request_is_complete(rq)) {
__i915_request_submit(rq);
goto unlock;
}
ve->base.execlists.queue_priority_hint = INT_MIN;
ve->request = NULL;
} else {
ve->base.execlists.queue_priority_hint = rq_prio(rq);
ve->request = i915_request_get(rq);
if (ve->request) { /* background completion from preempt-to-busy */
GEM_BUG_ON(!i915_request_completed(ve->request));
__i915_request_submit(ve->request);
i915_request_put(ve->request);
}
GEM_BUG_ON(!list_empty(virtual_queue(ve)));
list_move_tail(&rq->sched.link, virtual_queue(ve));
ve->base.execlists.queue_priority_hint = rq_prio(rq);
ve->request = i915_request_get(rq);
tasklet_hi_schedule(&ve->base.execlists.tasklet);
}
GEM_BUG_ON(!list_empty(virtual_queue(ve)));
list_move_tail(&rq->sched.link, virtual_queue(ve));
tasklet_hi_schedule(&ve->base.execlists.tasklet);
unlock:
spin_unlock_irqrestore(&ve->base.active.lock, flags);
}
......
......@@ -4566,7 +4566,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
spin_lock_irq(&engine->active.lock);
__unwind_incomplete_requests(engine);
spin_unlock_irq(&engine->active.lock);
GEM_BUG_ON(rq->engine != ve->engine);
GEM_BUG_ON(rq->engine != engine);
/* Reset the engine while keeping our active request on hold */
execlists_hold(engine, rq);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment