Commit 9d5612ca authored by Chris Wilson

drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission

Move the slow register write and readback out of the critical path for
execlists submission and delay it until the following worker, shaving
off around 200us. Note that the same signal_irq_work() is allowed to run
concurrently on each CPU (it is only ever queued once while pending,
but once running it can be requeued and re-executed), so we have to
remember to lock the global interactions, as we cannot rely on
signal_irq_work() itself providing the serialisation (in contrast to a
tasklet).
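
To make that locking rule concrete, here is a minimal, hypothetical
sketch (the demo_* names are invented for illustration and are not code
from this patch): any state shared by an irq_work handler must be
guarded by the caller's own lock, since the handler may execute on
several CPUs at once and may requeue itself while running.

  #include <linux/irq_work.h>
  #include <linux/list.h>
  #include <linux/spinlock.h>

  static DEFINE_SPINLOCK(demo_lock);	/* guards demo_signalers */
  static LIST_HEAD(demo_signalers);
  static struct irq_work demo_work;	/* init_irq_work(&demo_work, demo_handler) at setup */

  static void demo_handler(struct irq_work *work)
  {
  	/*
  	 * Another CPU may be inside this handler at the same time;
  	 * unlike a tasklet, irq_work provides no serialisation of its
  	 * own, so take the lock around the shared state.
  	 */
  	spin_lock(&demo_lock);
  	/* ... consume demo_signalers under the lock ... */
  	spin_unlock(&demo_lock);

  	/* Pending at most once, but requeueable once running. */
  	if (!list_empty(&demo_signalers))
  		irq_work_queue(&demo_work);
  }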

By pushing the arm/disarm into the central signaling worker, we can
close the race for disarming the interrupt (and dropping its associated
GT wakeref) on parking the engine. If we lose the race, that GT wakeref
may be held indefinitely, preventing the machine from sleeping while
the GPU is ostensibly idle.

v2: Move the self-arming parking of the signal_irq_work to a flush of
the irq-work from intel_breadcrumbs_park().
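
In the same hypothetical demo_* terms as above, the v2 parking flush
amounts to the drain loop below (it mirrors the intel_breadcrumbs_park()
hunk in the diff): first flush any handler already in flight, then run
the handler directly until it has disarmed itself and dropped its
wakeref.

  /* Assume demo_handler clears this once demo_signalers is empty. */
  static bool demo_armed;

  static void demo_park(void)
  {
  	/* Wait for a handler currently running on another CPU. */
  	irq_work_sync(&demo_work);

  	while (READ_ONCE(demo_armed)) {
  		local_irq_disable();	/* handler expects hardirq context */
  		demo_handler(&demo_work);
  		local_irq_enable();
  		cond_resched();
  	}
  }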

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2271
Fixes: e2300560 ("drm/i915/gt: Hold context/request reference while breadcrumbs are active")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201123113717.20500-1-chris@chris-wilson.co.uk
parent 4ee73792
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -30,18 +30,21 @@
 #include "i915_trace.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
+#include "intel_engine_pm.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
 
-static void irq_enable(struct intel_engine_cs *engine)
+static bool irq_enable(struct intel_engine_cs *engine)
 {
 	if (!engine->irq_enable)
-		return;
+		return false;
 
 	/* Caller disables interrupts */
 	spin_lock(&engine->gt->irq_lock);
 	engine->irq_enable(engine);
 	spin_unlock(&engine->gt->irq_lock);
+
+	return true;
 }
 
 static void irq_disable(struct intel_engine_cs *engine)
@@ -57,12 +60,11 @@ static void irq_disable(struct intel_engine_cs *engine)
 
 static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
-	lockdep_assert_held(&b->irq_lock);
-
-	if (!b->irq_engine || b->irq_armed)
-		return;
-
-	if (!intel_gt_pm_get_if_awake(b->irq_engine->gt))
+	/*
+	 * Since we are waiting on a request, the GPU should be busy
+	 * and should have its own rpm reference.
+	 */
+	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
 		return;
 
 	/*
@@ -73,25 +75,24 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 	 */
 	WRITE_ONCE(b->irq_armed, true);
 
-	/*
-	 * Since we are waiting on a request, the GPU should be busy
-	 * and should have its own rpm reference. This is tracked
-	 * by i915->gt.awake, we can forgo holding our own wakref
-	 * for the interrupt as before i915->gt.awake is released (when
-	 * the driver is idle) we disarm the breadcrumbs.
-	 */
-
-	if (!b->irq_enabled++)
-		irq_enable(b->irq_engine);
+	/* Requests may have completed before we could enable the interrupt. */
+	if (!b->irq_enabled++ && irq_enable(b->irq_engine))
+		irq_work_queue(&b->irq_work);
+}
+
+static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
+{
+	if (!b->irq_engine)
+		return;
+
+	spin_lock(&b->irq_lock);
+	if (!b->irq_armed)
+		__intel_breadcrumbs_arm_irq(b);
+	spin_unlock(&b->irq_lock);
 }
 
 static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 {
-	lockdep_assert_held(&b->irq_lock);
-
-	if (!b->irq_engine || !b->irq_armed)
-		return;
-
 	GEM_BUG_ON(!b->irq_enabled);
 	if (!--b->irq_enabled)
 		irq_disable(b->irq_engine);
@@ -105,8 +106,6 @@ static void add_signaling_context(struct intel_breadcrumbs *b,
 {
 	intel_context_get(ce);
 	list_add_tail(&ce->signal_link, &b->signalers);
-	if (list_is_first(&ce->signal_link, &b->signalers))
-		__intel_breadcrumbs_arm_irq(b);
 }
 
 static void remove_signaling_context(struct intel_breadcrumbs *b,
@@ -197,7 +196,32 @@ static void signal_irq_work(struct irq_work *work)
 
 	spin_lock(&b->irq_lock);
 
-	if (list_empty(&b->signalers))
+	/*
+	 * Keep the irq armed until the interrupt after all listeners are gone.
+	 *
+	 * Enabling/disabling the interrupt is rather costly, roughly a couple
+	 * of hundred microseconds. If we are proactive and enable/disable
+	 * the interrupt around every request that wants a breadcrumb, we
+	 * quickly drown in the extra orders of magnitude of latency imposed
+	 * on request submission.
+	 *
+	 * So we try to be lazy, and keep the interrupts enabled until no
+	 * more listeners appear within a breadcrumb interrupt interval (that
+	 * is until a request completes that no one cares about). The
+	 * observation is that listeners come in batches, and will often
+	 * listen to a bunch of requests in succession. Though note on icl+,
+	 * interrupts are always enabled due to concerns with rc6 being
+	 * dysfunctional with per-engine interrupt masking.
+	 *
+	 * We also try to avoid raising too many interrupts, as they may
+	 * be generated by userspace batches and it is unfortunately rather
+	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
+	 * whenever no one appears to be listening, we turn off the interrupts.
+	 * Fewer interrupts should conserve power -- at the very least, fewer
+	 * interrupt draw less ire from other users of the system and tools
+	 * like powertop.
+	 */
+	if (b->irq_armed && list_empty(&b->signalers))
 		__intel_breadcrumbs_disarm_irq(b);
 
 	list_splice_init(&b->signaled_requests, &signal);
@@ -251,6 +275,9 @@ static void signal_irq_work(struct irq_work *work)
 
 		i915_request_put(rq);
 	}
+
+	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
+		intel_breadcrumbs_arm_irq(b);
 }
 
 struct intel_breadcrumbs *
@@ -292,21 +319,22 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
 
 void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
 {
-	unsigned long flags;
-
-	if (!READ_ONCE(b->irq_armed))
-		return;
-
-	spin_lock_irqsave(&b->irq_lock, flags);
-	__intel_breadcrumbs_disarm_irq(b);
-	spin_unlock_irqrestore(&b->irq_lock, flags);
-
-	if (!list_empty(&b->signalers))
-		irq_work_queue(&b->irq_work);
+	/* Kick the work once more to drain the signalers */
+	irq_work_sync(&b->irq_work);
+	while (unlikely(READ_ONCE(b->irq_armed))) {
+		local_irq_disable();
+		signal_irq_work(&b->irq_work);
+		local_irq_enable();
+		cond_resched();
+	}
+	GEM_BUG_ON(!list_empty(&b->signalers));
 }
 
 void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
 {
+	irq_work_sync(&b->irq_work);
+	GEM_BUG_ON(!list_empty(&b->signalers));
+	GEM_BUG_ON(b->irq_armed);
 	kfree(b);
 }
 
@@ -362,9 +390,12 @@ static void insert_breadcrumb(struct i915_request *rq,
 	GEM_BUG_ON(!check_signal_order(ce, rq));
 	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 
-	/* Check after attaching to irq, interrupt may have already fired. */
-	if (__request_completed(rq))
-		irq_work_queue(&b->irq_work);
+	/*
+	 * Defer enabling the interrupt to after HW submission and recheck
+	 * the request as it may have completed and raised the interrupt as
+	 * we were attaching it into the lists.
+	 */
+	irq_work_queue(&b->irq_work);
 }
 
 bool i915_request_enable_breadcrumb(struct i915_request *rq)