drm/i915: Drop i915_request.lock requirement for intel_rps_boost()

Since we use a flag within i915_request.flags to indicate when we have boosted the request (so that we only apply the boost) once, this can be used as the serialisation with i915_request_retire() to avoid having to explicitly take the i915_request.lock which is more heavily contended. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20201231093149.19086-1-chris@chris-wilson.co.uk

drm/i915: Drop i915_request.lock requirement for intel_rps_boost()
Since we use a flag within i915_request.flags to indicate when we have boosted the request (so that we only apply the boost) once, this can be used as the serialisation with i915_request_retire() to avoid having to explicitly take the i915_request.lock which is more heavily contended. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20201231093149.19086-1-chris@chris-wilson.co.uk
4e5c8a99 · Chris Wilson · 9c080b0f · 4e5c8a99 · 4e5c8a99 · 4e5c8a99
Commit 4e5c8a99 authored Dec 31, 2020 by Chris Wilson
5 changed files
--- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
@@ -578,7 +578,7 @@ static int rps_boost_show(struct seq_file *m, void *data)
 		   intel_gpu_freq(rps, rps->efficient_freq),
 		   intel_gpu_freq(rps, rps->boost_freq));

-	seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
+	seq_printf(m, "Wait boosts: %d\n", READ_ONCE(rps->boosts));

 	if (INTEL_GEN(i915) >= 6 && intel_rps_is_active(rps)) {
 		struct intel_uncore *uncore = gt->uncore;

--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -862,6 +862,8 @@ void intel_rps_park(struct intel_rps *rps)
 {
 	int adj;

+	GEM_BUG_ON(atomic_read(&rps->num_waiters));
+
 	if (!intel_rps_clear_active(rps))
 		return;

@@ -917,28 +919,27 @@ void intel_rps_park(struct intel_rps *rps)

 void intel_rps_boost(struct i915_request *rq)
 {
+	if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
+		return;
+
+	/* Serializes with i915_request_retire() */
+	if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
 		struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
-	unsigned long flags;

-	if (i915_request_signaled(rq) || !intel_rps_is_active(rps))
+		if (atomic_fetch_inc(&rps->num_waiters))
 			return;

-	/* Serializes with i915_request_retire() */
-	spin_lock_irqsave(&rq->lock, flags);
-	if (!i915_request_has_waitboost(rq) &&
-	    !dma_fence_is_signaled_locked(&rq->fence)) {
-		set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
+		if (!intel_rps_is_active(rps))
+			return;

 		GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
 			 rq->fence.context, rq->fence.seqno);

-		if (!atomic_fetch_inc(&rps->num_waiters) &&
-		    READ_ONCE(rps->cur_freq) < rps->boost_freq)
+		if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
 			schedule_work(&rps->work);

-		atomic_inc(&rps->boosts);
+		WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
 	}
-	spin_unlock_irqrestore(&rq->lock, flags);
 }

 int intel_rps_set(struct intel_rps *rps, u8 val)

--- a/drivers/gpu/drm/i915/gt/intel_rps_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h
@@ -93,7 +93,7 @@ struct intel_rps {
 	} power;

 	atomic_t num_waiters;
-	atomic_t boosts;
+	unsigned int boosts;

 	/* manual wa residency calculations */
 	struct intel_rps_ei ei;

--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1232,7 +1232,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 		   intel_gpu_freq(rps, rps->efficient_freq),
 		   intel_gpu_freq(rps, rps->boost_freq));

-	seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
+	seq_printf(m, "Wait boosts: %d\n", READ_ONCE(rps->boosts));

 	if (INTEL_GEN(dev_priv) >= 6 && intel_rps_is_active(rps)) {
 		u32 rpup, rpupei;

--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -307,10 +307,8 @@ bool i915_request_retire(struct i915_request *rq)
 		spin_unlock_irq(&rq->lock);
 	}

-	if (i915_request_has_waitboost(rq)) {
-		GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
+	if (test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags))
 		atomic_dec(&rq->engine->gt->rps.num_waiters);
-	}

 	/*
 	 * We only loosely track inflight requests across preemption,