drm/i915/bdw: New logical ring submission mechanism

Well, new-ish: if all this code looks familiar, that's because it's a clone of the existing submission mechanism (with some modifications here and there to adapt it to LRCs and Execlists). And why did we do this instead of reusing code, one might wonder? Well, there are some fears that the differences are big enough that they will end up breaking all platforms. Also, Execlists offer several advantages, like control over when the GPU is done with a given workload, that can help simplify the submission mechanism, no doubt. I am interested in getting Execlists to work first and foremost, but in the future this parallel submission mechanism will help us to fine tune the mechanism without affecting old gens. v2: Pass the ringbuffer only (whenever possible). Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> Reviewed-by: Damien Lespiau <damien.lespiau@intel.com> [danvet: Appease checkpatch. Again. And drop the legacy sarea gunk that somehow crept in.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>

drm/i915/bdw: New logical ring submission mechanism
Well, new-ish: if all this code looks familiar, that's because it's a clone of the existing submission mechanism (with some modifications here and there to adapt it to LRCs and Execlists). And why did we do this instead of reusing code, one might wonder? Well, there are some fears that the differences are big enough that they will end up breaking all platforms. Also, Execlists offer several advantages, like control over when the GPU is done with a given workload, that can help simplify the submission mechanism, no doubt. I am interested in getting Execlists to work first and foremost, but in the future this parallel submission mechanism will help us to fine tune the mechanism without affecting old gens. v2: Pass the ringbuffer only (whenever possible). Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> Reviewed-by: Damien Lespiau <damien.lespiau@intel.com> [danvet: Appease checkpatch. Again. And drop the legacy sarea gunk that somehow crept in.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
82e104cc · Oscar Mateo · Daniel Vetter · 26fbb774 · 82e104cc · 82e104cc
Commit 82e104cc authored Jul 24, 2014 by Oscar Mateo Committed by Daniel Vetter Aug 11, 2014
4 changed files
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -108,6 +108,195 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
 	/* TODO */
 }

+void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
+{
+	intel_logical_ring_advance(ringbuf);
+
+	if (intel_ring_stopped(ringbuf->ring))
+		return;
+
+	/* TODO: how to submit a context to the ELSP is not here yet */
+}
+
+static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
+{
+	if (ring->outstanding_lazy_seqno)
+		return 0;
+
+	if (ring->preallocated_lazy_request == NULL) {
+		struct drm_i915_gem_request *request;
+
+		request = kmalloc(sizeof(*request), GFP_KERNEL);
+		if (request == NULL)
+			return -ENOMEM;
+
+		ring->preallocated_lazy_request = request;
+	}
+
+	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
+}
+
+static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
+				     int bytes)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_i915_gem_request *request;
+	u32 seqno = 0;
+	int ret;
+
+	if (ringbuf->last_retired_head != -1) {
+		ringbuf->head = ringbuf->last_retired_head;
+		ringbuf->last_retired_head = -1;
+
+		ringbuf->space = intel_ring_space(ringbuf);
+		if (ringbuf->space >= bytes)
+			return 0;
+	}
+
+	list_for_each_entry(request, &ring->request_list, list) {
+		if (__intel_ring_space(request->tail, ringbuf->tail,
+				       ringbuf->size) >= bytes) {
+			seqno = request->seqno;
+			break;
+		}
+	}
+
+	if (seqno == 0)
+		return -ENOSPC;
+
+	ret = i915_wait_seqno(ring, seqno);
+	if (ret)
+		return ret;
+
+	/* TODO: make sure we update the right ringbuffer's last_retired_head
+	 * when retiring requests */
+	i915_gem_retire_requests_ring(ring);
+	ringbuf->head = ringbuf->last_retired_head;
+	ringbuf->last_retired_head = -1;
+
+	ringbuf->space = intel_ring_space(ringbuf);
+	return 0;
+}
+
+static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
+				       int bytes)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned long end;
+	int ret;
+
+	ret = logical_ring_wait_request(ringbuf, bytes);
+	if (ret != -ENOSPC)
+		return ret;
+
+	/* Force the context submission in case we have been skipping it */
+	intel_logical_ring_advance_and_submit(ringbuf);
+
+	/* With GEM the hangcheck timer should kick us out of the loop,
+	 * leaving it early runs the risk of corrupting GEM state (due
+	 * to running on almost untested codepaths). But on resume
+	 * timers don't work yet, so prevent a complete hang in that
+	 * case by choosing an insanely large timeout. */
+	end = jiffies + 60 * HZ;
+
+	do {
+		ringbuf->head = I915_READ_HEAD(ring);
+		ringbuf->space = intel_ring_space(ringbuf);
+		if (ringbuf->space >= bytes) {
+			ret = 0;
+			break;
+		}
+
+		msleep(1);
+
+		if (dev_priv->mm.interruptible && signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+					   dev_priv->mm.interruptible);
+		if (ret)
+			break;
+
+		if (time_after(jiffies, end)) {
+			ret = -EBUSY;
+			break;
+		}
+	} while (1);
+
+	return ret;
+}
+
+static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
+{
+	uint32_t __iomem *virt;
+	int rem = ringbuf->size - ringbuf->tail;
+
+	if (ringbuf->space < rem) {
+		int ret = logical_ring_wait_for_space(ringbuf, rem);
+
+		if (ret)
+			return ret;
+	}
+
+	virt = ringbuf->virtual_start + ringbuf->tail;
+	rem /= 4;
+	while (rem--)
+		iowrite32(MI_NOOP, virt++);
+
+	ringbuf->tail = 0;
+	ringbuf->space = intel_ring_space(ringbuf);
+
+	return 0;
+}
+
+static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
+{
+	int ret;
+
+	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
+		ret = logical_ring_wrap_buffer(ringbuf);
+		if (unlikely(ret))
+			return ret;
+	}
+
+	if (unlikely(ringbuf->space < bytes)) {
+		ret = logical_ring_wait_for_space(ringbuf, bytes);
+		if (unlikely(ret))
+			return ret;
+	}
+
+	return 0;
+}
+
+int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+				   dev_priv->mm.interruptible);
+	if (ret)
+		return ret;
+
+	ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
+	if (ret)
+		return ret;
+
+	/* Preallocate the olr before touching the ring */
+	ret = logical_ring_alloc_seqno(ring);
+	if (ret)
+		return ret;
+
+	ringbuf->space -= num_dwords * sizeof(uint32_t);
+	return 0;
+}
+
 static int gen8_init_common_ring(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;

--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -29,6 +29,19 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring);
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
 int intel_logical_rings_init(struct drm_device *dev);

+void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
+static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
+{
+	ringbuf->tail &= ringbuf->size - 1;
+}
+static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
+					   u32 data)
+{
+	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
+	ringbuf->tail += 4;
+}
+int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords);
+
 /* Logical Ring Contexts */
 void intel_lr_context_free(struct intel_context *ctx);
 int intel_lr_context_deferred_create(struct intel_context *ctx,

--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -57,7 +57,7 @@ intel_ring_initialized(struct intel_engine_cs *ring)
 		return ring->buffer && ring->buffer->obj;
 }

-static inline int __ring_space(int head, int tail, int size)
+int __intel_ring_space(int head, int tail, int size)
 {
 	int space = head - (tail + I915_RING_FREE_SPACE);
 	if (space < 0)
@@ -65,12 +65,13 @@ static inline int __ring_space(int head, int tail, int size)
 	return space;
 }

-static inline int ring_space(struct intel_ringbuffer *ringbuf)
+int intel_ring_space(struct intel_ringbuffer *ringbuf)
 {
-	return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size);
+	return __intel_ring_space(ringbuf->head & HEAD_ADDR,
+				  ringbuf->tail, ringbuf->size);
 }

-static bool intel_ring_stopped(struct intel_engine_cs *ring)
+bool intel_ring_stopped(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
@@ -585,7 +586,7 @@ static int init_ring_common(struct intel_engine_cs *ring)
 	else {
 		ringbuf->head = I915_READ_HEAD(ring);
 		ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
-		ringbuf->space = ring_space(ringbuf);
+		ringbuf->space = intel_ring_space(ringbuf);
 		ringbuf->last_retired_head = -1;
 	}

@@ -1702,13 +1703,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 		ringbuf->head = ringbuf->last_retired_head;
 		ringbuf->last_retired_head = -1;

-		ringbuf->space = ring_space(ringbuf);
+		ringbuf->space = intel_ring_space(ringbuf);
 		if (ringbuf->space >= n)
 			return 0;
 	}

 	list_for_each_entry(request, &ring->request_list, list) {
-		if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
+		if (__intel_ring_space(request->tail, ringbuf->tail,
+				       ringbuf->size) >= n) {
 			seqno = request->seqno;
 			break;
 		}
@@ -1725,7 +1727,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 	ringbuf->head = ringbuf->last_retired_head;
 	ringbuf->last_retired_head = -1;

-	ringbuf->space = ring_space(ringbuf);
+	ringbuf->space = intel_ring_space(ringbuf);
 	return 0;
 }

@@ -1754,7 +1756,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	trace_i915_ring_wait_begin(ring);
 	do {
 		ringbuf->head = I915_READ_HEAD(ring);
-		ringbuf->space = ring_space(ringbuf);
+		ringbuf->space = intel_ring_space(ringbuf);
 		if (ringbuf->space >= n) {
 			ret = 0;
 			break;
@@ -1806,7 +1808,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 		iowrite32(MI_NOOP, virt++);

 	ringbuf->tail = 0;
-	ringbuf->space = ring_space(ringbuf);
+	ringbuf->space = intel_ring_space(ringbuf);

 	return 0;
 }

--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -374,6 +374,9 @@ static inline void intel_ring_advance(struct intel_engine_cs *ring)
 	struct intel_ringbuffer *ringbuf = ring->buffer;
 	ringbuf->tail &= ringbuf->size - 1;
 }
+int __intel_ring_space(int head, int tail, int size);
+int intel_ring_space(struct intel_ringbuffer *ringbuf);
+bool intel_ring_stopped(struct intel_engine_cs *ring);
 void __intel_ring_advance(struct intel_engine_cs *ring);

 int __must_check intel_ring_idle(struct intel_engine_cs *ring);