Commit 82e104cc authored by Oscar Mateo's avatar Oscar Mateo Committed by Daniel Vetter

drm/i915/bdw: New logical ring submission mechanism

Well, new-ish: if all this code looks familiar, that's because it's
a clone of the existing submission mechanism (with some modifications
here and there to adapt it to LRCs and Execlists).

And why did we do this instead of reusing code, one might wonder?
Well, there are some fears that the differences are big enough that
they will end up breaking all platforms.

Also, Execlists offer several advantages, like control over when the
GPU is done with a given workload, that can help simplify the
submission mechanism, no doubt. I am interested in getting Execlists
to work first and foremost, but in the future this parallel submission
mechanism will help us to fine tune the mechanism without affecting
old gens.

v2: Pass the ringbuffer only (whenever possible).
Signed-off-by: default avatarOscar Mateo <oscar.mateo@intel.com>
Reviewed-by: default avatarDamien Lespiau <damien.lespiau@intel.com>
[danvet: Appease checkpatch. Again. And drop the legacy sarea gunk
that somehow crept in.]
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
parent 26fbb774
......@@ -108,6 +108,195 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
/* TODO */
}
void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
{
intel_logical_ring_advance(ringbuf);
if (intel_ring_stopped(ringbuf->ring))
return;
/* TODO: how to submit a context to the ELSP is not here yet */
}
static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
{
if (ring->outstanding_lazy_seqno)
return 0;
if (ring->preallocated_lazy_request == NULL) {
struct drm_i915_gem_request *request;
request = kmalloc(sizeof(*request), GFP_KERNEL);
if (request == NULL)
return -ENOMEM;
ring->preallocated_lazy_request = request;
}
return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
}
static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
int bytes)
{
struct intel_engine_cs *ring = ringbuf->ring;
struct drm_i915_gem_request *request;
u32 seqno = 0;
int ret;
if (ringbuf->last_retired_head != -1) {
ringbuf->head = ringbuf->last_retired_head;
ringbuf->last_retired_head = -1;
ringbuf->space = intel_ring_space(ringbuf);
if (ringbuf->space >= bytes)
return 0;
}
list_for_each_entry(request, &ring->request_list, list) {
if (__intel_ring_space(request->tail, ringbuf->tail,
ringbuf->size) >= bytes) {
seqno = request->seqno;
break;
}
}
if (seqno == 0)
return -ENOSPC;
ret = i915_wait_seqno(ring, seqno);
if (ret)
return ret;
/* TODO: make sure we update the right ringbuffer's last_retired_head
* when retiring requests */
i915_gem_retire_requests_ring(ring);
ringbuf->head = ringbuf->last_retired_head;
ringbuf->last_retired_head = -1;
ringbuf->space = intel_ring_space(ringbuf);
return 0;
}
static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
int bytes)
{
struct intel_engine_cs *ring = ringbuf->ring;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
unsigned long end;
int ret;
ret = logical_ring_wait_request(ringbuf, bytes);
if (ret != -ENOSPC)
return ret;
/* Force the context submission in case we have been skipping it */
intel_logical_ring_advance_and_submit(ringbuf);
/* With GEM the hangcheck timer should kick us out of the loop,
* leaving it early runs the risk of corrupting GEM state (due
* to running on almost untested codepaths). But on resume
* timers don't work yet, so prevent a complete hang in that
* case by choosing an insanely large timeout. */
end = jiffies + 60 * HZ;
do {
ringbuf->head = I915_READ_HEAD(ring);
ringbuf->space = intel_ring_space(ringbuf);
if (ringbuf->space >= bytes) {
ret = 0;
break;
}
msleep(1);
if (dev_priv->mm.interruptible && signal_pending(current)) {
ret = -ERESTARTSYS;
break;
}
ret = i915_gem_check_wedge(&dev_priv->gpu_error,
dev_priv->mm.interruptible);
if (ret)
break;
if (time_after(jiffies, end)) {
ret = -EBUSY;
break;
}
} while (1);
return ret;
}
static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
{
uint32_t __iomem *virt;
int rem = ringbuf->size - ringbuf->tail;
if (ringbuf->space < rem) {
int ret = logical_ring_wait_for_space(ringbuf, rem);
if (ret)
return ret;
}
virt = ringbuf->virtual_start + ringbuf->tail;
rem /= 4;
while (rem--)
iowrite32(MI_NOOP, virt++);
ringbuf->tail = 0;
ringbuf->space = intel_ring_space(ringbuf);
return 0;
}
static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
{
int ret;
if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
ret = logical_ring_wrap_buffer(ringbuf);
if (unlikely(ret))
return ret;
}
if (unlikely(ringbuf->space < bytes)) {
ret = logical_ring_wait_for_space(ringbuf, bytes);
if (unlikely(ret))
return ret;
}
return 0;
}
int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
{
struct intel_engine_cs *ring = ringbuf->ring;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
int ret;
ret = i915_gem_check_wedge(&dev_priv->gpu_error,
dev_priv->mm.interruptible);
if (ret)
return ret;
ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
if (ret)
return ret;
/* Preallocate the olr before touching the ring */
ret = logical_ring_alloc_seqno(ring);
if (ret)
return ret;
ringbuf->space -= num_dwords * sizeof(uint32_t);
return 0;
}
static int gen8_init_common_ring(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
......
......@@ -29,6 +29,19 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring);
void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
int intel_logical_rings_init(struct drm_device *dev);
void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
{
ringbuf->tail &= ringbuf->size - 1;
}
static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
u32 data)
{
iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
ringbuf->tail += 4;
}
int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords);
/* Logical Ring Contexts */
void intel_lr_context_free(struct intel_context *ctx);
int intel_lr_context_deferred_create(struct intel_context *ctx,
......
......@@ -57,7 +57,7 @@ intel_ring_initialized(struct intel_engine_cs *ring)
return ring->buffer && ring->buffer->obj;
}
static inline int __ring_space(int head, int tail, int size)
int __intel_ring_space(int head, int tail, int size)
{
int space = head - (tail + I915_RING_FREE_SPACE);
if (space < 0)
......@@ -65,12 +65,13 @@ static inline int __ring_space(int head, int tail, int size)
return space;
}
static inline int ring_space(struct intel_ringbuffer *ringbuf)
int intel_ring_space(struct intel_ringbuffer *ringbuf)
{
return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size);
return __intel_ring_space(ringbuf->head & HEAD_ADDR,
ringbuf->tail, ringbuf->size);
}
static bool intel_ring_stopped(struct intel_engine_cs *ring)
bool intel_ring_stopped(struct intel_engine_cs *ring)
{
struct drm_i915_private *dev_priv = ring->dev->dev_private;
return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
......@@ -585,7 +586,7 @@ static int init_ring_common(struct intel_engine_cs *ring)
else {
ringbuf->head = I915_READ_HEAD(ring);
ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
ringbuf->space = ring_space(ringbuf);
ringbuf->space = intel_ring_space(ringbuf);
ringbuf->last_retired_head = -1;
}
......@@ -1702,13 +1703,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
ringbuf->head = ringbuf->last_retired_head;
ringbuf->last_retired_head = -1;
ringbuf->space = ring_space(ringbuf);
ringbuf->space = intel_ring_space(ringbuf);
if (ringbuf->space >= n)
return 0;
}
list_for_each_entry(request, &ring->request_list, list) {
if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
if (__intel_ring_space(request->tail, ringbuf->tail,
ringbuf->size) >= n) {
seqno = request->seqno;
break;
}
......@@ -1725,7 +1727,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
ringbuf->head = ringbuf->last_retired_head;
ringbuf->last_retired_head = -1;
ringbuf->space = ring_space(ringbuf);
ringbuf->space = intel_ring_space(ringbuf);
return 0;
}
......@@ -1754,7 +1756,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
trace_i915_ring_wait_begin(ring);
do {
ringbuf->head = I915_READ_HEAD(ring);
ringbuf->space = ring_space(ringbuf);
ringbuf->space = intel_ring_space(ringbuf);
if (ringbuf->space >= n) {
ret = 0;
break;
......@@ -1806,7 +1808,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
iowrite32(MI_NOOP, virt++);
ringbuf->tail = 0;
ringbuf->space = ring_space(ringbuf);
ringbuf->space = intel_ring_space(ringbuf);
return 0;
}
......
......@@ -374,6 +374,9 @@ static inline void intel_ring_advance(struct intel_engine_cs *ring)
struct intel_ringbuffer *ringbuf = ring->buffer;
ringbuf->tail &= ringbuf->size - 1;
}
int __intel_ring_space(int head, int tail, int size);
int intel_ring_space(struct intel_ringbuffer *ringbuf);
bool intel_ring_stopped(struct intel_engine_cs *ring);
void __intel_ring_advance(struct intel_engine_cs *ring);
int __must_check intel_ring_idle(struct intel_engine_cs *ring);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment