Commit 6faf5916 authored by Chris Wilson

drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation

The writing is on the wall for the existence of a single execution queue
along each engine, and as a consequence we will not be able to track
dependencies along the HW queue itself, i.e. we will not be able to use
HW semaphores on gen7 as they use a global set of registers (and unlike
gen8+ we can not effectively target memory to keep per-context seqno and
dependencies).

On the positive side, when we implement request reordering for gen7 we
also can not presume a simple execution queue and would also require
removing the current semaphore generation code. So this brings us another
step closer to request reordering for ringbuffer submission!

The negative side is that using interrupts to drive inter-engine
synchronisation is much slower (4us -> 15us to do a nop on each of the 3
engines on ivb). This is much better than it was at the time of introducing
the HW semaphores and, equally important, userspace weaned itself off
intermixing dependent BLT/RENDER operations (the prime culprit was glyph
rendering in UXA). So while we regress the microbenchmarks, it should not
impact the user.

References: https://bugs.freedesktop.org/show_bug.cgi?id=108888
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181228140736.32606-2-chris@chris-wilson.co.uk
parent 167bc759
...@@ -1041,21 +1041,7 @@ static const struct file_operations i915_error_state_fops = { ...@@ -1041,21 +1041,7 @@ static const struct file_operations i915_error_state_fops = {
static int static int
i915_next_seqno_set(void *data, u64 val) i915_next_seqno_set(void *data, u64 val)
{ {
struct drm_i915_private *dev_priv = data; return val ? 0 : -EINVAL;
struct drm_device *dev = &dev_priv->drm;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
ret = i915_gem_set_global_seqno(dev, val);
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
return ret;
} }
DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
...@@ -4101,9 +4087,6 @@ i915_drop_caches_set(void *data, u64 val) ...@@ -4101,9 +4087,6 @@ i915_drop_caches_set(void *data, u64 val)
I915_WAIT_LOCKED, I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT); MAX_SCHEDULE_TIMEOUT);
if (ret == 0 && val & DROP_RESET_SEQNO)
ret = i915_gem_set_global_seqno(&i915->drm, 1);
if (val & DROP_RETIRE) if (val & DROP_RETIRE)
i915_retire_requests(i915); i915_retire_requests(i915);
......
...@@ -349,7 +349,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, ...@@ -349,7 +349,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL); value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL);
break; break;
case I915_PARAM_HAS_SEMAPHORES: case I915_PARAM_HAS_SEMAPHORES:
value = HAS_LEGACY_SEMAPHORES(dev_priv); value = 0;
break; break;
case I915_PARAM_HAS_SECURE_BATCHES: case I915_PARAM_HAS_SECURE_BATCHES:
value = capable(CAP_SYS_ADMIN); value = capable(CAP_SYS_ADMIN);
......
...@@ -1948,7 +1948,6 @@ struct drm_i915_private { ...@@ -1948,7 +1948,6 @@ struct drm_i915_private {
struct list_head active_rings; struct list_head active_rings;
struct list_head closed_vma; struct list_head closed_vma;
u32 active_requests; u32 active_requests;
u32 request_serial;
/** /**
* Is the GPU currently considered idle, or busy executing * Is the GPU currently considered idle, or busy executing
...@@ -2396,8 +2395,6 @@ intel_info(const struct drm_i915_private *dev_priv) ...@@ -2396,8 +2395,6 @@ intel_info(const struct drm_i915_private *dev_priv)
#define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS) #define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS)
#define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS) #define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS)
#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN(dev_priv, 7)
#define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc) #define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc)
#define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop) #define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop)
#define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED)) #define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED))
......
...@@ -3318,7 +3318,7 @@ static void nop_submit_request(struct i915_request *request) ...@@ -3318,7 +3318,7 @@ static void nop_submit_request(struct i915_request *request)
spin_lock_irqsave(&request->engine->timeline.lock, flags); spin_lock_irqsave(&request->engine->timeline.lock, flags);
__i915_request_submit(request); __i915_request_submit(request);
intel_engine_init_global_seqno(request->engine, request->global_seqno); intel_engine_write_global_seqno(request->engine, request->global_seqno);
spin_unlock_irqrestore(&request->engine->timeline.lock, flags); spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
} }
...@@ -3359,7 +3359,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) ...@@ -3359,7 +3359,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
/* /*
* Make sure no request can slip through without getting completed by * Make sure no request can slip through without getting completed by
* either this call here to intel_engine_init_global_seqno, or the one * either this call here to intel_engine_write_global_seqno, or the one
* in nop_submit_request. * in nop_submit_request.
*/ */
synchronize_rcu(); synchronize_rcu();
......
...@@ -111,99 +111,10 @@ i915_request_remove_from_client(struct i915_request *request) ...@@ -111,99 +111,10 @@ i915_request_remove_from_client(struct i915_request *request)
spin_unlock(&file_priv->mm.lock); spin_unlock(&file_priv->mm.lock);
} }
static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) static void reserve_gt(struct drm_i915_private *i915)
{ {
struct intel_engine_cs *engine;
struct i915_timeline *timeline;
enum intel_engine_id id;
int ret;
/* Carefully retire all requests without writing to the rings */
ret = i915_gem_wait_for_idle(i915,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
GEM_BUG_ON(i915->gt.active_requests);
/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
for_each_engine(engine, i915, id) {
GEM_TRACE("%s seqno %d (current %d) -> %d\n",
engine->name,
engine->timeline.seqno,
intel_engine_get_seqno(engine),
seqno);
if (seqno == engine->timeline.seqno)
continue;
kthread_park(engine->breadcrumbs.signaler);
if (!i915_seqno_passed(seqno, engine->timeline.seqno)) {
/* Flush any waiters before we reuse the seqno */
intel_engine_disarm_breadcrumbs(engine);
intel_engine_init_hangcheck(engine);
GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals));
}
/* Check we are idle before we fiddle with hw state! */
GEM_BUG_ON(!intel_engine_is_idle(engine));
GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request));
/* Finally reset hw state */
intel_engine_init_global_seqno(engine, seqno);
engine->timeline.seqno = seqno;
kthread_unpark(engine->breadcrumbs.signaler);
}
list_for_each_entry(timeline, &i915->gt.timelines, link)
memset(timeline->global_sync, 0, sizeof(timeline->global_sync));
i915->gt.request_serial = seqno;
return 0;
}
int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
{
struct drm_i915_private *i915 = to_i915(dev);
lockdep_assert_held(&i915->drm.struct_mutex);
if (seqno == 0)
return -EINVAL;
/* HWS page needs to be set less than what we will inject to ring */
return reset_all_global_seqno(i915, seqno - 1);
}
static int reserve_gt(struct drm_i915_private *i915)
{
int ret;
/*
* Reservation is fine until we may need to wrap around
*
* By incrementing the serial for every request, we know that no
* individual engine may exceed that serial (as each is reset to 0
* on any wrap). This protects even the most pessimistic of migrations
* of every request from all engines onto just one.
*/
while (unlikely(++i915->gt.request_serial == 0)) {
ret = reset_all_global_seqno(i915, 0);
if (ret) {
i915->gt.request_serial--;
return ret;
}
}
if (!i915->gt.active_requests++) if (!i915->gt.active_requests++)
i915_gem_unpark(i915); i915_gem_unpark(i915);
return 0;
} }
static void unreserve_gt(struct drm_i915_private *i915) static void unreserve_gt(struct drm_i915_private *i915)
...@@ -608,9 +519,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) ...@@ -608,9 +519,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
if (IS_ERR(ce)) if (IS_ERR(ce))
return ERR_CAST(ce); return ERR_CAST(ce);
ret = reserve_gt(i915); reserve_gt(i915);
if (ret)
goto err_unpin;
ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST); ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
if (ret) if (ret)
...@@ -743,7 +652,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) ...@@ -743,7 +652,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
kmem_cache_free(i915->requests, rq); kmem_cache_free(i915->requests, rq);
err_unreserve: err_unreserve:
unreserve_gt(i915); unreserve_gt(i915);
err_unpin:
intel_context_unpin(ce); intel_context_unpin(ce);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
...@@ -771,34 +679,12 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) ...@@ -771,34 +679,12 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
&from->submit, &from->submit,
I915_FENCE_GFP); I915_FENCE_GFP);
return ret < 0 ? ret : 0; } else {
} ret = i915_sw_fence_await_dma_fence(&to->submit,
&from->fence, 0,
if (to->engine->semaphore.sync_to) { I915_FENCE_GFP);
u32 seqno;
GEM_BUG_ON(!from->engine->semaphore.signal);
seqno = i915_request_global_seqno(from);
if (!seqno)
goto await_dma_fence;
if (seqno <= to->timeline->global_sync[from->engine->id])
return 0;
trace_i915_gem_ring_sync_to(to, from);
ret = to->engine->semaphore.sync_to(to, from);
if (ret)
return ret;
to->timeline->global_sync[from->engine->id] = seqno;
return 0;
} }
await_dma_fence:
ret = i915_sw_fence_await_dma_fence(&to->submit,
&from->fence, 0,
I915_FENCE_GFP);
return ret < 0 ? ret : 0; return ret < 0 ? ret : 0;
} }
......
...@@ -63,14 +63,6 @@ struct i915_timeline { ...@@ -63,14 +63,6 @@ struct i915_timeline {
* redundant and we can discard it without loss of generality. * redundant and we can discard it without loss of generality.
*/ */
struct i915_syncmap *sync; struct i915_syncmap *sync;
/**
* Separately to the inter-context seqno map above, we track the last
* barrier (e.g. semaphore wait) to the global engine timelines. Note
* that this tracks global_seqno rather than the context.seqno, and
* so it is subject to the limitations of hw wraparound and that we
* may need to revoke global_seqno (on pre-emption).
*/
u32 global_sync[I915_NUM_ENGINES];
struct list_head link; struct list_head link;
const char *name; const char *name;
......
...@@ -585,35 +585,6 @@ TRACE_EVENT(i915_gem_evict_vm, ...@@ -585,35 +585,6 @@ TRACE_EVENT(i915_gem_evict_vm,
TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
); );
TRACE_EVENT(i915_gem_ring_sync_to,
TP_PROTO(struct i915_request *to, struct i915_request *from),
TP_ARGS(to, from),
TP_STRUCT__entry(
__field(u32, dev)
__field(u32, from_class)
__field(u32, from_instance)
__field(u32, to_class)
__field(u32, to_instance)
__field(u32, seqno)
),
TP_fast_assign(
__entry->dev = from->i915->drm.primary->index;
__entry->from_class = from->engine->uabi_class;
__entry->from_instance = from->engine->instance;
__entry->to_class = to->engine->uabi_class;
__entry->to_instance = to->engine->instance;
__entry->seqno = from->global_seqno;
),
TP_printk("dev=%u, sync-from=%u:%u, sync-to=%u:%u, seqno=%u",
__entry->dev,
__entry->from_class, __entry->from_instance,
__entry->to_class, __entry->to_instance,
__entry->seqno)
);
TRACE_EVENT(i915_request_queue, TRACE_EVENT(i915_request_queue,
TP_PROTO(struct i915_request *rq, u32 flags), TP_PROTO(struct i915_request *rq, u32 flags),
TP_ARGS(rq, flags), TP_ARGS(rq, flags),
......
...@@ -454,25 +454,8 @@ int intel_engines_init(struct drm_i915_private *dev_priv) ...@@ -454,25 +454,8 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
return err; return err;
} }
void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
{ {
struct drm_i915_private *dev_priv = engine->i915;
/* Our semaphore implementation is strictly monotonic (i.e. we proceed
* so long as the semaphore value in the register/page is greater
* than the sync value), so whenever we reset the seqno,
* so long as we reset the tracking semaphore value to 0, it will
* always be before the next request's seqno. If we don't reset
* the semaphore value, then when the seqno moves backwards all
* future waits will complete instantly (causing rendering corruption).
*/
if (IS_GEN_RANGE(dev_priv, 6, 7)) {
I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
if (HAS_VEBOX(dev_priv))
I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
}
intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
...@@ -1300,16 +1283,6 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, ...@@ -1300,16 +1283,6 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine)); drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine));
} }
if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
drm_printf(m, "\tSYNC_0: 0x%08x\n",
I915_READ(RING_SYNC_0(engine->mmio_base)));
drm_printf(m, "\tSYNC_1: 0x%08x\n",
I915_READ(RING_SYNC_1(engine->mmio_base)));
if (HAS_VEBOX(dev_priv))
drm_printf(m, "\tSYNC_2: 0x%08x\n",
I915_READ(RING_SYNC_2(engine->mmio_base)));
}
addr = intel_engine_get_active_head(engine); addr = intel_engine_get_active_head(engine);
drm_printf(m, "\tACTHD: 0x%08x_%08x\n", drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
upper_32_bits(addr), lower_32_bits(addr)); upper_32_bits(addr), lower_32_bits(addr));
......
...@@ -24,144 +24,6 @@ ...@@ -24,144 +24,6 @@
#include "i915_drv.h" #include "i915_drv.h"
static bool
ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
{
ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
MI_SEMAPHORE_REGISTER);
}
static struct intel_engine_cs *
semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr,
u64 offset)
{
struct drm_i915_private *dev_priv = engine->i915;
u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
struct intel_engine_cs *signaller;
enum intel_engine_id id;
for_each_engine(signaller, dev_priv, id) {
if (engine == signaller)
continue;
if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
return signaller;
}
DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n",
engine->name, ipehr);
return ERR_PTR(-ENODEV);
}
static struct intel_engine_cs *
semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
{
struct drm_i915_private *dev_priv = engine->i915;
void __iomem *vaddr;
u32 cmd, ipehr, head;
u64 offset = 0;
int i, backwards;
/*
* This function does not support execlist mode - any attempt to
* proceed further into this function will result in a kernel panic
* when dereferencing ring->buffer, which is not set up in execlist
* mode.
*
* The correct way of doing it would be to derive the currently
* executing ring buffer from the current context, which is derived
* from the currently running request. Unfortunately, to get the
* current request we would have to grab the struct_mutex before doing
* anything else, which would be ill-advised since some other thread
* might have grabbed it already and managed to hang itself, causing
* the hang checker to deadlock.
*
* Therefore, this function does not support execlist mode in its
* current form. Just return NULL and move on.
*/
if (engine->buffer == NULL)
return NULL;
ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
if (!ipehr_is_semaphore_wait(engine, ipehr))
return NULL;
/*
* HEAD is likely pointing to the dword after the actual command,
* so scan backwards until we find the MBOX. But limit it to just 3
* or 4 dwords depending on the semaphore wait command size.
* Note that we don't care about ACTHD here since that might
* point at at batch, and semaphores are always emitted into the
* ringbuffer itself.
*/
head = I915_READ_HEAD(engine) & HEAD_ADDR;
backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4;
vaddr = (void __iomem *)engine->buffer->vaddr;
for (i = backwards; i; --i) {
/*
* Be paranoid and presume the hw has gone off into the wild -
* our ring is smaller than what the hardware (and hence
* HEAD_ADDR) allows. Also handles wrap-around.
*/
head &= engine->buffer->size - 1;
/* This here seems to blow up */
cmd = ioread32(vaddr + head);
if (cmd == ipehr)
break;
head -= 4;
}
if (!i)
return NULL;
*seqno = ioread32(vaddr + head + 4) + 1;
return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
}
static int semaphore_passed(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
struct intel_engine_cs *signaller;
u32 seqno;
engine->hangcheck.deadlock++;
signaller = semaphore_waits_for(engine, &seqno);
if (signaller == NULL)
return -1;
if (IS_ERR(signaller))
return 0;
/* Prevent pathological recursion due to driver bugs */
if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES)
return -1;
if (intel_engine_signaled(signaller, seqno))
return 1;
/* cursory check for an unkickable deadlock */
if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE &&
semaphore_passed(signaller) < 0)
return -1;
return 0;
}
static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
for_each_engine(engine, dev_priv, id)
engine->hangcheck.deadlock = 0;
}
static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
{ {
u32 tmp = current_instdone | *old_instdone; u32 tmp = current_instdone | *old_instdone;
...@@ -252,21 +114,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) ...@@ -252,21 +114,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
return ENGINE_WAIT_KICK; return ENGINE_WAIT_KICK;
} }
if (IS_GEN_RANGE(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) {
switch (semaphore_passed(engine)) {
default:
return ENGINE_DEAD;
case 1:
i915_handle_error(dev_priv, ALL_ENGINES, 0,
"stuck semaphore on %s",
engine->name);
I915_WRITE_CTL(engine, tmp);
return ENGINE_WAIT_KICK;
case 0:
return ENGINE_WAIT;
}
}
return ENGINE_DEAD; return ENGINE_DEAD;
} }
...@@ -433,8 +280,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work) ...@@ -433,8 +280,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
for_each_engine(engine, dev_priv, id) { for_each_engine(engine, dev_priv, id) {
struct intel_engine_hangcheck hc; struct intel_engine_hangcheck hc;
semaphore_clear_deadlocks(dev_priv);
hangcheck_load_sample(engine, &hc); hangcheck_load_sample(engine, &hc);
hangcheck_accumulate_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc);
hangcheck_store_sample(engine, &hc); hangcheck_store_sample(engine, &hc);
......
...@@ -556,13 +556,6 @@ static int init_ring_common(struct intel_engine_cs *engine) ...@@ -556,13 +556,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
intel_engine_reset_breadcrumbs(engine); intel_engine_reset_breadcrumbs(engine);
if (HAS_LEGACY_SEMAPHORES(engine->i915)) {
I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
if (HAS_VEBOX(dev_priv))
I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
}
/* Enforce ordering by reading HEAD register back */ /* Enforce ordering by reading HEAD register back */
I915_READ_HEAD(engine); I915_READ_HEAD(engine);
...@@ -745,33 +738,6 @@ static int init_render_ring(struct intel_engine_cs *engine) ...@@ -745,33 +738,6 @@ static int init_render_ring(struct intel_engine_cs *engine)
return 0; return 0;
} }
static u32 *gen6_signal(struct i915_request *rq, u32 *cs)
{
struct drm_i915_private *dev_priv = rq->i915;
struct intel_engine_cs *engine;
enum intel_engine_id id;
int num_rings = 0;
for_each_engine(engine, dev_priv, id) {
i915_reg_t mbox_reg;
if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
continue;
mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id];
if (i915_mmio_reg_valid(mbox_reg)) {
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(mbox_reg);
*cs++ = rq->global_seqno;
num_rings++;
}
}
if (num_rings & 1)
*cs++ = MI_NOOP;
return cs;
}
static void cancel_requests(struct intel_engine_cs *engine) static void cancel_requests(struct intel_engine_cs *engine)
{ {
struct i915_request *request; struct i915_request *request;
...@@ -822,39 +788,6 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) ...@@ -822,39 +788,6 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
static const int i9xx_emit_breadcrumb_sz = 4; static const int i9xx_emit_breadcrumb_sz = 4;
static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs));
}
static int
gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal)
{
u32 dw1 = MI_SEMAPHORE_MBOX |
MI_SEMAPHORE_COMPARE |
MI_SEMAPHORE_REGISTER;
u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id];
u32 *cs;
WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = dw1 | wait_mbox;
/* Throughout all of the GEM code, seqno passed implies our current
* seqno is >= the last seqno executed. However for hardware the
* comparison is strictly greater than.
*/
*cs++ = signal->global_seqno - 1;
*cs++ = 0;
*cs++ = MI_NOOP;
intel_ring_advance(rq, cs);
return 0;
}
static void static void
gen5_seqno_barrier(struct intel_engine_cs *engine) gen5_seqno_barrier(struct intel_engine_cs *engine)
{ {
...@@ -2151,66 +2084,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode) ...@@ -2151,66 +2084,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB); return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
} }
static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
struct intel_engine_cs *engine)
{
int i;
if (!HAS_LEGACY_SEMAPHORES(dev_priv))
return;
GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
engine->semaphore.sync_to = gen6_ring_sync_to;
engine->semaphore.signal = gen6_signal;
/*
* The current semaphore is only applied on pre-gen8
* platform. And there is no VCS2 ring on the pre-gen8
* platform. So the semaphore between RCS and VCS2 is
* initialized as INVALID.
*/
for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
static const struct {
u32 wait_mbox;
i915_reg_t mbox_reg;
} sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
[RCS_HW] = {
[VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC },
[BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC },
[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
},
[VCS_HW] = {
[RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC },
[BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC },
[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
},
[BCS_HW] = {
[RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC },
[VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC },
[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
},
[VECS_HW] = {
[RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
[VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
[BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
},
};
u32 wait_mbox;
i915_reg_t mbox_reg;
if (i == engine->hw_id) {
wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
mbox_reg = GEN6_NOSYNC;
} else {
wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
}
engine->semaphore.mbox.wait[i] = wait_mbox;
engine->semaphore.mbox.signal[i] = mbox_reg;
}
}
static void intel_ring_init_irq(struct drm_i915_private *dev_priv, static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
struct intel_engine_cs *engine) struct intel_engine_cs *engine)
{ {
...@@ -2253,7 +2126,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, ...@@ -2253,7 +2126,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8); GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
intel_ring_init_irq(dev_priv, engine); intel_ring_init_irq(dev_priv, engine);
intel_ring_init_semaphores(dev_priv, engine);
engine->init_hw = init_ring_common; engine->init_hw = init_ring_common;
engine->reset.prepare = reset_prepare; engine->reset.prepare = reset_prepare;
...@@ -2265,16 +2137,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, ...@@ -2265,16 +2137,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
engine->emit_breadcrumb = i9xx_emit_breadcrumb; engine->emit_breadcrumb = i9xx_emit_breadcrumb;
engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
int num_rings;
engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
engine->emit_breadcrumb_sz += num_rings * 3;
if (num_rings & 1)
engine->emit_breadcrumb_sz++;
}
engine->set_default_submission = i9xx_set_default_submission; engine->set_default_submission = i9xx_set_default_submission;
......
...@@ -510,60 +510,6 @@ struct intel_engine_cs { ...@@ -510,60 +510,6 @@ struct intel_engine_cs {
void (*irq_seqno_barrier)(struct intel_engine_cs *engine); void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
void (*cleanup)(struct intel_engine_cs *engine); void (*cleanup)(struct intel_engine_cs *engine);
/* GEN8 signal/wait table - never trust comments!
* signal to signal to signal to signal to signal to
* RCS VCS BCS VECS VCS2
* --------------------------------------------------------------------
* RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
* |-------------------------------------------------------------------
* VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
* |-------------------------------------------------------------------
* BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
* |-------------------------------------------------------------------
* VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP (0x90) | VCS2 (0x98) |
* |-------------------------------------------------------------------
* VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP (0xc0) |
* |-------------------------------------------------------------------
*
* Generalization:
* f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
* ie. transpose of g(x, y)
*
* sync from sync from sync from sync from sync from
* RCS VCS BCS VECS VCS2
* --------------------------------------------------------------------
* RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
* |-------------------------------------------------------------------
* VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
* |-------------------------------------------------------------------
* BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
* |-------------------------------------------------------------------
* VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP (0x90) | VCS2 (0xb8) |
* |-------------------------------------------------------------------
* VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP (0xc0) |
* |-------------------------------------------------------------------
*
* Generalization:
* g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
* ie. transpose of f(x, y)
*/
struct {
#define GEN6_SEMAPHORE_LAST VECS_HW
#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0)
struct {
/* our mbox written by others */
u32 wait[GEN6_NUM_SEMAPHORES];
/* mboxes this ring signals to */
i915_reg_t signal[GEN6_NUM_SEMAPHORES];
} mbox;
/* AKA wait() */
int (*sync_to)(struct i915_request *rq,
struct i915_request *signal);
u32 *(*signal)(struct i915_request *rq, u32 *cs);
} semaphore;
struct intel_engine_execlists execlists; struct intel_engine_execlists execlists;
/* Contexts are pinned whilst they are active on the GPU. The last /* Contexts are pinned whilst they are active on the GPU. The last
...@@ -889,7 +835,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) ...@@ -889,7 +835,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
return tail; return tail;
} }
void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
void intel_engine_setup_common(struct intel_engine_cs *engine); void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment