Commit 93323131 authored by monk.liu, committed by Alex Deucher

drm/amdgpu: different emit_ib for gfx and compute

The compute ring does not use the const engine (CE) so far, so skip the
CE handling in the compute routine.
Signed-off-by: monk.liu <monk.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
parent c193fa91
...@@ -2561,7 +2561,7 @@ static bool gfx_v7_0_ring_emit_semaphore(struct amdgpu_ring *ring, ...@@ -2561,7 +2561,7 @@ static bool gfx_v7_0_ring_emit_semaphore(struct amdgpu_ring *ring,
* sheduling on the ring. This function schedules the IB * sheduling on the ring. This function schedules the IB
* on the gfx ring for execution by the GPU. * on the gfx ring for execution by the GPU.
*/ */
static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring, static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib) struct amdgpu_ib *ib)
{ {
bool need_ctx_switch = ring->current_ctx != ib->ctx; bool need_ctx_switch = ring->current_ctx != ib->ctx;
...@@ -2569,15 +2569,10 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -2569,15 +2569,10 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
u32 next_rptr = ring->wptr + 5; u32 next_rptr = ring->wptr + 5;
/* drop the CE preamble IB for the same context */ /* drop the CE preamble IB for the same context */
if ((ring->type == AMDGPU_RING_TYPE_GFX) && if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
(ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
!need_ctx_switch)
return; return;
if (ring->type == AMDGPU_RING_TYPE_COMPUTE) if (need_ctx_switch)
control |= INDIRECT_BUFFER_VALID;
if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX)
next_rptr += 2; next_rptr += 2;
next_rptr += 4; next_rptr += 4;
...@@ -2588,7 +2583,7 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -2588,7 +2583,7 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, next_rptr); amdgpu_ring_write(ring, next_rptr);
/* insert SWITCH_BUFFER packet before first IB in the ring frame */ /* insert SWITCH_BUFFER packet before first IB in the ring frame */
if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) { if (need_ctx_switch) {
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0);
} }
...@@ -2611,6 +2606,35 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -2611,6 +2606,35 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, control); amdgpu_ring_write(ring, control);
} }
/**
 * gfx_v7_0_ring_emit_ib_compute - emit an IB on a compute ring
 *
 * @ring: amdgpu_ring structure the packets are written to
 * @ib: amdgpu indirect buffer object to schedule
 *
 * Writes a 5-dword WRITE_DATA packet carrying the next read pointer value,
 * followed by a 4-dword INDIRECT_BUFFER packet that references @ib.
 * Unlike gfx_v7_0_ring_emit_ib_gfx(), no SWITCH_BUFFER packet is emitted
 * and preamble IBs are never dropped: compute rings do not use the const
 * engine (CE) so far.
 */
static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib)
{
	/* ring->wptr plus the 5 + 4 dwords emitted below */
	const u32 rptr_after_ib = ring->wptr + 5 + 4;
	/* the compute path always marks the IB as valid */
	u32 ib_ctl = INDIRECT_BUFFER_VALID | ib->length_dw;
	/* low address dword, bottom two bits cleared */
	u32 ib_base_lo = ib->gpu_addr & 0xFFFFFFFC;

	/* the id of the IB's VM (if any) goes into bits 24 and up */
	if (ib->vm)
		ib_ctl |= ib->vm->ids[ring->idx].id << 24;

#ifdef __BIG_ENDIAN
	/* NOTE(review): presumably a byte-swap mode selector - confirm */
	ib_base_lo |= (2 << 0);
#endif

	/*
	 * Store the next rptr value at next_rptr_gpu_addr.
	 * NOTE(review): DST_SEL(5) presumably selects memory - confirm.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring,
			  upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, rptr_after_ib);

	/* INDIRECT_BUFFER packet: header, address low/high, control */
	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring, ib_base_lo);
	/* only the low 16 bits of the upper address half are kept */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, ib_ctl);
}
/** /**
* gfx_v7_0_ring_test_ib - basic ring IB test * gfx_v7_0_ring_test_ib - basic ring IB test
* *
...@@ -5555,7 +5579,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { ...@@ -5555,7 +5579,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.get_wptr = gfx_v7_0_ring_get_wptr_gfx, .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
.set_wptr = gfx_v7_0_ring_set_wptr_gfx, .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
.parse_cs = NULL, .parse_cs = NULL,
.emit_ib = gfx_v7_0_ring_emit_ib, .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
.emit_fence = gfx_v7_0_ring_emit_fence_gfx, .emit_fence = gfx_v7_0_ring_emit_fence_gfx,
.emit_semaphore = gfx_v7_0_ring_emit_semaphore, .emit_semaphore = gfx_v7_0_ring_emit_semaphore,
.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
...@@ -5571,7 +5595,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { ...@@ -5571,7 +5595,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.get_wptr = gfx_v7_0_ring_get_wptr_compute, .get_wptr = gfx_v7_0_ring_get_wptr_compute,
.set_wptr = gfx_v7_0_ring_set_wptr_compute, .set_wptr = gfx_v7_0_ring_set_wptr_compute,
.parse_cs = NULL, .parse_cs = NULL,
.emit_ib = gfx_v7_0_ring_emit_ib, .emit_ib = gfx_v7_0_ring_emit_ib_compute,
.emit_fence = gfx_v7_0_ring_emit_fence_compute, .emit_fence = gfx_v7_0_ring_emit_fence_compute,
.emit_semaphore = gfx_v7_0_ring_emit_semaphore, .emit_semaphore = gfx_v7_0_ring_emit_semaphore,
.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
......
...@@ -3753,7 +3753,7 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) ...@@ -3753,7 +3753,7 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0x20); /* poll interval */ amdgpu_ring_write(ring, 0x20); /* poll interval */
} }
static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring, static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib) struct amdgpu_ib *ib)
{ {
bool need_ctx_switch = ring->current_ctx != ib->ctx; bool need_ctx_switch = ring->current_ctx != ib->ctx;
...@@ -3761,15 +3761,10 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -3761,15 +3761,10 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring,
u32 next_rptr = ring->wptr + 5; u32 next_rptr = ring->wptr + 5;
/* drop the CE preamble IB for the same context */ /* drop the CE preamble IB for the same context */
if ((ring->type == AMDGPU_RING_TYPE_GFX) && if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
(ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
!need_ctx_switch)
return; return;
if (ring->type == AMDGPU_RING_TYPE_COMPUTE) if (need_ctx_switch)
control |= INDIRECT_BUFFER_VALID;
if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX)
next_rptr += 2; next_rptr += 2;
next_rptr += 4; next_rptr += 4;
...@@ -3780,7 +3775,7 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -3780,7 +3775,7 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, next_rptr); amdgpu_ring_write(ring, next_rptr);
/* insert SWITCH_BUFFER packet before first IB in the ring frame */ /* insert SWITCH_BUFFER packet before first IB in the ring frame */
if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) { if (need_ctx_switch) {
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0);
} }
...@@ -3803,6 +3798,36 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -3803,6 +3798,36 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, control); amdgpu_ring_write(ring, control);
} }
/**
 * gfx_v8_0_ring_emit_ib_compute - emit an IB on a compute ring
 *
 * @ring: amdgpu_ring structure the packets are written to
 * @ib: amdgpu indirect buffer object to schedule
 *
 * Writes a 5-dword WRITE_DATA packet carrying the next read pointer value,
 * followed by a 4-dword INDIRECT_BUFFER packet that references @ib.
 * Unlike gfx_v8_0_ring_emit_ib_gfx(), no SWITCH_BUFFER packet is emitted
 * and preamble IBs are never dropped: compute rings do not use the const
 * engine (CE) so far.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib)
{
	/* ring->wptr plus the 5 + 4 dwords emitted below */
	const u32 rptr_after_ib = ring->wptr + 5 + 4;
	/* the compute path always marks the IB as valid */
	u32 ib_ctl = INDIRECT_BUFFER_VALID | ib->length_dw;
	/* low address dword, bottom two bits cleared */
	u32 ib_base_lo = ib->gpu_addr & 0xFFFFFFFC;

	/* the id of the IB's VM (if any) goes into bits 24 and up */
	if (ib->vm)
		ib_ctl |= ib->vm->ids[ring->idx].id << 24;

#ifdef __BIG_ENDIAN
	/* NOTE(review): presumably a byte-swap mode selector - confirm */
	ib_base_lo |= (2 << 0);
#endif

	/*
	 * Store the next rptr value at next_rptr_gpu_addr.
	 * NOTE(review): DST_SEL(5) presumably selects memory - confirm.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring,
			  upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, rptr_after_ib);

	/* INDIRECT_BUFFER packet: header, address low/high, control */
	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring, ib_base_lo);
	/* only the low 16 bits of the upper address half are kept */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, ib_ctl);
}
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
u64 seq, unsigned flags) u64 seq, unsigned flags)
{ {
...@@ -4224,7 +4249,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { ...@@ -4224,7 +4249,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.get_wptr = gfx_v8_0_ring_get_wptr_gfx, .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
.set_wptr = gfx_v8_0_ring_set_wptr_gfx, .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
.parse_cs = NULL, .parse_cs = NULL,
.emit_ib = gfx_v8_0_ring_emit_ib, .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
.emit_fence = gfx_v8_0_ring_emit_fence_gfx, .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
.emit_semaphore = gfx_v8_0_ring_emit_semaphore, .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
...@@ -4240,7 +4265,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { ...@@ -4240,7 +4265,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.get_wptr = gfx_v8_0_ring_get_wptr_compute, .get_wptr = gfx_v8_0_ring_get_wptr_compute,
.set_wptr = gfx_v8_0_ring_set_wptr_compute, .set_wptr = gfx_v8_0_ring_set_wptr_compute,
.parse_cs = NULL, .parse_cs = NULL,
.emit_ib = gfx_v8_0_ring_emit_ib, .emit_ib = gfx_v8_0_ring_emit_ib_compute,
.emit_fence = gfx_v8_0_ring_emit_fence_compute, .emit_fence = gfx_v8_0_ring_emit_fence_compute,
.emit_semaphore = gfx_v8_0_ring_emit_semaphore, .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment