Commit e89d2fec authored by Sunil Khatri's avatar Sunil Khatri Committed by Alex Deucher

drm/amdgpu: optimize the padding for gfx10

Adding NOP packets one by one in the ring
does not use the CP efficiently.

Solution:
Use CP optimization while adding NOP packet's so PFP
can discard NOP packets based on information of count
from the Header instead of fetching all NOP packets
one by one.

Cc: Christian König <christian.koenig@amd.com>
Cc: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Cc: Tvrtko Ursulin <tursulin@igalia.com>
Cc: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarSunil Khatri <sunil.khatri@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 4a4c815b
...@@ -9397,6 +9397,24 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) ...@@ -9397,6 +9397,24 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
} }
static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
int i;
/* Header itself is a NOP packet */
if (num_nop == 1) {
amdgpu_ring_write(ring, ring->funcs->nop);
return;
}
/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
/* Header is at index 0, followed by num_nops - 1 NOP packet's */
for (i = 1; i < num_nop; i++)
amdgpu_ring_write(ring, ring->funcs->nop);
}
static void gfx_v10_ip_print(void *handle, struct drm_printer *p) static void gfx_v10_ip_print(void *handle, struct drm_printer *p)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
...@@ -9588,7 +9606,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { ...@@ -9588,7 +9606,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring, .test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib, .test_ib = gfx_v10_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop, .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib, .pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v10_0_ring_emit_sb, .emit_switch_buffer = gfx_v10_0_ring_emit_sb,
.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
...@@ -9629,7 +9647,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { ...@@ -9629,7 +9647,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring, .test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib, .test_ib = gfx_v10_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop, .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib, .pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment