Commit b8866c26 authored by Andres Rodriguez, committed by Alex Deucher

drm/amdgpu: implement ring set_priority for gfx_v8 compute v9

Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over
other queues on the same pipe. Multiple queues on a pipe are timesliced
so this gives us full precedence over other queues.

Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the
wave as follows:
        0x2: CS_H
        0x1: CS_M
        0x0: CS_L

The SPI block will then dispatch work according to the policy set by
SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than
gfx.

In order to prevent getting stuck in loops of resources bouncing between
GFX and high priority compute and introducing further latency, we
statically reserve a portion of the pipe.

v2: fix srbm_select to ring->queue and use ring->funcs->type
v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_*
v4: switch int to enum amd_sched_priority
v5: corresponding changes for srbm_lock
v6: change CU reservation to PIPE_PERCENT allocation
v7: use kiq instead of MMIO
v8: back to MMIO, and make the implementation sleep safe.
v9: corresponding changes for splitting HIGH into _HW/_SW
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent b2ff0e8a
...@@ -1035,6 +1035,10 @@ struct amdgpu_gfx { ...@@ -1035,6 +1035,10 @@ struct amdgpu_gfx {
bool in_suspend; bool in_suspend;
/* NGG */ /* NGG */
struct amdgpu_ngg ngg; struct amdgpu_ngg ngg;
/* pipe reservation */
struct mutex pipe_reserve_mutex;
DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
}; };
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
......
...@@ -2094,6 +2094,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, ...@@ -2094,6 +2094,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->vm_manager.vm_pte_num_rings = 0; adev->vm_manager.vm_pte_num_rings = 0;
adev->gart.gart_funcs = NULL; adev->gart.gart_funcs = NULL;
adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
adev->smc_rreg = &amdgpu_invalid_rreg; adev->smc_rreg = &amdgpu_invalid_rreg;
adev->smc_wreg = &amdgpu_invalid_wreg; adev->smc_wreg = &amdgpu_invalid_wreg;
...@@ -2122,6 +2123,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, ...@@ -2122,6 +2123,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->pm.mutex); mutex_init(&adev->pm.mutex);
mutex_init(&adev->gfx.gpu_clock_mutex); mutex_init(&adev->gfx.gpu_clock_mutex);
mutex_init(&adev->srbm_mutex); mutex_init(&adev->srbm_mutex);
mutex_init(&adev->gfx.pipe_reserve_mutex);
mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock); mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock); mutex_init(&adev->virt.vf_errors.lock);
......
...@@ -6394,6 +6394,104 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) ...@@ -6394,6 +6394,104 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} }
/*
 * Program the SPI_WCL_PIPE_PERCENT slot that belongs to the pipe @ring runs on.
 *
 * @ring:    ring whose me/pipe pair selects the register to update
 * @acquire: true programs the VALUE field with its full mask (every bit set);
 *           false programs it with 0x1
 *
 * Only the VALUE field is changed: the register is read first and the
 * remaining bits are written back as they were.
 */
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int slot, offset, val;
	int percent;

	if (acquire)
		percent = SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK;
	else
		percent = 0x1;

	slot = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		slot -= 2;

	/* The per-pipe registers are addressed relative to the GFX one. */
	offset = mmSPI_WCL_PIPE_PERCENT_GFX + slot;

	val = RREG32(offset);
	val = REG_SET_FIELD(val, SPI_WCL_PIPE_PERCENT_GFX, VALUE, percent);
	WREG32(offset, val);
}
/*
 * Record or drop the pipe reservation held by @ring, then reprogram the pipe
 * percentage of every gfx and compute ring so it matches the reservation
 * bitmap.
 *
 * @adev:    device owning the reservation bitmap and the rings
 * @ring:    ring whose pipe is being reserved or released
 * @acquire: true sets the pipe's bit in the bitmap, false clears it
 *
 * The bitmap bit is computed from the ring's me and pipe with the last
 * argument fixed to 0 (presumably the queue index, so that reservations are
 * tracked per pipe - confirm against amdgpu_gfx_queue_to_bit()).
 *
 * The whole update runs under adev->gfx.pipe_reserve_mutex.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	struct amdgpu_ring *cur;
	bool none_reserved, full;
	int idx, bit;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);

	bit = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(bit, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(bit, adev->gfx.pipe_reserve_bitmap);

	none_reserved = bitmap_weight(adev->gfx.pipe_reserve_bitmap,
				      AMDGPU_MAX_COMPUTE_QUEUES) == 0;

	/*
	 * With no reservation left, everyone reacquires all resources.
	 * Otherwise every pipe without a current reservation is lowered and
	 * only the reserved ones keep the full value.
	 */
	for (idx = 0; idx < adev->gfx.num_gfx_rings; ++idx) {
		cur = &adev->gfx.gfx_ring[idx];

		full = true;
		if (!none_reserved) {
			bit = amdgpu_gfx_queue_to_bit(adev, cur->me,
						      cur->pipe, 0);
			full = test_bit(bit, adev->gfx.pipe_reserve_bitmap);
		}

		gfx_v8_0_ring_set_pipe_percent(cur, full);
	}

	for (idx = 0; idx < adev->gfx.num_compute_rings; ++idx) {
		cur = &adev->gfx.compute_ring[idx];

		full = true;
		if (!none_reserved) {
			bit = amdgpu_gfx_queue_to_bit(adev, cur->me,
						      cur->pipe, 0);
			full = test_bit(bit, adev->gfx.pipe_reserve_bitmap);
		}

		gfx_v8_0_ring_set_pipe_percent(cur, full);
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
/*
 * Write the pipe and queue priority registers of the hardware queue that
 * backs @ring.
 *
 * @adev:    device the ring belongs to
 * @ring:    ring whose me/pipe/queue select the HQD to program
 * @acquire: true writes pipe priority 0x2 and queue priority 0xf,
 *           false writes 0x0 to both
 *
 * The register writes are bracketed by vi_srbm_select() calls and performed
 * with adev->srbm_mutex held; the selection is set back to 0/0/0/0 before the
 * mutex is released.
 */
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_prio;
	uint32_t queue_prio;

	if (acquire) {
		pipe_prio = 0x2;
		queue_prio = 0xf;
	} else {
		pipe_prio = 0x0;
		queue_prio = 0x0;
	}

	mutex_lock(&adev->srbm_mutex);

	/* Point the SRBM at this ring's queue before touching CP_HQD_*. */
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_prio);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_prio);
	vi_srbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);
}
/*
 * set_priority callback for compute rings.
 *
 * @ring:     ring whose priority changes
 * @priority: requested scheduler priority
 *
 * Rings that are not of type AMDGPU_RING_TYPE_COMPUTE are ignored. For
 * compute rings, AMD_SCHED_PRIORITY_HIGH_HW acquires the elevated HQD
 * priority and the pipe reservation; any other priority releases both.
 */
static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum amd_sched_priority priority)
{
	struct amdgpu_device *adev = ring->adev;
	bool acquire;

	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;

	acquire = (priority == AMD_SCHED_PRIORITY_HIGH_HW);

	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
u64 addr, u64 seq, u64 addr, u64 seq,
unsigned flags) unsigned flags)
...@@ -6839,6 +6937,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { ...@@ -6839,6 +6937,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.test_ib = gfx_v8_0_ring_test_ib, .test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop, .insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib, .pad_ib = amdgpu_ring_generic_pad_ib,
.set_priority = gfx_v8_0_ring_set_priority_compute,
}; };
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment