Commit a85c3db6, authored by Jonathan Kim, committed by Alex Deucher

drm/amdkfd: fallback to pipe reset on queue reset fail for gfx9

If queue reset fails, tell the CP to reset the pipe.
Since queues multiplex context per pipe and we've issued a device wide
preemption prior to the hang, we can assume the hung pipe only has one
queue to reset on pipe reset.
Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 9c081c11
...@@ -1173,12 +1173,30 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, ...@@ -1173,12 +1173,30 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
return queue_addr; return queue_addr;
} }
/*
 * Poll CP_HQD_ACTIVE on the given GC instance until the ACTIVE bit clears
 * or the timeout expires.
 *
 * The caller is assumed to have already acquired the queue.
 *
 * @adev:     device whose registers are read
 * @inst:     instance index, translated through GET_INST(GC, inst)
 * @utimeout: timeout; the utimeout * HZ / 1000 conversion below treats
 *            the value as milliseconds
 *
 * Returns 0 once the ACTIVE bit is observed clear, or -ETIME if the
 * deadline passes while it is still set.
 */
static int kgd_gfx_v9_hqd_dequeue_wait(struct amdgpu_device *adev, uint32_t inst,
				       unsigned int utimeout)
{
	unsigned long deadline = (utimeout * HZ / 1000) + jiffies;

	for (;;) {
		uint32_t hqd_active = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);

		if ((hqd_active & CP_HQD_ACTIVE__ACTIVE_MASK) == 0)
			return 0;

		/* The register is always sampled at least once before giving up. */
		if (time_after(jiffies, deadline))
			return -ETIME;

		/* Sleep between polls rather than busy-waiting on the register. */
		usleep_range(500, 1000);
	}
}
uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id, uint32_t pipe_id, uint32_t queue_id,
uint32_t inst, unsigned int utimeout) uint32_t inst, unsigned int utimeout)
{ {
uint32_t low, high, temp; uint32_t low, high, pipe_reset_data = 0;
unsigned long end_jiffies;
uint64_t queue_addr = 0; uint64_t queue_addr = 0;
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
...@@ -1202,25 +1220,23 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, ...@@ -1202,25 +1220,23 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
/* assume previous dequeue request issued will take affect after reset */ /* assume previous dequeue request issued will take affect after reset */
WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1); WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1);
end_jiffies = (utimeout * HZ / 1000) + jiffies; if (!kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
while (true) { goto unlock_out;
temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) pr_debug("Attempting pipe reset on XCC %i pipe id %i\n", inst, pipe_id);
break;
if (time_after(jiffies, end_jiffies)) { pipe_reset_data = REG_SET_FIELD(pipe_reset_data, CP_MEC_CNTL, MEC_ME1_PIPE0_RESET, 1);
queue_addr = 0; pipe_reset_data = pipe_reset_data << pipe_id;
break;
}
usleep_range(500, 1000); WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, pipe_reset_data);
} WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, 0);
pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n", if (kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!"); queue_addr = 0;
unlock_out: unlock_out:
pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n",
inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!");
amdgpu_gfx_rlc_exit_safe_mode(adev, inst); amdgpu_gfx_rlc_exit_safe_mode(adev, inst);
kgd_gfx_v9_release_queue(adev, inst); kgd_gfx_v9_release_queue(adev, inst);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment