Commit ad17b124 authored by Prike Liang's avatar Prike Liang Committed by Alex Deucher

drm/amdgpu/gfx9.4.3: Implement compute pipe reset

Implement the compute pipe reset, and the driver will
fallback to pipe reset when queue reset fails.
The pipe reset only deactivates the queue which is
scheduled in the pipe, and meanwhile the MEC pipe
will be reset to the firmware _start pointer. So,
it seems pipe reset will cost more cycles than the
queue reset; therefore, the driver tries to recover
by doing queue reset first.
Reviewed-by: default avatarLijo Lazar <lijo.lazar@amd.com>
Signed-off-by: default avatarPrike Liang <Prike.Liang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 6c0a7c3c
......@@ -3469,6 +3469,98 @@ static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
}
}
static int gfx_v9_4_3_unmap_done(struct amdgpu_device *adev, uint32_t me,
uint32_t pipe, uint32_t queue,
uint32_t xcc_id)
{
int i, r;
/* make sure dequeue is complete*/
gfx_v9_4_3_xcc_set_safe_mode(adev, xcc_id);
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, me, pipe, queue, 0, GET_INST(GC, xcc_id));
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
break;
udelay(1);
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
else
r = 0;
soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
mutex_unlock(&adev->srbm_mutex);
gfx_v9_4_3_xcc_unset_safe_mode(adev, xcc_id);
return r;
}
static bool gfx_v9_4_3_pipe_reset_support(struct amdgpu_device *adev)
{
/*TODO: Need check gfx9.4.4 mec fw whether supports pipe reset as well.*/
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
adev->gfx.mec_fw_version >= 0x0000009b)
return true;
else
dev_warn_once(adev->dev, "Please use the latest MEC version to see whether support pipe reset\n");
return false;
}
static int gfx_v9_4_3_reset_hw_pipe(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t reset_pipe, clean_pipe;
int r;
if (!gfx_v9_4_3_pipe_reset_support(adev))
return -EINVAL;
gfx_v9_4_3_xcc_set_safe_mode(adev, ring->xcc_id);
mutex_lock(&adev->srbm_mutex);
reset_pipe = RREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL);
clean_pipe = reset_pipe;
if (ring->me == 1) {
switch (ring->pipe) {
case 0:
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
MEC_ME1_PIPE0_RESET, 1);
break;
case 1:
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
MEC_ME1_PIPE1_RESET, 1);
break;
case 2:
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
MEC_ME1_PIPE2_RESET, 1);
break;
case 3:
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
MEC_ME1_PIPE3_RESET, 1);
break;
default:
break;
}
} else {
if (ring->pipe)
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
MEC_ME2_PIPE1_RESET, 1);
else
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
MEC_ME2_PIPE0_RESET, 1);
}
WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL, reset_pipe);
WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL, clean_pipe);
mutex_unlock(&adev->srbm_mutex);
gfx_v9_4_3_xcc_unset_safe_mode(adev, ring->xcc_id);
r = gfx_v9_4_3_unmap_done(adev, ring->me, ring->pipe, ring->queue, ring->xcc_id);
return r;
}
static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
unsigned int vmid)
{
......@@ -3476,7 +3568,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id];
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
int r, i;
int r;
if (!adev->debug_exp_resets)
return -EINVAL;
......@@ -3501,26 +3593,23 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
spin_unlock_irqrestore(&kiq->ring_lock, flags);
r = amdgpu_ring_test_ring(kiq_ring);
if (r) {
dev_err(adev->dev, "kiq ring test failed after ring: %s queue reset\n",
ring->name);
goto pipe_reset;
}
r = gfx_v9_4_3_unmap_done(adev, ring->me, ring->pipe, ring->queue, ring->xcc_id);
if (r)
return r;
dev_err(adev->dev, "fail to wait on hqd deactive and will try pipe reset\n");
/* make sure dequeue is complete*/
amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id);
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, ring->xcc_id));
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
break;
udelay(1);
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, ring->xcc_id));
mutex_unlock(&adev->srbm_mutex);
amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id);
if (r) {
dev_err(adev->dev, "fail to wait on hqd deactive\n");
return r;
pipe_reset:
if(r) {
r = gfx_v9_4_3_reset_hw_pipe(ring);
dev_info(adev->dev, "ring: %s pipe reset :%s\n", ring->name,
r ? "failed" : "successfully");
if (r)
return r;
}
r = amdgpu_bo_reserve(ring->mqd_obj, false);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment