Commit fdbd6948 authored by Jiadong Zhu, committed by Alex Deucher

drm/amdgpu/gfx9: wait for reset done before remap

There is a race condition in which the CP firmware modifies
the MQD during the reset sequence after the driver has updated
it for remapping. We have to wait until CP_HQD_ACTIVE becomes
false before remapping the queue.

v2: fix KIQ locking (Alex)
v3: fix KIQ locking harder
Acked-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Signed-off-by: Jiadong Zhu <Jiadong.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent b5e1a387
...@@ -7125,7 +7125,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, ...@@ -7125,7 +7125,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *kiq_ring = &kiq->ring; struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags; unsigned long flags;
int r; int i, r;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL; return -EINVAL;
...@@ -7147,9 +7147,28 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, ...@@ -7147,9 +7147,28 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
if (r) if (r)
return r; return r;
/* make sure dequeue is complete*/
gfx_v9_0_set_safe_mode(adev, 0);
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
break;
udelay(1);
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
gfx_v9_0_unset_safe_mode(adev, 0);
if (r) {
dev_err(adev->dev, "fail to wait on hqd deactive\n");
return r;
}
r = amdgpu_bo_reserve(ring->mqd_obj, false); r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0)){ if (unlikely(r != 0)){
DRM_ERROR("fail to resv mqd_obj\n"); dev_err(adev->dev, "fail to resv mqd_obj\n");
return r; return r;
} }
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
...@@ -7159,14 +7178,21 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, ...@@ -7159,14 +7178,21 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
ring->mqd_ptr = NULL; ring->mqd_ptr = NULL;
} }
amdgpu_bo_unreserve(ring->mqd_obj); amdgpu_bo_unreserve(ring->mqd_obj);
if (r){ if (r) {
DRM_ERROR("fail to unresv mqd_obj\n"); dev_err(adev->dev, "fail to unresv mqd_obj\n");
return r; return r;
} }
spin_lock_irqsave(&kiq->ring_lock, flags);
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
if (r) {
spin_unlock_irqrestore(&kiq->ring_lock, flags);
return -ENOMEM;
}
kiq->pmf->kiq_map_queues(kiq_ring, ring); kiq->pmf->kiq_map_queues(kiq_ring, ring);
amdgpu_ring_commit(kiq_ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
r = amdgpu_ring_test_ring(kiq_ring); r = amdgpu_ring_test_ring(kiq_ring);
if (r){ if (r) {
DRM_ERROR("fail to remap queue\n"); DRM_ERROR("fail to remap queue\n");
return r; return r;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment