Commit 403009bf authored by Christian König, committed by Alex Deucher

drm/amdgpu: fix shadow BO restoring

Don't grab the reservation lock any more and simplify the handling quite
a bit.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent c33adbc7
...@@ -2950,54 +2950,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) ...@@ -2950,54 +2950,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
return 0; return 0;
} }
/**
* amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers
*
* @adev: amdgpu_device pointer
* @ring: amdgpu_ring for the engine handling the buffer operations
* @bo: amdgpu_bo buffer to be restored from its shadow (bo->shadow)
* @fence: dma_fence pointer handed on to amdgpu_bo_restore_from_shadow();
*         this function does not touch it on any path that skips the restore
*
* Restores the VRAM buffer contents from the shadow in GTT. Used to
* restore things like GPUVM page tables after a GPU reset where
* the contents of VRAM might be lost.
*
* Nothing is restored, and 0 is returned, when @bo has no shadow or when
* @bo's current memory type does not map to AMDGPU_GEM_DOMAIN_VRAM.
*
* @bo is reserved while the shadow is validated and the restore is issued,
* and is unreserved again before returning.
*
* Returns 0 on success, negative error code on failure.
*/
static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
struct dma_fence **fence)
{
uint32_t domain;
int r;
/* A BO without a shadow has nothing to restore from. */
if (!bo->shadow)
return 0;
/* Past this point every path must go through err: to unreserve bo. */
r = amdgpu_bo_reserve(bo, true);
if (r)
return r;
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
/* if bo has been evicted, then no need to recover */
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
/* Validate the shadow before it is used as the copy source. */
r = amdgpu_bo_validate(bo->shadow);
if (r) {
DRM_ERROR("bo validate failed!\n");
goto err;
}
r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
NULL, fence, true);
if (r) {
DRM_ERROR("recover page table failed!\n");
goto err;
}
}
/* Shared exit: also reached on success, with r == 0. */
err:
amdgpu_bo_unreserve(bo);
return r;
}
/** /**
* amdgpu_device_recover_vram - Recover some VRAM contents * amdgpu_device_recover_vram - Recover some VRAM contents
* *
...@@ -3006,16 +2958,15 @@ static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev, ...@@ -3006,16 +2958,15 @@ static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
* Restores the contents of VRAM buffers from the shadows in GTT. Used to * Restores the contents of VRAM buffers from the shadows in GTT. Used to
* restore things like GPUVM page tables after a GPU reset where * restore things like GPUVM page tables after a GPU reset where
* the contents of VRAM might be lost. * the contents of VRAM might be lost.
* Returns 0 on success, 1 on failure. *
* Returns:
* 0 on success, negative error code on failure.
*/ */
static int amdgpu_device_recover_vram(struct amdgpu_device *adev) static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
{ {
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_bo *bo, *tmp;
struct dma_fence *fence = NULL, *next = NULL; struct dma_fence *fence = NULL, *next = NULL;
long r = 1; struct amdgpu_bo *shadow;
int i = 0; long r = 1, tmo;
long tmo;
if (amdgpu_sriov_runtime(adev)) if (amdgpu_sriov_runtime(adev))
tmo = msecs_to_jiffies(8000); tmo = msecs_to_jiffies(8000);
...@@ -3024,44 +2975,40 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) ...@@ -3024,44 +2975,40 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
DRM_INFO("recover vram bo from shadow start\n"); DRM_INFO("recover vram bo from shadow start\n");
mutex_lock(&adev->shadow_list_lock); mutex_lock(&adev->shadow_list_lock);
list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
next = NULL;
amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next); /* No need to recover an evicted BO */
if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
continue;
r = amdgpu_bo_restore_shadow(shadow, &next);
if (r)
break;
if (fence) { if (fence) {
r = dma_fence_wait_timeout(fence, false, tmo); r = dma_fence_wait_timeout(fence, false, tmo);
if (r == 0)
pr_err("wait fence %p[%d] timeout\n", fence, i);
else if (r < 0)
pr_err("wait fence %p[%d] interrupted\n", fence, i);
if (r < 1) {
dma_fence_put(fence); dma_fence_put(fence);
fence = next; fence = next;
if (r <= 0)
break; break;
} } else {
i++;
}
dma_fence_put(fence);
fence = next; fence = next;
} }
}
mutex_unlock(&adev->shadow_list_lock); mutex_unlock(&adev->shadow_list_lock);
if (fence) { if (fence)
r = dma_fence_wait_timeout(fence, false, tmo); tmo = dma_fence_wait_timeout(fence, false, tmo);
if (r == 0)
pr_err("wait fence %p[%d] timeout\n", fence, i);
else if (r < 0)
pr_err("wait fence %p[%d] interrupted\n", fence, i);
}
dma_fence_put(fence); dma_fence_put(fence);
if (r > 0) if (r <= 0 || tmo <= 0) {
DRM_INFO("recover vram bo from shadow done\n");
else
DRM_ERROR("recover vram bo from shadow failed\n"); DRM_ERROR("recover vram bo from shadow failed\n");
return -EIO;
}
return (r > 0) ? 0 : 1; DRM_INFO("recover vram bo from shadow done\n");
return 0;
} }
/** /**
......
...@@ -553,7 +553,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, ...@@ -553,7 +553,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
if (!r) { if (!r) {
bo->shadow->parent = amdgpu_bo_ref(bo); bo->shadow->parent = amdgpu_bo_ref(bo);
mutex_lock(&adev->shadow_list_lock); mutex_lock(&adev->shadow_list_lock);
list_add_tail(&bo->shadow_list, &adev->shadow_list); list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
mutex_unlock(&adev->shadow_list_lock); mutex_unlock(&adev->shadow_list_lock);
} }
...@@ -685,13 +685,10 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo) ...@@ -685,13 +685,10 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
} }
/** /**
* amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
* @adev: amdgpu device object *
* @ring: amdgpu_ring for the engine handling the buffer operations * @shadow: &amdgpu_bo shadow to be restored
* @bo: &amdgpu_bo buffer to be restored
* @resv: reservation object with embedded fence
* @fence: dma_fence associated with the operation * @fence: dma_fence associated with the operation
* @direct: whether to submit the job directly
* *
* Copies a buffer object's shadow content back to the object. * Copies a buffer object's shadow content back to the object.
* This is used for recovering a buffer from its shadow in case of a gpu * This is used for recovering a buffer from its shadow in case of a gpu
...@@ -700,36 +697,19 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo) ...@@ -700,36 +697,19 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
* Returns: * Returns:
* 0 for success or a negative error code on failure. * 0 for success or a negative error code on failure.
*/ */
int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
struct reservation_object *resv,
struct dma_fence **fence,
bool direct)
{ {
struct amdgpu_bo *shadow = bo->shadow; struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
uint64_t bo_addr, shadow_addr; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
int r; uint64_t shadow_addr, parent_addr;
if (!shadow) shadow_addr = amdgpu_bo_gpu_offset(shadow);
return -EINVAL; parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
bo_addr = amdgpu_bo_gpu_offset(bo); return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
shadow_addr = amdgpu_bo_gpu_offset(bo->shadow); amdgpu_bo_size(shadow), NULL, fence,
true, false);
r = reservation_object_reserve_shared(bo->tbo.resv);
if (r)
goto err;
r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
amdgpu_bo_size(bo), resv, fence,
direct, false);
if (!r)
amdgpu_bo_fence(bo, *fence, true);
err:
return r;
} }
/** /**
......
...@@ -273,12 +273,8 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, ...@@ -273,12 +273,8 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
struct reservation_object *resv, struct reservation_object *resv,
struct dma_fence **fence, bool direct); struct dma_fence **fence, bool direct);
int amdgpu_bo_validate(struct amdgpu_bo *bo); int amdgpu_bo_validate(struct amdgpu_bo *bo);
int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
struct amdgpu_ring *ring, struct dma_fence **fence);
struct amdgpu_bo *bo,
struct reservation_object *resv,
struct dma_fence **fence,
bool direct);
uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
uint32_t domain); uint32_t domain);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment