Commit 47c0388b authored by ZhenGuo Yin, committed by Alex Deucher

drm/amdgpu: reset vm state machine after gpu reset(vram lost)

[Why]
The page tables of a compute VM in VRAM will be lost after a GPU reset.
VRAM won't be restored since a compute VM has no shadows.

[How]
Use the upper 32 bits of vm->generation to record the vram_lost_counter.
Reset the VM state machine when vm->generation is not equal to
the new generation token.

v2: Check vm->generation instead of calling drm_sched_entity_error
in amdgpu_vm_validate.
v3: Use new generation token instead of vram_lost_counter for check.
Signed-off-by: ZhenGuo Yin <zhenguo.yin@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 08ae395e
...@@ -434,7 +434,7 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) ...@@ -434,7 +434,7 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
if (!vm) if (!vm)
return result; return result;
result += vm->generation; result += lower_32_bits(vm->generation);
/* Add one if the page tables will be re-generated on next CS */ /* Add one if the page tables will be re-generated on next CS */
if (drm_sched_entity_error(&vm->delayed)) if (drm_sched_entity_error(&vm->delayed))
++result; ++result;
...@@ -463,13 +463,14 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -463,13 +463,14 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*validate)(void *p, struct amdgpu_bo *bo), int (*validate)(void *p, struct amdgpu_bo *bo),
void *param) void *param)
{ {
uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
struct amdgpu_vm_bo_base *bo_base; struct amdgpu_vm_bo_base *bo_base;
struct amdgpu_bo *shadow; struct amdgpu_bo *shadow;
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
int r; int r;
if (drm_sched_entity_error(&vm->delayed)) { if (vm->generation != new_vm_generation) {
++vm->generation; vm->generation = new_vm_generation;
amdgpu_vm_bo_reset_state_machine(vm); amdgpu_vm_bo_reset_state_machine(vm);
amdgpu_vm_fini_entities(vm); amdgpu_vm_fini_entities(vm);
r = amdgpu_vm_init_entities(adev, vm); r = amdgpu_vm_init_entities(adev, vm);
...@@ -2439,7 +2440,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -2439,7 +2440,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->last_update = dma_fence_get_stub(); vm->last_update = dma_fence_get_stub();
vm->last_unlocked = dma_fence_get_stub(); vm->last_unlocked = dma_fence_get_stub();
vm->last_tlb_flush = dma_fence_get_stub(); vm->last_tlb_flush = dma_fence_get_stub();
vm->generation = 0; vm->generation = amdgpu_vm_generation(adev, NULL);
mutex_init(&vm->eviction_lock); mutex_init(&vm->eviction_lock);
vm->evicting = false; vm->evicting = false;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment