Commit a340847b, authored by Victor Zhao, committed by Alex Deucher

Revert "drm/amdgpu: let mode2 reset fallback to default when failure"

This reverts commit dac6b808.

This commit reverts the AMDGPU_SKIP_MODE2_RESET flag, as it conflicts with
the original design of the reset handler. The feature will be redesigned.

Fixes: dac6b808 ("drm/amdgpu: let mode2 reset fallback to default when failure")
Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent afbaa155
...@@ -134,7 +134,6 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) ...@@ -134,7 +134,6 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE; reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev; reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context); amdgpu_device_gpu_recover(adev, NULL, &reset_context);
} }
......
...@@ -5210,7 +5210,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, ...@@ -5210,7 +5210,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
reset_context->job = job; reset_context->job = job;
reset_context->hive = hive; reset_context->hive = hive;
/* /*
* Build list of devices to reset. * Build list of devices to reset.
* In case we are in XGMI hive mode, resort the device list * In case we are in XGMI hive mode, resort the device list
...@@ -5337,11 +5336,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, ...@@ -5337,11 +5336,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
amdgpu_ras_resume(adev); amdgpu_ras_resume(adev);
} else { } else {
r = amdgpu_do_asic_reset(device_list_handle, reset_context); r = amdgpu_do_asic_reset(device_list_handle, reset_context);
if (r && r == -EAGAIN) { if (r && r == -EAGAIN)
set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags);
adev->asic_reset_res = 0;
goto retry; goto retry;
}
if (!r && gpu_reset_for_dev_remove) if (!r && gpu_reset_for_dev_remove)
goto recover_end; goto recover_end;
...@@ -5777,7 +5773,6 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) ...@@ -5777,7 +5773,6 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
reset_context.reset_req_dev = adev; reset_context.reset_req_dev = adev;
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
adev->no_hw_access = true; adev->no_hw_access = true;
r = amdgpu_device_pre_asic_reset(adev, &reset_context); r = amdgpu_device_pre_asic_reset(adev, &reset_context);
......
...@@ -72,7 +72,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) ...@@ -72,7 +72,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
reset_context.method = AMD_RESET_METHOD_NONE; reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev; reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context); r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
if (r) if (r)
......
...@@ -1950,7 +1950,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) ...@@ -1950,7 +1950,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE; reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev; reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
} }
......
...@@ -74,9 +74,6 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev, ...@@ -74,9 +74,6 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
{ {
struct amdgpu_reset_handler *reset_handler = NULL; struct amdgpu_reset_handler *reset_handler = NULL;
if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
return -ENOSYS;
if (adev->reset_cntl && adev->reset_cntl->get_reset_handler) if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
reset_handler = adev->reset_cntl->get_reset_handler( reset_handler = adev->reset_cntl->get_reset_handler(
adev->reset_cntl, reset_context); adev->reset_cntl, reset_context);
...@@ -93,9 +90,6 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev, ...@@ -93,9 +90,6 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
int ret; int ret;
struct amdgpu_reset_handler *reset_handler = NULL; struct amdgpu_reset_handler *reset_handler = NULL;
if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
return -ENOSYS;
if (adev->reset_cntl) if (adev->reset_cntl)
reset_handler = adev->reset_cntl->get_reset_handler( reset_handler = adev->reset_cntl->get_reset_handler(
adev->reset_cntl, reset_context); adev->reset_cntl, reset_context);
......
...@@ -30,8 +30,7 @@ enum AMDGPU_RESET_FLAGS { ...@@ -30,8 +30,7 @@ enum AMDGPU_RESET_FLAGS {
AMDGPU_NEED_FULL_RESET = 0, AMDGPU_NEED_FULL_RESET = 0,
AMDGPU_SKIP_HW_RESET = 1, AMDGPU_SKIP_HW_RESET = 1,
AMDGPU_SKIP_MODE2_RESET = 2, AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
AMDGPU_RESET_FOR_DEVICE_REMOVE = 3,
}; };
struct amdgpu_reset_context { struct amdgpu_reset_context {
......
...@@ -290,7 +290,6 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) ...@@ -290,7 +290,6 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE; reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev; reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context); amdgpu_device_gpu_recover(adev, NULL, &reset_context);
} }
......
...@@ -317,7 +317,6 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) ...@@ -317,7 +317,6 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE; reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev; reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context); amdgpu_device_gpu_recover(adev, NULL, &reset_context);
} }
......
...@@ -529,7 +529,6 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) ...@@ -529,7 +529,6 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE; reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev; reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context); amdgpu_device_gpu_recover(adev, NULL, &reset_context);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment