Commit b63ac5d3 authored by Tao Zhou, committed by Alex Deucher

drm/amdgpu: refine RAS poison consumption handler

Query RAS status before RAS poison consumption handling, and add more
comments and log messages.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-and-tested-by: Mohammad Zafar Ziya <Mohammadzafar.ziya@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 36780606
...@@ -1538,33 +1538,42 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) ...@@ -1538,33 +1538,42 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj, static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
bool poison_stat = true, need_reset = true; bool poison_stat = false;
struct amdgpu_device *adev = obj->adev; struct amdgpu_device *adev = obj->adev;
struct ras_err_data err_data = {0, 0, 0, NULL}; struct ras_err_data err_data = {0, 0, 0, NULL};
struct amdgpu_ras_block_object *block_obj = struct amdgpu_ras_block_object *block_obj =
amdgpu_ras_get_ras_block(adev, obj->head.block, 0); amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
if (!adev->gmc.xgmi.connected_to_cpu) if (!block_obj || !block_obj->hw_ops)
amdgpu_umc_poison_handler(adev, &err_data, false); return;
/* both query_poison_status and handle_poison_consumption are optional */
if (block_obj && block_obj->hw_ops) {
if (block_obj->hw_ops->query_poison_status) {
poison_stat = block_obj->hw_ops->query_poison_status(adev);
if (!poison_stat)
dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
block_obj->ras_comm.name);
}
if (poison_stat && block_obj->hw_ops->handle_poison_consumption) { /* both query_poison_status and handle_poison_consumption are optional,
poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); * but at least one of them should be implemented if we need poison
need_reset = poison_stat; * consumption handler
*/
if (block_obj->hw_ops->query_poison_status) {
poison_stat = block_obj->hw_ops->query_poison_status(adev);
if (!poison_stat) {
/* Not poison consumption interrupt, no need to handle it */
dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
block_obj->ras_comm.name);
return;
} }
} }
/* gpu reset is fallback for all failed cases */ if (!adev->gmc.xgmi.connected_to_cpu)
if (need_reset) amdgpu_umc_poison_handler(adev, &err_data, false);
if (block_obj->hw_ops->handle_poison_consumption)
poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
/* gpu reset is fallback for failed and default cases */
if (poison_stat) {
dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n",
block_obj->ras_comm.name);
amdgpu_ras_reset_gpu(adev); amdgpu_ras_reset_gpu(adev);
}
} }
static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj, static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment