Commit 8f6368a9 authored by John Clements's avatar John Clements Committed by Alex Deucher

drm/amdgpu: Conditionally reset RAS counters on boot

Only clear RAS error counters if perestent EDC harvesting is not supported
Reviewed-by: default avatarHawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: default avatarJohn Clements <john.clements@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 8ef4f94a
......@@ -607,7 +607,6 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
struct ras_ih_if ih_info = {
.cb = amdgpu_gfx_process_ras_data_cb,
};
struct ras_query_if info = { 0 };
if (!adev->gfx.ras_if) {
adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
......@@ -625,12 +624,8 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
goto free;
if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
if (adev->gmc.xgmi.connected_to_cpu) {
info.head = *adev->gfx.ras_if;
amdgpu_ras_query_error_status(adev, &info);
} else {
if (!amdgpu_persistent_edc_harvesting_supported(adev))
amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
}
r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
if (r)
......
......@@ -2194,7 +2194,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
return r;
}
static int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
{
if (adev->gmc.xgmi.connected_to_cpu)
return 1;
......
......@@ -625,4 +625,7 @@ void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev);
void amdgpu_release_ras_context(struct amdgpu_device *adev);
int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev);
#endif
......@@ -1268,6 +1268,7 @@ static int gmc_v9_0_late_init(void *handle)
}
}
if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->reset_ras_error_count)
adev->mmhub.ras_funcs->reset_ras_error_count(adev);
......@@ -1275,6 +1276,7 @@ static int gmc_v9_0_late_init(void *handle)
if (adev->hdp.ras_funcs &&
adev->hdp.ras_funcs->reset_ras_error_count)
adev->hdp.ras_funcs->reset_ras_error_count(adev);
}
r = amdgpu_gmc_ras_late_init(adev);
if (r)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment