Commit 86153f1b authored by Hawking Zhang, committed by Alex Deucher

drm/amdgpu: add reset_ras_error_count function for SDMA

SDMA RAS error counters are dirty after a cold reboot.
A read operation is needed to reset them to 0.
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent e7429606
...@@ -56,6 +56,7 @@ struct amdgpu_sdma_ras_funcs { ...@@ -56,6 +56,7 @@ struct amdgpu_sdma_ras_funcs {
void (*ras_fini)(struct amdgpu_device *adev); void (*ras_fini)(struct amdgpu_device *adev);
int (*query_ras_error_count)(struct amdgpu_device *adev, int (*query_ras_error_count)(struct amdgpu_device *adev,
uint32_t instance, void *ras_error_status); uint32_t instance, void *ras_error_status);
void (*reset_ras_error_count)(struct amdgpu_device *adev);
}; };
struct amdgpu_sdma { struct amdgpu_sdma {
......
...@@ -1801,13 +1801,9 @@ static int sdma_v4_0_late_init(void *handle) ...@@ -1801,13 +1801,9 @@ static int sdma_v4_0_late_init(void *handle)
struct ras_ih_if ih_info = { struct ras_ih_if ih_info = {
.cb = sdma_v4_0_process_ras_data_cb, .cb = sdma_v4_0_process_ras_data_cb,
}; };
int i;
/* read back edc counter registers to clear the counters */ if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count)
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { adev->sdma.funcs->reset_ras_error_count(adev);
for (i = 0; i < adev->sdma.num_instances; i++)
RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
}
if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
return adev->sdma.funcs->ras_late_init(adev, &ih_info); return adev->sdma.funcs->ras_late_init(adev, &ih_info);
...@@ -2572,10 +2568,22 @@ static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, ...@@ -2572,10 +2568,22 @@ static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
return 0; return 0;
}; };
/*
 * sdma_v4_0_reset_ras_error_count - reset the SDMA RAS error counters
 * @adev: amdgpu device whose SDMA instances are walked
 *
 * Reads back the EDC counter register of every SDMA instance; the read
 * back is what clears the counters. Does nothing when RAS is not
 * supported for the SDMA block.
 */
static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
{
	int inst;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA))
		return;

	/* the value read is intentionally discarded */
	for (inst = 0; inst < adev->sdma.num_instances; inst++)
		RREG32_SDMA(inst, mmSDMA0_EDC_COUNTER);
}
static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = { static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
.ras_late_init = amdgpu_sdma_ras_late_init, .ras_late_init = amdgpu_sdma_ras_late_init,
.ras_fini = amdgpu_sdma_ras_fini, .ras_fini = amdgpu_sdma_ras_fini,
.query_ras_error_count = sdma_v4_0_query_ras_error_count, .query_ras_error_count = sdma_v4_0_query_ras_error_count,
.reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
}; };
static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment