Commit d59fcfb0 authored by Candice Li's avatar Candice Li Committed by Alex Deucher

drm/amdgpu: Identify data parity error corrected in replay mode

Use ErrorCodeExt field to identify data parity error in replay mode.
Signed-off-by: default avatarCandice Li <candice.li@amd.com>
Reviewed-by: default avatarTao Zhou <tao.zhou1@amd.com>
Reviewed-by: default avatarYang Wang <kevinyang.wang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent f7a17b2b
...@@ -88,6 +88,27 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) ...@@ -88,6 +88,27 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
umc_v12_0_reset_error_count_per_channel, NULL); umc_v12_0_reset_error_count_per_channel, NULL);
} }
static bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status)
{
return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1));
}
static bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
{
return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0) ||
/* Identify data parity error in replay mode */
((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) &&
!(umc_v12_0_is_uncorrectable_error(mc_umc_status)))));
}
static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
uint64_t umc_reg_offset, uint64_t umc_reg_offset,
unsigned long *error_count) unsigned long *error_count)
...@@ -104,10 +125,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, ...@@ -104,10 +125,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
mc_umc_status = mc_umc_status =
RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && if (umc_v12_0_is_correctable_error(mc_umc_status))
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0)))
*error_count += 1; *error_count += 1;
} }
...@@ -125,11 +143,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev ...@@ -125,11 +143,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev
mc_umc_status = mc_umc_status =
RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && if (umc_v12_0_is_uncorrectable_error(mc_umc_status))
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
*error_count += 1; *error_count += 1;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment