Commit 11003c68 authored by Dennis Li, committed by Alex Deucher

drm/amdgpu: remove unnecessary reading of eeprom header

If the number of bad page records exceeds the threshold, the driver has
already updated both the EEPROM header and control->tbl_hdr.header before
GPU reset, so the GPU recovery thread does not need to read the EEPROM
header directly.

v2: merge amdgpu_ras_check_err_threshold into amdgpu_ras_eeprom_check_err_threshold
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent e0cd93b7
...@@ -4399,7 +4399,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, ...@@ -4399,7 +4399,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
* bad_page_threshold value to fix this once * bad_page_threshold value to fix this once
* probing driver again. * probing driver again.
*/ */
if (!amdgpu_ras_check_err_threshold(tmp_adev)) { if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
/* must succeed. */ /* must succeed. */
amdgpu_ras_resume(tmp_adev); amdgpu_ras_resume(tmp_adev);
} else { } else {
......
...@@ -2189,19 +2189,3 @@ bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev) ...@@ -2189,19 +2189,3 @@ bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
return false; return false;
} }
bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
bool exc_err_limit = false;
if (con && (amdgpu_bad_page_threshold != 0))
amdgpu_ras_eeprom_check_err_threshold(&con->eeprom_control,
&exc_err_limit);
/*
* We are only interested in variable exc_err_limit,
* as it says if GPU is in bad state or not.
*/
return exc_err_limit;
}
...@@ -491,8 +491,6 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev); ...@@ -491,8 +491,6 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev);
unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
bool is_ce); bool is_ce);
bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev);
/* error handling functions */ /* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
struct eeprom_table_record *bps, int pages); struct eeprom_table_record *bps, int pages);
......
...@@ -434,47 +434,21 @@ static uint32_t __correct_eeprom_dest_address(uint32_t curr_address) ...@@ -434,47 +434,21 @@ static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
return curr_address; return curr_address;
} }
int amdgpu_ras_eeprom_check_err_threshold( bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
struct amdgpu_ras_eeprom_control *control,
bool *exceed_err_limit)
{ {
struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
unsigned char buff[EEPROM_ADDRESS_SIZE +
EEPROM_TABLE_HEADER_SIZE] = { 0 };
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
struct i2c_msg msg = {
.addr = control->i2c_address,
.flags = I2C_M_RD,
.len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
.buf = buff,
};
int ret;
*exceed_err_limit = false;
if (!__is_ras_eeprom_supported(adev)) if (!__is_ras_eeprom_supported(adev))
return 0; return false;
/* read EEPROM table header */
mutex_lock(&control->tbl_mutex);
ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
if (ret < 1) {
dev_err(adev->dev, "Failed to read EEPROM table header.\n");
goto err;
}
__decode_table_header_from_buff(hdr, &buff[2]);
if (hdr->header == EEPROM_TABLE_HDR_BAD) { if (con->eeprom_control.tbl_hdr.header == EEPROM_TABLE_HDR_BAD) {
dev_warn(adev->dev, "This GPU is in BAD status."); dev_warn(adev->dev, "This GPU is in BAD status.");
dev_warn(adev->dev, "Please retire it or setting one bigger " dev_warn(adev->dev, "Please retire it or setting one bigger "
"threshold value when reloading driver.\n"); "threshold value when reloading driver.\n");
*exceed_err_limit = true; return true;
} }
err: return false;
mutex_unlock(&control->tbl_mutex);
return 0;
} }
int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
......
...@@ -80,9 +80,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, ...@@ -80,9 +80,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
bool *exceed_err_limit); bool *exceed_err_limit);
int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control); int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);
int amdgpu_ras_eeprom_check_err_threshold( bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev);
struct amdgpu_ras_eeprom_control *control,
bool *exceed_err_limit);
int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
struct eeprom_table_record *records, struct eeprom_table_record *records,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment