Commit 5eeb4593 authored by Dennis Li's avatar Dennis Li Committed by Alex Deucher

drm/amdgpu: remove redundant GPU reset

Because bad pages saving has been moved to UMC error interrupt callback,
which will trigger a new GPU reset after saving.
Signed-off-by: default avatarDennis Li <Dennis.Li@amd.com>
Reviewed-by: default avatarHawking Zhang <hawking.zhang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 22503d80
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
#define AMDGPU_RAS_FLAG_INIT_NEED_RESET (0x1 << 1) #define AMDGPU_RAS_FLAG_INIT_NEED_RESET (0x1 << 1)
#define AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV (0x1 << 2)
enum amdgpu_ras_block { enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0, AMDGPU_RAS_BLOCK__UMC = 0,
...@@ -513,14 +512,7 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) ...@@ -513,14 +512,7 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
{ {
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
/* if (in_task())
* Save bad page to eeprom before gpu reset, i2c may be unstable
* in gpu reset.
*
* Also, exclude the case when ras recovery issuer is
* eeprom page write itself.
*/
if (!(ras->flags & AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV) && in_task())
amdgpu_ras_reserve_bad_pages(adev); amdgpu_ras_reserve_bad_pages(adev);
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
......
...@@ -479,7 +479,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, ...@@ -479,7 +479,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
int i, ret = 0; int i, ret = 0;
struct i2c_msg *msgs, *msg; struct i2c_msg *msgs, *msg;
unsigned char *buffs, *buff; unsigned char *buffs, *buff;
bool sched_ras_recovery = false;
struct eeprom_table_record *record; struct eeprom_table_record *record;
struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_device *adev = to_amdgpu_device(control);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
...@@ -517,7 +516,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, ...@@ -517,7 +516,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
"Saved bad pages(%d) reaches threshold value(%d).\n", "Saved bad pages(%d) reaches threshold value(%d).\n",
control->num_recs + num, ras->bad_page_cnt_threshold); control->num_recs + num, ras->bad_page_cnt_threshold);
control->tbl_hdr.header = EEPROM_TABLE_HDR_BAD; control->tbl_hdr.header = EEPROM_TABLE_HDR_BAD;
sched_ras_recovery = true;
} }
/* In case of overflow just start from beginning to not lose newest records */ /* In case of overflow just start from beginning to not lose newest records */
...@@ -603,20 +601,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, ...@@ -603,20 +601,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
__update_tbl_checksum(control, records, num, old_hdr_byte_sum); __update_tbl_checksum(control, records, num, old_hdr_byte_sum);
__update_table_header(control, buffs); __update_table_header(control, buffs);
if (sched_ras_recovery) {
/*
* Before scheduling ras recovery, assert the related
* flag first, which shall bypass common bad page
* reservation execution in amdgpu_ras_reset_gpu.
*/
amdgpu_ras_get_context(adev)->flags |=
AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV;
dev_warn(adev->dev, "Conduct ras recovery due to bad "
"page threshold reached.\n");
amdgpu_ras_reset_gpu(adev);
}
} else if (!__validate_tbl_checksum(control, records, num)) { } else if (!__validate_tbl_checksum(control, records, num)) {
DRM_WARN("EEPROM Table checksum mismatch!"); DRM_WARN("EEPROM Table checksum mismatch!");
/* TODO Uncomment when EEPROM read/write is relliable */ /* TODO Uncomment when EEPROM read/write is relliable */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment