Commit 5c5b2ba0 authored by Evan Quan, committed by Alex Deucher

drm/amdgpu: fix possible pstate switch race condition

Add lock protection so that p-state switches are
serialized. Also update the hive pstate only when
all devices in the hive are in the same
state.
Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent b0adca4d
...@@ -977,6 +977,9 @@ struct amdgpu_device { ...@@ -977,6 +977,9 @@ struct amdgpu_device {
uint64_t unique_id; uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
/* device pstate */
int pstate;
}; };
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
......
...@@ -274,12 +274,18 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) ...@@ -274,12 +274,18 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
{ {
int ret = 0; int ret = 0;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
struct amdgpu_device *tmp_adev;
bool update_hive_pstate = true;
if (!hive) if (!hive)
return 0; return 0;
if (hive->pstate == pstate) mutex_lock(&hive->hive_lock);
if (hive->pstate == pstate) {
mutex_unlock(&hive->hive_lock);
return 0; return 0;
}
dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
...@@ -290,11 +296,32 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) ...@@ -290,11 +296,32 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
pstate); pstate);
if (ret) if (ret) {
dev_err(adev->dev, dev_err(adev->dev,
"XGMI: Set pstate failure on device %llx, hive %llx, ret %d", "XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.node_id, adev->gmc.xgmi.node_id,
adev->gmc.xgmi.hive_id, ret); adev->gmc.xgmi.hive_id, ret);
goto out;
}
/* Update device pstate */
adev->pstate = pstate;
/*
* Update the hive pstate only when all devices of the hive
* are in the same pstate
*/
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
if (tmp_adev->pstate != adev->pstate) {
update_hive_pstate = false;
break;
}
}
if (update_hive_pstate)
hive->pstate = pstate;
out:
mutex_unlock(&hive->hive_lock);
return ret; return ret;
} }
...@@ -369,6 +396,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) ...@@ -369,6 +396,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit; goto exit;
} }
/* Set default device pstate */
adev->pstate = -1;
top_info = &adev->psp.xgmi_context.top_info; top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment