Commit 895797d9 authored by Graham Sider, committed by Alex Deucher

drm/amdgpu/bu: Add use_mtype_cc_wa module param

Set use_mtype_cc_wa to 1 by default so that the PTE coherence flag
MTYPE_CC is used instead of MTYPE_RW. This is required for the time being
to mitigate a bug causing XCCs to hit stale data due to TCC marking fully
dirty lines as exclusive.
Signed-off-by: Graham Sider <Graham.Sider@amd.com>
Reviewed-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 2e8cc5d3
...@@ -212,6 +212,7 @@ extern int amdgpu_noretry; ...@@ -212,6 +212,7 @@ extern int amdgpu_noretry;
extern int amdgpu_force_asic_type; extern int amdgpu_force_asic_type;
extern int amdgpu_smartshift_bias; extern int amdgpu_smartshift_bias;
extern int amdgpu_use_xgmi_p2p; extern int amdgpu_use_xgmi_p2p;
extern bool amdgpu_use_mtype_cc_wa;
#ifdef CONFIG_HSA_AMD #ifdef CONFIG_HSA_AMD
extern int sched_policy; extern int sched_policy;
extern bool debug_evictions; extern bool debug_evictions;
......
...@@ -822,6 +822,13 @@ MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault ( ...@@ -822,6 +822,13 @@ MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (
module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
#endif #endif
/**
 * DOC: use_mtype_cc_wa (bool)
 * Use the MTYPE_CC workaround. When set to 1 (the default), MTYPE_CC is used
 * where applicable; when set to 0, MTYPE_RW is used where applicable.
 */
bool amdgpu_use_mtype_cc_wa = true;
MODULE_PARM_DESC(use_mtype_cc_wa, "Use MTYPE_CC workaround (0 = use MTYPE_RW where applicable, 1 = use MTYPE_CC where applicable (default))");
module_param_named(use_mtype_cc_wa, amdgpu_use_mtype_cc_wa, bool, 0444);
/** /**
* DOC: pcie_p2p (bool) * DOC: pcie_p2p (bool)
* Enable PCIe P2P (requires large-BAR). Default value: true (on) * Enable PCIe P2P (requires large-BAR). Default value: true (on)
......
...@@ -1187,6 +1187,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, ...@@ -1187,6 +1187,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
unsigned int mtype; unsigned int mtype;
unsigned int mtype_default;
bool snoop = false; bool snoop = false;
switch (adev->ip_versions[GC_HWIP][0]) { switch (adev->ip_versions[GC_HWIP][0]) {
...@@ -1230,7 +1231,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, ...@@ -1230,7 +1231,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
/* FIXME: Needs more work for handling multiple memory /* FIXME: Needs more work for handling multiple memory
* partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU * partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU
* modes. * modes.
* FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable.
* To force use of MTYPE_RW, set use_mtype_cc_wa=0
*/ */
mtype_default = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW;
snoop = true; snoop = true;
if (uncached) { if (uncached) {
mtype = MTYPE_UC; mtype = MTYPE_UC;
...@@ -1245,14 +1249,14 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, ...@@ -1245,14 +1249,14 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
* socket should be treated as remote access so MTYPE_RW * socket should be treated as remote access so MTYPE_RW
* cannot be used always. * cannot be used always.
*/ */
mtype = MTYPE_RW; mtype = mtype_default;
} else if (adev->flags & AMD_IS_APU) { } else if (adev->flags & AMD_IS_APU) {
/* APU on carve out mode */ /* APU on carve out mode */
mtype = MTYPE_RW; mtype = mtype_default;
} else { } else {
/* dGPU */ /* dGPU */
if (is_vram && bo_adev == adev) if (is_vram && bo_adev == adev)
mtype = MTYPE_RW; mtype = mtype_default;
else if (is_vram) else if (is_vram)
mtype = MTYPE_NC; mtype = MTYPE_NC;
else else
......
...@@ -1198,9 +1198,12 @@ svm_range_get_pte_flags(struct kfd_node *node, ...@@ -1198,9 +1198,12 @@ svm_range_get_pte_flags(struct kfd_node *node,
if (uncached) { if (uncached) {
mapping_flags |= AMDGPU_VM_MTYPE_UC; mapping_flags |= AMDGPU_VM_MTYPE_UC;
} else if (domain == SVM_RANGE_VRAM_DOMAIN) { } else if (domain == SVM_RANGE_VRAM_DOMAIN) {
/* local HBM region close to partition */ /* local HBM region close to partition
* FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable.
* To force use of MTYPE_RW, set use_mtype_cc_wa=0
*/
if (bo_node == node) if (bo_node == node)
mapping_flags |= AMDGPU_VM_MTYPE_RW; mapping_flags |= amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
/* local HBM region far from partition or remote XGMI GPU */ /* local HBM region far from partition or remote XGMI GPU */
else if (svm_nodes_in_same_hive(bo_node, node)) else if (svm_nodes_in_same_hive(bo_node, node))
mapping_flags |= AMDGPU_VM_MTYPE_NC; mapping_flags |= AMDGPU_VM_MTYPE_NC;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment