Commit 75ee6487 authored by Felix Kuehling's avatar Felix Kuehling Committed by Alex Deucher

drm/amdkfd: Consistently apply noretry setting

Apply the same setting to SH_MEM_CONFIG and VM_CONTEXT1_CNTL. This
makes the noretry param no longer KFD-specific. On GFX10 I'm not
changing SH_MEM_CONFIG in this commit because GFX10 has different
retry behaviour in the SQ and I don't have a way to test it at the
moment.
Suggested-by: default avatarChristian König <Christian.Koenig@amd.com>
CC: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by : Shaoyun.liu < Shaoyun.liu@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 7a17c8ce
...@@ -164,6 +164,7 @@ extern int amdgpu_async_gfx_ring; ...@@ -164,6 +164,7 @@ extern int amdgpu_async_gfx_ring;
extern int amdgpu_mcbp; extern int amdgpu_mcbp;
extern int amdgpu_discovery; extern int amdgpu_discovery;
extern int amdgpu_mes; extern int amdgpu_mes;
extern int amdgpu_noretry;
#ifdef CONFIG_DRM_AMDGPU_SI #ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support; extern int amdgpu_si_support;
......
...@@ -142,6 +142,7 @@ int amdgpu_async_gfx_ring = 1; ...@@ -142,6 +142,7 @@ int amdgpu_async_gfx_ring = 1;
int amdgpu_mcbp = 0; int amdgpu_mcbp = 0;
int amdgpu_discovery = -1; int amdgpu_discovery = -1;
int amdgpu_mes = 0; int amdgpu_mes = 0;
int amdgpu_noretry;
struct amdgpu_mgpu_info mgpu_info = { struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
...@@ -608,6 +609,10 @@ MODULE_PARM_DESC(mes, ...@@ -608,6 +609,10 @@ MODULE_PARM_DESC(mes,
"Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)"); "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");
module_param_named(mes, amdgpu_mes, int, 0444); module_param_named(mes, amdgpu_mes, int, 0444);
MODULE_PARM_DESC(noretry,
"Disable retry faults (0 = retry enabled (default), 1 = retry disabled)");
module_param_named(noretry, amdgpu_noretry, int, 0644);
#ifdef CONFIG_HSA_AMD #ifdef CONFIG_HSA_AMD
/** /**
* DOC: sched_policy (int) * DOC: sched_policy (int)
...@@ -683,17 +688,6 @@ module_param(ignore_crat, int, 0444); ...@@ -683,17 +688,6 @@ module_param(ignore_crat, int, 0444);
MODULE_PARM_DESC(ignore_crat, MODULE_PARM_DESC(ignore_crat,
"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
/**
* DOC: noretry (int)
* This parameter sets sh_mem_config.retry_disable. Default value, 0, enables retry.
* Setting 1 disables retry.
* Retry is needed for recoverable page faults.
*/
int noretry;
module_param(noretry, int, 0644);
MODULE_PARM_DESC(noretry,
"Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
/** /**
* DOC: halt_if_hws_hang (int) * DOC: halt_if_hws_hang (int)
* Halt if HWS hang is detected. Default value, 0, disables the halt on hang. * Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
......
...@@ -1942,11 +1942,15 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) ...@@ -1942,11 +1942,15 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
if (i == 0) { if (i == 0) {
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED); SH_MEM_ALIGNMENT_MODE_UNALIGNED);
tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
!!amdgpu_noretry);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
} else { } else {
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED); SH_MEM_ALIGNMENT_MODE_UNALIGNED);
tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
!!amdgpu_noretry);
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
(adev->gmc.private_aperture_start >> 48)); (adev->gmc.private_aperture_start >> 48));
......
...@@ -236,7 +236,8 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) ...@@ -236,7 +236,8 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
block_size); block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm. */ /* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 1); RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
......
...@@ -215,7 +215,8 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) ...@@ -215,7 +215,8 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
adev->vm_manager.block_size - 9); adev->vm_manager.block_size - 9);
/* Send no-retry XNACK on fault to suppress VM fault storm. */ /* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
......
...@@ -265,7 +265,8 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) ...@@ -265,7 +265,8 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
block_size); block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm. */ /* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 1); RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
......
...@@ -205,7 +205,8 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) ...@@ -205,7 +205,8 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
adev->vm_manager.block_size - 9); adev->vm_manager.block_size - 9);
/* Send no-retry XNACK on fault to suppress VM fault storm. */ /* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
......
...@@ -61,7 +61,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm, ...@@ -61,7 +61,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config = qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
if (noretry && if (amdgpu_noretry &&
!dqm->dev->device_info->needs_iommu_device) !dqm->dev->device_info->needs_iommu_device)
qpd->sh_mem_config |= qpd->sh_mem_config |=
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
......
...@@ -157,7 +157,7 @@ extern int ignore_crat; ...@@ -157,7 +157,7 @@ extern int ignore_crat;
/* /*
* Set sh_mem_config.retry_disable on Vega10 * Set sh_mem_config.retry_disable on Vega10
*/ */
extern int noretry; extern int amdgpu_noretry;
/* /*
* Halt if HWS hang is detected * Halt if HWS hang is detected
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment