Commit 74a5d165 authored by Jack Xiao, committed by Alex Deucher

drm/amdgpu: allow unaligned memory access (v2)

Set up the CP and SDMA for proper unaligned memory access.
Required for OpenCL 2.x

v2: update commit message
Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
Reviewed-by: Monk Liu <monk.liu@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
parent 0147ee0f
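Note on the pattern used throughout the diff: all five files build the new SH_MEM_CONFIG value the same way, through the driver's REG_SET_FIELD() macro, and then program it either directly with WREG32() or via an SDMA SRBM write. The standalone sketch below (not part of the patch) only illustrates the shape of that pattern; the macro body, the field shift/mask and the enum value are simplified placeholders, not the real GFX7/GFX8 register encodings.

#include <stdint.h>
#include <stdio.h>

/* Placeholder field definitions, following the <REG>__<FIELD>__SHIFT/_MASK
 * naming of the generated register headers; the numbers are illustrative. */
#define SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT	0x2
#define SH_MEM_CONFIG__ALIGNMENT_MODE_MASK	0xcu
#define SH_MEM_ALIGNMENT_MODE_UNALIGNED		3	/* placeholder enum value */

/* Same shape as the kernel's REG_SET_FIELD(): clear the field in the
 * original value, then OR in the new field value shifted into place. */
#define REG_SET_FIELD(orig, reg, field, val)				\
	(((orig) & ~reg##__##field##_MASK) |				\
	 (((uint32_t)(val) << reg##__##field##__SHIFT) & reg##__##field##_MASK))

int main(void)
{
	uint32_t sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);

	/* prints 0x0000000c with the placeholder shift/mask above */
	printf("SH_MEM_CONFIG = 0x%08x\n", (unsigned)sh_mem_cfg);
	return 0;
}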
drivers/gpu/drm/amd/amdgpu/cik_sdma.c

@@ -33,6 +33,8 @@
 #include "bif/bif_4_1_sh_mask.h"
 #include "gca/gfx_7_2_d.h"
+#include "gca/gfx_7_2_enum.h"
+#include "gca/gfx_7_2_sh_mask.h"
 #include "gmc/gmc_7_1_d.h"
 #include "gmc/gmc_7_1_sh_mask.h"
@@ -837,6 +839,8 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
 			  SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
+	u32 sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+				       SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
 	if (vm_id < 8) {
@@ -857,7 +861,7 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
 	amdgpu_ring_write(ring, mmSH_MEM_CONFIG);
-	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, sh_mem_cfg);
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
 	amdgpu_ring_write(ring, mmSH_MEM_APE1_BASE);
...
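For readers unfamiliar with the ring traffic in the hunk above: each SRBM register write is queued as three dwords, a packet header, the register offset, then the value, which is why swapping the literal 0 for sh_mem_cfg is a one-dword change. The sketch below is a hypothetical, userspace-only illustration of that triplet; the opcode, packet encoding, register offset and the emit_srbm_write() helper are assumptions for demonstration, not code from the driver.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins; not the real CIK SDMA encodings. */
#define SDMA_OPCODE_SRBM_WRITE	0xe
#define SDMA_PACKET(op, sub_op, e)					\
	((((uint32_t)(e) & 0xffff) << 16) |				\
	 (((uint32_t)(sub_op) & 0xff) << 8) |				\
	 ((uint32_t)(op) & 0xff))
#define mmSH_MEM_CONFIG		0x231c	/* placeholder register offset */

/* Toy model of the ring buffer: just an array of dwords. */
struct fake_ring {
	uint32_t buf[16];
	unsigned int wptr;
};

static void ring_write(struct fake_ring *ring, uint32_t v)
{
	ring->buf[ring->wptr++] = v;
}

/* Hypothetical helper mirroring the header/register/value triplet above. */
static void emit_srbm_write(struct fake_ring *ring, uint32_t reg, uint32_t val)
{
	ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	ring_write(ring, reg);
	ring_write(ring, val);
}

int main(void)
{
	struct fake_ring ring = { .wptr = 0 };
	uint32_t sh_mem_cfg = 0xc;	/* stand-in for the REG_SET_FIELD() result */
	unsigned int i;

	emit_srbm_write(&ring, mmSH_MEM_CONFIG, sh_mem_cfg);
	for (i = 0; i < ring.wptr; i++)
		printf("dw%u: 0x%08x\n", i, (unsigned)ring.buf[i]);
	return 0;
}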
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c

@@ -2022,6 +2022,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 	u32 gb_addr_config;
 	u32 mc_shared_chmap, mc_arb_ramcfg;
 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
+	u32 sh_mem_cfg;
 	u32 tmp;
 	int i;
@@ -2214,11 +2215,14 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 	/* XXX SH_MEM regs */
 	/* where to put LDS, scratch, GPUVM in FSA64 space */
+	sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+				   SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 	mutex_lock(&adev->srbm_mutex);
 	for (i = 0; i < 16; i++) {
 		cik_srbm_select(adev, 0, 0, 0, i);
 		/* CP and shaders */
-		WREG32(mmSH_MEM_CONFIG, 0);
+		WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
 		WREG32(mmSH_MEM_APE1_BASE, 1);
 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
 		WREG32(mmSH_MEM_BASES, 0);
...
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

@@ -2050,10 +2050,14 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
 		if (i == 0) {
 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
+			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
+					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 			WREG32(mmSH_MEM_CONFIG, tmp);
 		} else {
 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
+			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
+					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 			WREG32(mmSH_MEM_CONFIG, tmp);
 		}
...
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c

@@ -36,6 +36,7 @@
 #include "gmc/gmc_8_1_sh_mask.h"
 #include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_enum.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "bif/bif_5_0_d.h"
@@ -900,6 +901,8 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					 unsigned vm_id, uint64_t pd_addr)
 {
 	u32 srbm_gfx_cntl = 0;
+	u32 sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+				       SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
@@ -925,7 +928,7 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
 	amdgpu_ring_write(ring, mmSH_MEM_CONFIG);
-	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, sh_mem_cfg);
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
...
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c

@@ -36,6 +36,7 @@
 #include "gmc/gmc_8_1_sh_mask.h"
 #include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_enum.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "bif/bif_5_0_d.h"
@@ -963,6 +964,8 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					 unsigned vm_id, uint64_t pd_addr)
 {
 	u32 srbm_gfx_cntl = 0;
+	u32 sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+				       SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
@@ -988,7 +991,7 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
 	amdgpu_ring_write(ring, mmSH_MEM_CONFIG);
-	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, sh_mem_cfg);
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
...