Commit 93521410 authored by James Zhu's avatar James Zhu Committed by Alex Deucher

drm/amdgpu/vcn2.0: Add firmware w/r ptr reset sync

Add firmware write/read point reset sync through shared memory
Signed-off-by: default avatarJames Zhu <James.Zhu@amd.com>
Reviewed-by: default avatarLeo Liu <leo.liu@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 2c68f0e3
...@@ -92,6 +92,7 @@ static int vcn_v2_0_sw_init(void *handle) ...@@ -92,6 +92,7 @@ static int vcn_v2_0_sw_init(void *handle)
struct amdgpu_ring *ring; struct amdgpu_ring *ring;
int i, r; int i, r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
volatile struct amdgpu_fw_shared *fw_shared;
/* VCN DEC TRAP */ /* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
...@@ -174,6 +175,8 @@ static int vcn_v2_0_sw_init(void *handle) ...@@ -174,6 +175,8 @@ static int vcn_v2_0_sw_init(void *handle)
if (r) if (r)
return r; return r;
fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
return 0; return 0;
} }
...@@ -188,6 +191,9 @@ static int vcn_v2_0_sw_fini(void *handle) ...@@ -188,6 +191,9 @@ static int vcn_v2_0_sw_fini(void *handle)
{ {
int r; int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
fw_shared->present_flag_0 = 0;
amdgpu_virt_free_mm_table(adev); amdgpu_virt_free_mm_table(adev);
...@@ -354,6 +360,15 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) ...@@ -354,6 +360,15 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
/* non-cache window */
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0,
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
} }
...@@ -437,13 +452,16 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec ...@@ -437,13 +452,16 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/* non-cache window */ /* non-cache window */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0),
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
/* VCN global tiling registers */ /* VCN global tiling registers */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0( WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
...@@ -768,6 +786,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev) ...@@ -768,6 +786,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
{ {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp; uint32_t rb_bufsz, tmp;
...@@ -871,6 +890,8 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) ...@@ -871,6 +890,8 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
/* set the write pointer delay */ /* set the write pointer delay */
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
...@@ -893,6 +914,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) ...@@ -893,6 +914,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
lower_32_bits(ring->wptr)); lower_32_bits(ring->wptr));
fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
/* Unstall DPG */ /* Unstall DPG */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
...@@ -901,6 +923,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) ...@@ -901,6 +923,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
static int vcn_v2_0_start(struct amdgpu_device *adev) static int vcn_v2_0_start(struct amdgpu_device *adev)
{ {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp; uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl; uint32_t lmi_swap_cntl;
...@@ -1035,6 +1058,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) ...@@ -1035,6 +1058,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
/* programm the RB_BASE for ring buffer */ /* programm the RB_BASE for ring buffer */
WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
lower_32_bits(ring->gpu_addr)); lower_32_bits(ring->gpu_addr));
...@@ -1047,20 +1071,25 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) ...@@ -1047,20 +1071,25 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
lower_32_bits(ring->wptr)); lower_32_bits(ring->wptr));
fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
ring = &adev->vcn.inst->ring_enc[0]; ring = &adev->vcn.inst->ring_enc[0];
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
ring = &adev->vcn.inst->ring_enc[1]; ring = &adev->vcn.inst->ring_enc[1];
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
return 0; return 0;
} }
...@@ -1182,6 +1211,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, ...@@ -1182,6 +1211,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
if (!ret_code) { if (!ret_code) {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
/* pause DPG */ /* pause DPG */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
...@@ -1196,6 +1226,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, ...@@ -1196,6 +1226,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
/* Restore */ /* Restore */
fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
ring = &adev->vcn.inst->ring_enc[0]; ring = &adev->vcn.inst->ring_enc[0];
ring->wptr = 0; ring->wptr = 0;
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
...@@ -1203,7 +1234,9 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, ...@@ -1203,7 +1234,9 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
ring = &adev->vcn.inst->ring_enc[1]; ring = &adev->vcn.inst->ring_enc[1];
ring->wptr = 0; ring->wptr = 0;
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
...@@ -1211,9 +1244,12 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, ...@@ -1211,9 +1244,12 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
/* Unstall DPG */ /* Unstall DPG */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment