Commit d117fd29 authored by Samir Dhume's avatar Samir Dhume Committed by Alex Deucher

drm/amdgpu/vcn: sriov support for vcn_v4_0_3

initialization table handshake with mmsch
Signed-off-by: default avatarSamir Dhume <samir.dhume@amd.com>
Acked-by: default avatarLeo Liu <leo.liu@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 44fd83e9
......@@ -31,6 +31,7 @@
#include "soc15d.h"
#include "soc15_hw_ip.h"
#include "vcn_v2_0.h"
#include "mmsch_v4_0_3.h"
#include "vcn/vcn_4_0_3_offset.h"
#include "vcn/vcn_4_0_3_sh_mask.h"
......@@ -44,6 +45,7 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v4_0_3_set_powergating_state(void *handle,
......@@ -137,6 +139,12 @@ static int vcn_v4_0_3_sw_init(void *handle)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
}
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
}
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
......@@ -174,6 +182,9 @@ static int vcn_v4_0_3_sw_fini(void *handle)
drm_dev_exit(idx);
}
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
r = amdgpu_vcn_suspend(adev);
if (r)
return r;
......@@ -196,33 +207,47 @@ static int vcn_v4_0_3_hw_init(void *handle)
struct amdgpu_ring *ring;
int i, r, vcn_inst;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
vcn_inst = GET_INST(VCN, i);
ring = &adev->vcn.inst[i].ring_enc[0];
if (amdgpu_sriov_vf(adev)) {
r = vcn_v4_0_3_start_sriov(adev);
if (r)
goto done;
if (ring->use_doorbell) {
adev->nbio.funcs->vcn_doorbell_range(
adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
9 * vcn_inst,
adev->vcn.inst[i].aid_id);
WREG32_SOC15(
VCN, GET_INST(VCN, ring->me),
regVCN_RB1_DB_CTRL,
ring->doorbell_index
<< VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
/* Read DB_CTRL to flush the write DB_CTRL command. */
RREG32_SOC15(
VCN, GET_INST(VCN, ring->me),
regVCN_RB1_DB_CTRL);
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
ring = &adev->vcn.inst[i].ring_enc[0];
ring->wptr = 0;
ring->wptr_old = 0;
vcn_v4_0_3_unified_ring_set_wptr(ring);
ring->sched.ready = true;
}
} else {
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
vcn_inst = GET_INST(VCN, i);
ring = &adev->vcn.inst[i].ring_enc[0];
if (ring->use_doorbell) {
adev->nbio.funcs->vcn_doorbell_range(
adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
9 * vcn_inst,
adev->vcn.inst[i].aid_id);
WREG32_SOC15(
VCN, GET_INST(VCN, ring->me),
regVCN_RB1_DB_CTRL,
ring->doorbell_index
<< VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
/* Read DB_CTRL to flush the write DB_CTRL command. */
RREG32_SOC15(
VCN, GET_INST(VCN, ring->me),
regVCN_RB1_DB_CTRL);
}
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
}
done:
......@@ -820,6 +845,193 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
return 0;
}
static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
{
int i, vcn_inst;
struct amdgpu_ring *ring_enc;
uint64_t cache_addr;
uint64_t rb_enc_addr;
uint64_t ctx_addr;
uint32_t param, resp, expected;
uint32_t offset, cache_size;
uint32_t tmp, timeout;
struct amdgpu_mm_table *table = &adev->virt.mm_table;
uint32_t *table_loc;
uint32_t table_size;
uint32_t size, size_dw;
uint32_t init_status;
uint32_t enabled_vcn;
struct mmsch_v4_0_cmd_direct_write
direct_wt = { {0} };
struct mmsch_v4_0_cmd_direct_read_modify_write
direct_rd_mod_wt = { {0} };
struct mmsch_v4_0_cmd_end end = { {0} };
struct mmsch_v4_0_3_init_header header;
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
direct_wt.cmd_header.command_type =
MMSCH_COMMAND__DIRECT_REG_WRITE;
direct_rd_mod_wt.cmd_header.command_type =
MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
end.cmd_header.command_type = MMSCH_COMMAND__END;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
vcn_inst = GET_INST(VCN, i);
memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
header.version = MMSCH_VERSION;
header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
table_loc = (uint32_t *)table->cpu_addr;
table_loc += header.total_size;
table_size = 0;
MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
offset = 0;
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_VCPU_CACHE_OFFSET0), 0);
} else {
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[i].gpu_addr));
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[i].gpu_addr));
offset = cache_size;
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_VCPU_CACHE_OFFSET0),
AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
}
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_VCPU_CACHE_SIZE0),
cache_size);
cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_VCPU_CACHE_OFFSET1), 0);
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
AMDGPU_VCN_STACK_SIZE;
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_VCPU_CACHE_OFFSET2), 0);
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
rb_setup = &fw_shared->rb_setup;
ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
ring_enc->wptr = 0;
rb_enc_addr = ring_enc->gpu_addr;
rb_setup->is_rb_enabled_flags |= RB_ENABLED;
rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
rb_setup->rb_size = ring_enc->ring_size / 4;
fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
regUVD_VCPU_NONCACHE_SIZE0),
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
MMSCH_V4_0_INSERT_END();
header.vcn0.init_status = 0;
header.vcn0.table_offset = header.total_size;
header.vcn0.table_size = table_size;
header.total_size += table_size;
/* Send init table to mmsch */
size = sizeof(struct mmsch_v4_0_3_init_header);
table_loc = (uint32_t *)table->cpu_addr;
memcpy((void *)table_loc, &header, size);
ctx_addr = table->gpu_addr;
WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
size = header.total_size;
WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
param = 0x00000001;
WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
tmp = 0;
timeout = 1000;
resp = 0;
expected = MMSCH_VF_MAILBOX_RESP__OK;
while (resp != expected) {
resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
if (resp != 0)
break;
udelay(10);
tmp = tmp + 10;
if (tmp >= timeout) {
DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
" waiting for regMMSCH_VF_MAILBOX_RESP "\
"(expected=0x%08x, readback=0x%08x)\n",
tmp, expected, resp);
return -EBUSY;
}
}
enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
&& init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
"status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
}
}
return 0;
}
/**
* vcn_v4_0_3_start - VCN start
*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment