Commit e30a5223 authored by Alex Deucher

drm/amdgpu/gfx9: Switch baremetal to use KIQ for compute ring management. (v4)

KIQ is the Kernel Interface Queue for managing the MEC.  Rather than setting
up rings via direct MMIO of ring registers, the rings are configured via
special packets sent to the KIQ.  This allows the MEC to better manage shared
resources and certain power events. It also reduces the number of code paths
the driver has to support and is required for MEC powergating.

v2: drop gfx_v9_0_cp_compute_fini() as well
v3: rebase on latest changes derived from gfx8, add unmap queues on
hw_fini
v4: fix copy/paste typo in error message (Rex)
Acked-by: Tom St Denis <tom.stdenis@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent ba0c19f5
......@@ -1427,23 +1427,21 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
}
if (amdgpu_sriov_vf(adev)) {
r = gfx_v9_0_kiq_init(adev);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
r = gfx_v9_0_kiq_init(adev);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
kiq = &adev->gfx.kiq;
r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
if (r)
return r;
kiq = &adev->gfx.kiq;
r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
if (r)
return r;
/* create MQD for all compute queues as wel as KIQ for SRIOV case */
r = gfx_v9_0_compute_mqd_sw_init(adev);
if (r)
return r;
}
/* create MQD for all compute queues as wel as KIQ for SRIOV case */
r = gfx_v9_0_compute_mqd_sw_init(adev);
if (r)
return r;
/* reserve GDS, GWS and OA resource for gfx */
r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
......@@ -1490,11 +1488,9 @@ static int gfx_v9_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
if (amdgpu_sriov_vf(adev)) {
gfx_v9_0_compute_mqd_sw_fini(adev);
gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
gfx_v9_0_kiq_fini(adev);
}
gfx_v9_0_compute_mqd_sw_fini(adev);
gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
gfx_v9_0_kiq_fini(adev);
gfx_v9_0_mec_fini(adev);
gfx_v9_0_ngg_fini(adev);
......@@ -2387,13 +2383,6 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
udelay(50);
}
static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev)
{
gfx_v9_0_cp_compute_enable(adev, true);
return 0;
}
static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
const struct gfx_firmware_header_v1_0 *mec_hdr;
......@@ -2436,45 +2425,6 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
return 0;
}
/*
 * Drop the MQD buffer object of every compute ring.
 *
 * For each ring that still owns an MQD BO the object is reserved, unpinned,
 * unreserved and unreferenced, and the ring's pointer is cleared.  Rings
 * without an MQD BO are left untouched.
 */
static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)
{
	int idx;

	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[idx];
		int err;

		if (!ring->mqd_obj)
			continue;

		err = amdgpu_bo_reserve(ring->mqd_obj, true);
		if (unlikely(err != 0))
			dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", err);

		/*
		 * A failed reserve is only reported; the teardown below is
		 * still carried out so the object is released regardless.
		 */
		amdgpu_bo_unpin(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);

		amdgpu_bo_unref(&ring->mqd_obj);
		ring->mqd_obj = NULL;
	}
}
static int gfx_v9_0_init_queue(struct amdgpu_ring *ring);

/*
 * Bring the compute rings back up.
 *
 * Every compute ring is initialised through gfx_v9_0_init_queue(); a ring
 * that fails to initialise is only warned about and does not abort the
 * resume.  Afterwards compute is started, and the result of
 * gfx_v9_0_cp_compute_start() is what the caller gets back.
 */
static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int idx;

	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
		if (gfx_v9_0_init_queue(&adev->gfx.compute_ring[idx]))
			dev_warn(adev->dev, "compute queue %d init failed!\n", idx);
	}

	return gfx_v9_0_cp_compute_start(adev);
}
/* KIQ functions */
static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
{
......@@ -2565,6 +2515,56 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
return r;
}
/*
 * Ask the KIQ to unmap the compute queues and wait until it has done so.
 *
 * An UNMAP_QUEUES packet is submitted on the KIQ ring, followed by a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to a scratch register.  The
 * scratch register is then polled for up to adev->usec_timeout microseconds.
 *
 * Returns 0 on success, the error from the scratch/ring allocation helpers,
 * or -EINVAL if the completion value never shows up.
 */
static int gfx_v9_0_kiq_kcq_disable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}

	/* seed the scratch register with a value that differs from the marker */
	WREG32(scratch, 0xCAFEDEAD);

	/* 6 dwords for the unmap packet + 3 for the scratch write */
	r = amdgpu_ring_alloc(kiq_ring, 6 + 3);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		goto out_free_scratch;
	}

	/* unmap queues */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */
	/* the remaining four payload dwords are all zero */
	for (i = 0; i < 4; i++)
		amdgpu_ring_write(kiq_ring, 0);

	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll once per microsecond until the marker appears or we time out */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (tmp != 0xDEADBEEF) {
		DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}

out_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
......@@ -2845,7 +2845,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
struct v9_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.compute_ring[0];
if (!adev->gfx.in_reset) {
if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
......@@ -2938,10 +2938,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
if (r)
return r;
if (amdgpu_sriov_vf(adev))
r = gfx_v9_0_kiq_resume(adev);
else
r = gfx_v9_0_cp_compute_resume(adev);
r = gfx_v9_0_kiq_resume(adev);
if (r)
return r;
......@@ -2951,6 +2948,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
ring->ready = false;
return r;
}
ring = &adev->gfx.kiq.ring;
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
......@@ -2960,14 +2964,6 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
ring->ready = false;
}
if (amdgpu_sriov_vf(adev)) {
ring = &adev->gfx.kiq.ring;
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
}
gfx_v9_0_enable_gui_idle_interrupt(adev, true);
return 0;
......@@ -3013,9 +3009,9 @@ static int gfx_v9_0_hw_fini(void *handle)
pr_debug("For SRIOV client, shouldn't do anything.\n");
return 0;
}
gfx_v9_0_kiq_kcq_disable(adev);
gfx_v9_0_cp_enable(adev, false);
gfx_v9_0_rlc_stop(adev);
gfx_v9_0_cp_compute_fini(adev);
return 0;
}
......@@ -3024,14 +3020,18 @@ static int gfx_v9_0_suspend(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.in_suspend = true;
return gfx_v9_0_hw_fini(adev);
}
/*
 * Resume the GFX block by re-running hardware init.
 *
 * adev->gfx.in_suspend is cleared only after gfx_v9_0_hw_init() has returned,
 * so code reached from hw_init that tests the flag (gfx_v9_0_kcq_init_queue()
 * does) still sees the suspend state while the hardware is brought back up.
 * The flag is cleared whether or not hw_init succeeded.
 *
 * Returns the result of gfx_v9_0_hw_init().
 */
static int gfx_v9_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = gfx_v9_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}
static bool gfx_v9_0_is_idle(void *handle)
......@@ -4479,218 +4479,6 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
return 0;
}
/*
 * Set up one compute ring's hardware queue by direct register programming.
 *
 * Creates the ring's MQD buffer object if it does not exist yet, then
 * reserves it, pins it in GTT and maps it.  The MQD is zeroed and filled in
 * while the matching CP_HQD_* / CP_MQD_* registers are written, all with the
 * ring's me/pipe/queue selected through soc15_grbm_select() under
 * adev->srbm_mutex.  The queue is marked active at the end.
 *
 * Returns 0 on success, or the error returned by the BO create, reserve, pin
 * or kmap call.  On reserve/pin/kmap failure gfx_v9_0_cp_compute_fini() is
 * called before returning.
 */
static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
{
int r, j;
u32 tmp;
/* doorbells are unconditionally used in this function */
bool use_doorbell = true;
u64 hqd_gpu_addr;
u64 mqd_gpu_addr;
u64 eop_gpu_addr;
u64 wb_gpu_addr;
u32 *buf;
struct v9_mqd *mqd;
struct amdgpu_device *adev;
adev = ring->adev;
/* allocate the MQD buffer object only the first time through */
if (ring->mqd_obj == NULL) {
r = amdgpu_bo_create(adev,
sizeof(struct v9_mqd),
PAGE_SIZE,true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
NULL, &ring->mqd_obj);
if (r) {
dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
return r;
}
}
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0)) {
gfx_v9_0_cp_compute_fini(adev);
return r;
}
r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
&mqd_gpu_addr);
if (r) {
dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
/*
 * NOTE(review): the BO reserved above is not unreserved on this path
 * before gfx_v9_0_cp_compute_fini() is called.
 */
gfx_v9_0_cp_compute_fini(adev);
return r;
}
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
if (r) {
dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
/* NOTE(review): same as above, the BO is still reserved here */
gfx_v9_0_cp_compute_fini(adev);
return r;
}
/* init the mqd struct */
memset(buf, 0, sizeof(struct v9_mqd));
mqd = (struct v9_mqd *)buf;
mqd->header = 0xC0310800;
mqd->compute_pipelinestat_enable = 0x00000001;
mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
mqd->compute_misc_reserved = 0x00000003;
/* select this ring's me/pipe/queue; held until the select is reset below */
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me,
ring->pipe,
ring->queue, 0);
/* disable wptr polling */
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
/* write the EOP addr */
BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */
/* each queue gets its own MEC_HPD_SIZE slice of the EOP buffer */
eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE);
/* the address is programmed shifted right by 8 bits */
eop_gpu_addr >>= 8;
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr);
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
(order_base_2(MEC_HPD_SIZE / 4) - 1));
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp);
/* enable doorbell? */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
if (use_doorbell)
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
else
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
mqd->cp_hqd_pq_doorbell_control = tmp;
/* disable the queue if it's active */
ring->wptr = 0;
mqd->cp_hqd_dequeue_request = 0;
mqd->cp_hqd_pq_rptr = 0;
mqd->cp_hqd_pq_wptr_lo = 0;
mqd->cp_hqd_pq_wptr_hi = 0;
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
/*
 * Poll up to adev->usec_timeout microseconds for the queue to go
 * inactive.  NOTE(review): a timeout is not reported; the code carries
 * on either way.
 */
for (j = 0; j < adev->usec_timeout; j++) {
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
break;
udelay(1);
}
/* write back the zeroed dequeue request and ring pointers */
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
}
/* set the pointer to the MQD */
mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
/* set MQD vmid to 0 */
tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp);
mqd->cp_mqd_control = tmp;
/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
hqd_gpu_addr = ring->gpu_addr >> 8;
mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
/* set up the HQD, this is similar to CP_RB0_CNTL */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(ring->ring_size / 4) - 1));
/*
 * NOTE(review): the value is shifted left by 8 before being handed to
 * REG_SET_FIELD - confirm this is intended for RPTR_BLOCK_SIZE.
 */
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp);
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
mqd->cp_hqd_pq_rptr_report_addr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
mqd->cp_hqd_pq_rptr_report_addr_hi);
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
mqd->cp_hqd_pq_wptr_poll_addr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
mqd->cp_hqd_pq_wptr_poll_addr_hi);
/* enable the doorbell if requested */
if (use_doorbell) {
/* doorbell range spans from the KIQ doorbell to MEC ring 7's */
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
(AMDGPU_DOORBELL64_KIQ * 2) << 2);
WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
(AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2);
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_OFFSET, ring->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
mqd->cp_hqd_pq_doorbell_control = tmp;
} else {
mqd->cp_hqd_pq_doorbell_control = 0;
}
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
mqd->cp_hqd_pq_doorbell_control);
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
/* set the vmid for the queue */
mqd->cp_hqd_vmid = 0;
WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp);
mqd->cp_hqd_persistent_state = tmp;
/* activate the queue */
mqd->cp_hqd_active = 1;
WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
/* restore the default selection before dropping the mutex */
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
/* the MQD stays pinned; only the CPU mapping and reservation are dropped */
amdgpu_bo_kunmap(ring->mqd_obj);
amdgpu_bo_unreserve(ring->mqd_obj);
if (use_doorbell)
WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
return 0;
}
const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_GFX,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment