Commit 91963397 authored by Friedrich Vock's avatar Friedrich Vock Committed by Alex Deucher

drm/amdgpu: Enable tunneling on high-priority compute queues

This improves latency if the GPU is already busy with other work.
This is useful for VR compositors that submit highly latency-sensitive
compositing work on high-priority compute queues while the GPU is busy
rendering the next frame.

Userspace merge request:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26462

v2: bump driver version (Alex)
Reviewed-by: default avatarMarek Olšák <marek.olsak@amd.com>
Signed-off-by: default avatarFriedrich Vock <friedrich.vock@gmx.de>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 94b1e028
...@@ -791,6 +791,7 @@ struct amdgpu_mqd_prop { ...@@ -791,6 +791,7 @@ struct amdgpu_mqd_prop {
uint64_t eop_gpu_addr; uint64_t eop_gpu_addr;
uint32_t hqd_pipe_priority; uint32_t hqd_pipe_priority;
uint32_t hqd_queue_priority; uint32_t hqd_queue_priority;
bool allow_tunneling;
bool hqd_active; bool hqd_active;
}; };
......
...@@ -115,9 +115,10 @@ ...@@ -115,9 +115,10 @@
* 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
* - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query
* - 3.56.0 - Update IB start address and size alignment for decode and encode * - 3.56.0 - Update IB start address and size alignment for decode and encode
* - 3.57.0 - Compute tunneling on GFX10+
*/ */
#define KMS_DRIVER_MAJOR 3 #define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 56 #define KMS_DRIVER_MINOR 57
#define KMS_DRIVER_PATCHLEVEL 0 #define KMS_DRIVER_PATCHLEVEL 0
/* /*
......
...@@ -642,6 +642,10 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, ...@@ -642,6 +642,10 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
struct amdgpu_mqd_prop *prop) struct amdgpu_mqd_prop *prop)
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
amdgpu_gfx_is_high_priority_compute_queue(adev, ring);
bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
amdgpu_gfx_is_high_priority_graphics_queue(adev, ring);
memset(prop, 0, sizeof(*prop)); memset(prop, 0, sizeof(*prop));
...@@ -659,10 +663,8 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, ...@@ -659,10 +663,8 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
*/ */
prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ; prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && prop->allow_tunneling = is_high_prio_compute;
amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) || if (is_high_prio_compute || is_high_prio_gfx) {
(ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
} }
......
...@@ -6593,7 +6593,8 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, ...@@ -6593,7 +6593,8 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif #endif
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
prop->allow_tunneling);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
mqd->cp_hqd_pq_control = tmp; mqd->cp_hqd_pq_control = tmp;
......
...@@ -3847,7 +3847,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, ...@@ -3847,7 +3847,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
prop->allow_tunneling);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
mqd->cp_hqd_pq_control = tmp; mqd->cp_hqd_pq_control = tmp;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment