Commit 8abc1eb2 authored by Christian König's avatar Christian König

drm/amdkfd: switch over to using drm_exec v3

Avoids quite a bit of logic and kmalloc overhead.

v2: fix multiple problems pointed out by Felix
v3: two more nit picks from Felix fixed
Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Acked-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230711133122.3710-4-christian.koenig@amd.com
parent 9710631c
......@@ -21,6 +21,7 @@ config DRM_AMDGPU
select INTERVAL_TREE
select DRM_BUDDY
select DRM_SUBALLOC_HELPER
select DRM_EXEC
# amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
# ACPI_VIDEO's dependencies must also be selected.
select INPUT if ACPI
......
......@@ -25,6 +25,7 @@
#ifndef AMDGPU_AMDKFD_H_INCLUDED
#define AMDGPU_AMDKFD_H_INCLUDED
#include <linux/list.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/kthread.h>
......@@ -32,7 +33,6 @@
#include <linux/mmu_notifier.h>
#include <linux/memremap.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"
#include "amdgpu_xcp.h"
......@@ -71,8 +71,7 @@ struct kgd_mem {
struct hmm_range *range;
struct list_head attachments;
/* protected by amdkfd_process_info.lock */
struct ttm_validate_buffer validate_list;
struct ttm_validate_buffer resv_list;
struct list_head validate_list;
uint32_t domain;
unsigned int mapped_to_gpu_memory;
uint64_t va;
......
......@@ -34,6 +34,7 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_drv.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/drm_exec.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
......@@ -360,6 +361,23 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
list_add(&entry->tv.head, validated);
}
/**
* amdgpu_vm_lock_pd - lock PD in drm_exec
*
* @vm: vm providing the BOs
* @exec: drm execution context
* @num_fences: number of extra fences to reserve
*
* Lock the VM root PD in the DRM execution context.
*/
int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
unsigned int num_fences)
{
/* We need at least two fences for the VM PD/PT updates */
return drm_exec_prepare_obj(exec, &vm->root.bo->tbo.base,
2 + num_fences);
}
/**
* amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
*
......
......@@ -36,6 +36,8 @@
#include "amdgpu_ring.h"
#include "amdgpu_ids.h"
struct drm_exec;
struct amdgpu_bo_va;
struct amdgpu_job;
struct amdgpu_bo_list_entry;
......@@ -399,6 +401,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
struct list_head *validated,
struct amdgpu_bo_list_entry *entry);
int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
unsigned int num_fences);
bool amdgpu_vm_ready(struct amdgpu_vm *vm);
uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm);
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
......
......@@ -24,6 +24,8 @@
#include <linux/types.h>
#include <linux/sched/task.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/drm_exec.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
......@@ -1455,37 +1457,34 @@ struct svm_validate_context {
struct svm_range *prange;
bool intr;
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
struct ttm_validate_buffer tv[MAX_GPU_INSTANCE];
struct list_head validate_list;
struct ww_acquire_ctx ticket;
struct drm_exec exec;
};
static int svm_range_reserve_bos(struct svm_validate_context *ctx)
static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
{
struct kfd_process_device *pdd;
struct amdgpu_vm *vm;
uint32_t gpuidx;
int r;
INIT_LIST_HEAD(&ctx->validate_list);
for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
if (!pdd) {
pr_debug("failed to find device idx %d\n", gpuidx);
return -EINVAL;
}
vm = drm_priv_to_vm(pdd->drm_priv);
ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
ctx->tv[gpuidx].num_shared = 4;
list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
}
drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0);
drm_exec_until_all_locked(&ctx->exec) {
for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
if (!pdd) {
pr_debug("failed to find device idx %d\n", gpuidx);
r = -EINVAL;
goto unreserve_out;
}
vm = drm_priv_to_vm(pdd->drm_priv);
r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
ctx->intr, NULL);
if (r) {
pr_debug("failed %d to reserve bo\n", r);
return r;
r = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
drm_exec_retry_on_contention(&ctx->exec);
if (unlikely(r)) {
pr_debug("failed %d to reserve bo\n", r);
goto unreserve_out;
}
}
}
for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
......@@ -1508,13 +1507,13 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
return 0;
unreserve_out:
ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
drm_exec_fini(&ctx->exec);
return r;
}
static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
{
ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
drm_exec_fini(&ctx->exec);
}
static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
......@@ -1613,7 +1612,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
goto free_ctx;
}
svm_range_reserve_bos(ctx);
svm_range_reserve_bos(ctx, intr);
p = container_of(prange->svms, struct kfd_process, svms);
owner = kfd_svm_page_owner(p, find_first_bit(ctx->bitmap,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment