Commit f3e9b1f4 authored by Matthew Brost, committed by Rodrigo Vivi

drm/xe: Remove async worker and rework sync binds

The async worker is gone. All jobs and memory allocations are now done in
the IOCTL to align with dma-fencing rules.

Async vs. sync now refers to when bind operations complete relative to the
IOCTL: async binds complete when their out-syncs signal, while sync binds
complete by the time the IOCTL returns. In-syncs and out-syncs are only
allowed in async mode.

If a memory allocation fails in the job creation step, the VM is killed.
This is temporary; eventually a proper unwind will be done and the VM will
remain usable.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent b21ae51d
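
For context, below is a minimal userspace-side sketch (not part of this commit) of how the reworked uAPI selects the completion mode when creating a VM-bind queue. Only struct drm_xe_engine_class_instance and the DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC/_SYNC classes are taken from the diff that follows; create_bind_queue() is a hypothetical stub standing in for the DRM_IOCTL_XE_EXEC_QUEUE_CREATE plumbing, whose argument struct is not shown in this diff.

#include <stdbool.h>
#include <stdint.h>
#include <drm/xe_drm.h>	/* uAPI header with the engine class definitions */

/*
 * Hypothetical stand-in for the DRM_IOCTL_XE_EXEC_QUEUE_CREATE call; the real
 * argument struct is omitted because its layout is not part of this diff, so
 * this stub only documents the intent of each parameter.
 */
static int create_bind_queue(int fd, uint32_t vm_id,
			     const struct drm_xe_engine_class_instance *eci,
			     uint32_t *queue_id)
{
	(void)fd; (void)vm_id; (void)eci;
	*queue_id = 0;	/* stub */
	return 0;
}

int create_vm_bind_queue(int fd, uint32_t vm_id, bool async, uint32_t *queue_id)
{
	struct drm_xe_engine_class_instance eci = {
		/*
		 * New in this commit: the kernel-only bind class encodes the
		 * completion model. ASYNC queues complete binds when their
		 * out-syncs signal; SYNC queues complete before the bind
		 * IOCTL returns and do not accept in/out-syncs.
		 */
		.engine_class = async ? DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC :
					DRM_XE_ENGINE_CLASS_VM_BIND_SYNC,
		.engine_instance = 0,
		.gt_id = 0,
	};

	return create_bind_queue(fd, vm_id, &eci, queue_id);
}

Previously there was a single DRM_XE_ENGINE_CLASS_VM_BIND class and async behaviour was a property of the VM; the corresponding VM-create flag is renamed from DRM_XE_VM_CREATE_ASYNC_BIND_OPS to DRM_XE_VM_CREATE_ASYNC_DEFAULT in the uAPI hunks below.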
@@ -196,27 +196,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		}
 	}
 
-	/*
-	 * We can't install a job into the VM dma-resv shared slot before an
-	 * async VM bind passed in as a fence without the risk of deadlocking as
-	 * the bind can trigger an eviction which in turn depends on anything in
-	 * the VM dma-resv shared slots. Not an ideal solution, but we wait for
-	 * all dependent async VM binds to start (install correct fences into
-	 * dma-resv slots) before moving forward.
-	 */
-	if (!xe_vm_no_dma_fences(vm) &&
-	    vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
-		for (i = 0; i < args->num_syncs; i++) {
-			struct dma_fence *fence = syncs[i].fence;
-
-			if (fence) {
-				err = xe_vm_async_fence_wait_start(fence);
-				if (err)
-					goto err_syncs;
-			}
-		}
-	}
-
 retry:
 	if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) {
 		err = down_write_killable(&vm->lock);
@@ -229,28 +208,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (err)
 		goto err_syncs;
 
-	/* We don't allow execs while the VM is in error state */
-	if (vm->async_ops.error) {
-		err = vm->async_ops.error;
-		goto err_unlock_list;
-	}
-
-	/*
-	 * Extreme corner where we exit a VM error state with a munmap style VM
-	 * unbind inflight which requires a rebind. In this case the rebind
-	 * needs to install some fences into the dma-resv slots. The worker to
-	 * do this queued, let that worker make progress by dropping vm->lock,
-	 * flushing the worker and retrying the exec.
-	 */
-	if (vm->async_ops.munmap_rebind_inflight) {
-		if (write_locked)
-			up_write(&vm->lock);
-		else
-			up_read(&vm->lock);
-		flush_work(&vm->async_ops.work);
-		goto retry;
-	}
-
 	if (write_locked) {
 		err = xe_vm_userptr_pin(vm);
 		downgrade_write(&vm->lock);
...
@@ -621,7 +621,10 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
 		return -EINVAL;
 
-	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
+	if (eci[0].engine_class >= DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC) {
+		bool sync = eci[0].engine_class ==
+			DRM_XE_ENGINE_CLASS_VM_BIND_SYNC;
+
 		for_each_gt(gt, xe, id) {
 			struct xe_exec_queue *new;
 
@@ -647,6 +650,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 					   args->width, hwe,
 					   EXEC_QUEUE_FLAG_PERSISTENT |
 					   EXEC_QUEUE_FLAG_VM |
+					   (sync ? 0 :
+					    EXEC_QUEUE_FLAG_VM_ASYNC) |
 					   (id ?
 					    EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
 					    0));
...
@@ -77,6 +77,8 @@ struct xe_exec_queue {
 #define EXEC_QUEUE_FLAG_VM			BIT(4)
 /* child of VM queue for multi-tile VM jobs */
 #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
+/* VM jobs for this queue are asynchronous */
+#define EXEC_QUEUE_FLAG_VM_ASYNC		BIT(6)
 
 	/**
 	 * @flags: flags for this exec queue, should statically setup aside from ban
...
@@ -18,7 +18,6 @@
 #include "xe_sched_job_types.h"
 
 #define SYNC_FLAGS_TYPE_MASK 0x3
-#define SYNC_FLAGS_FENCE_INSTALLED 0x10000
 
 struct user_fence {
 	struct xe_device *xe;
@@ -223,12 +222,11 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
 	return 0;
 }
 
-bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
+void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
 			  struct dma_fence *fence)
 {
-	if (!(sync->flags & DRM_XE_SYNC_SIGNAL) ||
-	    sync->flags & SYNC_FLAGS_FENCE_INSTALLED)
-		return false;
+	if (!(sync->flags & DRM_XE_SYNC_SIGNAL))
+		return;
 
 	if (sync->chain_fence) {
 		drm_syncobj_add_point(sync->syncobj, sync->chain_fence,
@@ -260,12 +258,6 @@ bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
 		job->user_fence.addr = sync->addr;
 		job->user_fence.value = sync->timeline_value;
 	}
-
-	/* TODO: external BO? */
-
-	sync->flags |= SYNC_FLAGS_FENCE_INSTALLED;
-
-	return true;
 }
 
 void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
...
@@ -19,7 +19,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
 int xe_sync_entry_wait(struct xe_sync_entry *sync);
 int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
 			   struct xe_sched_job *job);
-bool xe_sync_entry_signal(struct xe_sync_entry *sync,
+void xe_sync_entry_signal(struct xe_sync_entry *sync,
 			  struct xe_sched_job *job,
 			  struct dma_fence *fence);
 void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
...
This diff is collapsed.
@@ -177,8 +177,6 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
 
 int xe_vm_invalidate_vma(struct xe_vma *vma);
 
-int xe_vm_async_fence_wait_start(struct dma_fence *fence);
-
 extern struct ttm_device_funcs xe_ttm_funcs;
 
 static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
...
@@ -17,7 +17,6 @@
 #include "xe_pt_types.h"
 #include "xe_range_fence.h"
 
-struct async_op_fence;
 struct xe_bo;
 struct xe_sync_entry;
 struct xe_vm;
@@ -156,7 +155,7 @@ struct xe_vm {
 	 */
 #define XE_VM_FLAG_64K			BIT(0)
 #define XE_VM_FLAG_COMPUTE_MODE		BIT(1)
-#define XE_VM_FLAG_ASYNC_BIND_OPS	BIT(2)
+#define XE_VM_FLAG_ASYNC_DEFAULT	BIT(2)
 #define XE_VM_FLAG_MIGRATION		BIT(3)
 #define XE_VM_FLAG_SCRATCH_PAGE		BIT(4)
 #define XE_VM_FLAG_FAULT_MODE		BIT(5)
@@ -394,10 +393,6 @@ struct xe_vma_op {
 	u32 num_syncs;
 	/** @link: async operation link */
 	struct list_head link;
-	/**
-	 * @fence: async operation fence, signaled on last operation complete
-	 */
-	struct async_op_fence *fence;
 	/** @tile_mask: gt mask for this operation */
 	u8 tile_mask;
 	/** @flags: operation flags */
...
@@ -134,10 +134,11 @@ struct drm_xe_engine_class_instance {
 #define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE	3
 #define DRM_XE_ENGINE_CLASS_COMPUTE		4
 	/*
-	 * Kernel only class (not actual hardware engine class). Used for
+	 * Kernel only classes (not actual hardware engine class). Used for
 	 * creating ordered queues of VM bind operations.
 	 */
-#define DRM_XE_ENGINE_CLASS_VM_BIND		5
+#define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC	5
+#define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC	6
 	__u16 engine_class;
 
 	__u16 engine_instance;
@@ -577,7 +578,7 @@ struct drm_xe_vm_create {
 #define DRM_XE_VM_CREATE_SCRATCH_PAGE	(0x1 << 0)
 #define DRM_XE_VM_CREATE_COMPUTE_MODE	(0x1 << 1)
-#define DRM_XE_VM_CREATE_ASYNC_BIND_OPS	(0x1 << 2)
+#define DRM_XE_VM_CREATE_ASYNC_DEFAULT	(0x1 << 2)
 #define DRM_XE_VM_CREATE_FAULT_MODE	(0x1 << 3)
 	/** @flags: Flags */
 	__u32 flags;
@@ -637,34 +638,12 @@ struct drm_xe_vm_bind_op {
 #define XE_VM_BIND_OP_MAP		0x0
 #define XE_VM_BIND_OP_UNMAP		0x1
 #define XE_VM_BIND_OP_MAP_USERPTR	0x2
-#define XE_VM_BIND_OP_RESTART		0x3
-#define XE_VM_BIND_OP_UNMAP_ALL		0x4
-#define XE_VM_BIND_OP_PREFETCH		0x5
+#define XE_VM_BIND_OP_UNMAP_ALL		0x3
+#define XE_VM_BIND_OP_PREFETCH		0x4
 	/** @op: Bind operation to perform */
 	__u32 op;
 
 #define XE_VM_BIND_FLAG_READONLY	(0x1 << 0)
-	/*
-	 * A bind ops completions are always async, hence the support for out
-	 * sync. This flag indicates the allocation of the memory for new page
-	 * tables and the job to program the pages tables is asynchronous
-	 * relative to the IOCTL. That part of a bind operation can fail under
-	 * memory pressure, the job in practice can't fail unless the system is
-	 * totally shot.
-	 *
-	 * If this flag is clear and the IOCTL doesn't return an error, in
-	 * practice the bind op is good and will complete.
-	 *
-	 * If this flag is set and doesn't return an error, the bind op can
-	 * still fail and recovery is needed. It should free memory
-	 * via non-async unbinds, and then restart all queued async binds op via
-	 * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the
-	 * VM.
-	 *
-	 * This flag is only allowed when DRM_XE_VM_CREATE_ASYNC_BIND_OPS is
-	 * configured in the VM and must be set if the VM is configured with
-	 * DRM_XE_VM_CREATE_ASYNC_BIND_OPS and not in an error state.
-	 */
 #define XE_VM_BIND_FLAG_ASYNC		(0x1 << 1)
 	/*
 	 * Valid on a faulting VM only, do the MAP operation immediately rather
...
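
To round out the uAPI picture, here is a second hedged sketch of the completion contract described in the commit message, written against the constants above. Only the DRM_XE_VM_CREATE_*, XE_VM_BIND_OP_*/XE_VM_BIND_FLAG_* and DRM_XE_SYNC_SIGNAL names come from this diff; the two *_simple() helpers are hypothetical stubs because the full drm_xe_vm_create/drm_xe_vm_bind/drm_xe_sync layouts are not reproduced here, and how the per-op ASYNC flag interacts with the bind queue's class is glossed over.

#include <stdbool.h>
#include <stdint.h>
#include <drm/xe_drm.h>	/* DRM_XE_VM_CREATE_* / XE_VM_BIND_* / DRM_XE_SYNC_* */

/*
 * Hypothetical stand-ins for the DRM_IOCTL_XE_VM_CREATE and
 * DRM_IOCTL_XE_VM_BIND plumbing; these stubs only document the intent of
 * each parameter.
 */
static int xe_vm_create_simple(int fd, uint32_t create_flags, uint32_t *vm_id)
{
	(void)fd; (void)create_flags;
	*vm_id = 0;	/* stub */
	return 0;
}

static int xe_vm_bind_simple(int fd, uint32_t vm_id, uint32_t op,
			     uint32_t bind_flags, uint32_t out_syncobj)
{
	/* A real implementation would attach out_syncobj as a drm_xe_sync
	 * entry flagged DRM_XE_SYNC_SIGNAL when it is non-zero. */
	(void)fd; (void)vm_id; (void)op; (void)bind_flags; (void)out_syncobj;
	return 0;	/* stub */
}

int bind_example(int fd, bool async, uint32_t out_syncobj)
{
	uint32_t vm_id;
	int err;

	/* An async-by-default VM pairs naturally with async binds. */
	err = xe_vm_create_simple(fd,
				  async ? DRM_XE_VM_CREATE_ASYNC_DEFAULT : 0,
				  &vm_id);
	if (err)
		return err;

	/*
	 * The reworked contract: a sync bind (no XE_VM_BIND_FLAG_ASYNC) is
	 * complete by the time the IOCTL returns and must not carry
	 * in/out-syncs; an async bind completes when its out-syncs signal.
	 */
	return xe_vm_bind_simple(fd, vm_id, XE_VM_BIND_OP_MAP,
				 async ? XE_VM_BIND_FLAG_ASYNC : 0,
				 async ? out_syncobj : 0);
}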