Commit d16ef1a1 authored by Francois Dugast, committed by Matthew Brost

drm/xe/exec: Switch hw engine group execution mode upon job submission

If the job about to be submitted is a dma-fence job, update the current
execution mode of the hw engine group. This triggers an immediate suspend
of the exec queues running faulting long-running jobs.

If the job about to be submitted is a long-running job, kick a new worker
used to resume the exec queues running faulting long-running jobs once
the dma-fence jobs have completed.
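
In sketch form, the submission path now does roughly the following (a simplified sketch with a hypothetical helper name, submit_with_group_mode; the real flow, including locking, the retry loop and the goto-based unwinding, is in the diff below):

	/*
	 * Simplified sketch of the dispatch this patch adds to
	 * xe_exec_ioctl(); not the literal code.
	 */
	static int submit_with_group_mode(struct xe_exec_queue *q)
	{
		struct xe_hw_engine_group *group = q->hwe->hw_engine_group;
		enum xe_hw_engine_group_execution_mode mode, previous_mode;
		int err;

		mode = xe_hw_engine_group_find_exec_mode(q);

		if (mode == EXEC_MODE_DMA_FENCE) {
			/* Entering DMA-fence mode immediately suspends any
			 * exec queues in the group running faulting LR jobs. */
			err = xe_hw_engine_group_get_mode(group, mode, &previous_mode);
			if (err)
				return err;
		}

		/* ... create and submit the job ... */

		if (mode == EXEC_MODE_LR)
			/* Kick the worker that resumes the faulting LR queues
			 * once the dma-fence jobs have completed. */
			xe_hw_engine_group_resume_faulting_lr_jobs(group);

		if (mode == EXEC_MODE_DMA_FENCE)
			xe_hw_engine_group_put(group);

		return 0;
	}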

v2: Kick the resume worker from exec IOCTL, switch to unordered workqueue,
    destroy it after use (Matt Brost)

v3: Do not resume if no exec queue was suspended (Matt Brost)

v4: Squash commits (Matt Brost)

v5: Do not kick the worker when xe_vm_in_preempt_fence_mode (Matt Brost)
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-10-francois.dugast@intel.com
parent 770bd1d3
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -14,6 +14,7 @@
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_exec_queue.h"
+#include "xe_hw_engine_group.h"
 #include "xe_macros.h"
 #include "xe_ring_ops_types.h"
 #include "xe_sched_job.h"
@@ -124,6 +125,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	bool write_locked, skip_retry = false;
 	ktime_t end = 0;
 	int err = 0;
+	struct xe_hw_engine_group *group;
+	enum xe_hw_engine_group_execution_mode mode, previous_mode;
 
 	if (XE_IOCTL_DBG(xe, args->extensions) ||
 	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
@@ -182,6 +185,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		}
 	}
 
+	group = q->hwe->hw_engine_group;
+	mode = xe_hw_engine_group_find_exec_mode(q);
+
+	if (mode == EXEC_MODE_DMA_FENCE) {
+		err = xe_hw_engine_group_get_mode(group, mode, &previous_mode);
+		if (err)
+			goto err_syncs;
+	}
+
 retry:
 	if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) {
 		err = down_write_killable(&vm->lock);
@@ -199,7 +211,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		downgrade_write(&vm->lock);
 		write_locked = false;
 		if (err)
-			goto err_unlock_list;
+			goto err_hw_exec_mode;
 	}
 
 	if (!args->num_batch_buffer) {
@@ -312,6 +324,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		spin_unlock(&xe->ttm.lru_lock);
 	}
 
+	if (mode == EXEC_MODE_LR)
+		xe_hw_engine_group_resume_faulting_lr_jobs(group);
+
 err_repin:
 	if (!xe_vm_in_lr_mode(vm))
 		up_read(&vm->userptr.notifier_lock);
@@ -324,6 +339,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	up_read(&vm->lock);
 	if (err == -EAGAIN && !skip_retry)
 		goto retry;
+err_hw_exec_mode:
+	if (mode == EXEC_MODE_DMA_FENCE)
+		xe_hw_engine_group_put(group);
 err_syncs:
 	while (num_syncs--)
 		xe_sync_entry_cleanup(&syncs[num_syncs]);
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
@@ -17,9 +17,36 @@ hw_engine_group_free(struct drm_device *drm, void *arg)
 {
 	struct xe_hw_engine_group *group = arg;
 
+	destroy_workqueue(group->resume_wq);
 	kfree(group);
 }
 
+static void
+hw_engine_group_resume_lr_jobs_func(struct work_struct *w)
+{
+	struct xe_exec_queue *q;
+	struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work);
+	int err;
+	enum xe_hw_engine_group_execution_mode previous_mode;
+
+	err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode);
+	if (err)
+		return;
+
+	if (previous_mode == EXEC_MODE_LR)
+		goto put;
+
+	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
+		if (!xe_vm_in_fault_mode(q->vm))
+			continue;
+
+		q->ops->resume(q);
+	}
+
+put:
+	xe_hw_engine_group_put(group);
+}
+
 static struct xe_hw_engine_group *
 hw_engine_group_alloc(struct xe_device *xe)
 {
@@ -30,7 +57,12 @@ hw_engine_group_alloc(struct xe_device *xe)
 	if (!group)
 		return ERR_PTR(-ENOMEM);
 
+	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
+	if (!group->resume_wq)
+		return ERR_PTR(-ENOMEM);
+
 	init_rwsem(&group->mode_sem);
+	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
 	INIT_LIST_HEAD(&group->exec_queue_list);
 
 	err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group);
@@ -134,7 +166,7 @@ int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct x
 		if (err)
 			goto err_suspend;
 
-		queue_work(group->resume_wq, &group->resume_work);
+		xe_hw_engine_group_resume_faulting_lr_jobs(group);
 	}
 
 	list_add(&q->hw_engine_group_link, &group->exec_queue_list);
@@ -167,6 +199,16 @@ void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct x
 	up_write(&group->mode_sem);
 }
 
+/**
+ * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's
+ * faulting LR jobs
+ * @group: The hw engine group
+ */
+void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group)
+{
+	queue_work(group->resume_wq, &group->resume_work);
+}
+
 /**
  * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
  * @group: The hw engine group
@@ -177,6 +219,7 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group
 {
 	int err;
 	struct xe_exec_queue *q;
+	bool need_resume = false;
 
 	lockdep_assert_held_write(&group->mode_sem);
 
@@ -184,6 +227,7 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group
 		if (!xe_vm_in_fault_mode(q->vm))
 			continue;
 
+		need_resume = true;
 		q->ops->suspend(q);
 	}
 
@@ -196,6 +240,9 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group
 			goto err_suspend;
 	}
 
+	if (need_resume)
+		xe_hw_engine_group_resume_faulting_lr_jobs(group);
+
 	return 0;
 
 err_suspend:
@@ -310,3 +357,16 @@ __releases(&group->mode_sem)
 {
 	up_read(&group->mode_sem);
 }
+
+/**
+ * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue
+ * @q: The exec_queue
+ */
+enum xe_hw_engine_group_execution_mode
+xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q)
+{
+	if (xe_vm_in_fault_mode(q->vm))
+		return EXEC_MODE_LR;
+	else
+		return EXEC_MODE_DMA_FENCE;
+}
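
For readers less familiar with the workqueue API used above: the pattern is to allocate a dedicated workqueue at init time (flags 0, i.e. a regular unordered queue, per the v2 note), queue work on it to defer processing out of the submission path, and destroy it together with the owning object. A minimal self-contained sketch of that pattern, using hypothetical demo_* names rather than the driver's:

	#include <linux/slab.h>
	#include <linux/workqueue.h>

	struct demo_group {
		struct workqueue_struct *wq;
		struct work_struct work;
	};

	static void demo_work_func(struct work_struct *w)
	{
		struct demo_group *g = container_of(w, struct demo_group, work);

		/* Deferred action runs here, in process context,
		 * outside the hot path that queued it. */
		(void)g;
	}

	static struct demo_group *demo_group_create(void)
	{
		struct demo_group *g = kzalloc(sizeof(*g), GFP_KERNEL);

		if (!g)
			return NULL;

		/* Flags 0: a plain (unordered) workqueue. */
		g->wq = alloc_workqueue("demo-wq", 0, 0);
		if (!g->wq) {
			kfree(g);
			return NULL;
		}
		INIT_WORK(&g->work, demo_work_func);
		return g;
	}

	static void demo_group_kick(struct demo_group *g)
	{
		/* Cheap to call from the hot path; the work item
		 * runs later on the dedicated queue. */
		queue_work(g->wq, &g->work);
	}

	static void demo_group_destroy(struct demo_group *g)
	{
		/* destroy_workqueue() drains pending work first. */
		destroy_workqueue(g->wq);
		kfree(g);
	}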
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h
@@ -22,4 +22,8 @@ int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
 				enum xe_hw_engine_group_execution_mode *previous_mode);
 void xe_hw_engine_group_put(struct xe_hw_engine_group *group);
 
+enum xe_hw_engine_group_execution_mode
+xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q);
+void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group);
+
 #endif