Commit 3741540e authored by Andrey Grodzovsky, committed by Alex Deucher

drm/sched: Rework HW fence processing.

Expedite job deletion from the ring mirror list to the HW fence signal
callback instead of from finish_work. Together with waiting for all
such fences to signal in drm_sched_stop, this guarantees that an
already signaled job will not be processed twice.
Remove the scheduler finished-fence callback and just submit
finish_work directly from the HW fence callback.

v2: Fix comments.
v3: Attach hw fence cb to sched_job
v5: Rebase
Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 222b5f04
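To make the pattern described in the commit message concrete, here is a small standalone C sketch (plain userspace C, not the real DRM scheduler or dma-fence API; every name in it is an illustrative stand-in). The callback node is embedded in the job itself, so when the HW fence signals, container_of() recovers the job and the job can be unlinked from the ring mirror list immediately, leaving only the remaining cleanup to deferred work.

/* Standalone model of the job-embedded fence callback; illustrative names only. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fence_cb {
	void (*func)(struct fence_cb *cb);
};

struct job {
	const char *name;
	int on_mirror_list;	/* stands in for the job's ring_mirror_list node */
	struct fence_cb cb;	/* callback node embedded in the job itself */
};

/* Runs when the modeled "HW fence" signals. */
static void job_fence_signaled(struct fence_cb *cb)
{
	struct job *job = container_of(cb, struct job, cb);

	/* Expedited part: drop the job from the mirror list right away. */
	job->on_mirror_list = 0;
	/* The rest (free_job etc.) would be deferred work; modeled as a log line. */
	printf("%s: unlinked from mirror list, finish work scheduled\n", job->name);
}

int main(void)
{
	struct job j = {
		.name = "job0",
		.on_mirror_list = 1,
		.cb = { .func = job_fence_signaled },
	};

	/* A driver signaling the HW fence would end up invoking the callback. */
	j.cb.func(&j.cb);
	return j.on_mirror_list;	/* 0: the callback already removed the job */
}

Embedding the callback in the job (rather than in the scheduler fence) is what lets the signal path find and unlink the job without any extra lookup, which is exactly the move the diff below makes.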
@@ -284,8 +284,6 @@ static void drm_sched_job_finish(struct work_struct *work)
 	cancel_delayed_work_sync(&sched->work_tdr);
 
 	spin_lock_irqsave(&sched->job_list_lock, flags);
-	/* remove job from ring_mirror_list */
-	list_del_init(&s_job->node);
 	/* queue TDR for next job */
 	drm_sched_start_timeout(sched);
 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
@@ -293,22 +291,11 @@ static void drm_sched_job_finish(struct work_struct *work)
 	sched->ops->free_job(s_job);
 }
 
-static void drm_sched_job_finish_cb(struct dma_fence *f,
-				    struct dma_fence_cb *cb)
-{
-	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
-						 finish_cb);
-	schedule_work(&job->finish_work);
-}
-
 static void drm_sched_job_begin(struct drm_sched_job *s_job)
 {
 	struct drm_gpu_scheduler *sched = s_job->sched;
 	unsigned long flags;
 
-	dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
-			       drm_sched_job_finish_cb);
-
 	spin_lock_irqsave(&sched->job_list_lock, flags);
 	list_add_tail(&s_job->node, &sched->ring_mirror_list);
 	drm_sched_start_timeout(sched);
@@ -403,7 +390,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched)
 	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
 		if (s_job->s_fence->parent &&
 		    dma_fence_remove_callback(s_job->s_fence->parent,
-					      &s_job->s_fence->cb)) {
+					      &s_job->cb)) {
 			dma_fence_put(s_job->s_fence->parent);
 			s_job->s_fence->parent = NULL;
 			atomic_dec(&sched->hw_rq_count);
@@ -431,31 +418,34 @@ EXPORT_SYMBOL(drm_sched_stop);
 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 {
 	struct drm_sched_job *s_job, *tmp;
-	unsigned long flags;
 	int r;
 
 	if (!full_recovery)
 		goto unpark;
 
-	spin_lock_irqsave(&sched->job_list_lock, flags);
+	/*
+	 * Locking the list is not required here as the sched thread is parked
+	 * so no new jobs are being pushed in to HW and in drm_sched_stop we
+	 * flushed all the jobs who were still in mirror list but who already
+	 * signaled and removed them self from the list. Also concurrent
+	 * GPU recovers can't run in parallel.
+	 */
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
-		struct drm_sched_fence *s_fence = s_job->s_fence;
 		struct dma_fence *fence = s_job->s_fence->parent;
 
 		if (fence) {
-			r = dma_fence_add_callback(fence, &s_fence->cb,
+			r = dma_fence_add_callback(fence, &s_job->cb,
						   drm_sched_process_job);
 			if (r == -ENOENT)
-				drm_sched_process_job(fence, &s_fence->cb);
+				drm_sched_process_job(fence, &s_job->cb);
 			else if (r)
 				DRM_ERROR("fence add callback failed (%d)\n",
 					  r);
 		} else
-			drm_sched_process_job(NULL, &s_fence->cb);
+			drm_sched_process_job(NULL, &s_job->cb);
 	}
 
 	drm_sched_start_timeout(sched);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
 unpark:
 	kthread_unpark(sched->thread);
@@ -604,18 +594,27 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
  */
 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 {
-	struct drm_sched_fence *s_fence =
-		container_of(cb, struct drm_sched_fence, cb);
+	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
+	struct drm_sched_fence *s_fence = s_job->s_fence;
 	struct drm_gpu_scheduler *sched = s_fence->sched;
+	unsigned long flags;
+
+	cancel_delayed_work(&sched->work_tdr);
 
-	dma_fence_get(&s_fence->finished);
 	atomic_dec(&sched->hw_rq_count);
 	atomic_dec(&sched->num_jobs);
+
+	spin_lock_irqsave(&sched->job_list_lock, flags);
+	/* remove job from ring_mirror_list */
+	list_del_init(&s_job->node);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
 	drm_sched_fence_finished(s_fence);
 
 	trace_drm_sched_process_job(s_fence);
-	dma_fence_put(&s_fence->finished);
 	wake_up_interruptible(&sched->wake_up_worker);
+
+	schedule_work(&s_job->finish_work);
 }
 
 /**
@@ -678,16 +677,16 @@ static int drm_sched_main(void *param)
 
 		if (fence) {
 			s_fence->parent = dma_fence_get(fence);
-			r = dma_fence_add_callback(fence, &s_fence->cb,
+			r = dma_fence_add_callback(fence, &sched_job->cb,
						   drm_sched_process_job);
 			if (r == -ENOENT)
-				drm_sched_process_job(fence, &s_fence->cb);
+				drm_sched_process_job(fence, &sched_job->cb);
 			else if (r)
 				DRM_ERROR("fence add callback failed (%d)\n",
 					  r);
 			dma_fence_put(fence);
 		} else
-			drm_sched_process_job(NULL, &s_fence->cb);
+			drm_sched_process_job(NULL, &sched_job->cb);
 
 		wake_up(&sched->job_scheduled);
 	}
...
@@ -137,10 +137,6 @@ struct drm_sched_fence {
 	 */
 	struct dma_fence finished;
 
-	/**
-	 * @cb: the callback for the parent fence below.
-	 */
-	struct dma_fence_cb cb;
 	/**
 	 * @parent: the fence returned by &drm_sched_backend_ops.run_job
 	 * when scheduling the job on hardware. We signal the
@@ -181,6 +177,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
  * be scheduled further.
  * @s_priority: the priority of the job.
  * @entity: the entity to which this job belongs.
+ * @cb: the callback for the parent fence in s_fence.
  *
  * A job is created by the driver using drm_sched_job_init(), and
  * should call drm_sched_entity_push_job() once it wants the scheduler
@@ -197,6 +194,7 @@ struct drm_sched_job {
 	atomic_t karma;
 	enum drm_sched_priority s_priority;
 	struct drm_sched_entity *entity;
+	struct dma_fence_cb cb;
 };
 
 static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
...
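A second hedged sketch, tied to the drm_sched_stop hunk above: the recovery path treats callback removal as an ownership test. The simplified, single-threaded model below (model_fence and model_remove_callback are made-up names, not the dma-fence API) captures the rule the patch relies on so that an already signaled job is not processed twice: if removal succeeds, drm_sched_process_job never ran for that job and the job is still on the mirror list; if removal fails, the HW fence already signaled and the callback has already taken the job off the list.

/* Simplified ownership model of dma_fence_remove_callback(); illustrative only. */
#include <stdbool.h>
#include <stdio.h>

struct model_fence {
	bool signaled;		/* has the modeled HW fence signaled? */
	bool cb_installed;	/* is the job's callback still armed? */
};

/* Models dma_fence_remove_callback(): true only if the callback never ran. */
static bool model_remove_callback(struct model_fence *f)
{
	if (f->signaled || !f->cb_installed)
		return false;	/* callback already consumed (or never owned) the job */
	f->cb_installed = false;
	return true;		/* the recovery path owns the job again */
}

int main(void)
{
	struct model_fence pending  = { .signaled = false, .cb_installed = true };
	struct model_fence finished = { .signaled = true,  .cb_installed = true };

	printf("pending job reclaimed by recovery:  %d\n", model_remove_callback(&pending));
	printf("finished job reclaimed by recovery: %d\n", model_remove_callback(&finished));
	return 0;
}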