Commit 6a962430, authored by Nayan Deshmukh, committed by Alex Deucher

drm/scheduler: remove timeout work_struct from drm_sched_job (v3)

Having a delayed work item per job is redundant, as we only need one
per scheduler to track the timeout of the currently executing job.

v2: the first element of the ring mirror list is the currently
executing job, so we don't need an additional variable for it

v3: squash in fixes for v3d and etnaviv
Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 3ea81f71
...@@ -105,7 +105,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) ...@@ -105,7 +105,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
change = dma_addr - gpu->hangcheck_dma_addr; change = dma_addr - gpu->hangcheck_dma_addr;
if (change < 0 || change > 16) { if (change < 0 || change > 16) {
gpu->hangcheck_dma_addr = dma_addr; gpu->hangcheck_dma_addr = dma_addr;
schedule_delayed_work(&sched_job->work_tdr, schedule_delayed_work(&sched_job->sched->work_tdr,
sched_job->sched->timeout); sched_job->sched->timeout);
return; return;
} }
......
...@@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work) ...@@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work)
* manages to find this job as the next job in the list, the fence * manages to find this job as the next job in the list, the fence
* signaled check below will prevent the timeout to be restarted. * signaled check below will prevent the timeout to be restarted.
*/ */
cancel_delayed_work_sync(&s_job->work_tdr); cancel_delayed_work_sync(&sched->work_tdr);
spin_lock(&sched->job_list_lock); spin_lock(&sched->job_list_lock);
/* queue TDR for next job */
if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
!list_is_last(&s_job->node, &sched->ring_mirror_list)) {
struct drm_sched_job *next = list_next_entry(s_job, node);
if (!dma_fence_is_signaled(&next->s_fence->finished))
schedule_delayed_work(&next->work_tdr, sched->timeout);
}
/* remove job from ring_mirror_list */ /* remove job from ring_mirror_list */
list_del(&s_job->node); list_del(&s_job->node);
/* queue TDR for next job */
if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
!list_empty(&sched->ring_mirror_list))
schedule_delayed_work(&sched->work_tdr, sched->timeout);
spin_unlock(&sched->job_list_lock); spin_unlock(&sched->job_list_lock);
dma_fence_put(&s_job->s_fence->finished); dma_fence_put(&s_job->s_fence->finished);
...@@ -236,15 +232,20 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) ...@@ -236,15 +232,20 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
if (sched->timeout != MAX_SCHEDULE_TIMEOUT && if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
list_first_entry_or_null(&sched->ring_mirror_list, list_first_entry_or_null(&sched->ring_mirror_list,
struct drm_sched_job, node) == s_job) struct drm_sched_job, node) == s_job)
schedule_delayed_work(&s_job->work_tdr, sched->timeout); schedule_delayed_work(&sched->work_tdr, sched->timeout);
spin_unlock(&sched->job_list_lock); spin_unlock(&sched->job_list_lock);
} }
static void drm_sched_job_timedout(struct work_struct *work) static void drm_sched_job_timedout(struct work_struct *work)
{ {
struct drm_sched_job *job = container_of(work, struct drm_sched_job, struct drm_gpu_scheduler *sched;
work_tdr.work); struct drm_sched_job *job;
sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
job = list_first_entry_or_null(&sched->ring_mirror_list,
struct drm_sched_job, node);
if (job)
job->sched->ops->timedout_job(job); job->sched->ops->timedout_job(job);
} }
...@@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) ...@@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
s_job = list_first_entry_or_null(&sched->ring_mirror_list, s_job = list_first_entry_or_null(&sched->ring_mirror_list,
struct drm_sched_job, node); struct drm_sched_job, node);
if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT) if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
schedule_delayed_work(&s_job->work_tdr, sched->timeout); schedule_delayed_work(&sched->work_tdr, sched->timeout);
list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
struct drm_sched_fence *s_fence = s_job->s_fence; struct drm_sched_fence *s_fence = s_job->s_fence;
...@@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job, ...@@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
INIT_WORK(&job->finish_work, drm_sched_job_finish); INIT_WORK(&job->finish_work, drm_sched_job_finish);
INIT_LIST_HEAD(&job->node); INIT_LIST_HEAD(&job->node);
INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
return 0; return 0;
} }
...@@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, ...@@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
INIT_LIST_HEAD(&sched->ring_mirror_list); INIT_LIST_HEAD(&sched->ring_mirror_list);
spin_lock_init(&sched->job_list_lock); spin_lock_init(&sched->job_list_lock);
atomic_set(&sched->hw_rq_count, 0); atomic_set(&sched->hw_rq_count, 0);
INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
atomic_set(&sched->num_jobs, 0); atomic_set(&sched->num_jobs, 0);
atomic64_set(&sched->job_id_count, 0); atomic64_set(&sched->job_id_count, 0);
......
...@@ -168,7 +168,7 @@ v3d_job_timedout(struct drm_sched_job *sched_job) ...@@ -168,7 +168,7 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
job->timedout_ctca = ctca; job->timedout_ctca = ctca;
job->timedout_ctra = ctra; job->timedout_ctra = ctra;
schedule_delayed_work(&job->base.work_tdr, schedule_delayed_work(&job->base.sched->work_tdr,
job->base.sched->timeout); job->base.sched->timeout);
return; return;
} }
......
...@@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); ...@@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
* finished to remove the job from the * finished to remove the job from the
* @drm_gpu_scheduler.ring_mirror_list. * @drm_gpu_scheduler.ring_mirror_list.
* @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list. * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
* @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout
* interval is over.
* @id: a unique id assigned to each job scheduled on the scheduler. * @id: a unique id assigned to each job scheduled on the scheduler.
* @karma: increment on every hang caused by this job. If this exceeds the hang * @karma: increment on every hang caused by this job. If this exceeds the hang
* limit of the scheduler then the job is marked guilty and will not * limit of the scheduler then the job is marked guilty and will not
...@@ -195,7 +193,6 @@ struct drm_sched_job { ...@@ -195,7 +193,6 @@ struct drm_sched_job {
struct dma_fence_cb finish_cb; struct dma_fence_cb finish_cb;
struct work_struct finish_work; struct work_struct finish_work;
struct list_head node; struct list_head node;
struct delayed_work work_tdr;
uint64_t id; uint64_t id;
atomic_t karma; atomic_t karma;
enum drm_sched_priority s_priority; enum drm_sched_priority s_priority;
...@@ -259,6 +256,8 @@ struct drm_sched_backend_ops { ...@@ -259,6 +256,8 @@ struct drm_sched_backend_ops {
* finished. * finished.
* @hw_rq_count: the number of jobs currently in the hardware queue. * @hw_rq_count: the number of jobs currently in the hardware queue.
* @job_id_count: used to assign unique id to the each job. * @job_id_count: used to assign unique id to the each job.
* @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
* timeout interval is over.
* @thread: the kthread on which the scheduler which run. * @thread: the kthread on which the scheduler which run.
* @ring_mirror_list: the list of jobs which are currently in the job queue. * @ring_mirror_list: the list of jobs which are currently in the job queue.
* @job_list_lock: lock to protect the ring_mirror_list. * @job_list_lock: lock to protect the ring_mirror_list.
...@@ -278,6 +277,7 @@ struct drm_gpu_scheduler { ...@@ -278,6 +277,7 @@ struct drm_gpu_scheduler {
wait_queue_head_t job_scheduled; wait_queue_head_t job_scheduled;
atomic_t hw_rq_count; atomic_t hw_rq_count;
atomic64_t job_id_count; atomic64_t job_id_count;
struct delayed_work work_tdr;
struct task_struct *thread; struct task_struct *thread;
struct list_head ring_mirror_list; struct list_head ring_mirror_list;
spinlock_t job_list_lock; spinlock_t job_list_lock;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment