Commit 40b2dffb authored by Dave Airlie

Merge branch 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux into drm-next

- DP fixes for radeon and amdgpu
- IH ring fix for tonga and fiji
- Lots of GPU scheduler fixes
- Misc additional fixes

* 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux: (42 commits)
  drm/amdgpu: fix wait queue handling in the scheduler
  drm/amdgpu: remove extra parameters from scheduler callbacks
  drm/amdgpu: wake up scheduler only when neccessary
  drm/amdgpu: remove entity idle timeout v2
  drm/amdgpu: fix postclose order
  drm/amdgpu: use IB for copy buffer of eviction
  drm/amdgpu: adjust the judgement of removing fence callback
  drm/amdgpu: fix no sync_wait in copy_buffer
  drm/amdgpu: fix last_vm_update fence is not effetive for sched fence
  drm/amdgpu: add priv data to sched
  drm/amdgpu: add owner for sched fence
  drm/amdgpu: remove entity reference from sched fence
  drm/amdgpu: fix and cleanup amd_sched_entity_push_job
  drm/amdgpu: remove amdgpu_bo_list_clone
  drm/amdgpu: remove the context from amdgpu_job
  drm/amdgpu: remove unused parameters to amd_sched_create
  drm/amdgpu: remove sched_lock
  drm/amdgpu: remove prepare_job callback
  drm/amdgpu: cleanup a scheduler function name
  drm/amdgpu: reorder scheduler functions
  ...
parents db561760 c2b6bd7e
...@@ -183,6 +183,7 @@ struct amdgpu_vm; ...@@ -183,6 +183,7 @@ struct amdgpu_vm;
struct amdgpu_ring; struct amdgpu_ring;
struct amdgpu_semaphore; struct amdgpu_semaphore;
struct amdgpu_cs_parser; struct amdgpu_cs_parser;
struct amdgpu_job;
struct amdgpu_irq_src; struct amdgpu_irq_src;
struct amdgpu_fpriv; struct amdgpu_fpriv;
...@@ -246,7 +247,7 @@ struct amdgpu_buffer_funcs { ...@@ -246,7 +247,7 @@ struct amdgpu_buffer_funcs {
unsigned copy_num_dw; unsigned copy_num_dw;
/* used for buffer migration */ /* used for buffer migration */
void (*emit_copy_buffer)(struct amdgpu_ring *ring, void (*emit_copy_buffer)(struct amdgpu_ib *ib,
/* src addr in bytes */ /* src addr in bytes */
uint64_t src_offset, uint64_t src_offset,
/* dst addr in bytes */ /* dst addr in bytes */
...@@ -439,9 +440,12 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring); ...@@ -439,9 +440,12 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
struct amdgpu_fence **fences, struct fence **array,
bool intr, long t); uint32_t count,
bool wait_all,
bool intr,
signed long t);
struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence); struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
void amdgpu_fence_unref(struct amdgpu_fence **fence); void amdgpu_fence_unref(struct amdgpu_fence **fence);
...@@ -514,7 +518,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, ...@@ -514,7 +518,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
uint64_t dst_offset, uint64_t dst_offset,
uint32_t byte_count, uint32_t byte_count,
struct reservation_object *resv, struct reservation_object *resv,
struct amdgpu_fence **fence); struct fence **fence);
int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
struct amdgpu_bo_list_entry { struct amdgpu_bo_list_entry {
...@@ -650,7 +654,7 @@ struct amdgpu_sa_bo { ...@@ -650,7 +654,7 @@ struct amdgpu_sa_bo {
struct amdgpu_sa_manager *manager; struct amdgpu_sa_manager *manager;
unsigned soffset; unsigned soffset;
unsigned eoffset; unsigned eoffset;
struct amdgpu_fence *fence; struct fence *fence;
}; };
/* /*
...@@ -692,7 +696,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring, ...@@ -692,7 +696,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
struct amdgpu_semaphore *semaphore); struct amdgpu_semaphore *semaphore);
void amdgpu_semaphore_free(struct amdgpu_device *adev, void amdgpu_semaphore_free(struct amdgpu_device *adev,
struct amdgpu_semaphore **semaphore, struct amdgpu_semaphore **semaphore,
struct amdgpu_fence *fence); struct fence *fence);
/* /*
* Synchronization * Synchronization
...@@ -700,7 +704,8 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev, ...@@ -700,7 +704,8 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev,
struct amdgpu_sync { struct amdgpu_sync {
struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS]; struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS]; struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS];
struct amdgpu_fence *last_vm_update; DECLARE_HASHTABLE(fences, 4);
struct fence *last_vm_update;
}; };
void amdgpu_sync_create(struct amdgpu_sync *sync); void amdgpu_sync_create(struct amdgpu_sync *sync);
...@@ -712,8 +717,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, ...@@ -712,8 +717,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
void *owner); void *owner);
int amdgpu_sync_rings(struct amdgpu_sync *sync, int amdgpu_sync_rings(struct amdgpu_sync *sync,
struct amdgpu_ring *ring); struct amdgpu_ring *ring);
int amdgpu_sync_wait(struct amdgpu_sync *sync);
void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync, void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct amdgpu_fence *fence); struct fence *fence);
/* /*
* GART structures, functions & helpers * GART structures, functions & helpers
...@@ -871,7 +877,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, ...@@ -871,7 +877,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
struct amdgpu_ring *ring, struct amdgpu_ring *ring,
struct amdgpu_ib *ibs, struct amdgpu_ib *ibs,
unsigned num_ibs, unsigned num_ibs,
int (*free_job)(struct amdgpu_cs_parser *), int (*free_job)(struct amdgpu_job *),
void *owner, void *owner,
struct fence **fence); struct fence **fence);
...@@ -957,7 +963,7 @@ struct amdgpu_vm_id { ...@@ -957,7 +963,7 @@ struct amdgpu_vm_id {
unsigned id; unsigned id;
uint64_t pd_gpu_addr; uint64_t pd_gpu_addr;
/* last flushed PD/PT update */ /* last flushed PD/PT update */
struct amdgpu_fence *flushed_updates; struct fence *flushed_updates;
/* last use of vmid */ /* last use of vmid */
struct amdgpu_fence *last_id_use; struct amdgpu_fence *last_id_use;
}; };
...@@ -1042,7 +1048,7 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); ...@@ -1042,7 +1048,7 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx); int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
struct fence *fence, uint64_t queued_seq); struct fence *fence);
struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct amdgpu_ring *ring, uint64_t seq); struct amdgpu_ring *ring, uint64_t seq);
...@@ -1077,8 +1083,6 @@ struct amdgpu_bo_list { ...@@ -1077,8 +1083,6 @@ struct amdgpu_bo_list {
struct amdgpu_bo_list_entry *array; struct amdgpu_bo_list_entry *array;
}; };
struct amdgpu_bo_list *
amdgpu_bo_list_clone(struct amdgpu_bo_list *list);
struct amdgpu_bo_list * struct amdgpu_bo_list *
amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id); amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id);
void amdgpu_bo_list_put(struct amdgpu_bo_list *list); void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
...@@ -1255,14 +1259,16 @@ struct amdgpu_cs_parser { ...@@ -1255,14 +1259,16 @@ struct amdgpu_cs_parser {
/* user fence */ /* user fence */
struct amdgpu_user_fence uf; struct amdgpu_user_fence uf;
};
struct amdgpu_ring *ring; struct amdgpu_job {
struct amd_sched_job base;
struct amdgpu_device *adev;
struct amdgpu_ib *ibs;
uint32_t num_ibs;
struct mutex job_lock; struct mutex job_lock;
struct work_struct job_work; struct amdgpu_user_fence uf;
int (*prepare_job)(struct amdgpu_cs_parser *sched_job); int (*free_job)(struct amdgpu_job *sched_job);
int (*run_job)(struct amdgpu_cs_parser *sched_job);
int (*free_job)(struct amdgpu_cs_parser *sched_job);
struct amd_sched_fence *s_fence;
}; };
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
...@@ -2241,7 +2247,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) ...@@ -2241,7 +2247,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
#define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s)) #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s))
#define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s)) #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s))
#define amdgpu_emit_copy_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((r), (s), (d), (b)) #define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
#define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b)) #define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b))
#define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev)) #define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev))
#define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev)) #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev))
...@@ -2343,7 +2349,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, ...@@ -2343,7 +2349,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync); struct amdgpu_sync *sync);
void amdgpu_vm_flush(struct amdgpu_ring *ring, void amdgpu_vm_flush(struct amdgpu_ring *ring,
struct amdgpu_vm *vm, struct amdgpu_vm *vm,
struct amdgpu_fence *updates); struct fence *updates);
void amdgpu_vm_fence(struct amdgpu_device *adev, void amdgpu_vm_fence(struct amdgpu_device *adev,
struct amdgpu_vm *vm, struct amdgpu_vm *vm,
struct amdgpu_fence *fence); struct amdgpu_fence *fence);
...@@ -2373,7 +2379,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, ...@@ -2373,7 +2379,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
uint64_t addr); uint64_t addr);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va); struct amdgpu_bo_va *bo_va);
int amdgpu_vm_free_job(struct amdgpu_job *job);
/* /*
* functions used by amdgpu_encoder.c * functions used by amdgpu_encoder.c
*/ */
......
...@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, ...@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
{ {
unsigned long start_jiffies; unsigned long start_jiffies;
unsigned long end_jiffies; unsigned long end_jiffies;
struct amdgpu_fence *fence = NULL; struct fence *fence = NULL;
int i, r; int i, r;
start_jiffies = jiffies; start_jiffies = jiffies;
...@@ -42,17 +42,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, ...@@ -42,17 +42,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence); r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence);
if (r) if (r)
goto exit_do_move; goto exit_do_move;
r = fence_wait(&fence->base, false); r = fence_wait(fence, false);
if (r) if (r)
goto exit_do_move; goto exit_do_move;
amdgpu_fence_unref(&fence); fence_put(fence);
} }
end_jiffies = jiffies; end_jiffies = jiffies;
r = jiffies_to_msecs(end_jiffies - start_jiffies); r = jiffies_to_msecs(end_jiffies - start_jiffies);
exit_do_move: exit_do_move:
if (fence) if (fence)
amdgpu_fence_unref(&fence); fence_put(fence);
return r; return r;
} }
......
...@@ -62,39 +62,6 @@ static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, ...@@ -62,39 +62,6 @@ static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
return 0; return 0;
} }
/*
 * Duplicate a BO list.
 *
 * Allocates a new list plus an entry array sized for @list, copies the
 * GDS/GWS/OA objects, the userptr flag and every entry, and calls
 * amdgpu_bo_ref() on the robj of each copied entry.
 *
 * Returns the new list, or NULL if either allocation fails.
 */
struct amdgpu_bo_list *
amdgpu_bo_list_clone(struct amdgpu_bo_list *list)
{
	struct amdgpu_bo_list *clone;
	unsigned n;

	clone = kmalloc(sizeof(*clone), GFP_KERNEL);
	if (clone == NULL)
		return NULL;

	clone->array = drm_calloc_large(list->num_entries,
					sizeof(*clone->array));
	if (clone->array == NULL) {
		kfree(clone);
		return NULL;
	}

	mutex_init(&clone->lock);

	/* Scalar state is copied verbatim from the source list. */
	clone->num_entries = list->num_entries;
	clone->has_userptr = list->has_userptr;
	clone->gds_obj = list->gds_obj;
	clone->gws_obj = list->gws_obj;
	clone->oa_obj = list->oa_obj;

	memcpy(clone->array, list->array,
	       list->num_entries * sizeof(*clone->array));

	for (n = 0; n < clone->num_entries; n++)
		amdgpu_bo_ref(clone->array[n].robj);

	return clone;
}
static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
{ {
struct amdgpu_bo_list *list; struct amdgpu_bo_list *list;
......
...@@ -75,6 +75,11 @@ void amdgpu_connector_hotplug(struct drm_connector *connector) ...@@ -75,6 +75,11 @@ void amdgpu_connector_hotplug(struct drm_connector *connector)
if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
} else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
/* Don't try to start link training before we
* have the dpcd */
if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
return;
/* set it to OFF so that drm_helper_connector_dpms() /* set it to OFF so that drm_helper_connector_dpms()
* won't return immediately since the current state * won't return immediately since the current state
* is ON at this point. * is ON at this point.
......
...@@ -126,19 +126,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, ...@@ -126,19 +126,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
return 0; return 0;
} }
static void amdgpu_job_work_func(struct work_struct *work)
{
struct amdgpu_cs_parser *sched_job =
container_of(work, struct amdgpu_cs_parser,
job_work);
mutex_lock(&sched_job->job_lock);
if (sched_job->free_job)
sched_job->free_job(sched_job);
mutex_unlock(&sched_job->job_lock);
/* after processing job, free memory */
fence_put(&sched_job->s_fence->base);
kfree(sched_job);
}
struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
struct drm_file *filp, struct drm_file *filp,
struct amdgpu_ctx *ctx, struct amdgpu_ctx *ctx,
...@@ -157,10 +144,6 @@ struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, ...@@ -157,10 +144,6 @@ struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
parser->ctx = ctx; parser->ctx = ctx;
parser->ibs = ibs; parser->ibs = ibs;
parser->num_ibs = num_ibs; parser->num_ibs = num_ibs;
if (amdgpu_enable_scheduler) {
mutex_init(&parser->job_lock);
INIT_WORK(&parser->job_work, amdgpu_job_work_func);
}
for (i = 0; i < num_ibs; i++) for (i = 0; i < num_ibs; i++)
ibs[i].ctx = ctx; ibs[i].ctx = ctx;
...@@ -173,7 +156,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) ...@@ -173,7 +156,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
uint64_t *chunk_array_user; uint64_t *chunk_array_user;
uint64_t *chunk_array = NULL; uint64_t *chunk_array = NULL;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_bo_list *bo_list = NULL;
unsigned size, i; unsigned size, i;
int r = 0; int r = 0;
...@@ -185,20 +167,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) ...@@ -185,20 +167,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
r = -EINVAL; r = -EINVAL;
goto out; goto out;
} }
bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
if (!amdgpu_enable_scheduler)
p->bo_list = bo_list;
else {
if (bo_list && !bo_list->has_userptr) {
p->bo_list = amdgpu_bo_list_clone(bo_list);
amdgpu_bo_list_put(bo_list);
if (!p->bo_list)
return -ENOMEM;
} else if (bo_list && bo_list->has_userptr)
p->bo_list = bo_list;
else
p->bo_list = NULL;
}
/* get chunks */ /* get chunks */
INIT_LIST_HEAD(&p->validated); INIT_LIST_HEAD(&p->validated);
...@@ -291,7 +260,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) ...@@ -291,7 +260,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
} }
p->ibs = kmalloc_array(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!p->ibs) if (!p->ibs)
r = -ENOMEM; r = -ENOMEM;
...@@ -498,24 +467,23 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) ...@@ -498,24 +467,23 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
unsigned i; unsigned i;
if (parser->ctx) if (parser->ctx)
amdgpu_ctx_put(parser->ctx); amdgpu_ctx_put(parser->ctx);
if (parser->bo_list) { if (parser->bo_list)
if (amdgpu_enable_scheduler && !parser->bo_list->has_userptr)
amdgpu_bo_list_free(parser->bo_list);
else
amdgpu_bo_list_put(parser->bo_list); amdgpu_bo_list_put(parser->bo_list);
}
drm_free_large(parser->vm_bos); drm_free_large(parser->vm_bos);
for (i = 0; i < parser->nchunks; i++) for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata); drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks); kfree(parser->chunks);
if (!amdgpu_enable_scheduler)
{
if (parser->ibs) if (parser->ibs)
for (i = 0; i < parser->num_ibs; i++) for (i = 0; i < parser->num_ibs; i++)
amdgpu_ib_free(parser->adev, &parser->ibs[i]); amdgpu_ib_free(parser->adev, &parser->ibs[i]);
kfree(parser->ibs); kfree(parser->ibs);
if (parser->uf.bo) if (parser->uf.bo)
drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
}
if (!amdgpu_enable_scheduler)
kfree(parser); kfree(parser);
} }
...@@ -533,12 +501,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo ...@@ -533,12 +501,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
amdgpu_cs_parser_fini_late(parser); amdgpu_cs_parser_fini_late(parser);
} }
/*
 * free_job callback installed on a parser by amdgpu_cs_ioctl(): forwards
 * @sched_job to amdgpu_cs_parser_fini_late() and always returns 0.
 */
static int amdgpu_cs_parser_free_job(struct amdgpu_cs_parser *sched_job)
{
amdgpu_cs_parser_fini_late(sched_job);
return 0;
}
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
struct amdgpu_vm *vm) struct amdgpu_vm *vm)
{ {
...@@ -810,68 +772,16 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, ...@@ -810,68 +772,16 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
return 0; return 0;
} }
static int amdgpu_cs_parser_prepare_job(struct amdgpu_cs_parser *sched_job) static int amdgpu_cs_free_job(struct amdgpu_job *sched_job)
{ {
int r, i; int i;
struct amdgpu_cs_parser *parser = sched_job; if (sched_job->ibs)
struct amdgpu_device *adev = sched_job->adev; for (i = 0; i < sched_job->num_ibs; i++)
bool reserved_buffers = false; amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]);
kfree(sched_job->ibs);
r = amdgpu_cs_parser_relocs(parser); if (sched_job->uf.bo)
if (r) { drm_gem_object_unreference_unlocked(&sched_job->uf.bo->gem_base);
if (r != -ERESTARTSYS) { return 0;
if (r == -ENOMEM)
DRM_ERROR("Not enough memory for command submission!\n");
else
DRM_ERROR("Failed to process the buffer list %d!\n", r);
}
}
if (!r) {
reserved_buffers = true;
r = amdgpu_cs_ib_fill(adev, parser);
}
if (!r) {
r = amdgpu_cs_dependencies(adev, parser);
if (r)
DRM_ERROR("Failed in the dependencies handling %d!\n", r);
}
if (r) {
amdgpu_cs_parser_fini(parser, r, reserved_buffers);
return r;
}
for (i = 0; i < parser->num_ibs; i++)
trace_amdgpu_cs(parser, i);
r = amdgpu_cs_ib_vm_chunk(adev, parser);
return r;
}
/*
 * Look up the ring addressed by the first IB chunk of @parser.
 *
 * @adev:   device passed through to amdgpu_cs_get_ring()
 * @parser: parser whose chunks[0..nchunks) are scanned
 *
 * Returns the ring filled in by amdgpu_cs_get_ring() for the first chunk
 * whose chunk_id is AMDGPU_CHUNK_ID_IB, or NULL when that lookup fails or
 * when the parser holds no IB chunk at all.
 */
static struct amdgpu_ring *amdgpu_cs_parser_get_ring(
	struct amdgpu_device *adev,
	struct amdgpu_cs_parser *parser)
{
	/*
	 * Must start out NULL: if no IB chunk is found the loop never
	 * assigns it, and the caller would otherwise receive an
	 * indeterminate pointer.
	 */
	struct amdgpu_ring *ring = NULL;
	int i;

	for (i = 0; i < parser->nchunks; i++) {
		struct amdgpu_cs_chunk *chunk = &parser->chunks[i];
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		/* Only interpret kdata as an IB chunk once the id says so. */
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
		if (amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
				       chunk_ib->ip_instance, chunk_ib->ring,
				       &ring))
			return NULL;

		/* Only the first IB chunk decides the ring. */
		break;
	}

	return ring;
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
...@@ -879,7 +789,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -879,7 +789,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
union drm_amdgpu_cs *cs = data; union drm_amdgpu_cs *cs = data;
struct amdgpu_cs_parser *parser; struct amdgpu_cs_parser *parser;
int r; bool reserved_buffers = false;
int i, r;
down_read(&adev->exclusive_lock); down_read(&adev->exclusive_lock);
if (!adev->accel_working) { if (!adev->accel_working) {
...@@ -899,44 +810,79 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -899,44 +810,79 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return r; return r;
} }
if (amdgpu_enable_scheduler && parser->num_ibs) { r = amdgpu_cs_parser_relocs(parser);
struct amdgpu_ring * ring = if (r == -ENOMEM)
amdgpu_cs_parser_get_ring(adev, parser); DRM_ERROR("Not enough memory for command submission!\n");
r = amdgpu_cs_parser_prepare_job(parser); else if (r && r != -ERESTARTSYS)
DRM_ERROR("Failed to process the buffer list %d!\n", r);
else if (!r) {
reserved_buffers = true;
r = amdgpu_cs_ib_fill(adev, parser);
}
if (!r) {
r = amdgpu_cs_dependencies(adev, parser);
if (r)
DRM_ERROR("Failed in the dependencies handling %d!\n", r);
}
if (r) if (r)
goto out; goto out;
parser->ring = ring;
parser->free_job = amdgpu_cs_parser_free_job; for (i = 0; i < parser->num_ibs; i++)
mutex_lock(&parser->job_lock); trace_amdgpu_cs(parser, i);
r = amd_sched_push_job(ring->scheduler,
&parser->ctx->rings[ring->idx].entity, r = amdgpu_cs_ib_vm_chunk(adev, parser);
parser, if (r)
&parser->s_fence); goto out;
if (amdgpu_enable_scheduler && parser->num_ibs) {
struct amdgpu_job *job;
struct amdgpu_ring * ring = parser->ibs->ring;
job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
if (!job)
return -ENOMEM;
job->base.sched = ring->scheduler;
job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
job->adev = parser->adev;
job->ibs = parser->ibs;
job->num_ibs = parser->num_ibs;
job->base.owner = parser->filp;
mutex_init(&job->job_lock);
if (job->ibs[job->num_ibs - 1].user) {
memcpy(&job->uf, &parser->uf,
sizeof(struct amdgpu_user_fence));
job->ibs[job->num_ibs - 1].user = &job->uf;
}
job->free_job = amdgpu_cs_free_job;
mutex_lock(&job->job_lock);
r = amd_sched_entity_push_job((struct amd_sched_job *)job);
if (r) { if (r) {
mutex_unlock(&parser->job_lock); mutex_unlock(&job->job_lock);
amdgpu_cs_free_job(job);
kfree(job);
goto out; goto out;
} }
parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle =
amdgpu_ctx_add_fence(parser->ctx, ring, amdgpu_ctx_add_fence(parser->ctx, ring,
&parser->s_fence->base, &job->base.s_fence->base);
parser->s_fence->v_seq); parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
cs->out.handle = parser->s_fence->v_seq;
list_sort(NULL, &parser->validated, cmp_size_smaller_first); list_sort(NULL, &parser->validated, cmp_size_smaller_first);
ttm_eu_fence_buffer_objects(&parser->ticket, ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated, &parser->validated,
&parser->s_fence->base); &job->base.s_fence->base);
mutex_unlock(&parser->job_lock); mutex_unlock(&job->job_lock);
amdgpu_cs_parser_fini_late(parser);
up_read(&adev->exclusive_lock); up_read(&adev->exclusive_lock);
return 0; return 0;
} }
r = amdgpu_cs_parser_prepare_job(parser);
if (r)
goto out;
cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
out: out:
amdgpu_cs_parser_fini(parser, r, true); amdgpu_cs_parser_fini(parser, r, reserved_buffers);
up_read(&adev->exclusive_lock); up_read(&adev->exclusive_lock);
r = amdgpu_cs_handle_lockup(adev, r); r = amdgpu_cs_handle_lockup(adev, r);
return r; return r;
......
...@@ -229,17 +229,13 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) ...@@ -229,17 +229,13 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
} }
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
struct fence *fence, uint64_t queued_seq) struct fence *fence)
{ {
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
uint64_t seq = 0; uint64_t seq = cring->sequence;
unsigned idx = 0; unsigned idx = 0;
struct fence *other = NULL; struct fence *other = NULL;
if (amdgpu_enable_scheduler)
seq = queued_seq;
else
seq = cring->sequence;
idx = seq % AMDGPU_CTX_MAX_CS_PENDING; idx = seq % AMDGPU_CTX_MAX_CS_PENDING;
other = cring->fences[idx]; other = cring->fences[idx];
if (other) { if (other) {
...@@ -253,7 +249,6 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, ...@@ -253,7 +249,6 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
spin_lock(&ctx->ring_lock); spin_lock(&ctx->ring_lock);
cring->fences[idx] = fence; cring->fences[idx] = fence;
if (!amdgpu_enable_scheduler)
cring->sequence++; cring->sequence++;
spin_unlock(&ctx->ring_lock); spin_unlock(&ctx->ring_lock);
...@@ -267,21 +262,16 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, ...@@ -267,21 +262,16 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
{ {
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
struct fence *fence; struct fence *fence;
uint64_t queued_seq;
spin_lock(&ctx->ring_lock); spin_lock(&ctx->ring_lock);
if (amdgpu_enable_scheduler)
queued_seq = amd_sched_next_queued_seq(&cring->entity);
else
queued_seq = cring->sequence;
if (seq >= queued_seq) { if (seq >= cring->sequence) {
spin_unlock(&ctx->ring_lock); spin_unlock(&ctx->ring_lock);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
if (seq + AMDGPU_CTX_MAX_CS_PENDING < queued_seq) { if (seq + AMDGPU_CTX_MAX_CS_PENDING < cring->sequence) {
spin_unlock(&ctx->ring_lock); spin_unlock(&ctx->ring_lock);
return NULL; return NULL;
} }
......
...@@ -49,9 +49,10 @@ ...@@ -49,9 +49,10 @@
/* /*
* KMS wrapper. * KMS wrapper.
* - 3.0.0 - initial driver * - 3.0.0 - initial driver
* - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP)
*/ */
#define KMS_DRIVER_MAJOR 3 #define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 0 #define KMS_DRIVER_MINOR 1
#define KMS_DRIVER_PATCHLEVEL 0 #define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0; int amdgpu_vram_limit = 0;
......
...@@ -626,10 +626,10 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) ...@@ -626,10 +626,10 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
ring->fence_drv.ring = ring; ring->fence_drv.ring = ring;
if (amdgpu_enable_scheduler) { if (amdgpu_enable_scheduler) {
ring->scheduler = amd_sched_create((void *)ring->adev, ring->scheduler = amd_sched_create(&amdgpu_sched_ops,
&amdgpu_sched_ops, ring->idx,
ring->idx, 5, 0, amdgpu_sched_hw_submission,
amdgpu_sched_hw_submission); (void *)ring->adev);
if (!ring->scheduler) if (!ring->scheduler)
DRM_ERROR("Failed to create scheduler on ring %d.\n", DRM_ERROR("Failed to create scheduler on ring %d.\n",
ring->idx); ring->idx);
...@@ -836,22 +836,37 @@ static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence) ...@@ -836,22 +836,37 @@ static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence)
return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
} }
static inline bool amdgpu_test_signaled_any(struct amdgpu_fence **fences) static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count)
{ {
int idx; int idx;
struct amdgpu_fence *fence; struct fence *fence;
idx = 0; for (idx = 0; idx < count; ++idx) {
for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) {
fence = fences[idx]; fence = fences[idx];
if (fence) { if (fence) {
if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags)) if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
return true; return true;
} }
} }
return false; return false;
} }
/*
 * Check whether every fence in @fences[0..count) has
 * FENCE_FLAG_SIGNALED_BIT set.
 *
 * NULL entries are ignored, so an empty or all-NULL array yields true.
 * Returns false as soon as one non-NULL fence without the bit is found.
 */
static bool amdgpu_test_signaled_all(struct fence **fences, uint32_t count)
{
	/* Same type as @count, so the loop bound is not a mixed-sign compare. */
	uint32_t idx;

	for (idx = 0; idx < count; ++idx) {
		struct fence *fence = fences[idx];

		if (fence && !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			return false;
	}

	return true;
}
struct amdgpu_wait_cb { struct amdgpu_wait_cb {
struct fence_cb base; struct fence_cb base;
struct task_struct *task; struct task_struct *task;
...@@ -867,33 +882,56 @@ static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb) ...@@ -867,33 +882,56 @@ static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
static signed long amdgpu_fence_default_wait(struct fence *f, bool intr, static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
signed long t) signed long t)
{ {
struct amdgpu_fence *array[AMDGPU_MAX_RINGS];
struct amdgpu_fence *fence = to_amdgpu_fence(f); struct amdgpu_fence *fence = to_amdgpu_fence(f);
struct amdgpu_device *adev = fence->ring->adev; struct amdgpu_device *adev = fence->ring->adev;
memset(&array[0], 0, sizeof(array)); return amdgpu_fence_wait_multiple(adev, &f, 1, false, intr, t);
array[0] = fence;
return amdgpu_fence_wait_any(adev, array, intr, t);
} }
/* wait until any fence in array signaled */ /**
signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, * Wait the fence array with timeout
struct amdgpu_fence **array, bool intr, signed long t) *
* @adev: amdgpu device
* @array: array of fence pointers to wait on; NULL entries are skipped
* @count: number of entries in @array
* @wait_all: true to wait for all fences, false to wait for any one of them
* @intr: whether the task sleeps interruptibly while waiting
* @t: timeout to wait
*
* If wait_all is true, it returns once all fences are signaled or on timeout.
* If wait_all is false, it returns once any fence is signaled or on timeout.
*/
signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
struct fence **array,
uint32_t count,
bool wait_all,
bool intr,
signed long t)
{ {
long idx = 0; long idx = 0;
struct amdgpu_wait_cb cb[AMDGPU_MAX_RINGS]; struct amdgpu_wait_cb *cb;
struct amdgpu_fence *fence; struct fence *fence;
BUG_ON(!array); BUG_ON(!array);
for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { cb = kcalloc(count, sizeof(struct amdgpu_wait_cb), GFP_KERNEL);
if (cb == NULL) {
t = -ENOMEM;
goto err_free_cb;
}
for (idx = 0; idx < count; ++idx) {
fence = array[idx]; fence = array[idx];
if (fence) { if (fence) {
cb[idx].task = current; cb[idx].task = current;
if (fence_add_callback(&fence->base, if (fence_add_callback(fence,
&cb[idx].base, amdgpu_fence_wait_cb)) &cb[idx].base, amdgpu_fence_wait_cb)) {
return t; /* return if fence is already signaled */ /* The fence is already signaled */
if (wait_all)
continue;
else
goto fence_rm_cb;
}
} }
} }
...@@ -907,7 +945,9 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, ...@@ -907,7 +945,9 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
* amdgpu_test_signaled_any must be called after * amdgpu_test_signaled_any must be called after
* set_current_state to prevent a race with wake_up_process * set_current_state to prevent a race with wake_up_process
*/ */
if (amdgpu_test_signaled_any(array)) if (!wait_all && amdgpu_test_signaled_any(array, count))
break;
if (wait_all && amdgpu_test_signaled_all(array, count))
break; break;
if (adev->needs_reset) { if (adev->needs_reset) {
...@@ -923,13 +963,16 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev, ...@@ -923,13 +963,16 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
idx = 0; fence_rm_cb:
for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) { for (idx = 0; idx < count; ++idx) {
fence = array[idx]; fence = array[idx];
if (fence) if (fence && cb[idx].base.func)
fence_remove_callback(&fence->base, &cb[idx].base); fence_remove_callback(fence, &cb[idx].base);
} }
err_free_cb:
kfree(cb);
return t; return t;
} }
......
...@@ -73,29 +73,12 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, ...@@ -73,29 +73,12 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
if (!vm) if (!vm)
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
else
ib->gpu_addr = 0;
} else {
ib->sa_bo = NULL;
ib->ptr = NULL;
ib->gpu_addr = 0;
} }
amdgpu_sync_create(&ib->sync); amdgpu_sync_create(&ib->sync);
ib->ring = ring; ib->ring = ring;
ib->fence = NULL;
ib->user = NULL;
ib->vm = vm; ib->vm = vm;
ib->ctx = NULL;
ib->gds_base = 0;
ib->gds_size = 0;
ib->gws_base = 0;
ib->gws_size = 0;
ib->oa_base = 0;
ib->oa_size = 0;
ib->flags = 0;
return 0; return 0;
} }
...@@ -110,8 +93,8 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, ...@@ -110,8 +93,8 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
*/ */
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
{ {
amdgpu_sync_free(adev, &ib->sync, ib->fence); amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
amdgpu_sa_bo_free(adev, &ib->sa_bo, ib->fence); amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
amdgpu_fence_unref(&ib->fence); amdgpu_fence_unref(&ib->fence);
} }
...@@ -143,7 +126,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, ...@@ -143,7 +126,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_ring *ring; struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx, *old_ctx; struct amdgpu_ctx *ctx, *old_ctx;
struct amdgpu_vm *vm; struct amdgpu_vm *vm;
uint64_t sequence;
unsigned i; unsigned i;
int r = 0; int r = 0;
...@@ -158,7 +140,11 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, ...@@ -158,7 +140,11 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
dev_err(adev->dev, "couldn't schedule ib\n"); dev_err(adev->dev, "couldn't schedule ib\n");
return -EINVAL; return -EINVAL;
} }
r = amdgpu_sync_wait(&ibs->sync);
if (r) {
dev_err(adev->dev, "IB sync failed (%d).\n", r);
return r;
}
r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs); r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs);
if (r) { if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r); dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
...@@ -216,12 +202,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, ...@@ -216,12 +202,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
return r; return r;
} }
sequence = amdgpu_enable_scheduler ? ib->sequence : 0;
if (!amdgpu_enable_scheduler && ib->ctx) if (!amdgpu_enable_scheduler && ib->ctx)
ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
&ib->fence->base, &ib->fence->base);
sequence);
/* wrap the last IB with fence */ /* wrap the last IB with fence */
if (ib->user) { if (ib->user) {
......
...@@ -98,18 +98,12 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, ...@@ -98,18 +98,12 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
/* add 8 bytes for the rptr/wptr shadows and /* add 8 bytes for the rptr/wptr shadows and
* add them to the end of the ring allocation. * add them to the end of the ring allocation.
*/ */
adev->irq.ih.ring = kzalloc(adev->irq.ih.ring_size + 8, GFP_KERNEL); adev->irq.ih.ring = pci_alloc_consistent(adev->pdev,
adev->irq.ih.ring_size + 8,
&adev->irq.ih.rb_dma_addr);
if (adev->irq.ih.ring == NULL) if (adev->irq.ih.ring == NULL)
return -ENOMEM; return -ENOMEM;
adev->irq.ih.rb_dma_addr = pci_map_single(adev->pdev, memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8);
(void *)adev->irq.ih.ring,
adev->irq.ih.ring_size,
PCI_DMA_BIDIRECTIONAL);
if (pci_dma_mapping_error(adev->pdev, adev->irq.ih.rb_dma_addr)) {
dev_err(&adev->pdev->dev, "Failed to DMA MAP the IH RB page\n");
kfree((void *)adev->irq.ih.ring);
return -ENOMEM;
}
adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0; adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0;
adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1; adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1;
} }
...@@ -149,9 +143,9 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev) ...@@ -149,9 +143,9 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
/* add 8 bytes for the rptr/wptr shadows and /* add 8 bytes for the rptr/wptr shadows and
* add them to the end of the ring allocation. * add them to the end of the ring allocation.
*/ */
pci_unmap_single(adev->pdev, adev->irq.ih.rb_dma_addr, pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8,
adev->irq.ih.ring_size + 8, PCI_DMA_BIDIRECTIONAL); (void *)adev->irq.ih.ring,
kfree((void *)adev->irq.ih.ring); adev->irq.ih.rb_dma_addr);
adev->irq.ih.ring = NULL; adev->irq.ih.ring = NULL;
} }
} else { } else {
......
...@@ -560,6 +560,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, ...@@ -560,6 +560,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (!fpriv) if (!fpriv)
return; return;
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
amdgpu_vm_fini(adev, &fpriv->vm); amdgpu_vm_fini(adev, &fpriv->vm);
idr_for_each_entry(&fpriv->bo_list_handles, list, handle) idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
...@@ -568,8 +570,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, ...@@ -568,8 +570,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
idr_destroy(&fpriv->bo_list_handles); idr_destroy(&fpriv->bo_list_handles);
mutex_destroy(&fpriv->bo_list_lock); mutex_destroy(&fpriv->bo_list_lock);
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
kfree(fpriv); kfree(fpriv);
file_priv->driver_priv = NULL; file_priv->driver_priv = NULL;
} }
......
...@@ -193,7 +193,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, ...@@ -193,7 +193,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
unsigned size, unsigned align); unsigned size, unsigned align);
void amdgpu_sa_bo_free(struct amdgpu_device *adev, void amdgpu_sa_bo_free(struct amdgpu_device *adev,
struct amdgpu_sa_bo **sa_bo, struct amdgpu_sa_bo **sa_bo,
struct amdgpu_fence *fence); struct fence *fence);
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m); struct seq_file *m);
......
...@@ -139,6 +139,20 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, ...@@ -139,6 +139,20 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
return r; return r;
} }
/*
 * Map a fence to the ring index it belongs to.
 *
 * The fence is first tried as an amd_sched_fence, in which case the
 * ring_id of its scheduler is returned.  Otherwise it is tried as an
 * amdgpu_fence and the idx of its ring is returned.  When neither
 * conversion yields a pointer the result is 0.
 */
static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f)
{
	struct amd_sched_fence *sched_fence = to_amd_sched_fence(f);
	struct amdgpu_fence *ring_fence;

	if (sched_fence)
		return sched_fence->scheduler->ring_id;

	ring_fence = to_amdgpu_fence(f);
	if (!ring_fence)
		return 0;

	return ring_fence->ring->idx;
}
static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
{ {
struct amdgpu_sa_manager *sa_manager = sa_bo->manager; struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
...@@ -147,7 +161,7 @@ static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) ...@@ -147,7 +161,7 @@ static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
} }
list_del_init(&sa_bo->olist); list_del_init(&sa_bo->olist);
list_del_init(&sa_bo->flist); list_del_init(&sa_bo->flist);
amdgpu_fence_unref(&sa_bo->fence); fence_put(sa_bo->fence);
kfree(sa_bo); kfree(sa_bo);
} }
...@@ -161,7 +175,7 @@ static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager) ...@@ -161,7 +175,7 @@ static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist); sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) { list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
if (sa_bo->fence == NULL || if (sa_bo->fence == NULL ||
!fence_is_signaled(&sa_bo->fence->base)) { !fence_is_signaled(sa_bo->fence)) {
return; return;
} }
amdgpu_sa_bo_remove_locked(sa_bo); amdgpu_sa_bo_remove_locked(sa_bo);
...@@ -246,7 +260,7 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager, ...@@ -246,7 +260,7 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
} }
static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
struct amdgpu_fence **fences, struct fence **fences,
unsigned *tries) unsigned *tries)
{ {
struct amdgpu_sa_bo *best_bo = NULL; struct amdgpu_sa_bo *best_bo = NULL;
...@@ -275,7 +289,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, ...@@ -275,7 +289,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
sa_bo = list_first_entry(&sa_manager->flist[i], sa_bo = list_first_entry(&sa_manager->flist[i],
struct amdgpu_sa_bo, flist); struct amdgpu_sa_bo, flist);
if (!fence_is_signaled(&sa_bo->fence->base)) { if (!fence_is_signaled(sa_bo->fence)) {
fences[i] = sa_bo->fence; fences[i] = sa_bo->fence;
continue; continue;
} }
...@@ -299,7 +313,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, ...@@ -299,7 +313,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
} }
if (best_bo) { if (best_bo) {
++tries[best_bo->fence->ring->idx]; uint32_t idx = amdgpu_sa_get_ring_from_fence(best_bo->fence);
++tries[idx];
sa_manager->hole = best_bo->olist.prev; sa_manager->hole = best_bo->olist.prev;
/* we knew that this one is signaled, /* we knew that this one is signaled,
...@@ -315,7 +330,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, ...@@ -315,7 +330,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
struct amdgpu_sa_bo **sa_bo, struct amdgpu_sa_bo **sa_bo,
unsigned size, unsigned align) unsigned size, unsigned align)
{ {
struct amdgpu_fence *fences[AMDGPU_MAX_RINGS]; struct fence *fences[AMDGPU_MAX_RINGS];
unsigned tries[AMDGPU_MAX_RINGS]; unsigned tries[AMDGPU_MAX_RINGS];
int i, r; int i, r;
signed long t; signed long t;
...@@ -352,7 +367,8 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, ...@@ -352,7 +367,8 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries)); } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
spin_unlock(&sa_manager->wq.lock); spin_unlock(&sa_manager->wq.lock);
t = amdgpu_fence_wait_any(adev, fences, false, MAX_SCHEDULE_TIMEOUT); t = amdgpu_fence_wait_multiple(adev, fences, AMDGPU_MAX_RINGS, false, false,
MAX_SCHEDULE_TIMEOUT);
r = (t > 0) ? 0 : t; r = (t > 0) ? 0 : t;
spin_lock(&sa_manager->wq.lock); spin_lock(&sa_manager->wq.lock);
/* if we have nothing to wait for block */ /* if we have nothing to wait for block */
...@@ -372,7 +388,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, ...@@ -372,7 +388,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
} }
void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
struct amdgpu_fence *fence) struct fence *fence)
{ {
struct amdgpu_sa_manager *sa_manager; struct amdgpu_sa_manager *sa_manager;
...@@ -382,10 +398,11 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, ...@@ -382,10 +398,11 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
sa_manager = (*sa_bo)->manager; sa_manager = (*sa_bo)->manager;
spin_lock(&sa_manager->wq.lock); spin_lock(&sa_manager->wq.lock);
if (fence && !fence_is_signaled(&fence->base)) { if (fence && !fence_is_signaled(fence)) {
(*sa_bo)->fence = amdgpu_fence_ref(fence); uint32_t idx;
list_add_tail(&(*sa_bo)->flist, (*sa_bo)->fence = fence_get(fence);
&sa_manager->flist[fence->ring->idx]); idx = amdgpu_sa_get_ring_from_fence(fence);
list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
} else { } else {
amdgpu_sa_bo_remove_locked(*sa_bo); amdgpu_sa_bo_remove_locked(*sa_bo);
} }
...@@ -412,8 +429,16 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, ...@@ -412,8 +429,16 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
seq_printf(m, "[0x%010llx 0x%010llx] size %8lld", seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
soffset, eoffset, eoffset - soffset); soffset, eoffset, eoffset - soffset);
if (i->fence) { if (i->fence) {
struct amdgpu_fence *a_fence = to_amdgpu_fence(i->fence);
struct amd_sched_fence *s_fence = to_amd_sched_fence(i->fence);
if (a_fence)
seq_printf(m, " protected by 0x%016llx on ring %d", seq_printf(m, " protected by 0x%016llx on ring %d",
i->fence->seq, i->fence->ring->idx); a_fence->seq, a_fence->ring->idx);
if (s_fence)
seq_printf(m, " protected by 0x%016x on ring %d",
s_fence->base.seqno,
s_fence->scheduler->ring_id);
} }
seq_printf(m, "\n"); seq_printf(m, "\n");
} }
......
...@@ -27,55 +27,28 @@ ...@@ -27,55 +27,28 @@
#include <drm/drmP.h> #include <drm/drmP.h>
#include "amdgpu.h" #include "amdgpu.h"
static int amdgpu_sched_prepare_job(struct amd_gpu_scheduler *sched, static struct fence *amdgpu_sched_run_job(struct amd_sched_job *job)
struct amd_sched_entity *entity,
struct amd_sched_job *job)
{ {
int r = 0; struct amdgpu_job *sched_job;
struct amdgpu_cs_parser *sched_job;
if (!job || !job->data) {
DRM_ERROR("job is null\n");
return -EINVAL;
}
sched_job = (struct amdgpu_cs_parser *)job->data;
if (sched_job->prepare_job) {
r = sched_job->prepare_job(sched_job);
if (r) {
DRM_ERROR("Prepare job error\n");
schedule_work(&sched_job->job_work);
}
}
return r;
}
static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity,
struct amd_sched_job *job)
{
int r = 0;
struct amdgpu_cs_parser *sched_job;
struct amdgpu_fence *fence; struct amdgpu_fence *fence;
int r;
if (!job || !job->data) { if (!job) {
DRM_ERROR("job is null\n"); DRM_ERROR("job is null\n");
return NULL; return NULL;
} }
sched_job = (struct amdgpu_cs_parser *)job->data; sched_job = (struct amdgpu_job *)job;
mutex_lock(&sched_job->job_lock); mutex_lock(&sched_job->job_lock);
r = amdgpu_ib_schedule(sched_job->adev, r = amdgpu_ib_schedule(sched_job->adev,
sched_job->num_ibs, sched_job->num_ibs,
sched_job->ibs, sched_job->ibs,
sched_job->filp); sched_job->base.owner);
if (r) if (r)
goto err; goto err;
fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence); fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence);
if (sched_job->run_job) { if (sched_job->free_job)
r = sched_job->run_job(sched_job); sched_job->free_job(sched_job);
if (r)
goto err;
}
mutex_unlock(&sched_job->job_lock); mutex_unlock(&sched_job->job_lock);
return &fence->base; return &fence->base;
...@@ -83,25 +56,25 @@ static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, ...@@ -83,25 +56,25 @@ static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched,
err: err:
DRM_ERROR("Run job error\n"); DRM_ERROR("Run job error\n");
mutex_unlock(&sched_job->job_lock); mutex_unlock(&sched_job->job_lock);
schedule_work(&sched_job->job_work); job->sched->ops->process_job(job);
return NULL; return NULL;
} }
static void amdgpu_sched_process_job(struct amd_gpu_scheduler *sched, static void amdgpu_sched_process_job(struct amd_sched_job *job)
struct amd_sched_job *job)
{ {
struct amdgpu_cs_parser *sched_job; struct amdgpu_job *sched_job;
if (!job || !job->data) { if (!job) {
DRM_ERROR("job is null\n"); DRM_ERROR("job is null\n");
return; return;
} }
sched_job = (struct amdgpu_cs_parser *)job->data; sched_job = (struct amdgpu_job *)job;
schedule_work(&sched_job->job_work); /* after processing job, free memory */
fence_put(&sched_job->base.s_fence->base);
kfree(sched_job);
} }
struct amd_sched_backend_ops amdgpu_sched_ops = { struct amd_sched_backend_ops amdgpu_sched_ops = {
.prepare_job = amdgpu_sched_prepare_job,
.run_job = amdgpu_sched_run_job, .run_job = amdgpu_sched_run_job,
.process_job = amdgpu_sched_process_job .process_job = amdgpu_sched_process_job
}; };
...@@ -110,36 +83,39 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, ...@@ -110,36 +83,39 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
struct amdgpu_ring *ring, struct amdgpu_ring *ring,
struct amdgpu_ib *ibs, struct amdgpu_ib *ibs,
unsigned num_ibs, unsigned num_ibs,
int (*free_job)(struct amdgpu_cs_parser *), int (*free_job)(struct amdgpu_job *),
void *owner, void *owner,
struct fence **f) struct fence **f)
{ {
int r = 0; int r = 0;
if (amdgpu_enable_scheduler) { if (amdgpu_enable_scheduler) {
struct amdgpu_cs_parser *sched_job = struct amdgpu_job *job =
amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx, kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
ibs, num_ibs); if (!job)
if(!sched_job) {
return -ENOMEM; return -ENOMEM;
} job->base.sched = ring->scheduler;
sched_job->free_job = free_job; job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
mutex_lock(&sched_job->job_lock); job->adev = adev;
r = amd_sched_push_job(ring->scheduler, job->ibs = ibs;
&adev->kernel_ctx.rings[ring->idx].entity, job->num_ibs = num_ibs;
sched_job, &sched_job->s_fence); job->base.owner = owner;
mutex_init(&job->job_lock);
job->free_job = free_job;
mutex_lock(&job->job_lock);
r = amd_sched_entity_push_job((struct amd_sched_job *)job);
if (r) { if (r) {
mutex_unlock(&sched_job->job_lock); mutex_unlock(&job->job_lock);
kfree(sched_job); kfree(job);
return r; return r;
} }
ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq; *f = fence_get(&job->base.s_fence->base);
*f = fence_get(&sched_job->s_fence->base); mutex_unlock(&job->job_lock);
mutex_unlock(&sched_job->job_lock);
} else { } else {
r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
if (r) if (r)
return r; return r;
*f = fence_get(&ibs[num_ibs - 1].fence->base); *f = fence_get(&ibs[num_ibs - 1].fence->base);
} }
return 0; return 0;
} }
...@@ -87,7 +87,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring, ...@@ -87,7 +87,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
void amdgpu_semaphore_free(struct amdgpu_device *adev, void amdgpu_semaphore_free(struct amdgpu_device *adev,
struct amdgpu_semaphore **semaphore, struct amdgpu_semaphore **semaphore,
struct amdgpu_fence *fence) struct fence *fence)
{ {
if (semaphore == NULL || *semaphore == NULL) { if (semaphore == NULL || *semaphore == NULL) {
return; return;
......
...@@ -32,6 +32,11 @@ ...@@ -32,6 +32,11 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_trace.h" #include "amdgpu_trace.h"
/*
 * One fence remembered in the 'fences' hash table of an amdgpu_sync.
 * Entries are allocated in amdgpu_sync_fence() and released again in
 * amdgpu_sync_wait() and amdgpu_sync_free().
 */
struct amdgpu_sync_entry {
/* hash table linkage; inserted with hash_add() keyed by the fence context */
struct hlist_node node;
/* referenced fence: taken with fence_get(), dropped with fence_put() */
struct fence *fence;
};
/** /**
* amdgpu_sync_create - zero init sync object * amdgpu_sync_create - zero init sync object
* *
...@@ -49,9 +54,33 @@ void amdgpu_sync_create(struct amdgpu_sync *sync) ...@@ -49,9 +54,33 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
sync->sync_to[i] = NULL; sync->sync_to[i] = NULL;
hash_init(sync->fences);
sync->last_vm_update = NULL; sync->last_vm_update = NULL;
} }
/*
 * Check whether fence @f belongs to device @adev.
 *
 * An amdgpu_fence is matched through the adev of its ring, an
 * amd_sched_fence through the priv pointer of its scheduler.  A fence
 * that converts to neither type is reported as not belonging to @adev.
 */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
{
	struct amdgpu_fence *ring_fence = to_amdgpu_fence(f);
	struct amd_sched_fence *sched_fence = to_amd_sched_fence(f);
	bool same = false;

	if (ring_fence)
		same = (ring_fence->ring->adev == adev);
	else if (sched_fence)
		same = ((struct amdgpu_device *)sched_fence->scheduler->priv == adev);

	return same;
}
/*
 * Check whether fence @f was created on behalf of @owner.
 *
 * The owner field of the amd_sched_fence is consulted first, then the
 * owner field of the amdgpu_fence.  A fence that converts to neither
 * type never matches.
 */
static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
{
	struct amdgpu_fence *ring_fence = to_amdgpu_fence(f);
	struct amd_sched_fence *sched_fence = to_amd_sched_fence(f);
	bool match = false;

	if (sched_fence)
		match = (sched_fence->owner == owner);
	else if (ring_fence)
		match = (ring_fence->owner == owner);

	return match;
}
/** /**
* amdgpu_sync_fence - remember to sync to this fence * amdgpu_sync_fence - remember to sync to this fence
* *
...@@ -62,28 +91,54 @@ void amdgpu_sync_create(struct amdgpu_sync *sync) ...@@ -62,28 +91,54 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct fence *f) struct fence *f)
{ {
struct amdgpu_sync_entry *e;
struct amdgpu_fence *fence; struct amdgpu_fence *fence;
struct amdgpu_fence *other; struct amdgpu_fence *other;
struct fence *tmp, *later;
if (!f) if (!f)
return 0; return 0;
if (amdgpu_sync_same_dev(adev, f) &&
amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) {
if (sync->last_vm_update) {
tmp = sync->last_vm_update;
BUG_ON(f->context != tmp->context);
later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp;
sync->last_vm_update = fence_get(later);
fence_put(tmp);
} else
sync->last_vm_update = fence_get(f);
}
fence = to_amdgpu_fence(f); fence = to_amdgpu_fence(f);
if (!fence || fence->ring->adev != adev) if (!fence || fence->ring->adev != adev) {
return fence_wait(f, true); hash_for_each_possible(sync->fences, e, node, f->context) {
struct fence *new;
if (unlikely(e->fence->context != f->context))
continue;
new = fence_get(fence_later(e->fence, f));
if (new) {
fence_put(e->fence);
e->fence = new;
}
return 0;
}
e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
if (!e)
return -ENOMEM;
hash_add(sync->fences, &e->node, f->context);
e->fence = fence_get(f);
return 0;
}
other = sync->sync_to[fence->ring->idx]; other = sync->sync_to[fence->ring->idx];
sync->sync_to[fence->ring->idx] = amdgpu_fence_ref( sync->sync_to[fence->ring->idx] = amdgpu_fence_ref(
amdgpu_fence_later(fence, other)); amdgpu_fence_later(fence, other));
amdgpu_fence_unref(&other); amdgpu_fence_unref(&other);
if (fence->owner == AMDGPU_FENCE_OWNER_VM) {
other = sync->last_vm_update;
sync->last_vm_update = amdgpu_fence_ref(
amdgpu_fence_later(fence, other));
amdgpu_fence_unref(&other);
}
return 0; return 0;
} }
...@@ -147,6 +202,24 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, ...@@ -147,6 +202,24 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
return r; return r;
} }
/*
 * Wait for every fence stored in the 'fences' hash table of @sync.
 *
 * Each entry is waited on with fence_wait() (non-interruptible).  Once
 * its wait returns 0 the entry is unhashed, its fence reference is
 * dropped and the entry is freed.
 *
 * Returns 0 when all entries were processed, or the first non-zero
 * fence_wait() result.  In the latter case the failing entry and any
 * entries not yet visited stay in the table.
 */
int amdgpu_sync_wait(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *entry;
	struct hlist_node *next;
	int bucket;
	int ret;

	hash_for_each_safe(sync->fences, bucket, next, entry, node) {
		ret = fence_wait(entry->fence, false);
		if (ret != 0)
			return ret;

		/* fence is done: drop the entry from the table and free it */
		hash_del(&entry->node);
		fence_put(entry->fence);
		kfree(entry);
	}

	return 0;
}
/** /**
* amdgpu_sync_rings - sync ring to all registered fences * amdgpu_sync_rings - sync ring to all registered fences
* *
...@@ -234,15 +307,23 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync, ...@@ -234,15 +307,23 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
*/ */
void amdgpu_sync_free(struct amdgpu_device *adev, void amdgpu_sync_free(struct amdgpu_device *adev,
struct amdgpu_sync *sync, struct amdgpu_sync *sync,
struct amdgpu_fence *fence) struct fence *fence)
{ {
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
unsigned i; unsigned i;
hash_for_each_safe(sync->fences, i, tmp, e, node) {
hash_del(&e->node);
fence_put(e->fence);
kfree(e);
}
for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
amdgpu_fence_unref(&sync->sync_to[i]); amdgpu_fence_unref(&sync->sync_to[i]);
amdgpu_fence_unref(&sync->last_vm_update); fence_put(sync->last_vm_update);
} }
...@@ -77,7 +77,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) ...@@ -77,7 +77,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
void *gtt_map, *vram_map; void *gtt_map, *vram_map;
void **gtt_start, **gtt_end; void **gtt_start, **gtt_end;
void **vram_start, **vram_end; void **vram_start, **vram_end;
struct amdgpu_fence *fence = NULL; struct fence *fence = NULL;
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i); AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i);
...@@ -116,13 +116,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) ...@@ -116,13 +116,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
goto out_lclean_unpin; goto out_lclean_unpin;
} }
r = fence_wait(&fence->base, false); r = fence_wait(fence, false);
if (r) { if (r) {
DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i); DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
goto out_lclean_unpin; goto out_lclean_unpin;
} }
amdgpu_fence_unref(&fence); fence_put(fence);
r = amdgpu_bo_kmap(vram_obj, &vram_map); r = amdgpu_bo_kmap(vram_obj, &vram_map);
if (r) { if (r) {
...@@ -161,13 +161,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) ...@@ -161,13 +161,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
goto out_lclean_unpin; goto out_lclean_unpin;
} }
r = fence_wait(&fence->base, false); r = fence_wait(fence, false);
if (r) { if (r) {
DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i); DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
goto out_lclean_unpin; goto out_lclean_unpin;
} }
amdgpu_fence_unref(&fence); fence_put(fence);
r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map); r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
if (r) { if (r) {
...@@ -214,7 +214,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) ...@@ -214,7 +214,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
amdgpu_bo_unref(&gtt_obj[i]); amdgpu_bo_unref(&gtt_obj[i]);
} }
if (fence) if (fence)
amdgpu_fence_unref(&fence); fence_put(fence);
break; break;
} }
......
...@@ -228,7 +228,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, ...@@ -228,7 +228,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
struct amdgpu_device *adev; struct amdgpu_device *adev;
struct amdgpu_ring *ring; struct amdgpu_ring *ring;
uint64_t old_start, new_start; uint64_t old_start, new_start;
struct amdgpu_fence *fence; struct fence *fence;
int r; int r;
adev = amdgpu_get_adev(bo->bdev); adev = amdgpu_get_adev(bo->bdev);
...@@ -269,9 +269,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, ...@@ -269,9 +269,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
new_mem->num_pages * PAGE_SIZE, /* bytes */ new_mem->num_pages * PAGE_SIZE, /* bytes */
bo->resv, &fence); bo->resv, &fence);
/* FIXME: handle copy error */ /* FIXME: handle copy error */
r = ttm_bo_move_accel_cleanup(bo, &fence->base, r = ttm_bo_move_accel_cleanup(bo, fence,
evict, no_wait_gpu, new_mem); evict, no_wait_gpu, new_mem);
amdgpu_fence_unref(&fence); fence_put(fence);
return r; return r;
} }
...@@ -987,46 +987,48 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, ...@@ -987,46 +987,48 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
uint64_t dst_offset, uint64_t dst_offset,
uint32_t byte_count, uint32_t byte_count,
struct reservation_object *resv, struct reservation_object *resv,
struct amdgpu_fence **fence) struct fence **fence)
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
struct amdgpu_sync sync;
uint32_t max_bytes; uint32_t max_bytes;
unsigned num_loops, num_dw; unsigned num_loops, num_dw;
struct amdgpu_ib *ib;
unsigned i; unsigned i;
int r; int r;
/* sync other rings */
amdgpu_sync_create(&sync);
if (resv) {
r = amdgpu_sync_resv(adev, &sync, resv, false);
if (r) {
DRM_ERROR("sync failed (%d).\n", r);
amdgpu_sync_free(adev, &sync, NULL);
return r;
}
}
max_bytes = adev->mman.buffer_funcs->copy_max_bytes; max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
/* for fence and sync */ /* for IB padding */
num_dw += 64 + AMDGPU_NUM_SYNCS * 8; while (num_dw & 0x7)
num_dw++;
ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!ib)
return -ENOMEM;
r = amdgpu_ring_lock(ring, num_dw); r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib);
if (r) { if (r) {
DRM_ERROR("ring lock failed (%d).\n", r); kfree(ib);
amdgpu_sync_free(adev, &sync, NULL);
return r; return r;
} }
amdgpu_sync_rings(&sync, ring); ib->length_dw = 0;
if (resv) {
r = amdgpu_sync_resv(adev, &ib->sync, resv,
AMDGPU_FENCE_OWNER_UNDEFINED);
if (r) {
DRM_ERROR("sync failed (%d).\n", r);
goto error_free;
}
}
for (i = 0; i < num_loops; i++) { for (i = 0; i < num_loops; i++) {
uint32_t cur_size_in_bytes = min(byte_count, max_bytes); uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_copy_buffer(adev, ring, src_offset, dst_offset, amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset,
cur_size_in_bytes); cur_size_in_bytes);
src_offset += cur_size_in_bytes; src_offset += cur_size_in_bytes;
...@@ -1034,17 +1036,24 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, ...@@ -1034,17 +1036,24 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
byte_count -= cur_size_in_bytes; byte_count -= cur_size_in_bytes;
} }
r = amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_MOVE, fence); amdgpu_vm_pad_ib(adev, ib);
if (r) { WARN_ON(ib->length_dw > num_dw);
amdgpu_ring_unlock_undo(ring); r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
amdgpu_sync_free(adev, &sync, NULL); &amdgpu_vm_free_job,
return r; AMDGPU_FENCE_OWNER_MOVE,
} fence);
if (r)
amdgpu_ring_unlock_commit(ring); goto error_free;
amdgpu_sync_free(adev, &sync, *fence);
if (!amdgpu_enable_scheduler) {
amdgpu_ib_free(adev, ib);
kfree(ib);
}
return 0; return 0;
error_free:
amdgpu_ib_free(adev, ib);
kfree(ib);
return r;
} }
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
......
...@@ -807,7 +807,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) ...@@ -807,7 +807,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
} }
static int amdgpu_uvd_free_job( static int amdgpu_uvd_free_job(
struct amdgpu_cs_parser *sched_job) struct amdgpu_job *sched_job)
{ {
amdgpu_ib_free(sched_job->adev, sched_job->ibs); amdgpu_ib_free(sched_job->adev, sched_job->ibs);
kfree(sched_job->ibs); kfree(sched_job->ibs);
......
...@@ -340,7 +340,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) ...@@ -340,7 +340,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
} }
static int amdgpu_vce_free_job( static int amdgpu_vce_free_job(
struct amdgpu_cs_parser *sched_job) struct amdgpu_job *sched_job)
{ {
amdgpu_ib_free(sched_job->adev, sched_job->ibs); amdgpu_ib_free(sched_job->adev, sched_job->ibs);
kfree(sched_job->ibs); kfree(sched_job->ibs);
......
...@@ -200,19 +200,29 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, ...@@ -200,19 +200,29 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
*/ */
void amdgpu_vm_flush(struct amdgpu_ring *ring, void amdgpu_vm_flush(struct amdgpu_ring *ring,
struct amdgpu_vm *vm, struct amdgpu_vm *vm,
struct amdgpu_fence *updates) struct fence *updates)
{ {
uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
struct amdgpu_fence *flushed_updates = vm_id->flushed_updates; struct fence *flushed_updates = vm_id->flushed_updates;
bool is_earlier = false;
if (flushed_updates && updates) {
BUG_ON(flushed_updates->context != updates->context);
is_earlier = (updates->seqno - flushed_updates->seqno <=
INT_MAX) ? true : false;
}
if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates || if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
(updates && amdgpu_fence_is_earlier(flushed_updates, updates))) { is_earlier) {
trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
vm_id->flushed_updates = amdgpu_fence_ref( if (is_earlier) {
amdgpu_fence_later(flushed_updates, updates)); vm_id->flushed_updates = fence_get(updates);
amdgpu_fence_unref(&flushed_updates); fence_put(flushed_updates);
}
if (!flushed_updates)
vm_id->flushed_updates = fence_get(updates);
vm_id->pd_gpu_addr = pd_addr; vm_id->pd_gpu_addr = pd_addr;
amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
} }
...@@ -306,8 +316,7 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev, ...@@ -306,8 +316,7 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
} }
} }
static int amdgpu_vm_free_job( int amdgpu_vm_free_job(struct amdgpu_job *sched_job)
struct amdgpu_cs_parser *sched_job)
{ {
int i; int i;
for (i = 0; i < sched_job->num_ibs; i++) for (i = 0; i < sched_job->num_ibs; i++)
...@@ -1347,7 +1356,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) ...@@ -1347,7 +1356,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
fence_put(vm->page_directory_fence); fence_put(vm->page_directory_fence);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
amdgpu_fence_unref(&vm->ids[i].flushed_updates); fence_put(vm->ids[i].flushed_updates);
amdgpu_fence_unref(&vm->ids[i].last_id_use); amdgpu_fence_unref(&vm->ids[i].last_id_use);
} }
......
...@@ -630,6 +630,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) ...@@ -630,6 +630,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
gpu_addr = adev->wb.gpu_addr + (index * 4); gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD; tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp); adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib); r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) { if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
...@@ -1338,18 +1339,18 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev) ...@@ -1338,18 +1339,18 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
* Used by the amdgpu ttm implementation to move pages if * Used by the amdgpu ttm implementation to move pages if
* registered as the asic copy callback. * registered as the asic copy callback.
*/ */
static void cik_sdma_emit_copy_buffer(struct amdgpu_ring *ring, static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset, uint64_t src_offset,
uint64_t dst_offset, uint64_t dst_offset,
uint32_t byte_count) uint32_t byte_count)
{ {
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
amdgpu_ring_write(ring, byte_count); ib->ptr[ib->length_dw++] = byte_count;
amdgpu_ring_write(ring, 0); /* src/dst endian swap */ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
amdgpu_ring_write(ring, lower_32_bits(src_offset)); ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
amdgpu_ring_write(ring, upper_32_bits(src_offset)); ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
amdgpu_ring_write(ring, lower_32_bits(dst_offset)); ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
amdgpu_ring_write(ring, upper_32_bits(dst_offset)); ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
} }
/** /**
......
...@@ -2660,6 +2660,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) ...@@ -2660,6 +2660,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
return r; return r;
} }
WREG32(scratch, 0xCAFEDEAD); WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib); r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) { if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
......
...@@ -622,6 +622,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) ...@@ -622,6 +622,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
return r; return r;
} }
WREG32(scratch, 0xCAFEDEAD); WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib); r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) { if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
......
...@@ -689,6 +689,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) ...@@ -689,6 +689,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
gpu_addr = adev->wb.gpu_addr + (index * 4); gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD; tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp); adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib); r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) { if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
...@@ -1349,19 +1350,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) ...@@ -1349,19 +1350,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
* Used by the amdgpu ttm implementation to move pages if * Used by the amdgpu ttm implementation to move pages if
* registered as the asic copy callback. * registered as the asic copy callback.
*/ */
static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring, static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset, uint64_t src_offset,
uint64_t dst_offset, uint64_t dst_offset,
uint32_t byte_count) uint32_t byte_count)
{ {
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
amdgpu_ring_write(ring, byte_count); ib->ptr[ib->length_dw++] = byte_count;
amdgpu_ring_write(ring, 0); /* src/dst endian swap */ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
amdgpu_ring_write(ring, lower_32_bits(src_offset)); ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
amdgpu_ring_write(ring, upper_32_bits(src_offset)); ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
amdgpu_ring_write(ring, lower_32_bits(dst_offset)); ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
amdgpu_ring_write(ring, upper_32_bits(dst_offset)); ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
} }
/** /**
......
...@@ -810,6 +810,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) ...@@ -810,6 +810,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
gpu_addr = adev->wb.gpu_addr + (index * 4); gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD; tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp); adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib); r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) { if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
...@@ -1473,19 +1474,19 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev) ...@@ -1473,19 +1474,19 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
* Used by the amdgpu ttm implementation to move pages if * Used by the amdgpu ttm implementation to move pages if
* registered as the asic copy callback. * registered as the asic copy callback.
*/ */
static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ring *ring, static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset, uint64_t src_offset,
uint64_t dst_offset, uint64_t dst_offset,
uint32_t byte_count) uint32_t byte_count)
{ {
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
amdgpu_ring_write(ring, byte_count); ib->ptr[ib->length_dw++] = byte_count;
amdgpu_ring_write(ring, 0); /* src/dst endian swap */ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
amdgpu_ring_write(ring, lower_32_bits(src_offset)); ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
amdgpu_ring_write(ring, upper_32_bits(src_offset)); ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
amdgpu_ring_write(ring, lower_32_bits(dst_offset)); ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
amdgpu_ring_write(ring, upper_32_bits(dst_offset)); ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
} }
/** /**
......
...@@ -27,30 +27,32 @@ ...@@ -27,30 +27,32 @@
#include <drm/drmP.h> #include <drm/drmP.h>
#include "gpu_scheduler.h" #include "gpu_scheduler.h"
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
/* Initialize a given run queue struct */ /* Initialize a given run queue struct */
static void amd_sched_rq_init(struct amd_sched_rq *rq) static void amd_sched_rq_init(struct amd_sched_rq *rq)
{ {
spin_lock_init(&rq->lock);
INIT_LIST_HEAD(&rq->entities); INIT_LIST_HEAD(&rq->entities);
mutex_init(&rq->lock);
rq->current_entity = NULL; rq->current_entity = NULL;
} }
static void amd_sched_rq_add_entity(struct amd_sched_rq *rq, static void amd_sched_rq_add_entity(struct amd_sched_rq *rq,
struct amd_sched_entity *entity) struct amd_sched_entity *entity)
{ {
mutex_lock(&rq->lock); spin_lock(&rq->lock);
list_add_tail(&entity->list, &rq->entities); list_add_tail(&entity->list, &rq->entities);
mutex_unlock(&rq->lock); spin_unlock(&rq->lock);
} }
static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq, static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
struct amd_sched_entity *entity) struct amd_sched_entity *entity)
{ {
mutex_lock(&rq->lock); spin_lock(&rq->lock);
list_del_init(&entity->list); list_del_init(&entity->list);
if (rq->current_entity == entity) if (rq->current_entity == entity)
rq->current_entity = NULL; rq->current_entity = NULL;
mutex_unlock(&rq->lock); spin_unlock(&rq->lock);
} }
/** /**
...@@ -61,12 +63,16 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq, ...@@ -61,12 +63,16 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
static struct amd_sched_entity * static struct amd_sched_entity *
amd_sched_rq_select_entity(struct amd_sched_rq *rq) amd_sched_rq_select_entity(struct amd_sched_rq *rq)
{ {
struct amd_sched_entity *entity = rq->current_entity; struct amd_sched_entity *entity;
spin_lock(&rq->lock);
entity = rq->current_entity;
if (entity) { if (entity) {
list_for_each_entry_continue(entity, &rq->entities, list) { list_for_each_entry_continue(entity, &rq->entities, list) {
if (!kfifo_is_empty(&entity->job_queue)) { if (!kfifo_is_empty(&entity->job_queue)) {
rq->current_entity = entity; rq->current_entity = entity;
spin_unlock(&rq->lock);
return rq->current_entity; return rq->current_entity;
} }
} }
...@@ -76,6 +82,7 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq) ...@@ -76,6 +82,7 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq)
if (!kfifo_is_empty(&entity->job_queue)) { if (!kfifo_is_empty(&entity->job_queue)) {
rq->current_entity = entity; rq->current_entity = entity;
spin_unlock(&rq->lock);
return rq->current_entity; return rq->current_entity;
} }
...@@ -83,76 +90,9 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq) ...@@ -83,76 +90,9 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq)
break; break;
} }
return NULL; spin_unlock(&rq->lock);
}
/**
* Note: This function should only be called inside scheduler main
* function for thread safety, there is no other protection here.
* Returns true if scheduler has something ready to run.
*
* For active_hw_rq, there is only one producer(scheduler thread) and
* one consumer(ISR). It should be safe to use this function in scheduler
* main thread to decide whether to continue emit more IBs.
*/
static bool is_scheduler_ready(struct amd_gpu_scheduler *sched)
{
unsigned long flags;
bool full;
spin_lock_irqsave(&sched->queue_lock, flags);
full = atomic64_read(&sched->hw_rq_count) <
sched->hw_submission_limit ? true : false;
spin_unlock_irqrestore(&sched->queue_lock, flags);
return full;
}
/**
* Select next entity from the kernel run queue, if not available,
* return null.
*/
static struct amd_sched_entity *
kernel_rq_select_context(struct amd_gpu_scheduler *sched)
{
struct amd_sched_entity *sched_entity;
struct amd_sched_rq *rq = &sched->kernel_rq;
mutex_lock(&rq->lock);
sched_entity = amd_sched_rq_select_entity(rq);
mutex_unlock(&rq->lock);
return sched_entity;
}
/**
* Select next entity containing real IB submissions
*/
static struct amd_sched_entity *
select_context(struct amd_gpu_scheduler *sched)
{
struct amd_sched_entity *wake_entity = NULL;
struct amd_sched_entity *tmp;
struct amd_sched_rq *rq;
if (!is_scheduler_ready(sched))
return NULL; return NULL;
/* Kernel run queue has higher priority than normal run queue*/
tmp = kernel_rq_select_context(sched);
if (tmp != NULL)
goto exit;
rq = &sched->sched_rq;
mutex_lock(&rq->lock);
tmp = amd_sched_rq_select_entity(rq);
mutex_unlock(&rq->lock);
exit:
if (sched->current_entity && (sched->current_entity != tmp))
wake_entity = sched->current_entity;
sched->current_entity = tmp;
if (wake_entity && wake_entity->need_wakeup)
wake_up(&wake_entity->wait_queue);
return tmp;
} }
/** /**
...@@ -171,31 +111,20 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, ...@@ -171,31 +111,20 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
struct amd_sched_rq *rq, struct amd_sched_rq *rq,
uint32_t jobs) uint32_t jobs)
{ {
uint64_t seq_ring = 0;
char name[20];
if (!(sched && entity && rq)) if (!(sched && entity && rq))
return -EINVAL; return -EINVAL;
memset(entity, 0, sizeof(struct amd_sched_entity)); memset(entity, 0, sizeof(struct amd_sched_entity));
seq_ring = ((uint64_t)sched->ring_id) << 60;
spin_lock_init(&entity->lock);
entity->belongto_rq = rq; entity->belongto_rq = rq;
entity->scheduler = sched; entity->scheduler = sched;
init_waitqueue_head(&entity->wait_queue);
init_waitqueue_head(&entity->wait_emit);
entity->fence_context = fence_context_alloc(1); entity->fence_context = fence_context_alloc(1);
snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context);
memcpy(entity->name, name, 20);
entity->need_wakeup = false;
if(kfifo_alloc(&entity->job_queue, if(kfifo_alloc(&entity->job_queue,
jobs * sizeof(void *), jobs * sizeof(void *),
GFP_KERNEL)) GFP_KERNEL))
return -EINVAL; return -EINVAL;
spin_lock_init(&entity->queue_lock); spin_lock_init(&entity->queue_lock);
atomic64_set(&entity->last_queued_v_seq, seq_ring); atomic_set(&entity->fence_seq, 0);
atomic64_set(&entity->last_signaled_v_seq, seq_ring);
/* Add the entity to the run queue */ /* Add the entity to the run queue */
amd_sched_rq_add_entity(rq, entity); amd_sched_rq_add_entity(rq, entity);
...@@ -210,23 +139,24 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, ...@@ -210,23 +139,24 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
* *
* return true if entity is initialized, false otherwise * return true if entity is initialized, false otherwise
*/ */
static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched, static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity) struct amd_sched_entity *entity)
{ {
return entity->scheduler == sched && return entity->scheduler == sched &&
entity->belongto_rq != NULL; entity->belongto_rq != NULL;
} }
static bool is_context_entity_idle(struct amd_gpu_scheduler *sched, /**
struct amd_sched_entity *entity) * Check if entity is idle
{ *
/** * @entity The pointer to a valid scheduler entity
* Idle means no pending IBs, and the entity is not *
* currently being used. * Return true if entity doesn't have any unscheduled jobs.
*/ */
barrier(); static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
if ((sched->current_entity != entity) && {
kfifo_is_empty(&entity->job_queue)) rmb();
if (kfifo_is_empty(&entity->job_queue))
return true; return true;
return false; return false;
...@@ -238,84 +168,114 @@ static bool is_context_entity_idle(struct amd_gpu_scheduler *sched, ...@@ -238,84 +168,114 @@ static bool is_context_entity_idle(struct amd_gpu_scheduler *sched,
* @sched Pointer to scheduler instance * @sched Pointer to scheduler instance
* @entity The pointer to a valid scheduler entity * @entity The pointer to a valid scheduler entity
* *
* return 0 if succeed. negative error code on failure * Cleanup and free the allocated resources.
*/ */
int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity) struct amd_sched_entity *entity)
{ {
int r = 0;
struct amd_sched_rq *rq = entity->belongto_rq; struct amd_sched_rq *rq = entity->belongto_rq;
if (!is_context_entity_initialized(sched, entity)) if (!amd_sched_entity_is_initialized(sched, entity))
return 0; return;
entity->need_wakeup = true;
/** /**
* The client will not queue more IBs during this fini, consume existing * The client will not queue more IBs during this fini, consume existing
* queued IBs * queued IBs
*/ */
r = wait_event_timeout( wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity));
entity->wait_queue,
is_context_entity_idle(sched, entity),
msecs_to_jiffies(AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS)
) ? 0 : -1;
if (r) {
if (entity->is_pending)
DRM_INFO("Entity %p is in waiting state during fini,\
all pending ibs will be canceled.\n",
entity);
}
amd_sched_rq_remove_entity(rq, entity); amd_sched_rq_remove_entity(rq, entity);
kfifo_free(&entity->job_queue); kfifo_free(&entity->job_queue);
return r;
} }
/** /**
* Submit a normal job to the job queue * Helper to submit a job to the job queue
* *
* @sched The pointer to the scheduler
* @c_entity The pointer to amd_sched_entity
* @job The pointer to job required to submit * @job The pointer to job required to submit
* return 0 if succeed. -1 if failed. *
* -2 indicates queue is full for this client, client should wait until * Returns true if we could submit the job.
* scheduler consumes some queued commands. */
* -1 other fail. static bool amd_sched_entity_in(struct amd_sched_job *job)
*/
int amd_sched_push_job(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *c_entity,
void *data,
struct amd_sched_fence **fence)
{ {
struct amd_sched_job *job; struct amd_sched_entity *entity = job->s_entity;
bool added, first = false;
spin_lock(&entity->queue_lock);
added = kfifo_in(&entity->job_queue, &job, sizeof(job)) == sizeof(job);
if (added && kfifo_len(&entity->job_queue) == sizeof(job))
first = true;
spin_unlock(&entity->queue_lock);
/* first job wakes up scheduler */
if (first)
amd_sched_wakeup(job->sched);
return added;
}
/**
* Submit a job to the job queue
*
* @job The pointer to job required to submit
*
* Returns 0 for success, negative error code otherwise.
*/
int amd_sched_entity_push_job(struct amd_sched_job *sched_job)
{
struct amd_sched_entity *entity = sched_job->s_entity;
struct amd_sched_fence *fence = amd_sched_fence_create(
entity, sched_job->owner);
int r;
if (!fence) if (!fence)
return -EINVAL;
job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL);
if (!job)
return -ENOMEM; return -ENOMEM;
job->sched = sched;
job->s_entity = c_entity; fence_get(&fence->base);
job->data = data; sched_job->s_fence = fence;
*fence = amd_sched_fence_create(c_entity);
if ((*fence) == NULL) { r = wait_event_interruptible(entity->scheduler->job_scheduled,
kfree(job); amd_sched_entity_in(sched_job));
return -EINVAL;
} return r;
fence_get(&(*fence)->base); }
job->s_fence = *fence;
while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), /**
&c_entity->queue_lock) != sizeof(void *)) { * Return true if we can push more jobs to the hw.
/**
* Current context used up all its IB slots
* wait here, or need to check whether GPU is hung
*/ */
schedule(); static bool amd_sched_ready(struct amd_gpu_scheduler *sched)
} {
/* first job wake up scheduler */ return atomic_read(&sched->hw_rq_count) <
if ((kfifo_len(&c_entity->job_queue) / sizeof(void *)) == 1) sched->hw_submission_limit;
wake_up_interruptible(&sched->wait_queue); }
return 0;
/**
* Wake up the scheduler when it is ready
*/
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
{
if (amd_sched_ready(sched))
wake_up_interruptible(&sched->wake_up_worker);
}
/**
* Select next entity containing real IB submissions
*/
static struct amd_sched_entity *
amd_sched_select_context(struct amd_gpu_scheduler *sched)
{
struct amd_sched_entity *tmp;
if (!amd_sched_ready(sched))
return NULL;
/* Kernel run queue has higher priority than normal run queue*/
tmp = amd_sched_rq_select_entity(&sched->kernel_rq);
if (tmp == NULL)
tmp = amd_sched_rq_select_entity(&sched->sched_rq);
return tmp;
} }
static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
...@@ -323,52 +283,41 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) ...@@ -323,52 +283,41 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
struct amd_sched_job *sched_job = struct amd_sched_job *sched_job =
container_of(cb, struct amd_sched_job, cb); container_of(cb, struct amd_sched_job, cb);
struct amd_gpu_scheduler *sched; struct amd_gpu_scheduler *sched;
unsigned long flags;
sched = sched_job->sched; sched = sched_job->sched;
atomic64_set(&sched_job->s_entity->last_signaled_v_seq,
sched_job->s_fence->v_seq);
amd_sched_fence_signal(sched_job->s_fence); amd_sched_fence_signal(sched_job->s_fence);
spin_lock_irqsave(&sched->queue_lock, flags); atomic_dec(&sched->hw_rq_count);
list_del(&sched_job->list);
atomic64_dec(&sched->hw_rq_count);
spin_unlock_irqrestore(&sched->queue_lock, flags);
sched->ops->process_job(sched, sched_job);
fence_put(&sched_job->s_fence->base); fence_put(&sched_job->s_fence->base);
kfree(sched_job); sched->ops->process_job(sched_job);
wake_up_interruptible(&sched->wait_queue); wake_up_interruptible(&sched->wake_up_worker);
} }
static int amd_sched_main(void *param) static int amd_sched_main(void *param)
{ {
int r;
struct amd_sched_job *job;
struct sched_param sparam = {.sched_priority = 1}; struct sched_param sparam = {.sched_priority = 1};
struct amd_sched_entity *c_entity = NULL;
struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param; struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
int r;
sched_setscheduler(current, SCHED_FIFO, &sparam); sched_setscheduler(current, SCHED_FIFO, &sparam);
while (!kthread_should_stop()) { while (!kthread_should_stop()) {
struct amd_sched_entity *c_entity = NULL;
struct amd_sched_job *job;
struct fence *fence; struct fence *fence;
wait_event_interruptible(sched->wait_queue, wait_event_interruptible(sched->wake_up_worker,
is_scheduler_ready(sched) && kthread_should_stop() ||
(c_entity = select_context(sched))); (c_entity = amd_sched_select_context(sched)));
if (!c_entity)
continue;
r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *)); r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
if (r != sizeof(void *)) if (r != sizeof(void *))
continue; continue;
r = sched->ops->prepare_job(sched, c_entity, job); atomic_inc(&sched->hw_rq_count);
if (!r) {
unsigned long flags; fence = sched->ops->run_job(job);
spin_lock_irqsave(&sched->queue_lock, flags);
list_add_tail(&job->list, &sched->active_hw_rq);
atomic64_inc(&sched->hw_rq_count);
spin_unlock_irqrestore(&sched->queue_lock, flags);
}
mutex_lock(&sched->sched_lock);
fence = sched->ops->run_job(sched, c_entity, job);
if (fence) { if (fence) {
r = fence_add_callback(fence, &job->cb, r = fence_add_callback(fence, &job->cb,
amd_sched_process_job); amd_sched_process_job);
...@@ -378,7 +327,8 @@ static int amd_sched_main(void *param) ...@@ -378,7 +327,8 @@ static int amd_sched_main(void *param)
DRM_ERROR("fence add callback failed (%d)\n", r); DRM_ERROR("fence add callback failed (%d)\n", r);
fence_put(fence); fence_put(fence);
} }
mutex_unlock(&sched->sched_lock);
wake_up(&sched->job_scheduled);
} }
return 0; return 0;
} }
...@@ -386,53 +336,42 @@ static int amd_sched_main(void *param) ...@@ -386,53 +336,42 @@ static int amd_sched_main(void *param)
/** /**
* Create a gpu scheduler * Create a gpu scheduler
* *
* @device The device context for this scheduler
* @ops The backend operations for this scheduler. * @ops The backend operations for this scheduler.
* @id The scheduler is per ring, here is ring id. * @ring The ring id for the scheduler.
* @granularity The minimum ms unit the scheduler will schedule. * @hw_submission Number of hw submissions to do.
* @preemption Indicate whether this ring support preemption, 0 is no.
* *
* return the pointer to scheduler for success, otherwise return NULL * Return the pointer to scheduler for success, otherwise return NULL
*/ */
struct amd_gpu_scheduler *amd_sched_create(void *device, struct amd_gpu_scheduler *amd_sched_create(struct amd_sched_backend_ops *ops,
struct amd_sched_backend_ops *ops, unsigned ring, unsigned hw_submission,
unsigned ring, void *priv)
unsigned granularity,
unsigned preemption,
unsigned hw_submission)
{ {
struct amd_gpu_scheduler *sched; struct amd_gpu_scheduler *sched;
char name[20];
sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL); sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL);
if (!sched) if (!sched)
return NULL; return NULL;
sched->device = device;
sched->ops = ops; sched->ops = ops;
sched->granularity = granularity;
sched->ring_id = ring; sched->ring_id = ring;
sched->preemption = preemption;
sched->hw_submission_limit = hw_submission; sched->hw_submission_limit = hw_submission;
snprintf(name, sizeof(name), "gpu_sched[%d]", ring); sched->priv = priv;
mutex_init(&sched->sched_lock); snprintf(sched->name, sizeof(sched->name), "amdgpu[%d]", ring);
spin_lock_init(&sched->queue_lock);
amd_sched_rq_init(&sched->sched_rq); amd_sched_rq_init(&sched->sched_rq);
amd_sched_rq_init(&sched->kernel_rq); amd_sched_rq_init(&sched->kernel_rq);
init_waitqueue_head(&sched->wait_queue); init_waitqueue_head(&sched->wake_up_worker);
INIT_LIST_HEAD(&sched->active_hw_rq); init_waitqueue_head(&sched->job_scheduled);
atomic64_set(&sched->hw_rq_count, 0); atomic_set(&sched->hw_rq_count, 0);
/* Each scheduler will run on a separate kernel thread */ /* Each scheduler will run on a separate kernel thread */
sched->thread = kthread_create(amd_sched_main, sched, name); sched->thread = kthread_run(amd_sched_main, sched, sched->name);
if (sched->thread) { if (IS_ERR(sched->thread)) {
wake_up_process(sched->thread);
return sched;
}
DRM_ERROR("Failed to create scheduler for id %d.\n", ring); DRM_ERROR("Failed to create scheduler for id %d.\n", ring);
kfree(sched); kfree(sched);
return NULL; return NULL;
}
return sched;
} }
/** /**
...@@ -448,15 +387,3 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched) ...@@ -448,15 +387,3 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched)
kfree(sched); kfree(sched);
return 0; return 0;
} }
/**
* Get next queued sequence number
*
* @entity The context entity
*
* return the next queued sequence number
*/
uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity)
{
return atomic64_read(&c_entity->last_queued_v_seq) + 1;
}
...@@ -27,8 +27,6 @@ ...@@ -27,8 +27,6 @@
#include <linux/kfifo.h> #include <linux/kfifo.h>
#include <linux/fence.h> #include <linux/fence.h>
#define AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
struct amd_gpu_scheduler; struct amd_gpu_scheduler;
struct amd_sched_rq; struct amd_sched_rq;
...@@ -41,20 +39,12 @@ struct amd_sched_rq; ...@@ -41,20 +39,12 @@ struct amd_sched_rq;
struct amd_sched_entity { struct amd_sched_entity {
struct list_head list; struct list_head list;
struct amd_sched_rq *belongto_rq; struct amd_sched_rq *belongto_rq;
spinlock_t lock; atomic_t fence_seq;
/* the virtual_seq is unique per context per ring */
atomic64_t last_queued_v_seq;
atomic64_t last_signaled_v_seq;
/* the job_queue maintains the jobs submitted by clients */ /* the job_queue maintains the jobs submitted by clients */
struct kfifo job_queue; struct kfifo job_queue;
spinlock_t queue_lock; spinlock_t queue_lock;
struct amd_gpu_scheduler *scheduler; struct amd_gpu_scheduler *scheduler;
wait_queue_head_t wait_queue;
wait_queue_head_t wait_emit;
bool is_pending;
uint64_t fence_context; uint64_t fence_context;
char name[20];
bool need_wakeup;
}; };
/** /**
...@@ -63,26 +53,24 @@ struct amd_sched_entity { ...@@ -63,26 +53,24 @@ struct amd_sched_entity {
* the next entity to emit commands from. * the next entity to emit commands from.
*/ */
struct amd_sched_rq { struct amd_sched_rq {
struct mutex lock; spinlock_t lock;
struct list_head entities; struct list_head entities;
struct amd_sched_entity *current_entity; struct amd_sched_entity *current_entity;
}; };
struct amd_sched_fence { struct amd_sched_fence {
struct fence base; struct fence base;
struct fence_cb cb; struct amd_gpu_scheduler *scheduler;
struct amd_sched_entity *entity;
uint64_t v_seq;
spinlock_t lock; spinlock_t lock;
void *owner;
}; };
struct amd_sched_job { struct amd_sched_job {
struct list_head list;
struct fence_cb cb; struct fence_cb cb;
struct amd_gpu_scheduler *sched; struct amd_gpu_scheduler *sched;
struct amd_sched_entity *s_entity; struct amd_sched_entity *s_entity;
void *data;
struct amd_sched_fence *s_fence; struct amd_sched_fence *s_fence;
void *owner;
}; };
extern const struct fence_ops amd_sched_fence_ops; extern const struct fence_ops amd_sched_fence_ops;
...@@ -101,61 +89,42 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) ...@@ -101,61 +89,42 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
* these functions should be implemented in driver side * these functions should be implemented in driver side
*/ */
struct amd_sched_backend_ops { struct amd_sched_backend_ops {
int (*prepare_job)(struct amd_gpu_scheduler *sched, struct fence *(*run_job)(struct amd_sched_job *job);
struct amd_sched_entity *c_entity, void (*process_job)(struct amd_sched_job *job);
struct amd_sched_job *job);
struct fence *(*run_job)(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *c_entity,
struct amd_sched_job *job);
void (*process_job)(struct amd_gpu_scheduler *sched,
struct amd_sched_job *job);
}; };
/** /**
* One scheduler is implemented for each hardware ring * One scheduler is implemented for each hardware ring
*/ */
struct amd_gpu_scheduler { struct amd_gpu_scheduler {
void *device;
struct task_struct *thread; struct task_struct *thread;
struct amd_sched_rq sched_rq; struct amd_sched_rq sched_rq;
struct amd_sched_rq kernel_rq; struct amd_sched_rq kernel_rq;
struct list_head active_hw_rq; atomic_t hw_rq_count;
atomic64_t hw_rq_count;
struct amd_sched_backend_ops *ops; struct amd_sched_backend_ops *ops;
uint32_t ring_id; uint32_t ring_id;
uint32_t granularity; /* in ms unit */ wait_queue_head_t wake_up_worker;
uint32_t preemption; wait_queue_head_t job_scheduled;
wait_queue_head_t wait_queue;
struct amd_sched_entity *current_entity;
struct mutex sched_lock;
spinlock_t queue_lock;
uint32_t hw_submission_limit; uint32_t hw_submission_limit;
char name[20];
void *priv;
}; };
struct amd_gpu_scheduler *amd_sched_create(void *device, struct amd_gpu_scheduler *
struct amd_sched_backend_ops *ops, amd_sched_create(struct amd_sched_backend_ops *ops,
uint32_t ring, uint32_t ring, uint32_t hw_submission, void *priv);
uint32_t granularity,
uint32_t preemption,
uint32_t hw_submission);
int amd_sched_destroy(struct amd_gpu_scheduler *sched); int amd_sched_destroy(struct amd_gpu_scheduler *sched);
int amd_sched_push_job(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *c_entity,
void *data,
struct amd_sched_fence **fence);
int amd_sched_entity_init(struct amd_gpu_scheduler *sched, int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity, struct amd_sched_entity *entity,
struct amd_sched_rq *rq, struct amd_sched_rq *rq,
uint32_t jobs); uint32_t jobs);
int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity); struct amd_sched_entity *entity);
int amd_sched_entity_push_job(struct amd_sched_job *sched_job);
uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity);
struct amd_sched_fence *amd_sched_fence_create( struct amd_sched_fence *amd_sched_fence_create(
struct amd_sched_entity *s_entity); struct amd_sched_entity *s_entity, void *owner);
void amd_sched_fence_signal(struct amd_sched_fence *fence); void amd_sched_fence_signal(struct amd_sched_fence *fence);
......
...@@ -27,19 +27,22 @@ ...@@ -27,19 +27,22 @@
#include <drm/drmP.h> #include <drm/drmP.h>
#include "gpu_scheduler.h" #include "gpu_scheduler.h"
struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity) struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity, void *owner)
{ {
struct amd_sched_fence *fence = NULL; struct amd_sched_fence *fence = NULL;
unsigned seq;
fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL);
if (fence == NULL) if (fence == NULL)
return NULL; return NULL;
fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq); fence->owner = owner;
fence->entity = s_entity; fence->scheduler = s_entity->scheduler;
spin_lock_init(&fence->lock); spin_lock_init(&fence->lock);
fence_init(&fence->base, &amd_sched_fence_ops,
&fence->lock, seq = atomic_inc_return(&s_entity->fence_seq);
s_entity->fence_context, fence_init(&fence->base, &amd_sched_fence_ops, &fence->lock,
fence->v_seq); s_entity->fence_context, seq);
return fence; return fence;
} }
...@@ -60,7 +63,7 @@ static const char *amd_sched_fence_get_driver_name(struct fence *fence) ...@@ -60,7 +63,7 @@ static const char *amd_sched_fence_get_driver_name(struct fence *fence)
static const char *amd_sched_fence_get_timeline_name(struct fence *f) static const char *amd_sched_fence_get_timeline_name(struct fence *f)
{ {
struct amd_sched_fence *fence = to_amd_sched_fence(f); struct amd_sched_fence *fence = to_amd_sched_fence(f);
return (const char *)fence->entity->name; return (const char *)fence->scheduler->name;
} }
static bool amd_sched_fence_enable_signaling(struct fence *f) static bool amd_sched_fence_enable_signaling(struct fence *f)
......
...@@ -95,6 +95,11 @@ void radeon_connector_hotplug(struct drm_connector *connector) ...@@ -95,6 +95,11 @@ void radeon_connector_hotplug(struct drm_connector *connector)
if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
} else if (radeon_dp_needs_link_train(radeon_connector)) { } else if (radeon_dp_needs_link_train(radeon_connector)) {
/* Don't try to start link training before we
* have the dpcd */
if (!radeon_dp_getdpcd(radeon_connector))
return;
/* set it to OFF so that drm_helper_connector_dpms() /* set it to OFF so that drm_helper_connector_dpms()
* won't return immediately since the current state * won't return immediately since the current state
* is ON at this point. * is ON at this point.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment