Commit 2f137161 authored by Dave Airlie's avatar Dave Airlie

Merge branch 'drm-fixes-4.4' of git://people.freedesktop.org/~agd5f/linux into drm-fixes

Radeon and amdgpu fixes for 4.4:
- DPM fixes for r7xx devices
- VCE fixes for Stoney
- GPUVM fixes
- Scheduler fixes

* 'drm-fixes-4.4' of git://people.freedesktop.org/~agd5f/linux:
  drm/radeon: make some dpm errors debug only
  drm/radeon: make rv770_set_sw_state failures non-fatal
  drm/amdgpu: move dependency handling out of atomic section v2
  drm/amdgpu: optimize scheduler fence handling
  drm/amdgpu: remove vm->mutex
  drm/amdgpu: add mutex for ba_va->valids/invalids
  drm/amdgpu: adapt vce session create interface changes
  drm/amdgpu: vce use multiple cache surface starting from stoney
  drm/amdgpu: reset vce trap interrupt flag
parents 78c4a49a 9c565e33
...@@ -496,6 +496,7 @@ struct amdgpu_bo_va_mapping { ...@@ -496,6 +496,7 @@ struct amdgpu_bo_va_mapping {
/* bo virtual addresses in a specific vm */ /* bo virtual addresses in a specific vm */
struct amdgpu_bo_va { struct amdgpu_bo_va {
struct mutex mutex;
/* protected by bo being reserved */ /* protected by bo being reserved */
struct list_head bo_list; struct list_head bo_list;
struct fence *last_pt_update; struct fence *last_pt_update;
...@@ -928,8 +929,6 @@ struct amdgpu_vm_id { ...@@ -928,8 +929,6 @@ struct amdgpu_vm_id {
}; };
struct amdgpu_vm { struct amdgpu_vm {
struct mutex mutex;
struct rb_root va; struct rb_root va;
/* protecting invalidated */ /* protecting invalidated */
......
...@@ -784,8 +784,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -784,8 +784,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{ {
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
union drm_amdgpu_cs *cs = data; union drm_amdgpu_cs *cs = data;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_cs_parser parser = {}; struct amdgpu_cs_parser parser = {};
bool reserved_buffers = false; bool reserved_buffers = false;
int i, r; int i, r;
...@@ -803,7 +801,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -803,7 +801,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_handle_lockup(adev, r); r = amdgpu_cs_handle_lockup(adev, r);
return r; return r;
} }
mutex_lock(&vm->mutex);
r = amdgpu_cs_parser_relocs(&parser); r = amdgpu_cs_parser_relocs(&parser);
if (r == -ENOMEM) if (r == -ENOMEM)
DRM_ERROR("Not enough memory for command submission!\n"); DRM_ERROR("Not enough memory for command submission!\n");
...@@ -888,7 +885,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -888,7 +885,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
out: out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers); amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
mutex_unlock(&vm->mutex);
r = amdgpu_cs_handle_lockup(adev, r); r = amdgpu_cs_handle_lockup(adev, r);
return r; return r;
} }
......
...@@ -115,12 +115,9 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri ...@@ -115,12 +115,9 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va; struct amdgpu_bo_va *bo_va;
int r; int r;
mutex_lock(&vm->mutex);
r = amdgpu_bo_reserve(rbo, false); r = amdgpu_bo_reserve(rbo, false);
if (r) { if (r)
mutex_unlock(&vm->mutex);
return r; return r;
}
bo_va = amdgpu_vm_bo_find(vm, rbo); bo_va = amdgpu_vm_bo_find(vm, rbo);
if (!bo_va) { if (!bo_va) {
...@@ -129,7 +126,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri ...@@ -129,7 +126,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
++bo_va->ref_count; ++bo_va->ref_count;
} }
amdgpu_bo_unreserve(rbo); amdgpu_bo_unreserve(rbo);
mutex_unlock(&vm->mutex);
return 0; return 0;
} }
...@@ -142,10 +138,8 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, ...@@ -142,10 +138,8 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va; struct amdgpu_bo_va *bo_va;
int r; int r;
mutex_lock(&vm->mutex);
r = amdgpu_bo_reserve(rbo, true); r = amdgpu_bo_reserve(rbo, true);
if (r) { if (r) {
mutex_unlock(&vm->mutex);
dev_err(adev->dev, "leaking bo va because " dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r); "we fail to reserve bo (%d)\n", r);
return; return;
...@@ -157,7 +151,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, ...@@ -157,7 +151,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
} }
} }
amdgpu_bo_unreserve(rbo); amdgpu_bo_unreserve(rbo);
mutex_unlock(&vm->mutex);
} }
static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r) static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
...@@ -553,7 +546,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -553,7 +546,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
gobj = drm_gem_object_lookup(dev, filp, args->handle); gobj = drm_gem_object_lookup(dev, filp, args->handle);
if (gobj == NULL) if (gobj == NULL)
return -ENOENT; return -ENOENT;
mutex_lock(&fpriv->vm.mutex);
rbo = gem_to_amdgpu_bo(gobj); rbo = gem_to_amdgpu_bo(gobj);
INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&duplicates); INIT_LIST_HEAD(&duplicates);
...@@ -568,7 +560,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -568,7 +560,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
} }
r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
if (r) { if (r) {
mutex_unlock(&fpriv->vm.mutex);
drm_gem_object_unreference_unlocked(gobj); drm_gem_object_unreference_unlocked(gobj);
return r; return r;
} }
...@@ -577,7 +568,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -577,7 +568,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
if (!bo_va) { if (!bo_va) {
ttm_eu_backoff_reservation(&ticket, &list); ttm_eu_backoff_reservation(&ticket, &list);
drm_gem_object_unreference_unlocked(gobj); drm_gem_object_unreference_unlocked(gobj);
mutex_unlock(&fpriv->vm.mutex);
return -ENOENT; return -ENOENT;
} }
...@@ -602,7 +592,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -602,7 +592,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
ttm_eu_backoff_reservation(&ticket, &list); ttm_eu_backoff_reservation(&ticket, &list);
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE)) if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
amdgpu_gem_va_update_vm(adev, bo_va, args->operation); amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
mutex_unlock(&fpriv->vm.mutex);
drm_gem_object_unreference_unlocked(gobj); drm_gem_object_unreference_unlocked(gobj);
return r; return r;
} }
......
...@@ -392,6 +392,9 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ...@@ -392,6 +392,9 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */ ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = handle;
if ((ring->adev->vce.fw_version >> 24) >= 52)
ib->ptr[ib->length_dw++] = 0x00000040; /* len */
else
ib->ptr[ib->length_dw++] = 0x00000030; /* len */ ib->ptr[ib->length_dw++] = 0x00000030; /* len */
ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */ ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
ib->ptr[ib->length_dw++] = 0x00000000; ib->ptr[ib->length_dw++] = 0x00000000;
...@@ -404,6 +407,12 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ...@@ -404,6 +407,12 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[ib->length_dw++] = 0x00000100; ib->ptr[ib->length_dw++] = 0x00000100;
ib->ptr[ib->length_dw++] = 0x0000000c; ib->ptr[ib->length_dw++] = 0x0000000c;
ib->ptr[ib->length_dw++] = 0x00000000; ib->ptr[ib->length_dw++] = 0x00000000;
if ((ring->adev->vce.fw_version >> 24) >= 52) {
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000000;
}
ib->ptr[ib->length_dw++] = 0x00000014; /* len */ ib->ptr[ib->length_dw++] = 0x00000014; /* len */
ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
......
...@@ -922,8 +922,9 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, ...@@ -922,8 +922,9 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
bo_va = list_first_entry(&vm->invalidated, bo_va = list_first_entry(&vm->invalidated,
struct amdgpu_bo_va, vm_status); struct amdgpu_bo_va, vm_status);
spin_unlock(&vm->status_lock); spin_unlock(&vm->status_lock);
mutex_lock(&bo_va->mutex);
r = amdgpu_vm_bo_update(adev, bo_va, NULL); r = amdgpu_vm_bo_update(adev, bo_va, NULL);
mutex_unlock(&bo_va->mutex);
if (r) if (r)
return r; return r;
...@@ -967,7 +968,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, ...@@ -967,7 +968,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids); INIT_LIST_HEAD(&bo_va->invalids);
INIT_LIST_HEAD(&bo_va->vm_status); INIT_LIST_HEAD(&bo_va->vm_status);
mutex_init(&bo_va->mutex);
list_add_tail(&bo_va->bo_list, &bo->va); list_add_tail(&bo_va->bo_list, &bo->va);
return bo_va; return bo_va;
...@@ -1045,7 +1046,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, ...@@ -1045,7 +1046,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
mapping->offset = offset; mapping->offset = offset;
mapping->flags = flags; mapping->flags = flags;
mutex_lock(&bo_va->mutex);
list_add(&mapping->list, &bo_va->invalids); list_add(&mapping->list, &bo_va->invalids);
mutex_unlock(&bo_va->mutex);
spin_lock(&vm->it_lock); spin_lock(&vm->it_lock);
interval_tree_insert(&mapping->it, &vm->va); interval_tree_insert(&mapping->it, &vm->va);
spin_unlock(&vm->it_lock); spin_unlock(&vm->it_lock);
...@@ -1121,7 +1124,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, ...@@ -1121,7 +1124,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
bool valid = true; bool valid = true;
saddr /= AMDGPU_GPU_PAGE_SIZE; saddr /= AMDGPU_GPU_PAGE_SIZE;
mutex_lock(&bo_va->mutex);
list_for_each_entry(mapping, &bo_va->valids, list) { list_for_each_entry(mapping, &bo_va->valids, list) {
if (mapping->it.start == saddr) if (mapping->it.start == saddr)
break; break;
...@@ -1135,10 +1138,12 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, ...@@ -1135,10 +1138,12 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
break; break;
} }
if (&mapping->list == &bo_va->invalids) if (&mapping->list == &bo_va->invalids) {
mutex_unlock(&bo_va->mutex);
return -ENOENT; return -ENOENT;
} }
}
mutex_unlock(&bo_va->mutex);
list_del(&mapping->list); list_del(&mapping->list);
spin_lock(&vm->it_lock); spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va); interval_tree_remove(&mapping->it, &vm->va);
...@@ -1190,8 +1195,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, ...@@ -1190,8 +1195,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
spin_unlock(&vm->it_lock); spin_unlock(&vm->it_lock);
kfree(mapping); kfree(mapping);
} }
fence_put(bo_va->last_pt_update); fence_put(bo_va->last_pt_update);
mutex_destroy(&bo_va->mutex);
kfree(bo_va); kfree(bo_va);
} }
...@@ -1236,7 +1241,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) ...@@ -1236,7 +1241,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->ids[i].id = 0; vm->ids[i].id = 0;
vm->ids[i].flushed_updates = NULL; vm->ids[i].flushed_updates = NULL;
} }
mutex_init(&vm->mutex);
vm->va = RB_ROOT; vm->va = RB_ROOT;
spin_lock_init(&vm->status_lock); spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->invalidated);
...@@ -1320,7 +1324,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) ...@@ -1320,7 +1324,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
fence_put(vm->ids[i].flushed_updates); fence_put(vm->ids[i].flushed_updates);
} }
mutex_destroy(&vm->mutex);
} }
/** /**
......
...@@ -40,6 +40,9 @@ ...@@ -40,6 +40,9 @@
#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04 #define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK 0x10 #define GRBM_GFX_INDEX__VCE_INSTANCE_MASK 0x10
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618
#define VCE_V3_0_FW_SIZE (384 * 1024) #define VCE_V3_0_FW_SIZE (384 * 1024)
#define VCE_V3_0_STACK_SIZE (64 * 1024) #define VCE_V3_0_STACK_SIZE (64 * 1024)
...@@ -130,7 +133,9 @@ static int vce_v3_0_start(struct amdgpu_device *adev) ...@@ -130,7 +133,9 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
/* set BUSY flag */ /* set BUSY flag */
WREG32_P(mmVCE_STATUS, 1, ~1); WREG32_P(mmVCE_STATUS, 1, ~1);
if (adev->asic_type >= CHIP_STONEY)
WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
else
WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK, WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
~VCE_VCPU_CNTL__CLK_EN_MASK); ~VCE_VCPU_CNTL__CLK_EN_MASK);
...@@ -391,7 +396,11 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx) ...@@ -391,7 +396,11 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
WREG32(mmVCE_LMI_SWAP_CNTL, 0); WREG32(mmVCE_LMI_SWAP_CNTL, 0);
WREG32(mmVCE_LMI_SWAP_CNTL1, 0); WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
WREG32(mmVCE_LMI_VM_CTRL, 0); WREG32(mmVCE_LMI_VM_CTRL, 0);
if (adev->asic_type >= CHIP_STONEY) {
WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
} else
WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
offset = AMDGPU_VCE_FIRMWARE_OFFSET; offset = AMDGPU_VCE_FIRMWARE_OFFSET;
size = VCE_V3_0_FW_SIZE; size = VCE_V3_0_FW_SIZE;
...@@ -576,6 +585,11 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev, ...@@ -576,6 +585,11 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
DRM_DEBUG("IH: VCE\n"); DRM_DEBUG("IH: VCE\n");
WREG32_P(mmVCE_SYS_INT_STATUS,
VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
switch (entry->src_data) { switch (entry->src_data) {
case 0: case 0:
amdgpu_fence_process(&adev->vce.ring[0]); amdgpu_fence_process(&adev->vce.ring[0]);
......
...@@ -30,8 +30,7 @@ ...@@ -30,8 +30,7 @@
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include "gpu_sched_trace.h" #include "gpu_sched_trace.h"
static struct amd_sched_job * static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
amd_sched_entity_pop_job(struct amd_sched_entity *entity);
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
struct kmem_cache *sched_fence_slab; struct kmem_cache *sched_fence_slab;
...@@ -64,36 +63,36 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq, ...@@ -64,36 +63,36 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
} }
/** /**
* Select next job from a specified run queue with round robin policy. * Select an entity which could provide a job to run
* Return NULL if nothing available. *
* @rq The run queue to check.
*
* Try to find a ready entity, returns NULL if none found.
*/ */
static struct amd_sched_job * static struct amd_sched_entity *
amd_sched_rq_select_job(struct amd_sched_rq *rq) amd_sched_rq_select_entity(struct amd_sched_rq *rq)
{ {
struct amd_sched_entity *entity; struct amd_sched_entity *entity;
struct amd_sched_job *sched_job;
spin_lock(&rq->lock); spin_lock(&rq->lock);
entity = rq->current_entity; entity = rq->current_entity;
if (entity) { if (entity) {
list_for_each_entry_continue(entity, &rq->entities, list) { list_for_each_entry_continue(entity, &rq->entities, list) {
sched_job = amd_sched_entity_pop_job(entity); if (amd_sched_entity_is_ready(entity)) {
if (sched_job) {
rq->current_entity = entity; rq->current_entity = entity;
spin_unlock(&rq->lock); spin_unlock(&rq->lock);
return sched_job; return entity;
} }
} }
} }
list_for_each_entry(entity, &rq->entities, list) { list_for_each_entry(entity, &rq->entities, list) {
sched_job = amd_sched_entity_pop_job(entity); if (amd_sched_entity_is_ready(entity)) {
if (sched_job) {
rq->current_entity = entity; rq->current_entity = entity;
spin_unlock(&rq->lock); spin_unlock(&rq->lock);
return sched_job; return entity;
} }
if (entity == rq->current_entity) if (entity == rq->current_entity)
...@@ -176,6 +175,24 @@ static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity) ...@@ -176,6 +175,24 @@ static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
return false; return false;
} }
/**
* Check if entity is ready
*
* @entity The pointer to a valid scheduler entity
*
* Return true if entity could provide a job.
*/
static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)
{
if (kfifo_is_empty(&entity->job_queue))
return false;
if (ACCESS_ONCE(entity->dependency))
return false;
return true;
}
/** /**
* Destroy a context entity * Destroy a context entity
* *
...@@ -211,32 +228,53 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb) ...@@ -211,32 +228,53 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb)
amd_sched_wakeup(entity->sched); amd_sched_wakeup(entity->sched);
} }
static struct amd_sched_job * static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
amd_sched_entity_pop_job(struct amd_sched_entity *entity)
{ {
struct amd_gpu_scheduler *sched = entity->sched; struct amd_gpu_scheduler *sched = entity->sched;
struct amd_sched_job *sched_job; struct fence * fence = entity->dependency;
struct amd_sched_fence *s_fence;
if (ACCESS_ONCE(entity->dependency))
return NULL;
if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job)))
return NULL;
while ((entity->dependency = sched->ops->dependency(sched_job))) {
if (entity->dependency->context == entity->fence_context) { if (fence->context == entity->fence_context) {
/* We can ignore fences from ourself */ /* We can ignore fences from ourself */
fence_put(entity->dependency); fence_put(entity->dependency);
continue; return false;
} }
if (fence_add_callback(entity->dependency, &entity->cb, s_fence = to_amd_sched_fence(fence);
if (s_fence && s_fence->sched == sched) {
/* Fence is from the same scheduler */
if (test_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &fence->flags)) {
/* Ignore it when it is already scheduled */
fence_put(entity->dependency);
return false;
}
/* Wait for fence to be scheduled */
entity->cb.func = amd_sched_entity_wakeup;
list_add_tail(&entity->cb.node, &s_fence->scheduled_cb);
return true;
}
if (!fence_add_callback(entity->dependency, &entity->cb,
amd_sched_entity_wakeup)) amd_sched_entity_wakeup))
return true;
fence_put(entity->dependency); fence_put(entity->dependency);
else return false;
}
static struct amd_sched_job *
amd_sched_entity_pop_job(struct amd_sched_entity *entity)
{
struct amd_gpu_scheduler *sched = entity->sched;
struct amd_sched_job *sched_job;
if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job)))
return NULL;
while ((entity->dependency = sched->ops->dependency(sched_job)))
if (amd_sched_entity_add_dependency_cb(entity))
return NULL; return NULL;
}
return sched_job; return sched_job;
} }
...@@ -304,22 +342,22 @@ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched) ...@@ -304,22 +342,22 @@ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
} }
/** /**
* Select next to run * Select next entity to process
*/ */
static struct amd_sched_job * static struct amd_sched_entity *
amd_sched_select_job(struct amd_gpu_scheduler *sched) amd_sched_select_entity(struct amd_gpu_scheduler *sched)
{ {
struct amd_sched_job *sched_job; struct amd_sched_entity *entity;
if (!amd_sched_ready(sched)) if (!amd_sched_ready(sched))
return NULL; return NULL;
/* Kernel run queue has higher priority than normal run queue*/ /* Kernel run queue has higher priority than normal run queue*/
sched_job = amd_sched_rq_select_job(&sched->kernel_rq); entity = amd_sched_rq_select_entity(&sched->kernel_rq);
if (sched_job == NULL) if (entity == NULL)
sched_job = amd_sched_rq_select_job(&sched->sched_rq); entity = amd_sched_rq_select_entity(&sched->sched_rq);
return sched_job; return entity;
} }
static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
...@@ -381,13 +419,16 @@ static int amd_sched_main(void *param) ...@@ -381,13 +419,16 @@ static int amd_sched_main(void *param)
unsigned long flags; unsigned long flags;
wait_event_interruptible(sched->wake_up_worker, wait_event_interruptible(sched->wake_up_worker,
kthread_should_stop() || (entity = amd_sched_select_entity(sched)) ||
(sched_job = amd_sched_select_job(sched))); kthread_should_stop());
if (!entity)
continue;
sched_job = amd_sched_entity_pop_job(entity);
if (!sched_job) if (!sched_job)
continue; continue;
entity = sched_job->s_entity;
s_fence = sched_job->s_fence; s_fence = sched_job->s_fence;
if (sched->timeout != MAX_SCHEDULE_TIMEOUT) { if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
...@@ -400,6 +441,7 @@ static int amd_sched_main(void *param) ...@@ -400,6 +441,7 @@ static int amd_sched_main(void *param)
atomic_inc(&sched->hw_rq_count); atomic_inc(&sched->hw_rq_count);
fence = sched->ops->run_job(sched_job); fence = sched->ops->run_job(sched_job);
amd_sched_fence_scheduled(s_fence);
if (fence) { if (fence) {
r = fence_add_callback(fence, &s_fence->cb, r = fence_add_callback(fence, &s_fence->cb,
amd_sched_process_job); amd_sched_process_job);
......
...@@ -27,6 +27,8 @@ ...@@ -27,6 +27,8 @@
#include <linux/kfifo.h> #include <linux/kfifo.h>
#include <linux/fence.h> #include <linux/fence.h>
#define AMD_SCHED_FENCE_SCHEDULED_BIT FENCE_FLAG_USER_BITS
struct amd_gpu_scheduler; struct amd_gpu_scheduler;
struct amd_sched_rq; struct amd_sched_rq;
...@@ -68,6 +70,7 @@ struct amd_sched_rq { ...@@ -68,6 +70,7 @@ struct amd_sched_rq {
struct amd_sched_fence { struct amd_sched_fence {
struct fence base; struct fence base;
struct fence_cb cb; struct fence_cb cb;
struct list_head scheduled_cb;
struct amd_gpu_scheduler *sched; struct amd_gpu_scheduler *sched;
spinlock_t lock; spinlock_t lock;
void *owner; void *owner;
...@@ -134,7 +137,7 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job); ...@@ -134,7 +137,7 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
struct amd_sched_fence *amd_sched_fence_create( struct amd_sched_fence *amd_sched_fence_create(
struct amd_sched_entity *s_entity, void *owner); struct amd_sched_entity *s_entity, void *owner);
void amd_sched_fence_scheduled(struct amd_sched_fence *fence);
void amd_sched_fence_signal(struct amd_sched_fence *fence); void amd_sched_fence_signal(struct amd_sched_fence *fence);
#endif #endif
...@@ -35,6 +35,8 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity ...@@ -35,6 +35,8 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity
fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL); fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL);
if (fence == NULL) if (fence == NULL)
return NULL; return NULL;
INIT_LIST_HEAD(&fence->scheduled_cb);
fence->owner = owner; fence->owner = owner;
fence->sched = s_entity->sched; fence->sched = s_entity->sched;
spin_lock_init(&fence->lock); spin_lock_init(&fence->lock);
...@@ -55,6 +57,17 @@ void amd_sched_fence_signal(struct amd_sched_fence *fence) ...@@ -55,6 +57,17 @@ void amd_sched_fence_signal(struct amd_sched_fence *fence)
FENCE_TRACE(&fence->base, "was already signaled\n"); FENCE_TRACE(&fence->base, "was already signaled\n");
} }
void amd_sched_fence_scheduled(struct amd_sched_fence *s_fence)
{
struct fence_cb *cur, *tmp;
set_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &s_fence->base.flags);
list_for_each_entry_safe(cur, tmp, &s_fence->scheduled_cb, node) {
list_del_init(&cur->node);
cur->func(&s_fence->base, cur);
}
}
static const char *amd_sched_fence_get_driver_name(struct fence *fence) static const char *amd_sched_fence_get_driver_name(struct fence *fence)
{ {
return "amd_sched"; return "amd_sched";
......
...@@ -464,7 +464,7 @@ void rv730_stop_dpm(struct radeon_device *rdev) ...@@ -464,7 +464,7 @@ void rv730_stop_dpm(struct radeon_device *rdev)
result = rv770_send_msg_to_smc(rdev, PPSMC_MSG_TwoLevelsDisabled); result = rv770_send_msg_to_smc(rdev, PPSMC_MSG_TwoLevelsDisabled);
if (result != PPSMC_Result_OK) if (result != PPSMC_Result_OK)
DRM_ERROR("Could not force DPM to low\n"); DRM_DEBUG("Could not force DPM to low\n");
WREG32_P(GENERAL_PWRMGT, 0, ~GLOBAL_PWRMGT_EN); WREG32_P(GENERAL_PWRMGT, 0, ~GLOBAL_PWRMGT_EN);
......
...@@ -193,7 +193,7 @@ void rv770_stop_dpm(struct radeon_device *rdev) ...@@ -193,7 +193,7 @@ void rv770_stop_dpm(struct radeon_device *rdev)
result = rv770_send_msg_to_smc(rdev, PPSMC_MSG_TwoLevelsDisabled); result = rv770_send_msg_to_smc(rdev, PPSMC_MSG_TwoLevelsDisabled);
if (result != PPSMC_Result_OK) if (result != PPSMC_Result_OK)
DRM_ERROR("Could not force DPM to low.\n"); DRM_DEBUG("Could not force DPM to low.\n");
WREG32_P(GENERAL_PWRMGT, 0, ~GLOBAL_PWRMGT_EN); WREG32_P(GENERAL_PWRMGT, 0, ~GLOBAL_PWRMGT_EN);
...@@ -1418,7 +1418,7 @@ int rv770_resume_smc(struct radeon_device *rdev) ...@@ -1418,7 +1418,7 @@ int rv770_resume_smc(struct radeon_device *rdev)
int rv770_set_sw_state(struct radeon_device *rdev) int rv770_set_sw_state(struct radeon_device *rdev)
{ {
if (rv770_send_msg_to_smc(rdev, PPSMC_MSG_SwitchToSwState) != PPSMC_Result_OK) if (rv770_send_msg_to_smc(rdev, PPSMC_MSG_SwitchToSwState) != PPSMC_Result_OK)
return -EINVAL; DRM_DEBUG("rv770_set_sw_state failed\n");
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment