Commit 6ba59f3b authored by Dave Airlie

Merge branch 'for-airlied-next' of git://people.freedesktop.org/~mlankhorst/linux into drm-next

Merge the move to generic fences for TTM using drivers.

* 'for-airlied-next' of git://people.freedesktop.org/~mlankhorst/linux:
  drm/nouveau: use shared fences for readable objects
  drm/nouveau: Keep only a single list for validation.
  drm/ttm: use rcu in core ttm
  drm/vmwgfx: use rcu in vmw_user_dmabuf_synccpu_grab
  drm/radeon: use rcu waits in some ioctls
  drm/nouveau: use rcu in nouveau_gem_ioctl_cpu_prep
  drm/ttm: flip the switch, and convert to dma_fence
  drm/qxl: rework to new fence interface
  drm/nouveau: rework to new fence interface
  drm/vmwgfx: rework to new fence interface, v2
  drm/vmwgfx: get rid of different types of fence_flags entirely
  drm/radeon: use common fence implementation for fences, v4
  drm/ttm: kill off some members to ttm_validate_buffer
  drm/ttm: add interruptible parameter to ttm_eu_reserve_buffers
  drm/ttm: kill fence_lock
  drm/ttm: call ttm_bo_wait while inside a reservation
  drm/nouveau: require reservations for nouveau_fence_sync and nouveau_bo_fence
  drm/nouveau: add reservation to nouveau_gem_ioctl_cpu_prep
parents a18b29f0 809e9447
......@@ -88,13 +88,13 @@ nv10_bo_get_tile_region(struct drm_device *dev, int i)
static void
nv10_bo_put_tile_region(struct drm_device *dev, struct nouveau_drm_tile *tile,
struct nouveau_fence *fence)
struct fence *fence)
{
struct nouveau_drm *drm = nouveau_drm(dev);
if (tile) {
spin_lock(&drm->tile.lock);
tile->fence = nouveau_fence_ref(fence);
tile->fence = (struct nouveau_fence *)fence_get(fence);
tile->used = false;
spin_unlock(&drm->tile.lock);
}
......@@ -970,13 +970,14 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
}
mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
ret = nouveau_fence_sync(bo->sync_obj, chan);
ret = nouveau_fence_sync(nouveau_bo(bo), chan, true);
if (ret == 0) {
ret = drm->ttm.move(chan, bo, &bo->mem, new_mem);
if (ret == 0) {
ret = nouveau_fence_new(chan, false, &fence);
if (ret == 0) {
ret = ttm_bo_move_accel_cleanup(bo, fence,
ret = ttm_bo_move_accel_cleanup(bo,
&fence->base,
evict,
no_wait_gpu,
new_mem);
......@@ -1167,8 +1168,9 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
{
struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
struct drm_device *dev = drm->dev;
struct fence *fence = reservation_object_get_excl(bo->resv);
nv10_bo_put_tile_region(dev, *old_tile, bo->sync_obj);
nv10_bo_put_tile_region(dev, *old_tile, fence);
*old_tile = new_tile;
}
......@@ -1212,9 +1214,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
}
/* Fallback to software copy. */
spin_lock(&bo->bdev->fence_lock);
ret = ttm_bo_wait(bo, true, intr, no_wait_gpu);
spin_unlock(&bo->bdev->fence_lock);
if (ret == 0)
ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
......@@ -1458,47 +1458,14 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
}
void
nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence)
nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence, bool exclusive)
{
struct nouveau_fence *new_fence = nouveau_fence_ref(fence);
struct nouveau_fence *old_fence = NULL;
struct reservation_object *resv = nvbo->bo.resv;
spin_lock(&nvbo->bo.bdev->fence_lock);
old_fence = nvbo->bo.sync_obj;
nvbo->bo.sync_obj = new_fence;
spin_unlock(&nvbo->bo.bdev->fence_lock);
nouveau_fence_unref(&old_fence);
}
/*
 * TTM sync_obj_unref hook (see nouveau_bo_driver): recover the typed
 * nouveau fence slot from TTM's opaque pointer and forward it to
 * nouveau_fence_unref().
 */
static void
nouveau_bo_fence_unref(void **sync_obj)
{
	struct nouveau_fence **slot = (struct nouveau_fence **)sync_obj;

	nouveau_fence_unref(slot);
}
/*
 * TTM sync_obj_ref hook (see nouveau_bo_driver): hand the opaque sync
 * object to nouveau_fence_ref() and return whatever it returns.
 */
static void *
nouveau_bo_fence_ref(void *sync_obj)
{
	struct nouveau_fence *fence = sync_obj;

	return nouveau_fence_ref(fence);
}
/*
 * TTM sync_obj_signaled hook (see nouveau_bo_driver): report the result of
 * nouveau_fence_done() for the opaque sync object.
 */
static bool
nouveau_bo_fence_signalled(void *sync_obj)
{
	struct nouveau_fence *fence = sync_obj;

	return nouveau_fence_done(fence);
}
/*
 * TTM sync_obj_wait hook (see nouveau_bo_driver): forward the opaque sync
 * object together with the @lazy and @intr flags to nouveau_fence_wait()
 * and return its status unchanged.
 */
static int
nouveau_bo_fence_wait(void *sync_obj, bool lazy, bool intr)
{
	struct nouveau_fence *fence = sync_obj;

	return nouveau_fence_wait(fence, lazy, intr);
}
static int
nouveau_bo_fence_flush(void *sync_obj)
{
return 0;
if (exclusive)
reservation_object_add_excl_fence(resv, &fence->base);
else if (fence)
reservation_object_add_shared_fence(resv, &fence->base);
}
struct ttm_bo_driver nouveau_bo_driver = {
......@@ -1511,11 +1478,6 @@ struct ttm_bo_driver nouveau_bo_driver = {
.move_notify = nouveau_bo_move_ntfy,
.move = nouveau_bo_move,
.verify_access = nouveau_bo_verify_access,
.sync_obj_signaled = nouveau_bo_fence_signalled,
.sync_obj_wait = nouveau_bo_fence_wait,
.sync_obj_flush = nouveau_bo_fence_flush,
.sync_obj_unref = nouveau_bo_fence_unref,
.sync_obj_ref = nouveau_bo_fence_ref,
.fault_reserve_notify = &nouveau_ttm_fault_reserve_notify,
.io_mem_reserve = &nouveau_ttm_io_mem_reserve,
.io_mem_free = &nouveau_ttm_io_mem_free,
......
......@@ -78,7 +78,7 @@ u16 nouveau_bo_rd16(struct nouveau_bo *, unsigned index);
void nouveau_bo_wr16(struct nouveau_bo *, unsigned index, u16 val);
u32 nouveau_bo_rd32(struct nouveau_bo *, unsigned index);
void nouveau_bo_wr32(struct nouveau_bo *, unsigned index, u32 val);
void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *);
void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *, bool exclusive);
int nouveau_bo_validate(struct nouveau_bo *, bool interruptible,
bool no_wait_gpu);
......
......@@ -658,7 +658,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan,
spin_unlock_irqrestore(&dev->event_lock, flags);
/* Synchronize with the old framebuffer */
ret = nouveau_fence_sync(old_bo->bo.sync_obj, chan);
ret = nouveau_fence_sync(old_bo, chan, false);
if (ret)
goto fail;
......@@ -717,19 +717,24 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
}
mutex_lock(&cli->mutex);
/* synchronise rendering channel with the kernel's channel */
spin_lock(&new_bo->bo.bdev->fence_lock);
fence = nouveau_fence_ref(new_bo->bo.sync_obj);
spin_unlock(&new_bo->bo.bdev->fence_lock);
ret = nouveau_fence_sync(fence, chan);
nouveau_fence_unref(&fence);
ret = ttm_bo_reserve(&new_bo->bo, true, false, false, NULL);
if (ret)
goto fail_unpin;
ret = ttm_bo_reserve(&old_bo->bo, true, false, false, NULL);
if (ret)
/* synchronise rendering channel with the kernel's channel */
ret = nouveau_fence_sync(new_bo, chan, false);
if (ret) {
ttm_bo_unreserve(&new_bo->bo);
goto fail_unpin;
}
if (new_bo != old_bo) {
ttm_bo_unreserve(&new_bo->bo);
ret = ttm_bo_reserve(&old_bo->bo, true, false, false, NULL);
if (ret)
goto fail_unpin;
}
/* Initialize a page flip struct */
*s = (struct nouveau_page_flip_state)
......@@ -775,7 +780,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
/* Update the crtc struct and cleanup */
crtc->primary->fb = fb;
nouveau_bo_fence(old_bo, fence);
nouveau_bo_fence(old_bo, fence, false);
ttm_bo_unreserve(&old_bo->bo);
if (old_bo != new_bo)
nouveau_bo_unpin(old_bo);
......
This diff is collapsed.
#ifndef __NOUVEAU_FENCE_H__
#define __NOUVEAU_FENCE_H__
#include <linux/fence.h>
#include <nvif/notify.h>
struct nouveau_drm;
struct nouveau_bo;
struct nouveau_fence {
struct fence base;
struct list_head head;
struct list_head work;
struct kref kref;
bool sysmem;
struct nouveau_channel *channel;
unsigned long timeout;
u32 sequence;
};
int nouveau_fence_new(struct nouveau_channel *, bool sysmem,
struct nouveau_fence **);
struct nouveau_fence *
nouveau_fence_ref(struct nouveau_fence *);
void nouveau_fence_unref(struct nouveau_fence **);
int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
bool nouveau_fence_done(struct nouveau_fence *);
void nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
void nouveau_fence_work(struct fence *, void (*)(void *), void *);
int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
int nouveau_fence_sync(struct nouveau_fence *, struct nouveau_channel *);
int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive);
struct nouveau_fence_chan {
spinlock_t lock;
struct list_head pending;
struct list_head flip;
......@@ -38,8 +40,12 @@ struct nouveau_fence_chan {
int (*emit32)(struct nouveau_channel *, u64, u32);
int (*sync32)(struct nouveau_channel *, u64, u32);
spinlock_t lock;
u32 sequence;
u32 context;
char name[24];
struct nvif_notify notify;
int notify_ref;
};
struct nouveau_fence_priv {
......@@ -49,13 +55,13 @@ struct nouveau_fence_priv {
int (*context_new)(struct nouveau_channel *);
void (*context_del)(struct nouveau_channel *);
wait_queue_head_t waiting;
u32 contexts, context_base;
bool uevent;
};
#define nouveau_fence(drm) ((struct nouveau_fence_priv *)(drm)->fence)
void nouveau_fence_context_new(struct nouveau_fence_chan *);
void nouveau_fence_context_new(struct nouveau_channel *, struct nouveau_fence_chan *);
void nouveau_fence_context_del(struct nouveau_fence_chan *);
int nv04_fence_create(struct nouveau_drm *);
......
......@@ -98,17 +98,23 @@ static void
nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nouveau_vma *vma)
{
const bool mapped = nvbo->bo.mem.mem_type != TTM_PL_SYSTEM;
struct nouveau_fence *fence = NULL;
struct reservation_object *resv = nvbo->bo.resv;
struct reservation_object_list *fobj;
struct fence *fence = NULL;
fobj = reservation_object_get_list(resv);
list_del(&vma->head);
if (mapped) {
spin_lock(&nvbo->bo.bdev->fence_lock);
fence = nouveau_fence_ref(nvbo->bo.sync_obj);
spin_unlock(&nvbo->bo.bdev->fence_lock);
}
if (fobj && fobj->shared_count > 1)
ttm_bo_wait(&nvbo->bo, true, false, false);
else if (fobj && fobj->shared_count == 1)
fence = rcu_dereference_protected(fobj->shared[0],
reservation_object_held(resv));
else
fence = reservation_object_get_excl(nvbo->bo.resv);
if (fence) {
if (fence && mapped) {
nouveau_fence_work(fence, nouveau_gem_object_delete, vma);
} else {
if (mapped)
......@@ -116,7 +122,6 @@ nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nouveau_vma *vma)
nouveau_vm_put(vma);
kfree(vma);
}
nouveau_fence_unref(&fence);
}
void
......@@ -288,24 +293,23 @@ nouveau_gem_set_domain(struct drm_gem_object *gem, uint32_t read_domains,
}
struct validate_op {
struct list_head vram_list;
struct list_head gart_list;
struct list_head both_list;
struct list_head list;
struct ww_acquire_ctx ticket;
};
static void
validate_fini_list(struct list_head *list, struct nouveau_fence *fence,
struct ww_acquire_ctx *ticket)
validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence,
struct drm_nouveau_gem_pushbuf_bo *pbbo)
{
struct list_head *entry, *tmp;
struct nouveau_bo *nvbo;
struct drm_nouveau_gem_pushbuf_bo *b;
list_for_each_safe(entry, tmp, list) {
nvbo = list_entry(entry, struct nouveau_bo, entry);
while (!list_empty(&op->list)) {
nvbo = list_entry(op->list.next, struct nouveau_bo, entry);
b = &pbbo[nvbo->pbbo_index];
if (likely(fence))
nouveau_bo_fence(nvbo, fence);
nouveau_bo_fence(nvbo, fence, !!b->write_domains);
if (unlikely(nvbo->validate_mapped)) {
ttm_bo_kunmap(&nvbo->kmap);
......@@ -314,23 +318,16 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence,
list_del(&nvbo->entry);
nvbo->reserved_by = NULL;
ttm_bo_unreserve_ticket(&nvbo->bo, ticket);
ttm_bo_unreserve_ticket(&nvbo->bo, &op->ticket);
drm_gem_object_unreference_unlocked(&nvbo->gem);
}
}
static void
validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence)
validate_fini(struct validate_op *op, struct nouveau_fence *fence,
struct drm_nouveau_gem_pushbuf_bo *pbbo)
{
validate_fini_list(&op->vram_list, fence, &op->ticket);
validate_fini_list(&op->gart_list, fence, &op->ticket);
validate_fini_list(&op->both_list, fence, &op->ticket);
}
static void
validate_fini(struct validate_op *op, struct nouveau_fence *fence)
{
validate_fini_no_ticket(op, fence);
validate_fini_no_ticket(op, fence, pbbo);
ww_acquire_fini(&op->ticket);
}
......@@ -344,6 +341,9 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv,
int trycnt = 0;
int ret, i;
struct nouveau_bo *res_bo = NULL;
LIST_HEAD(gart_list);
LIST_HEAD(vram_list);
LIST_HEAD(both_list);
ww_acquire_init(&op->ticket, &reservation_ww_class);
retry:
......@@ -360,9 +360,8 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv,
gem = drm_gem_object_lookup(dev, file_priv, b->handle);
if (!gem) {
NV_PRINTK(error, cli, "Unknown handle 0x%08x\n", b->handle);
ww_acquire_done(&op->ticket);
validate_fini(op, NULL);
return -ENOENT;
ret = -ENOENT;
break;
}
nvbo = nouveau_gem_object(gem);
if (nvbo == res_bo) {
......@@ -375,14 +374,16 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv,
NV_PRINTK(error, cli, "multiple instances of buffer %d on "
"validation list\n", b->handle);
drm_gem_object_unreference_unlocked(gem);
ww_acquire_done(&op->ticket);
validate_fini(op, NULL);
return -EINVAL;
ret = -EINVAL;
break;
}
ret = ttm_bo_reserve(&nvbo->bo, true, false, true, &op->ticket);
if (ret) {
validate_fini_no_ticket(op, NULL);
list_splice_tail_init(&vram_list, &op->list);
list_splice_tail_init(&gart_list, &op->list);
list_splice_tail_init(&both_list, &op->list);
validate_fini_no_ticket(op, NULL, NULL);
if (unlikely(ret == -EDEADLK)) {
ret = ttm_bo_reserve_slowpath(&nvbo->bo, true,
&op->ticket);
......@@ -390,12 +391,9 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv,
res_bo = nvbo;
}
if (unlikely(ret)) {
ww_acquire_done(&op->ticket);
ww_acquire_fini(&op->ticket);
drm_gem_object_unreference_unlocked(gem);
if (ret != -ERESTARTSYS)
NV_PRINTK(error, cli, "fail reserve\n");
return ret;
break;
}
}
......@@ -404,45 +402,32 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv,
nvbo->pbbo_index = i;
if ((b->valid_domains & NOUVEAU_GEM_DOMAIN_VRAM) &&
(b->valid_domains & NOUVEAU_GEM_DOMAIN_GART))
list_add_tail(&nvbo->entry, &op->both_list);
list_add_tail(&nvbo->entry, &both_list);
else
if (b->valid_domains & NOUVEAU_GEM_DOMAIN_VRAM)
list_add_tail(&nvbo->entry, &op->vram_list);
list_add_tail(&nvbo->entry, &vram_list);
else
if (b->valid_domains & NOUVEAU_GEM_DOMAIN_GART)
list_add_tail(&nvbo->entry, &op->gart_list);
list_add_tail(&nvbo->entry, &gart_list);
else {
NV_PRINTK(error, cli, "invalid valid domains: 0x%08x\n",
b->valid_domains);
list_add_tail(&nvbo->entry, &op->both_list);
ww_acquire_done(&op->ticket);
validate_fini(op, NULL);
return -EINVAL;
list_add_tail(&nvbo->entry, &both_list);
ret = -EINVAL;
break;
}
if (nvbo == res_bo)
goto retry;
}
ww_acquire_done(&op->ticket);
return 0;
}
static int
validate_sync(struct nouveau_channel *chan, struct nouveau_bo *nvbo)
{
struct nouveau_fence *fence = NULL;
int ret = 0;
spin_lock(&nvbo->bo.bdev->fence_lock);
fence = nouveau_fence_ref(nvbo->bo.sync_obj);
spin_unlock(&nvbo->bo.bdev->fence_lock);
if (fence) {
ret = nouveau_fence_sync(fence, chan);
nouveau_fence_unref(&fence);
}
list_splice_tail(&vram_list, &op->list);
list_splice_tail(&gart_list, &op->list);
list_splice_tail(&both_list, &op->list);
if (ret)
validate_fini(op, NULL, NULL);
return ret;
}
static int
......@@ -474,9 +459,10 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli,
return ret;
}
ret = validate_sync(chan, nvbo);
ret = nouveau_fence_sync(nvbo, chan, !!b->write_domains);
if (unlikely(ret)) {
NV_PRINTK(error, cli, "fail post-validate sync\n");
if (ret != -ERESTARTSYS)
NV_PRINTK(error, cli, "fail post-validate sync\n");
return ret;
}
......@@ -513,11 +499,9 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
struct validate_op *op, int *apply_relocs)
{
struct nouveau_cli *cli = nouveau_cli(file_priv);
int ret, relocs = 0;
int ret;
INIT_LIST_HEAD(&op->vram_list);
INIT_LIST_HEAD(&op->gart_list);
INIT_LIST_HEAD(&op->both_list);
INIT_LIST_HEAD(&op->list);
if (nr_buffers == 0)
return 0;
......@@ -529,34 +513,14 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
return ret;
}
ret = validate_list(chan, cli, &op->vram_list, pbbo, user_buffers);
if (unlikely(ret < 0)) {
if (ret != -ERESTARTSYS)
NV_PRINTK(error, cli, "validate vram_list\n");
validate_fini(op, NULL);
return ret;
}
relocs += ret;
ret = validate_list(chan, cli, &op->gart_list, pbbo, user_buffers);
if (unlikely(ret < 0)) {
if (ret != -ERESTARTSYS)
NV_PRINTK(error, cli, "validate gart_list\n");
validate_fini(op, NULL);
return ret;
}
relocs += ret;
ret = validate_list(chan, cli, &op->both_list, pbbo, user_buffers);
ret = validate_list(chan, cli, &op->list, pbbo, user_buffers);
if (unlikely(ret < 0)) {
if (ret != -ERESTARTSYS)
NV_PRINTK(error, cli, "validate both_list\n");
validate_fini(op, NULL);
NV_PRINTK(error, cli, "validating bo list\n");
validate_fini(op, NULL, NULL);
return ret;
}
relocs += ret;
*apply_relocs = relocs;
*apply_relocs = ret;
return 0;
}
......@@ -659,9 +623,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
data |= r->vor;
}
spin_lock(&nvbo->bo.bdev->fence_lock);
ret = ttm_bo_wait(&nvbo->bo, false, false, false);
spin_unlock(&nvbo->bo.bdev->fence_lock);
ret = ttm_bo_wait(&nvbo->bo, true, false, false);
if (ret) {
NV_PRINTK(error, cli, "reloc wait_idle failed: %d\n", ret);
break;
......@@ -839,7 +801,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
}
out:
validate_fini(&op, fence);
validate_fini(&op, fence, bo);
nouveau_fence_unref(&fence);
out_prevalid:
......@@ -884,17 +846,29 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
struct drm_gem_object *gem;
struct nouveau_bo *nvbo;
bool no_wait = !!(req->flags & NOUVEAU_GEM_CPU_PREP_NOWAIT);
int ret = -EINVAL;
bool write = !!(req->flags & NOUVEAU_GEM_CPU_PREP_WRITE);
int ret;
gem = drm_gem_object_lookup(dev, file_priv, req->handle);
if (!gem)
return -ENOENT;
nvbo = nouveau_gem_object(gem);
spin_lock(&nvbo->bo.bdev->fence_lock);
ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
spin_unlock(&nvbo->bo.bdev->fence_lock);
if (no_wait)
ret = reservation_object_test_signaled_rcu(nvbo->bo.resv, write) ? 0 : -EBUSY;
else {
long lret;
lret = reservation_object_wait_timeout_rcu(nvbo->bo.resv, write, true, 30 * HZ);
if (!lret)
ret = -EBUSY;
else if (lret > 0)
ret = 0;
else
ret = lret;
}
drm_gem_object_unreference_unlocked(gem);
return ret;
}
......
......@@ -41,7 +41,7 @@ nv04_fence_emit(struct nouveau_fence *fence)
int ret = RING_SPACE(chan, 2);
if (ret == 0) {
BEGIN_NV04(chan, NvSubSw, 0x0150, 1);
OUT_RING (chan, fence->sequence);
OUT_RING (chan, fence->base.seqno);
FIRE_RING (chan);
}
return ret;
......@@ -75,7 +75,7 @@ nv04_fence_context_new(struct nouveau_channel *chan)
{
struct nv04_fence_chan *fctx = kzalloc(sizeof(*fctx), GFP_KERNEL);
if (fctx) {
nouveau_fence_context_new(&fctx->base);
nouveau_fence_context_new(chan, &fctx->base);
fctx->base.emit = nv04_fence_emit;
fctx->base.sync = nv04_fence_sync;
fctx->base.read = nv04_fence_read;
......@@ -105,5 +105,7 @@ nv04_fence_create(struct nouveau_drm *drm)
priv->base.dtor = nv04_fence_destroy;
priv->base.context_new = nv04_fence_context_new;
priv->base.context_del = nv04_fence_context_del;
priv->base.contexts = 15;
priv->base.context_base = fence_context_alloc(priv->base.contexts);
return 0;
}
......@@ -33,7 +33,7 @@ nv10_fence_emit(struct nouveau_fence *fence)
int ret = RING_SPACE(chan, 2);
if (ret == 0) {
BEGIN_NV04(chan, 0, NV10_SUBCHAN_REF_CNT, 1);
OUT_RING (chan, fence->sequence);
OUT_RING (chan, fence->base.seqno);
FIRE_RING (chan);
}
return ret;
......@@ -75,7 +75,7 @@ nv10_fence_context_new(struct nouveau_channel *chan)
if (!fctx)
return -ENOMEM;
nouveau_fence_context_new(&fctx->base);
nouveau_fence_context_new(chan, &fctx->base);
fctx->base.emit = nv10_fence_emit;
fctx->base.read = nv10_fence_read;
fctx->base.sync = nv10_fence_sync;
......@@ -106,6 +106,8 @@ nv10_fence_create(struct nouveau_drm *drm)
priv->base.dtor = nv10_fence_destroy;
priv->base.context_new = nv10_fence_context_new;
priv->base.context_del = nv10_fence_context_del;
priv->base.contexts = 31;
priv->base.context_base = fence_context_alloc(priv->base.contexts);
spin_lock_init(&priv->lock);
return 0;
}
......@@ -84,7 +84,7 @@ nv17_fence_context_new(struct nouveau_channel *chan)
if (!fctx)
return -ENOMEM;
nouveau_fence_context_new(&fctx->base);
nouveau_fence_context_new(chan, &fctx->base);
fctx->base.emit = nv10_fence_emit;
fctx->base.read = nv10_fence_read;
fctx->base.sync = nv17_fence_sync;
......@@ -124,6 +124,8 @@ nv17_fence_create(struct nouveau_drm *drm)
priv->base.resume = nv17_fence_resume;
priv->base.context_new = nv17_fence_context_new;
priv->base.context_del = nv10_fence_context_del;
priv->base.contexts = 31;
priv->base.context_base = fence_context_alloc(priv->base.contexts);
spin_lock_init(&priv->lock);
ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
......
......@@ -46,7 +46,7 @@ nv50_fence_context_new(struct nouveau_channel *chan)
if (!fctx)
return -ENOMEM;
nouveau_fence_context_new(&fctx->base);
nouveau_fence_context_new(chan, &fctx->base);
fctx->base.emit = nv10_fence_emit;
fctx->base.read = nv10_fence_read;
fctx->base.sync = nv17_fence_sync;
......@@ -95,6 +95,8 @@ nv50_fence_create(struct nouveau_drm *drm)
priv->base.resume = nv17_fence_resume;
priv->base.context_new = nv50_fence_context_new;
priv->base.context_del = nv10_fence_context_del;
priv->base.contexts = 127;
priv->base.context_base = fence_context_alloc(priv->base.contexts);
spin_lock_init(&priv->lock);
ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
......
......@@ -82,7 +82,7 @@ nv84_fence_emit(struct nouveau_fence *fence)
else
addr += fctx->vma.offset;
return fctx->base.emit32(chan, addr, fence->sequence);
return fctx->base.emit32(chan, addr, fence->base.seqno);
}
static int
......@@ -97,7 +97,7 @@ nv84_fence_sync(struct nouveau_fence *fence,
else
addr += fctx->vma.offset;
return fctx->base.sync32(chan, addr, fence->sequence);
return fctx->base.sync32(chan, addr, fence->base.seqno);
}
static u32
......@@ -139,12 +139,13 @@ nv84_fence_context_new(struct nouveau_channel *chan)
if (!fctx)
return -ENOMEM;
nouveau_fence_context_new(&fctx->base);
nouveau_fence_context_new(chan, &fctx->base);
fctx->base.emit = nv84_fence_emit;
fctx->base.sync = nv84_fence_sync;
fctx->base.read = nv84_fence_read;
fctx->base.emit32 = nv84_fence_emit32;
fctx->base.sync32 = nv84_fence_sync32;
fctx->base.sequence = nv84_fence_read(chan);
ret = nouveau_bo_vma_add(priv->bo, cli->vm, &fctx->vma);
if (ret == 0) {
......@@ -168,13 +169,12 @@ nv84_fence_context_new(struct nouveau_channel *chan)
static bool
nv84_fence_suspend(struct nouveau_drm *drm)
{
struct nouveau_fifo *pfifo = nvkm_fifo(&drm->device);
struct nv84_fence_priv *priv = drm->fence;
int i;
priv->suspend = vmalloc((pfifo->max + 1) * sizeof(u32));
priv->suspend = vmalloc(priv->base.contexts * sizeof(u32));
if (priv->suspend) {
for (i = 0; i <= pfifo->max; i++)
for (i = 0; i < priv->base.contexts; i++)
priv->suspend[i] = nouveau_bo_rd32(priv->bo, i*4);
}
......@@ -184,12 +184,11 @@ nv84_fence_suspend(struct nouveau_drm *drm)
static void
nv84_fence_resume(struct nouveau_drm *drm)
{
struct nouveau_fifo *pfifo = nvkm_fifo(&drm->device);
struct nv84_fence_priv *priv = drm->fence;
int i;
if (priv->suspend) {
for (i = 0; i <= pfifo->max; i++)
for (i = 0; i < priv->base.contexts; i++)
nouveau_bo_wr32(priv->bo, i*4, priv->suspend[i]);
vfree(priv->suspend);
priv->suspend = NULL;
......@@ -229,10 +228,11 @@ nv84_fence_create(struct nouveau_drm *drm)
priv->base.context_new = nv84_fence_context_new;
priv->base.context_del = nv84_fence_context_del;
init_waitqueue_head(&priv->base.waiting);
priv->base.contexts = pfifo->max + 1;
priv->base.context_base = fence_context_alloc(priv->base.contexts);
priv->base.uevent = true;
ret = nouveau_bo_new(drm->dev, 16 * (pfifo->max + 1), 0,
ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0,
TTM_PL_FLAG_VRAM, 0, 0, NULL, &priv->bo);
if (ret == 0) {
ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM);
......@@ -246,7 +246,7 @@ nv84_fence_create(struct nouveau_drm *drm)
}
if (ret == 0)
ret = nouveau_bo_new(drm->dev, 16 * (pfifo->max + 1), 0,
ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0,
TTM_PL_FLAG_TT, 0, 0, NULL,
&priv->bo_gart);
if (ret == 0) {
......
......@@ -4,6 +4,6 @@
ccflags-y := -Iinclude/drm
qxl-y := qxl_drv.o qxl_kms.o qxl_display.o qxl_ttm.o qxl_fb.o qxl_object.o qxl_gem.o qxl_cmd.o qxl_image.o qxl_draw.o qxl_debugfs.o qxl_irq.o qxl_dumb.o qxl_ioctl.o qxl_fence.o qxl_release.o
qxl-y := qxl_drv.o qxl_kms.o qxl_display.o qxl_ttm.o qxl_fb.o qxl_object.o qxl_gem.o qxl_cmd.o qxl_image.o qxl_draw.o qxl_debugfs.o qxl_irq.o qxl_dumb.o qxl_ioctl.o qxl_release.o
obj-$(CONFIG_DRM_QXL)+= qxl.o
......@@ -620,17 +620,10 @@ static int qxl_reap_surf(struct qxl_device *qdev, struct qxl_bo *surf, bool stal
if (ret == -EBUSY)
return -EBUSY;
if (surf->fence.num_active_releases > 0 && stall == false) {
qxl_bo_unreserve(surf);
return -EBUSY;
}
if (stall)
mutex_unlock(&qdev->surf_evict_mutex);
spin_lock(&surf->tbo.bdev->fence_lock);
ret = ttm_bo_wait(&surf->tbo, true, true, !stall);
spin_unlock(&surf->tbo.bdev->fence_lock);
if (stall)
mutex_lock(&qdev->surf_evict_mutex);
......
......@@ -57,11 +57,21 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data)
struct qxl_device *qdev = node->minor->dev->dev_private;
struct qxl_bo *bo;
spin_lock(&qdev->release_lock);
list_for_each_entry(bo, &qdev->gem.objects, list) {
seq_printf(m, "size %ld, pc %d, sync obj %p, num releases %d\n",
(unsigned long)bo->gem_base.size, bo->pin_count,
bo->tbo.sync_obj, bo->fence.num_active_releases);
struct reservation_object_list *fobj;
int rel;
rcu_read_lock();
fobj = rcu_dereference(bo->tbo.resv->fence);
rel = fobj ? fobj->shared_count : 0;
rcu_read_unlock();
seq_printf(m, "size %ld, pc %d, num releases %d\n",
(unsigned long)bo->gem_base.size,
bo->pin_count, rel);
}
spin_unlock(&qdev->release_lock);
return 0;
}
......
......@@ -31,6 +31,7 @@
* Definitions taken from spice-protocol, plus kernel driver specific bits.
*/
#include <linux/fence.h>
#include <linux/workqueue.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
......@@ -95,13 +96,6 @@ enum {
QXL_INTERRUPT_IO_CMD |\
QXL_INTERRUPT_CLIENT_MONITORS_CONFIG)
/*
 * Per-buffer-object fence state: the set of release ids still outstanding
 * on the buffer object that embeds this structure.
 */
struct qxl_fence {
/* Owning device; assigned by qxl_fence_init(). */
struct qxl_device *qdev;
/*
 * Outstanding-release counter: incremented by
 * qxl_fence_add_release_locked(), decremented by
 * qxl_fence_remove_release() when the id is found in @tree, and reset to
 * zero by qxl_fence_init() and qxl_fence_fini().
 */
uint32_t num_active_releases;
/*
 * Freed with kfree() in qxl_fence_fini().
 * NOTE(review): no allocation of this array is visible here - confirm
 * whether it is still used.
 */
uint32_t *release_ids;
/*
 * Radix tree indexed by release id; each populated slot stores a pointer
 * back to this qxl_fence (see qxl_fence_add_release_locked()).
 */
struct radix_tree_root tree;
};
struct qxl_bo {
/* Protected by gem.mutex */
struct list_head list;
......@@ -113,13 +107,13 @@ struct qxl_bo {
unsigned pin_count;
void *kptr;
int type;
/* Constant after initialization */
struct drm_gem_object gem_base;
bool is_primary; /* is this now a primary surface */
bool hw_surf_alloc;
struct qxl_surface surf;
uint32_t surface_id;
struct qxl_fence fence; /* per bo fence - list of releases */
struct qxl_release *surf_create;
};
#define gem_to_qxl_bo(gobj) container_of((gobj), struct qxl_bo, gem_base)
......@@ -191,6 +185,8 @@ enum {
* spice-protocol/qxl_dev.h */
#define QXL_MAX_RES 96
struct qxl_release {
struct fence base;
int id;
int type;
uint32_t release_offset;
......@@ -284,7 +280,9 @@ struct qxl_device {
uint8_t slot_gen_bits;
uint64_t va_slot_mask;
spinlock_t release_lock;
struct idr release_idr;
uint32_t release_seqno;
spinlock_t release_idr_lock;
struct mutex async_io_mutex;
unsigned int last_sent_io_cmd;
......@@ -561,10 +559,4 @@ qxl_surface_lookup(struct drm_device *dev, int surface_id);
void qxl_surface_evict(struct qxl_device *qdev, struct qxl_bo *surf, bool freeing);
int qxl_update_surface(struct qxl_device *qdev, struct qxl_bo *surf);
/* qxl_fence.c */
void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id);
int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id);
int qxl_fence_init(struct qxl_device *qdev, struct qxl_fence *qfence);
void qxl_fence_fini(struct qxl_fence *qfence);
#endif
/*
* Copyright 2013 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Dave Airlie
* Alon Levy
*/
#include "qxl_drv.h"
/* QXL fencing-
When we submit operations to the GPU we pass a release reference to the GPU
with them, the release reference is then added to the release ring when
the GPU is finished with that particular operation and has removed it from
its tree.
So we can have multiple outstanding non-linear fences per object.
From a TTM POV we only care if the object has any outstanding releases on
it.
We wait until all outstanding releases are processed.
sync object is just a list of release ids that represent that fence on
that buffer.
we just add new releases onto the sync object attached to the object.
This currently uses a radix tree to store the list of release ids.
For some reason every so often qxl hw fails to release, things go wrong.
*/
/*
 * Record release @rel_id as outstanding on @qfence.
 *
 * Must be called with the fence lock held.
 *
 * The active-release counter is bumped only when the id was actually stored
 * in the radix tree. qxl_fence_remove_release() decrements the counter only
 * for ids it finds in the tree, so counting an insertion that failed (for
 * example because the id is already present or a node could not be
 * allocated) would leave num_active_releases above zero with nothing left
 * to bring it back down.
 */
void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id)
{
	/* radix_tree_insert() returns 0 on success, a negative errno otherwise. */
	if (radix_tree_insert(&qfence->tree, rel_id, qfence) != 0)
		return;

	qfence->num_active_releases++;
}
/*
 * Drop release @rel_id from @qfence.
 *
 * Takes the fence_lock of the buffer object that embeds @qfence, deletes
 * the id from the radix tree and, if the deleted slot pointed back at
 * @qfence, decrements the active-release counter.
 *
 * Returns 0 when the id was found, -ENOENT otherwise.
 */
int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id)
{
	struct qxl_bo *bo = container_of(qfence, struct qxl_bo, fence);
	int status = -ENOENT;
	void *entry;

	spin_lock(&bo->tbo.bdev->fence_lock);

	entry = radix_tree_delete(&qfence->tree, rel_id);
	if (entry != qfence) {
		DRM_DEBUG("didn't find fence in radix tree for %d\n", rel_id);
	} else {
		qfence->num_active_releases--;
		status = 0;
	}

	spin_unlock(&bo->tbo.bdev->fence_lock);

	return status;
}
/*
 * Prepare @qfence for use: bind it to @qdev and start with an empty radix
 * tree and no active releases.
 *
 * release_ids is cleared explicitly because qxl_fence_fini() hands it to
 * kfree(); without this the pointer would only be valid if the caller had
 * zeroed the containing object beforehand.
 *
 * Always returns 0.
 */
int qxl_fence_init(struct qxl_device *qdev, struct qxl_fence *qfence)
{
	qfence->qdev = qdev;
	qfence->num_active_releases = 0;
	qfence->release_ids = NULL;
	INIT_RADIX_TREE(&qfence->tree, GFP_ATOMIC);
	return 0;
}
/*
 * Tear down @qfence: free the release_ids array and reset the
 * active-release counter.
 *
 * The pointer is cleared after kfree() so that calling this again on the
 * same object frees NULL (a no-op) instead of freeing the array twice.
 */
void qxl_fence_fini(struct qxl_fence *qfence)
{
	kfree(qfence->release_ids);
	qfence->release_ids = NULL;
	qfence->num_active_releases = 0;
}
......@@ -223,6 +223,7 @@ static int qxl_device_init(struct qxl_device *qdev,
idr_init(&qdev->release_idr);
spin_lock_init(&qdev->release_idr_lock);
spin_lock_init(&qdev->release_lock);
idr_init(&qdev->surf_id_idr);
spin_lock_init(&qdev->surf_id_idr_lock);
......
......@@ -36,7 +36,6 @@ static void qxl_ttm_bo_destroy(struct ttm_buffer_object *tbo)
qdev = (struct qxl_device *)bo->gem_base.dev->dev_private;
qxl_surface_evict(qdev, bo, false);
qxl_fence_fini(&bo->fence);
mutex_lock(&qdev->gem.mutex);
list_del_init(&bo->list);
mutex_unlock(&qdev->gem.mutex);
......@@ -102,7 +101,6 @@ int qxl_bo_create(struct qxl_device *qdev,
bo->type = domain;
bo->pin_count = pinned ? 1 : 0;
bo->surface_id = 0;
qxl_fence_init(qdev, &bo->fence);
INIT_LIST_HEAD(&bo->list);
if (surf)
......
......@@ -76,12 +76,10 @@ static inline int qxl_bo_wait(struct qxl_bo *bo, u32 *mem_type,
}
return r;
}
spin_lock(&bo->tbo.bdev->fence_lock);
if (mem_type)
*mem_type = bo->tbo.mem.mem_type;
if (bo->tbo.sync_obj)
r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
spin_unlock(&bo->tbo.bdev->fence_lock);
r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
ttm_bo_unreserve(&bo->tbo);
return r;
}
......
......@@ -21,6 +21,7 @@
*/
#include "qxl_drv.h"
#include "qxl_object.h"
#include <trace/events/fence.h>
/*
* drawable cmd cache - allocate a bunch of VRAM pages, suballocate
......@@ -39,6 +40,88 @@
static const int release_size_per_bo[] = { RELEASE_SIZE, SURFACE_RELEASE_SIZE, RELEASE_SIZE };
static const int releases_per_bo[] = { RELEASES_PER_BO, SURFACE_RELEASES_PER_BO, RELEASES_PER_BO };
/* fence_ops.get_driver_name callback: every qxl fence reports "qxl". */
static const char *qxl_get_driver_name(struct fence *fence)
{
	static const char driver_name[] = "qxl";

	return driver_name;
}
/* fence_ops callback: report the timeline name; @fence is not inspected. */
static const char *qxl_get_timeline_name(struct fence *fence)
{
	const char *name = "release";

	return name;
}
/*
 * fence_ops .enable_signaling callback.
 *
 * Fences are always automatically signaled, so there is nothing to arm:
 * unconditionally report success without looking at @fence.
 */
static bool qxl_nop_signaling(struct fence *fence)
{
	(void)fence;

	return true;
}
/*
 * qxl_fence_wait - fence_ops .wait callback for qxl release fences.
 * @fence:   fence embedded in a struct qxl_release (member "base")
 * @intr:    not referenced anywhere in this body
 * @timeout: wait budget in jiffies, added to the current jiffies value
 *
 * Repeatedly pokes the device (OOM notification plus garbage collection)
 * and re-checks the fence until it is signaled, the deadline passes, or the
 * retry policy below gives up.
 *
 * Returns the number of jiffies left before the deadline when control
 * reaches the "signaled" label, or 0 once the deadline has passed.
 */
static long qxl_fence_wait(struct fence *fence, bool intr, signed long timeout)
{
struct qxl_device *qdev;
struct qxl_release *release;
int count = 0, sc = 0;
bool have_drawable_releases;
unsigned long cur, end = jiffies + timeout;
/* the fence lock is the device's release_lock, so the device is recovered from it */
qdev = container_of(fence->lock, struct qxl_device, release_lock);
release = container_of(fence, struct qxl_release, base);
have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE;
retry:
/* sc counts how many passes through the retry loop have been started */
sc++;
if (fence_is_signaled_locked(fence))
goto signaled;
qxl_io_notify_oom(qdev);
/* up to 11 garbage-collection rounds; stop early when one returns false */
for (count = 0; count < 11; count++) {
if (!qxl_queue_garbage_collect(qdev, true))
break;
if (fence_is_signaled_locked(fence))
goto signaled;
}
if (fence_is_signaled_locked(fence))
goto signaled;
/* drawable releases keep retrying; other types only for the first 3 passes */
if (have_drawable_releases || sc < 4) {
if (sc > 2)
/* back off */
usleep_range(500, 1000);
if (time_after(jiffies, end))
return 0;
if (have_drawable_releases && sc > 300) {
FENCE_WARN(fence, "failed to wait on release %d "
"after spincount %d\n",
fence->context & ~0xf0000000, sc);
/*
 * NOTE(review): this path reaches "signaled" without the fence
 * having been observed signaled, so the remaining time is
 * returned as if the wait had succeeded - confirm this is intended.
 */
goto signaled;
}
goto retry;
}
/*
* yeah, original sync_obj_wait gave up after 3 spins when
* have_drawable_releases is not set.
*/
signaled:
/* report the remaining budget, clamped to 0 once the deadline is behind us */
cur = jiffies;
if (time_after(cur, end))
return 0;
return end - cur;
}
/* Callback table for qxl fences; no .signaled or .release hook is set. */
static const struct fence_ops qxl_fence_ops = {
	.wait			= qxl_fence_wait,
	.enable_signaling	= qxl_nop_signaling,
	.get_timeline_name	= qxl_get_timeline_name,
	.get_driver_name	= qxl_get_driver_name,
};
static uint64_t
qxl_release_alloc(struct qxl_device *qdev, int type,
struct qxl_release **ret)
......@@ -46,13 +129,13 @@ qxl_release_alloc(struct qxl_device *qdev, int type,
struct qxl_release *release;
int handle;
size_t size = sizeof(*release);
int idr_ret;
release = kmalloc(size, GFP_KERNEL);
if (!release) {
DRM_ERROR("Out of memory\n");
return 0;
}
release->base.ops = NULL;
release->type = type;
release->release_offset = 0;
release->surface_release_id = 0;
......@@ -60,44 +143,59 @@ qxl_release_alloc(struct qxl_device *qdev, int type,
idr_preload(GFP_KERNEL);
spin_lock(&qdev->release_idr_lock);
idr_ret = idr_alloc(&qdev->release_idr, release, 1, 0, GFP_NOWAIT);
handle = idr_alloc(&qdev->release_idr, release, 1, 0, GFP_NOWAIT);
release->base.seqno = ++qdev->release_seqno;
spin_unlock(&qdev->release_idr_lock);
idr_preload_end();
handle = idr_ret;
if (idr_ret < 0)
goto release_fail;
if (handle < 0) {
kfree(release);
*ret = NULL;
return handle;
}
*ret = release;
QXL_INFO(qdev, "allocated release %lld\n", handle);
release->id = handle;
release_fail:
return handle;
}
static void
qxl_release_free_list(struct qxl_release *release)
{
while (!list_empty(&release->bos)) {
struct ttm_validate_buffer *entry;
entry = container_of(release->bos.next,
struct ttm_validate_buffer, head);
list_del(&entry->head);
kfree(entry);
}
}
/*
 * qxl_release_free - tear down a release and drop its id from the idr.
 * @qdev:    device owning the release idr
 * @release: release to destroy
 *
 * Deallocates the surface id when one is set, walks release->bos, removes
 * release->id from qdev->release_idr under release_idr_lock, and finally
 * disposes of the release itself.
 *
 * NOTE(review): as the lines appear here, kfree(release) comes before the
 * release->base.ops check, and the entries freed in the loop are still
 * linked on release->bos when qxl_release_free_list() walks that list.
 * This text looks like pre- and post-change lines shown together - confirm
 * against the real file before relying on the statement order below.
 */
void
qxl_release_free(struct qxl_device *qdev,
struct qxl_release *release)
{
struct qxl_bo_list *entry, *tmp;
QXL_INFO(qdev, "release %d, type %d\n", release->id,
release->type);
if (release->surface_release_id)
qxl_surface_id_dealloc(qdev, release->surface_release_id);
/* per buffer: detach this release from the bo's fence, drop the bo ref, free the entry */
list_for_each_entry_safe(entry, tmp, &release->bos, tv.head) {
struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
QXL_INFO(qdev, "release %llx\n",
drm_vma_node_offset_addr(&entry->tv.bo->vma_node)
- DRM_FILE_OFFSET);
qxl_fence_remove_release(&bo->fence, release->id);
qxl_bo_unref(&bo);
kfree(entry);
}
spin_lock(&qdev->release_idr_lock);
idr_remove(&qdev->release_idr, release->id);
spin_unlock(&qdev->release_idr_lock);
kfree(release);
/*
 * base.ops is set to NULL in qxl_release_alloc(); a non-NULL value means
 * the fence was initialised, so it is signaled and the reference dropped
 * instead of freeing the release directly.
 */
if (release->base.ops) {
WARN_ON(list_empty(&release->bos));
qxl_release_free_list(release);
fence_signal(&release->base);
fence_put(&release->base);
} else {
qxl_release_free_list(release);
kfree(release);
}
}
static int qxl_release_bo_alloc(struct qxl_device *qdev,
......@@ -142,6 +240,10 @@ static int qxl_release_validate_bo(struct qxl_bo *bo)
return ret;
}
ret = reservation_object_reserve_shared(bo->tbo.resv);
if (ret)
return ret;
/* allocate a surface for reserved + validated buffers */
ret = qxl_bo_check_id(bo->gem_base.dev->dev_private, bo);
if (ret)
......@@ -159,7 +261,7 @@ int qxl_release_reserve_list(struct qxl_release *release, bool no_intr)
if (list_is_singular(&release->bos))
return 0;
ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos);
ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos, !no_intr);
if (ret)
return ret;
......@@ -199,6 +301,8 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev,
/* stash the release after the create command */
idr_ret = qxl_release_alloc(qdev, QXL_RELEASE_SURFACE_CMD, release);
if (idr_ret < 0)
return idr_ret;
bo = qxl_bo_ref(to_qxl_bo(entry->tv.bo));
(*release)->release_offset = create_rel->release_offset + 64;
......@@ -239,6 +343,11 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
}
idr_ret = qxl_release_alloc(qdev, type, release);
if (idr_ret < 0) {
if (rbo)
*rbo = NULL;
return idr_ret;
}
mutex_lock(&qdev->release_mutex);
if (qdev->current_release_bo_offset[cur_idx] + 1 >= releases_per_bo[cur_idx]) {
......@@ -319,12 +428,13 @@ void qxl_release_unmap(struct qxl_device *qdev,
void qxl_release_fence_buffer_objects(struct qxl_release *release)
{
struct ttm_validate_buffer *entry;
struct ttm_buffer_object *bo;
struct ttm_bo_global *glob;
struct ttm_bo_device *bdev;
struct ttm_bo_driver *driver;
struct qxl_bo *qbo;
struct ttm_validate_buffer *entry;
struct qxl_device *qdev;
/* if only one object is on the release, it's the release itself;
since these objects are pinned there is no need to reserve */
......@@ -333,26 +443,32 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
bo = list_first_entry(&release->bos, struct ttm_validate_buffer, head)->bo;
bdev = bo->bdev;
qdev = container_of(bdev, struct qxl_device, mman.bdev);
/*
* Since we never really allocated a context and we don't want to conflict,
* set the highest bits. This will break if we really allow exporting of dma-bufs.
*/
fence_init(&release->base, &qxl_fence_ops, &qdev->release_lock,
release->id | 0xf0000000, release->base.seqno);
trace_fence_emit(&release->base);
driver = bdev->driver;
glob = bo->glob;
spin_lock(&glob->lru_lock);
spin_lock(&bdev->fence_lock);
/* acquire release_lock to protect bo->resv->fence and its contents */
spin_lock(&qdev->release_lock);
list_for_each_entry(entry, &release->bos, head) {
bo = entry->bo;
qbo = to_qxl_bo(bo);
if (!entry->bo->sync_obj)
entry->bo->sync_obj = &qbo->fence;
qxl_fence_add_release_locked(&qbo->fence, release->id);
reservation_object_add_shared_fence(bo->resv, &release->base);
ttm_bo_add_to_lru(bo);
__ttm_bo_unreserve(bo);
entry->reserved = false;
}
spin_unlock(&bdev->fence_lock);
spin_unlock(&qdev->release_lock);
spin_unlock(&glob->lru_lock);
ww_acquire_fini(&release->ticket);
}
......
......@@ -357,92 +357,6 @@ static int qxl_bo_move(struct ttm_buffer_object *bo,
return ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
}
/*
 * qxl_sync_obj_wait - wait until a bo's qxl_fence has no active releases.
 * @sync_obj:      the struct qxl_fence embedded in a struct qxl_bo ("fence")
 * @lazy:          not referenced in this body
 * @interruptible: not referenced in this body
 *
 * Returns 0 when num_active_releases has dropped to zero or when the retry
 * policy gives up quietly, and -EBUSY after more than 300 passes while
 * drawable releases are still outstanding.
 */
static int qxl_sync_obj_wait(void *sync_obj,
bool lazy, bool interruptible)
{
struct qxl_fence *qfence = (struct qxl_fence *)sync_obj;
int count = 0, sc = 0;
struct qxl_bo *bo = container_of(qfence, struct qxl_bo, fence);
if (qfence->num_active_releases == 0)
return 0;
retry:
/* first pass: update a surface bo; every later pass: send an OOM notification */
if (sc == 0) {
if (bo->type == QXL_GEM_DOMAIN_SURFACE)
qxl_update_surface(qfence->qdev, bo);
} else if (sc >= 1) {
qxl_io_notify_oom(qfence->qdev);
}
sc++;
/* up to 10 garbage-collection rounds; stop early when one returns false */
for (count = 0; count < 10; count++) {
bool ret;
ret = qxl_queue_garbage_collect(qfence->qdev, true);
if (ret == false)
break;
if (qfence->num_active_releases == 0)
return 0;
}
if (qfence->num_active_releases) {
bool have_drawable_releases = false;
void **slot;
struct radix_tree_iter iter;
int release_id;
/* scan the release ids stored in the tree for any drawable release */
radix_tree_for_each_slot(slot, &qfence->tree, &iter, 0) {
struct qxl_release *release;
release_id = iter.index;
release = qxl_release_from_id_locked(qfence->qdev, release_id);
if (release == NULL)
continue;
if (release->type == QXL_RELEASE_DRAWABLE)
have_drawable_releases = true;
}
qxl_queue_garbage_collect(qfence->qdev, true);
/* drawable releases keep retrying; otherwise only the first 3 passes do */
if (have_drawable_releases || sc < 4) {
if (sc > 2)
/* back off */
usleep_range(500, 1000);
if (have_drawable_releases && sc > 300) {
WARN(1, "sync obj %d still has outstanding releases %d %d %d %ld %d\n", sc, bo->surface_id, bo->is_primary, bo->pin_count, (unsigned long)bo->gem_base.size, qfence->num_active_releases);
return -EBUSY;
}
goto retry;
}
}
return 0;
}
/* Flushing a qxl sync object is a no-op; always reports success (0). */
static int qxl_sync_obj_flush(void *sync_obj)
{
	(void)sync_obj;

	return 0;
}
/* Drop a sync object "reference": just clear the caller's pointer. */
static void qxl_sync_obj_unref(void **sync_obj)
{
	sync_obj[0] = NULL;
}
/* Take a sync object "reference": hands back the same pointer unchanged. */
static void *qxl_sync_obj_ref(void *sync_obj)
{
	void *ref = sync_obj;

	return ref;
}
/* A qxl sync object counts as signaled once it has no active releases. */
static bool qxl_sync_obj_signaled(void *sync_obj)
{
	struct qxl_fence *qfence = sync_obj;

	return !qfence->num_active_releases;
}
static void qxl_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *new_mem)
{
......@@ -469,16 +383,9 @@ static struct ttm_bo_driver qxl_bo_driver = {
.verify_access = &qxl_verify_access,
.io_mem_reserve = &qxl_ttm_io_mem_reserve,
.io_mem_free = &qxl_ttm_io_mem_free,
.sync_obj_signaled = &qxl_sync_obj_signaled,
.sync_obj_wait = &qxl_sync_obj_wait,
.sync_obj_flush = &qxl_sync_obj_flush,
.sync_obj_unref = &qxl_sync_obj_unref,
.sync_obj_ref = &qxl_sync_obj_ref,
.move_notify = &qxl_bo_move_notify,
};
int qxl_ttm_init(struct qxl_device *qdev)
{
int r;
......
......@@ -66,6 +66,7 @@
#include <linux/kref.h>
#include <linux/interval_tree.h>
#include <linux/hashtable.h>
#include <linux/fence.h>
#include <ttm/ttm_bo_api.h>
#include <ttm/ttm_bo_driver.h>
......@@ -354,17 +355,19 @@ struct radeon_fence_driver {
/* sync_seq is protected by ring emission lock */
uint64_t sync_seq[RADEON_NUM_RINGS];
atomic64_t last_seq;
bool initialized;
bool initialized, delayed_irq;
struct delayed_work lockup_work;
};
struct radeon_fence {
struct fence base;
struct radeon_device *rdev;
struct kref kref;
/* protected by radeon_fence.lock */
uint64_t seq;
/* RB, DMA, etc. */
unsigned ring;
wait_queue_t fence_wake;
};
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
......@@ -782,6 +785,7 @@ struct radeon_irq {
int radeon_irq_kms_init(struct radeon_device *rdev);
void radeon_irq_kms_fini(struct radeon_device *rdev);
void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring);
bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring);
void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring);
void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc);
void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
......@@ -2308,6 +2312,7 @@ struct radeon_device {
struct radeon_mman mman;
struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS];
wait_queue_head_t fence_queue;
unsigned fence_context;
struct mutex ring_lock;
struct radeon_ring ring[RADEON_NUM_RINGS];
bool ib_pool_ready;
......@@ -2441,7 +2446,17 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v);
/*
* Cast helper
*/
#define to_radeon_fence(p) ((struct radeon_fence *)(p))
extern const struct fence_ops radeon_fence_ops;
/*
 * Convert a generic fence into the radeon_fence that embeds it.
 * Returns NULL when the fence's ops table is not radeon_fence_ops.
 */
static inline struct radeon_fence *to_radeon_fence(struct fence *f)
{
	struct radeon_fence *rfence = container_of(f, struct radeon_fence, base);

	return rfence->base.ops == &radeon_fence_ops ? rfence : NULL;
}
/*
* Registers read & write functions.
......
......@@ -253,11 +253,17 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
int i;
for (i = 0; i < p->nrelocs; i++) {
struct reservation_object *resv;
struct fence *fence;
if (!p->relocs[i].robj)
continue;
resv = p->relocs[i].robj->tbo.resv;
fence = reservation_object_get_excl(resv);
radeon_semaphore_sync_to(p->ib.semaphore,
p->relocs[i].robj->tbo.sync_obj);
(struct radeon_fence *)fence);
}
}
......@@ -427,7 +433,7 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated,
parser->ib.fence);
&parser->ib.fence->base);
} else if (backoff) {
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
......
......@@ -1253,6 +1253,7 @@ int radeon_device_init(struct radeon_device *rdev,
for (i = 0; i < RADEON_NUM_RINGS; i++) {
rdev->ring[i].idx = i;
}
rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS);
DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n",
radeon_family_name[rdev->family], pdev->vendor, pdev->device,
......
......@@ -476,11 +476,6 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
obj = new_radeon_fb->obj;
new_rbo = gem_to_radeon_bo(obj);
spin_lock(&new_rbo->tbo.bdev->fence_lock);
if (new_rbo->tbo.sync_obj)
work->fence = radeon_fence_ref(new_rbo->tbo.sync_obj);
spin_unlock(&new_rbo->tbo.bdev->fence_lock);
/* pin the new buffer */
DRM_DEBUG_DRIVER("flip-ioctl() cur_rbo = %p, new_rbo = %p\n",
work->old_rbo, new_rbo);
......@@ -499,6 +494,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
DRM_ERROR("failed to pin new rbo buffer before flip\n");
goto cleanup;
}
work->fence = (struct radeon_fence *)fence_get(reservation_object_get_excl(new_rbo->tbo.resv));
radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL);
radeon_bo_unreserve(new_rbo);
......@@ -582,7 +578,6 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
radeon_fence_unref(&work->fence);
kfree(work);
return r;
}
......
......@@ -130,21 +130,59 @@ int radeon_fence_emit(struct radeon_device *rdev,
struct radeon_fence **fence,
int ring)
{
u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];
/* we are protected by the ring emission mutex */
*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
if ((*fence) == NULL) {
return -ENOMEM;
}
kref_init(&((*fence)->kref));
(*fence)->rdev = rdev;
(*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
(*fence)->seq = seq;
(*fence)->ring = ring;
fence_init(&(*fence)->base, &radeon_fence_ops,
&rdev->fence_queue.lock, rdev->fence_context + ring, seq);
radeon_fence_ring_emit(rdev, ring, *fence);
trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
radeon_fence_schedule_check(rdev, ring);
return 0;
}
/**
 * radeon_fence_check_signaled - callback from fence_queue
 *
 * Called with the fence_queue lock held. The same lock is used for the
 * fence itself, so the already-locked variants (fence_signal_locked,
 * __remove_wait_queue) are used here. Always returns 0.
 */
static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
{
	struct radeon_fence *rfence =
		container_of(wait, struct radeon_fence, fence_wake);
	u64 last_seq;
	int already;

	/*
	 * radeon_fence_process cannot be used here: we are already inside
	 * the waitqueue, being called from wake_up_all.
	 */
	last_seq = atomic64_read(&rfence->rdev->fence_drv[rfence->ring].last_seq);
	if (last_seq < rfence->seq) {
		FENCE_TRACE(&rfence->base, "pending\n");
		return 0;
	}

	already = fence_signal_locked(&rfence->base);
	if (already)
		FENCE_TRACE(&rfence->base, "was already signaled\n");
	else
		FENCE_TRACE(&rfence->base, "signaled from irq context\n");

	/* undo what arming did: sw irq reference, waitqueue entry, fence ref */
	radeon_irq_kms_sw_irq_put(rfence->rdev, rfence->ring);
	__remove_wait_queue(&rfence->rdev->fence_queue, &rfence->fence_wake);
	fence_put(&rfence->base);

	return 0;
}
/**
* radeon_fence_activity - check for fence activity
*
......@@ -242,6 +280,15 @@ static void radeon_fence_check_lockup(struct work_struct *work)
return;
}
if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
unsigned long irqflags;
fence_drv->delayed_irq = false;
spin_lock_irqsave(&rdev->irq.lock, irqflags);
radeon_irq_set(rdev);
spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
}
if (radeon_fence_activity(rdev, ring))
wake_up_all(&rdev->fence_queue);
......@@ -275,21 +322,6 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
wake_up_all(&rdev->fence_queue);
}
/**
 * radeon_fence_destroy - kref release handler for a radeon fence
 *
 * @kref: the kref embedded in the fence being destroyed
 *
 * Frees the radeon_fence that contains @kref (all asics).
 */
static void radeon_fence_destroy(struct kref *kref)
{
	kfree(container_of(kref, struct radeon_fence, kref));
}
/**
* radeon_fence_seq_signaled - check if a fence sequence number has signaled
*
......@@ -318,6 +350,75 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
return false;
}
/*
 * fence_ops .signaled callback.
 *
 * Compares the ring's last_seq against the fence's sequence number. If the
 * fence is not yet reached and exclusive_lock can be taken for reading
 * without blocking, fence processing is run once and the comparison is
 * repeated.
 */
static bool radeon_fence_is_signaled(struct fence *f)
{
	struct radeon_fence *rfence = to_radeon_fence(f);
	struct radeon_device *rdev = rfence->rdev;
	unsigned ring = rfence->ring;
	u64 wanted = rfence->seq;

	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= wanted)
		return true;

	if (!down_read_trylock(&rdev->exclusive_lock))
		return false;

	radeon_fence_process(rdev, ring);
	up_read(&rdev->exclusive_lock);

	return atomic64_read(&rdev->fence_drv[ring].last_seq) >= wanted;
}
/**
 * radeon_fence_enable_signaling - enable signalling on fence
 * @f: fence to arm; converted with to_radeon_fence()
 *
 * This function is called with fence_queue lock held, and adds a callback
 * to fence_queue that checks if this fence is signaled, and if so it
 * signals the fence and removes itself.
 *
 * Returns false when the ring's last_seq has already reached the fence's
 * sequence number (nothing is armed in that case), true once the wait-queue
 * callback has been installed and an extra fence reference taken.
 */
static bool radeon_fence_enable_signaling(struct fence *f)
{
struct radeon_fence *fence = to_radeon_fence(f);
struct radeon_device *rdev = fence->rdev;
/* fast path: sequence number already reached */
if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
return false;
if (down_read_trylock(&rdev->exclusive_lock)) {
radeon_irq_kms_sw_irq_get(rdev, fence->ring);
/* fence_queue lock is already held, hence the _locked wake-up */
if (radeon_fence_activity(rdev, fence->ring))
wake_up_all_locked(&rdev->fence_queue);
/* did fence get signaled after we enabled the sw irq? */
if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
radeon_irq_kms_sw_irq_put(rdev, fence->ring);
up_read(&rdev->exclusive_lock);
return false;
}
up_read(&rdev->exclusive_lock);
} else {
/* we're probably in a lockup, lets not fiddle too much */
/* only take the irq reference; delayed_irq records that the first reference was taken here */
if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
rdev->fence_drv[fence->ring].delayed_irq = true;
radeon_fence_schedule_check(rdev, fence->ring);
}
/* install radeon_fence_check_signaled as this fence's wait-queue callback */
fence->fence_wake.flags = 0;
fence->fence_wake.private = NULL;
fence->fence_wake.func = radeon_fence_check_signaled;
__add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
/* extra reference; radeon_fence_check_signaled drops it with fence_put() */
fence_get(f);
FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
return true;
}
/**
* radeon_fence_signaled - check if a fence has signaled
*
......@@ -330,8 +431,15 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
{
if (!fence)
return true;
if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring))
if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
int ret;
ret = fence_signal(&fence->base);
if (!ret)
FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
return true;
}
return false;
}
......@@ -433,17 +541,15 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
uint64_t seq[RADEON_NUM_RINGS] = {};
long r;
if (fence == NULL) {
WARN(1, "Querying an invalid fence : %p !\n", fence);
return -EINVAL;
}
seq[fence->ring] = fence->seq;
r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
if (r < 0) {
return r;
}
r = fence_signal(&fence->base);
if (!r)
FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
return 0;
}
......@@ -557,7 +663,7 @@ int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
*/
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
kref_get(&fence->kref);
fence_get(&fence->base);
return fence;
}
......@@ -574,7 +680,7 @@ void radeon_fence_unref(struct radeon_fence **fence)
*fence = NULL;
if (tmp) {
kref_put(&tmp->kref, radeon_fence_destroy);
fence_put(&tmp->base);
}
}
......@@ -887,3 +993,72 @@ int radeon_debugfs_fence_init(struct radeon_device *rdev)
return 0;
#endif
}
/* fence_ops callback: report the driver name; @fence is not inspected. */
static const char *radeon_fence_get_driver_name(struct fence *fence)
{
	const char *name = "radeon";

	return name;
}
/*
 * fence_ops callback: map the fence's ring index to a timeline name.
 * An unrecognised ring index triggers a one-time warning and yields
 * "radeon.unk".
 */
static const char *radeon_fence_get_timeline_name(struct fence *f)
{
	struct radeon_fence *rfence = to_radeon_fence(f);
	const char *name;

	switch (rfence->ring) {
	case RADEON_RING_TYPE_GFX_INDEX:
		name = "radeon.gfx";
		break;
	case CAYMAN_RING_TYPE_CP1_INDEX:
		name = "radeon.cp1";
		break;
	case CAYMAN_RING_TYPE_CP2_INDEX:
		name = "radeon.cp2";
		break;
	case R600_RING_TYPE_DMA_INDEX:
		name = "radeon.dma";
		break;
	case CAYMAN_RING_TYPE_DMA1_INDEX:
		name = "radeon.dma1";
		break;
	case R600_RING_TYPE_UVD_INDEX:
		name = "radeon.uvd";
		break;
	case TN_RING_TYPE_VCE1_INDEX:
		name = "radeon.vce1";
		break;
	case TN_RING_TYPE_VCE2_INDEX:
		name = "radeon.vce2";
		break;
	default:
		WARN_ON_ONCE(1);
		name = "radeon.unk";
		break;
	}

	return name;
}
/* Report whether FENCE_FLAG_SIGNALED_BIT is set in the fence's flags. */
static inline bool radeon_test_signaled(struct radeon_fence *fence)
{
	bool signaled = test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);

	return signaled;
}
/*
 * radeon_fence_default_wait - fence_ops .wait callback.
 * @f:    fence to wait on; converted with to_radeon_fence()
 * @intr: selects the interruptible flavour of the wait
 * @t:    timeout handed to the wait_event_*timeout macro
 *
 * Enables software signaling, then sleeps on rdev->fence_queue until the
 * fence's signaled bit is set or rdev->needs_reset becomes true.
 *
 * Returns the value produced by the wait macro, except that a positive
 * result with the fence still unsignaled (the wake-up came from needs_reset)
 * is turned into -EDEADLK.
 */
static signed long radeon_fence_default_wait(struct fence *f, bool intr,
signed long t)
{
struct radeon_fence *fence = to_radeon_fence(f);
struct radeon_device *rdev = fence->rdev;
bool signaled;
fence_enable_sw_signaling(&fence->base);
/*
* This function has to return -EDEADLK, but cannot hold
* exclusive_lock during the wait because some callers
* may already hold it. This means checking needs_reset without
* lock, and not fiddling with any gpu internals.
*
* The callback installed with fence_enable_sw_signaling will
* run before our wait_event_*timeout call, so we will see
* both the signaled fence and the changes to needs_reset.
*/
/* "signaled" is assigned as a side effect of evaluating the wait condition */
if (intr)
t = wait_event_interruptible_timeout(rdev->fence_queue,
((signaled = radeon_test_signaled(fence)) ||
rdev->needs_reset), t);
else
t = wait_event_timeout(rdev->fence_queue,
((signaled = radeon_test_signaled(fence)) ||
rdev->needs_reset), t);
/* woken with time left but without a signaled fence */
if (t > 0 && !signaled)
return -EDEADLK;
return t;
}
/*
 * Callback table for radeon fences; to_radeon_fence() compares a fence's
 * ops pointer against this table. No .release hook is set.
 */
const struct fence_ops radeon_fence_ops = {
	.release		= NULL,
	.wait			= radeon_fence_default_wait,
	.signaled		= radeon_fence_is_signaled,
	.enable_signaling	= radeon_fence_enable_signaling,
	.get_timeline_name	= radeon_fence_get_timeline_name,
	.get_driver_name	= radeon_fence_get_driver_name,
};
......@@ -94,7 +94,7 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj,
{
struct radeon_bo *robj;
uint32_t domain;
int r;
long r;
/* FIXME: reimplement */
robj = gem_to_radeon_bo(gobj);
......@@ -110,9 +110,12 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj,
}
if (domain == RADEON_GEM_DOMAIN_CPU) {
/* Asking for cpu access wait for object idle */
r = radeon_bo_wait(robj, NULL, false);
if (r) {
printk(KERN_ERR "Failed to wait for object !\n");
r = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, 30 * HZ);
if (!r)
r = -EBUSY;
if (r < 0 && r != -EINTR) {
printk(KERN_ERR "Failed to wait for object: %li\n", r);
return r;
}
}
......@@ -449,15 +452,22 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_radeon_gem_wait_idle *args = data;
struct drm_gem_object *gobj;
struct radeon_bo *robj;
int r;
int r = 0;
uint32_t cur_placement = 0;
long ret;
gobj = drm_gem_object_lookup(dev, filp, args->handle);
if (gobj == NULL) {
return -ENOENT;
}
robj = gem_to_radeon_bo(gobj);
r = radeon_bo_wait(robj, &cur_placement, false);
ret = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, 30 * HZ);
if (ret == 0)
r = -EBUSY;
else if (ret < 0)
r = ret;
/* Flush HDP cache via MMIO if necessary */
if (rdev->asic->mmio_hdp_flush &&
radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM)
......
......@@ -323,6 +323,21 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring)
}
}
/**
 * radeon_irq_kms_sw_irq_get_delayed - take a software interrupt reference
 *
 * @rdev: radeon device pointer
 * @ring: ring whose interrupt reference count should be raised
 *
 * Only increments the per-ring software interrupt reference count; nothing
 * else is done here, so enabling the interrupt is left to the caller.
 *
 * Returns true when this call took the first reference (the count became 1),
 * false otherwise.
 */
bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring)
{
	int refs = atomic_inc_return(&rdev->irq.ring_int[ring]);

	return refs == 1;
}
/**
* radeon_irq_kms_sw_irq_put - disable software interrupt
*
......
......@@ -122,6 +122,7 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
it = interval_tree_iter_first(&rmn->objects, start, end);
while (it) {
struct radeon_bo *bo;
struct fence *fence;
int r;
bo = container_of(it, struct radeon_bo, mn_it);
......@@ -133,8 +134,9 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
continue;
}
if (bo->tbo.sync_obj) {
r = radeon_fence_wait(bo->tbo.sync_obj, false);
fence = reservation_object_get_excl(bo->tbo.resv);
if (fence) {
r = radeon_fence_wait((struct radeon_fence *)fence, false);
if (r)
DRM_ERROR("(%d) failed to wait for user bo\n", r);
}
......
......@@ -482,7 +482,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
u64 bytes_moved = 0, initial_bytes_moved;
u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
r = ttm_eu_reserve_buffers(ticket, head);
r = ttm_eu_reserve_buffers(ticket, head, true);
if (unlikely(r != 0)) {
return r;
}
......@@ -779,12 +779,10 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
if (unlikely(r != 0))
return r;
spin_lock(&bo->tbo.bdev->fence_lock);
if (mem_type)
*mem_type = bo->tbo.mem.mem_type;
if (bo->tbo.sync_obj)
r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
spin_unlock(&bo->tbo.bdev->fence_lock);
r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
ttm_bo_unreserve(&bo->tbo);
return r;
}
......@@ -270,12 +270,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
/* sync other rings */
fence = bo->sync_obj;
fence = (struct radeon_fence *)reservation_object_get_excl(bo->resv);
r = radeon_copy(rdev, old_start, new_start,
new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
&fence);
/* FIXME: handle copy error */
r = ttm_bo_move_accel_cleanup(bo, (void *)fence,
r = ttm_bo_move_accel_cleanup(bo, &fence->base,
evict, no_wait_gpu, new_mem);
radeon_fence_unref(&fence);
return r;
......@@ -488,31 +488,6 @@ static void radeon_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
{
}
/*
 * TTM sync_obj hook: wait on the radeon fence behind @sync_obj.
 * @lazy is ignored; @interruptible is forwarded to radeon_fence_wait().
 */
static int radeon_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible)
{
	struct radeon_fence *fence = sync_obj;

	return radeon_fence_wait(fence, interruptible);
}
/* TTM sync_obj hook: flushing is a no-op; always reports success (0). */
static int radeon_sync_obj_flush(void *sync_obj)
{
	(void)sync_obj;

	return 0;
}
/* TTM sync_obj hook: forward the pointer slot to radeon_fence_unref(). */
static void radeon_sync_obj_unref(void **sync_obj)
{
	struct radeon_fence **slot = (struct radeon_fence **)sync_obj;

	radeon_fence_unref(slot);
}
/* TTM sync_obj hook: return whatever radeon_fence_ref() returns for @sync_obj. */
static void *radeon_sync_obj_ref(void *sync_obj)
{
	struct radeon_fence *fence = sync_obj;

	return radeon_fence_ref(fence);
}
/* TTM sync_obj hook: report the result of radeon_fence_signaled() for @sync_obj. */
static bool radeon_sync_obj_signaled(void *sync_obj)
{
	struct radeon_fence *fence = sync_obj;

	return radeon_fence_signaled(fence);
}
/*
* TTM backend functions.
*/
......@@ -847,11 +822,6 @@ static struct ttm_bo_driver radeon_bo_driver = {
.evict_flags = &radeon_evict_flags,
.move = &radeon_bo_move,
.verify_access = &radeon_verify_access,
.sync_obj_signaled = &radeon_sync_obj_signaled,
.sync_obj_wait = &radeon_sync_obj_wait,
.sync_obj_flush = &radeon_sync_obj_flush,
.sync_obj_unref = &radeon_sync_obj_unref,
.sync_obj_ref = &radeon_sync_obj_ref,
.move_notify = &radeon_bo_move_notify,
.fault_reserve_notify = &radeon_bo_fault_reserve_notify,
.io_mem_reserve = &radeon_ttm_io_mem_reserve,
......
......@@ -400,6 +400,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
{
int32_t *msg, msg_type, handle;
unsigned img_size = 0;
struct fence *f;
void *ptr;
int i, r;
......@@ -409,8 +410,9 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
return -EINVAL;
}
if (bo->tbo.sync_obj) {
r = radeon_fence_wait(bo->tbo.sync_obj, false);
f = reservation_object_get_excl(bo->tbo.resv);
if (f) {
r = radeon_fence_wait((struct radeon_fence *)f, false);
if (r) {
DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
return r;
......
......@@ -399,7 +399,7 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
INIT_LIST_HEAD(&head);
list_add(&tv.head, &head);
r = ttm_eu_reserve_buffers(&ticket, &head);
r = ttm_eu_reserve_buffers(&ticket, &head, true);
if (r)
return r;
......@@ -424,7 +424,7 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
if (r)
goto error;
ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
ttm_eu_fence_buffer_objects(&ticket, &head, &ib.fence->base);
radeon_ib_free(rdev, &ib);
return 0;
......@@ -693,8 +693,14 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
incr, R600_PTE_VALID);
if (ib.length_dw != 0) {
struct fence *fence;
radeon_asic_vm_pad_ib(rdev, &ib);
radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
fence = reservation_object_get_excl(pd->tbo.resv);
radeon_semaphore_sync_to(ib.semaphore,
(struct radeon_fence *)fence);
radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
WARN_ON(ib.length_dw > ndw);
r = radeon_ib_schedule(rdev, &ib, NULL, false);
......@@ -820,8 +826,11 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
unsigned nptes;
uint64_t pte;
struct fence *fence;
radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj);
fence = reservation_object_get_excl(pt->tbo.resv);
radeon_semaphore_sync_to(ib->semaphore,
(struct radeon_fence *)fence);
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
......
......@@ -40,6 +40,7 @@
#include <linux/file.h>
#include <linux/module.h>
#include <linux/atomic.h>
#include <linux/reservation.h>
#define TTM_ASSERT_LOCKED(param)
#define TTM_DEBUG(fmt, arg...)
......@@ -142,7 +143,6 @@ static void ttm_bo_release_list(struct kref *list_kref)
BUG_ON(atomic_read(&bo->list_kref.refcount));
BUG_ON(atomic_read(&bo->kref.refcount));
BUG_ON(atomic_read(&bo->cpu_writers));
BUG_ON(bo->sync_obj != NULL);
BUG_ON(bo->mem.mm_node != NULL);
BUG_ON(!list_empty(&bo->lru));
BUG_ON(!list_empty(&bo->ddestroy));
......@@ -403,36 +403,48 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
ww_mutex_unlock (&bo->resv->lock);
}
/*
 * Enable software signaling on the exclusive fence and on every shared fence
 * of the buffer's reservation object, skipping fences whose ops provide a
 * .signaled callback. The shared fences are dereferenced under
 * reservation_object_held().
 */
static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
{
	struct reservation_object_list *shared_list;
	struct fence *excl;
	int idx;

	shared_list = reservation_object_get_list(bo->resv);
	excl = reservation_object_get_excl(bo->resv);

	if (excl && !excl->ops->signaled)
		fence_enable_sw_signaling(excl);

	if (!shared_list)
		return;

	for (idx = 0; idx < shared_list->shared_count; ++idx) {
		struct fence *shared;

		shared = rcu_dereference_protected(shared_list->shared[idx],
					reservation_object_held(bo->resv));

		if (!shared->ops->signaled)
			fence_enable_sw_signaling(shared);
	}
}
static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_bo_global *glob = bo->glob;
struct ttm_bo_driver *driver = bdev->driver;
void *sync_obj = NULL;
int put_count;
int ret;
spin_lock(&glob->lru_lock);
ret = __ttm_bo_reserve(bo, false, true, false, NULL);
spin_lock(&bdev->fence_lock);
(void) ttm_bo_wait(bo, false, false, true);
if (!ret && !bo->sync_obj) {
spin_unlock(&bdev->fence_lock);
put_count = ttm_bo_del_from_lru(bo);
if (!ret) {
if (!ttm_bo_wait(bo, false, false, true)) {
put_count = ttm_bo_del_from_lru(bo);
spin_unlock(&glob->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
spin_unlock(&glob->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
ttm_bo_list_ref_sub(bo, put_count, true);
ttm_bo_list_ref_sub(bo, put_count, true);
return;
}
if (bo->sync_obj)
sync_obj = driver->sync_obj_ref(bo->sync_obj);
spin_unlock(&bdev->fence_lock);
if (!ret) {
return;
} else
ttm_bo_flush_all_fences(bo);
/*
* Make NO_EVICT bos immediately available to
......@@ -451,10 +463,6 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
list_add_tail(&bo->ddestroy, &bdev->ddestroy);
spin_unlock(&glob->lru_lock);
if (sync_obj) {
driver->sync_obj_flush(sync_obj);
driver->sync_obj_unref(&sync_obj);
}
schedule_delayed_work(&bdev->wq,
((HZ / 100) < 1) ? 1 : HZ / 100);
}
......@@ -475,44 +483,26 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
bool interruptible,
bool no_wait_gpu)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_bo_driver *driver = bdev->driver;
struct ttm_bo_global *glob = bo->glob;
int put_count;
int ret;
spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, false, true);
if (ret && !no_wait_gpu) {
void *sync_obj;
/*
* Take a reference to the fence and unreserve,
* at this point the buffer should be dead, so
* no new sync objects can be attached.
*/
sync_obj = driver->sync_obj_ref(bo->sync_obj);
spin_unlock(&bdev->fence_lock);
__ttm_bo_unreserve(bo);
long lret;
ww_mutex_unlock(&bo->resv->lock);
spin_unlock(&glob->lru_lock);
ret = driver->sync_obj_wait(sync_obj, false, interruptible);
driver->sync_obj_unref(&sync_obj);
if (ret)
return ret;
lret = reservation_object_wait_timeout_rcu(bo->resv,
true,
interruptible,
30 * HZ);
/*
* remove sync_obj with ttm_bo_wait, the wait should be
* finished, and no new wait object should have been added.
*/
spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, false, true);
WARN_ON(ret);
spin_unlock(&bdev->fence_lock);
if (ret)
return ret;
if (lret < 0)
return lret;
else if (lret == 0)
return -EBUSY;
spin_lock(&glob->lru_lock);
ret = __ttm_bo_reserve(bo, false, true, false, NULL);
......@@ -529,8 +519,14 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
spin_unlock(&glob->lru_lock);
return 0;
}
} else
spin_unlock(&bdev->fence_lock);
/*
* remove sync_obj with ttm_bo_wait, the wait should be
* finished, and no new wait object should have been added.
*/
ret = ttm_bo_wait(bo, false, false, true);
WARN_ON(ret);
}
if (ret || unlikely(list_empty(&bo->ddestroy))) {
__ttm_bo_unreserve(bo);
......@@ -668,9 +664,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
struct ttm_placement placement;
int ret = 0;
spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
spin_unlock(&bdev->fence_lock);
if (unlikely(ret != 0)) {
if (ret != -ERESTARTSYS) {
......@@ -961,7 +955,6 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
{
int ret = 0;
struct ttm_mem_reg mem;
struct ttm_bo_device *bdev = bo->bdev;
lockdep_assert_held(&bo->resv->lock.base);
......@@ -970,9 +963,7 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
* Have the driver move function wait for idle when necessary,
* instead of doing it here.
*/
spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
spin_unlock(&bdev->fence_lock);
if (ret)
return ret;
mem.num_pages = bo->num_pages;
......@@ -1462,7 +1453,6 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
bdev->glob = glob;
bdev->need_dma32 = need_dma32;
bdev->val_seq = 0;
spin_lock_init(&bdev->fence_lock);
mutex_lock(&glob->device_list_mutex);
list_add_tail(&bdev->device_list, &glob->device_list);
mutex_unlock(&glob->device_list_mutex);
......@@ -1515,65 +1505,56 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
EXPORT_SYMBOL(ttm_bo_unmap_virtual);
int ttm_bo_wait(struct ttm_buffer_object *bo,
bool lazy, bool interruptible, bool no_wait)
{
struct ttm_bo_driver *driver = bo->bdev->driver;
struct ttm_bo_device *bdev = bo->bdev;
void *sync_obj;
int ret = 0;
if (likely(bo->sync_obj == NULL))
return 0;
struct reservation_object_list *fobj;
struct reservation_object *resv;
struct fence *excl;
long timeout = 15 * HZ;
int i;
while (bo->sync_obj) {
resv = bo->resv;
fobj = reservation_object_get_list(resv);
excl = reservation_object_get_excl(resv);
if (excl) {
if (!fence_is_signaled(excl)) {
if (no_wait)
return -EBUSY;
if (driver->sync_obj_signaled(bo->sync_obj)) {
void *tmp_obj = bo->sync_obj;
bo->sync_obj = NULL;
clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
spin_unlock(&bdev->fence_lock);
driver->sync_obj_unref(&tmp_obj);
spin_lock(&bdev->fence_lock);
continue;
timeout = fence_wait_timeout(excl,
interruptible, timeout);
}
}
if (no_wait)
return -EBUSY;
for (i = 0; fobj && timeout > 0 && i < fobj->shared_count; ++i) {
struct fence *fence;
fence = rcu_dereference_protected(fobj->shared[i],
reservation_object_held(resv));
sync_obj = driver->sync_obj_ref(bo->sync_obj);
spin_unlock(&bdev->fence_lock);
ret = driver->sync_obj_wait(sync_obj,
lazy, interruptible);
if (unlikely(ret != 0)) {
driver->sync_obj_unref(&sync_obj);
spin_lock(&bdev->fence_lock);
return ret;
}
spin_lock(&bdev->fence_lock);
if (likely(bo->sync_obj == sync_obj)) {
void *tmp_obj = bo->sync_obj;
bo->sync_obj = NULL;
clear_bit(TTM_BO_PRIV_FLAG_MOVING,
&bo->priv_flags);
spin_unlock(&bdev->fence_lock);
driver->sync_obj_unref(&sync_obj);
driver->sync_obj_unref(&tmp_obj);
spin_lock(&bdev->fence_lock);
} else {
spin_unlock(&bdev->fence_lock);
driver->sync_obj_unref(&sync_obj);
spin_lock(&bdev->fence_lock);
if (!fence_is_signaled(fence)) {
if (no_wait)
return -EBUSY;
timeout = fence_wait_timeout(fence,
interruptible, timeout);
}
}
if (timeout < 0)
return timeout;
if (timeout == 0)
return -EBUSY;
reservation_object_add_excl_fence(resv, NULL);
clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
return 0;
}
EXPORT_SYMBOL(ttm_bo_wait);
int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
{
struct ttm_bo_device *bdev = bo->bdev;
int ret = 0;
/*
......@@ -1583,9 +1564,7 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
ret = ttm_bo_reserve(bo, true, no_wait, false, NULL);
if (unlikely(ret != 0))
return ret;
spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, true, no_wait);
spin_unlock(&bdev->fence_lock);
if (likely(ret == 0))
atomic_inc(&bo->cpu_writers);
ttm_bo_unreserve(bo);
......@@ -1642,9 +1621,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
* Wait for GPU, then move to system cached.
*/
spin_lock(&bo->bdev->fence_lock);
ret = ttm_bo_wait(bo, false, false, false);
spin_unlock(&bo->bdev->fence_lock);
if (unlikely(ret != 0))
goto out;
......
......@@ -37,6 +37,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/reservation.h>
void ttm_bo_free_old_node(struct ttm_buffer_object *bo)
{
......@@ -444,8 +445,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
struct ttm_buffer_object **new_obj)
{
struct ttm_buffer_object *fbo;
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_bo_driver *driver = bdev->driver;
int ret;
fbo = kmalloc(sizeof(*fbo), GFP_KERNEL);
......@@ -466,12 +465,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
drm_vma_node_reset(&fbo->vma_node);
atomic_set(&fbo->cpu_writers, 0);
spin_lock(&bdev->fence_lock);
if (bo->sync_obj)
fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj);
else
fbo->sync_obj = NULL;
spin_unlock(&bdev->fence_lock);
kref_init(&fbo->list_kref);
kref_init(&fbo->kref);
fbo->destroy = &ttm_transfered_destroy;
......@@ -644,30 +637,20 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map)
EXPORT_SYMBOL(ttm_bo_kunmap);
int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
void *sync_obj,
struct fence *fence,
bool evict,
bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_bo_driver *driver = bdev->driver;
struct ttm_mem_type_manager *man = &bdev->man[new_mem->mem_type];
struct ttm_mem_reg *old_mem = &bo->mem;
int ret;
struct ttm_buffer_object *ghost_obj;
void *tmp_obj = NULL;
spin_lock(&bdev->fence_lock);
if (bo->sync_obj) {
tmp_obj = bo->sync_obj;
bo->sync_obj = NULL;
}
bo->sync_obj = driver->sync_obj_ref(sync_obj);
reservation_object_add_excl_fence(bo->resv, fence);
if (evict) {
ret = ttm_bo_wait(bo, false, false, false);
spin_unlock(&bdev->fence_lock);
if (tmp_obj)
driver->sync_obj_unref(&tmp_obj);
if (ret)
return ret;
......@@ -688,14 +671,13 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
*/
set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
spin_unlock(&bdev->fence_lock);
if (tmp_obj)
driver->sync_obj_unref(&tmp_obj);
ret = ttm_buffer_object_transfer(bo, &ghost_obj);
if (ret)
return ret;
reservation_object_add_excl_fence(ghost_obj->resv, fence);
/**
* If we're not moving to fixed memory, the TTM object
* needs to stay alive. Otherwise hang it on the ghost
......
......@@ -45,10 +45,8 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct ttm_bo_device *bdev = bo->bdev;
int ret = 0;
spin_lock(&bdev->fence_lock);
if (likely(!test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)))
goto out_unlock;
......@@ -82,7 +80,6 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
VM_FAULT_NOPAGE;
out_unlock:
spin_unlock(&bdev->fence_lock);
return ret;
}
......
......@@ -32,20 +32,12 @@
#include <linux/sched.h>
#include <linux/module.h>
static void ttm_eu_backoff_reservation_locked(struct list_head *list)
static void ttm_eu_backoff_reservation_reverse(struct list_head *list,
struct ttm_validate_buffer *entry)
{
struct ttm_validate_buffer *entry;
list_for_each_entry(entry, list, head) {
list_for_each_entry_continue_reverse(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
if (!entry->reserved)
continue;
entry->reserved = false;
if (entry->removed) {
ttm_bo_add_to_lru(bo);
entry->removed = false;
}
__ttm_bo_unreserve(bo);
}
}
......@@ -56,27 +48,9 @@ static void ttm_eu_del_from_lru_locked(struct list_head *list)
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
if (!entry->reserved)
continue;
if (!entry->removed) {
entry->put_count = ttm_bo_del_from_lru(bo);
entry->removed = true;
}
}
}
static void ttm_eu_list_ref_sub(struct list_head *list)
{
struct ttm_validate_buffer *entry;
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
unsigned put_count = ttm_bo_del_from_lru(bo);
if (entry->put_count) {
ttm_bo_list_ref_sub(bo, entry->put_count, true);
entry->put_count = 0;
}
ttm_bo_list_ref_sub(bo, put_count, true);
}
}
......@@ -91,11 +65,18 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
entry = list_first_entry(list, struct ttm_validate_buffer, head);
glob = entry->bo->glob;
spin_lock(&glob->lru_lock);
ttm_eu_backoff_reservation_locked(list);
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
ttm_bo_add_to_lru(bo);
__ttm_bo_unreserve(bo);
}
spin_unlock(&glob->lru_lock);
if (ticket)
ww_acquire_fini(ticket);
spin_unlock(&glob->lru_lock);
}
EXPORT_SYMBOL(ttm_eu_backoff_reservation);
......@@ -112,7 +93,7 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation);
*/
int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
struct list_head *list)
struct list_head *list, bool intr)
{
struct ttm_bo_global *glob;
struct ttm_validate_buffer *entry;
......@@ -121,60 +102,55 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
if (list_empty(list))
return 0;
list_for_each_entry(entry, list, head) {
entry->reserved = false;
entry->put_count = 0;
entry->removed = false;
}
entry = list_first_entry(list, struct ttm_validate_buffer, head);
glob = entry->bo->glob;
if (ticket)
ww_acquire_init(ticket, &reservation_ww_class);
retry:
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
/* already slowpath reserved? */
if (entry->reserved)
ret = __ttm_bo_reserve(bo, intr, (ticket == NULL), true,
ticket);
if (!ret && unlikely(atomic_read(&bo->cpu_writers) > 0)) {
__ttm_bo_unreserve(bo);
ret = -EBUSY;
}
if (!ret)
continue;
ret = __ttm_bo_reserve(bo, true, (ticket == NULL), true,
ticket);
/* uh oh, we lost out, drop every reservation and try
* to only reserve this buffer, then start over if
* this succeeds.
*/
ttm_eu_backoff_reservation_reverse(list, entry);
if (ret == -EDEADLK) {
/* uh oh, we lost out, drop every reservation and try
* to only reserve this buffer, then start over if
* this succeeds.
*/
BUG_ON(ticket == NULL);
spin_lock(&glob->lru_lock);
ttm_eu_backoff_reservation_locked(list);
spin_unlock(&glob->lru_lock);
ttm_eu_list_ref_sub(list);
if (ret == -EDEADLK && intr) {
ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
ticket);
if (unlikely(ret != 0)) {
if (ret == -EINTR)
ret = -ERESTARTSYS;
goto err_fini;
}
} else if (ret == -EDEADLK) {
ww_mutex_lock_slow(&bo->resv->lock, ticket);
ret = 0;
}
entry->reserved = true;
if (unlikely(atomic_read(&bo->cpu_writers) > 0)) {
ret = -EBUSY;
goto err;
if (unlikely(ret != 0)) {
if (ret == -EINTR)
ret = -ERESTARTSYS;
if (ticket) {
ww_acquire_done(ticket);
ww_acquire_fini(ticket);
}
goto retry;
} else if (ret)
goto err;
entry->reserved = true;
if (unlikely(atomic_read(&bo->cpu_writers) > 0)) {
ret = -EBUSY;
goto err;
return ret;
}
/* move this item to the front of the list,
* forces correct iteration of the loop without keeping track
*/
list_del(&entry->head);
list_add(&entry->head, list);
}
if (ticket)
......@@ -182,25 +158,12 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
spin_lock(&glob->lru_lock);
ttm_eu_del_from_lru_locked(list);
spin_unlock(&glob->lru_lock);
ttm_eu_list_ref_sub(list);
return 0;
err:
spin_lock(&glob->lru_lock);
ttm_eu_backoff_reservation_locked(list);
spin_unlock(&glob->lru_lock);
ttm_eu_list_ref_sub(list);
err_fini:
if (ticket) {
ww_acquire_done(ticket);
ww_acquire_fini(ticket);
}
return ret;
}
EXPORT_SYMBOL(ttm_eu_reserve_buffers);
void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
struct list_head *list, void *sync_obj)
struct list_head *list, struct fence *fence)
{
struct ttm_validate_buffer *entry;
struct ttm_buffer_object *bo;
......@@ -217,24 +180,15 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
glob = bo->glob;
spin_lock(&glob->lru_lock);
spin_lock(&bdev->fence_lock);
list_for_each_entry(entry, list, head) {
bo = entry->bo;
entry->old_sync_obj = bo->sync_obj;
bo->sync_obj = driver->sync_obj_ref(sync_obj);
reservation_object_add_excl_fence(bo->resv, fence);
ttm_bo_add_to_lru(bo);
__ttm_bo_unreserve(bo);
entry->reserved = false;
}
spin_unlock(&bdev->fence_lock);
spin_unlock(&glob->lru_lock);
if (ticket)
ww_acquire_fini(ticket);
list_for_each_entry(entry, list, head) {
if (entry->old_sync_obj)
driver->sync_obj_unref(&entry->old_sync_obj);
}
}
EXPORT_SYMBOL(ttm_eu_fence_buffer_objects);
......@@ -801,44 +801,6 @@ static int vmw_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
return 0;
}
/**
* FIXME: We're using the old vmware polling method to sync.
* Do this with fences instead.
*/
static void *vmw_sync_obj_ref(void *sync_obj)
{
return (void *)
vmw_fence_obj_reference((struct vmw_fence_obj *) sync_obj);
}
static void vmw_sync_obj_unref(void **sync_obj)
{
vmw_fence_obj_unreference((struct vmw_fence_obj **) sync_obj);
}
static int vmw_sync_obj_flush(void *sync_obj)
{
vmw_fence_obj_flush((struct vmw_fence_obj *) sync_obj);
return 0;
}
static bool vmw_sync_obj_signaled(void *sync_obj)
{
return vmw_fence_obj_signaled((struct vmw_fence_obj *) sync_obj,
DRM_VMW_FENCE_FLAG_EXEC);
}
static int vmw_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible)
{
return vmw_fence_obj_wait((struct vmw_fence_obj *) sync_obj,
DRM_VMW_FENCE_FLAG_EXEC,
lazy, interruptible,
VMW_FENCE_WAIT_TIMEOUT);
}
/**
* vmw_move_notify - TTM move_notify_callback
*
......@@ -863,11 +825,7 @@ static void vmw_move_notify(struct ttm_buffer_object *bo,
*/
static void vmw_swap_notify(struct ttm_buffer_object *bo)
{
struct ttm_bo_device *bdev = bo->bdev;
spin_lock(&bdev->fence_lock);
ttm_bo_wait(bo, false, false, false);
spin_unlock(&bdev->fence_lock);
}
......@@ -880,11 +838,6 @@ struct ttm_bo_driver vmw_bo_driver = {
.evict_flags = vmw_evict_flags,
.move = NULL,
.verify_access = vmw_verify_access,
.sync_obj_signaled = vmw_sync_obj_signaled,
.sync_obj_wait = vmw_sync_obj_wait,
.sync_obj_flush = vmw_sync_obj_flush,
.sync_obj_unref = vmw_sync_obj_unref,
.sync_obj_ref = vmw_sync_obj_ref,
.move_notify = vmw_move_notify,
.swap_notify = vmw_swap_notify,
.fault_reserve_notify = &vmw_ttm_fault_reserve_notify,
......
......@@ -342,7 +342,6 @@ struct vmw_sw_context{
uint32_t *cmd_bounce;
uint32_t cmd_bounce_size;
struct list_head resource_list;
uint32_t fence_flags;
struct ttm_buffer_object *cur_query_bo;
struct list_head res_relocations;
uint32_t *buf_start;
......@@ -704,6 +703,7 @@ extern void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes);
extern void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes);
extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
uint32_t *seqno);
extern void vmw_fifo_ping_host_locked(struct vmw_private *, uint32_t reason);
extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
......
......@@ -346,13 +346,10 @@ static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context,
++sw_context->cur_val_buf;
val_buf = &vval_buf->base;
val_buf->bo = ttm_bo_reference(bo);
val_buf->reserved = false;
list_add_tail(&val_buf->head, &sw_context->validate_nodes);
vval_buf->validate_as_mob = validate_as_mob;
}
sw_context->fence_flags |= DRM_VMW_FENCE_FLAG_EXEC;
if (p_val_node)
*p_val_node = val_node;
......@@ -2338,13 +2335,9 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
if (p_handle != NULL)
ret = vmw_user_fence_create(file_priv, dev_priv->fman,
sequence,
DRM_VMW_FENCE_FLAG_EXEC,
p_fence, p_handle);
sequence, p_fence, p_handle);
else
ret = vmw_fence_create(dev_priv->fman, sequence,
DRM_VMW_FENCE_FLAG_EXEC,
p_fence);
ret = vmw_fence_create(dev_priv->fman, sequence, p_fence);
if (unlikely(ret != 0 && !synced)) {
(void) vmw_fallback_wait(dev_priv, false, false,
......@@ -2396,7 +2389,7 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
BUG_ON(fence == NULL);
fence_rep.handle = fence_handle;
fence_rep.seqno = fence->seqno;
fence_rep.seqno = fence->base.seqno;
vmw_update_seqno(dev_priv, &dev_priv->fifo);
fence_rep.passed_seqno = dev_priv->last_read_seqno;
}
......@@ -2417,8 +2410,7 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
ttm_ref_object_base_unref(vmw_fp->tfile,
fence_handle, TTM_REF_USAGE);
DRM_ERROR("Fence copy error. Syncing.\n");
(void) vmw_fence_obj_wait(fence, fence->signal_mask,
false, false,
(void) vmw_fence_obj_wait(fence, false, false,
VMW_FENCE_WAIT_TIMEOUT);
}
}
......@@ -2470,7 +2462,6 @@ int vmw_execbuf_process(struct drm_file *file_priv,
sw_context->fp = vmw_fpriv(file_priv);
sw_context->cur_reloc = 0;
sw_context->cur_val_buf = 0;
sw_context->fence_flags = 0;
INIT_LIST_HEAD(&sw_context->resource_list);
sw_context->cur_query_bo = dev_priv->pinned_bo;
sw_context->last_query_ctx = NULL;
......@@ -2496,7 +2487,7 @@ int vmw_execbuf_process(struct drm_file *file_priv,
if (unlikely(ret != 0))
goto out_err_nores;
ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes);
ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes, true);
if (unlikely(ret != 0))
goto out_err;
......@@ -2684,10 +2675,7 @@ void __vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
query_val.bo = ttm_bo_reference(dev_priv->dummy_query_bo);
list_add_tail(&query_val.head, &validate_list);
do {
ret = ttm_eu_reserve_buffers(&ticket, &validate_list);
} while (ret == -ERESTARTSYS);
ret = ttm_eu_reserve_buffers(&ticket, &validate_list, false);
if (unlikely(ret != 0)) {
vmw_execbuf_unpin_panic(dev_priv);
goto out_no_reserve;
......
This diff is collapsed.
......@@ -27,6 +27,8 @@
#ifndef _VMWGFX_FENCE_H_
#include <linux/fence.h>
#define VMW_FENCE_WAIT_TIMEOUT (5*HZ)
struct vmw_private;
......@@ -50,16 +52,11 @@ struct vmw_fence_action {
};
struct vmw_fence_obj {
struct kref kref;
u32 seqno;
struct fence base;
struct vmw_fence_manager *fman;
struct list_head head;
uint32_t signaled;
uint32_t signal_mask;
struct list_head seq_passed_actions;
void (*destroy)(struct vmw_fence_obj *fence);
wait_queue_head_t queue;
};
extern struct vmw_fence_manager *
......@@ -67,17 +64,29 @@ vmw_fence_manager_init(struct vmw_private *dev_priv);
extern void vmw_fence_manager_takedown(struct vmw_fence_manager *fman);
extern void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p);
/**
 * vmw_fence_obj_unreference - Drop a reference held through a fence pointer.
 *
 * @fence_p: Location of the fence pointer. It is unconditionally set to NULL.
 *
 * Clears *@fence_p and, if it previously held a non-NULL fence, calls
 * fence_put() on the struct fence embedded in it.
 */
static inline void
vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p)
{
	struct vmw_fence_obj *old = *fence_p;

	/* Clear the caller's pointer first so it never dangles. */
	*fence_p = NULL;
	if (!old)
		return;

	fence_put(&old->base);
}
extern struct vmw_fence_obj *
vmw_fence_obj_reference(struct vmw_fence_obj *fence);
static inline struct vmw_fence_obj *
vmw_fence_obj_reference(struct vmw_fence_obj *fence)
{
if (fence)
fence_get(&fence->base);
return fence;
}
extern void vmw_fences_update(struct vmw_fence_manager *fman);
extern bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
uint32_t flags);
extern bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence);
extern int vmw_fence_obj_wait(struct vmw_fence_obj *fence, uint32_t flags,
extern int vmw_fence_obj_wait(struct vmw_fence_obj *fence,
bool lazy,
bool interruptible, unsigned long timeout);
......@@ -85,13 +94,11 @@ extern void vmw_fence_obj_flush(struct vmw_fence_obj *fence);
extern int vmw_fence_create(struct vmw_fence_manager *fman,
uint32_t seqno,
uint32_t mask,
struct vmw_fence_obj **p_fence);
extern int vmw_user_fence_create(struct drm_file *file_priv,
struct vmw_fence_manager *fman,
uint32_t sequence,
uint32_t mask,
struct vmw_fence_obj **p_fence,
uint32_t *p_handle);
......
......@@ -160,16 +160,21 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
return vmw_fifo_send_fence(dev_priv, &dummy);
}
void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
void vmw_fifo_ping_host_locked(struct vmw_private *dev_priv, uint32_t reason)
{
__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
mutex_lock(&dev_priv->hw_mutex);
if (unlikely(ioread32(fifo_mem + SVGA_FIFO_BUSY) == 0)) {
iowrite32(1, fifo_mem + SVGA_FIFO_BUSY);
vmw_write(dev_priv, SVGA_REG_SYNC, reason);
}
}
/**
 * vmw_fifo_ping_host - Ping the host while holding the hardware mutex.
 *
 * @dev_priv: Device private; its hw_mutex is taken around the ping.
 * @reason: Value passed unchanged to vmw_fifo_ping_host_locked().
 *
 * Locking wrapper: takes dev_priv->hw_mutex, calls
 * vmw_fifo_ping_host_locked() and releases the mutex again.
 */
void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
{
mutex_lock(&dev_priv->hw_mutex);
vmw_fifo_ping_host_locked(dev_priv, reason);
mutex_unlock(&dev_priv->hw_mutex);
}
......
......@@ -567,13 +567,18 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo,
int ret;
if (flags & drm_vmw_synccpu_allow_cs) {
struct ttm_bo_device *bdev = bo->bdev;
bool nonblock = !!(flags & drm_vmw_synccpu_dontblock);
long lret;
spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, true,
!!(flags & drm_vmw_synccpu_dontblock));
spin_unlock(&bdev->fence_lock);
return ret;
if (nonblock)
return reservation_object_test_signaled_rcu(bo->resv, true) ? 0 : -EBUSY;
lret = reservation_object_wait_timeout_rcu(bo->resv, true, true, MAX_SCHEDULE_TIMEOUT);
if (!lret)
return -EBUSY;
else if (lret < 0)
return lret;
return 0;
}
ret = ttm_bo_synccpu_write_grab
......@@ -1215,7 +1220,7 @@ vmw_resource_check_buffer(struct vmw_resource *res,
INIT_LIST_HEAD(&val_list);
val_buf->bo = ttm_bo_reference(&res->backup->base);
list_add_tail(&val_buf->head, &val_list);
ret = ttm_eu_reserve_buffers(NULL, &val_list);
ret = ttm_eu_reserve_buffers(NULL, &val_list, interruptible);
if (unlikely(ret != 0))
goto out_no_reserve;
......@@ -1419,25 +1424,16 @@ void vmw_fence_single_bo(struct ttm_buffer_object *bo,
struct vmw_fence_obj *fence)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_bo_driver *driver = bdev->driver;
struct vmw_fence_obj *old_fence_obj;
struct vmw_private *dev_priv =
container_of(bdev, struct vmw_private, bdev);
if (fence == NULL)
if (fence == NULL) {
vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
else
driver->sync_obj_ref(fence);
spin_lock(&bdev->fence_lock);
old_fence_obj = bo->sync_obj;
bo->sync_obj = fence;
spin_unlock(&bdev->fence_lock);
if (old_fence_obj)
vmw_fence_obj_unreference(&old_fence_obj);
reservation_object_add_excl_fence(bo->resv, &fence->base);
fence_put(&fence->base);
} else
reservation_object_add_excl_fence(bo->resv, &fence->base);
}
/**
......@@ -1475,7 +1471,6 @@ void vmw_resource_move_notify(struct ttm_buffer_object *bo,
if (mem->mem_type != VMW_PL_MOB) {
struct vmw_resource *res, *n;
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_validate_buffer val_buf;
val_buf.bo = bo;
......@@ -1491,9 +1486,7 @@ void vmw_resource_move_notify(struct ttm_buffer_object *bo,
list_del_init(&res->mob_head);
}
spin_lock(&bdev->fence_lock);
(void) ttm_bo_wait(bo, false, false, false);
spin_unlock(&bdev->fence_lock);
}
}
......
......@@ -173,7 +173,6 @@ struct ttm_tt;
* @lru: List head for the lru list.
* @ddestroy: List head for the delayed destroy list.
* @swap: List head for swap LRU list.
* @sync_obj: Pointer to a synchronization object.
* @priv_flags: Flags describing buffer object internal state.
* @vma_node: Address space manager node.
* @offset: The current GPU offset, which can have different meanings
......@@ -237,13 +236,9 @@ struct ttm_buffer_object {
struct list_head io_reserve_lru;
/**
* Members protected by struct buffer_object_device::fence_lock
* In addition, setting sync_obj to anything else
* than NULL requires bo::reserved to be held. This allows for
* checking NULL while reserved but not holding the mentioned lock.
* Members protected by a bo reservation.
*/
void *sync_obj;
unsigned long priv_flags;
struct drm_vma_offset_node vma_node;
......
......@@ -312,11 +312,6 @@ struct ttm_mem_type_manager {
* @move: Callback for a driver to hook in accelerated functions to
* move a buffer.
* If set to NULL, a potentially slow memcpy() move is used.
* @sync_obj_signaled: See ttm_fence_api.h
* @sync_obj_wait: See ttm_fence_api.h
* @sync_obj_flush: See ttm_fence_api.h
* @sync_obj_unref: See ttm_fence_api.h
* @sync_obj_ref: See ttm_fence_api.h
*/
struct ttm_bo_driver {
......@@ -418,23 +413,6 @@ struct ttm_bo_driver {
int (*verify_access) (struct ttm_buffer_object *bo,
struct file *filp);
/**
* In case a driver writer dislikes the TTM fence objects,
* the driver writer can replace those with sync objects of
* his / her own. If it turns out that no driver writer is
* using these. I suggest we remove these hooks and plug in
* fences directly. The bo driver needs the following functionality:
* See the corresponding functions in the fence object API
* documentation.
*/
bool (*sync_obj_signaled) (void *sync_obj);
int (*sync_obj_wait) (void *sync_obj,
bool lazy, bool interruptible);
int (*sync_obj_flush) (void *sync_obj);
void (*sync_obj_unref) (void **sync_obj);
void *(*sync_obj_ref) (void *sync_obj);
/* hook to notify driver about a driver move so it
* can do tiling things */
void (*move_notify)(struct ttm_buffer_object *bo,
......@@ -521,8 +499,6 @@ struct ttm_bo_global {
*
* @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
* @man: An array of mem_type_managers.
* @fence_lock: Protects the synchronizing members on *all* bos belonging
* to this device.
* @vma_manager: Address space manager
* lru_lock: Spinlock that protects the buffer+device lru lists and
* ddestroy lists.
......@@ -542,7 +518,6 @@ struct ttm_bo_device {
struct ttm_bo_global *glob;
struct ttm_bo_driver *driver;
struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
spinlock_t fence_lock;
/*
* Protected by internal locks.
......@@ -1025,7 +1000,7 @@ extern void ttm_bo_free_old_node(struct ttm_buffer_object *bo);
* ttm_bo_move_accel_cleanup.
*
* @bo: A pointer to a struct ttm_buffer_object.
* @sync_obj: A sync object that signals when moving is complete.
* @fence: A fence object that signals when moving is complete.
* @evict: This is an evict move. Don't return until the buffer is idle.
* @no_wait_gpu: Return immediately if the GPU is busy.
* @new_mem: struct ttm_mem_reg indicating where to move.
......@@ -1039,7 +1014,7 @@ extern void ttm_bo_free_old_node(struct ttm_buffer_object *bo);
*/
extern int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
void *sync_obj,
struct fence *fence,
bool evict, bool no_wait_gpu,
struct ttm_mem_reg *new_mem);
/**
......
......@@ -39,19 +39,11 @@
*
* @head: list head for thread-private list.
* @bo: refcounted buffer object pointer.
* @reserved: Indicates whether @bo has been reserved for validation.
* @removed: Indicates whether @bo has been removed from lru lists.
* @put_count: Number of outstanding references on bo::list_kref.
* @old_sync_obj: Pointer to a sync object about to be unreferenced
*/
struct ttm_validate_buffer {
struct list_head head;
struct ttm_buffer_object *bo;
bool reserved;
bool removed;
int put_count;
void *old_sync_obj;
};
/**
......@@ -73,6 +65,7 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
* @ticket: [out] ww_acquire_ctx filled in by call, or NULL if only
* non-blocking reserves should be tried.
* @list: thread private list of ttm_validate_buffer structs.
* @intr: should the wait be interruptible
*
* Tries to reserve bos pointed to by the list entries for validation.
* If the function returns 0, all buffers are marked as "unfenced",
......@@ -84,9 +77,9 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
* CPU write reservations to be cleared, and for other threads to
* unreserve their buffers.
*
* This function may return -ERESTART or -EAGAIN if the calling process
* receives a signal while waiting. In that case, no buffers on the list
* will be reserved upon return.
* If intr is set to true, this function may return -ERESTARTSYS if the
* calling process receives a signal while waiting. In that case, no
* buffers on the list will be reserved upon return.
*
* Buffers reserved by this function should be unreserved by
* a call to either ttm_eu_backoff_reservation() or
......@@ -95,14 +88,14 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
*/
extern int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
struct list_head *list);
struct list_head *list, bool intr);
/**
* function ttm_eu_fence_buffer_objects.
*
* @ticket: ww_acquire_ctx from reserve call
* @list: thread private list of ttm_validate_buffer structs.
* @sync_obj: The new sync object for the buffers.
* @fence: The new exclusive fence for the buffers.
*
* This function should be called when command submission is complete, and
* it will attach @fence as the new exclusive fence to the bos pointed to by
* entries on @list.
......@@ -111,6 +104,7 @@ extern int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
*/
extern void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
struct list_head *list, void *sync_obj);
struct list_head *list,
struct fence *fence);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment