Commit 21c16bf6 authored by Christian König, committed by Alex Deucher

drm/amdgpu: add user fence context map v2

This is a prerequisite for the GPU scheduler to make the order
of submission independent from the order of execution.

v2: properly implement the locking
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
parent 91e1a520
...@@ -415,6 +415,8 @@ struct amdgpu_user_fence { ...@@ -415,6 +415,8 @@ struct amdgpu_user_fence {
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
/* write-back address offset to bo start */ /* write-back address offset to bo start */
uint32_t offset; uint32_t offset;
/* resulting sequence number */
uint64_t sequence;
}; };
int amdgpu_fence_driver_init(struct amdgpu_device *adev); int amdgpu_fence_driver_init(struct amdgpu_device *adev);
...@@ -985,9 +987,18 @@ struct amdgpu_vm_manager { ...@@ -985,9 +987,18 @@ struct amdgpu_vm_manager {
* context related structures * context related structures
*/ */
/* Number of fence slots kept per ring in each context (size of the fences[] array below). */
#define AMDGPU_CTX_MAX_CS_PENDING 16
/* Per-ring fence bookkeeping; struct amdgpu_ctx holds one of these for every ring. */
struct amdgpu_ctx_ring {
/* Sequence number the next amdgpu_ctx_add_fence() call will return; set to 1 when the context is allocated. */
uint64_t sequence;
/* Fences of recent submissions, stored at index (sequence number % AMDGPU_CTX_MAX_CS_PENDING). */
struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING];
};
struct amdgpu_ctx { struct amdgpu_ctx {
struct kref refcount; struct kref refcount;
unsigned reset_counter; unsigned reset_counter;
spinlock_t ring_lock;
struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
}; };
struct amdgpu_ctx_mgr { struct amdgpu_ctx_mgr {
...@@ -1007,6 +1018,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv); ...@@ -1007,6 +1018,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv);
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx); int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
struct fence *fence);
struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct amdgpu_ring *ring, uint64_t seq);
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp); struct drm_file *filp);
......
...@@ -698,9 +698,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, ...@@ -698,9 +698,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
sizeof(struct drm_amdgpu_cs_chunk_dep); sizeof(struct drm_amdgpu_cs_chunk_dep);
for (j = 0; j < num_deps; ++j) { for (j = 0; j < num_deps; ++j) {
struct amdgpu_fence *fence;
struct amdgpu_ring *ring; struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx; struct amdgpu_ctx *ctx;
struct fence *fence;
r = amdgpu_cs_get_ring(adev, deps[j].ip_type, r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
deps[j].ip_instance, deps[j].ip_instance,
...@@ -712,20 +712,20 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, ...@@ -712,20 +712,20 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
if (ctx == NULL) if (ctx == NULL)
return -EINVAL; return -EINVAL;
r = amdgpu_fence_recreate(ring, p->filp, fence = amdgpu_ctx_get_fence(ctx, ring,
deps[j].handle, deps[j].handle);
&fence); if (IS_ERR(fence)) {
if (r) { r = PTR_ERR(fence);
amdgpu_ctx_put(ctx); amdgpu_ctx_put(ctx);
return r; return r;
}
r = amdgpu_sync_fence(adev, &ib->sync, &fence->base);
amdgpu_fence_unref(&fence);
amdgpu_ctx_put(ctx);
if (r) } else if (fence) {
return r; r = amdgpu_sync_fence(adev, &ib->sync, fence);
fence_put(fence);
amdgpu_ctx_put(ctx);
if (r)
return r;
}
} }
} }
...@@ -773,8 +773,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -773,8 +773,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_ib_fill(adev, &parser); r = amdgpu_cs_ib_fill(adev, &parser);
} }
if (!r) if (!r) {
r = amdgpu_cs_dependencies(adev, &parser); r = amdgpu_cs_dependencies(adev, &parser);
if (r)
DRM_ERROR("Failed in the dependencies handling %d!\n", r);
}
if (r) { if (r) {
amdgpu_cs_parser_fini(&parser, r, reserved_buffers); amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
...@@ -791,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -791,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
goto out; goto out;
} }
cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq; cs->out.handle = parser.uf.sequence;
out: out:
amdgpu_cs_parser_fini(&parser, r, true); amdgpu_cs_parser_fini(&parser, r, true);
up_read(&adev->exclusive_lock); up_read(&adev->exclusive_lock);
...@@ -814,30 +817,31 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, ...@@ -814,30 +817,31 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
union drm_amdgpu_wait_cs *wait = data; union drm_amdgpu_wait_cs *wait = data;
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
struct amdgpu_fence *fence = NULL;
struct amdgpu_ring *ring = NULL; struct amdgpu_ring *ring = NULL;
struct amdgpu_ctx *ctx; struct amdgpu_ctx *ctx;
struct fence *fence;
long r; long r;
r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
wait->in.ring, &ring);
if (r)
return r;
ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
if (ctx == NULL) if (ctx == NULL)
return -EINVAL; return -EINVAL;
r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
wait->in.ring, &ring); if (IS_ERR(fence))
if (r) { r = PTR_ERR(fence);
amdgpu_ctx_put(ctx);
return r;
}
r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence); else if (fence) {
if (r) { r = fence_wait_timeout(fence, true, timeout);
amdgpu_ctx_put(ctx); fence_put(fence);
return r;
} } else
r = 1;
r = fence_wait_timeout(&fence->base, true, timeout);
amdgpu_fence_unref(&fence);
amdgpu_ctx_put(ctx); amdgpu_ctx_put(ctx);
if (r < 0) if (r < 0)
return r; return r;
......
...@@ -28,17 +28,22 @@ ...@@ -28,17 +28,22 @@
static void amdgpu_ctx_do_release(struct kref *ref) static void amdgpu_ctx_do_release(struct kref *ref)
{ {
struct amdgpu_ctx *ctx; struct amdgpu_ctx *ctx;
unsigned i, j;
ctx = container_of(ref, struct amdgpu_ctx, refcount); ctx = container_of(ref, struct amdgpu_ctx, refcount);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j)
fence_put(ctx->rings[i].fences[j]);
kfree(ctx); kfree(ctx);
} }
int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
uint32_t *id) uint32_t *id)
{ {
int r;
struct amdgpu_ctx *ctx; struct amdgpu_ctx *ctx;
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
int i, r;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx) if (!ctx)
...@@ -55,6 +60,9 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, ...@@ -55,6 +60,9 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
memset(ctx, 0, sizeof(*ctx)); memset(ctx, 0, sizeof(*ctx));
kref_init(&ctx->refcount); kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
ctx->rings[i].sequence = 1;
mutex_unlock(&mgr->lock); mutex_unlock(&mgr->lock);
return 0; return 0;
...@@ -177,3 +185,53 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) ...@@ -177,3 +185,53 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
kref_put(&ctx->refcount, amdgpu_ctx_do_release); kref_put(&ctx->refcount, amdgpu_ctx_do_release);
return 0; return 0;
} }
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
struct fence *fence)
{
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
uint64_t seq = cring->sequence;
unsigned idx = seq % AMDGPU_CTX_MAX_CS_PENDING;
struct fence *other = cring->fences[idx];
if (other) {
signed long r;
r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
if (r < 0)
DRM_ERROR("Error (%ld) waiting for fence!\n", r);
}
fence_get(fence);
spin_lock(&ctx->ring_lock);
cring->fences[idx] = fence;
cring->sequence++;
spin_unlock(&ctx->ring_lock);
fence_put(other);
return seq;
}
/**
 * amdgpu_ctx_get_fence - look up the fence of an earlier submission
 *
 * @ctx: context the submission was made in
 * @ring: ring the submission ran on; ring->idx selects the per-ring state
 * @seq: sequence number previously returned by amdgpu_ctx_add_fence()
 *
 * Returns:
 *  - ERR_PTR(-EINVAL) if @seq has not been handed out yet,
 *  - NULL if @seq is older than the AMDGPU_CTX_MAX_CS_PENDING most recent
 *    submissions, i.e. its slot has since been reused (callers in this file
 *    treat NULL as "nothing left to wait for"),
 *  - otherwise the stored fence with a reference taken through fence_get();
 *    the caller drops it with fence_put().
 */
struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				   struct amdgpu_ring *ring, uint64_t seq)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	struct fence *fence;

	spin_lock(&ctx->ring_lock);

	/* Sequence numbers at or beyond the next one to assign do not exist. */
	if (seq >= cring->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	/*
	 * seq < cring->sequence is guaranteed here, so this subtraction cannot
	 * wrap. The former "seq < cring->sequence - AMDGPU_CTX_MAX_CS_PENDING"
	 * underflowed while cring->sequence was still below
	 * AMDGPU_CTX_MAX_CS_PENDING and made every early, still pending
	 * submission look like it had already been recycled.
	 */
	if (cring->sequence - seq > AMDGPU_CTX_MAX_CS_PENDING) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = fence_get(cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}
...@@ -219,8 +219,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, ...@@ -219,8 +219,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
/* wrap the last IB with fence */ /* wrap the last IB with fence */
if (ib->user) { if (ib->user) {
uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
&ib->fence->base);
addr += ib->user->offset; addr += ib->user->offset;
amdgpu_ring_emit_fence(ring, addr, ib->fence->seq, amdgpu_ring_emit_fence(ring, addr, ib->user->sequence,
AMDGPU_FENCE_FLAG_64BIT); AMDGPU_FENCE_FLAG_64BIT);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment