Commit 43f1214a authored by Alex Deucher's avatar Alex Deucher

drm/radeon: use IBs for VM page table updates v2

For very large page table updates, we can exceed the
size of the ring.  To avoid this, use an IB to perform
the page table update.

v2(ck): cleanup the IB infrastructure and the use it instead
        of filling the struct ourself.
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
parent 24178ec4
......@@ -1946,19 +1946,21 @@ uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
* cayman_vm_set_page - update the page tables using the CP
*
* @rdev: radeon_device pointer
* @ib: indirect buffer to fill with commands
* @pe: addr of the page entry
* @addr: dst addr to write into pe
* @count: number of page entries to update
* @incr: increase next addr by incr bytes
* @flags: access flags
*
* Update the page tables using the CP (cayman-si).
* Update the page tables using the CP (cayman/TN).
*/
void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
void cayman_vm_set_page(struct radeon_device *rdev,
struct radeon_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
uint64_t value;
unsigned ndw;
......@@ -1969,9 +1971,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
if (ndw > 0x3FFF)
ndw = 0x3FFF;
radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
radeon_ring_write(ring, pe);
radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
ib->ptr[ib->length_dw++] = pe;
ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
for (; ndw > 1; ndw -= 2, --count, pe += 8) {
if (flags & RADEON_VM_PAGE_SYSTEM) {
value = radeon_vm_map_gart(rdev, addr);
......@@ -1983,8 +1985,8 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
}
addr += incr;
value |= r600_flags;
radeon_ring_write(ring, value);
radeon_ring_write(ring, upper_32_bits(value));
ib->ptr[ib->length_dw++] = value;
ib->ptr[ib->length_dw++] = upper_32_bits(value);
}
}
} else {
......@@ -1994,9 +1996,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
ndw = 0xFFFFE;
/* for non-physically contiguous pages (system) */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw));
radeon_ring_write(ring, pe);
radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
ib->ptr[ib->length_dw++] = pe;
ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
for (; ndw > 0; ndw -= 2, --count, pe += 8) {
if (flags & RADEON_VM_PAGE_SYSTEM) {
value = radeon_vm_map_gart(rdev, addr);
......@@ -2008,10 +2010,12 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
}
addr += incr;
value |= r600_flags;
radeon_ring_write(ring, value);
radeon_ring_write(ring, upper_32_bits(value));
ib->ptr[ib->length_dw++] = value;
ib->ptr[ib->length_dw++] = upper_32_bits(value);
}
}
while (ib->length_dw & 0x7)
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}
}
......
......@@ -780,6 +780,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
struct radeon_ib *ib, struct radeon_vm *vm,
unsigned size);
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence);
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
struct radeon_ib *const_ib);
int radeon_ib_pool_init(struct radeon_device *rdev);
......@@ -1188,7 +1189,9 @@ struct radeon_asic {
void (*fini)(struct radeon_device *rdev);
u32 pt_ring_index;
void (*set_page)(struct radeon_device *rdev, uint64_t pe,
void (*set_page)(struct radeon_device *rdev,
struct radeon_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
} vm;
......@@ -1810,7 +1813,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
#define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p))
#define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
#define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
#define radeon_asic_vm_set_page(rdev, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (pe), (addr), (count), (incr), (flags)))
#define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
#define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
#define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)].ring_test((rdev), (cp))
#define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp))
......
......@@ -474,7 +474,9 @@ int cayman_vm_init(struct radeon_device *rdev);
void cayman_vm_fini(struct radeon_device *rdev);
void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags);
void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
void cayman_vm_set_page(struct radeon_device *rdev,
struct radeon_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
......@@ -506,7 +508,9 @@ int si_irq_set(struct radeon_device *rdev);
int si_irq_process(struct radeon_device *rdev);
int si_vm_init(struct radeon_device *rdev);
void si_vm_fini(struct radeon_device *rdev);
void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
void si_vm_set_page(struct radeon_device *rdev,
struct radeon_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
......
......@@ -125,18 +125,6 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
return 0;
}
static void radeon_cs_sync_to(struct radeon_cs_parser *p,
struct radeon_fence *fence)
{
struct radeon_fence *other;
if (!fence)
return;
other = p->ib.sync_to[fence->ring];
p->ib.sync_to[fence->ring] = radeon_fence_later(fence, other);
}
static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
int i;
......@@ -145,7 +133,7 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
if (!p->relocs[i].robj)
continue;
radeon_cs_sync_to(p, p->relocs[i].robj->tbo.sync_obj);
radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj);
}
}
......@@ -472,8 +460,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
goto out;
}
radeon_cs_sync_rings(parser);
radeon_cs_sync_to(parser, vm->fence);
radeon_cs_sync_to(parser, radeon_vm_grab_id(rdev, vm, parser->ring));
radeon_ib_sync_to(&parser->ib, vm->fence);
radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id(
rdev, vm, parser->ring));
if ((rdev->family >= CHIP_TAHITI) &&
(parser->chunk_const_ib_idx != -1)) {
......
......@@ -929,6 +929,7 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
*/
static int radeon_vm_update_pdes(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_ib *ib,
uint64_t start, uint64_t end)
{
static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
......@@ -971,7 +972,7 @@ static int radeon_vm_update_pdes(struct radeon_device *rdev,
((last_pt + incr * count) != pt)) {
if (count) {
radeon_asic_vm_set_page(rdev, last_pde,
radeon_asic_vm_set_page(rdev, ib, last_pde,
last_pt, count, incr,
RADEON_VM_PAGE_VALID);
}
......@@ -985,7 +986,7 @@ static int radeon_vm_update_pdes(struct radeon_device *rdev,
}
if (count) {
radeon_asic_vm_set_page(rdev, last_pde, last_pt, count,
radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
incr, RADEON_VM_PAGE_VALID);
}
......@@ -1009,6 +1010,7 @@ static int radeon_vm_update_pdes(struct radeon_device *rdev,
*/
static void radeon_vm_update_ptes(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_ib *ib,
uint64_t start, uint64_t end,
uint64_t dst, uint32_t flags)
{
......@@ -1038,7 +1040,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
if ((last_pte + 8 * count) != pte) {
if (count) {
radeon_asic_vm_set_page(rdev, last_pte,
radeon_asic_vm_set_page(rdev, ib, last_pte,
last_dst, count,
RADEON_GPU_PAGE_SIZE,
flags);
......@@ -1056,7 +1058,8 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
}
if (count) {
radeon_asic_vm_set_page(rdev, last_pte, last_dst, count,
radeon_asic_vm_set_page(rdev, ib, last_pte,
last_dst, count,
RADEON_GPU_PAGE_SIZE, flags);
}
}
......@@ -1080,8 +1083,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
struct ttm_mem_reg *mem)
{
unsigned ridx = rdev->asic->vm.pt_ring_index;
struct radeon_ring *ring = &rdev->ring[ridx];
struct radeon_semaphore *sem = NULL;
struct radeon_ib ib;
struct radeon_bo_va *bo_va;
unsigned nptes, npdes, ndw;
uint64_t addr;
......@@ -1124,25 +1126,13 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
bo_va->valid = false;
}
if (vm->fence && radeon_fence_signaled(vm->fence)) {
radeon_fence_unref(&vm->fence);
}
if (vm->fence && vm->fence->ring != ridx) {
r = radeon_semaphore_create(rdev, &sem);
if (r) {
return r;
}
}
nptes = radeon_bo_ngpu_pages(bo);
/* assume two extra pdes in case the mapping overlaps the borders */
npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
/* estimate number of dw needed */
/* semaphore, fence and padding */
ndw = 32;
/* padding, etc. */
ndw = 64;
if (RADEON_VM_BLOCK_SIZE > 11)
/* reserve space for one header for every 2k dwords */
......@@ -1161,33 +1151,31 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
/* reserve space for pde addresses */
ndw += npdes * 2;
r = radeon_ring_lock(rdev, ring, ndw);
if (r) {
return r;
}
/* update too big for an IB */
if (ndw > 0xfffff)
return -ENOMEM;
if (sem && radeon_fence_need_sync(vm->fence, ridx)) {
radeon_semaphore_sync_rings(rdev, sem, vm->fence->ring, ridx);
radeon_fence_note_sync(vm->fence, ridx);
}
r = radeon_ib_get(rdev, ridx, &ib, NULL, ndw * 4);
ib.length_dw = 0;
r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset);
r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_ib_free(rdev, &ib);
return r;
}
radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset,
radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
addr, bo_va->flags);
radeon_fence_unref(&vm->fence);
r = radeon_fence_emit(rdev, &vm->fence, ridx);
radeon_ib_sync_to(&ib, vm->fence);
r = radeon_ib_schedule(rdev, &ib, NULL);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_ib_free(rdev, &ib);
return r;
}
radeon_ring_unlock_commit(rdev, ring);
radeon_semaphore_free(rdev, &sem, vm->fence);
radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(ib.fence);
radeon_ib_free(rdev, &ib);
radeon_fence_unref(&vm->last_flush);
return 0;
......
......@@ -108,6 +108,25 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
radeon_fence_unref(&ib->fence);
}
/**
* radeon_ib_sync_to - sync to fence before executing the IB
*
* @ib: IB object to add fence to
* @fence: fence to sync to
*
* Sync to the fence before executing the IB
*/
void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence)
{
struct radeon_fence *other;
if (!fence)
return;
other = ib->sync_to[fence->ring];
ib->sync_to[fence->ring] = radeon_fence_later(fence, other);
}
/**
* radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring
*
......
......@@ -3043,19 +3043,21 @@ void si_vm_fini(struct radeon_device *rdev)
* si_vm_set_page - update the page tables using the CP
*
* @rdev: radeon_device pointer
* @ib: indirect buffer to fill with commands
* @pe: addr of the page entry
* @addr: dst addr to write into pe
* @count: number of page entries to update
* @incr: increase next addr by incr bytes
* @flags: access flags
*
* Update the page tables using the CP (cayman-si).
* Update the page tables using the CP (SI).
*/
void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
void si_vm_set_page(struct radeon_device *rdev,
struct radeon_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
uint64_t value;
unsigned ndw;
......@@ -3066,11 +3068,11 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
if (ndw > 0x3FFE)
ndw = 0x3FFE;
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
WRITE_DATA_DST_SEL(1)));
radeon_ring_write(ring, pe);
radeon_ring_write(ring, upper_32_bits(pe));
ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
WRITE_DATA_DST_SEL(1));
ib->ptr[ib->length_dw++] = pe;
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
for (; ndw > 2; ndw -= 2, --count, pe += 8) {
if (flags & RADEON_VM_PAGE_SYSTEM) {
value = radeon_vm_map_gart(rdev, addr);
......@@ -3082,8 +3084,8 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
}
addr += incr;
value |= r600_flags;
radeon_ring_write(ring, value);
radeon_ring_write(ring, upper_32_bits(value));
ib->ptr[ib->length_dw++] = value;
ib->ptr[ib->length_dw++] = upper_32_bits(value);
}
}
} else {
......@@ -3095,9 +3097,9 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
ndw = 0xFFFFE;
/* for non-physically contiguous pages (system) */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
radeon_ring_write(ring, pe);
radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
ib->ptr[ib->length_dw++] = pe;
ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
for (; ndw > 0; ndw -= 2, --count, pe += 8) {
if (flags & RADEON_VM_PAGE_SYSTEM) {
value = radeon_vm_map_gart(rdev, addr);
......@@ -3109,8 +3111,8 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
}
addr += incr;
value |= r600_flags;
radeon_ring_write(ring, value);
radeon_ring_write(ring, upper_32_bits(value));
ib->ptr[ib->length_dw++] = value;
ib->ptr[ib->length_dw++] = upper_32_bits(value);
}
}
} else {
......@@ -3124,20 +3126,22 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
else
value = 0;
/* for physically contiguous pages (vram) */
radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
radeon_ring_write(ring, pe); /* dst addr */
radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
radeon_ring_write(ring, r600_flags); /* mask */
radeon_ring_write(ring, 0);
radeon_ring_write(ring, value); /* value */
radeon_ring_write(ring, upper_32_bits(value));
radeon_ring_write(ring, incr); /* increment size */
radeon_ring_write(ring, 0);
ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
ib->ptr[ib->length_dw++] = pe; /* dst addr */
ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
ib->ptr[ib->length_dw++] = r600_flags; /* mask */
ib->ptr[ib->length_dw++] = 0;
ib->ptr[ib->length_dw++] = value; /* value */
ib->ptr[ib->length_dw++] = upper_32_bits(value);
ib->ptr[ib->length_dw++] = incr; /* increment size */
ib->ptr[ib->length_dw++] = 0;
pe += ndw * 4;
addr += (ndw / 2) * incr;
count -= ndw / 2;
}
}
while (ib->length_dw & 0x7)
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment