Commit 7c42bc1a authored by Christian König's avatar Christian König Committed by Alex Deucher

drm/radeon: use one VMID for each ring

Use multiple VMIDs for each VM, one for each ring. That allows
us to execute flushes separately on each ring, still not ideal
cause in a lot of cases rings can share IDs.
Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent ad1a58a4
......@@ -4066,6 +4066,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 header, control = INDIRECT_BUFFER_VALID;
if (ib->is_const_ib) {
......@@ -4094,8 +4095,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
}
control |= ib->length_dw |
(ib->vm ? (ib->vm->id << 24) : 0);
control |= ib->length_dw | (vm_id << 24);
radeon_ring_write(ring, header);
radeon_ring_write(ring,
......
......@@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;
if (rdev->wb.enabled) {
u32 next_rptr = ring->wptr + 5;
......
......@@ -1373,6 +1373,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA |
PACKET3_SH_ACTION_ENA;
......@@ -1395,15 +1396,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
#endif
(ib->gpu_addr & 0xFFFFFFFC));
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
radeon_ring_write(ring, ib->length_dw |
(ib->vm ? (ib->vm->id << 24) : 0));
radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
/* flush read cache over gart for this vmid */
radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl);
radeon_ring_write(ring, 0xFFFFFFFF);
radeon_ring_write(ring, 0);
radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */
radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */
}
static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
......
......@@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
if (rdev->wb.enabled) {
u32 next_rptr = ring->wptr + 4;
......@@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
*/
while ((ring->wptr & 7) != 5)
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
......
......@@ -905,9 +905,17 @@ struct radeon_vm_pt {
uint64_t addr;
};
struct radeon_vm_id {
unsigned id;
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
struct radeon_fence *flushed_updates;
/* last use of vmid */
struct radeon_fence *last_id_use;
};
struct radeon_vm {
struct rb_root va;
unsigned id;
/* BOs moved, but not yet updated in the PT */
struct list_head invalidated;
......@@ -917,7 +925,6 @@ struct radeon_vm {
/* contains the page directory */
struct radeon_bo *page_directory;
uint64_t pd_gpu_addr;
unsigned max_pde_used;
/* array of page tables, one for each page directory entry */
......@@ -928,10 +935,9 @@ struct radeon_vm {
struct mutex mutex;
/* last fence for cs using this vm */
struct radeon_fence *fence;
/* last flushed PD/PT update */
struct radeon_fence *flushed_updates;
/* last use of vmid */
struct radeon_fence *last_id_use;
/* for id and flush management per ring */
struct radeon_vm_id ids[RADEON_NUM_RINGS];
};
struct radeon_vm_manager {
......
......@@ -182,15 +182,18 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
struct radeon_vm *vm, int ring)
{
struct radeon_fence *best[RADEON_NUM_RINGS] = {};
struct radeon_vm_id *vm_id = &vm->ids[ring];
unsigned choices[2] = {};
unsigned i;
/* check if the id is still valid */
if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
if (vm_id->id && vm_id->last_id_use &&
vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
return NULL;
/* we definately need to flush */
vm->pd_gpu_addr = ~0ll;
vm_id->pd_gpu_addr = ~0ll;
/* skip over VMID 0, since it is the system VM */
for (i = 1; i < rdev->vm_manager.nvm; ++i) {
......@@ -198,8 +201,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
if (fence == NULL) {
/* found a free one */
vm->id = i;
trace_radeon_vm_grab_id(vm->id, ring);
vm_id->id = i;
trace_radeon_vm_grab_id(i, ring);
return NULL;
}
......@@ -211,8 +214,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
for (i = 0; i < 2; ++i) {
if (choices[i]) {
vm->id = choices[i];
trace_radeon_vm_grab_id(vm->id, ring);
vm_id->id = choices[i];
trace_radeon_vm_grab_id(choices[i], ring);
return rdev->vm_manager.active[choices[i]];
}
}
......@@ -239,16 +242,18 @@ void radeon_vm_flush(struct radeon_device *rdev,
int ring, struct radeon_fence *updates)
{
uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
struct radeon_vm_id *vm_id = &vm->ids[ring];
if (pd_addr != vm->pd_gpu_addr || !vm->flushed_updates ||
radeon_fence_is_earlier(vm->flushed_updates, updates)) {
if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {
trace_radeon_vm_flush(pd_addr, ring, vm->id);
radeon_fence_unref(&vm->flushed_updates);
vm->flushed_updates = radeon_fence_ref(updates);
vm->pd_gpu_addr = pd_addr;
trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
radeon_fence_unref(&vm_id->flushed_updates);
vm_id->flushed_updates = radeon_fence_ref(updates);
vm_id->pd_gpu_addr = pd_addr;
radeon_ring_vm_flush(rdev, &rdev->ring[ring],
vm->id, vm->pd_gpu_addr);
vm_id->id, vm_id->pd_gpu_addr);
}
}
......@@ -268,14 +273,16 @@ void radeon_vm_fence(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_fence *fence)
{
unsigned vm_id = vm->ids[fence->ring].id;
radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(fence);
radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
radeon_fence_unref(&vm->last_id_use);
vm->last_id_use = radeon_fence_ref(fence);
radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
}
/**
......@@ -1120,13 +1127,16 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
RADEON_VM_PTE_COUNT * 8);
unsigned pd_size, pd_entries, pts_size;
int r;
int i, r;
vm->id = 0;
vm->ib_bo_va = NULL;
vm->fence = NULL;
vm->flushed_updates = NULL;
vm->last_id_use = NULL;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
vm->ids[i].id = 0;
vm->ids[i].flushed_updates = NULL;
vm->ids[i].last_id_use = NULL;
}
mutex_init(&vm->mutex);
vm->va = RB_ROOT;
INIT_LIST_HEAD(&vm->invalidated);
......@@ -1197,8 +1207,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
radeon_bo_unref(&vm->page_directory);
radeon_fence_unref(&vm->fence);
radeon_fence_unref(&vm->flushed_updates);
radeon_fence_unref(&vm->last_id_use);
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
radeon_fence_unref(&vm->ids[i].flushed_updates);
radeon_fence_unref(&vm->ids[i].last_id_use);
}
mutex_destroy(&vm->mutex);
}
......@@ -3362,6 +3362,7 @@ void si_fence_ring_emit(struct radeon_device *rdev,
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 header;
if (ib->is_const_ib) {
......@@ -3397,14 +3398,13 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
#endif
(ib->gpu_addr & 0xFFFFFFFC));
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
radeon_ring_write(ring, ib->length_dw |
(ib->vm ? (ib->vm->id << 24) : 0));
radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
if (!ib->is_const_ib) {
/* flush read cache over gart for this vmid */
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
radeon_ring_write(ring, vm_id);
radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
PACKET3_TC_ACTION_ENA |
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment