Commit 96105e53 authored by Christian König, committed by Alex Deucher

drm/amdgpu: stop splitting PTE commands into smaller ones

It doesn't make much sense to create bigger commands first which we then
need to split into smaller ones again. Just make sure the commands we
create aren't too big in the first place.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent dc157c6d
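Before this patch every SDMA backend carried its own while loop that chopped a large update into hardware-sized packets; after it, the common VM code guarantees that no single call covers more than AMDGPU_VM_MAX_UPDATE_SIZE entries, so each backend shrinks to a single packet write. A minimal standalone sketch of that contract, assuming a hypothetical caller-side helper (update_range and emit_one_packet_t are illustrative names, not driver code; only the constant comes from the patch):

#include <stdint.h>

/* From the patch: the most PTEs one hardware command may touch. */
#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF

/* Hypothetical backend hook: may now assume 'count' fits in one command. */
typedef void (*emit_one_packet_t)(uint64_t pe, uint64_t addr,
				  unsigned count, uint32_t incr);

/* Split at the caller, once, instead of in every backend. */
static void update_range(emit_one_packet_t emit, uint64_t pe, uint64_t addr,
			 unsigned count, uint32_t incr)
{
	while (count) {
		unsigned n = count < AMDGPU_VM_MAX_UPDATE_SIZE ?
			     count : AMDGPU_VM_MAX_UPDATE_SIZE;

		emit(pe, addr, n, incr);
		pe += (uint64_t)n * 8;		/* a PTE is 8 bytes */
		addr += (uint64_t)n * incr;	/* next physical address */
		count -= n;
	}
}

In the actual patch the bound is enforced by the merge logic in amdgpu_vm.c below rather than by a literal loop like this one.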
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -833,6 +833,9 @@ struct amdgpu_ring {
 /* maximum number of VMIDs */
 #define AMDGPU_NUM_VM	16
 
+/* Maximum number of PTEs the hardware can write with one command */
+#define AMDGPU_VM_MAX_UPDATE_SIZE	0x3FFFF
+
 /* number of entries in page table */
 #define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
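The value is not arbitrary: the per-packet caps enforced by the backend loops removed below were 0x1FFFF8 bytes for a linear copy (0x1FFFF8 / 8 = 0x3FFFF entries of 8 bytes each) and 0x7FFFF entries for GENERATE_PTE_PDE, so 0x3FFFF is the tighter of the two. A compile-time sketch of that arithmetic (the assertions are mine, not part of the patch):

#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF

/* Old linear-copy cap: 0x1FFFF8 bytes, at 8 bytes per PTE. */
_Static_assert(AMDGPU_VM_MAX_UPDATE_SIZE * 8 == 0x1FFFF8,
	       "matches the SDMA copy byte limit exactly");

/* Old GENERATE_PTE_PDE cap: 0x7FFFF entries per packet. */
_Static_assert(AMDGPU_VM_MAX_UPDATE_SIZE <= 0x7FFFF,
	       "within the PTE/PDE generation entry limit");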
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -639,7 +639,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 		pde = pd_addr + pt_idx * 8;
 		if (((last_pde + 8 * count) != pde) ||
-		    ((last_pt + incr * count) != pt)) {
+		    ((last_pt + incr * count) != pt) ||
+		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
 
 			if (count) {
 				amdgpu_vm_update_pages(&params, last_pde,
@@ -743,7 +744,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		next_pe_start = amdgpu_bo_gpu_offset(pt);
 		next_pe_start += (addr & mask) * 8;
 
-		if ((cur_pe_start + 8 * cur_nptes) == next_pe_start) {
+		if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
+		    ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
 			/* The next ptb is consecutive to current ptb.
 			 * Don't call amdgpu_vm_update_pages now.
 			 * Will update two ptbs together in future.
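Both hunks above follow the same accumulate-and-flush shape: keep merging adjacent updates while they stay contiguous, and flush once the pending run hits the per-command limit. A simplified, self-contained sketch reusing the diff's variable names (flush() stands in for amdgpu_vm_update_pages(); the globals are illustrative):

#include <stdint.h>

#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF

static uint64_t cur_pe_start;	/* GPU address where the pending run begins */
static unsigned cur_nptes;	/* PTEs accumulated so far */

/* Stand-in for amdgpu_vm_update_pages(): emits one bounded command. */
static void flush(uint64_t pe_start, unsigned nptes)
{
	(void)pe_start;
	(void)nptes;	/* a real backend would write one SDMA packet here */
}

/* Called once per page-table block, mirroring the second hunk. */
static void merge_or_flush(uint64_t next_pe_start, unsigned nptes)
{
	if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
	    (cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE) {
		/* contiguous and still within one command: keep merging */
		cur_nptes += nptes;
	} else {
		/* gap, or the run would overflow one command: flush it */
		flush(cur_pe_start, cur_nptes);
		cur_pe_start = next_pe_start;
		cur_nptes = nptes;
	}
}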
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -694,24 +694,16 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
 				 uint64_t pe, uint64_t src,
 				 unsigned count)
 {
-	while (count) {
-		unsigned bytes = count * 8;
-		if (bytes > 0x1FFFF8)
-			bytes = 0x1FFFF8;
-
-		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
-			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-		ib->ptr[ib->length_dw++] = bytes;
-		ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-		ib->ptr[ib->length_dw++] = lower_32_bits(src);
-		ib->ptr[ib->length_dw++] = upper_32_bits(src);
-		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-		pe += bytes;
-		src += bytes;
-		count -= bytes / 8;
-	}
+	unsigned bytes = count * 8;
+
+	ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
+		SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+	ib->ptr[ib->length_dw++] = bytes;
+	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+	ib->ptr[ib->length_dw++] = lower_32_bits(src);
+	ib->ptr[ib->length_dw++] = upper_32_bits(src);
+	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 }
 
 /**
@@ -755,40 +747,21 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
  *
  * Update the page tables using sDMA (CIK).
  */
-static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
-				    uint64_t pe,
+static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
 				    uint64_t addr, unsigned count,
 				    uint32_t incr, uint32_t flags)
 {
-	uint64_t value;
-	unsigned ndw;
-
-	while (count) {
-		ndw = count;
-		if (ndw > 0x7FFFF)
-			ndw = 0x7FFFF;
-
-		if (flags & AMDGPU_PTE_VALID)
-			value = addr;
-		else
-			value = 0;
-
-		/* for physically contiguous pages (vram) */
-		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
-		ib->ptr[ib->length_dw++] = pe; /* dst addr */
-		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-		ib->ptr[ib->length_dw++] = flags; /* mask */
-		ib->ptr[ib->length_dw++] = 0;
-		ib->ptr[ib->length_dw++] = value; /* value */
-		ib->ptr[ib->length_dw++] = upper_32_bits(value);
-		ib->ptr[ib->length_dw++] = incr; /* increment size */
-		ib->ptr[ib->length_dw++] = 0;
-		ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
-		pe += ndw * 8;
-		addr += ndw * incr;
-		count -= ndw;
-	}
+	/* for physically contiguous pages (vram) */
+	ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+	ib->ptr[ib->length_dw++] = flags; /* mask */
+	ib->ptr[ib->length_dw++] = 0;
+	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+	ib->ptr[ib->length_dw++] = incr; /* increment size */
+	ib->ptr[ib->length_dw++] = 0;
+	ib->ptr[ib->length_dw++] = count; /* number of entries */
 }
 
 /**
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -749,24 +749,16 @@ static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
 				  uint64_t pe, uint64_t src,
 				  unsigned count)
 {
-	while (count) {
-		unsigned bytes = count * 8;
-		if (bytes > 0x1FFFF8)
-			bytes = 0x1FFFF8;
-
-		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-			SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
-		ib->ptr[ib->length_dw++] = bytes;
-		ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-		ib->ptr[ib->length_dw++] = lower_32_bits(src);
-		ib->ptr[ib->length_dw++] = upper_32_bits(src);
-		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-		pe += bytes;
-		src += bytes;
-		count -= bytes / 8;
-	}
+	unsigned bytes = count * 8;
+
+	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+	ib->ptr[ib->length_dw++] = bytes;
+	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+	ib->ptr[ib->length_dw++] = lower_32_bits(src);
+	ib->ptr[ib->length_dw++] = upper_32_bits(src);
+	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 }
 
 /**
@@ -810,40 +802,21 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
  *
  * Update the page tables using sDMA (CIK).
  */
-static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
-				     uint64_t pe,
+static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
 				     uint64_t addr, unsigned count,
 				     uint32_t incr, uint32_t flags)
 {
-	uint64_t value;
-	unsigned ndw;
-
-	while (count) {
-		ndw = count;
-		if (ndw > 0x7FFFF)
-			ndw = 0x7FFFF;
-
-		if (flags & AMDGPU_PTE_VALID)
-			value = addr;
-		else
-			value = 0;
-
-		/* for physically contiguous pages (vram) */
-		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
-		ib->ptr[ib->length_dw++] = pe; /* dst addr */
-		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-		ib->ptr[ib->length_dw++] = flags; /* mask */
-		ib->ptr[ib->length_dw++] = 0;
-		ib->ptr[ib->length_dw++] = value; /* value */
-		ib->ptr[ib->length_dw++] = upper_32_bits(value);
-		ib->ptr[ib->length_dw++] = incr; /* increment size */
-		ib->ptr[ib->length_dw++] = 0;
-		ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
-		pe += ndw * 8;
-		addr += ndw * incr;
-		count -= ndw;
-	}
+	/* for physically contiguous pages (vram) */
+	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+	ib->ptr[ib->length_dw++] = flags; /* mask */
+	ib->ptr[ib->length_dw++] = 0;
+	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+	ib->ptr[ib->length_dw++] = incr; /* increment size */
+	ib->ptr[ib->length_dw++] = 0;
+	ib->ptr[ib->length_dw++] = count; /* number of entries */
 }
 
 /**
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -976,24 +976,16 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
 				  uint64_t pe, uint64_t src,
 				  unsigned count)
 {
-	while (count) {
-		unsigned bytes = count * 8;
-		if (bytes > 0x1FFFF8)
-			bytes = 0x1FFFF8;
-
-		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-			SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
-		ib->ptr[ib->length_dw++] = bytes;
-		ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-		ib->ptr[ib->length_dw++] = lower_32_bits(src);
-		ib->ptr[ib->length_dw++] = upper_32_bits(src);
-		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-		pe += bytes;
-		src += bytes;
-		count -= bytes / 8;
-	}
+	unsigned bytes = count * 8;
+
+	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+	ib->ptr[ib->length_dw++] = bytes;
+	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+	ib->ptr[ib->length_dw++] = lower_32_bits(src);
+	ib->ptr[ib->length_dw++] = upper_32_bits(src);
+	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 }
 
 /**
@@ -1037,40 +1029,21 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
  *
  * Update the page tables using sDMA (CIK).
  */
-static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
-				     uint64_t pe,
+static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
 				     uint64_t addr, unsigned count,
 				     uint32_t incr, uint32_t flags)
 {
-	uint64_t value;
-	unsigned ndw;
-
-	while (count) {
-		ndw = count;
-		if (ndw > 0x7FFFF)
-			ndw = 0x7FFFF;
-
-		if (flags & AMDGPU_PTE_VALID)
-			value = addr;
-		else
-			value = 0;
-
-		/* for physically contiguous pages (vram) */
-		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
-		ib->ptr[ib->length_dw++] = pe; /* dst addr */
-		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-		ib->ptr[ib->length_dw++] = flags; /* mask */
-		ib->ptr[ib->length_dw++] = 0;
-		ib->ptr[ib->length_dw++] = value; /* value */
-		ib->ptr[ib->length_dw++] = upper_32_bits(value);
-		ib->ptr[ib->length_dw++] = incr; /* increment size */
-		ib->ptr[ib->length_dw++] = 0;
-		ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
-		pe += ndw * 8;
-		addr += ndw * incr;
-		count -= ndw;
-	}
+	/* for physically contiguous pages (vram) */
+	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+	ib->ptr[ib->length_dw++] = flags; /* mask */
+	ib->ptr[ib->length_dw++] = 0;
+	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+	ib->ptr[ib->length_dw++] = incr; /* increment size */
+	ib->ptr[ib->length_dw++] = 0;
+	ib->ptr[ib->length_dw++] = count; /* number of entries */
 }
 
 /**
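A side effect of dropping the loops is that each backend call now has a fixed cost: 7 dwords for a copy packet and 10 for a GENERATE_PTE_PDE packet, as counted from the hunks above. That makes the indirect-buffer space for an update easy to bound; a hypothetical helper sketching the arithmetic (not driver code):

#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF
#define GEN_PTE_PDE_NDW 10	/* dwords per GENERATE_PTE_PDE packet */

/* Worst-case dwords to set 'count' PTEs, assuming the caller splits at
 * AMDGPU_VM_MAX_UPDATE_SIZE as the amdgpu_vm.c hunks now guarantee. */
static unsigned set_pte_pde_ndw_bound(unsigned count)
{
	unsigned packets = (count + AMDGPU_VM_MAX_UPDATE_SIZE - 1) /
			   AMDGPU_VM_MAX_UPDATE_SIZE;

	return packets * GEN_PTE_PDE_NDW;
}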