Commit c7ae72c0 authored by Chunming Zhou, committed by Alex Deucher

drm/amdgpu: use IB for copy buffer of eviction

This aids handling buffer moves with the scheduler.
Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
parent 113cd9da
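
For orientation, here is a condensed sketch of the new copy path that the diff below introduces. It reuses the identifiers from the patch (amdgpu_ib_get, amdgpu_emit_copy_buffer, amdgpu_vm_pad_ib, amdgpu_sched_ib_submit_kernel_helper) but simplifies allocation-failure and error handling, so treat it as an illustration rather than the exact kernel code:

	/* Instead of locking the ring and writing copy packets directly,
	 * build the copy as an indirect buffer (IB) and hand it to the
	 * GPU scheduler; the returned struct fence signals completion.
	 */
	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
	r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib);	/* num_dw rounded up for padding */
	ib->length_dw = 0;

	if (resv)					/* sync to prior users of the BO */
		r = amdgpu_sync_resv(adev, &ib->sync, resv,
				     AMDGPU_FENCE_OWNER_UNDEFINED);

	while (byte_count) {				/* one copy packet per chunk */
		uint32_t cur = min(byte_count, max_bytes);

		amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset, cur);
		src_offset += cur;
		dst_offset += cur;
		byte_count -= cur;
	}

	amdgpu_vm_pad_ib(adev, ib);			/* pad the IB to the required alignment */
	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
						 &amdgpu_vm_free_job,
						 AMDGPU_FENCE_OWNER_MOVE,
						 fence);
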
@@ -247,7 +247,7 @@ struct amdgpu_buffer_funcs {
 	unsigned	copy_num_dw;

 	/* used for buffer migration */
-	void (*emit_copy_buffer)(struct amdgpu_ring *ring,
+	void (*emit_copy_buffer)(struct amdgpu_ib *ib,
				  /* src addr in bytes */
				  uint64_t src_offset,
				  /* dst addr in bytes */
@@ -518,7 +518,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 		       uint64_t dst_offset,
 		       uint32_t byte_count,
 		       struct reservation_object *resv,
-		       struct amdgpu_fence **fence);
+		       struct fence **fence);
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);

 struct amdgpu_bo_list_entry {
@@ -2247,7 +2247,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
 #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s))
 #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s))
-#define amdgpu_emit_copy_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((r), (s), (d), (b))
+#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
 #define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b))
 #define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev))
 #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev))
@@ -2379,7 +2379,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 		       uint64_t addr);
 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 		      struct amdgpu_bo_va *bo_va);
+int amdgpu_vm_free_job(struct amdgpu_job *job);

 /*
  * functions used by amdgpu_encoder.c
  */
...
@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
 {
	unsigned long start_jiffies;
	unsigned long end_jiffies;
-	struct amdgpu_fence *fence = NULL;
+	struct fence *fence = NULL;
	int i, r;

	start_jiffies = jiffies;
@@ -42,17 +42,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
		r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence);
		if (r)
			goto exit_do_move;
-		r = fence_wait(&fence->base, false);
+		r = fence_wait(fence, false);
		if (r)
			goto exit_do_move;
-		amdgpu_fence_unref(&fence);
+		fence_put(fence);
	}
	end_jiffies = jiffies;
	r = jiffies_to_msecs(end_jiffies - start_jiffies);

 exit_do_move:
	if (fence)
-		amdgpu_fence_unref(&fence);
+		fence_put(fence);
	return r;
 }
...
@@ -77,7 +77,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
	void *gtt_map, *vram_map;
	void **gtt_start, **gtt_end;
	void **vram_start, **vram_end;
-	struct amdgpu_fence *fence = NULL;
+	struct fence *fence = NULL;

	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i);
@@ -116,13 +116,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
			goto out_lclean_unpin;
		}

-		r = fence_wait(&fence->base, false);
+		r = fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
			goto out_lclean_unpin;
		}

-		amdgpu_fence_unref(&fence);
+		fence_put(fence);

		r = amdgpu_bo_kmap(vram_obj, &vram_map);
		if (r) {
@@ -161,13 +161,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
			goto out_lclean_unpin;
		}

-		r = fence_wait(&fence->base, false);
+		r = fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
			goto out_lclean_unpin;
		}

-		amdgpu_fence_unref(&fence);
+		fence_put(fence);

		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
@@ -214,7 +214,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
		amdgpu_bo_unref(&gtt_obj[i]);
	}

	if (fence)
-		amdgpu_fence_unref(&fence);
+		fence_put(fence);
	break;
 }
...
@@ -228,7 +228,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
	struct amdgpu_device *adev;
	struct amdgpu_ring *ring;
	uint64_t old_start, new_start;
-	struct amdgpu_fence *fence;
+	struct fence *fence;
	int r;

	adev = amdgpu_get_adev(bo->bdev);
@@ -269,9 +269,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
			       new_mem->num_pages * PAGE_SIZE, /* bytes */
			       bo->resv, &fence);
	/* FIXME: handle copy error */
-	r = ttm_bo_move_accel_cleanup(bo, &fence->base,
+	r = ttm_bo_move_accel_cleanup(bo, fence,
				      evict, no_wait_gpu, new_mem);
-	amdgpu_fence_unref(&fence);
+	fence_put(fence);

	return r;
 }
@@ -987,52 +987,48 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
		       uint64_t dst_offset,
		       uint32_t byte_count,
		       struct reservation_object *resv,
-		       struct amdgpu_fence **fence)
+		       struct fence **fence)
 {
	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_sync sync;
	uint32_t max_bytes;
	unsigned num_loops, num_dw;
+	struct amdgpu_ib *ib;
	unsigned i;
	int r;

-	/* sync other rings */
-	amdgpu_sync_create(&sync);
-	if (resv) {
-		r = amdgpu_sync_resv(adev, &sync, resv, false);
-		if (r) {
-			DRM_ERROR("sync failed (%d).\n", r);
-			amdgpu_sync_free(adev, &sync, NULL);
-			return r;
-		}
-	}
-
	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
	num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;

-	/* for fence and sync */
-	num_dw += 64 + AMDGPU_NUM_SYNCS * 8;
+	/* for IB padding */
+	while (num_dw & 0x7)
+		num_dw++;

-	r = amdgpu_sync_wait(&sync);
-	if (r) {
-		DRM_ERROR("sync wait failed (%d).\n", r);
-		amdgpu_sync_free(adev, &sync, NULL);
-		return r;
-	}
+	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+	if (!ib)
+		return -ENOMEM;

-	r = amdgpu_ring_lock(ring, num_dw);
-	if (r) {
-		DRM_ERROR("ring lock failed (%d).\n", r);
-		amdgpu_sync_free(adev, &sync, NULL);
-		return r;
+	r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib);
+	if (r) {
+		kfree(ib);
+		return r;
	}

-	amdgpu_sync_rings(&sync, ring);
+	ib->length_dw = 0;
+
+	if (resv) {
+		r = amdgpu_sync_resv(adev, &ib->sync, resv,
+				     AMDGPU_FENCE_OWNER_UNDEFINED);
+		if (r) {
+			DRM_ERROR("sync failed (%d).\n", r);
+			goto error_free;
+		}
+	}

	for (i = 0; i < num_loops; i++) {
		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);

-		amdgpu_emit_copy_buffer(adev, ring, src_offset, dst_offset,
+		amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset,
					cur_size_in_bytes);

		src_offset += cur_size_in_bytes;
@@ -1040,17 +1036,24 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
		byte_count -= cur_size_in_bytes;
	}

-	r = amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_MOVE, fence);
-	if (r) {
-		amdgpu_ring_unlock_undo(ring);
-		amdgpu_sync_free(adev, &sync, NULL);
-		return r;
-	}
+	amdgpu_vm_pad_ib(adev, ib);
+	WARN_ON(ib->length_dw > num_dw);
+	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+						 &amdgpu_vm_free_job,
+						 AMDGPU_FENCE_OWNER_MOVE,
+						 fence);
+	if (r)
+		goto error_free;

-	amdgpu_ring_unlock_commit(ring);
-	amdgpu_sync_free(adev, &sync, &(*fence)->base);
+	if (!amdgpu_enable_scheduler) {
+		amdgpu_ib_free(adev, ib);
+		kfree(ib);
+	}

	return 0;
+
+error_free:
+	amdgpu_ib_free(adev, ib);
+	kfree(ib);
+	return r;
 }

 #if defined(CONFIG_DEBUG_FS)
...
@@ -316,8 +316,7 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
	}
 }

-static int amdgpu_vm_free_job(
-	struct amdgpu_job *sched_job)
+int amdgpu_vm_free_job(struct amdgpu_job *sched_job)
 {
	int i;
	for (i = 0; i < sched_job->num_ibs; i++)
...
@@ -1339,18 +1339,18 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
  * Used by the amdgpu ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-static void cik_sdma_emit_copy_buffer(struct amdgpu_ring *ring,
+static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
				      uint64_t src_offset,
				      uint64_t dst_offset,
				      uint32_t byte_count)
 {
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
-	amdgpu_ring_write(ring, byte_count);
-	amdgpu_ring_write(ring, 0); /* src/dst endian swap */
-	amdgpu_ring_write(ring, lower_32_bits(src_offset));
-	amdgpu_ring_write(ring, upper_32_bits(src_offset));
-	amdgpu_ring_write(ring, lower_32_bits(dst_offset));
-	amdgpu_ring_write(ring, upper_32_bits(dst_offset));
+	ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
+	ib->ptr[ib->length_dw++] = byte_count;
+	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
 }

 /**
...
@@ -1350,19 +1350,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
  * Used by the amdgpu ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring,
+static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
				       uint64_t src_offset,
				       uint64_t dst_offset,
				       uint32_t byte_count)
 {
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR));
-	amdgpu_ring_write(ring, byte_count);
-	amdgpu_ring_write(ring, 0); /* src/dst endian swap */
-	amdgpu_ring_write(ring, lower_32_bits(src_offset));
-	amdgpu_ring_write(ring, upper_32_bits(src_offset));
-	amdgpu_ring_write(ring, lower_32_bits(dst_offset));
-	amdgpu_ring_write(ring, upper_32_bits(dst_offset));
+	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+	ib->ptr[ib->length_dw++] = byte_count;
+	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
 }

 /**
...
@@ -1474,19 +1474,19 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
  * Used by the amdgpu ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ring *ring,
+static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
				       uint64_t src_offset,
				       uint64_t dst_offset,
				       uint32_t byte_count)
 {
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR));
-	amdgpu_ring_write(ring, byte_count);
-	amdgpu_ring_write(ring, 0); /* src/dst endian swap */
-	amdgpu_ring_write(ring, lower_32_bits(src_offset));
-	amdgpu_ring_write(ring, upper_32_bits(src_offset));
-	amdgpu_ring_write(ring, lower_32_bits(dst_offset));
-	amdgpu_ring_write(ring, upper_32_bits(dst_offset));
+	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+	ib->ptr[ib->length_dw++] = byte_count;
+	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
 }

 /**
...