Commit c3aaca43 authored by Mukul Joshi's avatar Mukul Joshi Committed by Alex Deucher

drm/amdgpu: Add a low priority scheduler for VRAM clearing

Add a low priority DRM scheduler for VRAM clearing instead of using
the existing high priority scheduler. Use the high priority scheduler
for migrations and evictions.
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 1fbc69b8
...@@ -627,7 +627,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, ...@@ -627,7 +627,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->tbo.resource->mem_type == TTM_PL_VRAM) { bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence; struct dma_fence *fence;
r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence); r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
if (unlikely(r)) if (unlikely(r))
goto fail_unreserve; goto fail_unreserve;
...@@ -1354,7 +1354,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) ...@@ -1354,7 +1354,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return; return;
r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence); r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
if (!WARN_ON(r)) { if (!WARN_ON(r)) {
amdgpu_bo_fence(abo, fence, false); amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence); dma_fence_put(fence);
......
...@@ -383,7 +383,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, ...@@ -383,7 +383,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL; struct dma_fence *wipe_fence = NULL;
r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence); r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
false);
if (r) { if (r) {
goto error; goto error;
} else if (wipe_fence) { } else if (wipe_fence) {
...@@ -2036,8 +2037,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) ...@@ -2036,8 +2037,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
r); r);
return; return;
} }
r = drm_sched_entity_init(&adev->mman.delayed,
DRM_SCHED_PRIORITY_NORMAL, &sched,
1, NULL);
if (r) {
DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
r);
goto error_free_entity;
}
} else { } else {
drm_sched_entity_destroy(&adev->mman.entity); drm_sched_entity_destroy(&adev->mman.entity);
drm_sched_entity_destroy(&adev->mman.delayed);
dma_fence_put(man->move); dma_fence_put(man->move);
man->move = NULL; man->move = NULL;
} }
...@@ -2049,6 +2060,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) ...@@ -2049,6 +2060,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
size = adev->gmc.visible_vram_size; size = adev->gmc.visible_vram_size;
man->size = size; man->size = size;
adev->mman.buffer_funcs_enabled = enable; adev->mman.buffer_funcs_enabled = enable;
return;
error_free_entity:
drm_sched_entity_destroy(&adev->mman.entity);
} }
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
...@@ -2056,14 +2072,16 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, ...@@ -2056,14 +2072,16 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
unsigned int num_dw, unsigned int num_dw,
struct dma_resv *resv, struct dma_resv *resv,
bool vm_needs_flush, bool vm_needs_flush,
struct amdgpu_job **job) struct amdgpu_job **job,
bool delayed)
{ {
enum amdgpu_ib_pool_type pool = direct_submit ? enum amdgpu_ib_pool_type pool = direct_submit ?
AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DIRECT :
AMDGPU_IB_POOL_DELAYED; AMDGPU_IB_POOL_DELAYED;
int r; int r;
struct drm_sched_entity *entity = delayed ? &adev->mman.delayed :
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, &adev->mman.entity;
r = amdgpu_job_alloc_with_ib(adev, entity,
AMDGPU_FENCE_OWNER_UNDEFINED, AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, pool, job); num_dw * 4, pool, job);
if (r) if (r)
...@@ -2104,7 +2122,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, ...@@ -2104,7 +2122,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw, r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
resv, vm_needs_flush, &job); resv, vm_needs_flush, &job, false);
if (r) if (r)
return r; return r;
...@@ -2140,7 +2158,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, ...@@ -2140,7 +2158,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
uint64_t dst_addr, uint32_t byte_count, uint64_t dst_addr, uint32_t byte_count,
struct dma_resv *resv, struct dma_resv *resv,
struct dma_fence **fence, struct dma_fence **fence,
bool vm_needs_flush) bool vm_needs_flush, bool delayed)
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw; unsigned int num_loops, num_dw;
...@@ -2153,7 +2171,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, ...@@ -2153,7 +2171,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush, r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
&job); &job, delayed);
if (r) if (r)
return r; return r;
...@@ -2176,7 +2194,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, ...@@ -2176,7 +2194,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
int amdgpu_fill_buffer(struct amdgpu_bo *bo, int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data, uint32_t src_data,
struct dma_resv *resv, struct dma_resv *resv,
struct dma_fence **f) struct dma_fence **f,
bool delayed)
{ {
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
...@@ -2205,7 +2224,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, ...@@ -2205,7 +2224,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
goto error; goto error;
r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv, r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
&next, true); &next, true, delayed);
if (r) if (r)
goto error; goto error;
......
...@@ -61,6 +61,8 @@ struct amdgpu_mman { ...@@ -61,6 +61,8 @@ struct amdgpu_mman {
struct mutex gtt_window_lock; struct mutex gtt_window_lock;
/* Scheduler entity for buffer moves */ /* Scheduler entity for buffer moves */
struct drm_sched_entity entity; struct drm_sched_entity entity;
/* Scheduler entity for VRAM clearing */
struct drm_sched_entity delayed;
struct amdgpu_vram_mgr vram_mgr; struct amdgpu_vram_mgr vram_mgr;
struct amdgpu_gtt_mgr gtt_mgr; struct amdgpu_gtt_mgr gtt_mgr;
...@@ -152,7 +154,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, ...@@ -152,7 +154,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
int amdgpu_fill_buffer(struct amdgpu_bo *bo, int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data, uint32_t src_data,
struct dma_resv *resv, struct dma_resv *resv,
struct dma_fence **fence); struct dma_fence **fence,
bool delayed);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment