Commit 8ecee4cb authored by Guchun Chen's avatar Guchun Chen Committed by Alex Deucher

drm/amdgpu: fix slab-out-of-bounds issue in amdgpu_vm_pt_create

Recent code set xcp_id stored from file private data when opening
device to amdgpu bo for accounting memory usage etc, but not all
VMs are attached to this fpriv structure like the vm cases in
amdgpu_mes_self_test, otherwise, KASAN will complain below out
of bound access. And more importantly, VM code should not touch
fpriv structure, so drop fpriv code handling from amdgpu_vm_pt.

[   77.292314] BUG: KASAN: slab-out-of-bounds in amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.293845] Read of size 4 at addr ffff888102c48a48 by task modprobe/1069
[   77.294146] Call Trace:
[   77.294178]  <TASK>
[   77.294208]  dump_stack_lvl+0x49/0x63
[   77.294260]  print_report+0x16f/0x4a6
[   77.294307]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.295979]  ? kasan_complete_mode_report_info+0x3c/0x200
[   77.296057]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.297556]  kasan_report+0xb4/0x130
[   77.297609]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.299202]  __asan_load4+0x6f/0x90
[   77.299272]  amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.300796]  ? amdgpu_init+0x6e/0x1000 [amdgpu]
[   77.302222]  ? amdgpu_vm_pt_clear+0x750/0x750 [amdgpu]
[   77.303721]  ? preempt_count_sub+0x18/0xc0
[   77.303786]  amdgpu_vm_init+0x39e/0x870 [amdgpu]
[   77.305186]  ? amdgpu_vm_wait_idle+0x90/0x90 [amdgpu]
[   77.306683]  ? kasan_set_track+0x25/0x30
[   77.306737]  ? kasan_save_alloc_info+0x1b/0x30
[   77.306795]  ? __kasan_kmalloc+0x87/0xa0
[   77.306852]  amdgpu_mes_self_test+0x169/0x620 [amdgpu]

v2: without specifying xcp partition for PD/PT bo, the xcp id is -1.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2686
Fixes: 3ebfd221 ("drm/amdkfd: Store xcp partition id to amdgpu bo")
Signed-off-by: default avatarGuchun Chen <guchun.chen@amd.com>
Tested-by: default avatarMikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent dcaa32e1
...@@ -1233,7 +1233,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) ...@@ -1233,7 +1233,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
if (r) if (r)
goto error_pasid; goto error_pasid;
r = amdgpu_vm_init(adev, &fpriv->vm); r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
if (r) if (r)
goto error_pasid; goto error_pasid;
......
...@@ -1382,7 +1382,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev) ...@@ -1382,7 +1382,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
goto error_pasid; goto error_pasid;
} }
r = amdgpu_vm_init(adev, vm); r = amdgpu_vm_init(adev, vm, -1);
if (r) { if (r) {
DRM_ERROR("failed to initialize vm\n"); DRM_ERROR("failed to initialize vm\n");
goto error_pasid; goto error_pasid;
......
...@@ -2121,13 +2121,14 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) ...@@ -2121,13 +2121,14 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
* *
* @adev: amdgpu_device pointer * @adev: amdgpu_device pointer
* @vm: requested vm * @vm: requested vm
* @xcp_id: GPU partition selection id
* *
* Init @vm fields. * Init @vm fields.
* *
* Returns: * Returns:
* 0 for success, error for failure. * 0 for success, error for failure.
*/ */
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id)
{ {
struct amdgpu_bo *root_bo; struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root; struct amdgpu_bo_vm *root;
...@@ -2177,7 +2178,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) ...@@ -2177,7 +2178,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->evicting = false; vm->evicting = false;
r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
false, &root); false, &root, xcp_id);
if (r) if (r)
goto error_free_delayed; goto error_free_delayed;
root_bo = &root->bo; root_bo = &root->bo;
......
...@@ -392,7 +392,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -392,7 +392,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
u32 pasid); u32 pasid);
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
...@@ -475,7 +475,8 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, ...@@ -475,7 +475,8 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_vm *vmbo, bool immediate); struct amdgpu_bo_vm *vmbo, bool immediate);
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int level, bool immediate, struct amdgpu_bo_vm **vmbo); int level, bool immediate, struct amdgpu_bo_vm **vmbo,
int32_t xcp_id);
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm);
bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
struct amdgpu_vm *vm); struct amdgpu_vm *vm);
......
...@@ -498,11 +498,12 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -498,11 +498,12 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
* @level: the page table level * @level: the page table level
* @immediate: use a immediate update * @immediate: use a immediate update
* @vmbo: pointer to the buffer object pointer * @vmbo: pointer to the buffer object pointer
* @xcp_id: GPU partition id
*/ */
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int level, bool immediate, struct amdgpu_bo_vm **vmbo) int level, bool immediate, struct amdgpu_bo_vm **vmbo,
int32_t xcp_id)
{ {
struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, vm);
struct amdgpu_bo_param bp; struct amdgpu_bo_param bp;
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
struct dma_resv *resv; struct dma_resv *resv;
...@@ -535,7 +536,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -535,7 +536,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bp.type = ttm_bo_type_kernel; bp.type = ttm_bo_type_kernel;
bp.no_wait_gpu = immediate; bp.no_wait_gpu = immediate;
bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1; bp.xcp_id_plus1 = xcp_id + 1;
if (vm->root.bo) if (vm->root.bo)
bp.resv = vm->root.bo->tbo.base.resv; bp.resv = vm->root.bo->tbo.base.resv;
...@@ -561,7 +562,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -561,7 +562,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bp.type = ttm_bo_type_kernel; bp.type = ttm_bo_type_kernel;
bp.resv = bo->tbo.base.resv; bp.resv = bo->tbo.base.resv;
bp.bo_ptr_size = sizeof(struct amdgpu_bo); bp.bo_ptr_size = sizeof(struct amdgpu_bo);
bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1; bp.xcp_id_plus1 = xcp_id + 1;
r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
...@@ -606,7 +607,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, ...@@ -606,7 +607,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
return 0; return 0;
amdgpu_vm_eviction_unlock(vm); amdgpu_vm_eviction_unlock(vm);
r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt, 0);
amdgpu_vm_eviction_lock(vm); amdgpu_vm_eviction_lock(vm);
if (r) if (r)
return r; return r;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment