Commit 96595204 authored by Srinivasan Shanmugam, committed by Alex Deucher

drm/amdgpu: Make enforce_isolation setting per GPU

This commit makes the enforce_isolation setting per-GPU and per-partition
by adding an enforce_isolation array to the adev structure. Each entry of
the array is initialized from the global enforce_isolation module
parameter during device initialization.

In amdgpu_ids.c, the adev->enforce_isolation value for the current GPU
and partition is used to determine whether to enforce isolation between
graphics and compute processes on that GPU and partition.

This allows the enforce_isolation setting to be controlled individually
for each GPU and each partition, which is useful in a system with
multiple GPUs and partitions where different isolation settings might be
desired for different GPUs and partitions.

v2: fix loop in amdgpu_vmid_mgr_init() (Alex)

Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
parent ee7a846e
...@@ -1162,6 +1162,8 @@ struct amdgpu_device { ...@@ -1162,6 +1162,8 @@ struct amdgpu_device {
bool debug_disable_soft_recovery; bool debug_disable_soft_recovery;
bool debug_use_vram_fw_buf; bool debug_use_vram_fw_buf;
bool debug_enable_ras_aca; bool debug_enable_ras_aca;
bool enforce_isolation[MAX_XCP];
}; };
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
......
...@@ -1110,7 +1110,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) ...@@ -1110,7 +1110,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
struct drm_gpu_scheduler *sched = entity->rq->sched; struct drm_gpu_scheduler *sched = entity->rq->sched;
struct amdgpu_ring *ring = to_amdgpu_ring(sched); struct amdgpu_ring *ring = to_amdgpu_ring(sched);
if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub))
return -EINVAL; return -EINVAL;
} }
} }
......
...@@ -1916,6 +1916,8 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) ...@@ -1916,6 +1916,8 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
*/ */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev) static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{ {
int i;
if (amdgpu_sched_jobs < 4) { if (amdgpu_sched_jobs < 4) {
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs); amdgpu_sched_jobs);
...@@ -1970,6 +1972,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) ...@@ -1970,6 +1972,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
for (i = 0; i < MAX_XCP; i++)
adev->enforce_isolation[i] = !!enforce_isolation;
return 0; return 0;
} }
......
...@@ -424,7 +424,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, ...@@ -424,7 +424,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
if (r || !idle) if (r || !idle)
goto error; goto error;
if (amdgpu_vmid_uses_reserved(vm, vmhub)) { if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) {
r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
if (r || !id) if (r || !id)
goto error; goto error;
...@@ -476,15 +476,19 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, ...@@ -476,15 +476,19 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
/* /*
* amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID
* @adev: amdgpu_device pointer
* @vm: the VM to check * @vm: the VM to check
* @vmhub: the VMHUB which will be used * @vmhub: the VMHUB which will be used
* *
* Returns: True if the VM will use a reserved VMID. * Returns: True if the VM will use a reserved VMID.
*/ */
bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm, unsigned int vmhub)
{ {
return vm->reserved_vmid[vmhub] || return vm->reserved_vmid[vmhub] ||
(enforce_isolation && AMDGPU_IS_GFXHUB(vmhub)); (adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ?
vm->root.bo->xcp_id : 0] &&
AMDGPU_IS_GFXHUB(vmhub));
} }
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
...@@ -600,9 +604,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) ...@@ -600,9 +604,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
} }
} }
/* alloc a default reserved vmid to enforce isolation */ /* alloc a default reserved vmid to enforce isolation */
if (enforce_isolation) for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); if (adev->enforce_isolation[i])
amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
}
} }
/** /**
......
...@@ -78,7 +78,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, ...@@ -78,7 +78,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id); struct amdgpu_vmid *id);
bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm, unsigned int vmhub);
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
unsigned vmhub); unsigned vmhub);
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment