Commit 29633d0e authored by Joseph Greathouse, committed by Alex Deucher

drm/amdkfd: Enable GWS based on FW Support

Rather than only enabling GWS support based on the hws_gws_support
modparam, also check whether the GPU's HWS firmware supports GWS.
Leave the old modparam in place in case users want to test GWS
on GPUs not yet in the support list.

v2: fix broken syntax from the first patch.
Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 5bb4b78b
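
For readers skimming the diff, the core of the change is the gating predicate in the new kfd_gws_init() helper below: GWS is enabled when the modparam forces it, or when the ASIC is a gfx9 part between Vega10 and Raven whose MEC2 firmware is at least 0x1b3. The standalone C sketch here restates that predicate outside the kernel so it can be compiled and experimented with; the enum values and function names are illustrative stand-ins, not kernel definitions.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative stand-in for the relevant slice of amd_asic_type. */
    enum asic_family { CHIP_VEGA10, CHIP_VEGA12, CHIP_VEGA20, CHIP_RAVEN };

    /* Mirrors the condition in kfd_gws_init(): the modparam overrides the
     * firmware check; otherwise the ASIC must fall in the supported range
     * and carry MEC2 firmware >= 0x1b3, the minimum the patch accepts. */
    static bool gws_usable(bool hws_gws_support, enum asic_family family,
                           uint16_t mec2_fw_version)
    {
            if (hws_gws_support)
                    return true;
            return family >= CHIP_VEGA10 && family <= CHIP_RAVEN &&
                   mec2_fw_version >= 0x1b3;
    }

    int main(void)
    {
            /* Raven with new enough MEC2 firmware: enabled without the modparam */
            printf("raven/0x1b3: %d\n", gws_usable(false, CHIP_RAVEN, 0x1b3));
            /* Older firmware: disabled unless the modparam forces it on */
            printf("raven/0x1b2: %d\n", gws_usable(false, CHIP_RAVEN, 0x1b2));
            return 0;
    }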
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -689,13 +689,12 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau
 /**
  * DOC: hws_gws_support(bool)
- * Whether HWS support gws barriers. Default value: false (not supported)
- * This will be replaced with a MEC firmware version check once firmware
- * is ready
+ * Assume that HWS supports GWS barriers regardless of what firmware version
+ * check says. Default value: false (rely on MEC2 firmware version check).
  */
 bool hws_gws_support;
 module_param(hws_gws_support, bool, 0444);
-MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)");
+MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)");

 /**
  * DOC: queue_preemption_timeout_ms (int)
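
With the firmware check in place, the modparam becomes an override rather than the sole gate: by default the MEC2 firmware version decides, and a user who wants to experiment with GWS on a GPU outside the supported list can still force it on at load time, e.g. with amdgpu.hws_gws_support=1 on the kernel command line.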
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1592,9 +1592,6 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
 	struct queue *q;
 	struct kfd_dev *dev;

-	if (!hws_gws_support)
-		return -ENODEV;
-
 	mutex_lock(&p->mutex);
 	q = pqm_get_user_queue(&p->pqm, args->queue_id);
@@ -1605,6 +1602,11 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
 		goto out_unlock;
 	}

+	if (!dev->gws) {
+		retval = -ENODEV;
+		goto out_unlock;
+	}
+
 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
 		retval = -ENODEV;
 		goto out_unlock;
drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -569,6 +569,23 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
 	}
 }

+static int kfd_gws_init(struct kfd_dev *kfd)
+{
+	int ret = 0;
+
+	if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+		return 0;
+
+	if (hws_gws_support
+		|| (kfd->device_info->asic_family >= CHIP_VEGA10
+			&& kfd->device_info->asic_family <= CHIP_RAVEN
+			&& kfd->mec2_fw_version >= 0x1b3))
+		ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
+				amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
+
+	return ret;
+}
+
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			 struct drm_device *ddev,
 			 const struct kgd2kfd_shared_resources *gpu_resources)
@@ -578,6 +595,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd->ddev = ddev;
 	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
 			KGD_ENGINE_MEC1);
+	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+			KGD_ENGINE_MEC2);
 	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
 			KGD_ENGINE_SDMA1);
 	kfd->shared_resources = *gpu_resources;
@@ -598,13 +617,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	} else
 		kfd->max_proc_per_quantum = hws_max_conc_proc;

-	/* Allocate global GWS that is shared by all KFD processes */
-	if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
-			amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
-		dev_err(kfd_device, "Could not allocate %d gws\n",
-			amdgpu_amdkfd_get_num_gws(kfd->kgd));
-		goto out;
-	}
-
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
 			kfd->device_info->mqd_size_aligned;
@@ -662,6 +674,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 		goto device_queue_manager_error;
 	}

+	/* If supported on this device, allocate global GWS that is shared
+	 * by all KFD processes
+	 */
+	if (kfd_gws_init(kfd)) {
+		dev_err(kfd_device, "Could not allocate %d gws\n",
+			amdgpu_amdkfd_get_num_gws(kfd->kgd));
+		goto gws_error;
+	}
+
 	if (kfd_iommu_device_init(kfd)) {
 		dev_err(kfd_device, "Error initializing iommuv2\n");
 		goto device_iommu_error;
@@ -691,6 +712,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 kfd_topology_add_device_error:
 kfd_resume_error:
 device_iommu_error:
+gws_error:
 	device_queue_manager_uninit(kfd->dqm);
 device_queue_manager_error:
 	kfd_interrupt_exit(kfd);
@@ -701,7 +723,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 kfd_gtt_sa_init_error:
 	amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
 alloc_gtt_mem_failure:
-	if (hws_gws_support)
+	if (kfd->gws)
 		amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
 	dev_err(kfd_device,
 		"device %x:%x NOT added due to errors\n",
@@ -720,7 +742,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
 	kfd_doorbell_fini(kfd);
 	kfd_gtt_sa_fini(kfd);
 	amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
-	if (hws_gws_support)
+	if (kfd->gws)
 		amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
 }
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -282,6 +282,7 @@ struct kfd_dev {
 	/* Firmware versions */
 	uint16_t mec_fw_version;
+	uint16_t mec2_fw_version;
 	uint16_t sdma_fw_version;

 	/* Maximum process number mapped to HW scheduler */
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1319,7 +1319,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 			gpu->device_info->num_xgmi_sdma_engines;
 	dev->node_props.num_sdma_queues_per_engine =
 			gpu->device_info->num_sdma_queues_per_engine;
-	dev->node_props.num_gws = (hws_gws_support &&
+	dev->node_props.num_gws = (dev->gpu->gws &&
 			dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
 		amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
 	dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
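
Because kfd_topology_add_device() now reports num_gws from the actual per-device allocation, userspace can discover whether GWS is usable by reading the topology instead of guessing from the modparam. A minimal sketch of such a probe follows; the sysfs path and the num_gws property name are assumptions about the KFD topology layout, not something this patch defines.

    /* Sketch: report how many GWS entries KFD exposes for topology node 0.
     * The sysfs path and the "num_gws" key below are assumed, not defined
     * by this patch. */
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            const char *path = "/sys/class/kfd/kfd/topology/nodes/0/properties";
            char key[64];
            unsigned long long val;
            FILE *f = fopen(path, "r");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            /* The properties file is a list of "name value" lines. */
            while (fscanf(f, "%63s %llu", key, &val) == 2) {
                    if (!strcmp(key, "num_gws")) {
                            printf("node 0 exposes %llu GWS entries\n", val);
                            break;
                    }
            }
            fclose(f);
            return 0;
    }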