Commit f0dc99a6 authored by Graham Sider, committed by Alex Deucher

drm/amdkfd: add kfd_device_info_init function

Initializes kfd->device_info from either the asic_type enum (for GFX
versions below GFX9) or the GC IP version (for GFX9 and later). Also
takes in vf and the target compiler gfx version. Uses the SDMA IP
version to determine num_sdma_queues_per_engine.
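
The new function itself sits in the collapsed portion of the diff. Below is a
minimal sketch of the flow just described, with the per-IP cases and field
values trimmed to representative examples; KFD_GC_VERSION, IP_VERSION, the
ip_versions accessor, and the event_interrupt_class symbols follow the
upstream driver, but the exact values and branching here are illustrative,
not the verbatim upstream code:

/* Illustrative sketch only, not the verbatim upstream function. */
static void kfd_device_info_init(struct kfd_dev *kfd,
                                 bool vf, uint32_t gfx_target_version)
{
        uint32_t gc_version = KFD_GC_VERSION(kfd);
        uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];

        kfd->device_info.gfx_target_version = gfx_target_version;

        if (gc_version >= IP_VERSION(9, 0, 1)) {
                /* GFX9 and later: properties keyed off the GC IP version */
                kfd->device_info.doorbell_size = 8;
                kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
                kfd->device_info.event_interrupt_class =
                        &event_interrupt_class_v9;

                /* SDMA IP version decides num_sdma_queues_per_engine */
                switch (sdma_version) {
                case IP_VERSION(4, 0, 0):       /* e.g. VEGA10 */
                        kfd->device_info.num_sdma_queues_per_engine = 2;
                        break;
                default:
                        kfd->device_info.num_sdma_queues_per_engine = 8;
                        break;
                }

                /* Assumption in this sketch: vf relaxes the PCIe-atomics
                 * requirement; the upstream logic is more fine-grained. */
                kfd->device_info.needs_pci_atomics =
                        !vf && gc_version >= IP_VERSION(10, 1, 1);
        } else {
                /* Pre-GFX9: properties keyed off the asic_type enum */
                kfd->device_info.doorbell_size = 4;
                kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
                kfd->device_info.event_interrupt_class =
                        &event_interrupt_class_cik;
                kfd->device_info.num_sdma_queues_per_engine = 2;
        }
}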

Convert device_info to a non-pointer member of kfd and update
references accordingly.
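
Concretely, the field in kfd_priv.h changes from a pointer into a shared,
const per-ASIC table to storage owned by each device, so it can be filled at
probe time (see the kfd_priv.h hunk below; comments here are annotation, not
upstream code):

struct kfd_dev {
        struct amdgpu_device *adev;

        /* was: const struct kfd_device_info *device_info;
         * now owned per device and filled by kfd_device_info_init() */
        struct kfd_device_info device_info;
        /* remaining members unchanged */
};

Every access site then changes mechanically from kfd->device_info->field to
kfd->device_info.field, which is the bulk of the hunks below.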

Change the unsupported-ASIC condition to probe only for f2g, and move
the device_info initialization after the switch statement.
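
A sketch of the reworked kgd2kfd_probe() flow this describes (the full
function is in the collapsed part of the diff; only one representative switch
case is shown, and the signature and locals are illustrative):

/* Illustrative sketch, not the verbatim upstream function. */
struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
{
        struct kfd_dev *kfd;
        const struct kfd2kgd_calls *f2g = NULL;
        uint32_t gfx_target_version = 0;

        switch (adev->asic_type) {
        case CHIP_KAVERI:
                gfx_target_version = 70000;
                if (!vf)
                        f2g = &gfx_v7_kfd2kgd;
                break;
        /* ... one case per supported ASIC / GC IP version ... */
        default:
                break;
        }

        /* Unsupported ASICs are now caught by the missing f2g alone,
         * rather than by a separate device_info check. */
        if (!f2g) {
                dev_err(kfd_device, "%s %s not supported in kfd\n",
                        amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
                return NULL;
        }

        kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
        if (!kfd)
                return NULL;

        kfd->adev = adev;
        /* device_info is now initialized after the switch */
        kfd_device_info_init(kfd, vf, gfx_target_version);
        kfd->init_complete = false;
        kfd->kfd2kgd = f2g;

        return kfd;
}
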
Signed-off-by: Graham Sider <Graham.Sider@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent b7675b7b
This diff is collapsed. (The collapsed file contains the bulk of the change, including the new kfd_device_info_init function.)
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

@@ -108,13 +108,13 @@ static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
 {
 	return kfd_get_num_sdma_engines(dqm->dev) *
-		dqm->dev->device_info->num_sdma_queues_per_engine;
+		dqm->dev->device_info.num_sdma_queues_per_engine;
 }
 
 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
 {
 	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
-		dqm->dev->device_info->num_sdma_queues_per_engine;
+		dqm->dev->device_info.num_sdma_queues_per_engine;
 }
 
 void program_sh_mem_settings(struct device_queue_manager *dqm,
@@ -1838,7 +1838,7 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
 	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
 	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
 		get_num_all_sdma_engines(dqm) *
-		dev->device_info->num_sdma_queues_per_engine +
+		dev->device_info.num_sdma_queues_per_engine +
 		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
 
 	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
@@ -2082,7 +2082,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
 	for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
 		for (queue = 0;
-		     queue < dqm->dev->device_info->num_sdma_queues_per_engine;
+		     queue < dqm->dev->device_info.num_sdma_queues_per_engine;
 		     queue++) {
 			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
 				dqm->dev->adev, pipe, queue, &dump, &n_regs);
...
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c

@@ -48,7 +48,7 @@
 /* # of doorbell bytes allocated for each process. */
 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
 {
-	return roundup(kfd->device_info->doorbell_size *
+	return roundup(kfd->device_info.doorbell_size *
 			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
 			PAGE_SIZE);
 }
@@ -180,7 +180,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
 		return NULL;
 
-	inx *= kfd->device_info->doorbell_size / sizeof(u32);
+	inx *= kfd->device_info.doorbell_size / sizeof(u32);
 
 	/*
 	 * Calculating the kernel doorbell offset using the first
@@ -201,7 +201,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
 	unsigned int inx;
 
 	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
-		* sizeof(u32) / kfd->device_info->doorbell_size;
+		* sizeof(u32) / kfd->device_info.doorbell_size;
 
 	mutex_lock(&kfd->doorbell_mutex);
 	__clear_bit(inx, kfd->doorbell_available_index);
@@ -239,7 +239,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
 	return kfd->doorbell_base_dw_offset +
 		pdd->doorbell_index
 		* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
-		doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
+		doorbell_id * kfd->device_info.doorbell_size / sizeof(u32);
 }
 
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
...
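
As an aside on the arithmetic in kfd_get_doorbell_dw_offset_in_bar() above, a
worked example with assumed GFX9-era values (doorbell_size = 8 bytes,
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS = 1024, 4 KiB pages); all numbers are
illustrative:

/* kfd_doorbell_process_slice() = roundup(8 * 1024, 4096) = 8192 bytes
 *
 * With doorbell_base_dw_offset = 0, pdd->doorbell_index = 2, doorbell_id = 3:
 *
 *   0 + 2 * (8192 / 4) + 3 * (8 / 4) = 4096 + 6 = 4102
 *
 * i.e. this doorbell sits 4102 32-bit dwords into the doorbell BAR.
 */
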
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c

@@ -135,7 +135,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
 			*patched_flag = true;
 			memcpy(patched_ihre, ih_ring_entry,
-				dev->device_info->ih_ring_entry_size);
+				dev->device_info.ih_ring_entry_size);
 
 			pasid = dev->dqm->vmid_pasid[vmid];
...
drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c

@@ -54,7 +54,7 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
 	int r;
 
 	r = kfifo_alloc(&kfd->ih_fifo,
-			KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
+			KFD_IH_NUM_ENTRIES * kfd->device_info.ih_ring_entry_size,
 			GFP_KERNEL);
 	if (r) {
 		dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
@@ -114,8 +114,8 @@ bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
 	int count;
 
 	count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
-				kfd->device_info->ih_ring_entry_size);
-	if (count != kfd->device_info->ih_ring_entry_size) {
+				kfd->device_info.ih_ring_entry_size);
+	if (count != kfd->device_info.ih_ring_entry_size) {
 		dev_err_ratelimited(kfd_chardev(),
 			"Interrupt ring overflow, dropping interrupt %d\n",
 			count);
@@ -133,11 +133,11 @@ static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
 	int count;
 
 	count = kfifo_out(&kfd->ih_fifo, ih_ring_entry,
-				kfd->device_info->ih_ring_entry_size);
+				kfd->device_info.ih_ring_entry_size);
 
-	WARN_ON(count && count != kfd->device_info->ih_ring_entry_size);
+	WARN_ON(count && count != kfd->device_info.ih_ring_entry_size);
 
-	return count == kfd->device_info->ih_ring_entry_size;
+	return count == kfd->device_info.ih_ring_entry_size;
 }
 
 static void interrupt_wq(struct work_struct *work)
@@ -146,13 +146,13 @@ static void interrupt_wq(struct work_struct *work)
 					interrupt_work);
 	uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
 
-	if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
+	if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
 		dev_err_once(kfd_chardev(), "Ring entry too small\n");
 		return;
 	}
 
 	while (dequeue_ih_ring_entry(dev, ih_ring_entry))
-		dev->device_info->event_interrupt_class->interrupt_wq(dev,
+		dev->device_info.event_interrupt_class->interrupt_wq(dev,
 								ih_ring_entry);
 }
 
@@ -163,7 +163,7 @@ bool interrupt_is_wanted(struct kfd_dev *dev,
 	/* integer and bitwise OR so there is no boolean short-circuiting */
 	unsigned int wanted = 0;
 
-	wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
+	wanted |= dev->device_info.event_interrupt_class->interrupt_isr(dev,
 						ih_ring_entry, patched_ihre, flag);
 
 	return wanted != 0;
...
drivers/gpu/drm/amd/amdkfd/kfd_iommu.c

@@ -89,7 +89,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
 	}
 
 	pasid_limit = min_t(unsigned int,
-			(unsigned int)(1 << kfd->device_info->max_pasid_bits),
+			(unsigned int)(1 << kfd->device_info.max_pasid_bits),
 			iommu_info.max_pasids);
 
 	if (!kfd_set_pasid_limit(pasid_limit)) {
...
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c

@@ -111,7 +111,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 	kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
 	kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
 
-	retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
+	retval = kfd_gtt_sa_allocate(dev, dev->device_info.doorbell_size,
 					&kq->wptr_mem);
 
 	if (retval != 0)
@@ -297,7 +297,7 @@ void kq_submit_packet(struct kernel_queue *kq)
 	}
 	pr_debug("\n");
 #endif
-	if (kq->dev->device_info->doorbell_size == 8) {
+	if (kq->dev->device_info.doorbell_size == 8) {
 		*kq->wptr64_kernel = kq->pending_wptr64;
 		write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
 					kq->pending_wptr64);
@@ -310,7 +310,7 @@ void kq_submit_packet(struct kernel_queue *kq)
 
 void kq_rollback_packet(struct kernel_queue *kq)
 {
-	if (kq->dev->device_info->doorbell_size == 8) {
+	if (kq->dev->device_info.doorbell_size == 8) {
 		kq->pending_wptr64 = *kq->wptr64_kernel;
 		kq->pending_wptr = *kq->wptr_kernel %
 			(kq->queue->properties.queue_size / 4);
...
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c

@@ -71,7 +71,7 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
 		return NULL;
 
 	offset = (q->sdma_engine_id *
-		dev->device_info->num_sdma_queues_per_engine +
+		dev->device_info.num_sdma_queues_per_engine +
 		q->sdma_queue_id) *
 		dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
...
drivers/gpu/drm/amd/amdkfd/kfd_priv.h

@@ -230,7 +230,7 @@ struct kfd_vmid_info {
 struct kfd_dev {
 	struct amdgpu_device *adev;
 
-	const struct kfd_device_info *device_info;
+	struct kfd_device_info device_info;
 	struct pci_dev *pdev;
 	struct drm_device *ddev;
...
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

@@ -219,7 +219,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	 * Hence we also check the type as well
 	 */
 	if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
-		max_queues = dev->device_info->max_no_of_hqd/2;
+		max_queues = dev->device_info.max_no_of_hqd/2;
 
 	if (pdd->qpd.queue_count >= max_queues)
 		return -ENOSPC;
...
drivers/gpu/drm/amd/amdkfd/kfd_topology.c

@@ -503,7 +503,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 	if (dev->gpu) {
 		log_max_watch_addr =
-			__ilog2_u32(dev->gpu->device_info->num_of_watch_points);
+			__ilog2_u32(dev->gpu->device_info.num_of_watch_points);
 
 		if (log_max_watch_addr) {
 			dev->node_props.capability |=
@@ -1382,7 +1382,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	dev->node_props.simd_arrays_per_engine =
 		cu_info.num_shader_arrays_per_engine;
-	dev->node_props.gfx_target_version = gpu->device_info->gfx_target_version;
+	dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version;
 	dev->node_props.vendor_id = gpu->pdev->vendor;
 	dev->node_props.device_id = gpu->pdev->device;
 	dev->node_props.capability |=
@@ -1402,7 +1402,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	dev->node_props.num_sdma_xgmi_engines =
 		kfd_get_num_xgmi_sdma_engines(gpu);
 	dev->node_props.num_sdma_queues_per_engine =
-		gpu->device_info->num_sdma_queues_per_engine;
+		gpu->device_info.num_sdma_queues_per_engine;
 	dev->node_props.num_gws = (dev->gpu->gws &&
 		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
 		dev->gpu->adev->gds.gws_size : 0;
@@ -1578,7 +1578,7 @@ void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
 	gpu->use_iommu_v2 = false;
 
-	if (!gpu->device_info->needs_iommu_device)
+	if (!gpu->device_info.needs_iommu_device)
 		return;
 
 	down_read(&topology_lock);
...