Commit 2f77b9a2 authored by Mukul Joshi, committed by Alex Deucher

drm/amdkfd: Update MQD management on multi XCC setup

Update MQD management for both HIQ and user-mode compute
queues on a multi XCC setup. MQDs need to be allocated,
initialized, loaded and destroyed for each XCC in the KFD
node.

v2: squash in fix "drm/amdkfd: Fix SDMA+HIQ HQD allocation on GFX9.4.3"
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Tested-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 74c5b85d
...@@ -800,6 +800,41 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, ...@@ -800,6 +800,41 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
sg_free_table(ttm->sg); sg_free_table(ttm->sg);
} }
/*
 * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
 * MQDn+CtrlStackn where n is the number of XCCs per partition.
 * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD
 * and uses memory type default, UC. The rest of pages_per_xcc are
 * Ctrl stack and modify their memory type to NC.
 */
static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
				struct ttm_tt *ttm, uint64_t flags)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	uint64_t total_pages = ttm->num_pages;
	int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
	uint64_t page_idx, pages_per_xcc;
	int i;
	/* Ctrl-stack pages use MTYPE NC instead of the default UC */
	uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
			AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);

	pages_per_xcc = total_pages / num_xcc;
	if (!pages_per_xcc) {
		/*
		 * A BO smaller than one page per XCC would make
		 * pages_per_xcc - 1 below wrap to UINT64_MAX and bind a
		 * bogus range; nothing sane to bind in that case.
		 */
		WARN_ONCE(1, "MQD BO smaller than one page per XCC\n");
		return;
	}

	for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
		/* MQD page: use default flags */
		amdgpu_gart_bind(adev,
				gtt->offset + (page_idx << PAGE_SHIFT),
				1, &gtt->ttm.dma_address[page_idx], flags);

		/*
		 * Ctrl pages - modify the memory type to NC (ctrl_flags) from
		 * the second page of the BO onward.
		 */
		amdgpu_gart_bind(adev,
				gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
				pages_per_xcc - 1,
				&gtt->ttm.dma_address[page_idx + 1],
				ctrl_flags);
	}
}
static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
struct ttm_buffer_object *tbo, struct ttm_buffer_object *tbo,
uint64_t flags) uint64_t flags)
...@@ -812,21 +847,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, ...@@ -812,21 +847,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
flags |= AMDGPU_PTE_TMZ; flags |= AMDGPU_PTE_TMZ;
if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) { if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
uint64_t page_idx = 1; amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
amdgpu_gart_bind(adev, gtt->offset, page_idx,
gtt->ttm.dma_address, flags);
/* The memory type of the first page defaults to UC. Now
* modify the memory type to NC from the second page of
* the BO onward.
*/
flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT),
ttm->num_pages - page_idx,
&(gtt->ttm.dma_address[page_idx]), flags);
} else { } else {
amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
gtt->ttm.dma_address, flags); gtt->ttm.dma_address, flags);
......
...@@ -2247,7 +2247,8 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) ...@@ -2247,7 +2247,8 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
get_num_all_sdma_engines(dqm) * get_num_all_sdma_engines(dqm) *
dev->kfd->device_info.num_sdma_queues_per_engine + dev->kfd->device_info.num_sdma_queues_per_engine +
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
dqm->dev->num_xcc_per_node);
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr), &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
......
...@@ -76,7 +76,8 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev, ...@@ -76,7 +76,8 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev,
q->sdma_queue_id) * q->sdma_queue_id) *
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size; dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
dev->num_xcc_per_node;
mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+ offset); + offset);
...@@ -246,3 +247,28 @@ bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd, ...@@ -246,3 +247,28 @@ bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
{ {
return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd); return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
} }
uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev)
{
return dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
}
/*
 * Point mqd_mem_obj at the slice of the shared HIQ+SDMA MQD buffer that
 * belongs to virtual_xcc_id. Only XCC 0 carries the backing GTT object
 * pointer, so the allocation is reported (and freed) exactly once.
 */
void kfd_get_hiq_xcc_mqd(struct kfd_node *dev, struct kfd_mem_obj *mqd_mem_obj,
		uint32_t virtual_xcc_id)
{
	uint64_t offset = kfd_hiq_mqd_stride(dev) * virtual_xcc_id;

	if (virtual_xcc_id == 0)
		mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
	else
		mqd_mem_obj->gtt_mem = NULL;

	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
	mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)
				dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
}
/*
 * Default ->mqd_stride implementation: the stride between consecutive
 * MQDs is simply the MQD size; q is unused here (stride is independent
 * of the queue's properties for these queue types).
 */
uint64_t kfd_mqd_stride(struct mqd_manager *mm,
		struct queue_properties *q)
{
	uint64_t stride = mm->mqd_size;

	return stride;
}
...@@ -119,6 +119,8 @@ struct mqd_manager { ...@@ -119,6 +119,8 @@ struct mqd_manager {
int (*debugfs_show_mqd)(struct seq_file *m, void *data); int (*debugfs_show_mqd)(struct seq_file *m, void *data);
#endif #endif
uint32_t (*read_doorbell_id)(void *mqd); uint32_t (*read_doorbell_id)(void *mqd);
uint64_t (*mqd_stride)(struct mqd_manager *mm,
struct queue_properties *p);
struct mutex mqd_mutex; struct mutex mqd_mutex;
struct kfd_node *dev; struct kfd_node *dev;
...@@ -164,4 +166,10 @@ bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd, ...@@ -164,4 +166,10 @@ bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
uint64_t queue_address, uint32_t pipe_id, uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id); uint32_t queue_id);
void kfd_get_hiq_xcc_mqd(struct kfd_node *dev,
struct kfd_mem_obj *mqd_mem_obj, uint32_t virtual_xcc_id);
uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev);
uint64_t kfd_mqd_stride(struct mqd_manager *mm,
struct queue_properties *q);
#endif /* KFD_MQD_MANAGER_H_ */ #endif /* KFD_MQD_MANAGER_H_ */
...@@ -428,6 +428,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, ...@@ -428,6 +428,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->destroy_mqd = kfd_destroy_mqd_cp;
mqd->is_occupied = kfd_is_occupied_cp; mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct cik_mqd); mqd->mqd_size = sizeof(struct cik_mqd);
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd; mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif #endif
...@@ -442,6 +443,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, ...@@ -442,6 +443,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->destroy_mqd = kfd_destroy_mqd_cp;
mqd->is_occupied = kfd_is_occupied_cp; mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct cik_mqd); mqd->mqd_size = sizeof(struct cik_mqd);
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd; mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif #endif
...@@ -457,6 +459,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, ...@@ -457,6 +459,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->checkpoint_mqd = checkpoint_mqd_sdma; mqd->checkpoint_mqd = checkpoint_mqd_sdma;
mqd->restore_mqd = restore_mqd_sdma; mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers); mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif #endif
......
...@@ -432,6 +432,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, ...@@ -432,6 +432,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->get_wave_state = get_wave_state; mqd->get_wave_state = get_wave_state;
mqd->checkpoint_mqd = checkpoint_mqd; mqd->checkpoint_mqd = checkpoint_mqd;
mqd->restore_mqd = restore_mqd; mqd->restore_mqd = restore_mqd;
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd; mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif #endif
...@@ -447,6 +448,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, ...@@ -447,6 +448,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->destroy_mqd = kfd_destroy_mqd_cp;
mqd->is_occupied = kfd_is_occupied_cp; mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd); mqd->mqd_size = sizeof(struct v10_compute_mqd);
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd; mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif #endif
...@@ -478,6 +480,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, ...@@ -478,6 +480,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->checkpoint_mqd = checkpoint_mqd_sdma; mqd->checkpoint_mqd = checkpoint_mqd_sdma;
mqd->restore_mqd = restore_mqd_sdma; mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct v10_sdma_mqd); mqd->mqd_size = sizeof(struct v10_sdma_mqd);
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif #endif
......
...@@ -486,6 +486,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, ...@@ -486,6 +486,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->destroy_mqd = kfd_destroy_mqd_cp;
mqd->is_occupied = kfd_is_occupied_cp; mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct vi_mqd); mqd->mqd_size = sizeof(struct vi_mqd);
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd; mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif #endif
...@@ -500,6 +501,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, ...@@ -500,6 +501,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->destroy_mqd = kfd_destroy_mqd_cp;
mqd->is_occupied = kfd_is_occupied_cp; mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct vi_mqd); mqd->mqd_size = sizeof(struct vi_mqd);
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd; mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif #endif
...@@ -515,6 +517,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, ...@@ -515,6 +517,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->checkpoint_mqd = checkpoint_mqd_sdma; mqd->checkpoint_mqd = checkpoint_mqd_sdma;
mqd->restore_mqd = restore_mqd_sdma; mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct vi_sdma_mqd); mqd->mqd_size = sizeof(struct vi_sdma_mqd);
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif #endif
......
...@@ -927,7 +927,9 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) ...@@ -927,7 +927,9 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
struct queue *q; struct queue *q;
enum KFD_MQD_TYPE mqd_type; enum KFD_MQD_TYPE mqd_type;
struct mqd_manager *mqd_mgr; struct mqd_manager *mqd_mgr;
int r = 0; int r = 0, xcc, num_xccs = 1;
void *mqd;
uint64_t size = 0;
list_for_each_entry(pqn, &pqm->queues, process_queue_list) { list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
if (pqn->q) { if (pqn->q) {
...@@ -943,6 +945,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) ...@@ -943,6 +945,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
seq_printf(m, " Compute queue on device %x\n", seq_printf(m, " Compute queue on device %x\n",
q->device->id); q->device->id);
mqd_type = KFD_MQD_TYPE_CP; mqd_type = KFD_MQD_TYPE_CP;
num_xccs = q->device->num_xcc_per_node;
break; break;
default: default:
seq_printf(m, seq_printf(m,
...@@ -951,6 +954,8 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) ...@@ -951,6 +954,8 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
continue; continue;
} }
mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type]; mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
size = mqd_mgr->mqd_stride(mqd_mgr,
&q->properties);
} else if (pqn->kq) { } else if (pqn->kq) {
q = pqn->kq->queue; q = pqn->kq->queue;
mqd_mgr = pqn->kq->mqd_mgr; mqd_mgr = pqn->kq->mqd_mgr;
...@@ -972,9 +977,12 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) ...@@ -972,9 +977,12 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
continue; continue;
} }
r = mqd_mgr->debugfs_show_mqd(m, q->mqd); for (xcc = 0; xcc < num_xccs; xcc++) {
if (r != 0) mqd = q->mqd + size * xcc;
break; r = mqd_mgr->debugfs_show_mqd(m, mqd);
if (r != 0)
break;
}
} }
return r; return r;
......
...@@ -196,10 +196,20 @@ struct v9_mqd { ...@@ -196,10 +196,20 @@ struct v9_mqd {
uint32_t compute_wave_restore_addr_lo; uint32_t compute_wave_restore_addr_lo;
uint32_t compute_wave_restore_addr_hi; uint32_t compute_wave_restore_addr_hi;
uint32_t compute_wave_restore_control; uint32_t compute_wave_restore_control;
uint32_t compute_static_thread_mgmt_se4; union {
uint32_t compute_static_thread_mgmt_se5; struct {
uint32_t compute_static_thread_mgmt_se6; uint32_t compute_static_thread_mgmt_se4;
uint32_t compute_static_thread_mgmt_se7; uint32_t compute_static_thread_mgmt_se5;
uint32_t compute_static_thread_mgmt_se6;
uint32_t compute_static_thread_mgmt_se7;
};
struct {
uint32_t compute_current_logic_xcc_id; // offset: 39 (0x27)
uint32_t compute_restart_cg_tg_id; // offset: 40 (0x28)
uint32_t compute_tg_chunk_size; // offset: 41 (0x29)
uint32_t compute_restore_tg_chunk_size; // offset: 42 (0x2A)
};
};
uint32_t reserved_43; uint32_t reserved_43;
uint32_t reserved_44; uint32_t reserved_44;
uint32_t reserved_45; uint32_t reserved_45;
...@@ -382,8 +392,16 @@ struct v9_mqd { ...@@ -382,8 +392,16 @@ struct v9_mqd {
uint32_t iqtimer_pkt_dw29; uint32_t iqtimer_pkt_dw29;
uint32_t iqtimer_pkt_dw30; uint32_t iqtimer_pkt_dw30;
uint32_t iqtimer_pkt_dw31; uint32_t iqtimer_pkt_dw31;
uint32_t reserved_225; union {
uint32_t reserved_226; struct {
uint32_t reserved_225;
uint32_t reserved_226;
};
struct {
uint32_t pm4_target_xcc_in_xcp; // offset: 225 (0xE1)
uint32_t cp_mqd_stride_size; // offset: 226 (0xE2)
};
};
uint32_t reserved_227; uint32_t reserved_227;
uint32_t set_resources_header; uint32_t set_resources_header;
uint32_t set_resources_dw1; uint32_t set_resources_dw1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment