Commit 610cc82b authored by Mukul Joshi's avatar Mukul Joshi Committed by Alex Deucher

drm/amdkfd: Update cache info reporting for GFX v9.4.3

Update cache info reporting in sysfs to report the correct
number of CUs and associated cache information based on
different spatial partitioning modes.
Signed-off-by: default avatarMukul Joshi <mukul.joshi@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 3cb9ebc9
......@@ -79,6 +79,10 @@ struct crat_header {
#define CRAT_SUBTYPE_IOLINK_AFFINITY 5
#define CRAT_SUBTYPE_MAX 6
/*
* Do not change the value of CRAT_SIBLINGMAP_SIZE from 32
* as it breaks the ABI.
*/
#define CRAT_SIBLINGMAP_SIZE 32
/*
......
......@@ -1596,14 +1596,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
struct kfd_gpu_cache_info *pcache_info,
struct kfd_cu_info *cu_info,
int cache_type, unsigned int cu_processor_id)
int cache_type, unsigned int cu_processor_id,
struct kfd_node *knode)
{
unsigned int cu_sibling_map_mask;
int first_active_cu;
int i, j, k;
int i, j, k, xcc, start, end;
struct kfd_cache_properties *pcache = NULL;
cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0];
start = ffs(knode->xcc_mask) - 1;
end = start + NUM_XCC(knode->xcc_mask);
cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
cu_sibling_map_mask &=
((1 << pcache_info[cache_type].num_cu_shared) - 1);
first_active_cu = ffs(cu_sibling_map_mask);
......@@ -1638,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
k = 0;
for (i = 0; i < cu_info->num_shader_engines; i++) {
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
k += 4;
cu_sibling_map_mask = cu_info->cu_bitmap[0][i % 4][j + i / 4];
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
for (xcc = start; xcc < end; xcc++) {
for (i = 0; i < cu_info->num_shader_engines; i++) {
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
k += 4;
cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
}
}
}
pcache->sibling_map_size = k;
......@@ -1665,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
{
struct kfd_gpu_cache_info *pcache_info = NULL;
int i, j, k;
int i, j, k, xcc, start, end;
int ct = 0;
unsigned int cu_processor_id;
int ret;
......@@ -1699,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
* then it will consider only one CU from
* the shared unit
*/
start = ffs(kdev->xcc_mask) - 1;
end = start + NUM_XCC(kdev->xcc_mask);
for (ct = 0; ct < num_of_cache_types; ct++) {
cu_processor_id = gpu_processor_id;
if (pcache_info[ct].cache_level == 1) {
for (i = 0; i < pcu_info->num_shader_engines; i++) {
for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
pcu_info->cu_bitmap[0][i % 4][j + i / 4], ct,
cu_processor_id, k);
if (ret < 0)
break;
if (!ret) {
num_of_entries++;
list_add_tail(&props_ext->list, &dev->cache_props);
for (xcc = start; xcc < end; xcc++) {
for (i = 0; i < pcu_info->num_shader_engines; i++) {
for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
cu_processor_id, k);
if (ret < 0)
break;
if (!ret) {
num_of_entries++;
list_add_tail(&props_ext->list, &dev->cache_props);
}
/* Move to next CU block */
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
pcu_info->num_cu_per_sh) ?
pcache_info[ct].num_cu_shared :
(pcu_info->num_cu_per_sh - k);
cu_processor_id += num_cu_shared;
}
/* Move to next CU block */
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
pcu_info->num_cu_per_sh) ?
pcache_info[ct].num_cu_shared :
(pcu_info->num_cu_per_sh - k);
cu_processor_id += num_cu_shared;
}
}
}
} else {
ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
pcu_info, ct, cu_processor_id);
pcu_info, ct, cu_processor_id, kdev);
if (ret < 0)
break;
......
......@@ -89,7 +89,7 @@ struct kfd_mem_properties {
struct attribute attr;
};
#define CACHE_SIBLINGMAP_SIZE 64
#define CACHE_SIBLINGMAP_SIZE 128
struct kfd_cache_properties {
struct list_head list;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment