Commit 45b3a914 authored by Alex Deucher

drm/amdgpu/gmc9: fix 64 bit division in partition code

Rework logic or use do_div() to avoid problems on 32 bit.

v2: add a missing case for XCP macro
v3: fix out of bounds array access
v4: fix xcp handling harder

Acked-by: Guchun Chen <guchun.chen@amd.com> (v1)
Reviewed-by: Mukul Joshi <mukul.joshi@amd.com> (v3)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 92ecb92c
...@@ -794,3 +794,18 @@ void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev) ...@@ -794,3 +794,18 @@ void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
{ {
kgd2kfd_unlock_kfd(); kgd2kfd_unlock_kfd();
} }
/**
 * amdgpu_amdkfd_xcp_memory_size - memory size available to one XCP
 * @adev: amdgpu device
 * @xcp_id: partition (XCP) index; a negative value selects the fallback
 *
 * When the device has memory partitions and @xcp_id resolves to a
 * non-negative memory partition id, the result is that partition's size
 * divided by the number of XCPs per memory partition.  In every other case
 * the result is adev->gmc.real_vram_size.
 *
 * Return: the computed size as a u64.
 */
u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
{
	s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
	u64 part_size;

	/* No partitions, or no usable partition for this XCP: report all of VRAM. */
	if (!adev->gmc.num_mem_partitions || xcp_id < 0 || mem_id < 0)
		return adev->gmc.real_vram_size;

	part_size = adev->gmc.mem_partitions[mem_id].size;
	/* do_div() instead of '/' so the 64-bit division also works on 32 bit. */
	do_div(part_size, adev->xcp_mgr->num_xcp_per_mem_partition);

	return part_size;
}
...@@ -333,15 +333,14 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, ...@@ -333,15 +333,14 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag, int8_t xcp_id); uint64_t size, u32 alloc_flag, int8_t xcp_id);
u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
#define KFD_XCP_MEM_ID(adev, xcp_id) \ #define KFD_XCP_MEM_ID(adev, xcp_id) \
((adev)->xcp_mgr && (xcp_id) >= 0 ?\ ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
(adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1) (adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1)
#define KFD_XCP_MEMORY_SIZE(adev, xcp_id)\ #define KFD_XCP_MEMORY_SIZE(adev, xcp_id) amdgpu_amdkfd_xcp_memory_size((adev), (xcp_id))
((adev)->gmc.num_mem_partitions && (xcp_id) >= 0 ?\
(adev)->gmc.mem_partitions[KFD_XCP_MEM_ID((adev), (xcp_id))].size /\
(adev)->xcp_mgr->num_xcp_per_mem_partition :\
(adev)->gmc.real_vram_size)
#if IS_ENABLED(CONFIG_HSA_AMD) #if IS_ENABLED(CONFIG_HSA_AMD)
void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
......
...@@ -814,11 +814,14 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev, ...@@ -814,11 +814,14 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
struct amdgpu_ttm_tt *gtt = (void *)ttm; struct amdgpu_ttm_tt *gtt = (void *)ttm;
uint64_t total_pages = ttm->num_pages; uint64_t total_pages = ttm->num_pages;
int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp); int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
uint64_t page_idx, pages_per_xcc = total_pages / num_xcc; uint64_t page_idx, pages_per_xcc;
int i; int i;
uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
pages_per_xcc = total_pages;
do_div(pages_per_xcc, num_xcc);
for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) { for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
/* MQD page: use default flags */ /* MQD page: use default flags */
amdgpu_gart_bind(adev, amdgpu_gart_bind(adev,
......
...@@ -1914,9 +1914,10 @@ gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev, ...@@ -1914,9 +1914,10 @@ gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev,
adev->gmc.num_mem_partitions = num_ranges; adev->gmc.num_mem_partitions = num_ranges;
/* If there is only partition, don't use entire size */ /* If there is only partition, don't use entire size */
if (adev->gmc.num_mem_partitions == 1) if (adev->gmc.num_mem_partitions == 1) {
mem_ranges[0].size = mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1);
(mem_ranges[0].size * (mem_groups - 1) / mem_groups); do_div(mem_ranges[0].size, mem_groups);
}
} }
static void static void
...@@ -1948,8 +1949,8 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, ...@@ -1948,8 +1949,8 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
break; break;
} }
size = (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) / size = adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT;
adev->gmc.num_mem_partitions; size /= adev->gmc.num_mem_partitions;
for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
mem_ranges[i].range.fpfn = start_addr; mem_ranges[i].range.fpfn = start_addr;
......
...@@ -1939,10 +1939,14 @@ void svm_range_set_max_pages(struct amdgpu_device *adev) ...@@ -1939,10 +1939,14 @@ void svm_range_set_max_pages(struct amdgpu_device *adev)
uint64_t max_pages; uint64_t max_pages;
uint64_t pages, _pages; uint64_t pages, _pages;
uint64_t min_pages = 0; uint64_t min_pages = 0;
int i; int i, id;
for (i = 0; i < adev->kfd.dev->num_nodes; i++) { for (i = 0; i < adev->kfd.dev->num_nodes; i++) {
pages = KFD_XCP_MEMORY_SIZE(adev, adev->kfd.dev->nodes[i]->xcp->id) >> 17; if (adev->kfd.dev->nodes[i]->xcp)
id = adev->kfd.dev->nodes[i]->xcp->id;
else
id = -1;
pages = KFD_XCP_MEMORY_SIZE(adev, id) >> 17;
pages = clamp(pages, 1ULL << 9, 1ULL << 18); pages = clamp(pages, 1ULL << 9, 1ULL << 18);
pages = rounddown_pow_of_two(pages); pages = rounddown_pow_of_two(pages);
min_pages = min_not_zero(min_pages, pages); min_pages = min_not_zero(min_pages, pages);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment