Commit dc12f9ed, authored by Philip Yang and committed by Alex Deucher

drm/amdkfd: Update MTYPE for far memory partition

Use MTYPE_RW/MTYPE_CC for mapping system memory or VRAM to a KFD node
within the same memory partition; use MTYPE_NC for mapping on a KFD node
from the far memory partition of the same socket or from another socket
on the same XGMI hive.

On NPS4 or 4P system, MTYPE will be overridden per page depending on
the memory NUMA node id and vm->mem_id.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 7f6db894
...@@ -1186,7 +1186,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, ...@@ -1186,7 +1186,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
/* TODO: memory partitions struct amdgpu_vm *vm = mapping->bo_va->base.vm;*/ struct amdgpu_vm *vm = mapping->bo_va->base.vm;
unsigned int mtype_local, mtype; unsigned int mtype_local, mtype;
bool snoop = false; bool snoop = false;
bool is_local; bool is_local;
...@@ -1247,8 +1247,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, ...@@ -1247,8 +1247,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
} }
is_local = (!is_vram && (adev->flags & AMD_IS_APU) && is_local = (!is_vram && (adev->flags & AMD_IS_APU) &&
num_possible_nodes() <= 1) || num_possible_nodes() <= 1) ||
(is_vram && adev == bo_adev /* TODO: memory partitions && (is_vram && adev == bo_adev &&
bo->mem_id == vm->mem_id*/); bo->mem_id == vm->mem_id);
snoop = true; snoop = true;
if (uncached) { if (uncached) {
mtype = MTYPE_UC; mtype = MTYPE_UC;
...@@ -1335,13 +1335,12 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, ...@@ -1335,13 +1335,12 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
return; return;
} }
/* TODO: memory partitions. mem_id is hard-coded to 0 for now. /* FIXME: Only supported on native mode for now. For carve-out, the
* FIXME: Only supported on native mode for now. For carve-out, the
* NUMA affinity of the GPU/VM needs to come from the PCI info because * NUMA affinity of the GPU/VM needs to come from the PCI info because
* memory partitions are not associated with different NUMA nodes. * memory partitions are not associated with different NUMA nodes.
*/ */
if (adev->gmc.is_app_apu) { if (adev->gmc.is_app_apu && vm->mem_id >= 0) {
local_node = adev->gmc.mem_partitions[/*vm->mem_id*/0].numa.node; local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node;
} else { } else {
dev_dbg(adev->dev, "Only native mode APU is supported.\n"); dev_dbg(adev->dev, "Only native mode APU is supported.\n");
return; return;
...@@ -1356,7 +1355,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, ...@@ -1356,7 +1355,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
} }
nid = pfn_to_nid(addr >> PAGE_SHIFT); nid = pfn_to_nid(addr >> PAGE_SHIFT);
dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n", dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
/*vm->mem_id*/0, local_node, nid); vm->mem_id, local_node, nid);
if (nid == local_node) { if (nid == local_node) {
uint64_t old_flags = *flags; uint64_t old_flags = *flags;
unsigned int mtype_local = MTYPE_RW; unsigned int mtype_local = MTYPE_RW;
......
...@@ -1203,8 +1203,8 @@ svm_range_get_pte_flags(struct kfd_node *node, ...@@ -1203,8 +1203,8 @@ svm_range_get_pte_flags(struct kfd_node *node,
mapping_flags |= AMDGPU_VM_MTYPE_UC; mapping_flags |= AMDGPU_VM_MTYPE_UC;
} else if (domain == SVM_RANGE_VRAM_DOMAIN) { } else if (domain == SVM_RANGE_VRAM_DOMAIN) {
/* local HBM region close to partition */ /* local HBM region close to partition */
if (bo_node->adev == node->adev /* TODO: memory partitions && if (bo_node->adev == node->adev &&
bo_node->mem_id == node->mem_id*/) (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
mapping_flags |= mtype_local; mapping_flags |= mtype_local;
/* local HBM region far from partition or remote XGMI GPU */ /* local HBM region far from partition or remote XGMI GPU */
else if (svm_nodes_in_same_hive(bo_node, node)) else if (svm_nodes_in_same_hive(bo_node, node))
...@@ -1358,8 +1358,9 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, ...@@ -1358,8 +1358,9 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
(last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0, (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
pte_flags); pte_flags);
/* TODO: we still need to determine the vm_manager.vram_base_offset based on /* For dGPU mode, we use same vm_manager to allocate VRAM for
* the memory partition. * different memory partition based on fpfn/lpfn, we should use
* same vm_manager.vram_base_offset regardless memory partition.
*/ */
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL, r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
last_start, prange->start + i, last_start, prange->start + i,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment