Commit 6fdd68b1 authored by Alex Deucher

drm/amdgpu/gmc9: Adjust GART and AGP location with xgmi offset (v2)

On hives with xgmi enabled, the fb_location aperture defines the
total framebuffer size of all nodes in the hive. Each GPU in the
hive has the same view through this aperture: GPU0's vram starts at
offset (0 * segment size), GPU1's at offset (1 * segment size), etc.

For access to local vram on each GPU, we need to take this offset
into account. This includes setting up the GPUVM page tables and
the GART table.

v2: squash in "drm/amdgpu: Init correct fb region for none XGMI configuration"
Acked-by: Huang Rui <ray.huang@amd.com>
Acked-by: Slava Abramov <slava.abramov@amd.com>
Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
parent bf0a60b7
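
To make the layout described above concrete, here is a minimal userspace
sketch of the per-node offset arithmetic; the hive size and segment size
are hypothetical values, not anything queried from hardware:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical hive: 4 nodes, 32 GB of vram per node. */
#define NUM_NODES	4ULL
#define SEGMENT_SIZE	(32ULL << 30)

int main(void)
{
	uint64_t node;

	/* Every GPU sees the same fb_location aperture; node N's local
	 * vram begins at (N * segment size) within that aperture.
	 */
	for (node = 0; node < NUM_NODES; node++)
		printf("GPU%llu vram offset: 0x%010llx\n",
		       (unsigned long long)node,
		       (unsigned long long)(node * SEGMENT_SIZE));
	return 0;
}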
@@ -121,6 +121,11 @@ void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
 	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
 	if (limit && limit < mc->real_vram_size)
 		mc->real_vram_size = limit;
+
+	if (mc->xgmi.num_physical_nodes == 0) {
+		mc->fb_start = mc->vram_start;
+		mc->fb_end = mc->vram_end;
+	}
 	dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
 			mc->mc_vram_size >> 20, mc->vram_start,
 			mc->vram_end, mc->real_vram_size >> 20);

@@ -147,8 +152,8 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
 	/* VCE doesn't like it when BOs cross a 4GB segment, so align
 	 * the GART base on a 4GB boundary as well.
 	 */
-	size_bf = mc->vram_start;
-	size_af = adev->gmc.mc_mask + 1 - ALIGN(mc->vram_end + 1, four_gb);
+	size_bf = mc->fb_start;
+	size_af = adev->gmc.mc_mask + 1 - ALIGN(mc->fb_end + 1, four_gb);

 	if (mc->gart_size > max(size_bf, size_af)) {
 		dev_warn(adev->dev, "limiting GART\n");

@@ -184,23 +189,23 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
 	const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
 	u64 size_af, size_bf;

-	if (mc->vram_start > mc->gart_start) {
-		size_bf = (mc->vram_start & sixteen_gb_mask) -
+	if (mc->fb_start > mc->gart_start) {
+		size_bf = (mc->fb_start & sixteen_gb_mask) -
 			ALIGN(mc->gart_end + 1, sixteen_gb);
-		size_af = mc->mc_mask + 1 - ALIGN(mc->vram_end + 1, sixteen_gb);
+		size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
 	} else {
-		size_bf = mc->vram_start & sixteen_gb_mask;
+		size_bf = mc->fb_start & sixteen_gb_mask;
 		size_af = (mc->gart_start & sixteen_gb_mask) -
-			ALIGN(mc->vram_end + 1, sixteen_gb);
+			ALIGN(mc->fb_end + 1, sixteen_gb);
 	}

 	if (size_bf > size_af) {
-		mc->agp_start = mc->vram_start > mc->gart_start ?
+		mc->agp_start = mc->fb_start > mc->gart_start ?
 			mc->gart_end + 1 : 0;
 		mc->agp_size = size_bf;
 	} else {
-		mc->agp_start = (mc->vram_start > mc->gart_start ?
-			mc->vram_end : mc->gart_end) + 1,
+		mc->agp_start = (mc->fb_start > mc->gart_start ?
+			mc->fb_end : mc->gart_end) + 1,
 		mc->agp_size = size_af;
 	}

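The agp_location hunk above places the AGP aperture in whichever
16 GB-aligned gap is larger: the one below the FB region or the one above
it, relative to where GART sits. A standalone sketch of that gap
selection, with a hand-rolled ALIGN and hypothetical aperture addresses
(GART at the bottom of a 48-bit MC space, the FB region at 1 TB):

#include <stdint.h>
#include <stdio.h>

/* Round x up to a multiple of the power-of-two a (mirrors the kernel's ALIGN). */
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	const uint64_t sixteen_gb = 1ULL << 34;
	const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
	const uint64_t mc_mask = (1ULL << 48) - 1;	/* hypothetical 48-bit MC space */
	/* hypothetical placement: 512 MB GART at the bottom, 32 GB FB at 1 TB */
	uint64_t gart_start = 0, gart_end = (512ULL << 20) - 1;
	uint64_t fb_start = 1ULL << 40, fb_end = (1ULL << 40) + (32ULL << 30) - 1;
	uint64_t size_bf, size_af, agp_start, agp_size;

	if (fb_start > gart_start) {
		/* gap between the end of GART and the start of FB */
		size_bf = (fb_start & sixteen_gb_mask) -
			ALIGN_UP(gart_end + 1, sixteen_gb);
		/* gap from the end of FB to the top of the MC space */
		size_af = mc_mask + 1 - ALIGN_UP(fb_end + 1, sixteen_gb);
	} else {
		size_bf = fb_start & sixteen_gb_mask;
		size_af = (gart_start & sixteen_gb_mask) -
			ALIGN_UP(fb_end + 1, sixteen_gb);
	}

	/* place AGP in the larger of the two gaps */
	if (size_bf > size_af) {
		agp_start = fb_start > gart_start ? gart_end + 1 : 0;
		agp_size = size_bf;
	} else {
		agp_start = (fb_start > gart_start ? fb_end : gart_end) + 1;
		agp_size = size_af;
	}
	printf("AGP at 0x%013llx, %llu GB\n",
	       (unsigned long long)agp_start,
	       (unsigned long long)(agp_size >> 30));
	return 0;
}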
@@ -114,6 +114,14 @@ struct amdgpu_gmc {
 	u64			gart_end;
 	u64			vram_start;
 	u64			vram_end;
+	/* The FB region is the same as the local vram region on a single
+	 * GPU. In an XGMI configuration, this region covers all GPUs in
+	 * the same hive, and each GPU in the hive has the same view of it:
+	 * GPU0's vram starts at offset (0 * segment size),
+	 * GPU1's starts at offset (1 * segment size), etc.
+	 */
+	u64			fb_start;
+	u64			fb_end;
 	unsigned		vram_width;
 	u64			real_vram_size;
 	int			vram_mtrr;

@@ -44,6 +44,9 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
 			REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
 		if (adev->gmc.xgmi.physical_node_id > 3)
 			return -EINVAL;
+		adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
+			RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE),
+			MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
 	}
 	return 0;

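The PF_LFB_SIZE field is read in 16 MB units, hence the << 24 when
converting it to a byte count. A quick check of that conversion with a
made-up field value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical PF_LFB_SIZE field value: 0x800 units of 16 MB. */
	uint64_t pf_lfb_size = 0x800;
	uint64_t node_segment_size = pf_lfb_size << 24;

	/* 0x800 << 24 = 0x800000000 = 32 GB per node */
	printf("node_segment_size: %llu GB\n",
	       (unsigned long long)(node_segment_size >> 30));
	return 0;
}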
@@ -771,12 +771,18 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
 	u64 base = 0;
 	if (!amdgpu_sriov_vf(adev))
 		base = mmhub_v1_0_get_fb_location(adev);
+	/* add the xgmi offset of the physical node */
+	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
 	amdgpu_gmc_vram_location(adev, &adev->gmc, base);
 	amdgpu_gmc_gart_location(adev, mc);
 	if (!amdgpu_sriov_vf(adev))
 		amdgpu_gmc_agp_location(adev, mc);
 	/* base offset of vram pages */
 	adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
+
+	/* XXX: add the xgmi offset of the physical node? */
+	adev->vm_manager.vram_base_offset +=
+		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
 }

 /**

@@ -38,10 +38,17 @@
 u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
 {
 	u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE);
+	u64 top = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP);

 	base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
 	base <<= 24;

+	top &= MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
+	top <<= 24;
+	adev->gmc.fb_start = base;
+	adev->gmc.fb_end = top;
+
 	return base;
 }
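
MC_VM_FB_LOCATION_BASE and MC_VM_FB_LOCATION_TOP are likewise in 16 MB
granularity, which is why both values are shifted left by 24 bits after
masking. A hedged decode with made-up register field values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical masked field values, in units of 16 MB (1 << 24). */
	uint64_t fb_location_base = 0x0040;	/* 0x40 * 16 MB = 1 GB */
	uint64_t fb_location_top  = 0x083f;	/* last 16 MB block of FB */

	uint64_t fb_start = fb_location_base << 24;
	uint64_t fb_end   = fb_location_top << 24;

	printf("fb_start 0x%010llx, fb_end 0x%010llx\n",
	       (unsigned long long)fb_start,
	       (unsigned long long)fb_end);
	return 0;
}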