Commit b97dfa27 authored by shaoyunl, committed by Oded Gabbay

drm/amdgpu: save vm fault information for amdkfd

amdgpu saves the VM fault related information for KFD usage and keeps the
copy until KFD reads it.
Signed-off-by: shaoyun liu <shaoyun.liu@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 101fee63
...@@ -183,6 +183,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, ...@@ -183,6 +183,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef); struct dma_fence **ef);
/*
 * Copy the pending VM fault record of the device behind @kgd into @info and
 * mark it as consumed. If no record is pending, @info is left unmodified.
 * Returns 0 in both cases.
 */
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);
void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
......
...@@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = { ...@@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid, .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib, .submit_ib = amdgpu_amdkfd_submit_ib,
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
}; };
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
......
...@@ -176,6 +176,7 @@ static const struct kfd2kgd_calls kfd2kgd = { ...@@ -176,6 +176,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid, .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib, .submit_ib = amdgpu_amdkfd_submit_ib,
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
}; };
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
......
...@@ -1621,6 +1621,20 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, ...@@ -1621,6 +1621,20 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
return ret; return ret;
} }
/**
 * amdgpu_amdkfd_gpuvm_get_vm_fault_info - hand a saved VM fault record to KFD
 * @kgd: KGD device handle; cast to the owning struct amdgpu_device
 * @mem: destination for the fault record
 *
 * When gmc.vm_fault_info_updated reads 1, the record stored in
 * gmc.vm_fault_info is copied to @mem and the flag is reset to 0.
 * When the flag is not 1, @mem is not written.
 *
 * Return: always 0, whether or not a record was copied.
 */
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
					  struct kfd_vm_fault_info *mem)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	/* No record pending: nothing to report, leave *mem as it is. */
	if (atomic_read(&adev->gmc.vm_fault_info_updated) != 1)
		return 0;

	*mem = *adev->gmc.vm_fault_info;

	/*
	 * Keep the copy above ordered before the flag reset below; the GMC
	 * interrupt handlers only refill the record while the flag is 0.
	 */
	mb();
	atomic_set(&adev->gmc.vm_fault_info_updated, 0);

	return 0;
}
/* Evict a userptr BO by stopping the queues if necessary /* Evict a userptr BO by stopping the queues if necessary
* *
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
......
...@@ -105,6 +105,8 @@ struct amdgpu_gmc { ...@@ -105,6 +105,8 @@ struct amdgpu_gmc {
/* protects concurrent invalidation */ /* protects concurrent invalidation */
spinlock_t invalidate_lock; spinlock_t invalidate_lock;
bool translate_further; bool translate_further;
struct kfd_vm_fault_info *vm_fault_info;
atomic_t vm_fault_info_updated;
const struct amdgpu_gmc_funcs *gmc_funcs; const struct amdgpu_gmc_funcs *gmc_funcs;
}; };
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "cik.h" #include "cik.h"
#include "gmc_v7_0.h" #include "gmc_v7_0.h"
#include "amdgpu_ucode.h" #include "amdgpu_ucode.h"
#include "amdgpu_amdkfd.h"
#include "bif/bif_4_1_d.h" #include "bif/bif_4_1_d.h"
#include "bif/bif_4_1_sh_mask.h" #include "bif/bif_4_1_sh_mask.h"
...@@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle) ...@@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle)
adev->vm_manager.vram_base_offset = 0; adev->vm_manager.vram_base_offset = 0;
} }
adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
return 0; return 0;
} }
...@@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle) ...@@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev); amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev); amdgpu_vm_manager_fini(adev);
kfree(adev->gmc.vm_fault_info);
gmc_v7_0_gart_fini(adev); gmc_v7_0_gart_fini(adev);
amdgpu_bo_fini(adev); amdgpu_bo_fini(adev);
release_firmware(adev->gmc.fw); release_firmware(adev->gmc.fw);
...@@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, ...@@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
u32 addr, status, mc_client; u32 addr, status, mc_client, vmid;
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
...@@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, ...@@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
entry->pasid); entry->pasid);
} }
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
PROTECTIONS);
info->vmid = vmid;
info->mc_id = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_ID);
info->status = status;
info->page_addr = addr;
info->prot_valid = protections & 0x7 ? true : false;
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
mb();
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
}
return 0; return 0;
} }
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "gmc_v8_0.h" #include "gmc_v8_0.h"
#include "amdgpu_ucode.h" #include "amdgpu_ucode.h"
#include "amdgpu_amdkfd.h"
#include "gmc/gmc_8_1_d.h" #include "gmc/gmc_8_1_d.h"
#include "gmc/gmc_8_1_sh_mask.h" #include "gmc/gmc_8_1_sh_mask.h"
...@@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle) ...@@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle)
adev->vm_manager.vram_base_offset = 0; adev->vm_manager.vram_base_offset = 0;
} }
adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
return 0; return 0;
} }
...@@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle) ...@@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev); amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev); amdgpu_vm_manager_fini(adev);
kfree(adev->gmc.vm_fault_info);
gmc_v8_0_gart_fini(adev); gmc_v8_0_gart_fini(adev);
amdgpu_bo_fini(adev); amdgpu_bo_fini(adev);
release_firmware(adev->gmc.fw); release_firmware(adev->gmc.fw);
...@@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, ...@@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry) struct amdgpu_iv_entry *entry)
{ {
u32 addr, status, mc_client; u32 addr, status, mc_client, vmid;
if (amdgpu_sriov_vf(adev)) { if (amdgpu_sriov_vf(adev)) {
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
...@@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, ...@@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
entry->pasid); entry->pasid);
} }
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
PROTECTIONS);
info->vmid = vmid;
info->mc_id = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_ID);
info->status = status;
info->page_addr = addr;
info->prot_valid = protections & 0x7 ? true : false;
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
mb();
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
}
return 0; return 0;
} }
......
...@@ -47,6 +47,17 @@ enum kfd_preempt_type { ...@@ -47,6 +47,17 @@ enum kfd_preempt_type {
KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
}; };
/*
 * Record of one GPU VM protection fault. It is filled by the GMC v7/v8
 * interrupt handlers when the faulting VMID belongs to KFD, and is read by
 * KFD through the get_vm_fault_info callback.
 */
struct kfd_vm_fault_info {
/* Value read from mmVM_CONTEXT1_PROTECTION_FAULT_ADDR when the fault was handled. */
/* NOTE(review): presumably a page-granular address rather than bytes - confirm. */
uint64_t page_addr;
/* VMID field extracted from VM_CONTEXT1_PROTECTION_FAULT_STATUS. */
uint32_t vmid;
/* MEMORY_CLIENT_ID field extracted from VM_CONTEXT1_PROTECTION_FAULT_STATUS. */
uint32_t mc_id;
/* Unmodified VM_CONTEXT1_PROTECTION_FAULT_STATUS register value. */
uint32_t status;
/* True when any of bits 0x7 of the PROTECTIONS status field is set. */
bool prot_valid;
/* True when bit 0x8 of the PROTECTIONS status field is set. */
bool prot_read;
/* True when bit 0x10 of the PROTECTIONS status field is set. */
bool prot_write;
/* True when bit 0x20 of the PROTECTIONS status field is set. */
bool prot_exec;
};
struct kfd_cu_info { struct kfd_cu_info {
uint32_t num_shader_engines; uint32_t num_shader_engines;
uint32_t num_shader_arrays_per_engine; uint32_t num_shader_arrays_per_engine;
...@@ -259,6 +270,12 @@ struct tile_config { ...@@ -259,6 +270,12 @@ struct tile_config {
* IB to the corresponding ring (ring type). The IB is executed with the * IB to the corresponding ring (ring type). The IB is executed with the
* specified VMID in a user mode context. * specified VMID in a user mode context.
* *
* @get_vm_fault_info: Return information about a recent VM fault on
* GFXv7 and v8. If multiple VM faults occurred since the last call of
* this function, it will return information about the first of those
* faults. On GFXv9 VM fault information is fully contained in the IH
* packet and this function is not needed.
*
* This structure contains function pointers to services that the kgd driver * This structure contains function pointers to services that the kgd driver
* provides to amdkfd driver. * provides to amdkfd driver.
* *
...@@ -374,6 +391,9 @@ struct kfd2kgd_calls { ...@@ -374,6 +391,9 @@ struct kfd2kgd_calls {
int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine, int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr, uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len); uint32_t *ib_cmd, uint32_t ib_len);
int (*get_vm_fault_info)(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);
}; };
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment