Commit 6fa7324a authored by Dave Airlie

Merge tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux into drm-next

Major points for this pull request:
- Add dGPU support for amdkfd initialization code and queue handling. This is
  not yet complete support, since the GPUVM part (still under debate) is missing.
- Enable PCIe atomics for dGPU if present (a minimal sketch follows this list)
- Various adjustments to the amdgpu<-->amdkfd interface for dGPUs
- Refactor IOMMUv2 code to allow loading amdkfd without IOMMUv2 in the system
- Add HSA process eviction code in case of system memory pressure
- Various fixes and small changes
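
A minimal sketch of the PCIe-atomics check referenced above, for illustration
only (not taken from this pull request); kfd_probe_pcie_atomics() and the
atomics_required flag are stand-ins for the driver's own per-ASIC handling:

    #include <linux/pci.h>
    #include <linux/errno.h>

    /*
     * Ask the PCI core to route AtomicOp requests up to the root port for
     * 32- and 64-bit completers. Only fail when the ASIC requires atomics.
     */
    static int kfd_probe_pcie_atomics(struct pci_dev *pdev, bool atomics_required)
    {
            int ret;

            ret = pci_enable_atomic_ops_to_root(pdev,
                            PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
                            PCI_EXP_DEVCAP2_ATOMIC_COMP64);
            if (ret < 0 && atomics_required)
                    return -ENODEV;

            return 0;
    }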

* tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux: (24 commits)
  uapi: Fix type used in ioctl parameter structures
  drm/amdkfd: Implement KFD process eviction/restore
  drm/amdkfd: Add GPUVM virtual address space to PDD
  drm/amdkfd: Remove unaligned memory access
  drm/amdkfd: Centralize IOMMUv2 code and make it conditional
  drm/amdgpu: Add submit IB function for KFD
  drm/amdgpu: Add GPUVM memory management functions for KFD
  drm/amdgpu: add amdgpu_sync_clone
  drm/amdgpu: Update kgd2kfd_shared_resources for dGPU support
  drm/amdgpu: Add KFD eviction fence
  drm/amdgpu: Remove unused kfd2kgd interface
  drm/amdgpu: Fix wrong mask in get_atc_vmid_pasid_mapping_pasid
  drm/amdgpu: Fix header file dependencies
  drm/amdgpu: Replace kgd_mem with amdgpu_bo for kernel pinned gtt mem
  drm/amdgpu: remove useless BUG_ONs
  drm/amdgpu: Enable KFD initialization on dGPUs
  drm/amdkfd: Add dGPU device IDs and device info
  drm/amdkfd: Add dGPU support to kernel_queue_init
  drm/amdkfd: Add dGPU support to the MQD manager
  drm/amdkfd: Add dGPU support to the device queue manager
  ...
parents 0b8eeac5 a1102445
......@@ -766,6 +766,8 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
F: drivers/gpu/drm/amd/amdkfd/
F: drivers/gpu/drm/amd/include/cik_structs.h
F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h
......
......@@ -171,6 +171,7 @@ void dma_fence_release(struct kref *kref)
trace_dma_fence_destroy(fence);
/* Failed to signal before release, could be a refcounting issue */
WARN_ON(!list_empty(&fence->cb_list));
if (fence->ops->release)
......
......@@ -129,6 +129,8 @@ amdgpu-y += \
# add amdkfd interfaces
amdgpu-y += \
amdgpu_amdkfd.o \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
amdgpu_amdkfd_gfx_v8.o
# add cgs
......
......@@ -30,6 +30,8 @@
const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
static const unsigned int compute_vmid_bitmap = 0xFF00;
int amdgpu_amdkfd_init(void)
{
int ret;
......@@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void)
#else
ret = -ENOENT;
#endif
amdgpu_amdkfd_gpuvm_init_mem_limits();
return ret;
}
......@@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
case CHIP_HAWAII:
kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
break;
#endif
case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
default:
......@@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
.compute_vmid_bitmap = compute_vmid_bitmap,
.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
.gpuvm_size = min(adev->vm_manager.max_pfn
<< AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_VA_HOLE_START),
.drm_render_minor = adev->ddev->render->index
};
/* this is going to have a few of the MSBs set that we need to
......@@ -204,19 +216,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **cpu_ptr)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
struct amdgpu_bo *bo = NULL;
int r;
BUG_ON(kgd == NULL);
BUG_ON(gpu_addr == NULL);
BUG_ON(cpu_ptr == NULL);
*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if ((*mem) == NULL)
return -ENOMEM;
uint64_t gpu_addr_tmp = 0;
void *cpu_ptr_tmp = NULL;
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &bo);
if (r) {
dev_err(adev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
......@@ -224,54 +230,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
}
/* map the buffer */
r = amdgpu_bo_reserve((*mem)->bo, true);
r = amdgpu_bo_reserve(bo, true);
if (r) {
dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
goto allocate_mem_reserve_bo_failed;
}
r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
&(*mem)->gpu_addr);
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
&gpu_addr_tmp);
if (r) {
dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
goto allocate_mem_pin_bo_failed;
}
*gpu_addr = (*mem)->gpu_addr;
r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
if (r) {
dev_err(adev->dev,
"(%d) failed to map bo to kernel for amdkfd\n", r);
goto allocate_mem_kmap_bo_failed;
}
*cpu_ptr = (*mem)->cpu_ptr;
amdgpu_bo_unreserve((*mem)->bo);
*mem_obj = bo;
*gpu_addr = gpu_addr_tmp;
*cpu_ptr = cpu_ptr_tmp;
amdgpu_bo_unreserve(bo);
return 0;
allocate_mem_kmap_bo_failed:
amdgpu_bo_unpin((*mem)->bo);
amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
amdgpu_bo_unreserve((*mem)->bo);
amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
amdgpu_bo_unref(&(*mem)->bo);
amdgpu_bo_unref(&bo);
return r;
}
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
BUG_ON(mem == NULL);
amdgpu_bo_reserve(mem->bo, true);
amdgpu_bo_kunmap(mem->bo);
amdgpu_bo_unpin(mem->bo);
amdgpu_bo_unreserve(mem->bo);
amdgpu_bo_unref(&(mem->bo));
kfree(mem);
amdgpu_bo_reserve(bo, true);
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
amdgpu_bo_unreserve(bo);
amdgpu_bo_unref(&(bo));
}
void get_local_mem_info(struct kgd_dev *kgd,
......@@ -361,3 +366,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
}
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct amdgpu_ring *ring;
struct dma_fence *f = NULL;
int ret;
switch (engine) {
case KGD_ENGINE_MEC1:
ring = &adev->gfx.compute_ring[0];
break;
case KGD_ENGINE_SDMA1:
ring = &adev->sdma.instance[0].ring;
break;
case KGD_ENGINE_SDMA2:
ring = &adev->sdma.instance[1].ring;
break;
default:
pr_err("Invalid engine in IB submission: %d\n", engine);
ret = -EINVAL;
goto err;
}
ret = amdgpu_job_alloc(adev, 1, &job, NULL);
if (ret)
goto err;
ib = &job->ibs[0];
memset(ib, 0, sizeof(struct amdgpu_ib));
ib->gpu_addr = gpu_addr;
ib->ptr = ib_cmd;
ib->length_dw = ib_len;
/* This works for NO_HWS. TODO: need to handle without knowing VMID */
job->vmid = vmid;
ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
if (ret) {
DRM_ERROR("amdgpu: failed to schedule IB.\n");
goto err_ib_sched;
}
ret = dma_fence_wait(f, false);
err_ib_sched:
dma_fence_put(f);
amdgpu_job_free(job);
err:
return ret;
}
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
if (adev->kfd) {
if ((1 << vmid) & compute_vmid_bitmap)
return true;
}
return false;
}
......@@ -28,13 +28,89 @@
#include <linux/types.h>
#include <linux/mmu_context.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"
extern const struct kgd2kfd_calls *kgd2kfd;
struct amdgpu_device;
struct kfd_bo_va_list {
struct list_head bo_list;
struct amdgpu_bo_va *bo_va;
void *kgd_dev;
bool is_mapped;
uint64_t va;
uint64_t pte_flags;
};
struct kgd_mem {
struct mutex lock;
struct amdgpu_bo *bo;
uint64_t gpu_addr;
void *cpu_ptr;
struct list_head bo_va_list;
/* protected by amdkfd_process_info.lock */
struct ttm_validate_buffer validate_list;
struct ttm_validate_buffer resv_list;
uint32_t domain;
unsigned int mapped_to_gpu_memory;
uint64_t va;
uint32_t mapping_flags;
struct amdkfd_process_info *process_info;
struct amdgpu_sync sync;
bool aql_queue;
};
/* KFD Memory Eviction */
struct amdgpu_amdkfd_fence {
struct dma_fence base;
struct mm_struct *mm;
spinlock_t lock;
char timeline_name[TASK_COMM_LEN];
};
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm);
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
struct amdkfd_process_info {
/* List head of all VMs that belong to a KFD process */
struct list_head vm_list_head;
/* List head for all KFD BOs that belong to a KFD process. */
struct list_head kfd_bo_list;
/* Lock to protect kfd_bo_list */
struct mutex lock;
/* Number of VMs */
unsigned int n_vms;
/* Eviction Fence */
struct amdgpu_amdkfd_fence *eviction_fence;
};
/* struct amdkfd_vm -
* For memory eviction, KGD requires a mechanism to keep track of all KFD BOs
* belonging to a KFD process. All the VMs belonging to the same process point
* to the same amdkfd_process_info.
*/
struct amdkfd_vm {
/* Keep base as the first member for pointer compatibility between
* amdkfd_vm and amdgpu_vm.
*/
struct amdgpu_vm base;
/* List node in amdkfd_process_info.vm_list_head*/
struct list_head vm_list_node;
struct amdgpu_device *adev;
/* Points to the KFD process VM info*/
struct amdkfd_process_info *process_info;
uint64_t pd_phys_addr;
};
int amdgpu_amdkfd_init(void);
......@@ -48,9 +124,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
/* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
......@@ -79,4 +161,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
valid; \
})
/* GPUVM API */
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
void *vm, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem);
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
int amdgpu_amdkfd_gpuvm_sync_memory(
struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
struct kgd_mem *mem, void **kptr, uint64_t *size);
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
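/* Hypothetical call sequence through the new GPUVM entry points declared
 * above (illustration only, not part of this diff; error handling and the
 * surrounding KFD process management are omitted).
 */
#include "amdgpu_amdkfd.h"

static int kfd_gpuvm_usage_sketch(struct kgd_dev *kgd, uint64_t va,
				  uint64_t size, uint32_t flags)
{
	void *vm = NULL, *process_info = NULL;
	struct dma_fence *ef = NULL;
	struct kgd_mem *mem;
	int ret;

	/* One VM per process per GPU; all VMs of a process share process_info */
	ret = amdgpu_amdkfd_gpuvm_create_process_vm(kgd, &vm, &process_info, &ef);
	if (ret)
		return ret;

	/* Allocate a BO at the requested virtual address, then map it */
	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kgd, va, size, vm, &mem,
						      NULL, flags);
	if (!ret)
		ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kgd, mem, vm);

	return ret;
}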
/*
* Copyright 2016-2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/dma-fence.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/stacktrace.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include "amdgpu_amdkfd.h"
static const struct dma_fence_ops amdkfd_fence_ops;
static atomic_t fence_seq = ATOMIC_INIT(0);
/* Eviction Fence
* Fence helper functions to deal with KFD memory eviction.
* Big Idea - Since KFD submissions are done by user queues, a BO cannot be
* evicted unless all the user queues for that process are evicted.
*
* All the BOs in a process share an eviction fence. When process X wants
* to map VRAM memory but TTM can't find enough space, TTM will attempt to
* evict BOs from its LRU list. TTM checks if the BO is valuable to evict
* by calling ttm_bo_driver->eviction_valuable().
*
* ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
* to process X. Otherwise, it will return true to indicate BO can be
* evicted by TTM.
*
* If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
* the eviction process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
* --> amdgpu_copy_buffer(). This sets up a job in the GPU scheduler.
*
* GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
* notify when the BO is free to move. fence_add_callback --> enable_signaling
* --> amdgpu_amdkfd_fence.enable_signaling
*
* amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
* user queues and signal fence. The work item will also start another delayed
* work item to restore BOs
*/
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm)
{
struct amdgpu_amdkfd_fence *fence;
fence = kzalloc(sizeof(*fence), GFP_KERNEL);
if (fence == NULL)
return NULL;
/* This reference gets released in amdkfd_fence_release */
mmgrab(mm);
fence->mm = mm;
get_task_comm(fence->timeline_name, current);
spin_lock_init(&fence->lock);
dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
context, atomic_inc_return(&fence_seq));
return fence;
}
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
{
struct amdgpu_amdkfd_fence *fence;
if (!f)
return NULL;
fence = container_of(f, struct amdgpu_amdkfd_fence, base);
if (fence && f->ops == &amdkfd_fence_ops)
return fence;
return NULL;
}
static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
{
return "amdgpu_amdkfd_fence";
}
static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
{
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
return fence->timeline_name;
}
/**
* amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
* a KFD BO and schedules a job to move the BO.
* If the fence is already signaled, return true.
* If the fence is not signaled, schedule an evict-KFD-process work item.
*/
static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
{
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
if (!fence)
return false;
if (dma_fence_is_signaled(f))
return true;
if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f))
return true;
return false;
}
/**
* amdkfd_fence_release - callback invoked when the fence can be freed
*
* @fence: fence
*
* This function is called when the reference count becomes zero.
* Drops the mm_struct reference and RCU schedules freeing up the fence.
*/
static void amdkfd_fence_release(struct dma_fence *f)
{
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
/* Unconditionally signal the fence. The process is getting
* terminated.
*/
if (WARN_ON(!fence))
return; /* Not an amdgpu_amdkfd_fence */
mmdrop(fence->mm);
kfree_rcu(f, rcu);
}
/**
* amdkfd_fence_check_mm - Check whether @mm is the same as that of the fence @f;
* return true if so, false otherwise.
*
* @f: [IN] fence
* @mm: [IN] mm that needs to be verified
*/
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
if (!fence)
return false;
else if (fence->mm == mm)
return true;
return false;
}
static const struct dma_fence_ops amdkfd_fence_ops = {
.get_driver_name = amdkfd_fence_get_driver_name,
.get_timeline_name = amdkfd_fence_get_timeline_name,
.enable_signaling = amdkfd_fence_enable_signaling,
.signaled = NULL,
.wait = dma_fence_default_wait,
.release = amdkfd_fence_release,
};
......@@ -139,11 +139,14 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
......@@ -196,12 +199,25 @@ static const struct kfd2kgd_calls kfd2kgd = {
.address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
.write_vmid_invalidate_request = write_vmid_invalidate_request,
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
.get_cu_info = get_cu_info,
.get_vram_usage = amdgpu_amdkfd_get_vram_usage
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
......@@ -787,14 +803,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}
static void set_scratch_backing_va(struct kgd_dev *kgd,
......@@ -812,8 +821,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
const union amdgpu_firmware_header *hdr;
BUG_ON(kgd == NULL);
switch (type) {
case KGD_ENGINE_PFP:
hdr = (const union amdgpu_firmware_header *)
......@@ -866,3 +873,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
return hdr->common.ucode_version;
}
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID\n");
return;
}
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
}
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
int vmid;
unsigned int tmp;
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue;
tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
RREG32(mmVM_INVALIDATE_RESPONSE);
break;
}
}
return 0;
}
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("non kfd vmid\n");
return 0;
}
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
RREG32(mmVM_INVALIDATE_RESPONSE);
return 0;
}
......@@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
unsigned int utimeout);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
unsigned int watch_point_id,
......@@ -99,10 +98,13 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
......@@ -157,12 +159,25 @@ static const struct kfd2kgd_calls kfd2kgd = {
get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid =
get_atc_vmid_pasid_mapping_valid,
.write_vmid_invalidate_request = write_vmid_invalidate_request,
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
.get_cu_info = get_cu_info,
.get_vram_usage = amdgpu_amdkfd_get_vram_usage
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
......@@ -704,14 +719,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}
static int kgd_address_watch_disable(struct kgd_dev *kgd)
......@@ -775,8 +783,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
const union amdgpu_firmware_header *hdr;
BUG_ON(kgd == NULL);
switch (type) {
case KGD_ENGINE_PFP:
hdr = (const union amdgpu_firmware_header *)
......@@ -828,3 +834,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
/* Only 12 bits in use */
return hdr->common.ucode_version;
}
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID\n");
return;
}
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
}
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
int vmid;
unsigned int tmp;
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue;
tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
RREG32(mmVM_INVALIDATE_RESPONSE);
break;
}
}
return 0;
}
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("non kfd vmid %d\n", vmid);
return -EINVAL;
}
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
RREG32(mmVM_INVALIDATE_RESPONSE);
return 0;
}
/*
* Copyright 2014-2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#define pr_fmt(fmt) "kfd2kgd: " fmt
#include <linux/list.h>
#include <drm/drmP.h>
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_amdkfd.h"
/* Special VM and GART address alignment needed for VI pre-Fiji due to
* a HW bug.
*/
#define VI_BO_SIZE_ALIGN (0x8000)
/* Impose limit on how much memory KFD can use */
static struct {
uint64_t max_system_mem_limit;
int64_t system_mem_used;
spinlock_t mem_limit_lock;
} kfd_mem_limit;
/* Struct used for amdgpu_amdkfd_bo_validate */
struct amdgpu_vm_parser {
uint32_t domain;
bool wait;
};
static const char * const domain_bit_to_string[] = {
"CPU",
"GTT",
"VRAM",
"GDS",
"GWS",
"OA"
};
#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
return (struct amdgpu_device *)kgd;
}
static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
struct kgd_mem *mem)
{
struct kfd_bo_va_list *entry;
list_for_each_entry(entry, &mem->bo_va_list, bo_list)
if (entry->bo_va->base.vm == avm)
return false;
return true;
}
/* Set memory usage limits. Currently, the limits are
 * System (kernel) memory - 3/8 of system RAM
*/
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
struct sysinfo si;
uint64_t mem;
si_meminfo(&si);
mem = si.totalram - si.totalhigh;
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
pr_debug("Kernel memory limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20));
}
static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain)
{
size_t acc_size;
int ret = 0;
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
sizeof(struct amdgpu_bo));
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
if (kfd_mem_limit.system_mem_used + (acc_size + size) >
kfd_mem_limit.max_system_mem_limit) {
ret = -ENOMEM;
goto err_no_mem;
}
kfd_mem_limit.system_mem_used += (acc_size + size);
}
err_no_mem:
spin_unlock(&kfd_mem_limit.mem_limit_lock);
return ret;
}
static void unreserve_system_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain)
{
size_t acc_size;
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
sizeof(struct amdgpu_bo));
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (domain == AMDGPU_GEM_DOMAIN_GTT)
kfd_mem_limit.system_mem_used -= (acc_size + size);
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
{
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -=
(bo->tbo.acc_size + amdgpu_bo_size(bo));
}
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
* reservation object.
*
* @bo: [IN] Remove eviction fence(s) from this BO
* @ef: [IN] If ef is specified, then this eviction fence is removed if it
* is present in the shared list.
* @ef_list: [OUT] Returns list of eviction fences. These fences are removed
* from BO's reservation object shared list.
* @ef_count: [OUT] Number of fences in ef_list.
*
* NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
* called to restore the eviction fences and to avoid memory leak. This is
* useful for shared BOs.
* NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
*/
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct amdgpu_amdkfd_fence *ef,
struct amdgpu_amdkfd_fence ***ef_list,
unsigned int *ef_count)
{
struct reservation_object_list *fobj;
struct reservation_object *resv;
unsigned int i = 0, j = 0, k = 0, shared_count;
unsigned int count = 0;
struct amdgpu_amdkfd_fence **fence_list;
if (!ef && !ef_list)
return -EINVAL;
if (ef_list) {
*ef_list = NULL;
*ef_count = 0;
}
resv = bo->tbo.resv;
fobj = reservation_object_get_list(resv);
if (!fobj)
return 0;
preempt_disable();
write_seqcount_begin(&resv->seq);
/* Go through all the shared fences in the reservation object. If
* ef is specified and it exists in the list, remove it and reduce the
* count. If ef is not specified, then get the count of eviction fences
* present.
*/
shared_count = fobj->shared_count;
for (i = 0; i < shared_count; ++i) {
struct dma_fence *f;
f = rcu_dereference_protected(fobj->shared[i],
reservation_object_held(resv));
if (ef) {
if (f->context == ef->base.context) {
dma_fence_put(f);
fobj->shared_count--;
} else {
RCU_INIT_POINTER(fobj->shared[j++], f);
}
} else if (to_amdgpu_amdkfd_fence(f))
count++;
}
write_seqcount_end(&resv->seq);
preempt_enable();
if (ef || !count)
return 0;
/* Alloc memory for count number of eviction fence pointers. Fill the
* ef_list array and ef_count
*/
fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *),
GFP_KERNEL);
if (!fence_list)
return -ENOMEM;
preempt_disable();
write_seqcount_begin(&resv->seq);
j = 0;
for (i = 0; i < shared_count; ++i) {
struct dma_fence *f;
struct amdgpu_amdkfd_fence *efence;
f = rcu_dereference_protected(fobj->shared[i],
reservation_object_held(resv));
efence = to_amdgpu_amdkfd_fence(f);
if (efence) {
fence_list[k++] = efence;
fobj->shared_count--;
} else {
RCU_INIT_POINTER(fobj->shared[j++], f);
}
}
write_seqcount_end(&resv->seq);
preempt_enable();
*ef_list = fence_list;
*ef_count = k;
return 0;
}
/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
* reservation object.
*
* @bo: [IN] Add eviction fences to this BO
* @ef_list: [IN] List of eviction fences to be added
* @ef_count: [IN] Number of fences in ef_list.
*
* NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
* function.
*/
static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
struct amdgpu_amdkfd_fence **ef_list,
unsigned int ef_count)
{
int i;
if (!ef_list || !ef_count)
return;
for (i = 0; i < ef_count; i++) {
amdgpu_bo_fence(bo, &ef_list[i]->base, true);
/* Re-adding the fence takes an additional reference. Drop that
* reference.
*/
dma_fence_put(&ef_list[i]->base);
}
kfree(ef_list);
}
static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
bool wait)
{
struct ttm_operation_ctx ctx = { false, false };
int ret;
if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
"Called with userptr BO"))
return -EINVAL;
amdgpu_ttm_placement_from_domain(bo, domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
goto validate_fail;
if (wait) {
struct amdgpu_amdkfd_fence **ef_list;
unsigned int ef_count;
ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
&ef_count);
if (ret)
goto validate_fail;
ttm_bo_wait(&bo->tbo, false, false);
amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
}
validate_fail:
return ret;
}
static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
{
struct amdgpu_vm_parser *p = param;
return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
}
/* vm_validate_pt_pd_bos - Validate page table and directory BOs
*
* Page directories are not updated here because huge page handling
* during page table updates can invalidate page directory entries
* again. Page directories are only updated after updating page
* tables.
*/
static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
{
struct amdgpu_bo *pd = vm->base.root.base.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
struct amdgpu_vm_parser param;
uint64_t addr, flags = AMDGPU_PTE_VALID;
int ret;
param.domain = AMDGPU_GEM_DOMAIN_VRAM;
param.wait = false;
ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate,
&param);
if (ret) {
pr_err("amdgpu: failed to validate PT BOs\n");
return ret;
}
ret = amdgpu_amdkfd_validate(&param, pd);
if (ret) {
pr_err("amdgpu: failed to validate PD\n");
return ret;
}
addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo);
amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
vm->pd_phys_addr = addr;
if (vm->base.use_cpu_for_update) {
ret = amdgpu_bo_kmap(pd, NULL);
if (ret) {
pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
return ret;
}
}
return 0;
}
static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_fence *f)
{
int ret = amdgpu_sync_fence(adev, sync, f, false);
/* Sync objects can't handle multiple GPUs (contexts) updating
* sync->last_vm_update. Fortunately we don't need it for
* KFD's purposes, so we can just drop that fence.
*/
if (sync->last_vm_update) {
dma_fence_put(sync->last_vm_update);
sync->last_vm_update = NULL;
}
return ret;
}
static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
struct amdgpu_bo *pd = vm->root.base.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
int ret;
ret = amdgpu_vm_update_directories(adev, vm);
if (ret)
return ret;
return sync_vm_fence(adev, sync, vm->last_update);
}
/* add_bo_to_vm - Add a BO to a VM
*
* Everything that needs to be done only once when a BO is first added
* to a VM. It can later be mapped and unmapped many times without
* repeating these steps.
*
* 1. Allocate and initialize BO VA entry data structure
* 2. Add BO to the VM
* 3. Determine ASIC-specific PTE flags
* 4. Alloc page tables and directories if needed
* 4a. Validate new page tables and directories
*/
static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
struct amdgpu_vm *avm, bool is_aql,
struct kfd_bo_va_list **p_bo_va_entry)
{
int ret;
struct kfd_bo_va_list *bo_va_entry;
struct amdkfd_vm *kvm = container_of(avm,
struct amdkfd_vm, base);
struct amdgpu_bo *pd = avm->root.base.bo;
struct amdgpu_bo *bo = mem->bo;
uint64_t va = mem->va;
struct list_head *list_bo_va = &mem->bo_va_list;
unsigned long bo_size = bo->tbo.mem.size;
if (!va) {
pr_err("Invalid VA when adding BO to VM\n");
return -EINVAL;
}
if (is_aql)
va += bo_size;
bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
if (!bo_va_entry)
return -ENOMEM;
pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
va + bo_size, avm);
/* Add BO to VM internal data structures*/
bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
if (!bo_va_entry->bo_va) {
ret = -EINVAL;
pr_err("Failed to add BO object to VM. ret == %d\n",
ret);
goto err_vmadd;
}
bo_va_entry->va = va;
bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
mem->mapping_flags);
bo_va_entry->kgd_dev = (void *)adev;
list_add(&bo_va_entry->bo_list, list_bo_va);
if (p_bo_va_entry)
*p_bo_va_entry = bo_va_entry;
/* Allocate new page tables if needed and validate
* them. Clearing of new page tables and validation need to wait
* on move fences. We don't want that to trigger the eviction
* fence, so remove it temporarily.
*/
amdgpu_amdkfd_remove_eviction_fence(pd,
kvm->process_info->eviction_fence,
NULL, NULL);
ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo));
if (ret) {
pr_err("Failed to allocate pts, err=%d\n", ret);
goto err_alloc_pts;
}
ret = vm_validate_pt_pd_bos(kvm);
if (ret) {
pr_err("validate_pt_pd_bos() failed\n");
goto err_alloc_pts;
}
/* Add the eviction fence back */
amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
return 0;
err_alloc_pts:
amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
list_del(&bo_va_entry->bo_list);
err_vmadd:
kfree(bo_va_entry);
return ret;
}
static void remove_bo_from_vm(struct amdgpu_device *adev,
struct kfd_bo_va_list *entry, unsigned long size)
{
pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
entry->va,
entry->va + size, entry);
amdgpu_vm_bo_rmv(adev, entry->bo_va);
list_del(&entry->bo_list);
kfree(entry);
}
static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
struct amdkfd_process_info *process_info)
{
struct ttm_validate_buffer *entry = &mem->validate_list;
struct amdgpu_bo *bo = mem->bo;
INIT_LIST_HEAD(&entry->head);
entry->shared = true;
entry->bo = &bo->tbo;
mutex_lock(&process_info->lock);
list_add_tail(&entry->head, &process_info->kfd_bo_list);
mutex_unlock(&process_info->lock);
}
/* Reserving a BO and its page table BOs must happen atomically to
* avoid deadlocks. Some operations update multiple VMs at once. Track
* all the reservation info in a context structure. Optionally a sync
* object can track VM updates.
*/
struct bo_vm_reservation_context {
struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
unsigned int n_vms; /* Number of VMs reserved */
struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */
struct ww_acquire_ctx ticket; /* Reservation ticket */
struct list_head list, duplicates; /* BO lists */
struct amdgpu_sync *sync; /* Pointer to sync object */
bool reserved; /* Whether BOs are reserved */
};
enum bo_vm_match {
BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */
BO_VM_MAPPED, /* Match VMs where a BO is mapped */
BO_VM_ALL, /* Match all VMs a BO was added to */
};
/**
* reserve_bo_and_vm - reserve a BO and a VM unconditionally.
* @mem: KFD BO structure.
* @vm: the VM to reserve.
* @ctx: the struct that will be used in unreserve_bo_and_vms().
*/
static int reserve_bo_and_vm(struct kgd_mem *mem,
struct amdgpu_vm *vm,
struct bo_vm_reservation_context *ctx)
{
struct amdgpu_bo *bo = mem->bo;
int ret;
WARN_ON(!vm);
ctx->reserved = false;
ctx->n_vms = 1;
ctx->sync = &mem->sync;
INIT_LIST_HEAD(&ctx->list);
INIT_LIST_HEAD(&ctx->duplicates);
ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
if (!ctx->vm_pd)
return -ENOMEM;
ctx->kfd_bo.robj = bo;
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.shared = true;
ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
false, &ctx->duplicates);
if (!ret)
ctx->reserved = true;
else {
pr_err("Failed to reserve buffers in ttm\n");
kfree(ctx->vm_pd);
ctx->vm_pd = NULL;
}
return ret;
}
/**
* reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
* @mem: KFD BO structure.
* @vm: the VM to reserve. If NULL, then all VMs associated with the BO
* are used. Otherwise, a single VM associated with the BO is used.
* @map_type: the mapping status that will be used to filter the VMs.
* @ctx: the struct that will be used in unreserve_bo_and_vms().
*
* Returns 0 for success, negative for failure.
*/
static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
struct amdgpu_vm *vm, enum bo_vm_match map_type,
struct bo_vm_reservation_context *ctx)
{
struct amdgpu_bo *bo = mem->bo;
struct kfd_bo_va_list *entry;
unsigned int i;
int ret;
ctx->reserved = false;
ctx->n_vms = 0;
ctx->vm_pd = NULL;
ctx->sync = &mem->sync;
INIT_LIST_HEAD(&ctx->list);
INIT_LIST_HEAD(&ctx->duplicates);
list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
if ((vm && vm != entry->bo_va->base.vm) ||
(entry->is_mapped != map_type
&& map_type != BO_VM_ALL))
continue;
ctx->n_vms++;
}
if (ctx->n_vms != 0) {
ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
GFP_KERNEL);
if (!ctx->vm_pd)
return -ENOMEM;
}
ctx->kfd_bo.robj = bo;
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.shared = true;
ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
i = 0;
list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
if ((vm && vm != entry->bo_va->base.vm) ||
(entry->is_mapped != map_type
&& map_type != BO_VM_ALL))
continue;
amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
&ctx->vm_pd[i]);
i++;
}
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
false, &ctx->duplicates);
if (!ret)
ctx->reserved = true;
else
pr_err("Failed to reserve buffers in ttm.\n");
if (ret) {
kfree(ctx->vm_pd);
ctx->vm_pd = NULL;
}
return ret;
}
/**
* unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
* @ctx: Reservation context to unreserve
* @wait: Optionally wait for a sync object representing pending VM updates
* @intr: Whether the wait is interruptible
*
* Also frees any resources allocated in
* reserve_bo_and_(cond_)vm(s). Returns the status from
* amdgpu_sync_wait.
*/
static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
bool wait, bool intr)
{
int ret = 0;
if (wait)
ret = amdgpu_sync_wait(ctx->sync, intr);
if (ctx->reserved)
ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
kfree(ctx->vm_pd);
ctx->sync = NULL;
ctx->reserved = false;
ctx->vm_pd = NULL;
return ret;
}
static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
struct kfd_bo_va_list *entry,
struct amdgpu_sync *sync)
{
struct amdgpu_bo_va *bo_va = entry->bo_va;
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base);
struct amdgpu_bo *pd = vm->root.base.bo;
/* Remove eviction fence from PD (and thereby from PTs too as
* they share the resv. object). Otherwise during PT update
* job (see amdgpu_vm_bo_update_mapping), eviction fence would
* get added to job->sync object and job execution would
* trigger the eviction fence.
*/
amdgpu_amdkfd_remove_eviction_fence(pd,
kvm->process_info->eviction_fence,
NULL, NULL);
amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
/* Add the eviction fence back */
amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
sync_vm_fence(adev, sync, bo_va->last_pt_update);
return 0;
}
static int update_gpuvm_pte(struct amdgpu_device *adev,
struct kfd_bo_va_list *entry,
struct amdgpu_sync *sync)
{
int ret;
struct amdgpu_vm *vm;
struct amdgpu_bo_va *bo_va;
struct amdgpu_bo *bo;
bo_va = entry->bo_va;
vm = bo_va->base.vm;
bo = bo_va->base.bo;
/* Update the page tables */
ret = amdgpu_vm_bo_update(adev, bo_va, false);
if (ret) {
pr_err("amdgpu_vm_bo_update failed\n");
return ret;
}
return sync_vm_fence(adev, sync, bo_va->last_pt_update);
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
{
int ret;
/* Set virtual address for the allocation */
ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
amdgpu_bo_size(entry->bo_va->base.bo),
entry->pte_flags);
if (ret) {
pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
entry->va, ret);
return ret;
}
ret = update_gpuvm_pte(adev, entry, sync);
if (ret) {
pr_err("update_gpuvm_pte() failed\n");
goto update_gpuvm_pte_failed;
}
return 0;
update_gpuvm_pte_failed:
unmap_bo_from_gpuvm(adev, entry, sync);
return ret;
}
static int process_validate_vms(struct amdkfd_process_info *process_info)
{
struct amdkfd_vm *peer_vm;
int ret;
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
ret = vm_validate_pt_pd_bos(peer_vm);
if (ret)
return ret;
}
return 0;
}
static int process_update_pds(struct amdkfd_process_info *process_info,
struct amdgpu_sync *sync)
{
struct amdkfd_vm *peer_vm;
int ret;
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
ret = vm_update_pds(&peer_vm->base, sync);
if (ret)
return ret;
}
return 0;
}
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
void **process_info,
struct dma_fence **ef)
{
int ret;
struct amdkfd_vm *new_vm;
struct amdkfd_process_info *info;
struct amdgpu_device *adev = get_amdgpu_device(kgd);
new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
if (!new_vm)
return -ENOMEM;
/* Initialize the VM context, allocate the page directory and zero it */
ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0);
if (ret) {
pr_err("Failed init vm ret %d\n", ret);
goto vm_init_fail;
}
new_vm->adev = adev;
if (!*process_info) {
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
ret = -ENOMEM;
goto alloc_process_info_fail;
}
mutex_init(&info->lock);
INIT_LIST_HEAD(&info->vm_list_head);
INIT_LIST_HEAD(&info->kfd_bo_list);
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
current->mm);
if (!info->eviction_fence) {
pr_err("Failed to create eviction fence\n");
goto create_evict_fence_fail;
}
*process_info = info;
*ef = dma_fence_get(&info->eviction_fence->base);
}
new_vm->process_info = *process_info;
mutex_lock(&new_vm->process_info->lock);
list_add_tail(&new_vm->vm_list_node,
&(new_vm->process_info->vm_list_head));
new_vm->process_info->n_vms++;
mutex_unlock(&new_vm->process_info->lock);
*vm = (void *) new_vm;
pr_debug("Created process vm %p\n", *vm);
return ret;
create_evict_fence_fail:
mutex_destroy(&info->lock);
kfree(info);
alloc_process_info_fail:
amdgpu_vm_fini(adev, &new_vm->base);
vm_init_fail:
kfree(new_vm);
return ret;
}
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm;
struct amdgpu_vm *avm = &kfd_vm->base;
struct amdgpu_bo *pd;
struct amdkfd_process_info *process_info;
if (WARN_ON(!kgd || !vm))
return;
pr_debug("Destroying process vm %p\n", vm);
/* Release eviction fence from PD */
pd = avm->root.base.bo;
amdgpu_bo_reserve(pd, false);
amdgpu_bo_fence(pd, NULL, false);
amdgpu_bo_unreserve(pd);
process_info = kfd_vm->process_info;
mutex_lock(&process_info->lock);
process_info->n_vms--;
list_del(&kfd_vm->vm_list_node);
mutex_unlock(&process_info->lock);
/* Release per-process resources */
if (!process_info->n_vms) {
WARN_ON(!list_empty(&process_info->kfd_bo_list));
dma_fence_put(&process_info->eviction_fence->base);
mutex_destroy(&process_info->lock);
kfree(process_info);
}
/* Release the VM context */
amdgpu_vm_fini(adev, avm);
kfree(vm);
}
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
{
struct amdkfd_vm *avm = (struct amdkfd_vm *)vm;
return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
}
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
void *vm, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
struct amdgpu_bo *bo;
int byte_align;
u32 alloc_domain;
u64 alloc_flags;
uint32_t mapping_flags;
int ret;
/*
* Check on which domain to allocate BO
*/
if (flags & ALLOC_MEM_FLAGS_VRAM) {
alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
} else if (flags & ALLOC_MEM_FLAGS_GTT) {
alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
} else {
return -EINVAL;
}
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if (!*mem)
return -ENOMEM;
INIT_LIST_HEAD(&(*mem)->bo_va_list);
mutex_init(&(*mem)->lock);
(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
/* Workaround for AQL queue wraparound bug. Map the same
* memory twice. That means we only actually allocate half
* the memory.
*/
if ((*mem)->aql_queue)
size = size >> 1;
/* Workaround for TLB bug on older VI chips */
byte_align = (adev->family == AMDGPU_FAMILY_VI &&
adev->asic_type != CHIP_FIJI &&
adev->asic_type != CHIP_POLARIS10 &&
adev->asic_type != CHIP_POLARIS11) ?
VI_BO_SIZE_ALIGN : 1;
mapping_flags = AMDGPU_VM_PAGE_READABLE;
if (flags & ALLOC_MEM_FLAGS_WRITABLE)
mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
if (flags & ALLOC_MEM_FLAGS_COHERENT)
mapping_flags |= AMDGPU_VM_MTYPE_UC;
else
mapping_flags |= AMDGPU_VM_MTYPE_NC;
(*mem)->mapping_flags = mapping_flags;
amdgpu_sync_create(&(*mem)->sync);
ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
if (ret) {
pr_debug("Insufficient system memory\n");
goto err_reserve_system_mem;
}
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
va, size, domain_string(alloc_domain));
ret = amdgpu_bo_create(adev, size, byte_align, false,
alloc_domain, alloc_flags, NULL, NULL, &bo);
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
domain_string(alloc_domain), ret);
goto err_bo_create;
}
bo->kfd_bo = *mem;
(*mem)->bo = bo;
(*mem)->va = va;
(*mem)->domain = alloc_domain;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = kfd_vm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info);
if (offset)
*offset = amdgpu_bo_mmap_offset(bo);
return 0;
err_bo_create:
unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
mutex_destroy(&(*mem)->lock);
kfree(*mem);
return ret;
}
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem)
{
struct amdkfd_process_info *process_info = mem->process_info;
unsigned long bo_size = mem->bo->tbo.mem.size;
struct kfd_bo_va_list *entry, *tmp;
struct bo_vm_reservation_context ctx;
struct ttm_validate_buffer *bo_list_entry;
int ret;
mutex_lock(&mem->lock);
if (mem->mapped_to_gpu_memory > 0) {
pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
mem->va, bo_size);
mutex_unlock(&mem->lock);
return -EBUSY;
}
mutex_unlock(&mem->lock);
/* lock is not needed after this, since mem is unused and will
* be freed anyway
*/
/* Make sure restore workers don't access the BO any more */
bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
/* The eviction fence should be removed by the last unmap.
* TODO: Log an error condition if the bo still has the eviction fence
* attached
*/
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence,
NULL, NULL);
pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
mem->va + bo_size * (1 + mem->aql_queue));
/* Remove from VM internal data structures */
list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
entry, bo_size);
ret = unreserve_bo_and_vms(&ctx, false, false);
/* Free the sync object */
amdgpu_sync_free(&mem->sync);
/* Free the BO*/
amdgpu_bo_unref(&mem->bo);
mutex_destroy(&mem->lock);
kfree(mem);
return ret;
}
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
int ret;
struct amdgpu_bo *bo;
uint32_t domain;
struct kfd_bo_va_list *entry;
struct bo_vm_reservation_context ctx;
struct kfd_bo_va_list *bo_va_entry = NULL;
struct kfd_bo_va_list *bo_va_entry_aql = NULL;
unsigned long bo_size;
/* Make sure restore is not running concurrently.
*/
mutex_lock(&mem->process_info->lock);
mutex_lock(&mem->lock);
bo = mem->bo;
if (!bo) {
pr_err("Invalid BO when mapping memory to GPU\n");
ret = -EINVAL;
goto out;
}
domain = mem->domain;
bo_size = bo->tbo.mem.size;
pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
mem->va,
mem->va + bo_size * (1 + mem->aql_queue),
vm, domain_string(domain));
ret = reserve_bo_and_vm(mem, vm, &ctx);
if (unlikely(ret))
goto out;
if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) {
ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false,
&bo_va_entry);
if (ret)
goto add_bo_to_vm_failed;
if (mem->aql_queue) {
ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm,
true, &bo_va_entry_aql);
if (ret)
goto add_bo_to_vm_failed_aql;
}
} else {
ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
if (unlikely(ret))
goto add_bo_to_vm_failed;
}
if (mem->mapped_to_gpu_memory == 0) {
/* Validate BO only once. The eviction fence gets added to BO
* the first time it is mapped. Validate will wait for all
* background evictions to complete.
*/
ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
if (ret) {
pr_debug("Validate failed\n");
goto map_bo_to_gpuvm_failed;
}
}
list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
entry->va, entry->va + bo_size,
entry);
ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
if (ret) {
pr_err("Failed to map radeon bo to gpuvm\n");
goto map_bo_to_gpuvm_failed;
}
ret = vm_update_pds(vm, ctx.sync);
if (ret) {
pr_err("Failed to update page directories\n");
goto map_bo_to_gpuvm_failed;
}
entry->is_mapped = true;
mem->mapped_to_gpu_memory++;
pr_debug("\t INC mapping count %d\n",
mem->mapped_to_gpu_memory);
}
}
if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count)
amdgpu_bo_fence(bo,
&kfd_vm->process_info->eviction_fence->base,
true);
ret = unreserve_bo_and_vms(&ctx, false, false);
goto out;
map_bo_to_gpuvm_failed:
if (bo_va_entry_aql)
remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
add_bo_to_vm_failed_aql:
if (bo_va_entry)
remove_bo_from_vm(adev, bo_va_entry, bo_size);
add_bo_to_vm_failed:
unreserve_bo_and_vms(&ctx, false, false);
out:
mutex_unlock(&mem->process_info->lock);
mutex_unlock(&mem->lock);
return ret;
}
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdkfd_process_info *process_info =
((struct amdkfd_vm *)vm)->process_info;
unsigned long bo_size = mem->bo->tbo.mem.size;
struct kfd_bo_va_list *entry;
struct bo_vm_reservation_context ctx;
int ret;
mutex_lock(&mem->lock);
ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
if (unlikely(ret))
goto out;
/* If no VMs were reserved, it means the BO wasn't actually mapped */
if (ctx.n_vms == 0) {
ret = -EINVAL;
goto unreserve_out;
}
ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
if (unlikely(ret))
goto unreserve_out;
pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
mem->va,
mem->va + bo_size * (1 + mem->aql_queue),
vm);
list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
if (entry->bo_va->base.vm == vm && entry->is_mapped) {
pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
entry->va,
entry->va + bo_size,
entry);
ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
if (ret == 0) {
entry->is_mapped = false;
} else {
pr_err("failed to unmap VA 0x%llx\n",
mem->va);
goto unreserve_out;
}
mem->mapped_to_gpu_memory--;
pr_debug("\t DEC mapping count %d\n",
mem->mapped_to_gpu_memory);
}
}
/* If BO is unmapped from all VMs, unfence it. It can be evicted if
* required.
*/
if (mem->mapped_to_gpu_memory == 0 &&
!amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence,
NULL, NULL);
unreserve_out:
unreserve_bo_and_vms(&ctx, false, false);
out:
mutex_unlock(&mem->lock);
return ret;
}
int amdgpu_amdkfd_gpuvm_sync_memory(
struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
{
struct amdgpu_sync sync;
int ret;
amdgpu_sync_create(&sync);
mutex_lock(&mem->lock);
amdgpu_sync_clone(&mem->sync, &sync);
mutex_unlock(&mem->lock);
ret = amdgpu_sync_wait(&sync, intr);
amdgpu_sync_free(&sync);
return ret;
}
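The map/sync/unmap entry points above are intended to be used as a sequence: mapping queues the page-table updates on mem->sync, sync_memory waits for them, and unmap reverses the mapping (dropping the eviction fence once the last mapping goes away). A minimal sketch of the expected caller ordering, with a hypothetical helper name and error handling reduced to the essentials:

/* Hypothetical caller sequence; kgd, mem and vm are assumed to come from
 * earlier alloc/acquire calls not shown here.
 */
int example_map_and_wait(struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
{
	int r;

	r = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kgd, mem, vm);
	if (r)
		return r;

	/* Block (interruptibly) until the queued PT/PD updates have landed */
	r = amdgpu_amdkfd_gpuvm_sync_memory(kgd, mem, true);
	if (r)
		amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kgd, mem, vm);

	return r;
}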
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
struct kgd_mem *mem, void **kptr, uint64_t *size)
{
int ret;
struct amdgpu_bo *bo = mem->bo;
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
pr_err("userptr can't be mapped to kernel\n");
return -EINVAL;
}
/* delete kgd_mem from kfd_bo_list to avoid re-validating
* this BO when restoring BOs after eviction.
*/
mutex_lock(&mem->process_info->lock);
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("Failed to reserve bo. ret %d\n", ret);
goto bo_reserve_failed;
}
ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
if (ret) {
pr_err("Failed to pin bo. ret %d\n", ret);
goto pin_failed;
}
ret = amdgpu_bo_kmap(bo, kptr);
if (ret) {
pr_err("Failed to map bo to kernel. ret %d\n", ret);
goto kmap_failed;
}
amdgpu_amdkfd_remove_eviction_fence(
bo, mem->process_info->eviction_fence, NULL, NULL);
list_del_init(&mem->validate_list.head);
if (size)
*size = amdgpu_bo_size(bo);
amdgpu_bo_unreserve(bo);
mutex_unlock(&mem->process_info->lock);
return 0;
kmap_failed:
amdgpu_bo_unpin(bo);
pin_failed:
amdgpu_bo_unreserve(bo);
bo_reserve_failed:
mutex_unlock(&mem->process_info->lock);
return ret;
}
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
* KFD process identified by process_info
*
* @process_info: amdkfd_process_info of the KFD process
*
* After memory eviction, the restore thread calls this function. It must be
* called while the process is still valid. BO restore involves -
*
* 1. Release old eviction fence and create new one
* 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_list.
* 3. Use the second PD list and kfd_bo_list to create a list (ctx.list) of
*    BOs that need to be reserved.
* 4. Reserve all the BOs
* 5. Validate PD and PT BOs.
* 6. Validate all KFD BOs using kfd_bo_list, map them and add the new fence
* 7. Add fence to all PD and PT BOs.
* 8. Unreserve all BOs
*/
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
{
struct amdgpu_bo_list_entry *pd_bo_list;
struct amdkfd_process_info *process_info = info;
struct amdkfd_vm *peer_vm;
struct kgd_mem *mem;
struct bo_vm_reservation_context ctx;
struct amdgpu_amdkfd_fence *new_fence;
int ret = 0, i;
struct list_head duplicate_save;
struct amdgpu_sync sync_obj;
INIT_LIST_HEAD(&duplicate_save);
INIT_LIST_HEAD(&ctx.list);
INIT_LIST_HEAD(&ctx.duplicates);
pd_bo_list = kcalloc(process_info->n_vms,
sizeof(struct amdgpu_bo_list_entry),
GFP_KERNEL);
if (!pd_bo_list)
return -ENOMEM;
i = 0;
mutex_lock(&process_info->lock);
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node)
amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list,
&pd_bo_list[i++]);
/* Reserve all BOs and page tables/directory. Add all BOs from
* kfd_bo_list to ctx.list
*/
list_for_each_entry(mem, &process_info->kfd_bo_list,
validate_list.head) {
list_add_tail(&mem->resv_list.head, &ctx.list);
mem->resv_list.bo = mem->validate_list.bo;
mem->resv_list.shared = mem->validate_list.shared;
}
ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
false, &duplicate_save);
if (ret) {
pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
goto ttm_reserve_fail;
}
amdgpu_sync_create(&sync_obj);
/* Validate PDs and PTs */
ret = process_validate_vms(process_info);
if (ret)
goto validate_map_fail;
/* Wait for PD/PTs validate to finish */
/* FIXME: I think this isn't needed */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
struct amdgpu_bo *bo = peer_vm->base.root.base.bo;
ttm_bo_wait(&bo->tbo, false, false);
}
/* Validate BOs and map them to GPUVM (update VM page tables). */
list_for_each_entry(mem, &process_info->kfd_bo_list,
validate_list.head) {
struct amdgpu_bo *bo = mem->bo;
uint32_t domain = mem->domain;
struct kfd_bo_va_list *bo_va_entry;
ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
if (ret) {
pr_debug("Memory eviction: Validate BOs failed. Try again\n");
goto validate_map_fail;
}
list_for_each_entry(bo_va_entry, &mem->bo_va_list,
bo_list) {
ret = update_gpuvm_pte((struct amdgpu_device *)
bo_va_entry->kgd_dev,
bo_va_entry,
&sync_obj);
if (ret) {
pr_debug("Memory eviction: update PTE failed. Try again\n");
goto validate_map_fail;
}
}
}
/* Update page directories */
ret = process_update_pds(process_info, &sync_obj);
if (ret) {
pr_debug("Memory eviction: update PDs failed. Try again\n");
goto validate_map_fail;
}
amdgpu_sync_wait(&sync_obj, false);
/* Release the old eviction fence and create a new one. A fence can only
* go from unsignaled to signaled once, so it cannot be reused.
* Use the context and mm from the old fence.
*/
new_fence = amdgpu_amdkfd_fence_create(
process_info->eviction_fence->base.context,
process_info->eviction_fence->mm);
if (!new_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;
goto validate_map_fail;
}
dma_fence_put(&process_info->eviction_fence->base);
process_info->eviction_fence = new_fence;
*ef = dma_fence_get(&new_fence->base);
/* Wait for validate to finish and attach new eviction fence */
list_for_each_entry(mem, &process_info->kfd_bo_list,
validate_list.head)
ttm_bo_wait(&mem->bo->tbo, false, false);
list_for_each_entry(mem, &process_info->kfd_bo_list,
validate_list.head)
amdgpu_bo_fence(mem->bo,
&process_info->eviction_fence->base, true);
/* Attach eviction fence to PD / PT BOs */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
struct amdgpu_bo *bo = peer_vm->base.root.base.bo;
amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
}
validate_map_fail:
ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
amdgpu_sync_free(&sync_obj);
ttm_reserve_fail:
mutex_unlock(&process_info->lock);
kfree(pd_bo_list);
return ret;
}
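The "Try again" debug messages above hint that restore failures are treated as transient (reservation contention, validation pressure) and that the caller, the KFD restore worker, is expected to reschedule itself rather than give up. A rough, hypothetical sketch of such a retry wrapper; the real worker lives in amdkfd and is not part of this hunk:

/* Hypothetical retry wrapper; the name and rescheduling policy are
 * illustrative only.
 */
static void example_restore_with_retry(void *process_info, struct dma_fence **ef)
{
	int r;

	r = amdgpu_amdkfd_gpuvm_restore_process_bos(process_info, ef);
	if (r) {
		/* Transient failure; try again after a short delay, e.g.
		 * schedule_delayed_work(&restore_work, some_delay);
		 */
		pr_debug("Restore failed (%d), rescheduling\n", r);
	}
}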
......@@ -36,6 +36,7 @@
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
static bool amdgpu_need_backup(struct amdgpu_device *adev)
{
......@@ -54,6 +55,9 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
if (bo->kfd_bo)
amdgpu_amdkfd_unreserve_system_memory_limit(bo);
amdgpu_bo_kunmap(bo);
drm_gem_object_release(&bo->gem_base);
......
......@@ -92,6 +92,8 @@ struct amdgpu_bo {
struct list_head mn_list;
struct list_head shadow_list;
};
struct kgd_mem *kfd_bo;
};
static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
......
......@@ -26,6 +26,7 @@
#include <drm/amdgpu_drm.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_print.h>
/* max number of rings */
#define AMDGPU_MAX_RINGS 18
......@@ -35,8 +36,9 @@
#define AMDGPU_MAX_UVD_ENC_RINGS 2
/* some special values for the owner field */
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul)
#define AMDGPU_FENCE_OWNER_VM ((void*)1ul)
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul)
#define AMDGPU_FENCE_OWNER_VM ((void *)1ul)
#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul)
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
......
......@@ -31,6 +31,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
struct amdgpu_sync_entry {
struct hlist_node node;
......@@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
*/
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
struct drm_sched_fence *s_fence;
struct amdgpu_amdkfd_fence *kfd_fence;
if (!f)
return AMDGPU_FENCE_OWNER_UNDEFINED;
s_fence = to_drm_sched_fence(f);
if (s_fence)
return s_fence->owner;
kfd_fence = to_amdgpu_amdkfd_fence(f);
if (kfd_fence)
return AMDGPU_FENCE_OWNER_KFD;
return AMDGPU_FENCE_OWNER_UNDEFINED;
}
......@@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
reservation_object_held(resv));
/* We only want to trigger KFD eviction fences on
* evict or move jobs. Skip KFD fences otherwise.
*/
fence_owner = amdgpu_sync_get_owner(f);
if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
owner != AMDGPU_FENCE_OWNER_UNDEFINED)
continue;
if (amdgpu_sync_same_dev(adev, f)) {
/* VM updates are only interesting
* for other VM updates and moves.
*/
fence_owner = amdgpu_sync_get_owner(f);
if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
(fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
((owner == AMDGPU_FENCE_OWNER_VM) !=
......@@ -305,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit
return NULL;
}
/**
* amdgpu_sync_clone - clone a sync object
*
* @source: sync object to clone
* @clone: pointer to destination sync object
*
* Adds references to all unsignaled fences in @source to @clone. Also
* removes signaled fences from @source while at it.
*/
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
struct dma_fence *f;
int i, r;
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
if (r)
return r;
} else {
hash_del(&e->node);
dma_fence_put(f);
kmem_cache_free(amdgpu_sync_slab, e);
}
}
dma_fence_put(clone->last_vm_update);
clone->last_vm_update = dma_fence_get(source->last_vm_update);
return 0;
}
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
struct amdgpu_sync_entry *e;
......
......@@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
int amdgpu_sync_init(void);
......
......@@ -46,6 +46,7 @@
#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "bif/bif_4_1_d.h"
#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
......@@ -258,6 +259,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
/*
* Don't verify access for KFD BOs. They don't have a GEM
* object associated with them.
*/
if (abo->kfd_bo)
return 0;
if (amdgpu_ttm_tt_get_usermm(bo->ttm))
return -EPERM;
return drm_vma_node_verify_access(&abo->gem_base.vma_node,
......@@ -1171,6 +1179,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
{
unsigned long num_pages = bo->mem.num_pages;
struct drm_mm_node *node = bo->mem.mm_node;
struct reservation_object_list *flist;
struct dma_fence *f;
int i;
/* If bo is a KFD BO, check if the bo belongs to the current process.
* If true, then return false as any KFD process needs all its BOs to
* be resident to run successfully.
*/
flist = reservation_object_get_list(bo->resv);
if (flist) {
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
reservation_object_held(bo->resv));
if (amdkfd_fence_check_mm(f, current->mm))
return false;
}
}
switch (bo->mem.mem_type) {
case TTM_PL_TT:
......
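The eviction-valuable check above relies on amdkfd_fence_check_mm(), implemented in the new amdgpu_amdkfd_fence.c, which is not quoted in this excerpt. Conceptually it answers "is this fence a KFD eviction fence belonging to this mm?". A rough sketch of what that amounts to, assuming the to_amdgpu_amdkfd_fence() cast helper already used in amdgpu_sync.c:

/* Sketch only; the real implementation lives in amdgpu_amdkfd_fence.c */
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);

	/* Not a KFD eviction fence at all */
	if (!fence)
		return false;

	/* It is a KFD fence; report whether it belongs to the given mm */
	return fence->mm == mm;
}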
......@@ -28,6 +28,7 @@
#include <linux/kfifo.h>
#include <linux/rbtree.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_file.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
......
......@@ -4,6 +4,7 @@
config HSA_AMD
tristate "HSA kernel driver for AMD GPU devices"
depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64
depends on DRM_AMDGPU && X86_64
imply AMD_IOMMU_V2
help
Enable this if you want to use HSA features on AMD GPU devices.
......@@ -37,6 +37,10 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
ifneq ($(CONFIG_AMD_IOMMU_V2),)
amdkfd-y += kfd_iommu.o
endif
amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o
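The conditional Makefile rule pairs with the new kfd_iommu.h header (not shown in this excerpt), which presumably defines KFD_SUPPORT_IOMMU_V2 when IOMMUv2 is available and provides inline stubs otherwise, so callers such as kgd2kfd_device_init() need no #ifdefs of their own. A sketch of that gating pattern; the exact set of stubs is assumed:

/* Sketch of the assumed kfd_iommu.h structure (header not quoted here) */
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
#define KFD_SUPPORT_IOMMU_V2

int kfd_iommu_check_device(struct kfd_dev *kfd);
int kfd_iommu_device_init(struct kfd_dev *kfd);
void kfd_iommu_suspend(struct kfd_dev *kfd);
int kfd_iommu_resume(struct kfd_dev *kfd);

#else

/* Without IOMMUv2, report "no IOMMU" and make the lifecycle calls no-ops */
static inline int kfd_iommu_check_device(struct kfd_dev *kfd)
{
	return -ENODEV;
}
static inline int kfd_iommu_device_init(struct kfd_dev *kfd)
{
	return 0;
}
static inline void kfd_iommu_suspend(struct kfd_dev *kfd)
{
}
static inline int kfd_iommu_resume(struct kfd_dev *kfd)
{
	return 0;
}

#endif /* CONFIG_AMD_IOMMU_V2(_MODULE) */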
......@@ -901,7 +901,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
mutex_unlock(&p->mutex);
if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0)
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
pdd->qpd.vmid != 0)
dev->kfd2kgd->set_scratch_backing_va(
dev->kgd, args->va_addr, pdd->qpd.vmid);
......
......@@ -22,10 +22,10 @@
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/amd-iommu.h>
#include "kfd_crat.h"
#include "kfd_priv.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
* GPU processor IDs are expressed with Bit[31]=1.
......@@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
struct crat_subtype_generic *sub_type_hdr;
struct crat_subtype_computeunit *cu;
struct kfd_cu_info cu_info;
struct amd_iommu_device_info iommu_info;
int avail_size = *size;
uint32_t total_num_of_cu;
int num_of_cache_entries = 0;
int cache_mem_filled = 0;
int ret = 0;
const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
struct kfd_local_mem_info local_mem_info;
if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
......@@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
/* Check if this node supports IOMMU. During parsing this flag will
* translate to HSA_CAP_ATS_PRESENT
*/
iommu_info.flags = 0;
if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) {
if ((iommu_info.flags & required_iommu_flags) ==
required_iommu_flags)
if (!kfd_iommu_check_device(kdev))
cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
}
crat_table->length += sub_type_hdr->length;
crat_table->total_entries++;
......
......@@ -33,6 +33,7 @@
#include "kfd_pm4_headers_diq.h"
#include "kfd_dbgmgr.h"
#include "kfd_dbgdev.h"
#include "kfd_device_queue_manager.h"
static DEFINE_MUTEX(kfd_dbgmgr_mutex);
......@@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
}
/* get actual type of DBGDevice cpsch or not */
if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
type = DBGDEV_TYPE_NODIQ;
kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
......
......@@ -20,7 +20,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
#include <linux/amd-iommu.h>
#endif
#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
......@@ -28,9 +30,12 @@
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "cwsr_trap_handler_gfx8.asm"
#include "kfd_iommu.h"
#define MQD_SIZE_ALIGNED 768
static atomic_t kfd_device_suspended = ATOMIC_INIT(0);
#ifdef KFD_SUPPORT_IOMMU_V2
static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
.max_pasid_bits = 16,
......@@ -41,6 +46,8 @@ static const struct kfd_device_info kaveri_device_info = {
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = false,
.needs_iommu_device = true,
.needs_pci_atomics = false,
};
static const struct kfd_device_info carrizo_device_info = {
......@@ -53,15 +60,125 @@ static const struct kfd_device_info carrizo_device_info = {
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true,
.needs_iommu_device = true,
.needs_pci_atomics = false,
};
#endif
static const struct kfd_device_info hawaii_device_info = {
.asic_family = CHIP_HAWAII,
.max_pasid_bits = 16,
/* max num of queues for KV. TODO: should be a dynamic value */
.max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = false,
.needs_iommu_device = false,
.needs_pci_atomics = false,
};
static const struct kfd_device_info tonga_device_info = {
.asic_family = CHIP_TONGA,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = false,
.needs_iommu_device = false,
.needs_pci_atomics = true,
};
static const struct kfd_device_info tonga_vf_device_info = {
.asic_family = CHIP_TONGA,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = false,
.needs_iommu_device = false,
.needs_pci_atomics = false,
};
static const struct kfd_device_info fiji_device_info = {
.asic_family = CHIP_FIJI,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = true,
};
static const struct kfd_device_info fiji_vf_device_info = {
.asic_family = CHIP_FIJI,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = false,
};
static const struct kfd_device_info polaris10_device_info = {
.asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = true,
};
static const struct kfd_device_info polaris10_vf_device_info = {
.asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = false,
};
static const struct kfd_device_info polaris11_device_info = {
.asic_family = CHIP_POLARIS11,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = true,
};
struct kfd_deviceid {
unsigned short did;
const struct kfd_device_info *device_info;
};
/* Please keep this sorted by increasing device id. */
static const struct kfd_deviceid supported_devices[] = {
#ifdef KFD_SUPPORT_IOMMU_V2
{ 0x1304, &kaveri_device_info }, /* Kaveri */
{ 0x1305, &kaveri_device_info }, /* Kaveri */
{ 0x1306, &kaveri_device_info }, /* Kaveri */
......@@ -88,7 +205,51 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x9874, &carrizo_device_info }, /* Carrizo */
{ 0x9875, &carrizo_device_info }, /* Carrizo */
{ 0x9876, &carrizo_device_info }, /* Carrizo */
{ 0x9877, &carrizo_device_info } /* Carrizo */
{ 0x9877, &carrizo_device_info }, /* Carrizo */
#endif
{ 0x67A0, &hawaii_device_info }, /* Hawaii */
{ 0x67A1, &hawaii_device_info }, /* Hawaii */
{ 0x67A2, &hawaii_device_info }, /* Hawaii */
{ 0x67A8, &hawaii_device_info }, /* Hawaii */
{ 0x67A9, &hawaii_device_info }, /* Hawaii */
{ 0x67AA, &hawaii_device_info }, /* Hawaii */
{ 0x67B0, &hawaii_device_info }, /* Hawaii */
{ 0x67B1, &hawaii_device_info }, /* Hawaii */
{ 0x67B8, &hawaii_device_info }, /* Hawaii */
{ 0x67B9, &hawaii_device_info }, /* Hawaii */
{ 0x67BA, &hawaii_device_info }, /* Hawaii */
{ 0x67BE, &hawaii_device_info }, /* Hawaii */
{ 0x6920, &tonga_device_info }, /* Tonga */
{ 0x6921, &tonga_device_info }, /* Tonga */
{ 0x6928, &tonga_device_info }, /* Tonga */
{ 0x6929, &tonga_device_info }, /* Tonga */
{ 0x692B, &tonga_device_info }, /* Tonga */
{ 0x692F, &tonga_vf_device_info }, /* Tonga vf */
{ 0x6938, &tonga_device_info }, /* Tonga */
{ 0x6939, &tonga_device_info }, /* Tonga */
{ 0x7300, &fiji_device_info }, /* Fiji */
{ 0x730F, &fiji_vf_device_info }, /* Fiji vf*/
{ 0x67C0, &polaris10_device_info }, /* Polaris10 */
{ 0x67C1, &polaris10_device_info }, /* Polaris10 */
{ 0x67C2, &polaris10_device_info }, /* Polaris10 */
{ 0x67C4, &polaris10_device_info }, /* Polaris10 */
{ 0x67C7, &polaris10_device_info }, /* Polaris10 */
{ 0x67C8, &polaris10_device_info }, /* Polaris10 */
{ 0x67C9, &polaris10_device_info }, /* Polaris10 */
{ 0x67CA, &polaris10_device_info }, /* Polaris10 */
{ 0x67CC, &polaris10_device_info }, /* Polaris10 */
{ 0x67CF, &polaris10_device_info }, /* Polaris10 */
{ 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/
{ 0x67DF, &polaris10_device_info }, /* Polaris10 */
{ 0x67E0, &polaris11_device_info }, /* Polaris11 */
{ 0x67E1, &polaris11_device_info }, /* Polaris11 */
{ 0x67E3, &polaris11_device_info }, /* Polaris11 */
{ 0x67E7, &polaris11_device_info }, /* Polaris11 */
{ 0x67E8, &polaris11_device_info }, /* Polaris11 */
{ 0x67E9, &polaris11_device_info }, /* Polaris11 */
{ 0x67EB, &polaris11_device_info }, /* Polaris11 */
{ 0x67EF, &polaris11_device_info }, /* Polaris11 */
{ 0x67FF, &polaris11_device_info }, /* Polaris11 */
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
......@@ -127,6 +288,21 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
return NULL;
}
if (device_info->needs_pci_atomics) {
/* Allow BIF to recode atomics to PCIe 3.0
* AtomicOps. 32 and 64-bit requests are possible and
* must be supported.
*/
if (pci_enable_atomic_ops_to_root(pdev,
PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) {
dev_info(kfd_device,
"skipped device %x:%x, PCI rejects atomics",
pdev->vendor, pdev->device);
return NULL;
}
}
kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
if (!kfd)
return NULL;
......@@ -144,77 +320,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
return kfd;
}
static bool device_iommu_pasid_init(struct kfd_dev *kfd)
{
const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
struct amd_iommu_device_info iommu_info;
unsigned int pasid_limit;
int err;
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
if (err < 0) {
dev_err(kfd_device,
"error getting iommu info. is the iommu enabled?\n");
return false;
}
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n",
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
!= 0);
return false;
}
pasid_limit = min_t(unsigned int,
(unsigned int)(1 << kfd->device_info->max_pasid_bits),
iommu_info.max_pasids);
if (!kfd_set_pasid_limit(pasid_limit)) {
dev_err(kfd_device, "error setting pasid limit\n");
return false;
}
return true;
}
static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
{
struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
if (dev)
kfd_process_iommu_unbind_callback(dev, pasid);
}
/*
* This function called by IOMMU driver on PPR failure
*/
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
unsigned long address, u16 flags)
{
struct kfd_dev *dev;
dev_warn(kfd_device,
"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
PCI_BUS_NUM(pdev->devfn),
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn),
pasid,
address,
flags);
dev = kfd_device_by_pci_dev(pdev);
if (!WARN_ON(!dev))
kfd_signal_iommu_event(dev, pasid, address,
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
return AMD_IOMMU_INV_PRI_RSP_INVALID;
}
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
if (cwsr_enable && kfd->device_info->supports_cwsr) {
......@@ -304,11 +409,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto device_queue_manager_error;
}
if (!device_iommu_pasid_init(kfd)) {
dev_err(kfd_device,
"Error initializing iommuv2 for device %x:%x\n",
kfd->pdev->vendor, kfd->pdev->device);
goto device_iommu_pasid_error;
if (kfd_iommu_device_init(kfd)) {
dev_err(kfd_device, "Error initializing iommuv2\n");
goto device_iommu_error;
}
kfd_cwsr_init(kfd);
......@@ -323,12 +426,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->pdev->device);
pr_debug("Starting kfd with the following scheduling policy %d\n",
sched_policy);
kfd->dqm->sched_policy);
goto out;
kfd_resume_error:
device_iommu_pasid_error:
device_iommu_error:
device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
kfd_interrupt_exit(kfd);
......@@ -367,40 +470,45 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
if (!kfd->init_complete)
return;
kfd->dqm->ops.stop(kfd->dqm);
/* For the first KFD device, suspend all KFD processes */
if (atomic_inc_return(&kfd_device_suspended) == 1)
kfd_suspend_all_processes();
kfd_unbind_processes_from_device(kfd);
kfd->dqm->ops.stop(kfd->dqm);
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev);
kfd_iommu_suspend(kfd);
}
int kgd2kfd_resume(struct kfd_dev *kfd)
{
int ret, count;
if (!kfd->init_complete)
return 0;
return kfd_resume(kfd);
ret = kfd_resume(kfd);
if (ret)
return ret;
count = atomic_dec_return(&kfd_device_suspended);
WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
if (count == 0)
ret = kfd_resume_all_processes();
return ret;
}
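The kfd_device_suspended counter makes process suspension a system-wide operation: only the first device to suspend parks all KFD processes, and only the last device to resume lets them run again. Illustrated with two hypothetical dGPUs:

/* Hypothetical two-device ordering (illustration only):
 *
 *	kgd2kfd_suspend(kfd0);	// count 0 -> 1: kfd_suspend_all_processes()
 *	kgd2kfd_suspend(kfd1);	// count 1 -> 2: processes already suspended
 *	kgd2kfd_resume(kfd1);	// count 2 -> 1: processes stay suspended
 *	kgd2kfd_resume(kfd0);	// count 1 -> 0: kfd_resume_all_processes()
 */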
static int kfd_resume(struct kfd_dev *kfd)
{
int err = 0;
unsigned int pasid_limit = kfd_get_pasid_limit();
err = amd_iommu_init_device(kfd->pdev, pasid_limit);
if (err)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback);
amd_iommu_set_invalid_ppr_cb(kfd->pdev,
iommu_invalid_ppr_cb);
err = kfd_bind_processes_to_device(kfd);
if (err)
goto processes_bind_error;
err = kfd_iommu_resume(kfd);
if (err) {
dev_err(kfd_device,
"Failed to resume IOMMU for device %x:%x\n",
kfd->pdev->vendor, kfd->pdev->device);
return err;
}
err = kfd->dqm->ops.start(kfd->dqm);
if (err) {
......@@ -413,9 +521,7 @@ static int kfd_resume(struct kfd_dev *kfd)
return err;
dqm_start_error:
processes_bind_error:
amd_iommu_free_device(kfd->pdev);
kfd_iommu_suspend(kfd);
return err;
}
......@@ -435,6 +541,54 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_unlock(&kfd->interrupt_lock);
}
/** kgd2kfd_schedule_evict_and_restore_process - Schedule delayed work that
* prepares for safe eviction of KFD BOs that belong to the specified
* process.
*
* @mm: mm_struct that identifies the specified KFD process
* @fence: eviction fence attached to KFD process BOs
*
*/
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence)
{
struct kfd_process *p;
unsigned long active_time;
unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);
if (!fence)
return -EINVAL;
if (dma_fence_is_signaled(fence))
return 0;
p = kfd_lookup_process_by_mm(mm);
if (!p)
return -ENODEV;
if (fence->seqno == p->last_eviction_seqno)
goto out;
p->last_eviction_seqno = fence->seqno;
/* Avoid KFD process starvation. Wait for at least
* PROCESS_ACTIVE_TIME_MS before evicting the process again
*/
active_time = get_jiffies_64() - p->last_restore_timestamp;
if (delay_jiffies > active_time)
delay_jiffies -= active_time;
else
delay_jiffies = 0;
/* During process initialization eviction_work.dwork is initialized
* to kfd_evict_bo_worker
*/
schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
kfd_unref_process(p);
return 0;
}
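The delay computation above guarantees a process stays runnable for at least PROCESS_ACTIVE_TIME_MS after its last restore before the next eviction work is allowed to run. A worked example, assuming (purely for illustration) a 10 ms budget:

/* Worked example (assuming PROCESS_ACTIVE_TIME_MS == 10):
 *
 *	restored  3 ms ago -> active_time <  budget -> evict work runs in ~7 ms
 *	restored 25 ms ago -> active_time >= budget -> evict work runs immediately
 */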
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size)
{
......
......@@ -21,10 +21,11 @@
*
*/
#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
......@@ -118,9 +119,8 @@ static int allocate_vmid(struct device_queue_manager *dqm,
if (dqm->vmid_bitmap == 0)
return -ENOMEM;
bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap,
dqm->dev->vm_info.vmid_num_kfd);
clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
bit = ffs(dqm->vmid_bitmap) - 1;
dqm->vmid_bitmap &= ~(1 << bit);
allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
pr_debug("vmid allocation %d\n", allocated_vmid);
......@@ -130,6 +130,15 @@ static int allocate_vmid(struct device_queue_manager *dqm,
set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
program_sh_mem_settings(dqm, qpd);
/* qpd->page_table_base is set earlier when register_process()
* is called, i.e. when the first queue is created.
*/
dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
qpd->vmid,
qpd->page_table_base);
/* invalidate the VM context after pasid and vmid mapping is set up */
kfd_flush_tlb(qpd_to_pdd(qpd));
return 0;
}
......@@ -139,10 +148,12 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
{
int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
kfd_flush_tlb(qpd_to_pdd(qpd));
/* Release the vmid mapping */
set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
dqm->vmid_bitmap |= (1 << bit);
qpd->vmid = 0;
q->properties.vmid = 0;
}
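The VMID (and, further down, HQD and SDMA) bitmaps fit in a plain unsigned int, so the open-coded ffs()/mask updates replace the earlier find_first_bit()/clear_bit() calls that cast the int to unsigned long *, which the "Remove unaligned memory access" patch in this series flags as an unaligned access. A minimal standalone sketch of the pattern now used above:

/* Plain-C sketch of the bitmap pattern (not kernel code):
 * allocate the lowest set bit with ffs(), free by setting the bit again.
 */
#include <strings.h>	/* ffs() in userspace */

static int example_alloc_bit(unsigned int *bitmap)
{
	int bit;

	if (*bitmap == 0)
		return -1;		/* nothing free */

	bit = ffs(*bitmap) - 1;		/* ffs() is 1-based, 0 means empty */
	*bitmap &= ~(1U << bit);
	return bit;
}

static void example_free_bit(unsigned int *bitmap, int bit)
{
	*bitmap |= 1U << bit;
}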
......@@ -170,6 +181,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
goto out_unlock;
}
q->properties.vmid = qpd->vmid;
/*
* Eviction state logic: we only mark active queues as evicted
* to avoid the overhead of restoring inactive queues later
*/
if (qpd->evicted)
q->properties.is_evicted = (q->properties.queue_size > 0 &&
q->properties.queue_percent > 0 &&
q->properties.queue_address != 0);
q->properties.tba_addr = qpd->tba_addr;
q->properties.tma_addr = qpd->tma_addr;
......@@ -223,12 +242,8 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
continue;
if (dqm->allocated_queues[pipe] != 0) {
bit = find_first_bit(
(unsigned long *)&dqm->allocated_queues[pipe],
get_queues_per_pipe(dqm));
clear_bit(bit,
(unsigned long *)&dqm->allocated_queues[pipe]);
bit = ffs(dqm->allocated_queues[pipe]) - 1;
dqm->allocated_queues[pipe] &= ~(1 << bit);
q->pipe = pipe;
q->queue = bit;
set = true;
......@@ -249,7 +264,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
static inline void deallocate_hqd(struct device_queue_manager *dqm,
struct queue *q)
{
set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}
static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
......@@ -371,21 +386,35 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
int retval;
struct mqd_manager *mqd;
struct kfd_process_device *pdd;
bool prev_active = false;
mutex_lock(&dqm->lock);
pdd = kfd_get_process_device_data(q->device, q->process);
if (!pdd) {
retval = -ENODEV;
goto out_unlock;
}
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) {
retval = -ENOMEM;
goto out_unlock;
}
/*
* Eviction state logic: we only mark active queues as evicted
* to avoid the overhead of restoring inactive queues later
*/
if (pdd->qpd.evicted)
q->properties.is_evicted = (q->properties.queue_size > 0 &&
q->properties.queue_percent > 0 &&
q->properties.queue_address != 0);
/* Save previous activity state for counters */
prev_active = q->properties.is_active;
/* Make sure the queue is unmapped before updating the MQD */
if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
retval = unmap_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
if (retval) {
......@@ -417,7 +446,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
else if (!q->properties.is_active && prev_active)
dqm->queue_count--;
if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
retval = map_queues_cpsch(dqm);
else if (q->properties.is_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
......@@ -451,10 +480,193 @@ static struct mqd_manager *get_mqd_manager(
return mqd;
}
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct queue *q;
struct mqd_manager *mqd;
struct kfd_process_device *pdd;
int retval = 0;
mutex_lock(&dqm->lock);
if (qpd->evicted++ > 0) /* already evicted, do nothing */
goto out;
pdd = qpd_to_pdd(qpd);
pr_info_ratelimited("Evicting PASID %u queues\n",
pdd->process->pasid);
/* Deactivate all active queues on the qpd */
list_for_each_entry(q, &qpd->queues_list, list) {
if (!q->properties.is_active)
continue;
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) { /* should not be here */
pr_err("Cannot evict queue, mqd mgr is NULL\n");
retval = -ENOMEM;
goto out;
}
q->properties.is_evicted = true;
q->properties.is_active = false;
retval = mqd->destroy_mqd(mqd, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
if (retval)
goto out;
dqm->queue_count--;
}
out:
mutex_unlock(&dqm->lock);
return retval;
}
static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct queue *q;
struct kfd_process_device *pdd;
int retval = 0;
mutex_lock(&dqm->lock);
if (qpd->evicted++ > 0) /* already evicted, do nothing */
goto out;
pdd = qpd_to_pdd(qpd);
pr_info_ratelimited("Evicting PASID %u queues\n",
pdd->process->pasid);
/* Deactivate all active queues on the qpd */
list_for_each_entry(q, &qpd->queues_list, list) {
if (!q->properties.is_active)
continue;
q->properties.is_evicted = true;
q->properties.is_active = false;
dqm->queue_count--;
}
retval = execute_queues_cpsch(dqm,
qpd->is_debug ?
KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
out:
mutex_unlock(&dqm->lock);
return retval;
}
static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct queue *q;
struct mqd_manager *mqd;
struct kfd_process_device *pdd;
uint32_t pd_base;
int retval = 0;
pdd = qpd_to_pdd(qpd);
/* Retrieve PD base */
pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
mutex_lock(&dqm->lock);
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
goto out;
if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
qpd->evicted--;
goto out;
}
pr_info_ratelimited("Restoring PASID %u queues\n",
pdd->process->pasid);
/* Update PD Base in QPD */
qpd->page_table_base = pd_base;
pr_debug("Updated PD address to 0x%08x\n", pd_base);
if (!list_empty(&qpd->queues_list)) {
dqm->dev->kfd2kgd->set_vm_context_page_table_base(
dqm->dev->kgd,
qpd->vmid,
qpd->page_table_base);
kfd_flush_tlb(pdd);
}
/* Re-activate all evicted queues on the qpd */
list_for_each_entry(q, &qpd->queues_list, list) {
if (!q->properties.is_evicted)
continue;
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) { /* should not be here */
pr_err("Cannot restore queue, mqd mgr is NULL\n");
retval = -ENOMEM;
goto out;
}
q->properties.is_evicted = false;
q->properties.is_active = true;
retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
q->queue, &q->properties,
q->process->mm);
if (retval)
goto out;
dqm->queue_count++;
}
qpd->evicted = 0;
out:
mutex_unlock(&dqm->lock);
return retval;
}
static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct queue *q;
struct kfd_process_device *pdd;
uint32_t pd_base;
int retval = 0;
pdd = qpd_to_pdd(qpd);
/* Retrieve PD base */
pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
mutex_lock(&dqm->lock);
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
goto out;
if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
qpd->evicted--;
goto out;
}
pr_info_ratelimited("Restoring PASID %u queues\n",
pdd->process->pasid);
/* Update PD Base in QPD */
qpd->page_table_base = pd_base;
pr_debug("Updated PD address to 0x%08x\n", pd_base);
/* Re-activate all evicted queues on the qpd */
list_for_each_entry(q, &qpd->queues_list, list) {
if (!q->properties.is_evicted)
continue;
q->properties.is_evicted = false;
q->properties.is_active = true;
dqm->queue_count++;
}
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
if (!retval)
qpd->evicted = 0;
out:
mutex_unlock(&dqm->lock);
return retval;
}
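Both the HWS and no-HWS paths treat qpd->evicted as a reference count, so nested evictions (for example, renewed memory pressure while a process is already evicted) stay balanced and queues only run again after the final restore. An illustrative call sequence through the new dqm ops, ignoring return values:

/* Illustration of the qpd->evicted reference counting (hypothetical usage):
 *
 *	dqm->ops.evict_process_queues(dqm, qpd);   // 0 -> 1: queues deactivated
 *	dqm->ops.evict_process_queues(dqm, qpd);   // 1 -> 2: no further action
 *	dqm->ops.restore_process_queues(dqm, qpd); // 2 -> 1: still evicted
 *	dqm->ops.restore_process_queues(dqm, qpd); // 1 -> 0: queues reactivated
 */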
static int register_process(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct device_process_node *n;
struct kfd_process_device *pdd;
uint32_t pd_base;
int retval;
n = kzalloc(sizeof(*n), GFP_KERNEL);
......@@ -463,9 +675,16 @@ static int register_process(struct device_queue_manager *dqm,
n->qpd = qpd;
pdd = qpd_to_pdd(qpd);
/* Retrieve PD base */
pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
mutex_lock(&dqm->lock);
list_add(&n->list, &dqm->queues);
/* Update PD Base in QPD */
qpd->page_table_base = pd_base;
retval = dqm->asic_ops.update_qpd(dqm, qpd);
dqm->processes_count++;
......@@ -589,10 +808,8 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
if (dqm->sdma_bitmap == 0)
return -ENOMEM;
bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
CIK_SDMA_QUEUES);
clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
bit = ffs(dqm->sdma_bitmap) - 1;
dqm->sdma_bitmap &= ~(1 << bit);
*sdma_queue_id = bit;
return 0;
......@@ -603,7 +820,7 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm,
{
if (sdma_queue_id >= CIK_SDMA_QUEUES)
return;
set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
dqm->sdma_bitmap |= (1 << sdma_queue_id);
}
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
......@@ -840,6 +1057,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
retval = -ENOMEM;
goto out;
}
/*
* Eviction state logic: we only mark active queues as evicted
* to avoid the overhead of restoring inactive queues later
*/
if (qpd->evicted)
q->properties.is_evicted = (q->properties.queue_size > 0 &&
q->properties.queue_percent > 0 &&
q->properties.queue_address != 0);
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
......@@ -1097,7 +1322,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
alternate_aperture_base,
alternate_aperture_size);
if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
program_sh_mem_settings(dqm, qpd);
pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
......@@ -1242,8 +1467,24 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
if (!dqm)
return NULL;
switch (dev->device_info->asic_family) {
/* HWS is not available on Hawaii. */
case CHIP_HAWAII:
/* HWS depends on CWSR for timely dequeue. CWSR is not
* available on Tonga.
*
* FIXME: This argument also applies to Kaveri.
*/
case CHIP_TONGA:
dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
break;
default:
dqm->sched_policy = sched_policy;
break;
}
dqm->dev = dev;
switch (sched_policy) {
switch (dqm->sched_policy) {
case KFD_SCHED_POLICY_HWS:
case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
/* initialize dqm for cp scheduling */
......@@ -1262,6 +1503,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
dqm->ops.set_trap_handler = set_trap_handler;
dqm->ops.process_termination = process_termination_cpsch;
dqm->ops.evict_process_queues = evict_process_queues_cpsch;
dqm->ops.restore_process_queues = restore_process_queues_cpsch;
break;
case KFD_SCHED_POLICY_NO_HWS:
/* initialize dqm for no cp scheduling */
......@@ -1278,9 +1521,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
dqm->ops.set_trap_handler = set_trap_handler;
dqm->ops.process_termination = process_termination_nocpsch;
dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
dqm->ops.restore_process_queues =
restore_process_queues_nocpsch;
break;
default:
pr_err("Invalid scheduling policy %d\n", sched_policy);
pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
goto out_free;
}
......@@ -1292,6 +1538,17 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_KAVERI:
device_queue_manager_init_cik(&dqm->asic_ops);
break;
case CHIP_HAWAII:
device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
break;
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
break;
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
......
......@@ -79,6 +79,10 @@ struct device_process_node {
*
* @process_termination: Clears all process queues belonging to that device.
*
* @evict_process_queues: Evict all active queues of a process
*
* @restore_process_queues: Restore all evicted queues of a process
*
*/
struct device_queue_manager_ops {
......@@ -129,6 +133,11 @@ struct device_queue_manager_ops {
int (*process_termination)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
int (*evict_process_queues)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
int (*restore_process_queues)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
};
struct device_queue_manager_asic_ops {
......@@ -180,12 +189,17 @@ struct device_queue_manager {
unsigned int *fence_addr;
struct kfd_mem_obj *fence_mem;
bool active_runlist;
int sched_policy;
};
void device_queue_manager_init_cik(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_cik_hawaii(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi_tonga(
struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
unsigned int get_queues_num(struct device_queue_manager *dqm);
......
......@@ -34,8 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
uint64_t alternate_aperture_size);
static int update_qpd_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
void device_queue_manager_init_cik(
struct device_queue_manager_asic_ops *asic_ops)
......@@ -45,6 +50,14 @@ void device_queue_manager_init_cik(
asic_ops->init_sdma_vm = init_sdma_vm;
}
void device_queue_manager_init_cik_hawaii(
struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
asic_ops->update_qpd = update_qpd_cik_hawaii;
asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
/* In 64-bit mode, we can only control the top 3 bits of the LDS,
......@@ -132,6 +145,36 @@ static int update_qpd_cik(struct device_queue_manager *dqm,
return 0;
}
static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
unsigned int temp;
pdd = qpd_to_pdd(qpd);
/* check if sh_mem_config register already configured */
if (qpd->sh_mem_config == 0) {
qpd->sh_mem_config =
ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
DEFAULT_MTYPE(MTYPE_NONCACHED) |
APE1_MTYPE(MTYPE_NONCACHED);
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
temp = get_sh_mem_bases_nybble_64(pdd);
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
return 0;
}
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
......@@ -147,3 +190,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
q->properties.sdma_vm_addr = value;
}
static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd)
{
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
q->properties.sdma_vm_addr =
((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
}
......@@ -33,10 +33,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops)
......@@ -46,6 +57,14 @@ void device_queue_manager_init_vi(
asic_ops->init_sdma_vm = init_sdma_vm;
}
void device_queue_manager_init_vi_tonga(
struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
asic_ops->update_qpd = update_qpd_vi_tonga;
asic_ops->init_sdma_vm = init_sdma_vm_tonga;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
/* In 64-bit mode, we can only control the top 3 bits of the LDS,
......@@ -103,6 +122,33 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
return true;
}
static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_UC :
MTYPE_NC;
ape1_mtype = (alternate_policy == cache_policy_coherent) ?
MTYPE_UC :
MTYPE_NC;
qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
return true;
}
static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
......@@ -144,6 +190,40 @@ static int update_qpd_vi(struct device_queue_manager *dqm,
return 0;
}
static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
unsigned int temp;
pdd = qpd_to_pdd(qpd);
/* check if sh_mem_config register already configured */
if (qpd->sh_mem_config == 0) {
qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
MTYPE_UC <<
SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
MTYPE_UC <<
SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
temp = get_sh_mem_bases_nybble_64(pdd);
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
temp, qpd->sh_mem_bases);
return 0;
}
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
......@@ -159,3 +239,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
q->properties.sdma_vm_addr = value;
}
static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd)
{
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
q->properties.sdma_vm_addr =
((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
}
......@@ -30,6 +30,7 @@
#include <linux/memory.h>
#include "kfd_priv.h"
#include "kfd_events.h"
#include "kfd_iommu.h"
#include <linux/device.h>
/*
......@@ -837,6 +838,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
}
}
#ifdef KFD_SUPPORT_IOMMU_V2
void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
unsigned long address, bool is_write_requested,
bool is_execute_requested)
......@@ -905,6 +907,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
mutex_unlock(&p->event_mutex);
kfd_unref_process(p);
}
#endif /* KFD_SUPPORT_IOMMU_V2 */
void kfd_signal_hw_exception_event(unsigned int pasid)
{
......
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/printk.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/amd-iommu.h>
#include "kfd_priv.h"
#include "kfd_dbgmgr.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
/** kfd_iommu_check_device - Check whether IOMMU is available for device
*/
int kfd_iommu_check_device(struct kfd_dev *kfd)
{
struct amd_iommu_device_info iommu_info;
int err;
if (!kfd->device_info->needs_iommu_device)
return -ENODEV;
iommu_info.flags = 0;
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
if (err)
return err;
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags)
return -ENODEV;
return 0;
}
/** kfd_iommu_device_init - Initialize IOMMU for device
*/
int kfd_iommu_device_init(struct kfd_dev *kfd)
{
struct amd_iommu_device_info iommu_info;
unsigned int pasid_limit;
int err;
if (!kfd->device_info->needs_iommu_device)
return 0;
iommu_info.flags = 0;
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
if (err < 0) {
dev_err(kfd_device,
"error getting iommu info. is the iommu enabled?\n");
return -ENODEV;
}
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
dev_err(kfd_device,
"error required iommu flags ats %i, pri %i, pasid %i\n",
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
!= 0);
return -ENODEV;
}
pasid_limit = min_t(unsigned int,
(unsigned int)(1 << kfd->device_info->max_pasid_bits),
iommu_info.max_pasids);
if (!kfd_set_pasid_limit(pasid_limit)) {
dev_err(kfd_device, "error setting pasid limit\n");
return -EBUSY;
}
return 0;
}
/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process
*
* Binds the given process to the given device using its PASID. This
* enables IOMMUv2 address translation for the process on the device.
*
* This function assumes that the process mutex is held.
*/
int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
{
struct kfd_dev *dev = pdd->dev;
struct kfd_process *p = pdd->process;
int err;
if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND)
return 0;
if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
return -EINVAL;
}
err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
if (!err)
pdd->bound = PDD_BOUND;
return err;
}
/** kfd_iommu_unbind_process - Unbind process from all devices
*
* This removes all IOMMU device bindings of the process. To be used
* before process termination.
*/
void kfd_iommu_unbind_process(struct kfd_process *p)
{
struct kfd_process_device *pdd;
list_for_each_entry(pdd, &p->per_device_data, per_device_list)
if (pdd->bound == PDD_BOUND)
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
}
/* Callback for process shutdown invoked by the IOMMU driver */
static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
{
struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
struct kfd_process *p;
struct kfd_process_device *pdd;
if (!dev)
return;
/*
* Look for the process that matches the pasid. If there is no such
* process, we either released it in amdkfd's own notifier, or there
* is a bug. Unfortunately, there is no way to tell...
*/
p = kfd_lookup_process_by_pasid(pasid);
if (!p)
return;
pr_debug("Unbinding process %d from IOMMU\n", pasid);
mutex_lock(kfd_get_dbgmgr_mutex());
if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
kfd_dbgmgr_destroy(dev->dbgmgr);
dev->dbgmgr = NULL;
}
}
mutex_unlock(kfd_get_dbgmgr_mutex());
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(dev, p);
if (pdd)
/* For GPUs relying on the IOMMU, we need to dequeue here
* while the PASID is still bound.
*/
kfd_process_dequeue_from_device(pdd);
mutex_unlock(&p->mutex);
kfd_unref_process(p);
}
/* This function called by IOMMU driver on PPR failure */
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
unsigned long address, u16 flags)
{
struct kfd_dev *dev;
dev_warn(kfd_device,
"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
PCI_BUS_NUM(pdev->devfn),
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn),
pasid,
address,
flags);
dev = kfd_device_by_pci_dev(pdev);
if (!WARN_ON(!dev))
kfd_signal_iommu_event(dev, pasid, address,
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
return AMD_IOMMU_INV_PRI_RSP_INVALID;
}
/*
* Bind processes to the device that have been temporarily unbound
* (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
*/
static int kfd_bind_processes_to_device(struct kfd_dev *kfd)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
unsigned int temp;
int err = 0;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(kfd, p);
if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
mutex_unlock(&p->mutex);
continue;
}
err = amd_iommu_bind_pasid(kfd->pdev, p->pasid,
p->lead_thread);
if (err < 0) {
pr_err("Unexpected pasid %d binding failure\n",
p->pasid);
mutex_unlock(&p->mutex);
break;
}
pdd->bound = PDD_BOUND;
mutex_unlock(&p->mutex);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
return err;
}
/*
* Mark currently bound processes as PDD_BOUND_SUSPENDED. These
* processes will be restored to PDD_BOUND state in
* kfd_bind_processes_to_device.
*/
static void kfd_unbind_processes_from_device(struct kfd_dev *kfd)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(kfd, p);
if (WARN_ON(!pdd)) {
mutex_unlock(&p->mutex);
continue;
}
if (pdd->bound == PDD_BOUND)
pdd->bound = PDD_BOUND_SUSPENDED;
mutex_unlock(&p->mutex);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
/** kfd_iommu_suspend - Prepare IOMMU for suspend
*
* This unbinds processes from the device and disables the IOMMU for
* the device.
*/
void kfd_iommu_suspend(struct kfd_dev *kfd)
{
if (!kfd->device_info->needs_iommu_device)
return;
kfd_unbind_processes_from_device(kfd);
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev);
}
/** kfd_iommu_resume - Restore IOMMU after resume
*
* This reinitializes the IOMMU for the device and re-binds previously
* suspended processes to the device.
*/
int kfd_iommu_resume(struct kfd_dev *kfd)
{
unsigned int pasid_limit;
int err;
if (!kfd->device_info->needs_iommu_device)
return 0;
pasid_limit = kfd_get_pasid_limit();
err = amd_iommu_init_device(kfd->pdev, pasid_limit);
if (err)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback);
amd_iommu_set_invalid_ppr_cb(kfd->pdev,
iommu_invalid_ppr_cb);
err = kfd_bind_processes_to_device(kfd);
if (err) {
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev);
return err;
}
return 0;
}
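A minimal sketch of the intended pairing, assuming a caller in the device suspend/resume path (the real call sites live in kfd_device.c, which is not part of this excerpt):

/* Hedged sketch, illustrative caller only: suspend tears down the per-device
 * IOMMU state, resume re-initializes it and re-binds suspended processes.
 */
static int example_iommu_power_cycle(struct kfd_dev *kfd)
{
        kfd_iommu_suspend(kfd);         /* unbind processes, free IOMMU device */
        /* ... low-power transition happens here ... */
        return kfd_iommu_resume(kfd);   /* re-init device, re-bind processes */
}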
extern bool amd_iommu_pc_supported(void);
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology
*/
int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
{
struct kfd_perf_properties *props;
if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT))
return 0;
if (!amd_iommu_pc_supported())
return 0;
props = kfd_alloc_struct(props);
if (!props)
return -ENOMEM;
strcpy(props->block_name, "iommu");
props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
amd_iommu_pc_get_max_counters(0); /* assume one iommu */
list_add_tail(&props->list, &kdev->perf_props);
return 0;
}
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __KFD_IOMMU_H__
#define __KFD_IOMMU_H__
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
#define KFD_SUPPORT_IOMMU_V2
int kfd_iommu_check_device(struct kfd_dev *kfd);
int kfd_iommu_device_init(struct kfd_dev *kfd);
int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd);
void kfd_iommu_unbind_process(struct kfd_process *p);
void kfd_iommu_suspend(struct kfd_dev *kfd);
int kfd_iommu_resume(struct kfd_dev *kfd);
int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev);
#else
static inline int kfd_iommu_check_device(struct kfd_dev *kfd)
{
return -ENODEV;
}
static inline int kfd_iommu_device_init(struct kfd_dev *kfd)
{
return 0;
}
static inline int kfd_iommu_bind_process_to_device(
struct kfd_process_device *pdd)
{
return 0;
}
static inline void kfd_iommu_unbind_process(struct kfd_process *p)
{
/* empty */
}
static inline void kfd_iommu_suspend(struct kfd_dev *kfd)
{
/* empty */
}
static inline int kfd_iommu_resume(struct kfd_dev *kfd)
{
return 0;
}
static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
{
return 0;
}
#endif /* defined(CONFIG_AMD_IOMMU_V2) */
#endif /* __KFD_IOMMU_H__ */
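A minimal sketch of what the stub block above buys callers, assuming a device-init path shaped roughly like the one in kfd_device.c (not shown here): the same call compiles whether or not IOMMUv2 support is built in.

/* Hedged sketch: no #ifdef needed at the call site because this header
 * provides no-op stubs when KFD_SUPPORT_IOMMU_V2 is not defined.
 */
static int example_init_iommu(struct kfd_dev *kfd)
{
        int err = kfd_iommu_device_init(kfd);   /* returns 0 from the stub */

        if (err)
                dev_err(kfd_device, "Error initializing IOMMUv2\n");
        return err;
}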
......@@ -297,10 +297,15 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
switch (dev->device_info->asic_family) {
case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
kernel_queue_init_vi(&kq->ops_asic_specific);
break;
case CHIP_KAVERI:
case CHIP_HAWAII:
kernel_queue_init_cik(&kq->ops_asic_specific);
break;
default:
......
......@@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
.interrupt = kgd2kfd_interrupt,
.suspend = kgd2kfd_suspend,
.resume = kgd2kfd_resume,
.schedule_evict_and_restore_process =
kgd2kfd_schedule_evict_and_restore_process,
};
int sched_policy = KFD_SCHED_POLICY_HWS;
......
......@@ -29,8 +29,15 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
switch (dev->device_info->asic_family) {
case CHIP_KAVERI:
return mqd_manager_init_cik(type, dev);
case CHIP_HAWAII:
return mqd_manager_init_cik_hawaii(type, dev);
case CHIP_CARRIZO:
return mqd_manager_init_vi(type, dev);
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
return mqd_manager_init_vi_tonga(type, dev);
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
......
......@@ -170,14 +170,19 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
mms);
}
static int update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
static int __update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q, unsigned int atc_bit)
{
struct cik_mqd *m;
m = get_mqd(mqd);
m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
DEFAULT_MIN_AVAIL_SIZE;
m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
if (atc_bit) {
m->cp_hqd_pq_control |= PQ_ATC_EN;
m->cp_hqd_ib_control |= IB_ATC_EN;
}
/*
* Calculating queue size which is log base 2 of actual queue size -1
......@@ -197,11 +202,24 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0);
q->queue_percent > 0 &&
!q->is_evicted);
return 0;
}
static int update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
return __update_mqd(mm, mqd, q, 1);
}
static int update_mqd_hawaii(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
return __update_mqd(mm, mqd, q, 0);
}
static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
......@@ -228,7 +246,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0);
q->queue_percent > 0 &&
!q->is_evicted);
return 0;
}
......@@ -360,7 +379,8 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0);
q->queue_percent > 0 &&
!q->is_evicted);
return 0;
}
......@@ -441,3 +461,15 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
return mqd;
}
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
{
struct mqd_manager *mqd;
mqd = mqd_manager_init_cik(type, dev);
if (!mqd)
return NULL;
if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
mqd->update_mqd = update_mqd_hawaii;
return mqd;
}
......@@ -151,6 +151,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
m->cp_hqd_pq_doorbell_control =
q->doorbell_off <<
......@@ -196,7 +198,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0);
q->queue_percent > 0 &&
!q->is_evicted);
return 0;
}
......@@ -208,6 +211,12 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
return __update_mqd(mm, mqd, q, MTYPE_CC, 1);
}
static int update_mqd_tonga(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
return __update_mqd(mm, mqd, q, MTYPE_UC, 0);
}
static int destroy_mqd(struct mqd_manager *mm, void *mqd,
enum kfd_preempt_type type,
unsigned int timeout, uint32_t pipe_id,
......@@ -334,7 +343,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0);
q->queue_percent > 0 &&
!q->is_evicted);
return 0;
}
......@@ -432,3 +442,16 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
return mqd;
}
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
{
struct mqd_manager *mqd;
mqd = mqd_manager_init_vi(type, dev);
if (!mqd)
return NULL;
if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
mqd->update_mqd = update_mqd_tonga;
return mqd;
}
......@@ -158,6 +158,8 @@ struct kfd_device_info {
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
bool supports_cwsr;
bool needs_iommu_device;
bool needs_pci_atomics;
};
struct kfd_mem_obj {
......@@ -333,7 +335,11 @@ enum kfd_queue_format {
* @is_interop: Defines if this is an interop queue. An interop queue means that
* the queue can access both graphics and compute resources.
*
* @is_active: Defines if the queue is active or not.
* @is_evicted: Defines if the queue is evicted. Only active queues
* are evicted, rendering them inactive.
*
* @is_active: Defines if the queue is active or not. @is_active and
* @is_evicted are protected by the DQM lock.
*
* @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
* of the queue.
......@@ -355,6 +361,7 @@ struct queue_properties {
uint32_t __iomem *doorbell_ptr;
uint32_t doorbell_off;
bool is_interop;
bool is_evicted;
bool is_active;
/* Not relevant for user mode queues in cp scheduling */
unsigned int vmid;
......@@ -458,6 +465,7 @@ struct qcm_process_device {
unsigned int queue_count;
unsigned int vmid;
bool is_debug;
unsigned int evicted; /* eviction counter, 0=active */
/* This flag tells if we should reset all wavefronts on
* process termination
......@@ -484,6 +492,17 @@ struct qcm_process_device {
uint64_t tma_addr;
};
/* KFD Memory Eviction */
/* Approx. wait time before attempting to restore evicted BOs */
#define PROCESS_RESTORE_TIME_MS 100
/* Approx. back off time if restore fails due to lack of memory */
#define PROCESS_BACK_OFF_TIME_MS 100
/* Approx. time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS 10
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence);
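For orientation, a hedged sketch of how the back-off constant above is consumed; restore_process_worker() in the kfd_process.c hunk of this diff does essentially this when restoring BOs fails for lack of memory:

/* Hedged sketch: retry the restore after the back-off period. */
static void example_reschedule_restore(struct kfd_process *p)
{
        schedule_delayed_work(&p->restore_work,
                              msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
}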
enum kfd_pdd_bound {
PDD_UNBOUND = 0,
......@@ -516,8 +535,8 @@ struct kfd_process_device {
uint64_t scratch_base;
uint64_t scratch_limit;
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
enum kfd_pdd_bound bound;
/* VM context for GPUVM allocations */
void *vm;
/* Flag used to tell the pdd has dequeued from the dqm.
* This is used to prevent dev->dqm->ops.process_termination() from
......@@ -525,6 +544,9 @@ struct kfd_process_device {
* function.
*/
bool already_dequeued;
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
enum kfd_pdd_bound bound;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
......@@ -587,8 +609,30 @@ struct kfd_process {
size_t signal_mapped_size;
size_t signal_event_count;
bool signal_event_limit_reached;
/* Information used for memory eviction */
void *kgd_process_info;
/* Eviction fence that is attached to all the BOs of this process. The
* fence will be triggered during eviction and a new one will be created
* during restore.
*/
struct dma_fence *ef;
/* Work items for evicting and restoring BOs */
struct delayed_work eviction_work;
struct delayed_work restore_work;
/* seqno of the last scheduled eviction */
unsigned int last_eviction_seqno;
/* Approx. the last timestamp (in jiffies) when the process was
* restored after an eviction
*/
unsigned long last_restore_timestamp;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
extern struct srcu_struct kfd_processes_srcu;
/**
* Ioctl function type.
*
......@@ -612,13 +656,13 @@ void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
void kfd_unref_process(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
struct kfd_process *p);
int kfd_bind_processes_to_device(struct kfd_dev *dev);
void kfd_unbind_processes_from_device(struct kfd_dev *dev);
void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid);
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
......@@ -705,8 +749,12 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
......@@ -795,6 +843,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint64_t *event_page_offset, uint32_t *event_slot_index);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
void kfd_flush_tlb(struct kfd_process_device *pdd);
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
/* Debugfs */
......
......@@ -34,17 +34,18 @@
struct mm_struct;
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"
/*
* List of struct kfd_process (field kfd_process).
* Unique/indexed by mm_struct*
*/
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);
DEFINE_STATIC_SRCU(kfd_processes_srcu);
DEFINE_SRCU(kfd_processes_srcu);
static struct workqueue_struct *kfd_process_wq;
......@@ -54,6 +55,9 @@ static struct kfd_process *create_process(const struct task_struct *thread,
struct file *filep);
static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);
void kfd_process_create_wq(void)
{
......@@ -154,6 +158,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
pdd->dev->id, p->pasid);
if (pdd->vm)
pdd->dev->kfd2kgd->destroy_process_vm(
pdd->dev->kgd, pdd->vm);
list_del(&pdd->per_device_list);
if (pdd->qpd.cwsr_kaddr)
......@@ -173,16 +181,11 @@ static void kfd_process_wq_release(struct work_struct *work)
{
struct kfd_process *p = container_of(work, struct kfd_process,
release_work);
struct kfd_process_device *pdd;
pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid);
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
if (pdd->bound == PDD_BOUND)
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
}
kfd_iommu_unbind_process(p);
kfd_process_destroy_pdds(p);
dma_fence_put(p->ef);
kfd_event_free_process(p);
......@@ -230,6 +233,9 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
mutex_unlock(&kfd_processes_mutex);
synchronize_srcu(&kfd_processes_srcu);
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
mutex_lock(&p->mutex);
/* Iterate over all process device data structures and if the
......@@ -351,6 +357,10 @@ static struct kfd_process *create_process(const struct task_struct *thread,
if (err != 0)
goto err_init_apertures;
INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
process->last_restore_timestamp = get_jiffies_64();
err = kfd_process_init_cwsr(process, filep);
if (err)
goto err_init_cwsr;
......@@ -402,12 +412,24 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
pdd->qpd.dqm = dev->dqm;
pdd->qpd.pqm = &p->pqm;
pdd->qpd.evicted = 0;
pdd->process = p;
pdd->bound = PDD_UNBOUND;
pdd->already_dequeued = false;
list_add(&pdd->per_device_list, &p->per_device_data);
/* Create the GPUVM context for this specific device */
if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm,
&p->kgd_process_info, &p->ef)) {
pr_err("Failed to create process VM object\n");
goto err_create_pdd;
}
return pdd;
err_create_pdd:
list_del(&pdd->per_device_list);
kfree(pdd);
return NULL;
}
/*
......@@ -429,174 +451,256 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
return ERR_PTR(-ENOMEM);
}
if (pdd->bound == PDD_BOUND) {
err = kfd_iommu_bind_process_to_device(pdd);
if (err)
return ERR_PTR(err);
return pdd;
} else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
return ERR_PTR(-EINVAL);
}
}
err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
if (err < 0)
return ERR_PTR(err);
struct kfd_process_device *kfd_get_first_process_device_data(
struct kfd_process *p)
{
return list_first_entry(&p->per_device_data,
struct kfd_process_device,
per_device_list);
}
pdd->bound = PDD_BOUND;
struct kfd_process_device *kfd_get_next_process_device_data(
struct kfd_process *p,
struct kfd_process_device *pdd)
{
if (list_is_last(&pdd->per_device_list, &p->per_device_data))
return NULL;
return list_next_entry(pdd, per_device_list);
}
return pdd;
bool kfd_has_process_device_data(struct kfd_process *p)
{
return !(list_empty(&p->per_device_data));
}
/*
* Bind processes to the device that have been temporarily unbound
* (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
*/
int kfd_bind_processes_to_device(struct kfd_dev *dev)
/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
struct kfd_process *p, *ret_p = NULL;
unsigned int temp;
int err = 0;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(dev, p);
if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
mutex_unlock(&p->mutex);
continue;
}
err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
p->lead_thread);
if (err < 0) {
pr_err("Unexpected pasid %d binding failure\n",
p->pasid);
mutex_unlock(&p->mutex);
if (p->pasid == pasid) {
kref_get(&p->ref);
ret_p = p;
break;
}
pdd->bound = PDD_BOUND;
mutex_unlock(&p->mutex);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
return err;
return ret_p;
}
/*
* Mark currently bound processes as PDD_BOUND_SUSPENDED. These
* processes will be restored to PDD_BOUND state in
* kfd_bind_processes_to_device.
*/
void kfd_unbind_processes_from_device(struct kfd_dev *dev)
/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(dev, p);
if (WARN_ON(!pdd)) {
mutex_unlock(&p->mutex);
continue;
}
if (pdd->bound == PDD_BOUND)
pdd->bound = PDD_BOUND_SUSPENDED;
mutex_unlock(&p->mutex);
}
p = find_process_by_mm(mm);
if (p)
kref_get(&p->ref);
srcu_read_unlock(&kfd_processes_srcu, idx);
return p;
}
void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
/* process_evict_queues - Evict all user queues of a process
*
* Eviction is reference-counted per process-device. This means multiple
* evictions from different sources can be nested safely.
*/
static int process_evict_queues(struct kfd_process *p)
{
struct kfd_process *p;
struct kfd_process_device *pdd;
int r = 0;
unsigned int n_evicted = 0;
/*
* Look for the process that matches the pasid. If there is no such
* process, we either released it in amdkfd's own notifier, or there
* is a bug. Unfortunately, there is no way to tell...
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
&pdd->qpd);
if (r) {
pr_err("Failed to evict process queues\n");
goto fail;
}
n_evicted++;
}
return r;
fail:
/* To keep state consistent, roll back partial eviction by
* restoring queues
*/
p = kfd_lookup_process_by_pasid(pasid);
if (!p)
return;
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
if (n_evicted == 0)
break;
if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd))
pr_err("Failed to restore queues\n");
pr_debug("Unbinding process %d from IOMMU\n", pasid);
n_evicted--;
}
mutex_lock(kfd_get_dbgmgr_mutex());
return r;
}
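The comment above says eviction is reference-counted per process-device. A simplified sketch of that idea, using the qpd->evicted counter added to struct qcm_process_device in this series (this is an illustration, not the device queue manager implementation):

/* Hedged sketch: only the first eviction really stops the queues and only
 * the last restore really restarts them; nested evict/restore pairs from
 * different sources simply adjust the counter. Assumes balanced pairs.
 */
static bool example_evict_nested(struct qcm_process_device *qpd)
{
        return qpd->evicted++ == 0;     /* true: queues must be stopped now */
}

static bool example_restore_nested(struct qcm_process_device *qpd)
{
        return --qpd->evicted == 0;     /* true: queues may run again */
}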
if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
kfd_dbgmgr_destroy(dev->dbgmgr);
dev->dbgmgr = NULL;
/* process_restore_queues - Restore all user queues of a process */
static int process_restore_queues(struct kfd_process *p)
{
struct kfd_process_device *pdd;
int r, ret = 0;
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd);
if (r) {
pr_err("Failed to restore process queues\n");
if (!ret)
ret = r;
}
}
mutex_unlock(kfd_get_dbgmgr_mutex());
mutex_lock(&p->mutex);
return ret;
}
pdd = kfd_get_process_device_data(dev, p);
if (pdd)
/* For GPU relying on IOMMU, we need to dequeue here
* when PASID is still bound.
*/
kfd_process_dequeue_from_device(pdd);
static void evict_process_worker(struct work_struct *work)
{
int ret;
struct kfd_process *p;
struct delayed_work *dwork;
mutex_unlock(&p->mutex);
dwork = to_delayed_work(work);
kfd_unref_process(p);
/* Process termination destroys this worker thread. So during the
* lifetime of this thread, kfd_process p will be valid
*/
p = container_of(dwork, struct kfd_process, eviction_work);
WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
"Eviction fence mismatch\n");
/* A narrow window of overlap between the restore and evict work
* items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
* unreserves the KFD BOs, they can be evicted again. But restore
* still has a few more steps to finish, so wait for any previous
* restore work to complete.
*/
flush_delayed_work(&p->restore_work);
pr_debug("Started evicting pasid %d\n", p->pasid);
ret = process_evict_queues(p);
if (!ret) {
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
p->ef = NULL;
schedule_delayed_work(&p->restore_work,
msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
pr_debug("Finished evicting pasid %d\n", p->pasid);
} else
pr_err("Failed to evict queues of pasid %d\n", p->pasid);
}
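For context, a hedged sketch of the producer side: the amdgpu eviction-fence code added elsewhere in this series (not part of this excerpt) is expected to hand the owning mm and the fence to amdkfd through the kgd2kfd callback declared near the end of this diff, which in turn queues eviction_work above. All names in the sketch are illustrative.

/* Hedged sketch, illustrative only: notify amdkfd that BOs of the process
 * owning @mm are about to be evicted, so it can schedule evict/restore work.
 */
static int example_notify_eviction(const struct kgd2kfd_calls *kgd2kfd,
                                   struct mm_struct *mm, struct dma_fence *fence)
{
        return kgd2kfd->schedule_evict_and_restore_process(mm, fence);
}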
struct kfd_process_device *kfd_get_first_process_device_data(
struct kfd_process *p)
static void restore_process_worker(struct work_struct *work)
{
return list_first_entry(&p->per_device_data,
struct delayed_work *dwork;
struct kfd_process *p;
struct kfd_process_device *pdd;
int ret = 0;
dwork = to_delayed_work(work);
/* Process termination destroys this worker thread. So during the
* lifetime of this thread, kfd_process p will be valid
*/
p = container_of(dwork, struct kfd_process, restore_work);
/* Call restore_process_bos on the first KGD device. This function
* takes care of restoring the whole process including other devices.
* Restore can fail if not enough memory is available; if so,
* reschedule and try again.
*/
pdd = list_first_entry(&p->per_device_data,
struct kfd_process_device,
per_device_list);
}
struct kfd_process_device *kfd_get_next_process_device_data(
struct kfd_process *p,
struct kfd_process_device *pdd)
{
if (list_is_last(&pdd->per_device_list, &p->per_device_data))
return NULL;
return list_next_entry(pdd, per_device_list);
pr_debug("Started restoring pasid %d\n", p->pasid);
/* Set last_restore_timestamp before the restoration actually succeeds.
* Otherwise this would have to be set by KGD (restore_process_bos)
* before KFD BOs are unreserved. If not, the process can be evicted
* again before the timestamp is set.
* If restore fails, the timestamp will be set again in the next
* attempt. This means that the minimum GPU quantum would be
* PROCESS_ACTIVE_TIME_MS - (time to execute the following two
* functions)
*/
p->last_restore_timestamp = get_jiffies_64();
ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
&p->ef);
if (ret) {
pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
p->pasid, PROCESS_BACK_OFF_TIME_MS);
ret = schedule_delayed_work(&p->restore_work,
msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
WARN(!ret, "reschedule restore work failed\n");
return;
}
ret = process_restore_queues(p);
if (!ret)
pr_debug("Finished restoring pasid %d\n", p->pasid);
else
pr_err("Failed to restore queues of pasid %d\n", p->pasid);
}
bool kfd_has_process_device_data(struct kfd_process *p)
void kfd_suspend_all_processes(void)
{
return !(list_empty(&p->per_device_data));
struct kfd_process *p;
unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
if (process_evict_queues(p))
pr_err("Failed to suspend process %d\n", p->pasid);
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
p->ef = NULL;
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
int kfd_resume_all_processes(void)
{
struct kfd_process *p, *ret_p = NULL;
struct kfd_process *p;
unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu);
int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
if (p->pasid == pasid) {
kref_get(&p->ref);
ret_p = p;
break;
if (!schedule_delayed_work(&p->restore_work, 0)) {
pr_err("Restore process %d failed during resume\n",
p->pasid);
ret = -EFAULT;
}
}
srcu_read_unlock(&kfd_processes_srcu, idx);
return ret_p;
return ret;
}
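A minimal sketch of the expected pairing, assuming the call sites sit in the device suspend/resume (or GPU reset) path in kfd_device.c, which is not part of this excerpt:

/* Hedged sketch, illustrative caller only. */
static int example_process_suspend_resume(void)
{
        kfd_suspend_all_processes();    /* evict queues, signal eviction fences */
        /* ... hardware suspend/resume or reset happens here ... */
        return kfd_resume_all_processes();      /* schedule restore work */
}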
int kfd_reserved_mem_mmap(struct kfd_process *process,
......@@ -633,6 +737,22 @@ int kfd_reserved_mem_mmap(struct kfd_process *process,
KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}
void kfd_flush_tlb(struct kfd_process_device *pdd)
{
struct kfd_dev *dev = pdd->dev;
const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
/* Nothing to flush until a VMID is assigned, which
* only happens when the first queue is created.
*/
if (pdd->qpd.vmid)
f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
} else {
f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
}
}
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
......
......@@ -208,7 +208,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
case KFD_QUEUE_TYPE_COMPUTE:
/* check if there is over subscription */
if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
if ((dev->dqm->sched_policy ==
KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
(dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
......
......@@ -35,6 +35,7 @@
#include "kfd_crat.h"
#include "kfd_topology.h"
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
......@@ -677,7 +678,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
}
/* All hardware blocks have the same number of attributes. */
num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr);
num_attrs = ARRAY_SIZE(perf_attr_iommu);
list_for_each_entry(perf, &dev->perf_props, list) {
perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
* num_attrs + sizeof(struct attribute_group),
......@@ -875,19 +876,8 @@ static void find_system_memory(const struct dmi_header *dm,
*/
static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
{
struct kfd_perf_properties *props;
if (amd_iommu_pc_supported()) {
props = kfd_alloc_struct(props);
if (!props)
return -ENOMEM;
strcpy(props->block_name, "iommu");
props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
amd_iommu_pc_get_max_counters(0); /* assume one iommu */
list_add_tail(&props->list, &kdev->perf_props);
}
return 0;
/* These are the only counters supported so far */
return kfd_iommu_add_perf_counters(kdev);
}
/* kfd_add_non_crat_information - Add information that is not currently
......
......@@ -25,7 +25,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include "kfd_priv.h"
#include "kfd_crat.h"
#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128
......@@ -183,8 +183,4 @@ struct kfd_topology_device *kfd_create_topology_device(
struct list_head *device_list);
void kfd_release_topology_device_list(struct list_head *device_list);
extern bool amd_iommu_pc_supported(void);
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
#endif /* __KFD_TOPOLOGY_H__ */
......@@ -30,6 +30,7 @@
#include <linux/types.h>
#include <linux/bitmap.h>
#include <linux/dma-fence.h>
struct pci_dev;
......@@ -107,6 +108,12 @@ struct kgd2kfd_shared_resources {
/* Number of bytes at start of aperture reserved for KGD. */
size_t doorbell_start_offset;
/* GPUVM address space size in bytes */
uint64_t gpuvm_size;
/* Minor device number of the render node */
int drm_render_minor;
};
struct tile_config {
......@@ -120,6 +127,25 @@ struct tile_config {
uint32_t num_ranks;
};
/*
* Allocation flag domains
*/
#define ALLOC_MEM_FLAGS_VRAM (1 << 0)
#define ALLOC_MEM_FLAGS_GTT (1 << 1)
#define ALLOC_MEM_FLAGS_USERPTR (1 << 2) /* TODO */
#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) /* TODO */
/*
* Allocation flags attributes/access options.
*/
#define ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
#define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
#define ALLOC_MEM_FLAGS_PUBLIC (1 << 29)
#define ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) /* TODO */
#define ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
#define ALLOC_MEM_FLAGS_COHERENT (1 << 26) /* For GFXv9 or later */
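A hedged sketch of how a caller on the KGD side might combine one domain flag with an attribute flag when invoking the alloc_memory_of_gpu hook declared further down in this header; the wrapper, variable names, and flag choice are illustrative.

/* Hedged sketch, illustrative only: allocate writable VRAM in a process VM. */
static int example_alloc_vram(const struct kfd2kgd_calls *f2g,
                              struct kgd_dev *kgd, void *vm,
                              uint64_t va, uint64_t size, struct kgd_mem **mem)
{
        uint64_t offset = 0;
        uint32_t flags = ALLOC_MEM_FLAGS_VRAM | ALLOC_MEM_FLAGS_WRITABLE;

        return f2g->alloc_memory_of_gpu(kgd, va, size, vm, mem, &offset, flags);
}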
/**
* struct kfd2kgd_calls
*
......@@ -179,6 +205,45 @@ struct tile_config {
*
* @get_vram_usage: Returns current VRAM usage
*
* @create_process_vm: Create a VM address space for a given process and GPU
*
* @destroy_process_vm: Destroy a VM
*
* @get_process_page_dir: Get physical address of a VM page directory
*
* @set_vm_context_page_table_base: Program page table base for a VMID
*
* @alloc_memory_of_gpu: Allocate GPUVM memory
*
* @free_memory_of_gpu: Free GPUVM memory
*
* @map_memory_to_gpu: Map GPUVM memory into a specific VM address
* space. Allocates and updates page tables and page directories as
* needed. This function may return before all page table updates have
* completed. This allows multiple map operations (on multiple GPUs)
* to happen concurrently. Use sync_memory to synchronize with all
* pending updates.
*
* @unmap_memory_to_gpu: Unmap GPUVM memory from a specific VM address space
*
* @sync_memory: Wait for pending page table updates to complete
*
* @map_gtt_bo_to_kernel: Map a GTT BO for kernel access
* Pins the BO, maps it to kernel address space. Such BOs are never evicted.
* The kernel virtual address remains valid until the BO is freed.
*
* @restore_process_bos: Restore all BOs that belong to the
* process. This is intended for restoring memory mappings after a TTM
* eviction.
*
* @invalidate_tlbs: Invalidate TLBs for a specific PASID
*
* @invalidate_tlbs_vmid: Invalidate TLBs for a specific VMID
*
* @submit_ib: Submits an IB to the specified engine by inserting it
* into the corresponding ring (ring type). The IB is executed with the
* specified VMID in a user mode context.
*
* This structure contains function pointers to services that the kgd driver
* provides to amdkfd driver.
*
......@@ -258,8 +323,6 @@ struct kfd2kgd_calls {
uint16_t (*get_atc_vmid_pasid_mapping_pasid)(
struct kgd_dev *kgd,
uint8_t vmid);
void (*write_vmid_invalidate_request)(struct kgd_dev *kgd,
uint8_t vmid);
uint16_t (*get_fw_version)(struct kgd_dev *kgd,
enum kgd_engine_type type);
......@@ -270,6 +333,33 @@ struct kfd2kgd_calls {
void (*get_cu_info)(struct kgd_dev *kgd,
struct kfd_cu_info *cu_info);
uint64_t (*get_vram_usage)(struct kgd_dev *kgd);
int (*create_process_vm)(struct kgd_dev *kgd, void **vm,
void **process_info, struct dma_fence **ef);
void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm);
uint32_t (*get_process_page_dir)(void *vm);
void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
uint32_t vmid, uint32_t page_table_base);
int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va,
uint64_t size, void *vm,
struct kgd_mem **mem, uint64_t *offset,
uint32_t flags);
int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem);
int (*map_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem,
void *vm);
int (*unmap_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem,
void *vm);
int (*sync_memory)(struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
int (*map_gtt_bo_to_kernel)(struct kgd_dev *kgd, struct kgd_mem *mem,
void **kptr, uint64_t *size);
int (*restore_process_bos)(void *process_info, struct dma_fence **ef);
int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid);
int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
};
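A hedged sketch following the @map_memory_to_gpu / @sync_memory contract documented above: mapping may return before the page-table updates complete, so the caller synchronizes explicitly. The wrapper name is illustrative.

/* Hedged sketch: map a BO into one VM, then wait for pending updates. */
static int example_map_and_sync(const struct kfd2kgd_calls *f2g,
                                struct kgd_dev *kgd, struct kgd_mem *mem,
                                void *vm)
{
        int r = f2g->map_memory_to_gpu(kgd, mem, vm);   /* may return early */

        if (r)
                return r;
        return f2g->sync_memory(kgd, mem, true);        /* interruptible wait */
}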
/**
......@@ -288,6 +378,9 @@ struct kfd2kgd_calls {
*
* @resume: Notifies amdkfd about a resume action done to a kgd device
*
* @schedule_evict_and_restore_process: Schedules a work item that will prepare
* for safe eviction of KFD BOs that belong to the specified process.
*
* This structure contains function callback pointers so the kgd driver
* can notify amdkfd about certain status changes.
*
......@@ -302,6 +395,8 @@ struct kgd2kfd_calls {
void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
void (*suspend)(struct kfd_dev *kfd);
int (*resume)(struct kfd_dev *kfd);
int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
struct dma_fence *fence);
};
int kgd2kfd_init(unsigned interface_version,
......
......@@ -263,10 +263,10 @@ struct kfd_ioctl_get_tile_config_args {
};
struct kfd_ioctl_set_trap_handler_args {
uint64_t tba_addr; /* to KFD */
uint64_t tma_addr; /* to KFD */
uint32_t gpu_id; /* to KFD */
uint32_t pad;
__u64 tba_addr; /* to KFD */
__u64 tma_addr; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 pad;
};
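The switch to __u64/__u32 keeps the structure layout identical for 32-bit and 64-bit userspace. A hedged userspace sketch, assuming the AMDKFD_IOC_SET_TRAP_HANDLER request macro defined elsewhere in this header:

/* Hedged userspace sketch; error handling trimmed for brevity. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int example_set_trap_handler(int kfd_fd, uint32_t gpu_id,
                                    uint64_t tba_addr, uint64_t tma_addr)
{
        struct kfd_ioctl_set_trap_handler_args args = {
                .tba_addr = tba_addr,   /* trap handler base address */
                .tma_addr = tma_addr,   /* trap memory area address */
                .gpu_id = gpu_id,
        };

        return ioctl(kfd_fd, AMDKFD_IOC_SET_TRAP_HANDLER, &args);
}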
#define AMDKFD_IOCTL_BASE 'K'
......