Commit 6fa7324a authored by Dave Airlie's avatar Dave Airlie

Merge tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux into drm-next

Major points for this pull request:
- Add dGPU support for amdkfd initialization code and queue handling. It's
  not complete support since the GPUVM part is missing (the under debate stuff).
- Enable PCIe atomics for dGPU if present
- Various adjustments to the amdgpu<-->amdkfd interface for dGPUs
- Refactor IOMMUv2 code to allow loading amdkfd without IOMMUv2 in the system
- Add HSA process eviction code in case of system memory pressure
- Various fixes and small changes

* tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux: (24 commits)
  uapi: Fix type used in ioctl parameter structures
  drm/amdkfd: Implement KFD process eviction/restore
  drm/amdkfd: Add GPUVM virtual address space to PDD
  drm/amdkfd: Remove unaligned memory access
  drm/amdkfd: Centralize IOMMUv2 code and make it conditional
  drm/amdgpu: Add submit IB function for KFD
  drm/amdgpu: Add GPUVM memory management functions for KFD
  drm/amdgpu: add amdgpu_sync_clone
  drm/amdgpu: Update kgd2kfd_shared_resources for dGPU support
  drm/amdgpu: Add KFD eviction fence
  drm/amdgpu: Remove unused kfd2kgd interface
  drm/amdgpu: Fix wrong mask in get_atc_vmid_pasid_mapping_pasid
  drm/amdgpu: Fix header file dependencies
  drm/amdgpu: Replace kgd_mem with amdgpu_bo for kernel pinned gtt mem
  drm/amdgpu: remove useless BUG_ONs
  drm/amdgpu: Enable KFD initialization on dGPUs
  drm/amdkfd: Add dGPU device IDs and device info
  drm/amdkfd: Add dGPU support to kernel_queue_init
  drm/amdkfd: Add dGPU support to the MQD manager
  drm/amdkfd: Add dGPU support to the device queue manager
  ...
parents 0b8eeac5 a1102445
......@@ -766,6 +766,8 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
F: drivers/gpu/drm/amd/amdkfd/
F: drivers/gpu/drm/amd/include/cik_structs.h
F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h
......
......@@ -171,6 +171,7 @@ void dma_fence_release(struct kref *kref)
trace_dma_fence_destroy(fence);
/* Failed to signal before release, could be a refcounting issue */
WARN_ON(!list_empty(&fence->cb_list));
if (fence->ops->release)
......
......@@ -129,6 +129,8 @@ amdgpu-y += \
# add amdkfd interfaces
amdgpu-y += \
amdgpu_amdkfd.o \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
amdgpu_amdkfd_gfx_v8.o
# add cgs
......
......@@ -30,6 +30,8 @@
const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
static const unsigned int compute_vmid_bitmap = 0xFF00;
int amdgpu_amdkfd_init(void)
{
int ret;
......@@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void)
#else
ret = -ENOENT;
#endif
amdgpu_amdkfd_gpuvm_init_mem_limits();
return ret;
}
......@@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
case CHIP_HAWAII:
kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
break;
#endif
case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
default:
......@@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
.compute_vmid_bitmap = compute_vmid_bitmap,
.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
.gpuvm_size = min(adev->vm_manager.max_pfn
<< AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_VA_HOLE_START),
.drm_render_minor = adev->ddev->render->index
};
/* this is going to have a few of the MSBs set that we need to
......@@ -204,19 +216,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **cpu_ptr)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
struct amdgpu_bo *bo = NULL;
int r;
BUG_ON(kgd == NULL);
BUG_ON(gpu_addr == NULL);
BUG_ON(cpu_ptr == NULL);
*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if ((*mem) == NULL)
return -ENOMEM;
uint64_t gpu_addr_tmp = 0;
void *cpu_ptr_tmp = NULL;
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &bo);
if (r) {
dev_err(adev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
......@@ -224,54 +230,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
}
/* map the buffer */
r = amdgpu_bo_reserve((*mem)->bo, true);
r = amdgpu_bo_reserve(bo, true);
if (r) {
dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
goto allocate_mem_reserve_bo_failed;
}
r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
&(*mem)->gpu_addr);
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
&gpu_addr_tmp);
if (r) {
dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
goto allocate_mem_pin_bo_failed;
}
*gpu_addr = (*mem)->gpu_addr;
r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
if (r) {
dev_err(adev->dev,
"(%d) failed to map bo to kernel for amdkfd\n", r);
goto allocate_mem_kmap_bo_failed;
}
*cpu_ptr = (*mem)->cpu_ptr;
amdgpu_bo_unreserve((*mem)->bo);
*mem_obj = bo;
*gpu_addr = gpu_addr_tmp;
*cpu_ptr = cpu_ptr_tmp;
amdgpu_bo_unreserve(bo);
return 0;
allocate_mem_kmap_bo_failed:
amdgpu_bo_unpin((*mem)->bo);
amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
amdgpu_bo_unreserve((*mem)->bo);
amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
amdgpu_bo_unref(&(*mem)->bo);
amdgpu_bo_unref(&bo);
return r;
}
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
BUG_ON(mem == NULL);
amdgpu_bo_reserve(mem->bo, true);
amdgpu_bo_kunmap(mem->bo);
amdgpu_bo_unpin(mem->bo);
amdgpu_bo_unreserve(mem->bo);
amdgpu_bo_unref(&(mem->bo));
kfree(mem);
amdgpu_bo_reserve(bo, true);
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
amdgpu_bo_unreserve(bo);
amdgpu_bo_unref(&(bo));
}
void get_local_mem_info(struct kgd_dev *kgd,
......@@ -361,3 +366,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
}
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct amdgpu_ring *ring;
struct dma_fence *f = NULL;
int ret;
switch (engine) {
case KGD_ENGINE_MEC1:
ring = &adev->gfx.compute_ring[0];
break;
case KGD_ENGINE_SDMA1:
ring = &adev->sdma.instance[0].ring;
break;
case KGD_ENGINE_SDMA2:
ring = &adev->sdma.instance[1].ring;
break;
default:
pr_err("Invalid engine in IB submission: %d\n", engine);
ret = -EINVAL;
goto err;
}
ret = amdgpu_job_alloc(adev, 1, &job, NULL);
if (ret)
goto err;
ib = &job->ibs[0];
memset(ib, 0, sizeof(struct amdgpu_ib));
ib->gpu_addr = gpu_addr;
ib->ptr = ib_cmd;
ib->length_dw = ib_len;
/* This works for NO_HWS. TODO: need to handle without knowing VMID */
job->vmid = vmid;
ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
if (ret) {
DRM_ERROR("amdgpu: failed to schedule IB.\n");
goto err_ib_sched;
}
ret = dma_fence_wait(f, false);
err_ib_sched:
dma_fence_put(f);
amdgpu_job_free(job);
err:
return ret;
}
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
if (adev->kfd) {
if ((1 << vmid) & compute_vmid_bitmap)
return true;
}
return false;
}
......@@ -28,13 +28,89 @@
#include <linux/types.h>
#include <linux/mmu_context.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"
extern const struct kgd2kfd_calls *kgd2kfd;
struct amdgpu_device;
struct kfd_bo_va_list {
struct list_head bo_list;
struct amdgpu_bo_va *bo_va;
void *kgd_dev;
bool is_mapped;
uint64_t va;
uint64_t pte_flags;
};
struct kgd_mem {
struct mutex lock;
struct amdgpu_bo *bo;
uint64_t gpu_addr;
void *cpu_ptr;
struct list_head bo_va_list;
/* protected by amdkfd_process_info.lock */
struct ttm_validate_buffer validate_list;
struct ttm_validate_buffer resv_list;
uint32_t domain;
unsigned int mapped_to_gpu_memory;
uint64_t va;
uint32_t mapping_flags;
struct amdkfd_process_info *process_info;
struct amdgpu_sync sync;
bool aql_queue;
};
/* KFD Memory Eviction */
struct amdgpu_amdkfd_fence {
struct dma_fence base;
struct mm_struct *mm;
spinlock_t lock;
char timeline_name[TASK_COMM_LEN];
};
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm);
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
struct amdkfd_process_info {
/* List head of all VMs that belong to a KFD process */
struct list_head vm_list_head;
/* List head for all KFD BOs that belong to a KFD process. */
struct list_head kfd_bo_list;
/* Lock to protect kfd_bo_list */
struct mutex lock;
/* Number of VMs */
unsigned int n_vms;
/* Eviction Fence */
struct amdgpu_amdkfd_fence *eviction_fence;
};
/* struct amdkfd_vm -
* For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs
* belonging to a KFD process. All the VMs belonging to the same process point
* to the same amdkfd_process_info.
*/
struct amdkfd_vm {
/* Keep base as the first parameter for pointer compatibility between
* amdkfd_vm and amdgpu_vm.
*/
struct amdgpu_vm base;
/* List node in amdkfd_process_info.vm_list_head*/
struct list_head vm_list_node;
struct amdgpu_device *adev;
/* Points to the KFD process VM info*/
struct amdkfd_process_info *process_info;
uint64_t pd_phys_addr;
};
int amdgpu_amdkfd_init(void);
......@@ -48,9 +124,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
/* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
......@@ -79,4 +161,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
valid; \
})
/* GPUVM API */
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
void *vm, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem);
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
int amdgpu_amdkfd_gpuvm_sync_memory(
struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
struct kgd_mem *mem, void **kptr, uint64_t *size);
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
/*
* Copyright 2016-2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/dma-fence.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/stacktrace.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include "amdgpu_amdkfd.h"
static const struct dma_fence_ops amdkfd_fence_ops;
static atomic_t fence_seq = ATOMIC_INIT(0);
/* Eviction Fence
* Fence helper functions to deal with KFD memory eviction.
* Big Idea - Since KFD submissions are done by user queues, a BO cannot be
* evicted unless all the user queues for that process are evicted.
*
* All the BOs in a process share an eviction fence. When process X wants
* to map VRAM memory but TTM can't find enough space, TTM will attempt to
* evict BOs from its LRU list. TTM checks if the BO is valuable to evict
* by calling ttm_bo_driver->eviction_valuable().
*
* ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
* to process X. Otherwise, it will return true to indicate BO can be
* evicted by TTM.
*
* If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
* the evcition process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
* --> amdgpu_copy_buffer(). This sets up job in GPU scheduler.
*
* GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
* nofity when the BO is free to move. fence_add_callback --> enable_signaling
* --> amdgpu_amdkfd_fence.enable_signaling
*
* amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
* user queues and signal fence. The work item will also start another delayed
* work item to restore BOs
*/
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm)
{
struct amdgpu_amdkfd_fence *fence;
fence = kzalloc(sizeof(*fence), GFP_KERNEL);
if (fence == NULL)
return NULL;
/* This reference gets released in amdkfd_fence_release */
mmgrab(mm);
fence->mm = mm;
get_task_comm(fence->timeline_name, current);
spin_lock_init(&fence->lock);
dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
context, atomic_inc_return(&fence_seq));
return fence;
}
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
{
struct amdgpu_amdkfd_fence *fence;
if (!f)
return NULL;
fence = container_of(f, struct amdgpu_amdkfd_fence, base);
if (fence && f->ops == &amdkfd_fence_ops)
return fence;
return NULL;
}
static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
{
return "amdgpu_amdkfd_fence";
}
static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
{
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
return fence->timeline_name;
}
/**
* amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
* a KFD BO and schedules a job to move the BO.
* If fence is already signaled return true.
* If fence is not signaled schedule a evict KFD process work item.
*/
static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
{
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
if (!fence)
return false;
if (dma_fence_is_signaled(f))
return true;
if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f))
return true;
return false;
}
/**
* amdkfd_fence_release - callback that fence can be freed
*
* @fence: fence
*
* This function is called when the reference count becomes zero.
* Drops the mm_struct reference and RCU schedules freeing up the fence.
*/
static void amdkfd_fence_release(struct dma_fence *f)
{
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
/* Unconditionally signal the fence. The process is getting
* terminated.
*/
if (WARN_ON(!fence))
return; /* Not an amdgpu_amdkfd_fence */
mmdrop(fence->mm);
kfree_rcu(f, rcu);
}
/**
* amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f
* if same return TRUE else return FALSE.
*
* @f: [IN] fence
* @mm: [IN] mm that needs to be verified
*/
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
if (!fence)
return false;
else if (fence->mm == mm)
return true;
return false;
}
static const struct dma_fence_ops amdkfd_fence_ops = {
.get_driver_name = amdkfd_fence_get_driver_name,
.get_timeline_name = amdkfd_fence_get_timeline_name,
.enable_signaling = amdkfd_fence_enable_signaling,
.signaled = NULL,
.wait = dma_fence_default_wait,
.release = amdkfd_fence_release,
};
......@@ -139,11 +139,14 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
......@@ -196,12 +199,25 @@ static const struct kfd2kgd_calls kfd2kgd = {
.address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
.write_vmid_invalidate_request = write_vmid_invalidate_request,
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
.get_cu_info = get_cu_info,
.get_vram_usage = amdgpu_amdkfd_get_vram_usage
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
......@@ -787,14 +803,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}
static void set_scratch_backing_va(struct kgd_dev *kgd,
......@@ -812,8 +821,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
const union amdgpu_firmware_header *hdr;
BUG_ON(kgd == NULL);
switch (type) {
case KGD_ENGINE_PFP:
hdr = (const union amdgpu_firmware_header *)
......@@ -866,3 +873,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
return hdr->common.ucode_version;
}
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID\n");
return;
}
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
}
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
int vmid;
unsigned int tmp;
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue;
tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
RREG32(mmVM_INVALIDATE_RESPONSE);
break;
}
}
return 0;
}
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("non kfd vmid\n");
return 0;
}
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
RREG32(mmVM_INVALIDATE_RESPONSE);
return 0;
}
......@@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
unsigned int utimeout);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
unsigned int watch_point_id,
......@@ -99,10 +98,13 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
......@@ -157,12 +159,25 @@ static const struct kfd2kgd_calls kfd2kgd = {
get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid =
get_atc_vmid_pasid_mapping_valid,
.write_vmid_invalidate_request = write_vmid_invalidate_request,
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
.get_cu_info = get_cu_info,
.get_vram_usage = amdgpu_amdkfd_get_vram_usage
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
......@@ -704,14 +719,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}
static int kgd_address_watch_disable(struct kgd_dev *kgd)
......@@ -775,8 +783,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
const union amdgpu_firmware_header *hdr;
BUG_ON(kgd == NULL);
switch (type) {
case KGD_ENGINE_PFP:
hdr = (const union amdgpu_firmware_header *)
......@@ -828,3 +834,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
/* Only 12 bit in use*/
return hdr->common.ucode_version;
}
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID\n");
return;
}
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
}
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
int vmid;
unsigned int tmp;
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue;
tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
RREG32(mmVM_INVALIDATE_RESPONSE);
break;
}
}
return 0;
}
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("non kfd vmid %d\n", vmid);
return -EINVAL;
}
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
RREG32(mmVM_INVALIDATE_RESPONSE);
return 0;
}
This diff is collapsed.
......@@ -36,6 +36,7 @@
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
static bool amdgpu_need_backup(struct amdgpu_device *adev)
{
......@@ -54,6 +55,9 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
if (bo->kfd_bo)
amdgpu_amdkfd_unreserve_system_memory_limit(bo);
amdgpu_bo_kunmap(bo);
drm_gem_object_release(&bo->gem_base);
......
......@@ -92,6 +92,8 @@ struct amdgpu_bo {
struct list_head mn_list;
struct list_head shadow_list;
};
struct kgd_mem *kfd_bo;
};
static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
......
......@@ -26,6 +26,7 @@
#include <drm/amdgpu_drm.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_print.h>
/* max number of rings */
#define AMDGPU_MAX_RINGS 18
......@@ -35,8 +36,9 @@
#define AMDGPU_MAX_UVD_ENC_RINGS 2
/* some special values for the owner field */
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul)
#define AMDGPU_FENCE_OWNER_VM ((void*)1ul)
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul)
#define AMDGPU_FENCE_OWNER_VM ((void *)1ul)
#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul)
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
......
......@@ -31,6 +31,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
struct amdgpu_sync_entry {
struct hlist_node node;
......@@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
*/
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
struct drm_sched_fence *s_fence;
struct amdgpu_amdkfd_fence *kfd_fence;
if (!f)
return AMDGPU_FENCE_OWNER_UNDEFINED;
s_fence = to_drm_sched_fence(f);
if (s_fence)
return s_fence->owner;
kfd_fence = to_amdgpu_amdkfd_fence(f);
if (kfd_fence)
return AMDGPU_FENCE_OWNER_KFD;
return AMDGPU_FENCE_OWNER_UNDEFINED;
}
......@@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
reservation_object_held(resv));
/* We only want to trigger KFD eviction fences on
* evict or move jobs. Skip KFD fences otherwise.
*/
fence_owner = amdgpu_sync_get_owner(f);
if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
owner != AMDGPU_FENCE_OWNER_UNDEFINED)
continue;
if (amdgpu_sync_same_dev(adev, f)) {
/* VM updates are only interesting
* for other VM updates and moves.
*/
fence_owner = amdgpu_sync_get_owner(f);
if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
(fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
((owner == AMDGPU_FENCE_OWNER_VM) !=
......@@ -305,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit
return NULL;
}
/**
* amdgpu_sync_clone - clone a sync object
*
* @source: sync object to clone
* @clone: pointer to destination sync object
*
* Adds references to all unsignaled fences in @source to @clone. Also
* removes signaled fences from @source while at it.
*/
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
struct dma_fence *f;
int i, r;
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
if (r)
return r;
} else {
hash_del(&e->node);
dma_fence_put(f);
kmem_cache_free(amdgpu_sync_slab, e);
}
}
dma_fence_put(clone->last_vm_update);
clone->last_vm_update = dma_fence_get(source->last_vm_update);
return 0;
}
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
struct amdgpu_sync_entry *e;
......
......@@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
int amdgpu_sync_init(void);
......
......@@ -46,6 +46,7 @@
#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "bif/bif_4_1_d.h"
#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
......@@ -258,6 +259,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
/*
* Don't verify access for KFD BOs. They don't have a GEM
* object associated with them.
*/
if (abo->kfd_bo)
return 0;
if (amdgpu_ttm_tt_get_usermm(bo->ttm))
return -EPERM;
return drm_vma_node_verify_access(&abo->gem_base.vma_node,
......@@ -1171,6 +1179,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
{
unsigned long num_pages = bo->mem.num_pages;
struct drm_mm_node *node = bo->mem.mm_node;
struct reservation_object_list *flist;
struct dma_fence *f;
int i;
/* If bo is a KFD BO, check if the bo belongs to the current process.
* If true, then return false as any KFD process needs all its BOs to
* be resident to run successfully
*/
flist = reservation_object_get_list(bo->resv);
if (flist) {
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
reservation_object_held(bo->resv));
if (amdkfd_fence_check_mm(f, current->mm))
return false;
}
}
switch (bo->mem.mem_type) {
case TTM_PL_TT:
......
......@@ -28,6 +28,7 @@
#include <linux/kfifo.h>
#include <linux/rbtree.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_file.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
......
......@@ -4,6 +4,7 @@
config HSA_AMD
tristate "HSA kernel driver for AMD GPU devices"
depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64
depends on DRM_AMDGPU && X86_64
imply AMD_IOMMU_V2
help
Enable this if you want to use HSA features on AMD GPU devices.
......@@ -37,6 +37,10 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
ifneq ($(CONFIG_AMD_IOMMU_V2),)
amdkfd-y += kfd_iommu.o
endif
amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o
......@@ -901,7 +901,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
mutex_unlock(&p->mutex);
if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0)
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
pdd->qpd.vmid != 0)
dev->kfd2kgd->set_scratch_backing_va(
dev->kgd, args->va_addr, pdd->qpd.vmid);
......
......@@ -22,10 +22,10 @@
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/amd-iommu.h>
#include "kfd_crat.h"
#include "kfd_priv.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
* GPU processor ID are expressed with Bit[31]=1.
......@@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
struct crat_subtype_generic *sub_type_hdr;
struct crat_subtype_computeunit *cu;
struct kfd_cu_info cu_info;
struct amd_iommu_device_info iommu_info;
int avail_size = *size;
uint32_t total_num_of_cu;
int num_of_cache_entries = 0;
int cache_mem_filled = 0;
int ret = 0;
const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
struct kfd_local_mem_info local_mem_info;
if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
......@@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
/* Check if this node supports IOMMU. During parsing this flag will
* translate to HSA_CAP_ATS_PRESENT
*/
iommu_info.flags = 0;
if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) {
if ((iommu_info.flags & required_iommu_flags) ==
required_iommu_flags)
cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
}
if (!kfd_iommu_check_device(kdev))
cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
crat_table->length += sub_type_hdr->length;
crat_table->total_entries++;
......
......@@ -33,6 +33,7 @@
#include "kfd_pm4_headers_diq.h"
#include "kfd_dbgmgr.h"
#include "kfd_dbgdev.h"
#include "kfd_device_queue_manager.h"
static DEFINE_MUTEX(kfd_dbgmgr_mutex);
......@@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
}
/* get actual type of DBGDevice cpsch or not */
if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
type = DBGDEV_TYPE_NODIQ;
kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
......
This diff is collapsed.
......@@ -79,6 +79,10 @@ struct device_process_node {
*
* @process_termination: Clears all process queues belongs to that device.
*
* @evict_process_queues: Evict all active queues of a process
*
* @restore_process_queues: Restore all evicted queues queues of a process
*
*/
struct device_queue_manager_ops {
......@@ -129,6 +133,11 @@ struct device_queue_manager_ops {
int (*process_termination)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
int (*evict_process_queues)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
int (*restore_process_queues)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
};
struct device_queue_manager_asic_ops {
......@@ -180,12 +189,17 @@ struct device_queue_manager {
unsigned int *fence_addr;
struct kfd_mem_obj *fence_mem;
bool active_runlist;
int sched_policy;
};
void device_queue_manager_init_cik(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_cik_hawaii(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi_tonga(
struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
unsigned int get_queues_num(struct device_queue_manager *dqm);
......
......@@ -34,8 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
uint64_t alternate_aperture_size);
static int update_qpd_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
void device_queue_manager_init_cik(
struct device_queue_manager_asic_ops *asic_ops)
......@@ -45,6 +50,14 @@ void device_queue_manager_init_cik(
asic_ops->init_sdma_vm = init_sdma_vm;
}
void device_queue_manager_init_cik_hawaii(
struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
asic_ops->update_qpd = update_qpd_cik_hawaii;
asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
/* In 64-bit mode, we can only control the top 3 bits of the LDS,
......@@ -132,6 +145,36 @@ static int update_qpd_cik(struct device_queue_manager *dqm,
return 0;
}
static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
unsigned int temp;
pdd = qpd_to_pdd(qpd);
/* check if sh_mem_config register already configured */
if (qpd->sh_mem_config == 0) {
qpd->sh_mem_config =
ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
DEFAULT_MTYPE(MTYPE_NONCACHED) |
APE1_MTYPE(MTYPE_NONCACHED);
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
temp = get_sh_mem_bases_nybble_64(pdd);
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
return 0;
}
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
......@@ -147,3 +190,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
q->properties.sdma_vm_addr = value;
}
static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd)
{
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
q->properties.sdma_vm_addr =
((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
}
......@@ -33,10 +33,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops)
......@@ -46,6 +57,14 @@ void device_queue_manager_init_vi(
asic_ops->init_sdma_vm = init_sdma_vm;
}
void device_queue_manager_init_vi_tonga(
struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
asic_ops->update_qpd = update_qpd_vi_tonga;
asic_ops->init_sdma_vm = init_sdma_vm_tonga;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
/* In 64-bit mode, we can only control the top 3 bits of the LDS,
......@@ -103,6 +122,33 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
return true;
}
static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_UC :
MTYPE_NC;
ape1_mtype = (alternate_policy == cache_policy_coherent) ?
MTYPE_UC :
MTYPE_NC;
qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
return true;
}
static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
......@@ -144,6 +190,40 @@ static int update_qpd_vi(struct device_queue_manager *dqm,
return 0;
}
static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
unsigned int temp;
pdd = qpd_to_pdd(qpd);
/* check if sh_mem_config register already configured */
if (qpd->sh_mem_config == 0) {
qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
MTYPE_UC <<
SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
MTYPE_UC <<
SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
temp = get_sh_mem_bases_nybble_64(pdd);
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
temp, qpd->sh_mem_bases);
return 0;
}
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
......@@ -159,3 +239,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
q->properties.sdma_vm_addr = value;
}
static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd)
{
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
q->properties.sdma_vm_addr =
((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
}
......@@ -30,6 +30,7 @@
#include <linux/memory.h>
#include "kfd_priv.h"
#include "kfd_events.h"
#include "kfd_iommu.h"
#include <linux/device.h>
/*
......@@ -837,6 +838,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
}
}
#ifdef KFD_SUPPORT_IOMMU_V2
void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
unsigned long address, bool is_write_requested,
bool is_execute_requested)
......@@ -905,6 +907,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
mutex_unlock(&p->event_mutex);
kfd_unref_process(p);
}
#endif /* KFD_SUPPORT_IOMMU_V2 */
void kfd_signal_hw_exception_event(unsigned int pasid)
{
......
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/printk.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/amd-iommu.h>
#include "kfd_priv.h"
#include "kfd_dbgmgr.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
/** kfd_iommu_check_device - Check whether IOMMU is available for device
*/
int kfd_iommu_check_device(struct kfd_dev *kfd)
{
struct amd_iommu_device_info iommu_info;
int err;
if (!kfd->device_info->needs_iommu_device)
return -ENODEV;
iommu_info.flags = 0;
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
if (err)
return err;
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags)
return -ENODEV;
return 0;
}
/** kfd_iommu_device_init - Initialize IOMMU for device
*/
int kfd_iommu_device_init(struct kfd_dev *kfd)
{
struct amd_iommu_device_info iommu_info;
unsigned int pasid_limit;
int err;
if (!kfd->device_info->needs_iommu_device)
return 0;
iommu_info.flags = 0;
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
if (err < 0) {
dev_err(kfd_device,
"error getting iommu info. is the iommu enabled?\n");
return -ENODEV;
}
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
dev_err(kfd_device,
"error required iommu flags ats %i, pri %i, pasid %i\n",
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
!= 0);
return -ENODEV;
}
pasid_limit = min_t(unsigned int,
(unsigned int)(1 << kfd->device_info->max_pasid_bits),
iommu_info.max_pasids);
if (!kfd_set_pasid_limit(pasid_limit)) {
dev_err(kfd_device, "error setting pasid limit\n");
return -EBUSY;
}
return 0;
}
/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process
*
* Binds the given process to the given device using its PASID. This
* enables IOMMUv2 address translation for the process on the device.
*
* This function assumes that the process mutex is held.
*/
int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
{
struct kfd_dev *dev = pdd->dev;
struct kfd_process *p = pdd->process;
int err;
if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND)
return 0;
if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
return -EINVAL;
}
err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
if (!err)
pdd->bound = PDD_BOUND;
return err;
}
/** kfd_iommu_unbind_process - Unbind process from all devices
*
* This removes all IOMMU device bindings of the process. To be used
* before process termination.
*/
void kfd_iommu_unbind_process(struct kfd_process *p)
{
struct kfd_process_device *pdd;
list_for_each_entry(pdd, &p->per_device_data, per_device_list)
if (pdd->bound == PDD_BOUND)
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
}
/* Callback for process shutdown invoked by the IOMMU driver */
static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
{
struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
struct kfd_process *p;
struct kfd_process_device *pdd;
if (!dev)
return;
/*
* Look for the process that matches the pasid. If there is no such
* process, we either released it in amdkfd's own notifier, or there
* is a bug. Unfortunately, there is no way to tell...
*/
p = kfd_lookup_process_by_pasid(pasid);
if (!p)
return;
pr_debug("Unbinding process %d from IOMMU\n", pasid);
mutex_lock(kfd_get_dbgmgr_mutex());
if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
kfd_dbgmgr_destroy(dev->dbgmgr);
dev->dbgmgr = NULL;
}
}
mutex_unlock(kfd_get_dbgmgr_mutex());
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(dev, p);
if (pdd)
/* For GPU relying on IOMMU, we need to dequeue here
* when PASID is still bound.
*/
kfd_process_dequeue_from_device(pdd);
mutex_unlock(&p->mutex);
kfd_unref_process(p);
}
/* This function called by IOMMU driver on PPR failure */
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
unsigned long address, u16 flags)
{
struct kfd_dev *dev;
dev_warn(kfd_device,
"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
PCI_BUS_NUM(pdev->devfn),
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn),
pasid,
address,
flags);
dev = kfd_device_by_pci_dev(pdev);
if (!WARN_ON(!dev))
kfd_signal_iommu_event(dev, pasid, address,
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
return AMD_IOMMU_INV_PRI_RSP_INVALID;
}
/*
* Bind processes do the device that have been temporarily unbound
* (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
*/
static int kfd_bind_processes_to_device(struct kfd_dev *kfd)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
unsigned int temp;
int err = 0;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(kfd, p);
if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
mutex_unlock(&p->mutex);
continue;
}
err = amd_iommu_bind_pasid(kfd->pdev, p->pasid,
p->lead_thread);
if (err < 0) {
pr_err("Unexpected pasid %d binding failure\n",
p->pasid);
mutex_unlock(&p->mutex);
break;
}
pdd->bound = PDD_BOUND;
mutex_unlock(&p->mutex);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
return err;
}
/*
* Mark currently bound processes as PDD_BOUND_SUSPENDED. These
* processes will be restored to PDD_BOUND state in
* kfd_bind_processes_to_device.
*/
static void kfd_unbind_processes_from_device(struct kfd_dev *kfd)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(kfd, p);
if (WARN_ON(!pdd)) {
mutex_unlock(&p->mutex);
continue;
}
if (pdd->bound == PDD_BOUND)
pdd->bound = PDD_BOUND_SUSPENDED;
mutex_unlock(&p->mutex);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
/** kfd_iommu_suspend - Prepare IOMMU for suspend
*
* This unbinds processes from the device and disables the IOMMU for
* the device.
*/
void kfd_iommu_suspend(struct kfd_dev *kfd)
{
if (!kfd->device_info->needs_iommu_device)
return;
kfd_unbind_processes_from_device(kfd);
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev);
}
/** kfd_iommu_resume - Restore IOMMU after resume
*
* This reinitializes the IOMMU for the device and re-binds previously
* suspended processes to the device.
*/
int kfd_iommu_resume(struct kfd_dev *kfd)
{
unsigned int pasid_limit;
int err;
if (!kfd->device_info->needs_iommu_device)
return 0;
pasid_limit = kfd_get_pasid_limit();
err = amd_iommu_init_device(kfd->pdev, pasid_limit);
if (err)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback);
amd_iommu_set_invalid_ppr_cb(kfd->pdev,
iommu_invalid_ppr_cb);
err = kfd_bind_processes_to_device(kfd);
if (err) {
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev);
return err;
}
return 0;
}
extern bool amd_iommu_pc_supported(void);
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology
*/
int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
{
struct kfd_perf_properties *props;
if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT))
return 0;
if (!amd_iommu_pc_supported())
return 0;
props = kfd_alloc_struct(props);
if (!props)
return -ENOMEM;
strcpy(props->block_name, "iommu");
props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
amd_iommu_pc_get_max_counters(0); /* assume one iommu */
list_add_tail(&props->list, &kdev->perf_props);
return 0;
}
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __KFD_IOMMU_H__
#define __KFD_IOMMU_H__
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
#define KFD_SUPPORT_IOMMU_V2
int kfd_iommu_check_device(struct kfd_dev *kfd);
int kfd_iommu_device_init(struct kfd_dev *kfd);
int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd);
void kfd_iommu_unbind_process(struct kfd_process *p);
void kfd_iommu_suspend(struct kfd_dev *kfd);
int kfd_iommu_resume(struct kfd_dev *kfd);
int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev);
#else
static inline int kfd_iommu_check_device(struct kfd_dev *kfd)
{
return -ENODEV;
}
static inline int kfd_iommu_device_init(struct kfd_dev *kfd)
{
return 0;
}
static inline int kfd_iommu_bind_process_to_device(
struct kfd_process_device *pdd)
{
return 0;
}
static inline void kfd_iommu_unbind_process(struct kfd_process *p)
{
/* empty */
}
static inline void kfd_iommu_suspend(struct kfd_dev *kfd)
{
/* empty */
}
static inline int kfd_iommu_resume(struct kfd_dev *kfd)
{
return 0;
}
static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
{
return 0;
}
#endif /* defined(CONFIG_AMD_IOMMU_V2) */
#endif /* __KFD_IOMMU_H__ */
......@@ -297,10 +297,15 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
switch (dev->device_info->asic_family) {
case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
kernel_queue_init_vi(&kq->ops_asic_specific);
break;
case CHIP_KAVERI:
case CHIP_HAWAII:
kernel_queue_init_cik(&kq->ops_asic_specific);
break;
default:
......
......@@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
.interrupt = kgd2kfd_interrupt,
.suspend = kgd2kfd_suspend,
.resume = kgd2kfd_resume,
.schedule_evict_and_restore_process =
kgd2kfd_schedule_evict_and_restore_process,
};
int sched_policy = KFD_SCHED_POLICY_HWS;
......
......@@ -29,8 +29,15 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
switch (dev->device_info->asic_family) {
case CHIP_KAVERI:
return mqd_manager_init_cik(type, dev);
case CHIP_HAWAII:
return mqd_manager_init_cik_hawaii(type, dev);
case CHIP_CARRIZO:
return mqd_manager_init_vi(type, dev);
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
return mqd_manager_init_vi_tonga(type, dev);
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
......
......@@ -170,14 +170,19 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
mms);
}
static int update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
static int __update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q, unsigned int atc_bit)
{
struct cik_mqd *m;
m = get_mqd(mqd);
m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
DEFAULT_MIN_AVAIL_SIZE;
m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
if (atc_bit) {
m->cp_hqd_pq_control |= PQ_ATC_EN;
m->cp_hqd_ib_control |= IB_ATC_EN;
}
/*
* Calculating queue size which is log base 2 of actual queue size -1
......@@ -197,11 +202,24 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0);
q->queue_percent > 0 &&
!q->is_evicted);
return 0;
}
static int update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
return __update_mqd(mm, mqd, q, 1);
}
static int update_mqd_hawaii(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
return __update_mqd(mm, mqd, q, 0);
}
static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
......@@ -228,7 +246,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0);
q->queue_percent > 0 &&
!q->is_evicted);
return 0;
}
......@@ -360,7 +379,8 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0);
q->queue_percent > 0 &&
!q->is_evicted);
return 0;
}
......@@ -441,3 +461,15 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
return mqd;
}
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
{
struct mqd_manager *mqd;
mqd = mqd_manager_init_cik(type, dev);
if (!mqd)
return NULL;
if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
mqd->update_mqd = update_mqd_hawaii;
return mqd;
}
......@@ -151,6 +151,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
m->cp_hqd_pq_doorbell_control =
q->doorbell_off <<
......@@ -196,7 +198,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0);
q->queue_percent > 0 &&
!q->is_evicted);
return 0;
}
......@@ -208,6 +211,12 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
return __update_mqd(mm, mqd, q, MTYPE_CC, 1);
}
static int update_mqd_tonga(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
return __update_mqd(mm, mqd, q, MTYPE_UC, 0);
}
static int destroy_mqd(struct mqd_manager *mm, void *mqd,
enum kfd_preempt_type type,
unsigned int timeout, uint32_t pipe_id,
......@@ -334,7 +343,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0);
q->queue_percent > 0 &&
!q->is_evicted);
return 0;
}
......@@ -432,3 +442,16 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
return mqd;
}
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
{
struct mqd_manager *mqd;
mqd = mqd_manager_init_vi(type, dev);
if (!mqd)
return NULL;
if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
mqd->update_mqd = update_mqd_tonga;
return mqd;
}
......@@ -158,6 +158,8 @@ struct kfd_device_info {
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
bool supports_cwsr;
bool needs_iommu_device;
bool needs_pci_atomics;
};
struct kfd_mem_obj {
......@@ -333,7 +335,11 @@ enum kfd_queue_format {
* @is_interop: Defines if this is a interop queue. Interop queue means that
* the queue can access both graphics and compute resources.
*
* @is_active: Defines if the queue is active or not.
* @is_evicted: Defines if the queue is evicted. Only active queues
* are evicted, rendering them inactive.
*
* @is_active: Defines if the queue is active or not. @is_active and
* @is_evicted are protected by the DQM lock.
*
* @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
* of the queue.
......@@ -355,6 +361,7 @@ struct queue_properties {
uint32_t __iomem *doorbell_ptr;
uint32_t doorbell_off;
bool is_interop;
bool is_evicted;
bool is_active;
/* Not relevant for user mode queues in cp scheduling */
unsigned int vmid;
......@@ -458,6 +465,7 @@ struct qcm_process_device {
unsigned int queue_count;
unsigned int vmid;
bool is_debug;
unsigned int evicted; /* eviction counter, 0=active */
/* This flag tells if we should reset all wavefronts on
* process termination
......@@ -484,6 +492,17 @@ struct qcm_process_device {
uint64_t tma_addr;
};
/* KFD Memory Eviction */
/* Approx. wait time before attempting to restore evicted BOs */
#define PROCESS_RESTORE_TIME_MS 100
/* Approx. back off time if restore fails due to lack of memory */
#define PROCESS_BACK_OFF_TIME_MS 100
/* Approx. time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS 10
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence);
enum kfd_pdd_bound {
PDD_UNBOUND = 0,
......@@ -516,8 +535,8 @@ struct kfd_process_device {
uint64_t scratch_base;
uint64_t scratch_limit;
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
enum kfd_pdd_bound bound;
/* VM context for GPUVM allocations */
void *vm;
/* Flag used to tell the pdd has dequeued from the dqm.
* This is used to prevent dev->dqm->ops.process_termination() from
......@@ -525,6 +544,9 @@ struct kfd_process_device {
* function.
*/
bool already_dequeued;
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
enum kfd_pdd_bound bound;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
......@@ -587,8 +609,30 @@ struct kfd_process {
size_t signal_mapped_size;
size_t signal_event_count;
bool signal_event_limit_reached;
/* Information used for memory eviction */
void *kgd_process_info;
/* Eviction fence that is attached to all the BOs of this process. The
* fence will be triggered during eviction and new one will be created
* during restore
*/
struct dma_fence *ef;
/* Work items for evicting and restoring BOs */
struct delayed_work eviction_work;
struct delayed_work restore_work;
/* seqno of the last scheduled eviction */
unsigned int last_eviction_seqno;
/* Approx. the last timestamp (in jiffies) when the process was
* restored after an eviction
*/
unsigned long last_restore_timestamp;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
extern struct srcu_struct kfd_processes_srcu;
/**
* Ioctl function type.
*
......@@ -612,13 +656,13 @@ void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
void kfd_unref_process(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
struct kfd_process *p);
int kfd_bind_processes_to_device(struct kfd_dev *dev);
void kfd_unbind_processes_from_device(struct kfd_dev *dev);
void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid);
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
......@@ -705,8 +749,12 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
......@@ -795,6 +843,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint64_t *event_page_offset, uint32_t *event_slot_index);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
void kfd_flush_tlb(struct kfd_process_device *pdd);
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
/* Debugfs */
......
This diff is collapsed.
......@@ -208,7 +208,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
case KFD_QUEUE_TYPE_COMPUTE:
/* check if there is over subscription */
if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
if ((dev->dqm->sched_policy ==
KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
(dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
......
This diff is collapsed.
......@@ -25,7 +25,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include "kfd_priv.h"
#include "kfd_crat.h"
#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128
......@@ -183,8 +183,4 @@ struct kfd_topology_device *kfd_create_topology_device(
struct list_head *device_list);
void kfd_release_topology_device_list(struct list_head *device_list);
extern bool amd_iommu_pc_supported(void);
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
#endif /* __KFD_TOPOLOGY_H__ */
......@@ -263,10 +263,10 @@ struct kfd_ioctl_get_tile_config_args {
};
struct kfd_ioctl_set_trap_handler_args {
uint64_t tba_addr; /* to KFD */
uint64_t tma_addr; /* to KFD */
uint32_t gpu_id; /* to KFD */
uint32_t pad;
__u64 tba_addr; /* to KFD */
__u64 tma_addr; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 pad;
};
#define AMDKFD_IOCTL_BASE 'K'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment