Commit 9f36f9c8 authored by Dave Airlie's avatar Dave Airlie

Merge tag 'drm-amdkfd-next-2018-03-27' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- GPUVM support for dGPUs
- KFD events support for dGPUs
- Fix live-lock situation when restoring multiple evicted processes
- Fix VM page table allocation on large-bar systems
- Fix for build failure on frv architecture

* tag 'drm-amdkfd-next-2018-03-27' of git://people.freedesktop.org/~gabbayo/linux:
  drm/amdkfd: Use ordered workqueue to restore processes
  drm/amdgpu: Fix acquiring VM on large-BAR systems
  drm/amdkfd: Add module option for testing large-BAR functionality
  drm/amdkfd: Kmap event page for dGPUs
  drm/amdkfd: Add ioctls for GPUVM memory management
  drm/amdkfd: Add TC flush on VMID deallocation for Hawaii
  drm/amdkfd: Allocate CWSR trap handler memory for dGPUs
  drm/amdkfd: Add per-process IDR for buffer handles
  drm/amdkfd: Aperture setup for dGPUs
  drm/amdkfd: Remove limit on number of GPUs
  drm/amdkfd: Populate DRM render device minor
  drm/amdkfd: Create KFD VMs on demand
  drm/amdgpu: Add kfd2kgd interface to acquire an existing VM
  drm/amdgpu: Add helper to turn an existing VM into a compute VM
  drm/amdgpu: Fix initial validation of PD BO for KFD VMs
  drm/amdgpu: Move KFD-specific fields into struct amdgpu_vm
  drm/amdkfd: fix uninitialized variable use
  drm/amdkfd: add missing include of mm.h
parents cb17aa52 1679ae8f
......@@ -26,6 +26,7 @@
#define AMDGPU_AMDKFD_H_INCLUDED
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
......@@ -92,27 +93,6 @@ struct amdkfd_process_info {
struct amdgpu_amdkfd_fence *eviction_fence;
};
/* struct amdkfd_vm -
* For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs
* belonging to a KFD process. All the VMs belonging to the same process point
* to the same amdkfd_process_info.
*/
struct amdkfd_vm {
/* Keep base as the first parameter for pointer compatibility between
* amdkfd_vm and amdgpu_vm.
*/
struct amdgpu_vm base;
/* List node in amdkfd_process_info.vm_list_head*/
struct list_head vm_list_node;
struct amdgpu_device *adev;
/* Points to the KFD process VM info*/
struct amdkfd_process_info *process_info;
uint64_t pd_phys_addr;
};
int amdgpu_amdkfd_init(void);
void amdgpu_amdkfd_fini(void);
......@@ -165,6 +145,12 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
void **process_info,
struct dma_fence **ef);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp,
void **vm, void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
......
......@@ -205,6 +205,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_cu_info = get_cu_info,
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
......
......@@ -165,6 +165,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_cu_info = get_cu_info,
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
......
......@@ -32,6 +32,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
/*
* GPUVM
......@@ -2405,8 +2406,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (vm->use_cpu_for_update)
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
else
flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
AMDGPU_GEM_CREATE_SHADOW);
flags |= AMDGPU_GEM_CREATE_SHADOW;
size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
......@@ -2461,6 +2461,73 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return r;
}
/**
* amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
*
* This only works on GFX VMs that don't have any BOs added and no
* page tables allocated yet.
*
* Changes the following VM parameters:
* - use_cpu_for_update
* - pte_supports_ats
* - pasid (old PASID is released, because compute manages its own PASIDs)
*
* Reinitializes the page directory to reflect the changed ATS
* setting. May leave behind an unused shadow BO for the page
* directory when switching from SDMA updates to CPU updates.
*
* Returns 0 for success, -errno for errors.
*/
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
int r;
r = amdgpu_bo_reserve(vm->root.base.bo, true);
if (r)
return r;
/* Sanity checks */
if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
r = -EINVAL;
goto error;
}
/* Check if PD needs to be reinitialized and do it before
* changing any other state, in case it fails.
*/
if (pte_support_ats != vm->pte_support_ats) {
r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
adev->vm_manager.root_level,
pte_support_ats);
if (r)
goto error;
}
/* Update VM state */
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_COMPUTE);
vm->pte_support_ats = pte_support_ats;
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
"CPU update of VM recommended only for large BAR system\n");
if (vm->pasid) {
unsigned long flags;
spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
vm->pasid = 0;
}
error:
amdgpu_bo_unreserve(vm->root.base.bo);
return r;
}
/**
* amdgpu_vm_free_levels - free PD/PT levels
*
......@@ -2508,6 +2575,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
u64 fault;
int i, r;
amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
/* Clear pending page faults from IH when the VM is destroyed */
while (kfifo_get(&vm->faults, &fault))
amdgpu_ih_clear_fault(adev, fault);
......
......@@ -207,6 +207,15 @@ struct amdgpu_vm {
/* Limit non-retry fault storms */
unsigned int fault_credit;
/* Points to the KFD process VM info */
struct amdkfd_process_info *process_info;
/* List node in amdkfd_process_info.vm_list_head */
struct list_head vm_list_node;
/* Valid while the PD is reserved or fenced */
uint64_t pd_phys_addr;
};
struct amdgpu_vm_manager {
......@@ -251,6 +260,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, unsigned int pasid);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
unsigned int pasid);
......
This diff is collapsed.
......@@ -882,7 +882,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
crat_table->length = sizeof(struct crat_header);
status = acpi_get_table("DSDT", 0, &acpi_table);
if (status == AE_NOT_FOUND)
if (status != AE_OK)
pr_warn("DSDT table not found for OEM information\n");
else {
crat_table->oem_revision = acpi_table->revision;
......@@ -1117,6 +1117,9 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length);
if (debug_largebar)
local_mem_info.local_mem_size_private = 0;
if (local_mem_info.local_mem_size_private == 0)
ret = kfd_fill_gpu_memory_affinity(&avail_size,
kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
......
......@@ -142,12 +142,31 @@ static int allocate_vmid(struct device_queue_manager *dqm,
return 0;
}
static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
struct qcm_process_device *qpd)
{
uint32_t len;
if (!qpd->ib_kaddr)
return -ENOMEM;
len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
}
static void deallocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
/* On GFX v7, CP doesn't flush TC at dequeue */
if (q->device->device_info->asic_family == CHIP_HAWAII)
if (flush_texture_cache_nocpsch(q->device, qpd))
pr_err("Failed to flush TC\n");
kfd_flush_tlb(qpd_to_pdd(qpd));
/* Release the vmid mapping */
......@@ -792,11 +811,12 @@ static void uninitialize(struct device_queue_manager *dqm)
static int start_nocpsch(struct device_queue_manager *dqm)
{
init_interrupts(dqm);
return 0;
return pm_init(&dqm->packets, dqm);
}
static int stop_nocpsch(struct device_queue_manager *dqm)
{
pm_uninit(&dqm->packets);
return 0;
}
......
......@@ -52,6 +52,7 @@ struct kfd_event_waiter {
struct kfd_signal_page {
uint64_t *kernel_address;
uint64_t __user *user_address;
bool need_to_free_pages;
};
......@@ -79,6 +80,7 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
KFD_SIGNAL_EVENT_LIMIT * 8);
page->kernel_address = backing_store;
page->need_to_free_pages = true;
pr_debug("Allocated new event signal page at %p, for process %p\n",
page, p);
......@@ -269,8 +271,9 @@ static void shutdown_signal_page(struct kfd_process *p)
struct kfd_signal_page *page = p->signal_page;
if (page) {
free_pages((unsigned long)page->kernel_address,
get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
if (page->need_to_free_pages)
free_pages((unsigned long)page->kernel_address,
get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
kfree(page);
}
}
......@@ -292,6 +295,30 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
return ev->type == KFD_EVENT_TYPE_SIGNAL;
}
int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
uint64_t size)
{
struct kfd_signal_page *page;
if (p->signal_page)
return -EBUSY;
page = kzalloc(sizeof(*page), GFP_KERNEL);
if (!page)
return -ENOMEM;
/* Initialize all events to unsignaled */
memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT,
KFD_SIGNAL_EVENT_LIMIT * 8);
page->kernel_address = kernel_address;
p->signal_page = page;
p->signal_mapped_size = size;
return 0;
}
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data,
......
......@@ -278,21 +278,28 @@
#define MAKE_GPUVM_APP_BASE(gpu_num) \
(((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
#define MAKE_GPUVM_APP_LIMIT(base) \
(((uint64_t)(base) & \
0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL)
#define MAKE_GPUVM_APP_LIMIT(base, size) \
(((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
#define MAKE_SCRATCH_APP_BASE(gpu_num) \
(((uint64_t)(gpu_num) << 61) + 0x100000000L)
#define MAKE_SCRATCH_APP_BASE() \
(((uint64_t)(0x1UL) << 61) + 0x100000000L)
#define MAKE_SCRATCH_APP_LIMIT(base) \
(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
#define MAKE_LDS_APP_BASE(gpu_num) \
(((uint64_t)(gpu_num) << 61) + 0x0)
#define MAKE_LDS_APP_BASE() \
(((uint64_t)(0x1UL) << 61) + 0x0)
#define MAKE_LDS_APP_LIMIT(base) \
(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
/* User mode manages most of the SVM aperture address space. The low
* 16MB are reserved for kernel use (CWSR trap handler and kernel IB
* for now).
*/
#define SVM_USER_BASE 0x1000000ull
#define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
#define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE)
int kfd_init_apertures(struct kfd_process *process)
{
uint8_t id = 0;
......@@ -314,7 +321,7 @@ int kfd_init_apertures(struct kfd_process *process)
return -1;
}
/*
* For 64 bit process aperture will be statically reserved in
* For 64 bit process apertures will be statically reserved in
* the x86_64 non canonical process address space
* amdkfd doesn't currently support apertures for 32 bit process
*/
......@@ -323,23 +330,35 @@ int kfd_init_apertures(struct kfd_process *process)
pdd->gpuvm_base = pdd->gpuvm_limit = 0;
pdd->scratch_base = pdd->scratch_limit = 0;
} else {
/*
* node id couldn't be 0 - the three MSB bits of
* aperture shoudn't be 0
/* Same LDS and scratch apertures can be used
* on all GPUs. This allows using more dGPUs
* than placement options for apertures.
*/
pdd->lds_base = MAKE_LDS_APP_BASE(id + 1);
pdd->lds_base = MAKE_LDS_APP_BASE();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
pdd->gpuvm_limit =
MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1);
pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
pdd->scratch_limit =
MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
if (dev->device_info->needs_iommu_device) {
/* APUs: GPUVM aperture in
* non-canonical address space
*/
pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(
pdd->gpuvm_base,
dev->shared_resources.gpuvm_size);
} else {
/* dGPUs: SVM aperture starting at 0
* with small reserved space for kernel
*/
pdd->gpuvm_base = SVM_USER_BASE;
pdd->gpuvm_limit =
dev->shared_resources.gpuvm_size - 1;
pdd->qpd.cwsr_base = SVM_CWSR_BASE;
pdd->qpd.ib_base = SVM_IB_BASE;
}
}
dev_dbg(kfd_device, "node id %u\n", id);
......
......@@ -71,6 +71,11 @@ module_param(send_sigterm, int, 0444);
MODULE_PARM_DESC(send_sigterm,
"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
int debug_largebar;
module_param(debug_largebar, int, 0444);
MODULE_PARM_DESC(debug_largebar,
"Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
int ignore_crat;
module_param(ignore_crat, int, 0444);
MODULE_PARM_DESC(ignore_crat,
......@@ -128,7 +133,9 @@ static int __init kfd_module_init(void)
if (err < 0)
goto err_topology;
kfd_process_create_wq();
err = kfd_process_create_wq();
if (err < 0)
goto err_create_wq;
kfd_debugfs_init();
......@@ -138,6 +145,8 @@ static int __init kfd_module_init(void)
return 0;
err_create_wq:
kfd_topology_shutdown();
err_topology:
kfd_chardev_exit();
err_ioctl:
......
......@@ -356,6 +356,43 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
}
/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
* of this packet
* @gpu_addr - GPU address of the packet. It's a virtual address.
* @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
* Return - length of the packet
*/
uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
{
struct pm4_mec_release_mem *packet;
WARN_ON(!buffer);
packet = (struct pm4_mec_release_mem *)buffer;
memset(buffer, 0, sizeof(*packet));
packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
sizeof(*packet));
packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
packet->bitfields2.tcl1_action_ena = 1;
packet->bitfields2.tc_action_ena = 1;
packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
packet->bitfields2.atc = 0;
packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
packet->bitfields3.int_sel =
int_sel___release_mem__send_interrupt_after_write_confirm;
packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
packet->address_hi = upper_32_bits(gpu_addr);
packet->data_lo = 0;
return sizeof(*packet) / sizeof(unsigned int);
}
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
pm->dqm = dqm;
......
......@@ -104,6 +104,12 @@ extern int cwsr_enable;
*/
extern int send_sigterm;
/*
* This kernel module is used to simulate large bar machine on non-large bar
* enabled machines.
*/
extern int debug_largebar;
/*
* Ignore CRAT table during KFD initialization, can be used to work around
* broken CRAT tables on some AMD systems
......@@ -488,8 +494,13 @@ struct qcm_process_device {
/* CWSR memory */
void *cwsr_kaddr;
uint64_t cwsr_base;
uint64_t tba_addr;
uint64_t tma_addr;
/* IB memory */
uint64_t ib_base;
void *ib_kaddr;
};
/* KFD Memory Eviction */
......@@ -504,6 +515,14 @@ struct qcm_process_device {
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence);
/* 8 byte handle containing GPU ID in the most significant 4 bytes and
* idr_handle in the least significant 4 bytes
*/
#define MAKE_HANDLE(gpu_id, idr_handle) \
(((uint64_t)(gpu_id) << 32) + idr_handle)
#define GET_GPU_ID(handle) (handle >> 32)
#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
enum kfd_pdd_bound {
PDD_UNBOUND = 0,
PDD_BOUND,
......@@ -536,8 +555,12 @@ struct kfd_process_device {
uint64_t scratch_limit;
/* VM context for GPUVM allocations */
struct file *drm_file;
void *vm;
/* GPUVM allocations storage */
struct idr alloc_idr;
/* Flag used to tell the pdd has dequeued from the dqm.
* This is used to prevent dev->dqm->ops.process_termination() from
* being called twice when it is already called in IOMMU callback
......@@ -651,7 +674,7 @@ struct amdkfd_ioctl_desc {
const char *name;
};
void kfd_process_create_wq(void);
int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *);
......@@ -661,6 +684,8 @@ void kfd_unref_process(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
struct file *drm_file);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
struct kfd_process *p);
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
......@@ -671,6 +696,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
int kfd_reserved_mem_mmap(struct kfd_process *process,
struct vm_area_struct *vma);
/* KFD process API for creating and translating handles */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
void *mem);
void *kfd_process_device_translate_handle(struct kfd_process_device *p,
int handle);
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
int handle);
/* Process device data iterator */
struct kfd_process_device *kfd_get_first_process_device_data(
struct kfd_process *p);
......@@ -816,6 +849,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
void pm_release_ib(struct packet_manager *pm);
uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
/* Events */
......@@ -837,6 +872,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
void kfd_signal_hw_exception_event(unsigned int pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
uint64_t size);
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data,
......
This diff is collapsed.
......@@ -441,6 +441,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.device_id);
sysfs_show_32bit_prop(buffer, "location_id",
dev->node_props.location_id);
sysfs_show_32bit_prop(buffer, "drm_render_minor",
dev->node_props.drm_render_minor);
if (dev->gpu) {
log_max_watch_addr =
......@@ -1214,6 +1216,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd);
dev->node_props.max_engine_clk_ccompute =
cpufreq_quick_get_max(0) / 1000;
dev->node_props.drm_render_minor =
gpu->shared_resources.drm_render_minor;
kfd_fill_mem_clk_max_info(dev);
kfd_fill_iolink_non_crat_info(dev);
......
......@@ -71,6 +71,7 @@ struct kfd_node_properties {
uint32_t location_id;
uint32_t max_engine_clk_fcompute;
uint32_t max_engine_clk_ccompute;
int32_t drm_render_minor;
uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
};
......
......@@ -130,6 +130,7 @@ struct tile_config {
/*
* Allocation flag domains
* NOTE: This must match the corresponding definitions in kfd_ioctl.h.
*/
#define ALLOC_MEM_FLAGS_VRAM (1 << 0)
#define ALLOC_MEM_FLAGS_GTT (1 << 1)
......@@ -138,6 +139,7 @@ struct tile_config {
/*
* Allocation flags attributes/access options.
* NOTE: This must match the corresponding definitions in kfd_ioctl.h.
*/
#define ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
#define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
......@@ -336,6 +338,8 @@ struct kfd2kgd_calls {
int (*create_process_vm)(struct kgd_dev *kgd, void **vm,
void **process_info, struct dma_fence **ef);
int (*acquire_process_vm)(struct kgd_dev *kgd, struct file *filp,
void **vm, void **process_info, struct dma_fence **ef);
void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm);
uint32_t (*get_process_page_dir)(void *vm);
void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,
......
......@@ -107,8 +107,6 @@ struct kfd_ioctl_get_clock_counters_args {
__u32 pad;
};
#define NUM_OF_SUPPORTED_GPUS 7
struct kfd_process_device_apertures {
__u64 lds_base; /* from KFD */
__u64 lds_limit; /* from KFD */
......@@ -120,6 +118,12 @@ struct kfd_process_device_apertures {
__u32 pad;
};
/*
* AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use
* AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an
* unlimited number of GPUs.
*/
#define NUM_OF_SUPPORTED_GPUS 7
struct kfd_ioctl_get_process_apertures_args {
struct kfd_process_device_apertures
process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
......@@ -129,6 +133,19 @@ struct kfd_ioctl_get_process_apertures_args {
__u32 pad;
};
struct kfd_ioctl_get_process_apertures_new_args {
/* User allocated. Pointer to struct kfd_process_device_apertures
* filled in by Kernel
*/
__u64 kfd_process_device_apertures_ptr;
/* to KFD - indicates amount of memory present in
* kfd_process_device_apertures_ptr
* from KFD - Number of entries filled by KFD.
*/
__u32 num_of_nodes;
__u32 pad;
};
#define MAX_ALLOWED_NUM_POINTS 100
#define MAX_ALLOWED_AW_BUFF_SIZE 4096
#define MAX_ALLOWED_WAC_BUFF_SIZE 128
......@@ -269,6 +286,86 @@ struct kfd_ioctl_set_trap_handler_args {
__u32 pad;
};
struct kfd_ioctl_acquire_vm_args {
__u32 drm_fd; /* to KFD */
__u32 gpu_id; /* to KFD */
};
/* Allocation flags: memory types */
#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0)
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1)
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2)
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
/* Allocation flags: attributes/access options */
#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29)
#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28)
#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26)
/* Allocate memory for later SVM (shared virtual memory) mapping.
*
* @va_addr: virtual address of the memory to be allocated
* all later mappings on all GPUs will use this address
* @size: size in bytes
* @handle: buffer handle returned to user mode, used to refer to
* this allocation for mapping, unmapping and freeing
* @mmap_offset: for CPU-mapping the allocation by mmapping a render node
* for userptrs this is overloaded to specify the CPU address
* @gpu_id: device identifier
* @flags: memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above
*/
struct kfd_ioctl_alloc_memory_of_gpu_args {
__u64 va_addr; /* to KFD */
__u64 size; /* to KFD */
__u64 handle; /* from KFD */
__u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */
__u32 gpu_id; /* to KFD */
__u32 flags;
};
/* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu
*
* @handle: memory handle returned by alloc
*/
struct kfd_ioctl_free_memory_of_gpu_args {
__u64 handle; /* to KFD */
};
/* Map memory to one or more GPUs
*
* @handle: memory handle returned by alloc
* @device_ids_array_ptr: array of gpu_ids (__u32 per device)
* @n_devices: number of devices in the array
* @n_success: number of devices mapped successfully
*
* @n_success returns information to the caller how many devices from
* the start of the array have mapped the buffer successfully. It can
* be passed into a subsequent retry call to skip those devices. For
* the first call the caller should initialize it to 0.
*
* If the ioctl completes with return code 0 (success), n_success ==
* n_devices.
*/
struct kfd_ioctl_map_memory_to_gpu_args {
__u64 handle; /* to KFD */
__u64 device_ids_array_ptr; /* to KFD */
__u32 n_devices; /* to KFD */
__u32 n_success; /* to/from KFD */
};
/* Unmap memory from one or more GPUs
*
* same arguments as for mapping
*/
struct kfd_ioctl_unmap_memory_from_gpu_args {
__u64 handle; /* to KFD */
__u64 device_ids_array_ptr; /* to KFD */
__u32 n_devices; /* to KFD */
__u32 n_success; /* to/from KFD */
};
#define AMDKFD_IOCTL_BASE 'K'
#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
......@@ -332,7 +429,26 @@ struct kfd_ioctl_set_trap_handler_args {
#define AMDKFD_IOC_SET_TRAP_HANDLER \
AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \
AMDKFD_IOWR(0x14, \
struct kfd_ioctl_get_process_apertures_new_args)
#define AMDKFD_IOC_ACQUIRE_VM \
AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args)
#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \
AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \
AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args)
#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \
AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args)
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \
AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
#define AMDKFD_COMMAND_START 0x01
#define AMDKFD_COMMAND_END 0x14
#define AMDKFD_COMMAND_END 0x1A
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment