Commit c76f0b2c authored by Dave Airlie

Merge tag 'drm-amdkfd-next-2018-05-14' of git://people.freedesktop.org/~gabbayo/linux into drm-next

This is the amdkfd pull request for 4.18. The major new features are:

- Add support for GFXv9 dGPUs (VEGA)
- Add support for userptr memory mapping

In addition, there are several small fixes and improvements, such as:
- Fix lock handling
- Fix the rollback packet in the kernel queue
- Optimize kfd signal handling
- Fix CP hang in APU
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180514070126.GA1827@odedg-x270
parents 444ac87b af47b390
......@@ -767,12 +767,14 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
F: drivers/gpu/drm/amd/amdkfd/
F: drivers/gpu/drm/amd/include/cik_structs.h
F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h
F: drivers/gpu/drm/amd/include/vi_structs.h
F: drivers/gpu/drm/amd/include/v9_structs.h
F: include/uapi/linux/kfd_ioctl.h
AMD SEATTLE DEVICE TREE SUPPORT
......
......@@ -130,7 +130,8 @@ amdgpu-y += \
amdgpu_amdkfd.o \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
amdgpu_amdkfd_gfx_v8.o
amdgpu_amdkfd_gfx_v8.o \
amdgpu_amdkfd_gfx_v9.o
# add cgs
amdgpu-y += amdgpu_cgs.o
......
......@@ -92,6 +92,10 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
case CHIP_POLARIS11:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
case CHIP_VEGA10:
case CHIP_RAVEN:
kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
break;
default:
dev_dbg(adev->dev, "kfd not supported on this ASIC\n");
return;
......@@ -175,6 +179,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
if (adev->asic_type >= CHIP_VEGA10) {
/* On SOC15 the BIF is involved in routing
* doorbells using the low 12 bits of the
* address. Communicate the assignments to
* KFD. KFD uses two doorbell pages per
* process in case of 64-bit doorbells so we
* can use each doorbell assignment twice.
*/
gpu_resources.sdma_doorbell[0][0] =
AMDGPU_DOORBELL64_sDMA_ENGINE0;
gpu_resources.sdma_doorbell[0][1] =
AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
gpu_resources.sdma_doorbell[1][0] =
AMDGPU_DOORBELL64_sDMA_ENGINE1;
gpu_resources.sdma_doorbell[1][1] =
AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
/* Doorbells 0x0f0-0x0ff and 0x2f0-0x2ff are reserved for
* SDMA, IH and VCN. So don't use them for the CP.
*/
gpu_resources.reserved_doorbell_mask = 0x1f0;
gpu_resources.reserved_doorbell_val = 0x0f0;
}
kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
......
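As a side note, the reserved_doorbell_mask/val pair in the hunk above classifies doorbell indices with a single masked compare. A minimal standalone sketch; the two constants mirror the hunk, everything else is illustrative:

#include <assert.h>
#include <stdint.h>

/* Doorbells whose index satisfies (idx & mask) == val are reserved.
 * Because the low 12 address bits route doorbells and each process
 * uses two 0x200-entry doorbell pages, one mask/val pair covers both
 * reserved ranges, 0x0f0-0x0ff and 0x2f0-0x2ff.
 */
static int doorbell_is_reserved(uint32_t idx)
{
	const uint32_t reserved_doorbell_mask = 0x1f0;
	const uint32_t reserved_doorbell_val = 0x0f0;

	return (idx & reserved_doorbell_mask) == reserved_doorbell_val;
}

int main(void)
{
	assert(doorbell_is_reserved(0x0f0));
	assert(doorbell_is_reserved(0x0ff));
	assert(doorbell_is_reserved(0x2f3));
	assert(!doorbell_is_reserved(0x100));
	return 0;
}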
......@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/workqueue.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
......@@ -59,7 +60,9 @@ struct kgd_mem {
uint32_t mapping_flags;
atomic_t invalid;
struct amdkfd_process_info *process_info;
struct page **user_pages;
struct amdgpu_sync sync;
......@@ -84,6 +87,9 @@ struct amdkfd_process_info {
struct list_head vm_list_head;
/* List head for all KFD BOs that belong to a KFD process. */
struct list_head kfd_bo_list;
/* List of userptr BOs that are valid or invalid */
struct list_head userptr_valid_list;
struct list_head userptr_inval_list;
/* Lock to protect kfd_bo_list */
struct mutex lock;
......@@ -91,6 +97,11 @@ struct amdkfd_process_info {
unsigned int n_vms;
/* Eviction Fence */
struct amdgpu_amdkfd_fence *eviction_fence;
/* MMU-notifier related fields */
atomic_t evicted_bos;
struct delayed_work restore_userptr_work;
struct pid *pid;
};
int amdgpu_amdkfd_init(void);
......@@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
......
......@@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
......@@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
......@@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
/* amdgpu owns the per-pipe state */
return 0;
}
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
......
......@@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
......@@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
......@@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
/* amdgpu owns the per-pipe state */
return 0;
}
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
......
This diff is collapsed.
......@@ -536,7 +536,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (p->bo_list) {
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
if (p->bo_list->first_userptr != p->bo_list->num_entries)
p->mn = amdgpu_mn_get(p->adev);
p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
}
INIT_LIST_HEAD(&duplicates);
......
......@@ -36,12 +36,14 @@
#include <drm/drm.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
struct mm_struct *mm;
struct mmu_notifier mn;
enum amdgpu_mn_type type;
/* only used on destruction */
struct work_struct work;
......@@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
}
/**
* amdgpu_mn_invalidate_range_start - callback to notify about mm change
* amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
*
* @mn: our notifier
* @mm: the mm this callback is about
......@@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
* We block for all BOs between start and end to be idle and
* unmap them by moving them into the system domain again.
*/
static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
unsigned long end)
static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
unsigned long end)
{
struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
......@@ -219,6 +221,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
}
}
/**
* amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
*
* @mn: our notifier
* @mm: the mm this callback is about
* @start: start of updated range
* @end: end of updated range
*
* We temporarily evict all BOs between start and end. This
* necessitates evicting all user-mode queues of the process. The BOs
* are restored in amdgpu_mn_invalidate_range_end_hsa.
*/
static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
unsigned long end)
{
struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
end -= 1;
amdgpu_mn_read_lock(rmn);
it = interval_tree_iter_first(&rmn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
struct amdgpu_bo *bo;
node = container_of(it, struct amdgpu_mn_node, it);
it = interval_tree_iter_next(it, start, end);
list_for_each_entry(bo, &node->bos, mn_list) {
struct kgd_mem *mem = bo->kfd_bo;
if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
start, end))
amdgpu_amdkfd_evict_userptr(mem, mm);
}
}
}
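The per-BO check delegated to amdgpu_ttm_tt_affect_userptr() amounts to an interval-overlap test between the invalidated range and the BO's user mapping. A hedged sketch of such a test, assuming the helper is essentially a pure overlap check (the struct and field names here are illustrative; the real helper also records the invalidation):

#include <stdbool.h>

/* Illustrative stand-in for the ttm_tt userptr state. */
struct userptr_range {
	unsigned long userptr;	/* start of the user mapping */
	unsigned long size;	/* mapping size in bytes */
};

/* Overlap test with an inclusive end, matching the "end -= 1"
 * convention used by the notifier above.
 */
bool affect_userptr(const struct userptr_range *r,
		    unsigned long start, unsigned long end)
{
	return start < r->userptr + r->size && end >= r->userptr;
}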
/**
* amdgpu_mn_invalidate_range_end - callback to notify about mm change
*
......@@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
amdgpu_mn_read_unlock(rmn);
}
static const struct mmu_notifier_ops amdgpu_mn_ops = {
.release = amdgpu_mn_release,
.invalidate_range_start = amdgpu_mn_invalidate_range_start,
.invalidate_range_end = amdgpu_mn_invalidate_range_end,
static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
[AMDGPU_MN_TYPE_GFX] = {
.release = amdgpu_mn_release,
.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
.invalidate_range_end = amdgpu_mn_invalidate_range_end,
},
[AMDGPU_MN_TYPE_HSA] = {
.release = amdgpu_mn_release,
.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
.invalidate_range_end = amdgpu_mn_invalidate_range_end,
},
};
/* Low bits of any reasonable mm pointer will be unused due to struct
* alignment. Use these bits to make a unique key from the mm pointer
* and notifier type.
*/
#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
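A standalone illustration of why this key cannot collide: mm_struct pointers are aligned to far more than the number of notifier types, so adding the small enum value only touches bits that are always zero in the pointer:

#include <assert.h>

enum mn_type { MN_TYPE_GFX, MN_TYPE_HSA };	/* mirrors amdgpu_mn_type */

#define MN_KEY(mm, type) ((unsigned long)(mm) + (type))

int main(void)
{
	/* Stand-ins for two mm_struct pointers; real ones are at least
	 * word-aligned, so the +type offset stays in the low zero bits
	 * and distinct (mm, type) pairs always get distinct keys.
	 */
	static long mm_a, mm_b;

	assert(MN_KEY(&mm_a, MN_TYPE_GFX) != MN_KEY(&mm_a, MN_TYPE_HSA));
	assert(MN_KEY(&mm_a, MN_TYPE_GFX) != MN_KEY(&mm_b, MN_TYPE_GFX));
	return 0;
}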
/**
* amdgpu_mn_get - create notifier context
*
* @adev: amdgpu device pointer
* @type: type of MMU notifier context
*
* Creates a notifier context for current->mm.
*/
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
{
struct mm_struct *mm = current->mm;
struct amdgpu_mn *rmn;
unsigned long key = AMDGPU_MN_KEY(mm, type);
int r;
mutex_lock(&adev->mn_lock);
......@@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
return ERR_PTR(-EINTR);
}
hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
if (rmn->mm == mm)
hash_for_each_possible(adev->mn_hash, rmn, node, key)
if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
goto release_locks;
rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
......@@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
rmn->adev = adev;
rmn->mm = mm;
rmn->mn.ops = &amdgpu_mn_ops;
init_rwsem(&rmn->lock);
rmn->type = type;
rmn->mn.ops = &amdgpu_mn_ops[type];
rmn->objects = RB_ROOT_CACHED;
mutex_init(&rmn->read_lock);
atomic_set(&rmn->recursion, 0);
......@@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
if (r)
goto free_rmn;
hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
release_locks:
up_write(&mm->mmap_sem);
......@@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
unsigned long end = addr + amdgpu_bo_size(bo) - 1;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
enum amdgpu_mn_type type =
bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
struct amdgpu_mn *rmn;
struct amdgpu_mn_node *node = NULL;
struct amdgpu_mn_node *node = NULL, *new_node;
struct list_head bos;
struct interval_tree_node *it;
rmn = amdgpu_mn_get(adev);
rmn = amdgpu_mn_get(adev, type);
if (IS_ERR(rmn))
return PTR_ERR(rmn);
new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
if (!new_node)
return -ENOMEM;
INIT_LIST_HEAD(&bos);
down_write(&rmn->lock);
......@@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
list_splice(&node->bos, &bos);
}
if (!node) {
node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
if (!node) {
up_write(&rmn->lock);
return -ENOMEM;
}
}
if (!node)
node = new_node;
else
kfree(new_node);
bo->mn = rmn;
......
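The reshuffle above preallocates new_node before taking rmn->lock, a standard pattern: do the allocation where it may sleep or fail, keep the critical section infallible, and free the spare if an existing node is found; in this MMU-notifier path it also likely keeps allocation out of a lock that direct reclaim, which can itself fire the notifier, might need. A generic userspace sketch of the pattern (all names illustrative):

#include <pthread.h>
#include <stdlib.h>

struct node { int payload; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *existing;	/* node found under the lock, if any */

static int register_node(void)
{
	/* Allocate up front, outside the lock. */
	struct node *new_node = malloc(sizeof(*new_node));

	if (!new_node)
		return -1;

	pthread_mutex_lock(&lock);
	if (!existing)
		existing = new_node;	/* preallocation consumed */
	else
		free(new_node);		/* an existing node wins */
	pthread_mutex_unlock(&lock);
	return 0;
}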
......@@ -29,16 +29,23 @@
*/
struct amdgpu_mn;
enum amdgpu_mn_type {
AMDGPU_MN_TYPE_GFX,
AMDGPU_MN_TYPE_HSA,
};
#if defined(CONFIG_MMU_NOTIFIER)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
#else
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
{
return NULL;
}
......
......@@ -695,7 +695,7 @@ struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
u64 offset;
uint64_t userptr;
struct mm_struct *usermm;
struct task_struct *usertask;
uint32_t userflags;
spinlock_t guptasklock;
struct list_head guptasks;
......@@ -706,14 +706,18 @@ struct amdgpu_ttm_tt {
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct mm_struct *mm = gtt->usertask->mm;
unsigned int flags = 0;
unsigned pinned = 0;
int r;
if (!mm) /* Happens during process shutdown */
return -ESRCH;
if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
flags |= FOLL_WRITE;
down_read(&current->mm->mmap_sem);
down_read(&mm->mmap_sem);
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
/* check that we only use anonymous memory
......@@ -721,9 +725,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
struct vm_area_struct *vma;
vma = find_vma(gtt->usermm, gtt->userptr);
vma = find_vma(mm, gtt->userptr);
if (!vma || vma->vm_file || vma->vm_end < end) {
up_read(&current->mm->mmap_sem);
up_read(&mm->mmap_sem);
return -EPERM;
}
}
......@@ -739,7 +743,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
list_add(&guptask.list, &gtt->guptasks);
spin_unlock(&gtt->guptasklock);
r = get_user_pages(userptr, num_pages, flags, p, NULL);
if (mm == current->mm)
r = get_user_pages(userptr, num_pages, flags, p, NULL);
else
r = get_user_pages_remote(gtt->usertask,
mm, userptr, num_pages,
flags, p, NULL, NULL);
spin_lock(&gtt->guptasklock);
list_del(&guptask.list);
......@@ -752,12 +761,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
} while (pinned < ttm->num_pages);
up_read(&current->mm->mmap_sem);
up_read(&mm->mmap_sem);
return 0;
release_pages:
release_pages(pages, pinned);
up_read(&current->mm->mmap_sem);
up_read(&mm->mmap_sem);
return r;
}
......@@ -978,6 +987,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
if (gtt->usertask)
put_task_struct(gtt->usertask);
ttm_dma_tt_fini(&gtt->ttm);
kfree(gtt);
}
......@@ -1079,8 +1091,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
return -EINVAL;
gtt->userptr = addr;
gtt->usermm = current->mm;
gtt->userflags = flags;
if (gtt->usertask)
put_task_struct(gtt->usertask);
gtt->usertask = current->group_leader;
get_task_struct(gtt->usertask);
spin_lock_init(&gtt->guptasklock);
INIT_LIST_HEAD(&gtt->guptasks);
atomic_set(&gtt->mmu_invalidations, 0);
......@@ -1096,7 +1113,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
if (gtt == NULL)
return NULL;
return gtt->usermm;
if (gtt->usertask == NULL)
return NULL;
return gtt->usertask->mm;
}
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
......
......@@ -4686,6 +4686,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask;
cu_info->simd_per_cu = NUM_SIMD_PER_CU;
return 0;
}
......
......@@ -268,6 +268,11 @@
* x=1: tmz_end
*/
#define PACKET3_INVALIDATE_TLBS 0x98
# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
#define PACKET3_SET_RESOURCES 0xA0
/* 1. header
* 2. CONTROL
......
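A standalone sketch of composing the INVALIDATE_TLBS control dword from the field macros added above. The field positions come from the header; the field widths and example values are assumptions (PASIDs are 16 bits elsewhere in this series):

#include <assert.h>
#include <stdint.h>

#define PACKET3_INVALIDATE_TLBS_DST_SEL(x)	((x) << 0)
#define PACKET3_INVALIDATE_TLBS_ALL_HUB(x)	((x) << 4)
#define PACKET3_INVALIDATE_TLBS_PASID(x)	((x) << 5)
#define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x)	((x) << 29)

int main(void)
{
	/* Hypothetical example: invalidate for PASID 0x1234 on all hubs. */
	uint32_t dw = PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
		      PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
		      PACKET3_INVALIDATE_TLBS_PASID(0x1234) |
		      PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0);

	/* PASID occupies bits 5..20 under these assumptions. */
	assert(((dw >> 5) & 0xffff) == 0x1234);
	return 0;
}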
......@@ -30,12 +30,14 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
kfd_process.o kfd_queue.o kfd_mqd_manager.o \
kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
kfd_mqd_manager_v9.o \
kfd_kernel_queue.o kfd_kernel_queue_cik.o \
kfd_kernel_queue_vi.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o \
kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \
kfd_packet_manager.o kfd_process_queue_manager.o \
kfd_device_queue_manager.o kfd_device_queue_manager_cik.o \
kfd_device_queue_manager_vi.o kfd_device_queue_manager_v9.o \
kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
kfd_int_process_v9.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
ifneq ($(CONFIG_AMD_IOMMU_V2),)
amdkfd-y += kfd_iommu.o
......
......@@ -27,18 +27,28 @@
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
const uint32_t *ih_ring_entry)
{
unsigned int pasid;
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
unsigned int vmid, pasid;
/* Only handle interrupts from KFD VMIDs */
vmid = (ihre->ring_id & 0x0000ff00) >> 8;
if (vmid < dev->vm_info.first_vmid_kfd ||
vmid > dev->vm_info.last_vmid_kfd)
return 0;
/* If there is no valid PASID, it's likely a firmware bug */
pasid = (ihre->ring_id & 0xffff0000) >> 16;
if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
return 0;
/* Do not process in ISR, just request it to be forwarded to WQ. */
return (pasid != 0) &&
(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
/* Interrupt types we care about: various signals and faults.
* They will be forwarded to a work queue (see below).
*/
return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE;
}
static void cik_event_interrupt_wq(struct kfd_dev *dev,
......
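The reworked ISR above pulls both fields out of ihre->ring_id with fixed masks: VMID in bits 8-15 and PASID in bits 16-31. A small standalone check of that extraction (the sample entry value is made up):

#include <assert.h>
#include <stdint.h>

static unsigned int ring_id_vmid(uint32_t ring_id)
{
	return (ring_id & 0x0000ff00) >> 8;	/* bits 8-15 */
}

static unsigned int ring_id_pasid(uint32_t ring_id)
{
	return (ring_id & 0xffff0000) >> 16;	/* bits 16-31 */
}

int main(void)
{
	uint32_t ring_id = 0x12340800;	/* hypothetical IH entry field */

	assert(ring_id_vmid(ring_id) == 0x08);
	assert(ring_id_pasid(ring_id) == 0x1234);
	return 0;
}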
......@@ -33,7 +33,8 @@
#define APE1_MTYPE(x) ((x) << 7)
/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
#define MTYPE_CACHED 0
#define MTYPE_CACHED_NV 0
#define MTYPE_CACHED 1
#define MTYPE_NONCACHED 3
#define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8)
......
This diff is collapsed.
......@@ -20,9 +20,12 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#if 0
HW (VI) source code for CWSR trap handler
#Version 18 + multiple trap handler
/* To compile this assembly code:
* PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex
*/
/* HW (VI) source code for CWSR trap handler */
/* Version 18 + multiple trap handler */
// this performance-optimal version was originally from Seven Xu at SRDC
......@@ -98,6 +101,7 @@ var SWIZZLE_EN = 0 //whether we use swi
/**************************************************************************/
var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
......@@ -149,7 +153,7 @@ var s_save_spi_init_lo = exec_lo
var s_save_spi_init_hi = exec_hi
//tba_lo and tba_hi need to be saved/restored
var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
var s_save_pc_hi = ttmp1
var s_save_exec_lo = ttmp2
var s_save_exec_hi = ttmp3
......@@ -319,6 +323,10 @@ end
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
end
// Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
L_SLEEP:
s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0
......@@ -1007,8 +1015,6 @@ end
s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
//for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore)
......@@ -1044,6 +1050,7 @@ end
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
......@@ -1127,258 +1134,3 @@ end
function get_hwreg_size_bytes
return 128 //HWREG size 128 bytes
end
#endif
static const uint32_t cwsr_trap_gfx8_hex[] = {
0xbf820001, 0xbf820123,
0xb8f4f802, 0x89748674,
0xb8f5f803, 0x8675ff75,
0x00000400, 0xbf850011,
0xc00a1e37, 0x00000000,
0xbf8c007f, 0x87777978,
0xbf840002, 0xb974f802,
0xbe801d78, 0xb8f5f803,
0x8675ff75, 0x000001ff,
0xbf850002, 0x80708470,
0x82718071, 0x8671ff71,
0x0000ffff, 0xb974f802,
0xbe801f70, 0xb8f5f803,
0x8675ff75, 0x00000100,
0xbf840006, 0xbefa0080,
0xb97a0203, 0x8671ff71,
0x0000ffff, 0x80f08870,
0x82f18071, 0xbefa0080,
0xb97a0283, 0xbef60068,
0xbef70069, 0xb8fa1c07,
0x8e7a9c7a, 0x87717a71,
0xb8fa03c7, 0x8e7a9b7a,
0x87717a71, 0xb8faf807,
0x867aff7a, 0x00007fff,
0xb97af807, 0xbef2007e,
0xbef3007f, 0xbefe0180,
0xbf900004, 0xbf8e0002,
0xbf88fffe, 0xbef8007e,
0x8679ff7f, 0x0000ffff,
0x8779ff79, 0x00040000,
0xbefa0080, 0xbefb00ff,
0x00807fac, 0x867aff7f,
0x08000000, 0x8f7a837a,
0x877b7a7b, 0x867aff7f,
0x70000000, 0x8f7a817a,
0x877b7a7b, 0xbeef007c,
0xbeee0080, 0xb8ee2a05,
0x806e816e, 0x8e6e8a6e,
0xb8fa1605, 0x807a817a,
0x8e7a867a, 0x806e7a6e,
0xbefa0084, 0xbefa00ff,
0x01000000, 0xbefe007c,
0xbefc006e, 0xc0611bfc,
0x0000007c, 0x806e846e,
0xbefc007e, 0xbefe007c,
0xbefc006e, 0xc0611c3c,
0x0000007c, 0x806e846e,
0xbefc007e, 0xbefe007c,
0xbefc006e, 0xc0611c7c,
0x0000007c, 0x806e846e,
0xbefc007e, 0xbefe007c,
0xbefc006e, 0xc0611cbc,
0x0000007c, 0x806e846e,
0xbefc007e, 0xbefe007c,
0xbefc006e, 0xc0611cfc,
0x0000007c, 0x806e846e,
0xbefc007e, 0xbefe007c,
0xbefc006e, 0xc0611d3c,
0x0000007c, 0x806e846e,
0xbefc007e, 0xb8f5f803,
0xbefe007c, 0xbefc006e,
0xc0611d7c, 0x0000007c,
0x806e846e, 0xbefc007e,
0xbefe007c, 0xbefc006e,
0xc0611dbc, 0x0000007c,
0x806e846e, 0xbefc007e,
0xbefe007c, 0xbefc006e,
0xc0611dfc, 0x0000007c,
0x806e846e, 0xbefc007e,
0xb8eff801, 0xbefe007c,
0xbefc006e, 0xc0611bfc,
0x0000007c, 0x806e846e,
0xbefc007e, 0xbefe007c,
0xbefc006e, 0xc0611b3c,
0x0000007c, 0x806e846e,
0xbefc007e, 0xbefe007c,
0xbefc006e, 0xc0611b7c,
0x0000007c, 0x806e846e,
0xbefc007e, 0x867aff7f,
0x04000000, 0xbef30080,
0x8773737a, 0xb8ee2a05,
0x806e816e, 0x8e6e8a6e,
0xb8f51605, 0x80758175,
0x8e758475, 0x8e7a8275,
0xbefa00ff, 0x01000000,
0xbef60178, 0x80786e78,
0x82798079, 0xbefc0080,
0xbe802b00, 0xbe822b02,
0xbe842b04, 0xbe862b06,
0xbe882b08, 0xbe8a2b0a,
0xbe8c2b0c, 0xbe8e2b0e,
0xc06b003c, 0x00000000,
0xc06b013c, 0x00000010,
0xc06b023c, 0x00000020,
0xc06b033c, 0x00000030,
0x8078c078, 0x82798079,
0x807c907c, 0xbf0a757c,
0xbf85ffeb, 0xbef80176,
0xbeee0080, 0xbefe00c1,
0xbeff00c1, 0xbefa00ff,
0x01000000, 0xe0724000,
0x6e1e0000, 0xe0724100,
0x6e1e0100, 0xe0724200,
0x6e1e0200, 0xe0724300,
0x6e1e0300, 0xbefe00c1,
0xbeff00c1, 0xb8f54306,
0x8675c175, 0xbf84002c,
0xbf8a0000, 0x867aff73,
0x04000000, 0xbf840028,
0x8e758675, 0x8e758275,
0xbefa0075, 0xb8ee2a05,
0x806e816e, 0x8e6e8a6e,
0xb8fa1605, 0x807a817a,
0x8e7a867a, 0x806e7a6e,
0x806eff6e, 0x00000080,
0xbefa00ff, 0x01000000,
0xbefc0080, 0xd28c0002,
0x000100c1, 0xd28d0003,
0x000204c1, 0xd1060002,
0x00011103, 0x7e0602ff,
0x00000200, 0xbefc00ff,
0x00010000, 0xbe80007b,
0x867bff7b, 0xff7fffff,
0x877bff7b, 0x00058000,
0xd8ec0000, 0x00000002,
0xbf8c007f, 0xe0765000,
0x6e1e0002, 0x32040702,
0xd0c9006a, 0x0000eb02,
0xbf87fff7, 0xbefb0000,
0xbeee00ff, 0x00000400,
0xbefe00c1, 0xbeff00c1,
0xb8f52a05, 0x80758175,
0x8e758275, 0x8e7a8875,
0xbefa00ff, 0x01000000,
0xbefc0084, 0xbf0a757c,
0xbf840015, 0xbf11017c,
0x8075ff75, 0x00001000,
0x7e000300, 0x7e020301,
0x7e040302, 0x7e060303,
0xe0724000, 0x6e1e0000,
0xe0724100, 0x6e1e0100,
0xe0724200, 0x6e1e0200,
0xe0724300, 0x6e1e0300,
0x807c847c, 0x806eff6e,
0x00000400, 0xbf0a757c,
0xbf85ffef, 0xbf9c0000,
0xbf8200ca, 0xbef8007e,
0x8679ff7f, 0x0000ffff,
0x8779ff79, 0x00040000,
0xbefa0080, 0xbefb00ff,
0x00807fac, 0x8676ff7f,
0x08000000, 0x8f768376,
0x877b767b, 0x8676ff7f,
0x70000000, 0x8f768176,
0x877b767b, 0x8676ff7f,
0x04000000, 0xbf84001e,
0xbefe00c1, 0xbeff00c1,
0xb8f34306, 0x8673c173,
0xbf840019, 0x8e738673,
0x8e738273, 0xbefa0073,
0xb8f22a05, 0x80728172,
0x8e728a72, 0xb8f61605,
0x80768176, 0x8e768676,
0x80727672, 0x8072ff72,
0x00000080, 0xbefa00ff,
0x01000000, 0xbefc0080,
0xe0510000, 0x721e0000,
0xe0510100, 0x721e0000,
0x807cff7c, 0x00000200,
0x8072ff72, 0x00000200,
0xbf0a737c, 0xbf85fff6,
0xbef20080, 0xbefe00c1,
0xbeff00c1, 0xb8f32a05,
0x80738173, 0x8e738273,
0x8e7a8873, 0xbefa00ff,
0x01000000, 0xbef60072,
0x8072ff72, 0x00000400,
0xbefc0084, 0xbf11087c,
0x8073ff73, 0x00008000,
0xe0524000, 0x721e0000,
0xe0524100, 0x721e0100,
0xe0524200, 0x721e0200,
0xe0524300, 0x721e0300,
0xbf8c0f70, 0x7e000300,
0x7e020301, 0x7e040302,
0x7e060303, 0x807c847c,
0x8072ff72, 0x00000400,
0xbf0a737c, 0xbf85ffee,
0xbf9c0000, 0xe0524000,
0x761e0000, 0xe0524100,
0x761e0100, 0xe0524200,
0x761e0200, 0xe0524300,
0x761e0300, 0xb8f22a05,
0x80728172, 0x8e728a72,
0xb8f61605, 0x80768176,
0x8e768676, 0x80727672,
0x80f2c072, 0xb8f31605,
0x80738173, 0x8e738473,
0x8e7a8273, 0xbefa00ff,
0x01000000, 0xbefc0073,
0xc031003c, 0x00000072,
0x80f2c072, 0xbf8c007f,
0x80fc907c, 0xbe802d00,
0xbe822d02, 0xbe842d04,
0xbe862d06, 0xbe882d08,
0xbe8a2d0a, 0xbe8c2d0c,
0xbe8e2d0e, 0xbf06807c,
0xbf84fff1, 0xb8f22a05,
0x80728172, 0x8e728a72,
0xb8f61605, 0x80768176,
0x8e768676, 0x80727672,
0xbefa0084, 0xbefa00ff,
0x01000000, 0xc0211cfc,
0x00000072, 0x80728472,
0xc0211c3c, 0x00000072,
0x80728472, 0xc0211c7c,
0x00000072, 0x80728472,
0xc0211bbc, 0x00000072,
0x80728472, 0xc0211bfc,
0x00000072, 0x80728472,
0xc0211d3c, 0x00000072,
0x80728472, 0xc0211d7c,
0x00000072, 0x80728472,
0xc0211a3c, 0x00000072,
0x80728472, 0xc0211a7c,
0x00000072, 0x80728472,
0xc0211dfc, 0x00000072,
0x80728472, 0xc0211b3c,
0x00000072, 0x80728472,
0xc0211b7c, 0x00000072,
0x80728472, 0xbf8c007f,
0x8671ff71, 0x0000ffff,
0xbefc0073, 0xbefe006e,
0xbeff006f, 0x867375ff,
0x000003ff, 0xb9734803,
0x867375ff, 0xfffff800,
0x8f738b73, 0xb973a2c3,
0xb977f801, 0x8673ff71,
0xf0000000, 0x8f739c73,
0x8e739073, 0xbef60080,
0x87767376, 0x8673ff71,
0x08000000, 0x8f739b73,
0x8e738f73, 0x87767376,
0x8673ff74, 0x00800000,
0x8f739773, 0xb976f807,
0x86fe7e7e, 0x86ea6a6a,
0xb974f802, 0xbf8a0000,
0x95807370, 0xbf810000,
};
This diff is collapsed.
......@@ -233,7 +233,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
pr_debug("Queue Size: 0x%llX, %u\n",
q_properties->queue_size, args->ring_size);
pr_debug("Queue r/w Pointers: %p, %p\n",
pr_debug("Queue r/w Pointers: %px, %px\n",
q_properties->read_ptr,
q_properties->write_ptr);
......@@ -292,8 +292,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
args->doorbell_offset <<= PAGE_SHIFT;
if (KFD_IS_SOC15(dev->device_info->asic_family))
/* On SOC15 ASICs, doorbell allocation must be
* per-device, and independent from the per-process
* queue_id. Return the doorbell offset within the
* doorbell aperture to user mode.
*/
args->doorbell_offset |= q_properties.doorbell_off;
mutex_unlock(&p->mutex);
......@@ -1296,8 +1304,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
return -EINVAL;
}
devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
GFP_KERNEL);
devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
GFP_KERNEL);
if (!devices_arr)
return -ENOMEM;
......@@ -1405,8 +1413,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
return -EINVAL;
}
devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
GFP_KERNEL);
devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
GFP_KERNEL);
if (!devices_arr)
return -ENOMEM;
......@@ -1645,23 +1653,33 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kfd_process *process;
struct kfd_dev *dev = NULL;
unsigned long vm_pgoff;
unsigned int gpu_id;
process = kfd_get_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
KFD_MMAP_DOORBELL_MASK) {
vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
return kfd_doorbell_mmap(process, vma);
} else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
KFD_MMAP_EVENTS_MASK) {
vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
vm_pgoff = vma->vm_pgoff;
vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
if (gpu_id)
dev = kfd_device_by_id(gpu_id);
switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
case KFD_MMAP_TYPE_DOORBELL:
if (!dev)
return -ENODEV;
return kfd_doorbell_mmap(dev, process, vma);
case KFD_MMAP_TYPE_EVENTS:
return kfd_event_mmap(process, vma);
} else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
KFD_MMAP_RESERVED_MEM_MASK) {
vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
return kfd_reserved_mem_mmap(process, vma);
case KFD_MMAP_TYPE_RESERVED_MEM:
if (!dev)
return -ENODEV;
return kfd_reserved_mem_mmap(dev, process, vma);
}
return -EFAULT;
......
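The create_queue and kfd_mmap hunks above are two sides of one scheme: the ioctl encodes a type tag and GPU ID into the mmap offset it returns, and kfd_mmap later decodes them from vm_pgoff. A standalone round-trip sketch; the shift and mask values are plausible assumptions (the real definitions live in kfd_priv.h, outside this diff), and only the composition and the PAGE_SHIFT scaling are taken from the hunks:

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12

/* Assumed layout: type tag in the top two bits of the 64-bit mmap
 * offset, 16-bit GPU ID just below it.
 */
#define KFD_MMAP_TYPE_SHIFT	(62 - PAGE_SHIFT)
#define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_GPU_ID_SHIFT	(46 - PAGE_SHIFT)
#define KFD_MMAP_GPU_ID_MASK	(0xffffULL << KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_GPU_ID(id)	(((uint64_t)(id) << KFD_MMAP_GPU_ID_SHIFT) & KFD_MMAP_GPU_ID_MASK)
#define KFD_MMAP_GPU_ID_GET(off) (((off) & KFD_MMAP_GPU_ID_MASK) >> KFD_MMAP_GPU_ID_SHIFT)

int main(void)
{
	/* Encode, as in kfd_ioctl_create_queue... */
	uint64_t doorbell_offset =
		KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(0xbeef);
	doorbell_offset <<= PAGE_SHIFT;

	/* ...then decode in kfd_mmap; the kernel hands the driver
	 * vm_pgoff == mmap_offset >> PAGE_SHIFT.
	 */
	uint64_t vm_pgoff = doorbell_offset >> PAGE_SHIFT;
	assert((vm_pgoff & KFD_MMAP_TYPE_MASK) == KFD_MMAP_TYPE_DOORBELL);
	assert(KFD_MMAP_GPU_ID_GET(vm_pgoff) == 0xbeef);
	return 0;
}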
......@@ -132,6 +132,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define fiji_cache_info carrizo_cache_info
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
/* TODO - check & update Vega10 cache details */
#define vega10_cache_info carrizo_cache_info
#define raven_cache_info carrizo_cache_info
static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
struct crat_subtype_computeunit *cu)
......@@ -603,6 +606,14 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info = polaris11_cache_info;
num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
break;
case CHIP_VEGA10:
pcache_info = vega10_cache_info;
num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
break;
case CHIP_RAVEN:
pcache_info = raven_cache_info;
num_of_cache_types = ARRAY_SIZE(raven_cache_info);
break;
default:
return -EINVAL;
}
......
......@@ -20,16 +20,13 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
#include <linux/amd-iommu.h>
#endif
#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "cwsr_trap_handler_gfx8.asm"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#define MQD_SIZE_ALIGNED 768
......@@ -41,6 +38,7 @@ static const struct kfd_device_info kaveri_device_info = {
.max_pasid_bits = 16,
/* max num of queues for KV. TODO: should be a dynamic value */
.max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
......@@ -55,6 +53,7 @@ static const struct kfd_device_info carrizo_device_info = {
.max_pasid_bits = 16,
/* max num of queues for CZ. TODO: should be a dynamic value */
.max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
......@@ -70,6 +69,7 @@ static const struct kfd_device_info hawaii_device_info = {
.max_pasid_bits = 16,
/* max num of queues for KV. TODO: should be a dynamic value */
.max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
......@@ -83,6 +83,7 @@ static const struct kfd_device_info tonga_device_info = {
.asic_family = CHIP_TONGA,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
......@@ -96,6 +97,7 @@ static const struct kfd_device_info tonga_vf_device_info = {
.asic_family = CHIP_TONGA,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
......@@ -109,6 +111,7 @@ static const struct kfd_device_info fiji_device_info = {
.asic_family = CHIP_FIJI,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
......@@ -122,6 +125,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.asic_family = CHIP_FIJI,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
......@@ -136,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = {
.asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
......@@ -149,6 +154,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
......@@ -162,6 +168,7 @@ static const struct kfd_device_info polaris11_device_info = {
.asic_family = CHIP_POLARIS11,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
......@@ -171,6 +178,34 @@ static const struct kfd_device_info polaris11_device_info = {
.needs_pci_atomics = true,
};
static const struct kfd_device_info vega10_device_info = {
.asic_family = CHIP_VEGA10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 8,
.ih_ring_entry_size = 8 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_v9,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = false,
};
static const struct kfd_device_info vega10_vf_device_info = {
.asic_family = CHIP_VEGA10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 8,
.ih_ring_entry_size = 8 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_v9,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = false,
};
struct kfd_deviceid {
unsigned short did;
......@@ -250,6 +285,15 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x67EB, &polaris11_device_info }, /* Polaris11 */
{ 0x67EF, &polaris11_device_info }, /* Polaris11 */
{ 0x67FF, &polaris11_device_info }, /* Polaris11 */
{ 0x6860, &vega10_device_info }, /* Vega10 */
{ 0x6861, &vega10_device_info }, /* Vega10 */
{ 0x6862, &vega10_device_info }, /* Vega10 */
{ 0x6863, &vega10_device_info }, /* Vega10 */
{ 0x6864, &vega10_device_info }, /* Vega10 */
{ 0x6867, &vega10_device_info }, /* Vega10 */
{ 0x6868, &vega10_device_info }, /* Vega10 */
{ 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/
{ 0x687F, &vega10_device_info }, /* Vega10 */
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
......@@ -279,7 +323,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
{
struct kfd_dev *kfd;
int ret;
const struct kfd_device_info *device_info =
lookup_device_info(pdev->device);
......@@ -288,19 +332,18 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
return NULL;
}
if (device_info->needs_pci_atomics) {
/* Allow BIF to recode atomics to PCIe 3.0
* AtomicOps. 32 and 64-bit requests are possible and
* must be supported.
*/
if (pci_enable_atomic_ops_to_root(pdev,
PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) {
dev_info(kfd_device,
"skipped device %x:%x, PCI rejects atomics",
pdev->vendor, pdev->device);
return NULL;
}
/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
* 32 and 64-bit requests are possible and must be
* supported.
*/
ret = pci_enable_atomic_ops_to_root(pdev,
PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
PCI_EXP_DEVCAP2_ATOMIC_COMP64);
if (device_info->needs_pci_atomics && ret < 0) {
dev_info(kfd_device,
"skipped device %x:%x, PCI rejects atomics\n",
pdev->vendor, pdev->device);
return NULL;
}
kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
......@@ -323,10 +366,16 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
if (cwsr_enable && kfd->device_info->supports_cwsr) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
if (kfd->device_info->asic_family < CHIP_VEGA10) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
} else {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx9_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
}
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
kfd->cwsr_enabled = true;
}
}
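The BUILD_BUG_ON checks above enforce at compile time that each trap-handler image fits in the single page reserved for it. The same guard in standalone C using _Static_assert, with an illustrative array size (the gfx8 image later in this diff is roughly 2 KiB):

#include <stdint.h>

#define PAGE_SIZE 4096u

/* Illustrative stand-in for a trap handler image. */
static const uint32_t cwsr_trap_hex[548];

/* Standalone equivalent of BUILD_BUG_ON(sizeof(...) > PAGE_SIZE):
 * the build fails if the image ever outgrows one page.
 */
_Static_assert(sizeof(cwsr_trap_hex) <= PAGE_SIZE,
	       "CWSR trap handler must fit in a single page");

int main(void)
{
	return 0;
}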
......@@ -541,6 +590,44 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_unlock(&kfd->interrupt_lock);
}
int kgd2kfd_quiesce_mm(struct mm_struct *mm)
{
struct kfd_process *p;
int r;
/* Because we are called from arbitrary context (workqueue) as opposed
* to process context, kfd_process could attempt to exit while we are
* running, so the lookup function increments the process ref count.
*/
p = kfd_lookup_process_by_mm(mm);
if (!p)
return -ESRCH;
r = kfd_process_evict_queues(p);
kfd_unref_process(p);
return r;
}
int kgd2kfd_resume_mm(struct mm_struct *mm)
{
struct kfd_process *p;
int r;
/* Because we are called from arbitrary context (workqueue) as opposed
* to process context, kfd_process could attempt to exit while we are
* running, so the lookup function increments the process ref count.
*/
p = kfd_lookup_process_by_mm(mm);
if (!p)
return -ESRCH;
r = kfd_process_restore_queues(p);
kfd_unref_process(p);
return r;
}
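Both helpers rely on the lookup-takes-a-reference idiom their comments describe: since the caller runs from a workqueue, the process may exit concurrently, so the lookup must return the object with an extra reference held. A generic standalone sketch of the idiom (names and types illustrative):

#include <stdatomic.h>
#include <stdlib.h>

struct process {
	atomic_int refcount;
	/* ... per-process state ... */
};

static void process_unref(struct process *p)
{
	if (atomic_fetch_sub(&p->refcount, 1) == 1)
		free(p);	/* last reference gone */
}

/* lookup() returns NULL if the process already exited, otherwise the
 * object with one extra reference held, so it cannot be freed under
 * us while we work on it.
 */
static int quiesce(struct process *(*lookup)(void))
{
	struct process *p = lookup();

	if (!p)
		return -1;	/* -ESRCH in the kernel version */

	/* ... evict the process's queues ... */

	process_unref(p);
	return 0;
}

static struct process *stub_lookup(void)
{
	struct process *p = malloc(sizeof(*p));

	if (p)
		atomic_init(&p->refcount, 1);	/* ref handed to caller */
	return p;
}

int main(void)
{
	return quiesce(stub_lookup);
}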
/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
* prepare for safe eviction of KFD BOs that belong to the specified
* process.
......@@ -652,7 +739,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
return -ENOMEM;
*mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
if ((*mem_obj) == NULL)
return -ENOMEM;
......
......@@ -200,6 +200,8 @@ void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi_tonga(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_v9(
struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
unsigned int get_queues_num(struct device_queue_manager *dqm);
......
/*
* Copyright 2016-2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "kfd_device_queue_manager.h"
#include "vega10_enum.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
static int update_qpd_v9(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
void device_queue_manager_init_v9(
struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->update_qpd = update_qpd_v9;
asic_ops->init_sdma_vm = init_sdma_vm_v9;
}
static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
{
uint32_t shared_base = pdd->lds_base >> 48;
uint32_t private_base = pdd->scratch_base >> 48;
return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
private_base;
}
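A worked example of the packing above: on GFXv9 the LDS and scratch apertures are identified by the top 16 address bits (bits 63:48), and SH_MEM_BASES holds the two 16-bit bases side by side. The SHARED_BASE shift value is assumed from gc_9_0_sh_mask.h; the aperture addresses are hypothetical:

#include <assert.h>
#include <stdint.h>

/* Assumed: PRIVATE_BASE in bits 0-15, SHARED_BASE in bits 16-31. */
#define SH_MEM_BASES__SHARED_BASE__SHIFT 16

int main(void)
{
	/* Hypothetical 64-bit aperture bases; only bits 63:48 matter. */
	uint64_t lds_base     = 0x1000000000000ULL;	/* top 16 bits: 0x0001 */
	uint64_t scratch_base = 0x2000000000000ULL;	/* top 16 bits: 0x0002 */

	uint32_t shared_base = lds_base >> 48;
	uint32_t private_base = scratch_base >> 48;
	uint32_t sh_mem_bases =
		(shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
		private_base;

	assert(sh_mem_bases == 0x00010002);
	return 0;
}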
static int update_qpd_v9(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
pdd = qpd_to_pdd(qpd);
/* check if sh_mem_config register already configured */
if (qpd->sh_mem_config == 0) {
qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
if (vega10_noretry &&
!dqm->dev->device_info->needs_iommu_device)
qpd->sh_mem_config |=
1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
return 0;
}
static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
/* Not needed on SDMAv4 any more */
q->properties.sdma_vm_addr = 0;
}
This diff is collapsed.
......@@ -345,7 +345,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
case KFD_EVENT_TYPE_DEBUG:
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
*event_page_offset = KFD_MMAP_EVENTS_MASK;
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
*event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->event_id;
}
......@@ -496,7 +496,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
partial_id, valid_id_bits);
if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) {
/* With relatively few events, it's faster to
* iterate over the event IDR
*/
......
This diff is collapsed.
......@@ -139,10 +139,12 @@ static void interrupt_wq(struct work_struct *work)
{
struct kfd_dev *dev = container_of(work, struct kfd_dev,
interrupt_work);
uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
uint32_t ih_ring_entry[DIV_ROUND_UP(
dev->device_info->ih_ring_entry_size,
sizeof(uint32_t))];
if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
dev_err_once(kfd_chardev(), "Ring entry too small\n");
return;
}
while (dequeue_ih_ring_entry(dev, ih_ring_entry))
dev->device_info->event_interrupt_class->interrupt_wq(dev,
......
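This hunk trades the runtime-sized array (a VLA sized from device_info) for a fixed KFD_MAX_RING_ENTRY_SIZE buffer plus a runtime guard, making the stack frame size known at compile time. A minimal sketch of the guard pattern; the constant's value is an assumption, sized for the 8-dword GFXv9 ring entries introduced in this series:

#include <stdint.h>
#include <stdio.h>

#define KFD_MAX_RING_ENTRY_SIZE 8	/* dwords; assumed value */

static int process_entry(uint32_t entry_size_bytes)
{
	uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];

	/* Reject devices whose entries exceed the fixed buffer instead
	 * of overflowing it (cf. the dev_err_once above).
	 */
	if (entry_size_bytes > sizeof(ih_ring_entry)) {
		fprintf(stderr, "Ring entry too small\n");
		return -1;
	}

	ih_ring_entry[0] = 0;	/* placeholder for dequeued data */
	/* ... dequeue into ih_ring_entry and dispatch ... */
	return 0;
}

int main(void)
{
	return process_entry(8 * sizeof(uint32_t));	/* GFXv9-sized entry */
}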
......@@ -72,6 +72,7 @@ struct kernel_queue {
struct kfd_dev *dev;
struct mqd_manager *mqd;
struct queue *queue;
uint64_t pending_wptr64;
uint32_t pending_wptr;
unsigned int nop_packet;
......@@ -79,7 +80,10 @@ struct kernel_queue {
uint32_t *rptr_kernel;
uint64_t rptr_gpu_addr;
struct kfd_mem_obj *wptr_mem;
uint32_t *wptr_kernel;
union {
uint64_t *wptr64_kernel;
uint32_t *wptr_kernel;
};
uint64_t wptr_gpu_addr;
struct kfd_mem_obj *pq;
uint64_t pq_gpu_addr;
......@@ -97,5 +101,6 @@ struct kernel_queue {
void kernel_queue_init_cik(struct kernel_queue_ops *ops);
void kernel_queue_init_vi(struct kernel_queue_ops *ops);
void kernel_queue_init_v9(struct kernel_queue_ops *ops);
#endif /* KFD_KERNEL_QUEUE_H_ */
This diff is collapsed.
......@@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
.interrupt = kgd2kfd_interrupt,
.suspend = kgd2kfd_suspend,
.resume = kgd2kfd_resume,
.quiesce_mm = kgd2kfd_quiesce_mm,
.resume_mm = kgd2kfd_resume_mm,
.schedule_evict_and_restore_process =
kgd2kfd_schedule_evict_and_restore_process,
};
......@@ -81,6 +83,11 @@ module_param(ignore_crat, int, 0444);
MODULE_PARM_DESC(ignore_crat,
"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
int vega10_noretry;
module_param_named(noretry, vega10_noretry, int, 0644);
MODULE_PARM_DESC(noretry,
"Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
static int amdkfd_init_completed;
int kgd2kfd_init(unsigned int interface_version,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.