Commit 5da612fa authored by Dave Airlie

Merge tag 'drm-amdkfd-next-2015-07-20' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- Add Carrizo support for amdkfd, using the new amdgpu driver as the relevant
  kgd. The support includes interfaces with amdgpu for both gfx7 (Kaveri) and
  gfx8 (Carrizo). However, the gfx7 interface is used for debugging purposes
  only, so amdkfd defaults to radeon when a Kaveri GPU is installed.

I would like to note that no new IOCTLs are introduced and that the current
IOCTLs are unchanged, as they are suitable for both gfx7 and gfx8.

* tag 'drm-amdkfd-next-2015-07-20' of git://people.freedesktop.org/~gabbayo/linux:
  drm/amdkfd: Set correct doorbell packet type for Carrizo
  drm/amdkfd: Use generic defines in new amd headers
  drm/amdkfd: Implement create_map_queues() for Carrizo
  drm/amdkfd: fix runlist length calculation
  drm/amdkfd: Add support for VI in DQM
  drm/amdkfd: add support for VI in MQD manager
  drm/amdkfd: add CP HWS packet headers for VI
  drm/amdkfd: add supported CZ devices PCI IDs to amdkfd
  drm/amdkfd: Add dependency of DRM_AMDGPU to Kconfig
  drm/amdgpu: Add amdgpu <--> amdkfd gfx8 interface
  drm/amdgpu: add amdgpu <--> amdkfd gfx7 interface
  drm/amdgpu: Add H/W agnostic amdgpu <--> amdkfd interface
  drm/radeon: Modify kgd_engine_type enum to match CZ
parents 52721d9d 7639a8c4
@@ -636,9 +636,14 @@ M:	Oded Gabbay <oded.gabbay@gmail.com>
 L:	dri-devel@lists.freedesktop.org
 T:	git git://people.freedesktop.org/~gabbayo/linux.git
 S:	Supported
+F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
 F:	drivers/gpu/drm/amd/amdkfd/
 F:	drivers/gpu/drm/amd/include/cik_structs.h
 F:	drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+F:	drivers/gpu/drm/amd/include/vi_structs.h
 F:	drivers/gpu/drm/radeon/radeon_kfd.c
 F:	drivers/gpu/drm/radeon/radeon_kfd.h
 F:	include/uapi/linux/kfd_ioctl.h
...
@@ -71,6 +71,12 @@ amdgpu-y += \
 	amdgpu_vce.o \
 	vce_v3_0.o

+# add amdkfd interfaces
+amdgpu-y += \
+	amdgpu_amdkfd.o \
+	amdgpu_amdkfd_gfx_v7.o \
+	amdgpu_amdkfd_gfx_v8.o
+
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
 amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
...
@@ -2011,6 +2011,9 @@ struct amdgpu_device {
 	/* tracking pinned memory */
 	u64 vram_pin_size;
 	u64 gart_pin_size;
+
+	/* amdkfd interface */
+	struct kfd_dev	*kfd;
 };

 bool amdgpu_device_is_px(struct drm_device *dev);
...
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "amdgpu_amdkfd.h"
#include "amdgpu_family.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include <linux/module.h>
const struct kfd2kgd_calls *kfd2kgd;
const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
bool amdgpu_amdkfd_init(void)
{
#if defined(CONFIG_HSA_AMD_MODULE)
bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
kgd2kfd_init_p = symbol_request(kgd2kfd_init);
if (kgd2kfd_init_p == NULL)
return false;
#endif
return true;
}
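For readers unfamiliar with the CONFIG_HSA_AMD_MODULE branch above, here is a minimal, self-contained sketch of the optional-module symbol pattern it relies on; every name below is illustrative and not part of this commit. symbol_request() resolves an exported symbol and pins the module that provides it (loading it on demand), and symbol_put() drops that reference:

	#include <linux/module.h>

	/* hypothetical export from an optional module */
	extern bool optional_mod_init(unsigned int version);

	static bool (*optional_init_p)(unsigned int);

	static bool bind_optional(void)
	{
		optional_init_p = symbol_request(optional_mod_init);
		return optional_init_p != NULL;	/* NULL if module absent */
	}

	static void unbind_optional(void)
	{
		if (optional_init_p) {
			symbol_put(optional_mod_init);	/* drop module ref */
			optional_init_p = NULL;
		}
	}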
bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
{
	switch (rdev->asic_type) {
	case CHIP_KAVERI:
		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
		break;
	case CHIP_CARRIZO:
		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
		break;
	default:
		return false;
	}

#if defined(CONFIG_HSA_AMD_MODULE)
	kgd2kfd_init_p = symbol_request(kgd2kfd_init);

	if (kgd2kfd_init_p == NULL) {
		kfd2kgd = NULL;
		return false;
	}

	if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) {
		symbol_put(kgd2kfd_init);
		kfd2kgd = NULL;
		kgd2kfd = NULL;

		return false;
	}

	return true;
#elif defined(CONFIG_HSA_AMD)
	if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) {
		kfd2kgd = NULL;
		kgd2kfd = NULL;

		return false;
	}

	return true;
#else
	kfd2kgd = NULL;
	return false;
#endif
}
void amdgpu_amdkfd_fini(void)
{
	if (kgd2kfd) {
		kgd2kfd->exit();
		symbol_put(kgd2kfd_init);
	}
}

void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
{
	if (kgd2kfd)
		rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
					rdev->pdev, kfd2kgd);
}

void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
{
	if (rdev->kfd) {
		struct kgd2kfd_shared_resources gpu_resources = {
			/* VMIDs 8..15 (mask 0xFF00) go to amdkfd,
			 * 0..7 stay with amdgpu */
			.compute_vmid_bitmap = 0xFF00,
			/* pipe 0 is reserved for amdgpu,
			 * pipes 1..3 go to amdkfd */
			.first_compute_pipe = 1,
			.compute_pipe_count = 4 - 1,
		};

		amdgpu_doorbell_get_kfd_info(rdev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
				&gpu_resources.doorbell_start_offset);

		kgd2kfd->device_init(rdev->kfd, &gpu_resources);
	}
}

void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
{
	if (rdev->kfd) {
		kgd2kfd->device_exit(rdev->kfd);
		rdev->kfd = NULL;
	}
}

void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
		const void *ih_ring_entry)
{
	if (rdev->kfd)
		kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
}

void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
{
	if (rdev->kfd)
		kgd2kfd->suspend(rdev->kfd);
}

int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
{
	int r = 0;

	if (rdev->kfd)
		r = kgd2kfd->resume(rdev->kfd);

	return r;
}

u32 pool_to_domain(enum kgd_memory_pool p)
{
	switch (p) {
	case KGD_POOL_FRAMEBUFFER:
		return AMDGPU_GEM_DOMAIN_VRAM;
	default:
		return AMDGPU_GEM_DOMAIN_GTT;
	}
}
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
		void **mem_obj, uint64_t *gpu_addr,
		void **cpu_ptr)
{
	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
	int r;

	BUG_ON(kgd == NULL);
	BUG_ON(gpu_addr == NULL);
	BUG_ON(cpu_ptr == NULL);

	*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if ((*mem) == NULL)
		return -ENOMEM;

	r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
			AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, &(*mem)->bo);
	if (r) {
		dev_err(rdev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		kfree(*mem);	/* don't leak the wrapper on BO create failure */
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve((*mem)->bo, true);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
			&(*mem)->gpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}
	*gpu_addr = (*mem)->gpu_addr;

	r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
	if (r) {
		dev_err(rdev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}
	*cpu_ptr = (*mem)->cpu_ptr;

	amdgpu_bo_unreserve((*mem)->bo);

	return 0;

allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin((*mem)->bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve((*mem)->bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&(*mem)->bo);
	kfree(*mem);	/* free the wrapper on any failure */

	return r;
}
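A hypothetical caller of alloc_gtt_mem() would pair it with free_gtt_mem() below (sketch only; kgd is assumed to be a valid struct kgd_dev pointer, and PAGE_SIZE is just an example size):

	void *mem_obj;
	uint64_t gpu_addr;
	void *cpu_ptr;

	/* one page of GART-backed, CPU-visible memory for amdkfd */
	if (alloc_gtt_mem(kgd, PAGE_SIZE, &mem_obj, &gpu_addr, &cpu_ptr) == 0) {
		memset(cpu_ptr, 0, PAGE_SIZE);	/* CPU writes via kernel map */
		/* ... program gpu_addr into the hardware ... */
		free_gtt_mem(kgd, mem_obj);
	}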
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
	struct kgd_mem *mem = (struct kgd_mem *) mem_obj;

	BUG_ON(mem == NULL);

	amdgpu_bo_reserve(mem->bo, true);
	amdgpu_bo_kunmap(mem->bo);
	amdgpu_bo_unpin(mem->bo);
	amdgpu_bo_unreserve(mem->bo);
	amdgpu_bo_unref(&(mem->bo));
	kfree(mem);
}

uint64_t get_vmem_size(struct kgd_dev *kgd)
{
	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;

	BUG_ON(kgd == NULL);

	return rdev->mc.real_vram_size;
}

uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
{
	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;

	if (rdev->asic_funcs->get_gpu_clock_counter)
		return rdev->asic_funcs->get_gpu_clock_counter(rdev);
	return 0;
}

uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;

	/* The sclk is in quanta of 10 kHz, so dividing by 100 yields MHz
	 * (e.g. sclk == 80000 means 800 MHz). */
	return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
}
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/* amdgpu_amdkfd.h defines the private interface between amdgpu and amdkfd. */
#ifndef AMDGPU_AMDKFD_H_INCLUDED
#define AMDGPU_AMDKFD_H_INCLUDED
#include <linux/types.h>
#include <kgd_kfd_interface.h>
struct amdgpu_device;

struct kgd_mem {
	struct amdgpu_bo *bo;
	uint64_t gpu_addr;
	void *cpu_ptr;
};

bool amdgpu_amdkfd_init(void);
void amdgpu_amdkfd_fini(void);

bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);

void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev);
int amdgpu_amdkfd_resume(struct amdgpu_device *rdev);
void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
		const void *ih_ring_entry);
void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev);

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);

/* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
		void **mem_obj, uint64_t *gpu_addr,
		void **cpu_ptr);
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
uint64_t get_vmem_size(struct kgd_dev *kgd);
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);

#endif /* AMDGPU_AMDKFD_H_INCLUDED */
@@ -44,6 +44,8 @@
 #include "amdgpu.h"
 #include "amdgpu_irq.h"

+#include "amdgpu_amdkfd.h"
+
 /*
  * KMS wrapper.
  * - 3.0.0 - initial driver
@@ -527,12 +529,15 @@ static int __init amdgpu_init(void)
 	driver->num_ioctls = amdgpu_max_kms_ioctl;
 	amdgpu_register_atpx_handler();

+	amdgpu_amdkfd_init();
+
 	/* let modprobe override vga console setting */
 	return drm_pci_init(driver, pdriver);
 }

 static void __exit amdgpu_exit(void)
 {
+	amdgpu_amdkfd_fini();
 	drm_pci_exit(driver, pdriver);
 	amdgpu_unregister_atpx_handler();
 }
...
@@ -24,6 +24,7 @@
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
+#include "amdgpu_amdkfd.h"

 /**
  * amdgpu_ih_ring_alloc - allocate memory for the IH ring
@@ -199,6 +200,12 @@ int amdgpu_ih_process(struct amdgpu_device *adev)
 	rmb();

 	while (adev->irq.ih.rptr != wptr) {
+		u32 ring_index = adev->irq.ih.rptr >> 2;
+
+		/* Before dispatching irq to IP blocks, send it to amdkfd */
+		amdgpu_amdkfd_interrupt(adev,
+				(const void *) &adev->irq.ih.ring[ring_index]);
+
 		amdgpu_ih_decode_iv(adev, &entry);
 		adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
...
@@ -34,6 +34,7 @@
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include "amdgpu_amdkfd.h"

 #if defined(CONFIG_VGA_SWITCHEROO)
 bool amdgpu_has_atpx(void);
@@ -61,6 +62,8 @@ int amdgpu_driver_unload_kms(struct drm_device *dev)

 	pm_runtime_get_sync(dev->dev);

+	amdgpu_amdkfd_device_fini(adev);
+
 	amdgpu_acpi_fini(adev);

 	amdgpu_device_fini(adev);
@@ -118,6 +121,10 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 				"Error during ACPI methods call\n");
 	}

+	amdgpu_amdkfd_load_interface(adev);
+	amdgpu_amdkfd_device_probe(adev);
+	amdgpu_amdkfd_device_init(adev);
+
 	if (amdgpu_device_is_px(dev)) {
 		pm_runtime_use_autosuspend(dev->dev);
 		pm_runtime_set_autosuspend_delay(dev->dev, 5000);
...
@@ -64,6 +64,8 @@
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"

+#include "amdgpu_amdkfd.h"
+
 /*
  * Indirect registers accessor
  */
@@ -2448,14 +2450,21 @@ static int cik_common_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

+	amdgpu_amdkfd_suspend(adev);
+
 	return cik_common_hw_fini(adev);
 }

 static int cik_common_resume(void *handle)
 {
+	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

-	return cik_common_hw_init(adev);
+	r = cik_common_hw_init(adev);
+	if (r)
+		return r;
+
+	return amdgpu_amdkfd_resume(adev);
 }

 static bool cik_common_is_idle(void *handle)
...
@@ -552,6 +552,12 @@
 #define VCE_CMD_IB_AUTO		0x00000005
 #define VCE_CMD_SEMAPHORE	0x00000006

+/* if PTR32, these are the bases for scratch and lds */
+#define	PRIVATE_BASE(x)	((x) << 0)	/* scratch */
+#define	SHARED_BASE(x)	((x) << 16)	/* LDS */
+
+#define KFD_CIK_SDMA_QUEUE_OFFSET	0x200
+
 /* valid for both DEFAULT_MTYPE and APE1_MTYPE */
 enum {
 	MTYPE_CACHED = 0,
...
@@ -66,6 +66,11 @@
 #define AMDGPU_NUM_OF_VMIDS	8

+#define	PIPEID(x)	((x) << 0)
+#define	MEID(x)		((x) << 2)
+#define	VMID(x)		((x) << 4)
+#define	QUEUEID(x)	((x) << 8)
+
 #define RB_BITMAP_WIDTH_PER_SH	2

 #define MC_SEQ_MISC0__MT__MASK	0xf0000000
...
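These macros pack the me/pipe/queue/vmid selector fields used when addressing a specific hardware queue. A quick worked example of the macro arithmetic (values are illustrative only):

	/* Selecting ME 1, pipe 0, queue 2 for VMID 8:
	 *   MEID(1) | PIPEID(0) | VMID(8) | QUEUEID(2)
	 * = (1 << 2) | (0 << 0) | (8 << 4) | (2 << 8)
	 * = 0x004 | 0x000 | 0x080 | 0x200 = 0x284
	 */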
@@ -4,6 +4,6 @@
 config HSA_AMD
 	tristate "HSA kernel driver for AMD GPU devices"
-	depends on DRM_RADEON && AMD_IOMMU_V2 && X86_64
+	depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64
 	help
 	  Enable this if you want to use HSA features on AMD GPU devices.
@@ -2,7 +2,8 @@
 #
 # Makefile for Heterogenous System Architecture support for AMD GPU devices
 #

-ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/
+ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ \
+		-Idrivers/gpu/drm/amd/include/asic_reg

 amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
 		kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
...
@@ -65,17 +65,6 @@
 #define AQL_ENABLE	1

-#define SDMA_RB_VMID(x)			(x << 24)
-#define SDMA_RB_ENABLE			(1 << 0)
-#define SDMA_RB_SIZE(x)			((x) << 1)	/* log2 */
-#define SDMA_RPTR_WRITEBACK_ENABLE	(1 << 12)
-#define SDMA_RPTR_WRITEBACK_TIMER(x)	((x) << 16)	/* log2 */
-
-#define SDMA_OFFSET(x)			(x << 0)
-#define SDMA_DB_ENABLE			(1 << 28)
-#define SDMA_ATC			(1 << 0)
-#define SDMA_VA_PTR32			(1 << 4)
-#define SDMA_VA_SHARED_BASE(x)		(x << 8)
-
 #define GRBM_GFX_INDEX	0x30800

 #define ATC_VMID_PASID_MAPPING_VALID	(1U << 31)
...
@@ -80,7 +80,12 @@ static const struct kfd_deviceid supported_devices[] = {
 	{ 0x1318, &kaveri_device_info },	/* Kaveri */
 	{ 0x131B, &kaveri_device_info },	/* Kaveri */
 	{ 0x131C, &kaveri_device_info },	/* Kaveri */
-	{ 0x131D, &kaveri_device_info }		/* Kaveri */
+	{ 0x131D, &kaveri_device_info },	/* Kaveri */
+	{ 0x9870, &carrizo_device_info },	/* Carrizo */
+	{ 0x9874, &carrizo_device_info },	/* Carrizo */
+	{ 0x9875, &carrizo_device_info },	/* Carrizo */
+	{ 0x9876, &carrizo_device_info },	/* Carrizo */
+	{ 0x9877, &carrizo_device_info }	/* Carrizo */
 };

 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
...
@@ -23,6 +23,7 @@

 #include "kfd_device_queue_manager.h"
 #include "cik_regs.h"
+#include "oss/oss_2_4_sh_mask.h"

 static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd,
@@ -135,13 +136,16 @@ static int register_process_cik(struct device_queue_manager *dqm,
 static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
 				struct qcm_process_device *qpd)
 {
-	uint32_t value = SDMA_ATC;
+	uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);

 	if (q->process->is_32bit_user_mode)
-		value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
+		value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
+				get_sh_mem_bases_32(qpd_to_pdd(qpd));
 	else
-		value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
-							qpd_to_pdd(qpd)));
+		value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
+				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
+				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;

 	q->properties.sdma_vm_addr = value;
 }
...
@@ -22,6 +22,10 @@
  */

 #include "kfd_device_queue_manager.h"
+#include "gca/gfx_8_0_enum.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "oss/oss_3_0_sh_mask.h"

 static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd,
@@ -37,14 +41,40 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,

 void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops)
 {
-	pr_warn("amdkfd: VI DQM is not currently supported\n");
-
 	ops->set_cache_memory_policy = set_cache_memory_policy_vi;
 	ops->register_process = register_process_vi;
 	ops->initialize = initialize_cpsch_vi;
 	ops->init_sdma_vm = init_sdma_vm;
 }
+static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
+{
+	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
+	 * scratch and GPUVM apertures.
+	 * The hardware fills in the remaining 59 bits according to the
+	 * following pattern:
+	 * LDS:		X0000000'00000000 - X0000001'00000000 (4GB)
+	 * Scratch:	X0000001'00000000 - X0000002'00000000 (4GB)
+	 * GPUVM:	Y0010000'00000000 - Y0020000'00000000 (1TB)
+	 *
+	 * (where X/Y is the configurable nybble with the low bit 0)
+	 *
+	 * LDS and scratch will have the same top nybble programmed in the
+	 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
+	 * GPUVM can have a different top nybble programmed in the
+	 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
+	 * We don't bother to support different top nybbles
+	 * for LDS/Scratch and GPUVM.
+	 */
+
+	BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
+		top_address_nybble == 0);
+
+	return top_address_nybble << 12 |
+			(top_address_nybble << 12) <<
+			SH_MEM_BASES__SHARED_BASE__SHIFT;
+}
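A worked example of the return value above (assuming SH_MEM_BASES__SHARED_BASE__SHIFT is 16, per gfx_8_0_sh_mask.h):

	/* top_address_nybble = 0x2:
	 *   PRIVATE_BASE = 0x2 << 12         = 0x00002000
	 *   SHARED_BASE  = (0x2 << 12) << 16 = 0x20000000
	 *   SH_MEM_BASES = 0x20002000
	 * i.e. LDS/scratch live under 0x2000000000000000 and GPUVM under
	 * 0x2001000000000000, matching the aperture map in the comment above.
	 */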
 static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd,
 		enum cache_policy default_policy,
@@ -52,18 +82,83 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
 		void __user *alternate_aperture_base,
 		uint64_t alternate_aperture_size)
 {
-	return false;
+	uint32_t default_mtype;
+	uint32_t ape1_mtype;
+
+	default_mtype = (default_policy == cache_policy_coherent) ?
+			MTYPE_CC :
+			MTYPE_NC;
+
+	ape1_mtype = (alternate_policy == cache_policy_coherent) ?
+			MTYPE_CC :
+			MTYPE_NC;
+
+	qpd->sh_mem_config = (qpd->sh_mem_config &
+			SH_MEM_CONFIG__ADDRESS_MODE_MASK) |
+		SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
+		default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
+		ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
+		SH_MEM_CONFIG__PRIVATE_ATC_MASK;
+
+	return true;
 }
 static int register_process_vi(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd)
 {
-	return -1;
+	struct kfd_process_device *pdd;
+	unsigned int temp;
+
+	BUG_ON(!dqm || !qpd);
+
+	pdd = qpd_to_pdd(qpd);
+
+	/* check if sh_mem_config register already configured */
+	if (qpd->sh_mem_config == 0) {
+		qpd->sh_mem_config =
+			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
+			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
+			MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
+			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
+
+		qpd->sh_mem_ape1_limit = 0;
+		qpd->sh_mem_ape1_base = 0;
+	}
+
+	if (qpd->pqm->process->is_32bit_user_mode) {
+		temp = get_sh_mem_bases_32(pdd);
+		qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT;
+		qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 <<
+				SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
+	} else {
+		temp = get_sh_mem_bases_nybble_64(pdd);
+		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+		qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
+				SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
+	}
+
+	pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
+		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
+
+	return 0;
 }
 static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
 		struct qcm_process_device *qpd)
 {
+	uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
+
+	if (q->process->is_32bit_user_mode)
+		value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
+				get_sh_mem_bases_32(qpd_to_pdd(qpd));
+	else
+		value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
+				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
+				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
+
+	q->properties.sdma_vm_addr = value;
 }

 static int initialize_cpsch_vi(struct device_queue_manager *dqm)
...
@@ -27,6 +27,7 @@
 #include "kfd_mqd_manager.h"
 #include "cik_regs.h"
 #include "cik_structs.h"
+#include "oss/oss_2_4_sh_mask.h"

 static inline struct cik_mqd *get_mqd(void *mqd)
 {
@@ -214,17 +215,20 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
 	BUG_ON(!mm || !mqd || !q);

 	m = get_sdma_mqd(mqd);
-	m->sdma_rlc_rb_cntl =
-		SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) |
-		SDMA_RB_VMID(q->vmid) |
-		SDMA_RPTR_WRITEBACK_ENABLE |
-		SDMA_RPTR_WRITEBACK_TIMER(6);
+	m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) <<
+			SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+			q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+			1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+			6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;

 	m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8);
 	m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
 	m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
 	m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
-	m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE;
+	m->sdma_rlc_doorbell = q->doorbell_off <<
+			SDMA0_RLC0_DOORBELL__OFFSET__SHIFT |
+			1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT;

 	m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
 	m->sdma_engine_id = q->sdma_engine_id;
@@ -234,7 +238,9 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
 	if (q->queue_size > 0 &&
 			q->queue_address != 0 &&
 			q->queue_percent > 0) {
-		m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE;
+		m->sdma_rlc_rb_cntl |=
+				1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT;
+
 		q->is_active = true;
 	}
...
@@ -22,12 +22,255 @@
  */

 #include <linux/printk.h>
+#include <linux/slab.h>
 #include "kfd_priv.h"
 #include "kfd_mqd_manager.h"
+#include "vi_structs.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "gca/gfx_8_0_enum.h"
+
+#define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
+
+static inline struct vi_mqd *get_mqd(void *mqd)
+{
+	return (struct vi_mqd *)mqd;
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	int retval;
+	uint64_t addr;
+	struct vi_mqd *m;
+
+	retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
+			mqd_mem_obj);
+	if (retval != 0)
+		return -ENOMEM;
+
+	m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
+	addr = (*mqd_mem_obj)->gpu_addr;
+
+	memset(m, 0, sizeof(struct vi_mqd));
+
+	m->header = 0xC0310800;
+	m->compute_pipelinestat_enable = 1;
+	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT |
+			MTYPE_UC << CP_MQD_CONTROL__MTYPE__SHIFT;
+
+	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+	m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+			10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+	m->cp_hqd_pipe_priority = 1;
+	m->cp_hqd_queue_priority = 15;
+
+	m->cp_hqd_eop_rptr = 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL)
+		m->cp_hqd_iq_rptr = 1;
+
+	*mqd = m;
+	if (gart_addr != NULL)
+		*gart_addr = addr;
+	retval = mm->update_mqd(mm, m, q);
+
+	return retval;
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t __user *wptr)
+{
+	return mm->dev->kfd2kgd->hqd_load
+		(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
+}
+static int __update_mqd(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q, unsigned int mtype,
+			unsigned int atc_bit)
+{
+	struct vi_mqd *m;
+
+	BUG_ON(!mm || !q || !mqd);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	m = get_mqd(mqd);
+
+	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
+			atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT |
+			mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
+	m->cp_hqd_pq_control |=
+			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+	pr_debug("kfd: cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+
+	m->cp_hqd_pq_doorbell_control =
+		1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT |
+		q->doorbell_off <<
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	pr_debug("kfd: cp_hqd_pq_doorbell_control 0x%x\n",
+			m->cp_hqd_pq_doorbell_control);
+
+	m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT |
+			mtype << CP_HQD_EOP_CONTROL__MTYPE__SHIFT;
+
+	m->cp_hqd_ib_control = atc_bit << CP_HQD_IB_CONTROL__IB_ATC__SHIFT |
+			3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
+			mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT;
+
+	m->cp_hqd_eop_control |=
+		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1;
+	m->cp_hqd_eop_base_addr_lo =
+			lower_32_bits(q->eop_ring_buffer_address >> 8);
+	m->cp_hqd_eop_base_addr_hi =
+			upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+	m->cp_hqd_iq_timer = atc_bit << CP_HQD_IQ_TIMER__IQ_ATC__SHIFT |
+			mtype << CP_HQD_IQ_TIMER__MTYPE__SHIFT;
+
+	m->cp_hqd_vmid = q->vmid;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
+	}
+
+	m->cp_hqd_active = 0;
+	q->is_active = false;
+	if (q->queue_size > 0 &&
+			q->queue_address != 0 &&
+			q->queue_percent > 0) {
+		m->cp_hqd_active = 1;
+		q->is_active = true;
+	}
+
+	return 0;
+}
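The "- 1 - 1" ring-size math above deserves a worked example (illustrative):

	/* For a 4 KiB ring: q->queue_size / sizeof(unsigned int) = 1024 dwords.
	 * ffs() returns the 1-indexed position of the lowest set bit, so
	 * ffs(1024) = 11 and the field receives 11 - 1 - 1 = 9; the CP then
	 * interprets the ring size as 2^(9 + 1) = 1024 dwords = 4 KiB.
	 */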
+static int update_mqd(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q)
+{
+	return __update_mqd(mm, mqd, q, MTYPE_CC, 1);
+}
+
+static int destroy_mqd(struct mqd_manager *mm, void *mqd,
+			enum kfd_preempt_type type,
+			unsigned int timeout, uint32_t pipe_id,
+			uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_destroy
+		(mm->dev->kgd, type, timeout,
+		pipe_id, queue_id);
+}
+
+static void uninit_mqd(struct mqd_manager *mm, void *mqd,
+			struct kfd_mem_obj *mqd_mem_obj)
+{
+	BUG_ON(!mm || !mqd);
+	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static bool is_occupied(struct mqd_manager *mm, void *mqd,
+			uint64_t queue_address, uint32_t pipe_id,
+			uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_is_occupied(
+		mm->dev->kgd, queue_address,
+		pipe_id, queue_id);
+}
+
+static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct vi_mqd *m;
+	int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+	if (retval != 0)
+		return retval;
+
+	m = get_mqd(*mqd);
+
+	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+
+	return retval;
+}
+
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q)
+{
+	struct vi_mqd *m;
+	int retval = __update_mqd(mm, mqd, q, MTYPE_UC, 0);
+
+	if (retval != 0)
+		return retval;
+
+	m = get_mqd(mqd);
+	m->cp_hqd_vmid = q->vmid;
+
+	return retval;
+}
+
 struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 		struct kfd_dev *dev)
 {
-	pr_warn("amdkfd: VI MQD is not currently supported\n");
-	return NULL;
+	struct mqd_manager *mqd;
+
+	BUG_ON(!dev);
+	BUG_ON(type >= KFD_MQD_TYPE_MAX);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
+	if (!mqd)
+		return NULL;
+
+	mqd->dev = dev;
+
+	switch (type) {
+	case KFD_MQD_TYPE_CP:
+	case KFD_MQD_TYPE_COMPUTE:
+		mqd->init_mqd = init_mqd;
+		mqd->uninit_mqd = uninit_mqd;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+		break;
+	case KFD_MQD_TYPE_HIQ:
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->uninit_mqd = uninit_mqd;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd_hiq;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+		break;
+	case KFD_MQD_TYPE_SDMA:
+		break;
+	default:
+		kfree(mqd);
+		return NULL;
+	}
+
+	return mqd;
 }
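For orientation, a hypothetical caller would consume the returned function table roughly like this (sketch only; dev, q, pipe_id, queue_id, timeout and the preempt type value are assumed to be in scope, mirroring how the existing CIK path drives its mqd_manager):

	struct mqd_manager *mqd;
	struct kfd_mem_obj *mqd_mem_obj;
	uint64_t gart_addr;
	void *m;

	mqd = mqd_manager_init_vi(KFD_MQD_TYPE_COMPUTE, dev);
	if (mqd && mqd->init_mqd(mqd, &m, &mqd_mem_obj, &gart_addr,
			&q->properties) == 0) {
		mqd->load_mqd(mqd, m, pipe_id, queue_id,
				q->properties.write_ptr);
		/* ... later ... */
		mqd->destroy_mqd(mqd, m, type, timeout, pipe_id, queue_id);
		mqd->uninit_mqd(mqd, m, mqd_mem_obj);
	}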
@@ -27,6 +27,7 @@
 #include "kfd_kernel_queue.h"
 #include "kfd_priv.h"
 #include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_vi.h"
 #include "kfd_pm4_opcodes.h"

 static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
@@ -55,6 +56,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 				bool *over_subscription)
 {
 	unsigned int process_count, queue_count;
+	unsigned int map_queue_size;

 	BUG_ON(!pm || !rlib_size || !over_subscription);

@@ -69,9 +71,13 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 		pr_debug("kfd: over subscribed runlist\n");
 	}

+	/* Carrizo uses the larger VI MAP_QUEUES packet */
+	map_queue_size =
+		(pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ?
+		sizeof(struct pm4_mes_map_queues) :
+		sizeof(struct pm4_map_queues);
 	/* calculate run list ib allocation size */
 	*rlib_size = process_count * sizeof(struct pm4_map_process) +
-		     queue_count * sizeof(struct pm4_map_queues);
+		     queue_count * map_queue_size;

 	/*
 	 * Increase the allocation size in case we need a chained run list
@@ -176,6 +182,71 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
 	return 0;
 }

+static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer,
+		struct queue *q, bool is_static)
+{
+	struct pm4_mes_map_queues *packet;
+	bool use_static = is_static;
+
+	BUG_ON(!pm || !buffer || !q);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	packet = (struct pm4_mes_map_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+	packet->header.u32All = build_pm4_header(IT_MAP_QUEUES,
+			sizeof(struct pm4_mes_map_queues));
+	packet->bitfields2.alloc_format =
+			alloc_format__mes_map_queues__one_per_pipe_vi;
+	packet->bitfields2.num_queues = 1;
+	packet->bitfields2.queue_sel =
+		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+	packet->bitfields2.engine_sel =
+			engine_sel__mes_map_queues__compute_vi;
+	packet->bitfields2.queue_type =
+			queue_type__mes_map_queues__normal_compute_vi;
+
+	switch (q->properties.type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+		if (use_static)
+			packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_latency_static_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.queue_type =
+			queue_type__mes_map_queues__debug_interface_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+		packet->bitfields2.engine_sel =
+				engine_sel__mes_map_queues__sdma0_vi;
+		use_static = false; /* no static queues under SDMA */
+		break;
+	default:
+		pr_err("kfd: in %s queue type %d\n", __func__,
+				q->properties.type);
+		BUG();
+		break;
+	}
+
+	packet->bitfields3.doorbell_offset =
+			q->properties.doorbell_off;
+
+	packet->mqd_addr_lo =
+			lower_32_bits(q->gart_mqd_addr);
+	packet->mqd_addr_hi =
+			upper_32_bits(q->gart_mqd_addr);
+
+	packet->wptr_addr_lo =
+			lower_32_bits((uint64_t)q->properties.write_ptr);
+	packet->wptr_addr_hi =
+			upper_32_bits((uint64_t)q->properties.write_ptr);
+
+	return 0;
+}
+
 static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
 		struct queue *q, bool is_static)
 {
@@ -292,8 +363,17 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 			pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
 				kq->queue->queue, qpd->is_debug);

-			retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
-						kq->queue, qpd->is_debug);
+			if (pm->dqm->dev->device_info->asic_family ==
+					CHIP_CARRIZO)
+				retval = pm_create_map_queue_vi(pm,
+						&rl_buffer[rl_wptr],
+						kq->queue,
+						qpd->is_debug);
+			else
+				retval = pm_create_map_queue(pm,
+						&rl_buffer[rl_wptr],
+						kq->queue,
+						qpd->is_debug);
 			if (retval != 0)
 				return retval;

@@ -309,8 +389,17 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 			pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
 				q->queue, qpd->is_debug);

-			retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
-						q, qpd->is_debug);
+			if (pm->dqm->dev->device_info->asic_family ==
+					CHIP_CARRIZO)
+				retval = pm_create_map_queue_vi(pm,
+						&rl_buffer[rl_wptr],
+						q,
+						qpd->is_debug);
+			else
+				retval = pm_create_map_queue(pm,
+						&rl_buffer[rl_wptr],
+						q,
+						qpd->is_debug);
 			if (retval != 0)
 				return retval;
...
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef F32_MES_PM4_PACKETS_H
#define F32_MES_PM4_PACKETS_H

#ifndef PM4_MES_HEADER_DEFINED
#define PM4_MES_HEADER_DEFINED

union PM4_MES_TYPE_3_HEADER {
	struct {
		uint32_t reserved1:8;	/* < reserved */
		uint32_t opcode:8;	/* < IT opcode */
		uint32_t count:14;	/* < number of DWORDs - 1 in the
					 *   information body */
		uint32_t type:2;	/* < packet identifier;
					 *   should be 3 for type-3 packets */
	};
	uint32_t u32All;
};
#endif /* PM4_MES_HEADER_DEFINED */
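The count field holds the number of body dwords minus one; since the body is the whole packet minus the single header dword, a builder typically writes packet_size/4 - 2. A minimal sketch of such a helper (modeled on amdkfd's build_pm4_header; illustrative, not part of this header):

	static uint32_t build_type3_header(unsigned int opcode,
			size_t packet_size_in_bytes)
	{
		union PM4_MES_TYPE_3_HEADER header;

		header.u32All = 0;
		header.opcode = opcode;
		/* (total dwords - 1 header dword) - 1 */
		header.count = packet_size_in_bytes / sizeof(uint32_t) - 2;
		header.type = 3;	/* type-3 packet */

		return header.u32All;
	}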
/*--------------------MES_SET_RESOURCES--------------------*/

#ifndef PM4_MES_SET_RESOURCES_DEFINED
#define PM4_MES_SET_RESOURCES_DEFINED

enum mes_set_resources_queue_type_enum {
	queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
	queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
	queue_type__mes_set_resources__hsa_debug_interface_queue = 4
};

struct pm4_mes_set_resources {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			uint32_t vmid_mask:16;
			uint32_t unmap_latency:8;
			uint32_t reserved1:5;
			enum mes_set_resources_queue_type_enum queue_type:3;
		} bitfields2;
		uint32_t ordinal2;
	};

	uint32_t queue_mask_lo;
	uint32_t queue_mask_hi;
	uint32_t gws_mask_lo;
	uint32_t gws_mask_hi;

	union {
		struct {
			uint32_t oac_mask:16;
			uint32_t reserved2:16;
		} bitfields7;
		uint32_t ordinal7;
	};

	union {
		struct {
			uint32_t gds_heap_base:6;
			uint32_t reserved3:5;
			uint32_t gds_heap_size:6;
			uint32_t reserved4:15;
		} bitfields8;
		uint32_t ordinal8;
	};
};
#endif
/*--------------------MES_RUN_LIST--------------------*/

#ifndef PM4_MES_RUN_LIST_DEFINED
#define PM4_MES_RUN_LIST_DEFINED

struct pm4_mes_runlist {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			uint32_t reserved1:2;
			uint32_t ib_base_lo:30;
		} bitfields2;
		uint32_t ordinal2;
	};

	union {
		struct {
			uint32_t ib_base_hi:16;
			uint32_t reserved2:16;
		} bitfields3;
		uint32_t ordinal3;
	};

	union {
		struct {
			uint32_t ib_size:20;
			uint32_t chain:1;
			uint32_t offload_polling:1;
			uint32_t reserved3:1;
			uint32_t valid:1;
			uint32_t reserved4:8;
		} bitfields4;
		uint32_t ordinal4;
	};
};
#endif
/*--------------------MES_MAP_PROCESS--------------------*/

#ifndef PM4_MES_MAP_PROCESS_DEFINED
#define PM4_MES_MAP_PROCESS_DEFINED

struct pm4_mes_map_process {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			uint32_t pasid:16;
			uint32_t reserved1:8;
			uint32_t diq_enable:1;
			uint32_t process_quantum:7;
		} bitfields2;
		uint32_t ordinal2;
	};

	union {
		struct {
			uint32_t page_table_base:28;
			uint32_t reserved2:4;
		} bitfields3;
		uint32_t ordinal3;
	};

	uint32_t sh_mem_bases;
	uint32_t sh_mem_ape1_base;
	uint32_t sh_mem_ape1_limit;
	uint32_t sh_mem_config;
	uint32_t gds_addr_lo;
	uint32_t gds_addr_hi;

	union {
		struct {
			uint32_t num_gws:6;
			uint32_t reserved3:2;
			uint32_t num_oac:4;
			uint32_t reserved4:4;
			uint32_t gds_size:6;
			uint32_t num_queues:10;
		} bitfields10;
		uint32_t ordinal10;
	};
};
#endif
/*--------------------MES_MAP_QUEUES--------------------*/

#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED
#define PM4_MES_MAP_QUEUES_VI_DEFINED

enum mes_map_queues_queue_sel_vi_enum {
	queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0,
	queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1
};

enum mes_map_queues_queue_type_vi_enum {
	queue_type__mes_map_queues__normal_compute_vi = 0,
	queue_type__mes_map_queues__debug_interface_queue_vi = 1,
	queue_type__mes_map_queues__normal_latency_static_queue_vi = 2,
	queue_type__mes_map_queues__low_latency_static_queue_vi = 3
};

enum mes_map_queues_alloc_format_vi_enum {
	alloc_format__mes_map_queues__one_per_pipe_vi = 0,
	alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
};

enum mes_map_queues_engine_sel_vi_enum {
	engine_sel__mes_map_queues__compute_vi = 0,
	engine_sel__mes_map_queues__sdma0_vi = 2,
	engine_sel__mes_map_queues__sdma1_vi = 3
};

struct pm4_mes_map_queues {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			uint32_t reserved1:4;
			enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
			uint32_t reserved2:15;
			enum mes_map_queues_queue_type_vi_enum queue_type:3;
			enum mes_map_queues_alloc_format_vi_enum alloc_format:2;
			enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
			uint32_t num_queues:3;
		} bitfields2;
		uint32_t ordinal2;
	};

	union {
		struct {
			uint32_t reserved3:1;
			uint32_t check_disable:1;
			uint32_t doorbell_offset:21;
			uint32_t reserved4:3;
			uint32_t queue:6;
		} bitfields3;
		uint32_t ordinal3;
	};

	uint32_t mqd_addr_lo;
	uint32_t mqd_addr_hi;
	uint32_t wptr_addr_lo;
	uint32_t wptr_addr_hi;
};
#endif
/*--------------------MES_QUERY_STATUS--------------------*/

#ifndef PM4_MES_QUERY_STATUS_DEFINED
#define PM4_MES_QUERY_STATUS_DEFINED

enum mes_query_status_interrupt_sel_enum {
	interrupt_sel__mes_query_status__completion_status = 0,
	interrupt_sel__mes_query_status__process_status = 1,
	interrupt_sel__mes_query_status__queue_status = 2
};

enum mes_query_status_command_enum {
	command__mes_query_status__interrupt_only = 0,
	command__mes_query_status__fence_only_immediate = 1,
	command__mes_query_status__fence_only_after_write_ack = 2,
	command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
};

enum mes_query_status_engine_sel_enum {
	engine_sel__mes_query_status__compute = 0,
	engine_sel__mes_query_status__sdma0_queue = 2,
	engine_sel__mes_query_status__sdma1_queue = 3
};

struct pm4_mes_query_status {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			uint32_t context_id:28;
			enum mes_query_status_interrupt_sel_enum
				interrupt_sel:2;
			enum mes_query_status_command_enum command:2;
		} bitfields2;
		uint32_t ordinal2;
	};

	union {
		struct {
			uint32_t pasid:16;
			uint32_t reserved1:16;
		} bitfields3a;
		struct {
			uint32_t reserved2:2;
			uint32_t doorbell_offset:21;
			uint32_t reserved3:2;
			enum mes_query_status_engine_sel_enum engine_sel:3;
			uint32_t reserved4:4;
		} bitfields3b;
		uint32_t ordinal3;
	};

	uint32_t addr_lo;
	uint32_t addr_hi;
	uint32_t data_lo;
	uint32_t data_hi;
};
#endif
/*--------------------MES_UNMAP_QUEUES--------------------*/

#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
#define PM4_MES_UNMAP_QUEUES_DEFINED

enum mes_unmap_queues_action_enum {
	action__mes_unmap_queues__preempt_queues = 0,
	action__mes_unmap_queues__reset_queues = 1,
	action__mes_unmap_queues__disable_process_queues = 2,
	action__mes_unmap_queues__reserved = 3
};

enum mes_unmap_queues_queue_sel_enum {
	queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
	queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
	queue_sel__mes_unmap_queues__unmap_all_queues = 2,
	queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
};

enum mes_unmap_queues_engine_sel_enum {
	engine_sel__mes_unmap_queues__compute = 0,
	engine_sel__mes_unmap_queues__sdma0 = 2,
	engine_sel__mes_unmap_queues__sdma1 = 3
};

struct PM4_MES_UNMAP_QUEUES {
	union {
		union PM4_MES_TYPE_3_HEADER header;	/* header */
		uint32_t ordinal1;
	};

	union {
		struct {
			enum mes_unmap_queues_action_enum action:2;
			uint32_t reserved1:2;
			enum mes_unmap_queues_queue_sel_enum queue_sel:2;
			uint32_t reserved2:20;
			enum mes_unmap_queues_engine_sel_enum engine_sel:3;
			uint32_t num_queues:3;
		} bitfields2;
		uint32_t ordinal2;
	};

	union {
		struct {
			uint32_t pasid:16;
			uint32_t reserved3:16;
		} bitfields3a;
		struct {
			uint32_t reserved4:2;
			uint32_t doorbell_offset0:21;
			uint32_t reserved5:9;
		} bitfields3b;
		uint32_t ordinal3;
	};

	union {
		struct {
			uint32_t reserved6:2;
			uint32_t doorbell_offset1:21;
			uint32_t reserved7:9;
		} bitfields4;
		uint32_t ordinal4;
	};

	union {
		struct {
			uint32_t reserved8:2;
			uint32_t doorbell_offset2:21;
			uint32_t reserved9:9;
		} bitfields5;
		uint32_t ordinal5;
	};

	union {
		struct {
			uint32_t reserved10:2;
			uint32_t doorbell_offset3:21;
			uint32_t reserved11:9;
		} bitfields6;
		uint32_t ordinal6;
	};
};
#endif

#endif
@@ -1186,6 +1186,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	 * TODO: Retrieve max engine clock values from KGD
 	 */

+	if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
+		dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE;
+		pr_info("amdkfd: adding doorbell packet type capability\n");
+	}
+
 	res = 0;

 err:
...
@@ -40,6 +40,7 @@
 #define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK	0x00000f00
 #define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT	8
 #define HSA_CAP_RESERVED			0xfffff000
+#define HSA_CAP_DOORBELL_PACKET_TYPE		0x00001000

 struct kfd_node_properties {
 	uint32_t cpu_cores_count;
...
@@ -52,7 +52,8 @@ enum kgd_engine_type {
 	KGD_ENGINE_MEC1,
 	KGD_ENGINE_MEC2,
 	KGD_ENGINE_RLC,
-	KGD_ENGINE_SDMA,
+	KGD_ENGINE_SDMA1,
+	KGD_ENGINE_SDMA2,
 	KGD_ENGINE_MAX
 };
...
@@ -845,7 +845,8 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 		hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data;
 		break;

-	case KGD_ENGINE_SDMA:
+	case KGD_ENGINE_SDMA1:
+	case KGD_ENGINE_SDMA2:
 		hdr = (const union radeon_firmware_header *)
 			rdev->sdma_fw->data;
 		break;
...