Commit ca1130de authored by Dave Airlie's avatar Dave Airlie

Merge tag 'drm-amdkfd-next-2015-01-21' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- Infrastructure work in amdkfd to prepare for VI support. This work mainly
  includes separating modules into ASIC-specific functionality, adding
  new properties that are relevant for VI, making sure that shared code is
  reused, etc.

- Improve mechanism of submitting packets to HIQ (the kernel queue that amdkfd
  uses to issue commands to the GPU). The driver used to verify that each CS
  was read by the GPU. However, this proved to be both unnecessary and erroneous.
  Therefore, we cancelled this verification.

- Moved initialization of compute VMIDs into radeon driver

- Various minor fixes

* tag 'drm-amdkfd-next-2015-01-21' of git://people.freedesktop.org/~gabbayo/linux: (22 commits)
  drm/amdkfd: Fix description of sched_policy module parameter
  drm/amdkfd: Remove sync_with_hw() from amdkfd
  drm/amdkfd: Remove unused function busy_wait()
  drm/amdkfd: Replace cpu_relax() with schedule() in DQM
  drm/amdkfd: Fix for-loop when allocating HQD (non-HWS)
  drm/amdkfd: Add initial VI support for KQ
  drm/amdkfd: Encapsulate KQ functions in ops structure
  drm/amdkfd: Add initial VI support for DQM
  drm/amdkfd: Encapsulate DQM functions in ops structure
  drm/amdkfd: Don't BUG on freeing GART sub-allocation
  drm/amdkfd: Fix logic of destroy_queue_nocpsch()
  MAINTAINERS: Update amdkfd files
  drm/amdkfd: Change MQD manager to be H/W specific
  drm/amdkfd: Add asic property to kfd_device_info
  drm/amdkfd: Make KFD_MQD_TYPE enum types H/W agnostic
  drm/amdkfd: Add new VI-specific queue properties
  drm/radeon: Use new cik_structs.h file
  drm/amdkfd: Don't include header files from radeon
  drm/amd: Put cik structures in a common place
  drm/radeon: Don't use relative paths in #include
  ...
parents fc839753 cb2ac441
......@@ -624,6 +624,8 @@ L: dri-devel@lists.freedesktop.org
T: git git://people.freedesktop.org/~gabbayo/linux.git
S: Supported
F: drivers/gpu/drm/amd/amdkfd/
F: drivers/gpu/drm/amd/include/cik_structs.h
F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h
F: drivers/gpu/drm/radeon/radeon_kfd.c
F: drivers/gpu/drm/radeon/radeon_kfd.h
F: include/uapi/linux/kfd_ioctl.h
......
......@@ -7,8 +7,11 @@ ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/
amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
kfd_process.o kfd_queue.o kfd_mqd_manager.o \
kfd_kernel_queue.o kfd_packet_manager.o \
kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
kfd_kernel_queue.o kfd_kernel_queue_cik.o \
kfd_kernel_queue_vi.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o \
kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
kfd_interrupt.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o
......@@ -168,6 +168,8 @@
#define IB_ATC_EN (1U << 23)
#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20)
#define AQL_ENABLE 1
#define CP_HQD_DEQUEUE_REQUEST 0xC974
#define DEQUEUE_REQUEST_DRAIN 1
#define DEQUEUE_REQUEST_RESET 2
......@@ -188,6 +190,17 @@
#define MQD_VMID_MASK (0xf << 0)
#define MQD_CONTROL_PRIV_STATE_EN (1U << 8)
#define SDMA_RB_VMID(x) (x << 24)
#define SDMA_RB_ENABLE (1 << 0)
#define SDMA_RB_SIZE(x) ((x) << 1) /* log2 */
#define SDMA_RPTR_WRITEBACK_ENABLE (1 << 12)
#define SDMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
#define SDMA_OFFSET(x) (x << 0)
#define SDMA_DB_ENABLE (1 << 28)
#define SDMA_ATC (1 << 0)
#define SDMA_VA_PTR32 (1 << 4)
#define SDMA_VA_SHARED_BASE(x) (x << 8)
#define GRBM_GFX_INDEX 0x30800
#define INSTANCE_INDEX(x) ((x) << 0)
#define SH_INDEX(x) ((x) << 8)
......
......@@ -145,6 +145,8 @@ static long kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
static int set_queue_properties_from_user(struct queue_properties *q_properties,
struct kfd_ioctl_create_queue_args *args)
{
void *tmp;
if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
return -EINVAL;
......@@ -182,6 +184,20 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
return -EFAULT;
}
tmp = (void *)(uintptr_t)args->eop_buffer_address;
if (tmp != NULL &&
!access_ok(VERIFY_WRITE, tmp, sizeof(uint32_t))) {
pr_debug("kfd: can't access eop buffer");
return -EFAULT;
}
tmp = (void *)(uintptr_t)args->ctx_save_restore_address;
if (tmp != NULL &&
!access_ok(VERIFY_WRITE, tmp, sizeof(uint32_t))) {
pr_debug("kfd: can't access ctx save restore buffer");
return -EFAULT;
}
q_properties->is_interop = false;
q_properties->queue_percent = args->queue_percentage;
q_properties->priority = args->queue_priority;
......@@ -189,6 +205,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->queue_size = args->ring_size;
q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
q_properties->eop_ring_buffer_address = args->eop_buffer_address;
q_properties->eop_ring_buffer_size = args->eop_buffer_size;
q_properties->ctx_save_restore_area_address =
args->ctx_save_restore_address;
q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
......@@ -220,6 +241,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
pr_debug("Queue Format (%d)\n", q_properties->format);
pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address);
pr_debug("Queue CTX save arex (0x%llX)\n",
q_properties->ctx_save_restore_area_address);
return 0;
}
......@@ -244,9 +270,12 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
if (err)
return err;
pr_debug("kfd: looking for gpu id 0x%x\n", args.gpu_id);
dev = kfd_device_by_id(args.gpu_id);
if (dev == NULL)
if (dev == NULL) {
pr_debug("kfd: gpu id 0x%x was not found\n", args.gpu_id);
return -EINVAL;
}
mutex_lock(&p->mutex);
......@@ -410,7 +439,7 @@ static long kfd_ioctl_set_memory_policy(struct file *filep,
(args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
? cache_policy_coherent : cache_policy_noncoherent;
if (!dev->dqm->set_cache_memory_policy(dev->dqm,
if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
&pdd->qpd,
default_policy,
alternate_policy,
......
......@@ -31,6 +31,14 @@
#define MQD_SIZE_ALIGNED 768
static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
.max_pasid_bits = 16,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.mqd_size_aligned = MQD_SIZE_ALIGNED
};
static const struct kfd_device_info carrizo_device_info = {
.asic_family = CHIP_CARRIZO,
.max_pasid_bits = 16,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.num_of_watch_points = 4,
......@@ -65,7 +73,7 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x1318, &kaveri_device_info }, /* Kaveri */
{ 0x131B, &kaveri_device_info }, /* Kaveri */
{ 0x131C, &kaveri_device_info }, /* Kaveri */
{ 0x131D, &kaveri_device_info }, /* Kaveri */
{ 0x131D, &kaveri_device_info } /* Kaveri */
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
......@@ -245,7 +253,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto device_queue_manager_error;
}
if (kfd->dqm->start(kfd->dqm) != 0) {
if (kfd->dqm->ops.start(kfd->dqm) != 0) {
dev_err(kfd_device,
"Error starting queuen manager for device (%x:%x)\n",
kfd->pdev->vendor, kfd->pdev->device);
......@@ -299,7 +307,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
BUG_ON(kfd == NULL);
if (kfd->init_complete) {
kfd->dqm->stop(kfd->dqm);
kfd->dqm->ops.stop(kfd->dqm);
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev);
}
......@@ -320,7 +328,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback);
kfd->dqm->start(kfd->dqm);
kfd->dqm->ops.start(kfd->dqm);
}
return 0;
......@@ -503,7 +511,10 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
unsigned int bit;
BUG_ON(!kfd);
BUG_ON(!mem_obj);
/* Act like kfree when trying to free a NULL object */
if (!mem_obj)
return 0;
pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n",
mem_obj, mem_obj->range_start, mem_obj->range_end);
......
......@@ -46,7 +46,7 @@ struct device_process_node {
};
/**
* struct device_queue_manager
* struct device_queue_manager_ops
*
* @create_queue: Queue creation routine.
*
......@@ -81,15 +81,9 @@ struct device_process_node {
* @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
* memory apertures.
*
* This struct is a base class for the kfd queues scheduler in the
* device level. The device base class should expose the basic operations
* for queue creation and queue destruction. This base class hides the
* scheduling mode of the driver and the specific implementation of the
* concrete device. This class is the only class in the queues scheduler
* that configures the H/W.
*/
struct device_queue_manager {
struct device_queue_manager_ops {
int (*create_queue)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd,
......@@ -124,7 +118,23 @@ struct device_queue_manager {
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
};
/**
* struct device_queue_manager
*
* This struct is a base class for the kfd queues scheduler in the
* device level. The device base class should expose the basic operations
* for queue creation and queue destruction. This base class hides the
* scheduling mode of the driver and the specific implementation of the
* concrete device. This class is the only class in the queues scheduler
* that configures the H/W.
*
*/
struct device_queue_manager {
struct device_queue_manager_ops ops;
struct device_queue_manager_ops ops_asic_specific;
struct mqd_manager *mqds[KFD_MQD_TYPE_MAX];
struct packet_manager packets;
......@@ -146,6 +156,14 @@ struct device_queue_manager {
bool active_runlist;
};
void device_queue_manager_init_cik(struct device_queue_manager_ops *ops);
void device_queue_manager_init_vi(struct device_queue_manager_ops *ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *qpd);
inline unsigned int get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd);
int init_pipelines(struct device_queue_manager *dqm,
unsigned int pipes_num, unsigned int first_pipe);
inline unsigned int get_pipes_num(struct device_queue_manager *dqm);
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "kfd_device_queue_manager.h"
#include "cik_regs.h"
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
static int register_process_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int initialize_cpsch_cik(struct device_queue_manager *dqm);
/*
 * Install the CIK implementations of the ASIC-specific DQM callbacks.
 *
 * @ops: callback table to fill in. Only the initialize, register_process
 *       and set_cache_memory_policy slots are written here; every other
 *       member is left as the caller provided it.
 */
void device_queue_manager_init_cik(struct device_queue_manager_ops *ops)
{
	ops->initialize = initialize_cpsch_cik;
	ops->register_process = register_process_cik;
	ops->set_cache_memory_policy = set_cache_memory_policy_cik;
}
/*
 * Build the SH_MEM_BASES value used for a 64-bit process.
 *
 * @top_address_nybble: top nybble of the aperture addresses. It must be
 *                      non-zero, even, and no larger than 0xE; anything
 *                      else triggers BUG_ON().
 *
 * Returns the nybble (shifted left by 12) placed in both the PRIVATE_BASE
 * and the SHARED_BASE fields.
 */
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
	unsigned int base_bits;

	/*
	 * In 64-bit mode only the top 3 bits of the LDS, scratch and GPUVM
	 * apertures can be controlled. The hardware fills in the remaining
	 * address bits according to the following pattern:
	 *
	 *   LDS:     X0000000'00000000 - X0000001'00000000 (4GB)
	 *   Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
	 *   GPUVM:   Y0010000'00000000 - Y0020000'00000000 (1TB)
	 *
	 * X/Y is the configurable nybble, whose low bit is always 0.
	 *
	 * LDS and scratch share one top nybble, programmed into the top
	 * 3 bits of SH_MEM_BASES.PRIVATE_BASE. GPUVM could use a different
	 * top nybble, programmed into the top 3 bits of
	 * SH_MEM_BASES.SHARED_BASE, but we don't bother to support
	 * different nybbles for LDS/Scratch and GPUVM.
	 */
	BUG_ON(top_address_nybble == 0 || top_address_nybble > 0xE ||
		(top_address_nybble & 1));

	base_bits = top_address_nybble << 12;

	return PRIVATE_BASE(base_bits) | SHARED_BASE(base_bits);
}
/*
 * Recompute qpd->sh_mem_config from the requested cache policies.
 *
 * @dqm:                     unused here.
 * @qpd:                     per-process data whose sh_mem_config is updated.
 * @default_policy:          policy for the default aperture.
 * @alternate_policy:        policy for the alternate (APE1) aperture.
 * @alternate_aperture_base: unused here.
 * @alternate_aperture_size: unused here.
 *
 * A coherent policy selects MTYPE_NONCACHED, anything else selects
 * MTYPE_CACHED. Of the previous sh_mem_config value only the PTR32 bit
 * is carried over. Always returns true.
 */
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd,
		enum cache_policy default_policy,
		enum cache_policy alternate_policy,
		void __user *alternate_aperture_base,
		uint64_t alternate_aperture_size)
{
	uint32_t default_mtype;
	uint32_t ape1_mtype;

	if (default_policy == cache_policy_coherent)
		default_mtype = MTYPE_NONCACHED;
	else
		default_mtype = MTYPE_CACHED;

	if (alternate_policy == cache_policy_coherent)
		ape1_mtype = MTYPE_NONCACHED;
	else
		ape1_mtype = MTYPE_CACHED;

	/* Single store: keep PTR32, rebuild every other field. */
	qpd->sh_mem_config = (qpd->sh_mem_config & PTR32) |
		ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
		DEFAULT_MTYPE(default_mtype) |
		APE1_MTYPE(ape1_mtype);

	return true;
}
/*
 * Fill in the SH_MEM_* values kept in @qpd for a newly registered process.
 *
 * @dqm: device queue manager; only checked for NULL.
 * @qpd: per-process data to initialise.
 *
 * If sh_mem_config is still zero it receives the default (unaligned,
 * non-cached) configuration and the APE1 base/limit are cleared. The
 * sh_mem_bases value is then derived differently for 32-bit and 64-bit
 * processes; 32-bit processes additionally get PTR32 set in
 * sh_mem_config.
 *
 * Always returns 0.
 */
static int register_process_cik(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct kfd_process_device *pdd;
	unsigned int temp;

	BUG_ON(!dqm || !qpd);

	pdd = qpd_to_pdd(qpd);

	/* A zero sh_mem_config means the register was never configured. */
	if (qpd->sh_mem_config == 0) {
		qpd->sh_mem_ape1_base = 0;
		qpd->sh_mem_ape1_limit = 0;
		qpd->sh_mem_config =
			APE1_MTYPE(MTYPE_NONCACHED) |
			DEFAULT_MTYPE(MTYPE_NONCACHED) |
			ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	}

	if (!qpd->pqm->process->is_32bit_user_mode) {
		temp = get_sh_mem_bases_nybble_64(pdd);
		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
	} else {
		temp = get_sh_mem_bases_32(pdd);
		qpd->sh_mem_config |= PTR32;
		qpd->sh_mem_bases = SHARED_BASE(temp);
	}

	pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);

	return 0;
}
/*
 * CIK initialisation hook: hand every pipe reported by get_pipes_num(),
 * starting from pipe 0, to init_pipelines() and return its result.
 */
static int initialize_cpsch_cik(struct device_queue_manager *dqm)
{
	unsigned int pipes_num = get_pipes_num(dqm);

	return init_pipelines(dqm, pipes_num, 0);
}
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "kfd_device_queue_manager.h"
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
static int register_process_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int initialize_cpsch_vi(struct device_queue_manager *dqm);
/*
 * Install the VI implementations of the ASIC-specific DQM callbacks.
 *
 * @ops: callback table to fill in. Only the initialize, register_process
 *       and set_cache_memory_policy slots are written.
 *
 * A warning is logged first because the VI callbacks in this file are
 * placeholders only.
 */
void device_queue_manager_init_vi(struct device_queue_manager_ops *ops)
{
	pr_warn("amdkfd: VI DQM is not currently supported\n");

	ops->initialize = initialize_cpsch_vi;
	ops->register_process = register_process_vi;
	ops->set_cache_memory_policy = set_cache_memory_policy_vi;
}
/*
 * VI placeholder for the set_cache_memory_policy callback.
 *
 * None of the arguments are read or modified; the function
 * unconditionally returns false.
 */
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size)
{
return false;
}
/*
 * VI placeholder for the register_process callback.
 *
 * Neither argument is used; the function always returns -1.
 * NOTE(review): this is a bare -1 rather than a named errno constant -
 * confirm what callers of the callback expect on failure.
 */
static int register_process_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
return -1;
}
/*
 * VI placeholder for the initialize callback: performs no work and
 * returns 0.
 */
static int initialize_cpsch_vi(struct device_queue_manager *dqm)
{
return 0;
}
......@@ -56,8 +56,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
switch (type) {
case KFD_QUEUE_TYPE_DIQ:
case KFD_QUEUE_TYPE_HIQ:
kq->mqd = dev->dqm->get_mqd_manager(dev->dqm,
KFD_MQD_TYPE_CIK_HIQ);
kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm,
KFD_MQD_TYPE_HIQ);
break;
default:
BUG();
......@@ -73,13 +73,16 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
goto err_get_kernel_doorbell;
retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
if (retval != 0)
goto err_pq_allocate_vidmem;
kq->pq_kernel_addr = kq->pq->cpu_ptr;
kq->pq_gpu_addr = kq->pq->gpu_addr;
retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size);
if (retval == false)
goto err_eop_allocate_vidmem;
retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
&kq->rptr_mem);
......@@ -111,6 +114,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
prop.queue_address = kq->pq_gpu_addr;
prop.read_ptr = (uint32_t *) kq->rptr_gpu_addr;
prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
prop.eop_ring_buffer_address = kq->eop_gpu_addr;
prop.eop_ring_buffer_size = PAGE_SIZE;
if (init_queue(&kq->queue, prop) != 0)
goto err_init_queue;
......@@ -156,6 +161,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
err_wptr_allocate_vidmem:
kfd_gtt_sa_free(dev, kq->rptr_mem);
err_rptr_allocate_vidmem:
kfd_gtt_sa_free(dev, kq->eop_mem);
err_eop_allocate_vidmem:
kfd_gtt_sa_free(dev, kq->pq);
err_pq_allocate_vidmem:
pr_err("kfd: error init pq\n");
......@@ -182,6 +189,7 @@ static void uninitialize(struct kernel_queue *kq)
kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
kq->ops_asic_specific.uninitialize(kq);
kfd_gtt_sa_free(kq->dev, kq->pq);
kfd_release_kernel_doorbell(kq->dev,
kq->queue->properties.doorbell_ptr);
......@@ -254,28 +262,6 @@ static void submit_packet(struct kernel_queue *kq)
kq->pending_wptr);
}
static int sync_with_hw(struct kernel_queue *kq, unsigned long timeout_ms)
{
unsigned long org_timeout_ms;
BUG_ON(!kq);
org_timeout_ms = timeout_ms;
timeout_ms += jiffies * 1000 / HZ;
while (*kq->wptr_kernel != *kq->rptr_kernel) {
if (time_after(jiffies * 1000 / HZ, timeout_ms)) {
pr_err("kfd: kernel_queue %s timeout expired %lu\n",
__func__, org_timeout_ms);
pr_err("kfd: wptr: %d rptr: %d\n",
*kq->wptr_kernel, *kq->rptr_kernel);
return -ETIME;
}
schedule();
}
return 0;
}
static void rollback_packet(struct kernel_queue *kq)
{
BUG_ON(!kq);
......@@ -293,14 +279,20 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
if (!kq)
return NULL;
kq->initialize = initialize;
kq->uninitialize = uninitialize;
kq->acquire_packet_buffer = acquire_packet_buffer;
kq->submit_packet = submit_packet;
kq->sync_with_hw = sync_with_hw;
kq->rollback_packet = rollback_packet;
kq->ops.initialize = initialize;
kq->ops.uninitialize = uninitialize;
kq->ops.acquire_packet_buffer = acquire_packet_buffer;
kq->ops.submit_packet = submit_packet;
kq->ops.rollback_packet = rollback_packet;
/*
 * Select the ASIC-specific kernel queue callbacks.
 *
 * Every case must end with a break: kernel_queue_init_vi() and
 * kernel_queue_init_cik() write the same initialize/uninitialize slots,
 * so letting CHIP_CARRIZO fall through into CHIP_KAVERI would replace
 * the VI callbacks with the CIK ones.
 */
switch (dev->device_info->asic_family) {
case CHIP_CARRIZO:
	kernel_queue_init_vi(&kq->ops_asic_specific);
	break;
case CHIP_KAVERI:
	kernel_queue_init_cik(&kq->ops_asic_specific);
	break;
}
if (kq->initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) {
if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) {
pr_err("kfd: failed to init kernel queue\n");
kfree(kq);
return NULL;
......@@ -312,7 +304,7 @@ void kernel_queue_uninit(struct kernel_queue *kq)
{
BUG_ON(!kq);
kq->uninitialize(kq);
kq->ops.uninitialize(kq);
kfree(kq);
}
......@@ -324,19 +316,18 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
BUG_ON(!dev);
pr_debug("kfd: starting kernel queue test\n");
pr_err("kfd: starting kernel queue test\n");
kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
BUG_ON(!kq);
retval = kq->acquire_packet_buffer(kq, 5, &buffer);
retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
BUG_ON(retval != 0);
for (i = 0; i < 5; i++)
buffer[i] = kq->nop_packet;
kq->submit_packet(kq);
kq->sync_with_hw(kq, 1000);
kq->ops.submit_packet(kq);
pr_debug("kfd: ending kernel queue test\n");
pr_err("kfd: ending kernel queue test\n");
}
......@@ -28,8 +28,31 @@
#include <linux/types.h>
#include "kfd_priv.h"
struct kernel_queue {
/* interface */
/**
* struct kernel_queue_ops
*
* @initialize: Initialize a kernel queue, including allocations of GART memory
* needed for the queue.
*
* @uninitialize: Uninitialize a kernel queue and free all its memory usages.
*
* @acquire_packet_buffer: Returns a pointer to the location in the kernel
* queue ring buffer where the calling function can write its packet. It is
* guaranteed that there is enough space for that packet. It also updates the
* pending write pointer to that location so subsequent calls to
* acquire_packet_buffer will get a correct write pointer
*
* @submit_packet: Update the write pointer and doorbell of a kernel queue.
*
* @sync_with_hw: Wait until the write pointer and the read pointer of a kernel
* queue are equal, which means the CP has read all the submitted packets.
*
* @rollback_packet: This routine is called if we failed to build an acquired
* packet for some reason. It just overwrites the pending wptr with the current
* one
*
*/
struct kernel_queue_ops {
bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
void (*uninitialize)(struct kernel_queue *kq);
......@@ -38,9 +61,12 @@ struct kernel_queue {
unsigned int **buffer_ptr);
void (*submit_packet)(struct kernel_queue *kq);
int (*sync_with_hw)(struct kernel_queue *kq,
unsigned long timeout_ms);
void (*rollback_packet)(struct kernel_queue *kq);
};
struct kernel_queue {
struct kernel_queue_ops ops;
struct kernel_queue_ops ops_asic_specific;
/* data */
struct kfd_dev *dev;
......@@ -58,6 +84,9 @@ struct kernel_queue {
struct kfd_mem_obj *pq;
uint64_t pq_gpu_addr;
uint32_t *pq_kernel_addr;
struct kfd_mem_obj *eop_mem;
uint64_t eop_gpu_addr;
uint32_t *eop_kernel_addr;
struct kfd_mem_obj *fence_mem_obj;
uint64_t fence_gpu_addr;
......@@ -66,4 +95,7 @@ struct kernel_queue {
struct list_head list;
};
void kernel_queue_init_cik(struct kernel_queue_ops *ops);
void kernel_queue_init_vi(struct kernel_queue_ops *ops);
#endif /* KFD_KERNEL_QUEUE_H_ */
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "kfd_kernel_queue.h"
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_cik(struct kernel_queue *kq);
/*
 * Install the CIK implementations of the ASIC-specific kernel queue
 * callbacks.
 *
 * @ops: callback table to fill in. Only the initialize and uninitialize
 *       slots are written.
 */
void kernel_queue_init_cik(struct kernel_queue_ops *ops)
{
	ops->uninitialize = uninitialize_cik;
	ops->initialize = initialize_cik;
}
/*
 * CIK ASIC-specific kernel queue initialisation hook.
 *
 * Does nothing with its arguments and always returns true.
 */
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size)
{
return true;
}
/*
 * CIK ASIC-specific kernel queue teardown hook; intentionally a no-op,
 * mirroring initialize_cik() which acquires nothing.
 */
static void uninitialize_cik(struct kernel_queue *kq)
{
}
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "kfd_kernel_queue.h"
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_vi(struct kernel_queue *kq);
/*
 * Install the VI implementations of the ASIC-specific kernel queue
 * callbacks.
 *
 * @ops: callback table to fill in. Only the initialize and uninitialize
 *       slots are written.
 */
void kernel_queue_init_vi(struct kernel_queue_ops *ops)
{
	ops->uninitialize = uninitialize_vi;
	ops->initialize = initialize_vi;
}
/*
 * VI ASIC-specific kernel queue initialisation hook.
 *
 * @kq:         kernel queue that receives the EOP buffer.
 * @dev:        device to allocate the buffer from.
 * @type:       unused here.
 * @queue_size: unused here.
 *
 * Allocates PAGE_SIZE bytes through kfd_gtt_sa_allocate() into
 * kq->eop_mem, records the buffer's CPU and GPU addresses in @kq and
 * zeroes it.
 *
 * Returns true on success, false if the allocation fails.
 */
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
		enum kfd_queue_type type, unsigned int queue_size)
{
	if (kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem) != 0)
		return false;

	kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
	kq->eop_gpu_addr = kq->eop_mem->gpu_addr;

	memset(kq->eop_kernel_addr, 0, PAGE_SIZE);

	return true;
}
/*
 * VI ASIC-specific kernel queue teardown hook: releases kq->eop_mem,
 * the buffer allocated by initialize_vi(), via kfd_gtt_sa_free().
 */
static void uninitialize_vi(struct kernel_queue *kq)
{
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
}
......@@ -48,7 +48,7 @@ static const struct kgd2kfd_calls kgd2kfd = {
int sched_policy = KFD_SCHED_POLICY_HWS;
module_param(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
"Kernel cmdline parameter that defines the amdkfd scheduling policy");
"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
int max_num_of_processes = KFD_MAX_NUM_OF_PROCESSES_DEFAULT;
module_param(max_num_of_processes, int, 0444);
......
This diff is collapsed.
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/printk.h>
#include "kfd_priv.h"
#include "kfd_mqd_manager.h"
/*
 * VI placeholder for MQD manager creation.
 *
 * Logs a warning that VI MQDs are not supported and returns NULL
 * regardless of @type and @dev, so callers must be prepared for a NULL
 * result.
 */
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
{
pr_warn("amdkfd: VI MQD is not currently supported\n");
return NULL;
}
......@@ -348,7 +348,7 @@ int pm_send_set_resources(struct packet_manager *pm,
pr_debug("kfd: In func %s\n", __func__);
mutex_lock(&pm->lock);
pm->priv_queue->acquire_packet_buffer(pm->priv_queue,
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
sizeof(*packet) / sizeof(uint32_t),
(unsigned int **)&packet);
if (packet == NULL) {
......@@ -375,8 +375,7 @@ int pm_send_set_resources(struct packet_manager *pm,
packet->queue_mask_lo = lower_32_bits(res->queue_mask);
packet->queue_mask_hi = upper_32_bits(res->queue_mask);
pm->priv_queue->submit_packet(pm->priv_queue);
pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
pm->priv_queue->ops.submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
......@@ -402,7 +401,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t);
mutex_lock(&pm->lock);
retval = pm->priv_queue->acquire_packet_buffer(pm->priv_queue,
retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
packet_size_dwords, &rl_buffer);
if (retval != 0)
goto fail_acquire_packet_buffer;
......@@ -412,15 +411,14 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
if (retval != 0)
goto fail_create_runlist;
pm->priv_queue->submit_packet(pm->priv_queue);
pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
pm->priv_queue->ops.submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
return retval;
fail_create_runlist:
pm->priv_queue->rollback_packet(pm->priv_queue);
pm->priv_queue->ops.rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
mutex_unlock(&pm->lock);
fail_create_runlist_ib:
......@@ -438,7 +436,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
BUG_ON(!pm || !fence_address);
mutex_lock(&pm->lock);
retval = pm->priv_queue->acquire_packet_buffer(
retval = pm->priv_queue->ops.acquire_packet_buffer(
pm->priv_queue,
sizeof(struct pm4_query_status) / sizeof(uint32_t),
(unsigned int **)&packet);
......@@ -459,8 +457,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
packet->data_hi = upper_32_bits((uint64_t)fence_value);
packet->data_lo = lower_32_bits((uint64_t)fence_value);
pm->priv_queue->submit_packet(pm->priv_queue);
pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
pm->priv_queue->ops.submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
return 0;
......@@ -482,7 +479,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
BUG_ON(!pm);
mutex_lock(&pm->lock);
retval = pm->priv_queue->acquire_packet_buffer(
retval = pm->priv_queue->ops.acquire_packet_buffer(
pm->priv_queue,
sizeof(struct pm4_unmap_queues) / sizeof(uint32_t),
&buffer);
......@@ -537,8 +534,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
break;
};
pm->priv_queue->submit_packet(pm->priv_queue);
pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
pm->priv_queue->ops.submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
return 0;
......
......@@ -104,7 +104,13 @@ enum cache_policy {
cache_policy_noncoherent
};
/*
 * ASIC families known to this header.
 *
 * The enumerators are numbered explicitly so that the numeric value of
 * each family is visible at a glance.
 * NOTE(review): presumably the value kept in kfd_device_info.asic_family
 * - confirm against the device tables.
 */
enum asic_family_type {
	CHIP_KAVERI = 0,
	CHIP_CARRIZO = 1
};
struct kfd_device_info {
unsigned int asic_family;
unsigned int max_pasid_bits;
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
......@@ -299,6 +305,11 @@ struct queue_properties {
uint32_t sdma_engine_id;
uint32_t sdma_queue_id;
uint32_t sdma_vm_addr;
/* Relevant only for VI */
uint64_t eop_ring_buffer_address;
uint32_t eop_ring_buffer_size;
uint64_t ctx_save_restore_area_address;
uint32_t ctx_save_restore_area_size;
};
/**
......@@ -351,10 +362,10 @@ struct queue {
* Please read the kfd_mqd_manager.h description.
*/
enum KFD_MQD_TYPE {
KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */
KFD_MQD_TYPE_CIK_HIQ, /* for hiq */
KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */
KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */
KFD_MQD_TYPE_COMPUTE = 0, /* for no cp scheduling */
KFD_MQD_TYPE_HIQ, /* for hiq */
KFD_MQD_TYPE_CP, /* for cp queues and diq */
KFD_MQD_TYPE_SDMA, /* for sdma queues */
KFD_MQD_TYPE_MAX
};
......@@ -562,6 +573,10 @@ void print_queue(struct queue *q);
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
......
......@@ -178,7 +178,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (list_empty(&pqm->queues)) {
pdd->qpd.pqm = pqm;
dev->dqm->register_process(dev->dqm, &pdd->qpd);
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
}
pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL);
......@@ -204,7 +204,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
retval = dev->dqm->create_queue(dev->dqm, q, &pdd->qpd,
retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
&q->properties.vmid);
print_queue(q);
break;
......@@ -217,7 +217,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
kq->queue->properties.queue_id = *qid;
pqn->kq = kq;
pqn->q = NULL;
retval = dev->dqm->create_kernel_queue(dev->dqm, kq, &pdd->qpd);
retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
kq, &pdd->qpd);
break;
default:
BUG();
......@@ -285,13 +286,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (pqn->kq) {
/* destroy kernel queue (DIQ) */
dqm = pqn->kq->dev->dqm;
dqm->destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
kernel_queue_uninit(pqn->kq);
}
if (pqn->q) {
dqm = pqn->q->device->dqm;
retval = dqm->destroy_queue(dqm, &pdd->qpd, pqn->q);
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval != 0)
return retval;
......@@ -303,7 +304,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
clear_bit(qid, pqm->queue_slot_bitmap);
if (list_empty(&pqm->queues))
dqm->unregister_process(dqm, &pdd->qpd);
dqm->ops.unregister_process(dqm, &pdd->qpd);
return retval;
}
......@@ -324,7 +325,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
pqn->q->properties.queue_percent = p->queue_percent;
pqn->q->properties.priority = p->priority;
retval = pqn->q->device->dqm->update_queue(pqn->q->device->dqm, pqn->q);
retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
pqn->q);
if (retval != 0)
return retval;
......
/*
* Copyright 2012 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef CIK_STRUCTS_H_
#define CIK_STRUCTS_H_
/*
 * struct cik_mqd - CIK "mqd" structure, laid out as 128 consecutive
 * 32-bit words (512 bytes, since every member is a uint32_t).
 *
 * The member order is the layout; do not reorder, insert or remove
 * members. The reserved_N names equal the zero-based dword index of the
 * member (reserved_96 is dword 96, reserved_108 is dword 108, ...).
 *
 * NOTE(review): "mqd" presumably stands for memory queue descriptor, the
 * structure that is loaded into a H/W hqd slot - confirm.
 */
struct cik_mqd {
/* dword 0: header */
uint32_t header;
/* dwords 1-48: compute_* members */
uint32_t compute_dispatch_initiator;
uint32_t compute_dim_x;
uint32_t compute_dim_y;
uint32_t compute_dim_z;
uint32_t compute_start_x;
uint32_t compute_start_y;
uint32_t compute_start_z;
uint32_t compute_num_thread_x;
uint32_t compute_num_thread_y;
uint32_t compute_num_thread_z;
uint32_t compute_pipelinestat_enable;
uint32_t compute_perfcount_enable;
uint32_t compute_pgm_lo;
uint32_t compute_pgm_hi;
uint32_t compute_tba_lo;
uint32_t compute_tba_hi;
uint32_t compute_tma_lo;
uint32_t compute_tma_hi;
uint32_t compute_pgm_rsrc1;
uint32_t compute_pgm_rsrc2;
uint32_t compute_vmid;
uint32_t compute_resource_limits;
uint32_t compute_static_thread_mgmt_se0;
uint32_t compute_static_thread_mgmt_se1;
uint32_t compute_tmpring_size;
uint32_t compute_static_thread_mgmt_se2;
uint32_t compute_static_thread_mgmt_se3;
uint32_t compute_restart_x;
uint32_t compute_restart_y;
uint32_t compute_restart_z;
uint32_t compute_thread_trace_enable;
uint32_t compute_misc_reserved;
uint32_t compute_user_data_0;
uint32_t compute_user_data_1;
uint32_t compute_user_data_2;
uint32_t compute_user_data_3;
uint32_t compute_user_data_4;
uint32_t compute_user_data_5;
uint32_t compute_user_data_6;
uint32_t compute_user_data_7;
uint32_t compute_user_data_8;
uint32_t compute_user_data_9;
uint32_t compute_user_data_10;
uint32_t compute_user_data_11;
uint32_t compute_user_data_12;
uint32_t compute_user_data_13;
uint32_t compute_user_data_14;
uint32_t compute_user_data_15;
/* dwords 49-50: cp_compute_csinvoc_count (lo/hi halves) */
uint32_t cp_compute_csinvoc_count_lo;
uint32_t cp_compute_csinvoc_count_hi;
/* dwords 51-95: cp_mqd_* and cp_hqd_* members */
uint32_t cp_mqd_base_addr_lo;
uint32_t cp_mqd_base_addr_hi;
uint32_t cp_hqd_active;
uint32_t cp_hqd_vmid;
uint32_t cp_hqd_persistent_state;
uint32_t cp_hqd_pipe_priority;
uint32_t cp_hqd_queue_priority;
uint32_t cp_hqd_quantum;
uint32_t cp_hqd_pq_base_lo;
uint32_t cp_hqd_pq_base_hi;
uint32_t cp_hqd_pq_rptr;
uint32_t cp_hqd_pq_rptr_report_addr_lo;
uint32_t cp_hqd_pq_rptr_report_addr_hi;
uint32_t cp_hqd_pq_wptr_poll_addr_lo;
uint32_t cp_hqd_pq_wptr_poll_addr_hi;
uint32_t cp_hqd_pq_doorbell_control;
uint32_t cp_hqd_pq_wptr;
uint32_t cp_hqd_pq_control;
uint32_t cp_hqd_ib_base_addr_lo;
uint32_t cp_hqd_ib_base_addr_hi;
uint32_t cp_hqd_ib_rptr;
uint32_t cp_hqd_ib_control;
uint32_t cp_hqd_iq_timer;
uint32_t cp_hqd_iq_rptr;
uint32_t cp_hqd_dequeue_request;
uint32_t cp_hqd_dma_offload;
uint32_t cp_hqd_sema_cmd;
uint32_t cp_hqd_msg_type;
uint32_t cp_hqd_atomic0_preop_lo;
uint32_t cp_hqd_atomic0_preop_hi;
uint32_t cp_hqd_atomic1_preop_lo;
uint32_t cp_hqd_atomic1_preop_hi;
uint32_t cp_hqd_hq_status0;
uint32_t cp_hqd_hq_control0;
uint32_t cp_mqd_control;
uint32_t cp_mqd_query_time_lo;
uint32_t cp_mqd_query_time_hi;
uint32_t cp_mqd_connect_start_time_lo;
uint32_t cp_mqd_connect_start_time_hi;
uint32_t cp_mqd_connect_end_time_lo;
uint32_t cp_mqd_connect_end_time_hi;
uint32_t cp_mqd_connect_end_wf_count;
uint32_t cp_mqd_connect_end_pq_rptr;
uint32_t cp_mqd_connect_end_pq_wptr;
uint32_t cp_mqd_connect_end_ib_rptr;
/* dwords 96-99: reserved */
uint32_t reserved_96;
uint32_t reserved_97;
uint32_t reserved_98;
uint32_t reserved_99;
/* dwords 100-107: iqtimer packet (header followed by dw0..dw6) */
uint32_t iqtimer_pkt_header;
uint32_t iqtimer_pkt_dw0;
uint32_t iqtimer_pkt_dw1;
uint32_t iqtimer_pkt_dw2;
uint32_t iqtimer_pkt_dw3;
uint32_t iqtimer_pkt_dw4;
uint32_t iqtimer_pkt_dw5;
uint32_t iqtimer_pkt_dw6;
/* dwords 108-111: reserved */
uint32_t reserved_108;
uint32_t reserved_109;
uint32_t reserved_110;
uint32_t reserved_111;
/* dwords 112-127: queue_doorbell_id0..queue_doorbell_id15 */
uint32_t queue_doorbell_id0;
uint32_t queue_doorbell_id1;
uint32_t queue_doorbell_id2;
uint32_t queue_doorbell_id3;
uint32_t queue_doorbell_id4;
uint32_t queue_doorbell_id5;
uint32_t queue_doorbell_id6;
uint32_t queue_doorbell_id7;
uint32_t queue_doorbell_id8;
uint32_t queue_doorbell_id9;
uint32_t queue_doorbell_id10;
uint32_t queue_doorbell_id11;
uint32_t queue_doorbell_id12;
uint32_t queue_doorbell_id13;
uint32_t queue_doorbell_id14;
uint32_t queue_doorbell_id15;
};
/*
 * struct cik_sdma_rlc_registers - CIK SDMA RLC register image, laid out
 * as 130 consecutive 32-bit words (520 bytes, since every member is a
 * uint32_t).
 *
 * The member order is the layout; do not reorder, insert or remove
 * members. The reserved_N names equal the zero-based dword index of the
 * member (reserved_22 is dword 22 ... reserved_127 is dword 127).
 */
struct cik_sdma_rlc_registers {
/* dwords 0-21: sdma_rlc_* members */
uint32_t sdma_rlc_rb_cntl;
uint32_t sdma_rlc_rb_base;
uint32_t sdma_rlc_rb_base_hi;
uint32_t sdma_rlc_rb_rptr;
uint32_t sdma_rlc_rb_wptr;
uint32_t sdma_rlc_rb_wptr_poll_cntl;
uint32_t sdma_rlc_rb_wptr_poll_addr_hi;
uint32_t sdma_rlc_rb_wptr_poll_addr_lo;
uint32_t sdma_rlc_rb_rptr_addr_hi;
uint32_t sdma_rlc_rb_rptr_addr_lo;
uint32_t sdma_rlc_ib_cntl;
uint32_t sdma_rlc_ib_rptr;
uint32_t sdma_rlc_ib_offset;
uint32_t sdma_rlc_ib_base_lo;
uint32_t sdma_rlc_ib_base_hi;
uint32_t sdma_rlc_ib_size;
uint32_t sdma_rlc_skip_cntl;
uint32_t sdma_rlc_context_status;
uint32_t sdma_rlc_doorbell;
uint32_t sdma_rlc_virtual_addr;
uint32_t sdma_rlc_ape1_cntl;
uint32_t sdma_rlc_doorbell_log;
/* dwords 22-127: reserved */
uint32_t reserved_22;
uint32_t reserved_23;
uint32_t reserved_24;
uint32_t reserved_25;
uint32_t reserved_26;
uint32_t reserved_27;
uint32_t reserved_28;
uint32_t reserved_29;
uint32_t reserved_30;
uint32_t reserved_31;
uint32_t reserved_32;
uint32_t reserved_33;
uint32_t reserved_34;
uint32_t reserved_35;
uint32_t reserved_36;
uint32_t reserved_37;
uint32_t reserved_38;
uint32_t reserved_39;
uint32_t reserved_40;
uint32_t reserved_41;
uint32_t reserved_42;
uint32_t reserved_43;
uint32_t reserved_44;
uint32_t reserved_45;
uint32_t reserved_46;
uint32_t reserved_47;
uint32_t reserved_48;
uint32_t reserved_49;
uint32_t reserved_50;
uint32_t reserved_51;
uint32_t reserved_52;
uint32_t reserved_53;
uint32_t reserved_54;
uint32_t reserved_55;
uint32_t reserved_56;
uint32_t reserved_57;
uint32_t reserved_58;
uint32_t reserved_59;
uint32_t reserved_60;
uint32_t reserved_61;
uint32_t reserved_62;
uint32_t reserved_63;
uint32_t reserved_64;
uint32_t reserved_65;
uint32_t reserved_66;
uint32_t reserved_67;
uint32_t reserved_68;
uint32_t reserved_69;
uint32_t reserved_70;
uint32_t reserved_71;
uint32_t reserved_72;
uint32_t reserved_73;
uint32_t reserved_74;
uint32_t reserved_75;
uint32_t reserved_76;
uint32_t reserved_77;
uint32_t reserved_78;
uint32_t reserved_79;
uint32_t reserved_80;
uint32_t reserved_81;
uint32_t reserved_82;
uint32_t reserved_83;
uint32_t reserved_84;
uint32_t reserved_85;
uint32_t reserved_86;
uint32_t reserved_87;
uint32_t reserved_88;
uint32_t reserved_89;
uint32_t reserved_90;
uint32_t reserved_91;
uint32_t reserved_92;
uint32_t reserved_93;
uint32_t reserved_94;
uint32_t reserved_95;
uint32_t reserved_96;
uint32_t reserved_97;
uint32_t reserved_98;
uint32_t reserved_99;
uint32_t reserved_100;
uint32_t reserved_101;
uint32_t reserved_102;
uint32_t reserved_103;
uint32_t reserved_104;
uint32_t reserved_105;
uint32_t reserved_106;
uint32_t reserved_107;
uint32_t reserved_108;
uint32_t reserved_109;
uint32_t reserved_110;
uint32_t reserved_111;
uint32_t reserved_112;
uint32_t reserved_113;
uint32_t reserved_114;
uint32_t reserved_115;
uint32_t reserved_116;
uint32_t reserved_117;
uint32_t reserved_118;
uint32_t reserved_119;
uint32_t reserved_120;
uint32_t reserved_121;
uint32_t reserved_122;
uint32_t reserved_123;
uint32_t reserved_124;
uint32_t reserved_125;
uint32_t reserved_126;
uint32_t reserved_127;
/* dwords 128-129: SDMA engine and queue identifiers */
uint32_t sdma_engine_id;
uint32_t sdma_queue_id;
};
#endif /* CIK_STRUCTS_H_ */
......@@ -129,9 +129,6 @@ struct kgd2kfd_calls {
* @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp
* scheduling mode. Only used for no cp scheduling mode.
*
* @init_memory: Initializes memory apertures to fixed base/limit address
* and non cached memory types.
*
* @init_pipeline: Initialized the compute pipelines.
*
* @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp
......@@ -175,7 +172,6 @@ struct kfd2kgd_calls {
int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
int (*init_memory)(struct kgd_dev *kgd);
int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr);
......
......@@ -2,7 +2,7 @@
# Makefile for the drm device driver. This driver provides support for the
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
ccflags-y := -Iinclude/drm
ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include
hostprogs-y := mkregtable
clean-files := rn50_reg_safe.h r100_reg_safe.h r200_reg_safe.h rv515_reg_safe.h r300_reg_safe.h r420_reg_safe.h rs600_reg_safe.h r600_reg_safe.h evergreen_reg_safe.h cayman_reg_safe.h
......
......@@ -5707,6 +5707,28 @@ void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
/**
 * cik_pcie_init_compute_vmid - program the SH_MEM registers of VMIDs 8..15
 *
 * @rdev: radeon_device pointer
 *
 * Selects each VMID from 8 through 15 in turn via cik_srbm_select() and
 * writes the same SH_MEM_CONFIG, SH_MEM_APE1_BASE, SH_MEM_APE1_LIMIT and
 * SH_MEM_BASES values for it. rdev->srbm_mutex is held for the whole
 * sequence and the SRBM selection is put back to 0 before it is released.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	/* 0x6000 in both the low and the high 16 bits of SH_MEM_BASES. */
	const uint32_t sh_mem_bases = 0x6000 | (0x6000 << 16);
	/* Unaligned access mode with a non-cached default memory type. */
	const uint32_t sh_mem_config =
		ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
		DEFAULT_MTYPE(MTYPE_NONCACHED);
	int vmid;

	mutex_lock(&rdev->srbm_mutex);
	for (vmid = 8; vmid < 16; vmid++) {
		cik_srbm_select(rdev, 0, 0, 0, vmid);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		/*
		 * Base (1) is above limit (0).
		 * NOTE(review): presumably this leaves APE1 disabled - confirm.
		 */
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* Leave the SRBM selection at 0 for whoever takes the mutex next. */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
/**
* cik_pcie_gart_enable - gart enable
*
......@@ -5820,6 +5842,8 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
cik_srbm_select(rdev, 0, 0, 0, 0);
mutex_unlock(&rdev->srbm_mutex);
cik_pcie_init_compute_vmid(rdev);
cik_pcie_gart_tlb_flush(rdev);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(rdev->mc.gtt_size >> 20),
......
......@@ -184,268 +184,4 @@
#define SDMA0_CNTL 0xD010
#define SDMA1_CNTL 0xD810
struct cik_mqd {
uint32_t header;
uint32_t compute_dispatch_initiator;
uint32_t compute_dim_x;
uint32_t compute_dim_y;
uint32_t compute_dim_z;
uint32_t compute_start_x;
uint32_t compute_start_y;
uint32_t compute_start_z;
uint32_t compute_num_thread_x;
uint32_t compute_num_thread_y;
uint32_t compute_num_thread_z;
uint32_t compute_pipelinestat_enable;
uint32_t compute_perfcount_enable;
uint32_t compute_pgm_lo;
uint32_t compute_pgm_hi;
uint32_t compute_tba_lo;
uint32_t compute_tba_hi;
uint32_t compute_tma_lo;
uint32_t compute_tma_hi;
uint32_t compute_pgm_rsrc1;
uint32_t compute_pgm_rsrc2;
uint32_t compute_vmid;
uint32_t compute_resource_limits;
uint32_t compute_static_thread_mgmt_se0;
uint32_t compute_static_thread_mgmt_se1;
uint32_t compute_tmpring_size;
uint32_t compute_static_thread_mgmt_se2;
uint32_t compute_static_thread_mgmt_se3;
uint32_t compute_restart_x;
uint32_t compute_restart_y;
uint32_t compute_restart_z;
uint32_t compute_thread_trace_enable;
uint32_t compute_misc_reserved;
uint32_t compute_user_data_0;
uint32_t compute_user_data_1;
uint32_t compute_user_data_2;
uint32_t compute_user_data_3;
uint32_t compute_user_data_4;
uint32_t compute_user_data_5;
uint32_t compute_user_data_6;
uint32_t compute_user_data_7;
uint32_t compute_user_data_8;
uint32_t compute_user_data_9;
uint32_t compute_user_data_10;
uint32_t compute_user_data_11;
uint32_t compute_user_data_12;
uint32_t compute_user_data_13;
uint32_t compute_user_data_14;
uint32_t compute_user_data_15;
uint32_t cp_compute_csinvoc_count_lo;
uint32_t cp_compute_csinvoc_count_hi;
uint32_t cp_mqd_base_addr_lo;
uint32_t cp_mqd_base_addr_hi;
uint32_t cp_hqd_active;
uint32_t cp_hqd_vmid;
uint32_t cp_hqd_persistent_state;
uint32_t cp_hqd_pipe_priority;
uint32_t cp_hqd_queue_priority;
uint32_t cp_hqd_quantum;
uint32_t cp_hqd_pq_base_lo;
uint32_t cp_hqd_pq_base_hi;
uint32_t cp_hqd_pq_rptr;
uint32_t cp_hqd_pq_rptr_report_addr_lo;
uint32_t cp_hqd_pq_rptr_report_addr_hi;
uint32_t cp_hqd_pq_wptr_poll_addr_lo;
uint32_t cp_hqd_pq_wptr_poll_addr_hi;
uint32_t cp_hqd_pq_doorbell_control;
uint32_t cp_hqd_pq_wptr;
uint32_t cp_hqd_pq_control;
uint32_t cp_hqd_ib_base_addr_lo;
uint32_t cp_hqd_ib_base_addr_hi;
uint32_t cp_hqd_ib_rptr;
uint32_t cp_hqd_ib_control;
uint32_t cp_hqd_iq_timer;
uint32_t cp_hqd_iq_rptr;
uint32_t cp_hqd_dequeue_request;
uint32_t cp_hqd_dma_offload;
uint32_t cp_hqd_sema_cmd;
uint32_t cp_hqd_msg_type;
uint32_t cp_hqd_atomic0_preop_lo;
uint32_t cp_hqd_atomic0_preop_hi;
uint32_t cp_hqd_atomic1_preop_lo;
uint32_t cp_hqd_atomic1_preop_hi;
uint32_t cp_hqd_hq_status0;
uint32_t cp_hqd_hq_control0;
uint32_t cp_mqd_control;
uint32_t cp_mqd_query_time_lo;
uint32_t cp_mqd_query_time_hi;
uint32_t cp_mqd_connect_start_time_lo;
uint32_t cp_mqd_connect_start_time_hi;
uint32_t cp_mqd_connect_end_time_lo;
uint32_t cp_mqd_connect_end_time_hi;
uint32_t cp_mqd_connect_end_wf_count;
uint32_t cp_mqd_connect_end_pq_rptr;
uint32_t cp_mqd_connect_end_pq_wptr;
uint32_t cp_mqd_connect_end_ib_rptr;
uint32_t reserved_96;
uint32_t reserved_97;
uint32_t reserved_98;
uint32_t reserved_99;
uint32_t iqtimer_pkt_header;
uint32_t iqtimer_pkt_dw0;
uint32_t iqtimer_pkt_dw1;
uint32_t iqtimer_pkt_dw2;
uint32_t iqtimer_pkt_dw3;
uint32_t iqtimer_pkt_dw4;
uint32_t iqtimer_pkt_dw5;
uint32_t iqtimer_pkt_dw6;
uint32_t reserved_108;
uint32_t reserved_109;
uint32_t reserved_110;
uint32_t reserved_111;
uint32_t queue_doorbell_id0;
uint32_t queue_doorbell_id1;
uint32_t queue_doorbell_id2;
uint32_t queue_doorbell_id3;
uint32_t queue_doorbell_id4;
uint32_t queue_doorbell_id5;
uint32_t queue_doorbell_id6;
uint32_t queue_doorbell_id7;
uint32_t queue_doorbell_id8;
uint32_t queue_doorbell_id9;
uint32_t queue_doorbell_id10;
uint32_t queue_doorbell_id11;
uint32_t queue_doorbell_id12;
uint32_t queue_doorbell_id13;
uint32_t queue_doorbell_id14;
uint32_t queue_doorbell_id15;
};
struct cik_sdma_rlc_registers {
uint32_t sdma_rlc_rb_cntl;
uint32_t sdma_rlc_rb_base;
uint32_t sdma_rlc_rb_base_hi;
uint32_t sdma_rlc_rb_rptr;
uint32_t sdma_rlc_rb_wptr;
uint32_t sdma_rlc_rb_wptr_poll_cntl;
uint32_t sdma_rlc_rb_wptr_poll_addr_hi;
uint32_t sdma_rlc_rb_wptr_poll_addr_lo;
uint32_t sdma_rlc_rb_rptr_addr_hi;
uint32_t sdma_rlc_rb_rptr_addr_lo;
uint32_t sdma_rlc_ib_cntl;
uint32_t sdma_rlc_ib_rptr;
uint32_t sdma_rlc_ib_offset;
uint32_t sdma_rlc_ib_base_lo;
uint32_t sdma_rlc_ib_base_hi;
uint32_t sdma_rlc_ib_size;
uint32_t sdma_rlc_skip_cntl;
uint32_t sdma_rlc_context_status;
uint32_t sdma_rlc_doorbell;
uint32_t sdma_rlc_virtual_addr;
uint32_t sdma_rlc_ape1_cntl;
uint32_t sdma_rlc_doorbell_log;
uint32_t reserved_22;
uint32_t reserved_23;
uint32_t reserved_24;
uint32_t reserved_25;
uint32_t reserved_26;
uint32_t reserved_27;
uint32_t reserved_28;
uint32_t reserved_29;
uint32_t reserved_30;
uint32_t reserved_31;
uint32_t reserved_32;
uint32_t reserved_33;
uint32_t reserved_34;
uint32_t reserved_35;
uint32_t reserved_36;
uint32_t reserved_37;
uint32_t reserved_38;
uint32_t reserved_39;
uint32_t reserved_40;
uint32_t reserved_41;
uint32_t reserved_42;
uint32_t reserved_43;
uint32_t reserved_44;
uint32_t reserved_45;
uint32_t reserved_46;
uint32_t reserved_47;
uint32_t reserved_48;
uint32_t reserved_49;
uint32_t reserved_50;
uint32_t reserved_51;
uint32_t reserved_52;
uint32_t reserved_53;
uint32_t reserved_54;
uint32_t reserved_55;
uint32_t reserved_56;
uint32_t reserved_57;
uint32_t reserved_58;
uint32_t reserved_59;
uint32_t reserved_60;
uint32_t reserved_61;
uint32_t reserved_62;
uint32_t reserved_63;
uint32_t reserved_64;
uint32_t reserved_65;
uint32_t reserved_66;
uint32_t reserved_67;
uint32_t reserved_68;
uint32_t reserved_69;
uint32_t reserved_70;
uint32_t reserved_71;
uint32_t reserved_72;
uint32_t reserved_73;
uint32_t reserved_74;
uint32_t reserved_75;
uint32_t reserved_76;
uint32_t reserved_77;
uint32_t reserved_78;
uint32_t reserved_79;
uint32_t reserved_80;
uint32_t reserved_81;
uint32_t reserved_82;
uint32_t reserved_83;
uint32_t reserved_84;
uint32_t reserved_85;
uint32_t reserved_86;
uint32_t reserved_87;
uint32_t reserved_88;
uint32_t reserved_89;
uint32_t reserved_90;
uint32_t reserved_91;
uint32_t reserved_92;
uint32_t reserved_93;
uint32_t reserved_94;
uint32_t reserved_95;
uint32_t reserved_96;
uint32_t reserved_97;
uint32_t reserved_98;
uint32_t reserved_99;
uint32_t reserved_100;
uint32_t reserved_101;
uint32_t reserved_102;
uint32_t reserved_103;
uint32_t reserved_104;
uint32_t reserved_105;
uint32_t reserved_106;
uint32_t reserved_107;
uint32_t reserved_108;
uint32_t reserved_109;
uint32_t reserved_110;
uint32_t reserved_111;
uint32_t reserved_112;
uint32_t reserved_113;
uint32_t reserved_114;
uint32_t reserved_115;
uint32_t reserved_116;
uint32_t reserved_117;
uint32_t reserved_118;
uint32_t reserved_119;
uint32_t reserved_120;
uint32_t reserved_121;
uint32_t reserved_122;
uint32_t reserved_123;
uint32_t reserved_124;
uint32_t reserved_125;
uint32_t reserved_126;
uint32_t reserved_127;
uint32_t sdma_engine_id;
uint32_t sdma_queue_id;
};
#endif
......@@ -30,6 +30,7 @@
#include "radeon_kfd.h"
#include "radeon_ucode.h"
#include <linux/firmware.h>
#include "cik_structs.h"
#define CIK_PIPE_PER_MEC (4)
......@@ -63,8 +64,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
static int kgd_init_memory(struct kgd_dev *kgd);
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr);
......@@ -89,7 +88,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_memory = kgd_init_memory,
.init_pipeline = kgd_init_pipeline,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
......@@ -375,42 +373,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
static int kgd_init_memory(struct kgd_dev *kgd)
{
/*
* Configure apertures:
* LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
* Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
* GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
*/
int i;
uint32_t sh_mem_bases = PRIVATE_BASE(0x6000) | SHARED_BASE(0x6000);
for (i = 8; i < 16; i++) {
uint32_t sh_mem_config;
lock_srbm(kgd, 0, 0, 0, i);
sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
write_register(kgd, SH_MEM_BASES, sh_mem_bases);
/* Scratch aperture is not supported for now. */
write_register(kgd, SH_STATIC_MEM_CONFIG, 0);
/* APE1 disabled for now. */
write_register(kgd, SH_MEM_APE1_BASE, 1);
write_register(kgd, SH_MEM_APE1_LIMIT, 0);
unlock_srbm(kgd);
}
return 0;
}
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
......
......@@ -29,7 +29,7 @@
#define RADEON_KFD_H_INCLUDED
#include <linux/types.h>
#include "../amd/include/kgd_kfd_interface.h"
#include "kgd_kfd_interface.h"
struct radeon_device;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment