Commit 9ad472e3 authored by Dave Airlie

Merge tag 'drm-amdkfd-next-2017-11-02' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- Usermode Events
The current events code implemented some data structures (waitqueue, fifo)
that were already implemented in the kernel. The patches below address
this issue by replacing them with the standard kernel implementation.
In addition, they simplify allocation of event IDs and memory for the events.

The patches also increase the maximum number of events while maintaining
compatibility with the older userspace library.

- Remove radeon support
Because Kaveri is fully supported in amdgpu and because current and future
versions of userspace libraries will only support amdgpu, we removed radeon
support from kfd. Current users can move to amdgpu while using the same
userspace libraries.

- Various bug fixes and cleanups

* tag 'drm-amdkfd-next-2017-11-02' of git://people.freedesktop.org/~gabbayo/linux: (26 commits)
  drm/amdkfd: Minor cleanups
  drm/amdkfd: Update queue_count before mapping queues
  drm/amdkfd: Cleanup DQM ASIC-specific ops
  drm/amdkfd: Register/Deregister process on qpd resolution
  drm/amdkfd: Fix debug unregister procedure on process termination
  drm/amdkfd: Avoid calling amd_iommu_unbind_pasid() when suspending
  drm/amdkfd: Disable CP/SDMA ring/doorbell in MQD
  drm/amdkfd: Clean up the data structure in kfd_process
  drm/radeon: deprecate and remove KFD interface
  drm/amdkfd: use a high priority workqueue for IH work
  drm/amdkfd: wait only for IH work on IH exit
  drm/amdkfd: increase IH num entries to 8192
  drm/amdkfd: use standard kernel kfifo for IH
  drm/amdkfd: increase limit of signal events to 4096 per process
  drm/amdkfd: Make event limit dependent on user mode mapping size
  drm/amdkfd: Use IH context ID for signal lookup
  drm/amdkfd: Simplify event ID and signal slot management
  drm/amdkfd: Simplify events page allocator
  drm/amdkfd: Use wait_queue_t to implement event waiting
  drm/amdkfd: remove redundant kfd_event_waiter.input_index
  ...
parents 85f6e0f6 894a8293
...@@ -759,8 +759,6 @@ F: drivers/gpu/drm/amd/amdkfd/ ...@@ -759,8 +759,6 @@ F: drivers/gpu/drm/amd/amdkfd/
F: drivers/gpu/drm/amd/include/cik_structs.h F: drivers/gpu/drm/amd/include/cik_structs.h
F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h
F: drivers/gpu/drm/amd/include/vi_structs.h F: drivers/gpu/drm/amd/include/vi_structs.h
F: drivers/gpu/drm/radeon/radeon_kfd.c
F: drivers/gpu/drm/radeon/radeon_kfd.h
F: include/uapi/linux/kfd_ioctl.h F: include/uapi/linux/kfd_ioctl.h
AMD SEATTLE DEVICE TREE SUPPORT AMD SEATTLE DEVICE TREE SUPPORT
......
...@@ -4,6 +4,6 @@ ...@@ -4,6 +4,6 @@
config HSA_AMD config HSA_AMD
tristate "HSA kernel driver for AMD GPU devices" tristate "HSA kernel driver for AMD GPU devices"
depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64 depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64
help help
Enable this if you want to use HSA features on AMD GPU devices. Enable this if you want to use HSA features on AMD GPU devices.
...@@ -36,6 +36,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, ...@@ -36,6 +36,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
/* Do not process in ISR, just request it to be forwarded to WQ. */ /* Do not process in ISR, just request it to be forwarded to WQ. */
return (pasid != 0) && return (pasid != 0) &&
(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE); ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
} }
...@@ -46,6 +47,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, ...@@ -46,6 +47,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
unsigned int pasid; unsigned int pasid;
const struct cik_ih_ring_entry *ihre = const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry; (const struct cik_ih_ring_entry *)ih_ring_entry;
uint32_t context_id = ihre->data & 0xfffffff;
pasid = (ihre->ring_id & 0xffff0000) >> 16; pasid = (ihre->ring_id & 0xffff0000) >> 16;
...@@ -53,9 +55,11 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, ...@@ -53,9 +55,11 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
return; return;
if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE)
kfd_signal_event_interrupt(pasid, 0, 0); kfd_signal_event_interrupt(pasid, context_id, 28);
else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP)
kfd_signal_event_interrupt(pasid, context_id, 28);
else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG)
kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8); kfd_signal_event_interrupt(pasid, context_id & 0xff, 8);
else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
kfd_signal_hw_exception_event(pasid); kfd_signal_hw_exception_event(pasid);
} }
......
...@@ -32,9 +32,10 @@ struct cik_ih_ring_entry { ...@@ -32,9 +32,10 @@ struct cik_ih_ring_entry {
uint32_t reserved; uint32_t reserved;
}; };
#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6
#define CIK_INTSRC_CP_END_OF_PIPE 0xB5 #define CIK_INTSRC_CP_END_OF_PIPE 0xB5
#define CIK_INTSRC_CP_BAD_OPCODE 0xB7 #define CIK_INTSRC_CP_BAD_OPCODE 0xB7
#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6
#define CIK_INTSRC_SDMA_TRAP 0xE0
#define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF #define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF
#endif #endif
......
...@@ -450,8 +450,8 @@ static int kfd_ioctl_dbg_register(struct file *filep, ...@@ -450,8 +450,8 @@ static int kfd_ioctl_dbg_register(struct file *filep,
return -EINVAL; return -EINVAL;
} }
mutex_lock(kfd_get_dbgmgr_mutex());
mutex_lock(&p->mutex); mutex_lock(&p->mutex);
mutex_lock(kfd_get_dbgmgr_mutex());
/* /*
* make sure that we have pdd, if this the first queue created for * make sure that we have pdd, if this the first queue created for
...@@ -479,8 +479,8 @@ static int kfd_ioctl_dbg_register(struct file *filep, ...@@ -479,8 +479,8 @@ static int kfd_ioctl_dbg_register(struct file *filep,
} }
out: out:
mutex_unlock(&p->mutex);
mutex_unlock(kfd_get_dbgmgr_mutex()); mutex_unlock(kfd_get_dbgmgr_mutex());
mutex_unlock(&p->mutex);
return status; return status;
} }
...@@ -835,15 +835,12 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, ...@@ -835,15 +835,12 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
void *data) void *data)
{ {
struct kfd_ioctl_wait_events_args *args = data; struct kfd_ioctl_wait_events_args *args = data;
enum kfd_event_wait_result wait_result;
int err; int err;
err = kfd_wait_on_events(p, args->num_events, err = kfd_wait_on_events(p, args->num_events,
(void __user *)args->events_ptr, (void __user *)args->events_ptr,
(args->wait_for_all != 0), (args->wait_for_all != 0),
args->timeout, &wait_result); args->timeout, &args->wait_result);
args->wait_result = wait_result;
return err; return err;
} }
......
...@@ -403,7 +403,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) ...@@ -403,7 +403,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
if (kfd->interrupts_active if (kfd->interrupts_active
&& interrupt_is_wanted(kfd, ih_ring_entry) && interrupt_is_wanted(kfd, ih_ring_entry)
&& enqueue_ih_ring_entry(kfd, ih_ring_entry)) && enqueue_ih_ring_entry(kfd, ih_ring_entry))
schedule_work(&kfd->interrupt_work); queue_work(kfd->ih_wq, &kfd->interrupt_work);
spin_unlock(&kfd->interrupt_lock); spin_unlock(&kfd->interrupt_lock);
} }
......
...@@ -389,12 +389,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) ...@@ -389,12 +389,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
if (sched_policy != KFD_SCHED_POLICY_NO_HWS) { if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
retval = unmap_queues_cpsch(dqm, retval = unmap_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
if (retval != 0) { if (retval) {
pr_err("unmap queue failed\n"); pr_err("unmap queue failed\n");
goto out_unlock; goto out_unlock;
} }
} else if (sched_policy == KFD_SCHED_POLICY_NO_HWS && } else if (prev_active &&
prev_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE || (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) { q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
retval = mqd->destroy_mqd(mqd, q->mqd, retval = mqd->destroy_mqd(mqd, q->mqd,
...@@ -408,24 +407,25 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) ...@@ -408,24 +407,25 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
retval = mqd->update_mqd(mqd, q->mqd, &q->properties); retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
retval = map_queues_cpsch(dqm);
else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
q->properties.is_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA))
retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
&q->properties, q->process->mm);
/* /*
* check active state vs. the previous state * check active state vs. the previous state and modify
* and modify counter accordingly * counter accordingly. map_queues_cpsch uses the
* dqm->queue_count to determine whether a new runlist must be
* uploaded.
*/ */
if (q->properties.is_active && !prev_active) if (q->properties.is_active && !prev_active)
dqm->queue_count++; dqm->queue_count++;
else if (!q->properties.is_active && prev_active) else if (!q->properties.is_active && prev_active)
dqm->queue_count--; dqm->queue_count--;
if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
retval = map_queues_cpsch(dqm);
else if (q->properties.is_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA))
retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
&q->properties, q->process->mm);
out_unlock: out_unlock:
mutex_unlock(&dqm->lock); mutex_unlock(&dqm->lock);
return retval; return retval;
...@@ -467,7 +467,7 @@ static int register_process(struct device_queue_manager *dqm, ...@@ -467,7 +467,7 @@ static int register_process(struct device_queue_manager *dqm,
mutex_lock(&dqm->lock); mutex_lock(&dqm->lock);
list_add(&n->list, &dqm->queues); list_add(&n->list, &dqm->queues);
retval = dqm->ops_asic_specific.register_process(dqm, qpd); retval = dqm->asic_ops.update_qpd(dqm, qpd);
dqm->processes_count++; dqm->processes_count++;
...@@ -629,7 +629,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, ...@@ -629,7 +629,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties); &q->gart_mqd_addr, &q->properties);
if (retval) if (retval)
...@@ -696,8 +696,6 @@ static int set_sched_resources(struct device_queue_manager *dqm) ...@@ -696,8 +696,6 @@ static int set_sched_resources(struct device_queue_manager *dqm)
static int initialize_cpsch(struct device_queue_manager *dqm) static int initialize_cpsch(struct device_queue_manager *dqm)
{ {
int retval;
pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
mutex_init(&dqm->lock); mutex_init(&dqm->lock);
...@@ -706,11 +704,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm) ...@@ -706,11 +704,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
dqm->sdma_queue_count = 0; dqm->sdma_queue_count = 0;
dqm->active_runlist = false; dqm->active_runlist = false;
dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
retval = dqm->ops_asic_specific.initialize(dqm);
if (retval)
mutex_destroy(&dqm->lock);
return retval; return 0;
} }
static int start_cpsch(struct device_queue_manager *dqm) static int start_cpsch(struct device_queue_manager *dqm)
...@@ -835,7 +830,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, ...@@ -835,7 +830,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
retval = allocate_sdma_queue(dqm, &q->sdma_id); retval = allocate_sdma_queue(dqm, &q->sdma_id);
if (retval != 0) if (retval)
goto out; goto out;
q->properties.sdma_queue_id = q->properties.sdma_queue_id =
q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
...@@ -850,7 +845,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, ...@@ -850,7 +845,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
goto out; goto out;
} }
dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties); &q->gart_mqd_addr, &q->properties);
if (retval) if (retval)
...@@ -1095,7 +1090,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, ...@@ -1095,7 +1090,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
qpd->sh_mem_ape1_limit = limit >> 16; qpd->sh_mem_ape1_limit = limit >> 16;
} }
retval = dqm->ops_asic_specific.set_cache_memory_policy( retval = dqm->asic_ops.set_cache_memory_policy(
dqm, dqm,
qpd, qpd,
default_policy, default_policy,
...@@ -1270,11 +1265,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) ...@@ -1270,11 +1265,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
switch (dev->device_info->asic_family) { switch (dev->device_info->asic_family) {
case CHIP_CARRIZO: case CHIP_CARRIZO:
device_queue_manager_init_vi(&dqm->ops_asic_specific); device_queue_manager_init_vi(&dqm->asic_ops);
break; break;
case CHIP_KAVERI: case CHIP_KAVERI:
device_queue_manager_init_cik(&dqm->ops_asic_specific); device_queue_manager_init_cik(&dqm->asic_ops);
break; break;
default: default:
WARN(1, "Unexpected ASIC family %u", WARN(1, "Unexpected ASIC family %u",
......
...@@ -128,9 +128,8 @@ struct device_queue_manager_ops { ...@@ -128,9 +128,8 @@ struct device_queue_manager_ops {
}; };
struct device_queue_manager_asic_ops { struct device_queue_manager_asic_ops {
int (*register_process)(struct device_queue_manager *dqm, int (*update_qpd)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd); struct qcm_process_device *qpd);
int (*initialize)(struct device_queue_manager *dqm);
bool (*set_cache_memory_policy)(struct device_queue_manager *dqm, bool (*set_cache_memory_policy)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd, struct qcm_process_device *qpd,
enum cache_policy default_policy, enum cache_policy default_policy,
...@@ -156,7 +155,7 @@ struct device_queue_manager_asic_ops { ...@@ -156,7 +155,7 @@ struct device_queue_manager_asic_ops {
struct device_queue_manager { struct device_queue_manager {
struct device_queue_manager_ops ops; struct device_queue_manager_ops ops;
struct device_queue_manager_asic_ops ops_asic_specific; struct device_queue_manager_asic_ops asic_ops;
struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; struct mqd_manager *mqds[KFD_MQD_TYPE_MAX];
struct packet_manager packets; struct packet_manager packets;
...@@ -179,8 +178,10 @@ struct device_queue_manager { ...@@ -179,8 +178,10 @@ struct device_queue_manager {
bool active_runlist; bool active_runlist;
}; };
void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops); void device_queue_manager_init_cik(
void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops); struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm, void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd); struct qcm_process_device *qpd);
unsigned int get_queues_num(struct device_queue_manager *dqm); unsigned int get_queues_num(struct device_queue_manager *dqm);
......
...@@ -32,18 +32,17 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, ...@@ -32,18 +32,17 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
enum cache_policy alternate_policy, enum cache_policy alternate_policy,
void __user *alternate_aperture_base, void __user *alternate_aperture_base,
uint64_t alternate_aperture_size); uint64_t alternate_aperture_size);
static int register_process_cik(struct device_queue_manager *dqm, static int update_qpd_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd); struct qcm_process_device *qpd);
static int initialize_cpsch_cik(struct device_queue_manager *dqm);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd); struct qcm_process_device *qpd);
void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops) void device_queue_manager_init_cik(
struct device_queue_manager_asic_ops *asic_ops)
{ {
ops->set_cache_memory_policy = set_cache_memory_policy_cik; asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
ops->register_process = register_process_cik; asic_ops->update_qpd = update_qpd_cik;
ops->initialize = initialize_cpsch_cik; asic_ops->init_sdma_vm = init_sdma_vm;
ops->init_sdma_vm = init_sdma_vm;
} }
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
...@@ -99,7 +98,7 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, ...@@ -99,7 +98,7 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
return true; return true;
} }
static int register_process_cik(struct device_queue_manager *dqm, static int update_qpd_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd) struct qcm_process_device *qpd)
{ {
struct kfd_process_device *pdd; struct kfd_process_device *pdd;
...@@ -148,8 +147,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, ...@@ -148,8 +147,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
q->properties.sdma_vm_addr = value; q->properties.sdma_vm_addr = value;
} }
static int initialize_cpsch_cik(struct device_queue_manager *dqm)
{
return 0;
}
...@@ -33,18 +33,17 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, ...@@ -33,18 +33,17 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
enum cache_policy alternate_policy, enum cache_policy alternate_policy,
void __user *alternate_aperture_base, void __user *alternate_aperture_base,
uint64_t alternate_aperture_size); uint64_t alternate_aperture_size);
static int register_process_vi(struct device_queue_manager *dqm, static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd); struct qcm_process_device *qpd);
static int initialize_cpsch_vi(struct device_queue_manager *dqm);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd); struct qcm_process_device *qpd);
void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops) void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops)
{ {
ops->set_cache_memory_policy = set_cache_memory_policy_vi; asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
ops->register_process = register_process_vi; asic_ops->update_qpd = update_qpd_vi;
ops->initialize = initialize_cpsch_vi; asic_ops->init_sdma_vm = init_sdma_vm;
ops->init_sdma_vm = init_sdma_vm;
} }
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
...@@ -104,7 +103,7 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, ...@@ -104,7 +103,7 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
return true; return true;
} }
static int register_process_vi(struct device_queue_manager *dqm, static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd) struct qcm_process_device *qpd)
{ {
struct kfd_process_device *pdd; struct kfd_process_device *pdd;
...@@ -160,8 +159,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, ...@@ -160,8 +159,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
q->properties.sdma_vm_addr = value; q->properties.sdma_vm_addr = value;
} }
static int initialize_cpsch_vi(struct device_queue_manager *dqm)
{
return 0;
}
...@@ -24,8 +24,8 @@ ...@@ -24,8 +24,8 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/mm.h>
#include <linux/mman.h> #include <linux/mman.h>
#include <linux/memory.h> #include <linux/memory.h>
#include "kfd_priv.h" #include "kfd_priv.h"
...@@ -33,185 +33,89 @@ ...@@ -33,185 +33,89 @@
#include <linux/device.h> #include <linux/device.h>
/* /*
* A task can only be on a single wait_queue at a time, but we need to support * Wrapper around wait_queue_entry_t
* waiting on multiple events (any/all).
* Instead of each event simply having a wait_queue with sleeping tasks, it
* has a singly-linked list of tasks.
* A thread that wants to sleep creates an array of these, one for each event
* and adds one to each event's waiter chain.
*/ */
struct kfd_event_waiter { struct kfd_event_waiter {
struct list_head waiters; wait_queue_entry_t wait;
struct task_struct *sleeping_task; struct kfd_event *event; /* Event to wait for */
bool activated; /* Becomes true when event is signaled */
/* Transitions to true when the event this belongs to is signaled. */
bool activated;
/* Event */
struct kfd_event *event;
uint32_t input_index;
}; };
/* /*
* Over-complicated pooled allocator for event notification slots.
*
* Each signal event needs a 64-bit signal slot where the signaler will write * Each signal event needs a 64-bit signal slot where the signaler will write
* a 1 before sending an interrupt.l (This is needed because some interrupts * a 1 before sending an interrupt. (This is needed because some interrupts
* do not contain enough spare data bits to identify an event.) * do not contain enough spare data bits to identify an event.)
* We get whole pages from vmalloc and map them to the process VA. * We get whole pages and map them to the process VA.
* Individual signal events are then allocated a slot in a page. * Individual signal events use their event_id as slot index.
*/ */
struct kfd_signal_page {
struct signal_page {
struct list_head event_pages; /* kfd_process.signal_event_pages */
uint64_t *kernel_address; uint64_t *kernel_address;
uint64_t __user *user_address; uint64_t __user *user_address;
uint32_t page_index; /* Index into the mmap aperture. */
unsigned int free_slots;
unsigned long used_slot_bitmap[0];
}; };
#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
#define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE)
#define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1)
#define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \
SLOT_BITMAP_SIZE * sizeof(long))
/* static uint64_t *page_slots(struct kfd_signal_page *page)
* For signal events, the event ID is used as the interrupt user data.
* For SQ s_sendmsg interrupts, this is limited to 8 bits.
*/
#define INTERRUPT_DATA_BITS 8
#define SIGNAL_EVENT_ID_SLOT_SHIFT 0
static uint64_t *page_slots(struct signal_page *page)
{ {
return page->kernel_address; return page->kernel_address;
} }
static bool allocate_free_slot(struct kfd_process *process, static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
struct signal_page **out_page,
unsigned int *out_slot_index)
{
struct signal_page *page;
list_for_each_entry(page, &process->signal_event_pages, event_pages) {
if (page->free_slots > 0) {
unsigned int slot =
find_first_zero_bit(page->used_slot_bitmap,
SLOTS_PER_PAGE);
__set_bit(slot, page->used_slot_bitmap);
page->free_slots--;
page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
*out_page = page;
*out_slot_index = slot;
pr_debug("Allocated event signal slot in page %p, slot %d\n",
page, slot);
return true;
}
}
pr_debug("No free event signal slots were found for process %p\n",
process);
return false;
}
#define list_tail_entry(head, type, member) \
list_entry((head)->prev, type, member)
static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p)
{ {
void *backing_store; void *backing_store;
struct signal_page *page; struct kfd_signal_page *page;
page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL); page = kzalloc(sizeof(*page), GFP_KERNEL);
if (!page) if (!page)
goto fail_alloc_signal_page; return NULL;
page->free_slots = SLOTS_PER_PAGE;
backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, backing_store = (void *) __get_free_pages(GFP_KERNEL,
get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
if (!backing_store) if (!backing_store)
goto fail_alloc_signal_store; goto fail_alloc_signal_store;
/* prevent user-mode info leaks */ /* Initialize all events to unsignaled */
memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
KFD_SIGNAL_EVENT_LIMIT * 8); KFD_SIGNAL_EVENT_LIMIT * 8);
page->kernel_address = backing_store; page->kernel_address = backing_store;
if (list_empty(&p->signal_event_pages))
page->page_index = 0;
else
page->page_index = list_tail_entry(&p->signal_event_pages,
struct signal_page,
event_pages)->page_index + 1;
pr_debug("Allocated new event signal page at %p, for process %p\n", pr_debug("Allocated new event signal page at %p, for process %p\n",
page, p); page, p);
pr_debug("Page index is %d\n", page->page_index);
list_add(&page->event_pages, &p->signal_event_pages); return page;
return true;
fail_alloc_signal_store: fail_alloc_signal_store:
kfree(page); kfree(page);
fail_alloc_signal_page: return NULL;
return false;
} }
static bool allocate_event_notification_slot(struct file *devkfd, static int allocate_event_notification_slot(struct kfd_process *p,
struct kfd_process *p, struct kfd_event *ev)
struct signal_page **page,
unsigned int *signal_slot_index)
{ {
bool ret; int id;
ret = allocate_free_slot(p, page, signal_slot_index); if (!p->signal_page) {
if (!ret) { p->signal_page = allocate_signal_page(p);
ret = allocate_signal_page(devkfd, p); if (!p->signal_page)
if (ret) return -ENOMEM;
ret = allocate_free_slot(p, page, signal_slot_index); /* Oldest user mode expects 256 event slots */
p->signal_mapped_size = 256*8;
} }
return ret;
}
/* Assumes that the process's event_mutex is locked. */
static void release_event_notification_slot(struct signal_page *page,
size_t slot_index)
{
__clear_bit(slot_index, page->used_slot_bitmap);
page->free_slots++;
/* We don't free signal pages, they are retained by the process
* and reused until it exits.
*/
}
static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
unsigned int page_index)
{
struct signal_page *page;
/* /*
* This is safe because we don't delete signal pages until the * Compatibility with old user mode: Only use signal slots
* process exits. * user mode has mapped, may be less than
* KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
* of the event limit without breaking user mode.
*/ */
list_for_each_entry(page, &p->signal_event_pages, event_pages) id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
if (page->page_index == page_index) GFP_KERNEL);
return page; if (id < 0)
return id;
return NULL; ev->event_id = id;
page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;
return 0;
} }
/* /*
...@@ -220,99 +124,81 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, ...@@ -220,99 +124,81 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
*/ */
static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
{ {
struct kfd_event *ev; return idr_find(&p->event_idr, id);
hash_for_each_possible(p->events, ev, events, id)
if (ev->event_id == id)
return ev;
return NULL;
}
static u32 make_signal_event_id(struct signal_page *page,
unsigned int signal_slot_index)
{
return page->page_index |
(signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT);
} }
/* /**
* Produce a kfd event id for a nonsignal event. * lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID
* These are arbitrary numbers, so we do a sequential search through * @p: Pointer to struct kfd_process
* the hash table for an unused number. * @id: ID to look up
* @bits: Number of valid bits in @id
*
* Finds the first signaled event with a matching partial ID. If no
* matching signaled event is found, returns NULL. In that case the
* caller should assume that the partial ID is invalid and do an
 * exhaustive search of all signaled events.
*
* If multiple events with the same partial ID signal at the same
* time, they will be found one interrupt at a time, not necessarily
* in the same order the interrupts occurred. As long as the number of
* interrupts is correct, all signaled events will be seen by the
* driver.
*/ */
static u32 make_nonsignal_event_id(struct kfd_process *p) static struct kfd_event *lookup_signaled_event_by_partial_id(
struct kfd_process *p, uint32_t id, uint32_t bits)
{ {
u32 id; struct kfd_event *ev;
for (id = p->next_nonsignal_event_id;
id < KFD_LAST_NONSIGNAL_EVENT_ID &&
lookup_event_by_id(p, id);
id++)
;
if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT)
return NULL;
/* /* Fast path for the common case that @id is not a partial ID
* What if id == LAST_NONSIGNAL_EVENT_ID - 1? * and we only need a single lookup.
* Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
* the first loop fails immediately and we proceed with the
* wraparound loop below.
*/ */
p->next_nonsignal_event_id = id + 1; if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) {
if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
return NULL;
return id; return idr_find(&p->event_idr, id);
} }
for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; /* General case for partial IDs: Iterate over all matching IDs
id < KFD_LAST_NONSIGNAL_EVENT_ID && * and find the first one that has signaled.
lookup_event_by_id(p, id); */
id++) for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) {
; if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
continue;
if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { ev = idr_find(&p->event_idr, id);
p->next_nonsignal_event_id = id + 1;
return id;
} }
p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; return ev;
return 0;
}
static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
struct signal_page *page,
unsigned int signal_slot)
{
return lookup_event_by_id(p, make_signal_event_id(page, signal_slot));
} }
static int create_signal_event(struct file *devkfd, static int create_signal_event(struct file *devkfd,
struct kfd_process *p, struct kfd_process *p,
struct kfd_event *ev) struct kfd_event *ev)
{ {
if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { int ret;
if (p->signal_mapped_size &&
p->signal_event_count == p->signal_mapped_size / 8) {
if (!p->signal_event_limit_reached) { if (!p->signal_event_limit_reached) {
pr_warn("Signal event wasn't created because limit was reached\n"); pr_warn("Signal event wasn't created because limit was reached\n");
p->signal_event_limit_reached = true; p->signal_event_limit_reached = true;
} }
return -ENOMEM; return -ENOSPC;
} }
if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page, ret = allocate_event_notification_slot(p, ev);
&ev->signal_slot_index)) { if (ret) {
pr_warn("Signal event wasn't created because out of kernel memory\n"); pr_warn("Signal event wasn't created because out of kernel memory\n");
return -ENOMEM; return ret;
} }
p->signal_event_count++; p->signal_event_count++;
ev->user_signal_address = ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
&ev->signal_page->user_address[ev->signal_slot_index];
ev->event_id = make_signal_event_id(ev->signal_page,
ev->signal_slot_index);
pr_debug("Signal event number %zu created with id %d, address %p\n", pr_debug("Signal event number %zu created with id %d, address %p\n",
p->signal_event_count, ev->event_id, p->signal_event_count, ev->event_id,
ev->user_signal_address); ev->user_signal_address);
...@@ -320,16 +206,20 @@ static int create_signal_event(struct file *devkfd, ...@@ -320,16 +206,20 @@ static int create_signal_event(struct file *devkfd,
return 0; return 0;
} }
/*
* No non-signal events are supported yet.
* We create them as events that never signal.
* Set event calls from user-mode are failed.
*/
static int create_other_event(struct kfd_process *p, struct kfd_event *ev) static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
{ {
ev->event_id = make_nonsignal_event_id(p); /* Cast KFD_LAST_NONSIGNAL_EVENT_ID to uint32_t. This allows an
if (ev->event_id == 0) * intentional integer overflow to -1 without a compiler
return -ENOMEM; * warning. idr_alloc treats a negative value as "maximum
* signed integer".
*/
int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
(uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
GFP_KERNEL);
if (id < 0)
return id;
ev->event_id = id;
return 0; return 0;
} }
...@@ -337,50 +227,47 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev) ...@@ -337,50 +227,47 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
void kfd_event_init_process(struct kfd_process *p) void kfd_event_init_process(struct kfd_process *p)
{ {
mutex_init(&p->event_mutex); mutex_init(&p->event_mutex);
hash_init(p->events); idr_init(&p->event_idr);
INIT_LIST_HEAD(&p->signal_event_pages); p->signal_page = NULL;
p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
p->signal_event_count = 0; p->signal_event_count = 0;
} }
static void destroy_event(struct kfd_process *p, struct kfd_event *ev) static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
{ {
if (ev->signal_page) { struct kfd_event_waiter *waiter;
release_event_notification_slot(ev->signal_page,
ev->signal_slot_index);
p->signal_event_count--;
}
/* /* Wake up pending waiters. They will return failure */
* Abandon the list of waiters. Individual waiting threads will list_for_each_entry(waiter, &ev->wq.head, wait.entry)
* clean up their own data. waiter->event = NULL;
*/ wake_up_all(&ev->wq);
list_del(&ev->waiters);
if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
ev->type == KFD_EVENT_TYPE_DEBUG)
p->signal_event_count--;
hash_del(&ev->events); idr_remove(&p->event_idr, ev->event_id);
kfree(ev); kfree(ev);
} }
static void destroy_events(struct kfd_process *p) static void destroy_events(struct kfd_process *p)
{ {
struct kfd_event *ev; struct kfd_event *ev;
struct hlist_node *tmp; uint32_t id;
unsigned int hash_bkt;
hash_for_each_safe(p->events, hash_bkt, tmp, ev, events) idr_for_each_entry(&p->event_idr, ev, id)
destroy_event(p, ev); destroy_event(p, ev);
idr_destroy(&p->event_idr);
} }
/* /*
* We assume that the process is being destroyed and there is no need to * We assume that the process is being destroyed and there is no need to
* unmap the pages or keep bookkeeping data in order. * unmap the pages or keep bookkeeping data in order.
*/ */
static void shutdown_signal_pages(struct kfd_process *p) static void shutdown_signal_page(struct kfd_process *p)
{ {
struct signal_page *page, *tmp; struct kfd_signal_page *page = p->signal_page;
list_for_each_entry_safe(page, tmp, &p->signal_event_pages, if (page) {
event_pages) {
free_pages((unsigned long)page->kernel_address, free_pages((unsigned long)page->kernel_address,
get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
kfree(page); kfree(page);
...@@ -390,7 +277,7 @@ static void shutdown_signal_pages(struct kfd_process *p) ...@@ -390,7 +277,7 @@ static void shutdown_signal_pages(struct kfd_process *p)
void kfd_event_free_process(struct kfd_process *p) void kfd_event_free_process(struct kfd_process *p)
{ {
destroy_events(p); destroy_events(p);
shutdown_signal_pages(p); shutdown_signal_page(p);
} }
static bool event_can_be_gpu_signaled(const struct kfd_event *ev) static bool event_can_be_gpu_signaled(const struct kfd_event *ev)
...@@ -419,7 +306,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ...@@ -419,7 +306,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
ev->auto_reset = auto_reset; ev->auto_reset = auto_reset;
ev->signaled = false; ev->signaled = false;
INIT_LIST_HEAD(&ev->waiters); init_waitqueue_head(&ev->wq);
*event_page_offset = 0; *event_page_offset = 0;
...@@ -430,10 +317,9 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ...@@ -430,10 +317,9 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
case KFD_EVENT_TYPE_DEBUG: case KFD_EVENT_TYPE_DEBUG:
ret = create_signal_event(devkfd, p, ev); ret = create_signal_event(devkfd, p, ev);
if (!ret) { if (!ret) {
*event_page_offset = (ev->signal_page->page_index | *event_page_offset = KFD_MMAP_EVENTS_MASK;
KFD_MMAP_EVENTS_MASK);
*event_page_offset <<= PAGE_SHIFT; *event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->signal_slot_index; *event_slot_index = ev->event_id;
} }
break; break;
default: default:
...@@ -442,8 +328,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ...@@ -442,8 +328,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
} }
if (!ret) { if (!ret) {
hash_add(p->events, &ev->events, ev->event_id);
*event_id = ev->event_id; *event_id = ev->event_id;
*event_trigger_data = ev->event_id; *event_trigger_data = ev->event_id;
} else { } else {
...@@ -477,19 +361,18 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id) ...@@ -477,19 +361,18 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
static void set_event(struct kfd_event *ev) static void set_event(struct kfd_event *ev)
{ {
struct kfd_event_waiter *waiter; struct kfd_event_waiter *waiter;
struct kfd_event_waiter *next;
/* Auto reset if the list is non-empty and we're waking someone. */ /* Auto reset if the list is non-empty and we're waking
ev->signaled = !ev->auto_reset || list_empty(&ev->waiters); * someone. waitqueue_active is safe here because we're
* protected by the p->event_mutex, which is also held when
* updating the wait queues in kfd_wait_on_events.
*/
ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
list_for_each_entry_safe(waiter, next, &ev->waiters, waiters) { list_for_each_entry(waiter, &ev->wq.head, wait.entry)
waiter->activated = true; waiter->activated = true;
/* _init because free_waiters will call list_del */ wake_up_all(&ev->wq);
list_del_init(&waiter->waiters);
wake_up_process(waiter->sleeping_task);
}
} }
/* Assumes that p is current. */ /* Assumes that p is current. */
...@@ -538,13 +421,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) ...@@ -538,13 +421,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
{ {
page_slots(ev->signal_page)[ev->signal_slot_index] = page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
UNSIGNALED_EVENT_SLOT;
}
static bool is_slot_signaled(struct signal_page *page, unsigned int index)
{
return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT;
} }
static void set_event_from_interrupt(struct kfd_process *p, static void set_event_from_interrupt(struct kfd_process *p,
...@@ -559,7 +436,7 @@ static void set_event_from_interrupt(struct kfd_process *p, ...@@ -559,7 +436,7 @@ static void set_event_from_interrupt(struct kfd_process *p,
void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
uint32_t valid_id_bits) uint32_t valid_id_bits)
{ {
struct kfd_event *ev; struct kfd_event *ev = NULL;
/* /*
* Because we are called from arbitrary context (workqueue) as opposed * Because we are called from arbitrary context (workqueue) as opposed
...@@ -573,26 +450,46 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, ...@@ -573,26 +450,46 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
mutex_lock(&p->event_mutex); mutex_lock(&p->event_mutex);
if (valid_id_bits >= INTERRUPT_DATA_BITS) { if (valid_id_bits)
/* Partial ID is a full ID. */ ev = lookup_signaled_event_by_partial_id(p, partial_id,
ev = lookup_event_by_id(p, partial_id); valid_id_bits);
if (ev) {
set_event_from_interrupt(p, ev); set_event_from_interrupt(p, ev);
} else { } else if (p->signal_page) {
/* /*
* Partial ID is in fact partial. For now we completely * Partial ID lookup failed. Assume that the event ID
* ignore it, but we could use any bits we did receive to * in the interrupt payload was invalid and do an
* search faster. * exhaustive search of signaled events.
*/
uint64_t *slots = page_slots(p->signal_page);
uint32_t id;
if (valid_id_bits)
pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
partial_id, valid_id_bits);
if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
/* With relatively few events, it's faster to
* iterate over the event IDR
*/ */
struct signal_page *page; idr_for_each_entry(&p->event_idr, ev, id) {
unsigned int i; if (id >= KFD_SIGNAL_EVENT_LIMIT)
break;
list_for_each_entry(page, &p->signal_event_pages, event_pages)
for (i = 0; i < SLOTS_PER_PAGE; i++) if (slots[id] != UNSIGNALED_EVENT_SLOT)
if (is_slot_signaled(page, i)) {
ev = lookup_event_by_page_slot(p,
page, i);
set_event_from_interrupt(p, ev); set_event_from_interrupt(p, ev);
} }
} else {
/* With relatively many events, it's faster to
* iterate over the signal slots and lookup
* only signaled events from the IDR.
*/
for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
if (slots[id] != UNSIGNALED_EVENT_SLOT) {
ev = lookup_event_by_id(p, id);
set_event_from_interrupt(p, ev);
}
}
} }
mutex_unlock(&p->event_mutex); mutex_unlock(&p->event_mutex);
...@@ -609,18 +506,16 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) ...@@ -609,18 +506,16 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
GFP_KERNEL); GFP_KERNEL);
for (i = 0; (event_waiters) && (i < num_events) ; i++) { for (i = 0; (event_waiters) && (i < num_events) ; i++) {
INIT_LIST_HEAD(&event_waiters[i].waiters); init_wait(&event_waiters[i].wait);
event_waiters[i].sleeping_task = current;
event_waiters[i].activated = false; event_waiters[i].activated = false;
} }
return event_waiters; return event_waiters;
} }
static int init_event_waiter(struct kfd_process *p, static int init_event_waiter_get_status(struct kfd_process *p,
struct kfd_event_waiter *waiter, struct kfd_event_waiter *waiter,
uint32_t event_id, uint32_t event_id)
uint32_t input_index)
{ {
struct kfd_event *ev = lookup_event_by_id(p, event_id); struct kfd_event *ev = lookup_event_by_id(p, event_id);
...@@ -628,38 +523,60 @@ static int init_event_waiter(struct kfd_process *p, ...@@ -628,38 +523,60 @@ static int init_event_waiter(struct kfd_process *p,
return -EINVAL; return -EINVAL;
waiter->event = ev; waiter->event = ev;
waiter->input_index = input_index;
waiter->activated = ev->signaled; waiter->activated = ev->signaled;
ev->signaled = ev->signaled && !ev->auto_reset; ev->signaled = ev->signaled && !ev->auto_reset;
list_add(&waiter->waiters, &ev->waiters);
return 0; return 0;
} }
static bool test_event_condition(bool all, uint32_t num_events, static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
{
struct kfd_event *ev = waiter->event;
/* Only add to the wait list if we actually need to
* wait on this event.
*/
if (!waiter->activated)
add_wait_queue(&ev->wq, &waiter->wait);
}
/* test_event_condition - Test condition of events being waited for
* @all: Return completion only if all events have signaled
* @num_events: Number of events to wait for
* @event_waiters: Array of event waiters, one per event
*
* Returns KFD_IOC_WAIT_RESULT_COMPLETE if all (or one) event(s) have
* signaled. Returns KFD_IOC_WAIT_RESULT_TIMEOUT if no (or not all)
* events have signaled. Returns KFD_IOC_WAIT_RESULT_FAIL if any of
* the events have been destroyed.
*/
static uint32_t test_event_condition(bool all, uint32_t num_events,
struct kfd_event_waiter *event_waiters) struct kfd_event_waiter *event_waiters)
{ {
uint32_t i; uint32_t i;
uint32_t activated_count = 0; uint32_t activated_count = 0;
for (i = 0; i < num_events; i++) { for (i = 0; i < num_events; i++) {
if (!event_waiters[i].event)
return KFD_IOC_WAIT_RESULT_FAIL;
if (event_waiters[i].activated) { if (event_waiters[i].activated) {
if (!all) if (!all)
return true; return KFD_IOC_WAIT_RESULT_COMPLETE;
activated_count++; activated_count++;
} }
} }
return activated_count == num_events; return activated_count == num_events ?
KFD_IOC_WAIT_RESULT_COMPLETE : KFD_IOC_WAIT_RESULT_TIMEOUT;
} }
/* /*
* Copy event specific data, if defined. * Copy event specific data, if defined.
* Currently only memory exception events have additional data to copy to user * Currently only memory exception events have additional data to copy to user
*/ */
static bool copy_signaled_event_data(uint32_t num_events, static int copy_signaled_event_data(uint32_t num_events,
struct kfd_event_waiter *event_waiters, struct kfd_event_waiter *event_waiters,
struct kfd_event_data __user *data) struct kfd_event_data __user *data)
{ {
...@@ -673,15 +590,15 @@ static bool copy_signaled_event_data(uint32_t num_events, ...@@ -673,15 +590,15 @@ static bool copy_signaled_event_data(uint32_t num_events,
waiter = &event_waiters[i]; waiter = &event_waiters[i];
event = waiter->event; event = waiter->event;
if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) { if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
dst = &data[waiter->input_index].memory_exception_data; dst = &data[i].memory_exception_data;
src = &event->memory_exception_data; src = &event->memory_exception_data;
if (copy_to_user(dst, src, if (copy_to_user(dst, src,
sizeof(struct kfd_hsa_memory_exception_data))) sizeof(struct kfd_hsa_memory_exception_data)))
return false; return -EFAULT;
} }
} }
return true; return 0;
} }
...@@ -710,7 +627,9 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) ...@@ -710,7 +627,9 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
uint32_t i; uint32_t i;
for (i = 0; i < num_events; i++) for (i = 0; i < num_events; i++)
list_del(&waiters[i].waiters); if (waiters[i].event)
remove_wait_queue(&waiters[i].event->wq,
&waiters[i].wait);
kfree(waiters); kfree(waiters);
} }
...@@ -718,38 +637,56 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) ...@@ -718,38 +637,56 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
int kfd_wait_on_events(struct kfd_process *p, int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data, uint32_t num_events, void __user *data,
bool all, uint32_t user_timeout_ms, bool all, uint32_t user_timeout_ms,
enum kfd_event_wait_result *wait_result) uint32_t *wait_result)
{ {
struct kfd_event_data __user *events = struct kfd_event_data __user *events =
(struct kfd_event_data __user *) data; (struct kfd_event_data __user *) data;
uint32_t i; uint32_t i;
int ret = 0; int ret = 0;
struct kfd_event_waiter *event_waiters = NULL; struct kfd_event_waiter *event_waiters = NULL;
long timeout = user_timeout_to_jiffies(user_timeout_ms); long timeout = user_timeout_to_jiffies(user_timeout_ms);
mutex_lock(&p->event_mutex);
event_waiters = alloc_event_waiters(num_events); event_waiters = alloc_event_waiters(num_events);
if (!event_waiters) { if (!event_waiters) {
ret = -ENOMEM; ret = -ENOMEM;
goto fail; goto out;
} }
mutex_lock(&p->event_mutex);
for (i = 0; i < num_events; i++) { for (i = 0; i < num_events; i++) {
struct kfd_event_data event_data; struct kfd_event_data event_data;
if (copy_from_user(&event_data, &events[i], if (copy_from_user(&event_data, &events[i],
sizeof(struct kfd_event_data))) { sizeof(struct kfd_event_data))) {
ret = -EFAULT; ret = -EFAULT;
goto fail; goto out_unlock;
} }
ret = init_event_waiter(p, &event_waiters[i], ret = init_event_waiter_get_status(p, &event_waiters[i],
event_data.event_id, i); event_data.event_id);
if (ret) if (ret)
goto fail; goto out_unlock;
}
/* Check condition once. */
*wait_result = test_event_condition(all, num_events, event_waiters);
if (*wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) {
ret = copy_signaled_event_data(num_events,
event_waiters, events);
goto out_unlock;
} else if (WARN_ON(*wait_result == KFD_IOC_WAIT_RESULT_FAIL)) {
/* This should not happen. Events shouldn't be
* destroyed while we're holding the event_mutex
*/
goto out_unlock;
} }
/* Add to wait lists if we need to wait. */
for (i = 0; i < num_events; i++)
init_event_waiter_add_to_waitlist(&event_waiters[i]);
mutex_unlock(&p->event_mutex); mutex_unlock(&p->event_mutex);
while (true) { while (true) {
...@@ -771,62 +708,66 @@ int kfd_wait_on_events(struct kfd_process *p, ...@@ -771,62 +708,66 @@ int kfd_wait_on_events(struct kfd_process *p,
break; break;
} }
if (test_event_condition(all, num_events, event_waiters)) { /* Set task state to interruptible sleep before
if (copy_signaled_event_data(num_events, * checking wake-up conditions. A concurrent wake-up
event_waiters, events)) * will put the task back into runnable state. In that
*wait_result = KFD_WAIT_COMPLETE; * case schedule_timeout will not put the task to
else * sleep and we'll get a chance to re-check the
*wait_result = KFD_WAIT_ERROR; * updated conditions almost immediately. Otherwise,
* this race condition would lead to a soft hang or a
* very long sleep.
*/
set_current_state(TASK_INTERRUPTIBLE);
*wait_result = test_event_condition(all, num_events,
event_waiters);
if (*wait_result != KFD_IOC_WAIT_RESULT_TIMEOUT)
break; break;
}
if (timeout <= 0) { if (timeout <= 0)
*wait_result = KFD_WAIT_TIMEOUT;
break; break;
}
timeout = schedule_timeout_interruptible(timeout); timeout = schedule_timeout(timeout);
} }
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
mutex_lock(&p->event_mutex); /* copy_signaled_event_data may sleep. So this has to happen
free_waiters(num_events, event_waiters); * after the task state is set back to RUNNING.
mutex_unlock(&p->event_mutex); */
if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
return ret; ret = copy_signaled_event_data(num_events,
event_waiters, events);
fail: mutex_lock(&p->event_mutex);
if (event_waiters) out_unlock:
free_waiters(num_events, event_waiters); free_waiters(num_events, event_waiters);
mutex_unlock(&p->event_mutex); mutex_unlock(&p->event_mutex);
out:
*wait_result = KFD_WAIT_ERROR; if (ret)
*wait_result = KFD_IOC_WAIT_RESULT_FAIL;
else if (*wait_result == KFD_IOC_WAIT_RESULT_FAIL)
ret = -EIO;
return ret; return ret;
} }
int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
{ {
unsigned int page_index;
unsigned long pfn; unsigned long pfn;
struct signal_page *page; struct kfd_signal_page *page;
int ret;
/* check required size is logical */ /* check required size doesn't exceed the allocated size */
if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) != if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) <
get_order(vma->vm_end - vma->vm_start)) { get_order(vma->vm_end - vma->vm_start)) {
pr_err("Event page mmap requested illegal size\n"); pr_err("Event page mmap requested illegal size\n");
return -EINVAL; return -EINVAL;
} }
page_index = vma->vm_pgoff; page = p->signal_page;
page = lookup_signal_page_by_index(p, page_index);
if (!page) { if (!page) {
/* Probably KFD bug, but mmap is user-accessible. */ /* Probably KFD bug, but mmap is user-accessible. */
pr_debug("Signal page could not be found for page_index %u\n", pr_debug("Signal page could not be found\n");
page_index);
return -EINVAL; return -EINVAL;
} }
...@@ -847,8 +788,12 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) ...@@ -847,8 +788,12 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
page->user_address = (uint64_t __user *)vma->vm_start; page->user_address = (uint64_t __user *)vma->vm_start;
/* mapping the page to user process */ /* mapping the page to user process */
return remap_pfn_range(vma, vma->vm_start, pfn, ret = remap_pfn_range(vma, vma->vm_start, pfn,
vma->vm_end - vma->vm_start, vma->vm_page_prot); vma->vm_end - vma->vm_start, vma->vm_page_prot);
if (!ret)
p->signal_mapped_size = vma->vm_end - vma->vm_start;
return ret;
} }
/* /*
...@@ -860,12 +805,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, ...@@ -860,12 +805,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
{ {
struct kfd_hsa_memory_exception_data *ev_data; struct kfd_hsa_memory_exception_data *ev_data;
struct kfd_event *ev; struct kfd_event *ev;
int bkt; uint32_t id;
bool send_signal = true; bool send_signal = true;
ev_data = (struct kfd_hsa_memory_exception_data *) event_data; ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
hash_for_each(p->events, bkt, ev, events) id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == type) { if (ev->type == type) {
send_signal = false; send_signal = false;
dev_dbg(kfd_device, dev_dbg(kfd_device,
...@@ -904,14 +850,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, ...@@ -904,14 +850,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
* running so the lookup function returns a locked process. * running so the lookup function returns a locked process.
*/ */
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
struct mm_struct *mm;
if (!p) if (!p)
return; /* Presumably process exited. */ return; /* Presumably process exited. */
/* Take a safe reference to the mm_struct, which may otherwise
* disappear even while the kfd_process is still referenced.
*/
mm = get_task_mm(p->lead_thread);
if (!mm) {
mutex_unlock(&p->mutex);
return; /* Process is exiting */
}
memset(&memory_exception_data, 0, sizeof(memory_exception_data)); memset(&memory_exception_data, 0, sizeof(memory_exception_data));
down_read(&p->mm->mmap_sem); down_read(&mm->mmap_sem);
vma = find_vma(p->mm, address); vma = find_vma(mm, address);
memory_exception_data.gpu_id = dev->id; memory_exception_data.gpu_id = dev->id;
memory_exception_data.va = address; memory_exception_data.va = address;
...@@ -937,7 +893,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, ...@@ -937,7 +893,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
} }
} }
up_read(&p->mm->mmap_sem); up_read(&mm->mmap_sem);
mmput(mm);
mutex_lock(&p->event_mutex); mutex_lock(&p->event_mutex);
......
...@@ -27,12 +27,17 @@ ...@@ -27,12 +27,17 @@
#include <linux/hashtable.h> #include <linux/hashtable.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/wait.h>
#include "kfd_priv.h" #include "kfd_priv.h"
#include <uapi/linux/kfd_ioctl.h> #include <uapi/linux/kfd_ioctl.h>
#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U /*
#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK * IDR supports non-negative integer IDs. Small IDs are used for
#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX * signal events to match their signal slot. Use the upper half of the
* ID space for non-signal events.
*/
#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1)
#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX
/* /*
* Written into kfd_signal_slot_t to indicate that the event is not signaled. * Written into kfd_signal_slot_t to indicate that the event is not signaled.
...@@ -46,9 +51,6 @@ struct kfd_event_waiter; ...@@ -46,9 +51,6 @@ struct kfd_event_waiter;
struct signal_page; struct signal_page;
struct kfd_event { struct kfd_event {
/* All events in process, rooted at kfd_process.events. */
struct hlist_node events;
u32 event_id; u32 event_id;
bool signaled; bool signaled;
...@@ -56,11 +58,9 @@ struct kfd_event { ...@@ -56,11 +58,9 @@ struct kfd_event {
int type; int type;
struct list_head waiters; /* List of kfd_event_waiter by waiters. */ wait_queue_head_t wq; /* List of event waiters. */
/* Only for signal events. */ /* Only for signal events. */
struct signal_page *signal_page;
unsigned int signal_slot_index;
uint64_t __user *user_signal_address; uint64_t __user *user_signal_address;
/* type specific data */ /* type specific data */
......
...@@ -42,26 +42,26 @@ ...@@ -42,26 +42,26 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/device.h> #include <linux/device.h>
#include <linux/kfifo.h>
#include "kfd_priv.h" #include "kfd_priv.h"
#define KFD_INTERRUPT_RING_SIZE 1024 #define KFD_IH_NUM_ENTRIES 8192
static void interrupt_wq(struct work_struct *); static void interrupt_wq(struct work_struct *);
int kfd_interrupt_init(struct kfd_dev *kfd) int kfd_interrupt_init(struct kfd_dev *kfd)
{ {
void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE, int r;
kfd->device_info->ih_ring_entry_size,
GFP_KERNEL);
if (!interrupt_ring)
return -ENOMEM;
kfd->interrupt_ring = interrupt_ring; r = kfifo_alloc(&kfd->ih_fifo,
kfd->interrupt_ring_size = KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size; GFP_KERNEL);
atomic_set(&kfd->interrupt_ring_wptr, 0); if (r) {
atomic_set(&kfd->interrupt_ring_rptr, 0); dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
return r;
}
kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
spin_lock_init(&kfd->interrupt_lock); spin_lock_init(&kfd->interrupt_lock);
INIT_WORK(&kfd->interrupt_work, interrupt_wq); INIT_WORK(&kfd->interrupt_work, interrupt_wq);
...@@ -92,74 +92,47 @@ void kfd_interrupt_exit(struct kfd_dev *kfd) ...@@ -92,74 +92,47 @@ void kfd_interrupt_exit(struct kfd_dev *kfd)
spin_unlock_irqrestore(&kfd->interrupt_lock, flags); spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
/* /*
* Flush_scheduled_work ensures that there are no outstanding * flush_workqueue ensures that there are no outstanding
* work-queue items that will access interrupt_ring. New work items * work-queue items that will access interrupt_ring. New work items
* can't be created because we stopped interrupt handling above. * can't be created because we stopped interrupt handling above.
*/ */
flush_scheduled_work(); flush_workqueue(kfd->ih_wq);
kfree(kfd->interrupt_ring); kfifo_free(&kfd->ih_fifo);
} }
/* /*
* This assumes that it can't be called concurrently with itself * Assumption: single reader/writer. This function is not re-entrant
* but only with dequeue_ih_ring_entry.
*/ */
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
{ {
unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); int count;
unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
if ((rptr - wptr) % kfd->interrupt_ring_size == count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
kfd->device_info->ih_ring_entry_size) { kfd->device_info->ih_ring_entry_size);
/* This is very bad, the system is likely to hang. */ if (count != kfd->device_info->ih_ring_entry_size) {
dev_err_ratelimited(kfd_chardev(), dev_err_ratelimited(kfd_chardev(),
"Interrupt ring overflow, dropping interrupt.\n"); "Interrupt ring overflow, dropping interrupt %d\n",
count);
return false; return false;
} }
memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
kfd->device_info->ih_ring_entry_size);
wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
kfd->interrupt_ring_size;
smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
atomic_set(&kfd->interrupt_ring_wptr, wptr);
return true; return true;
} }
/* /*
* This assumes that it can't be called concurrently with itself * Assumption: single reader/writer. This function is not re-entrant
* but only with enqueue_ih_ring_entry.
*/ */
static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
{ {
/* int count;
* Assume that wait queues have an implicit barrier, i.e. anything that
* happened in the ISR before it queued work is visible.
*/
unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
if (rptr == wptr)
return false;
memcpy(ih_ring_entry, kfd->interrupt_ring + rptr, count = kfifo_out(&kfd->ih_fifo, ih_ring_entry,
kfd->device_info->ih_ring_entry_size); kfd->device_info->ih_ring_entry_size);
rptr = (rptr + kfd->device_info->ih_ring_entry_size) % WARN_ON(count && count != kfd->device_info->ih_ring_entry_size);
kfd->interrupt_ring_size;
/* return count == kfd->device_info->ih_ring_entry_size;
* Ensure the rptr write update is not visible until
* memcpy has finished reading.
*/
smp_mb();
atomic_set(&kfd->interrupt_ring_rptr, rptr);
return true;
} }
static void interrupt_wq(struct work_struct *work) static void interrupt_wq(struct work_struct *work)
......
...@@ -189,12 +189,9 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, ...@@ -189,12 +189,9 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
if (q->format == KFD_QUEUE_FORMAT_AQL) if (q->format == KFD_QUEUE_FORMAT_AQL)
m->cp_hqd_pq_control |= NO_UPDATE_RPTR; m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
q->is_active = false; q->is_active = (q->queue_size > 0 &&
if (q->queue_size > 0 &&
q->queue_address != 0 && q->queue_address != 0 &&
q->queue_percent > 0) { q->queue_percent > 0);
q->is_active = true;
}
return 0; return 0;
} }
...@@ -215,24 +212,17 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, ...@@ -215,24 +212,17 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
m->sdma_rlc_doorbell = q->doorbell_off << m->sdma_rlc_doorbell =
SDMA0_RLC0_DOORBELL__OFFSET__SHIFT | q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT;
m->sdma_rlc_virtual_addr = q->sdma_vm_addr; m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
m->sdma_engine_id = q->sdma_engine_id; m->sdma_engine_id = q->sdma_engine_id;
m->sdma_queue_id = q->sdma_queue_id; m->sdma_queue_id = q->sdma_queue_id;
q->is_active = false; q->is_active = (q->queue_size > 0 &&
if (q->queue_size > 0 &&
q->queue_address != 0 && q->queue_address != 0 &&
q->queue_percent > 0) { q->queue_percent > 0);
m->sdma_rlc_rb_cntl |=
1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT;
q->is_active = true;
}
return 0; return 0;
} }
...@@ -359,19 +349,13 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, ...@@ -359,19 +349,13 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
m->cp_hqd_pq_doorbell_control = DOORBELL_EN | m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off);
DOORBELL_OFFSET(q->doorbell_off);
m->cp_hqd_vmid = q->vmid; m->cp_hqd_vmid = q->vmid;
m->cp_hqd_active = 0; q->is_active = (q->queue_size > 0 &&
q->is_active = false;
if (q->queue_size > 0 &&
q->queue_address != 0 && q->queue_address != 0 &&
q->queue_percent > 0) { q->queue_percent > 0);
m->cp_hqd_active = 1;
q->is_active = true;
}
return 0; return 0;
} }
......
...@@ -163,12 +163,9 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, ...@@ -163,12 +163,9 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
} }
q->is_active = false; q->is_active = (q->queue_size > 0 &&
if (q->queue_size > 0 &&
q->queue_address != 0 && q->queue_address != 0 &&
q->queue_percent > 0) { q->queue_percent > 0);
q->is_active = true;
}
return 0; return 0;
} }
......
...@@ -31,6 +31,8 @@ ...@@ -31,6 +31,8 @@
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/kfd_ioctl.h> #include <linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <linux/kfifo.h>
#include <kgd_kfd_interface.h> #include <kgd_kfd_interface.h>
#include "amd_shared.h" #include "amd_shared.h"
...@@ -181,10 +183,8 @@ struct kfd_dev { ...@@ -181,10 +183,8 @@ struct kfd_dev {
unsigned int gtt_sa_num_of_chunks; unsigned int gtt_sa_num_of_chunks;
/* Interrupts */ /* Interrupts */
void *interrupt_ring; struct kfifo ih_fifo;
size_t interrupt_ring_size; struct workqueue_struct *ih_wq;
atomic_t interrupt_ring_rptr;
atomic_t interrupt_ring_wptr;
struct work_struct interrupt_work; struct work_struct interrupt_work;
spinlock_t interrupt_lock; spinlock_t interrupt_lock;
...@@ -494,7 +494,12 @@ struct kfd_process { ...@@ -494,7 +494,12 @@ struct kfd_process {
*/ */
struct hlist_node kfd_processes; struct hlist_node kfd_processes;
struct mm_struct *mm; /*
* Opaque pointer to mm_struct. We don't hold a reference to
* it so it should never be dereferenced from here. This is
* only used for looking up processes by their mm.
*/
void *mm;
struct mutex mutex; struct mutex mutex;
...@@ -502,6 +507,8 @@ struct kfd_process { ...@@ -502,6 +507,8 @@ struct kfd_process {
* In any process, the thread that started main() is the lead * In any process, the thread that started main() is the lead
* thread and outlives the rest. * thread and outlives the rest.
* It is here because amd_iommu_bind_pasid wants a task_struct. * It is here because amd_iommu_bind_pasid wants a task_struct.
* It can also be used for safely getting a reference to the
* mm_struct of the process.
*/ */
struct task_struct *lead_thread; struct task_struct *lead_thread;
...@@ -522,22 +529,16 @@ struct kfd_process { ...@@ -522,22 +529,16 @@ struct kfd_process {
struct process_queue_manager pqm; struct process_queue_manager pqm;
/* The process's queues. */
size_t queue_array_size;
/* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
struct kfd_queue **queues;
/*Is the user space process 32 bit?*/ /*Is the user space process 32 bit?*/
bool is_32bit_user_mode; bool is_32bit_user_mode;
/* Event-related data */ /* Event-related data */
struct mutex event_mutex; struct mutex event_mutex;
/* All events in process hashed by ID, linked on kfd_event.events. */ /* Event ID allocator and lookup */
DECLARE_HASHTABLE(events, 4); struct idr event_idr;
/* struct slot_page_header.event_pages */ /* Event page */
struct list_head signal_event_pages; struct kfd_signal_page *signal_page;
u32 next_nonsignal_event_id; size_t signal_mapped_size;
size_t signal_event_count; size_t signal_event_count;
bool signal_event_limit_reached; bool signal_event_limit_reached;
}; };
...@@ -721,19 +722,13 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd); ...@@ -721,19 +722,13 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
extern const struct kfd_event_interrupt_class event_interrupt_class_cik; extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
extern const struct kfd_device_global_init_class device_global_init_class_cik; extern const struct kfd_device_global_init_class device_global_init_class_cik;
enum kfd_event_wait_result {
KFD_WAIT_COMPLETE,
KFD_WAIT_TIMEOUT,
KFD_WAIT_ERROR
};
void kfd_event_init_process(struct kfd_process *p); void kfd_event_init_process(struct kfd_process *p);
void kfd_event_free_process(struct kfd_process *p); void kfd_event_free_process(struct kfd_process *p);
int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_wait_on_events(struct kfd_process *p, int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data, uint32_t num_events, void __user *data,
bool all, uint32_t user_timeout_ms, bool all, uint32_t user_timeout_ms,
enum kfd_event_wait_result *wait_result); uint32_t *wait_result);
void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
uint32_t valid_id_bits); uint32_t valid_id_bits);
void kfd_signal_iommu_event(struct kfd_dev *dev, void kfd_signal_iommu_event(struct kfd_dev *dev,
......
...@@ -34,13 +34,6 @@ struct mm_struct; ...@@ -34,13 +34,6 @@ struct mm_struct;
#include "kfd_priv.h" #include "kfd_priv.h"
#include "kfd_dbgmgr.h" #include "kfd_dbgmgr.h"
/*
* Initial size for the array of queues.
* The allocated size is doubled each time
* it is exceeded up to MAX_PROCESS_QUEUES.
*/
#define INITIAL_QUEUE_ARRAY_SIZE 16
/* /*
* List of struct kfd_process (field kfd_process). * List of struct kfd_process (field kfd_process).
* Unique/indexed by mm_struct* * Unique/indexed by mm_struct*
...@@ -187,8 +180,6 @@ static void kfd_process_wq_release(struct work_struct *work) ...@@ -187,8 +180,6 @@ static void kfd_process_wq_release(struct work_struct *work)
mutex_destroy(&p->mutex); mutex_destroy(&p->mutex);
kfree(p->queues);
kfree(p); kfree(p);
kfree(work); kfree(work);
...@@ -200,7 +191,6 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu) ...@@ -200,7 +191,6 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu)
struct kfd_process *p; struct kfd_process *p;
p = container_of(rcu, struct kfd_process, rcu); p = container_of(rcu, struct kfd_process, rcu);
WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
mmdrop(p->mm); mmdrop(p->mm);
...@@ -234,17 +224,26 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, ...@@ -234,17 +224,26 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
mutex_lock(&p->mutex); mutex_lock(&p->mutex);
/* Iterate over all process device data structures and if the
* pdd is in debug mode, we should first force unregistration,
* then we will be able to destroy the queues
*/
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
struct kfd_dev *dev = pdd->dev;
mutex_lock(kfd_get_dbgmgr_mutex());
if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
kfd_dbgmgr_destroy(dev->dbgmgr);
dev->dbgmgr = NULL;
}
}
mutex_unlock(kfd_get_dbgmgr_mutex());
}
kfd_process_dequeue_from_all_devices(p); kfd_process_dequeue_from_all_devices(p);
pqm_uninit(&p->pqm); pqm_uninit(&p->pqm);
/* Iterate over all process device data structure and check
* if we should delete debug managers
*/
list_for_each_entry(pdd, &p->per_device_data, per_device_list)
if ((pdd->dev->dbgmgr) &&
(pdd->dev->dbgmgr->pasid == p->pasid))
kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
mutex_unlock(&p->mutex); mutex_unlock(&p->mutex);
/* /*
...@@ -271,11 +270,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) ...@@ -271,11 +270,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (!process) if (!process)
goto err_alloc_process; goto err_alloc_process;
process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
sizeof(process->queues[0]), GFP_KERNEL);
if (!process->queues)
goto err_alloc_queues;
process->pasid = kfd_pasid_alloc(); process->pasid = kfd_pasid_alloc();
if (process->pasid == 0) if (process->pasid == 0)
goto err_alloc_pasid; goto err_alloc_pasid;
...@@ -298,8 +292,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) ...@@ -298,8 +292,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
process->lead_thread = thread->group_leader; process->lead_thread = thread->group_leader;
process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;
INIT_LIST_HEAD(&process->per_device_data); INIT_LIST_HEAD(&process->per_device_data);
kfd_event_init_process(process); kfd_event_init_process(process);
...@@ -328,8 +320,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) ...@@ -328,8 +320,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
err_alloc_doorbells: err_alloc_doorbells:
kfd_pasid_free(process->pasid); kfd_pasid_free(process->pasid);
err_alloc_pasid: err_alloc_pasid:
kfree(process->queues);
err_alloc_queues:
kfree(process); kfree(process);
err_alloc_process: err_alloc_process:
return ERR_PTR(err); return ERR_PTR(err);
...@@ -426,7 +416,7 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev) ...@@ -426,7 +416,7 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
err = amd_iommu_bind_pasid(dev->pdev, p->pasid, err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
p->lead_thread); p->lead_thread);
if (err < 0) { if (err < 0) {
pr_err("unexpected pasid %d binding failure\n", pr_err("Unexpected pasid %d binding failure\n",
p->pasid); p->pasid);
mutex_unlock(&p->mutex); mutex_unlock(&p->mutex);
break; break;
...@@ -442,29 +432,25 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev) ...@@ -442,29 +432,25 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
} }
/* /*
* Temporarily unbind currently bound processes from the device and * Mark currently bound processes as PDD_BOUND_SUSPENDED. These
* mark them as PDD_BOUND_SUSPENDED. These processes will be restored * processes will be restored to PDD_BOUND state in
* to PDD_BOUND state in kfd_bind_processes_to_device. * kfd_bind_processes_to_device.
*/ */
void kfd_unbind_processes_from_device(struct kfd_dev *dev) void kfd_unbind_processes_from_device(struct kfd_dev *dev)
{ {
struct kfd_process_device *pdd; struct kfd_process_device *pdd;
struct kfd_process *p; struct kfd_process *p;
unsigned int temp, temp_bound, temp_pasid; unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu); int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex); mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(dev, p); pdd = kfd_get_process_device_data(dev, p);
temp_bound = pdd->bound;
temp_pasid = p->pasid;
if (pdd->bound == PDD_BOUND) if (pdd->bound == PDD_BOUND)
pdd->bound = PDD_BOUND_SUSPENDED; pdd->bound = PDD_BOUND_SUSPENDED;
mutex_unlock(&p->mutex); mutex_unlock(&p->mutex);
if (temp_bound == PDD_BOUND)
amd_iommu_unbind_pasid(dev->pdev, temp_pasid);
} }
srcu_read_unlock(&kfd_processes_srcu, idx); srcu_read_unlock(&kfd_processes_srcu, idx);
...@@ -486,8 +472,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) ...@@ -486,8 +472,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
pr_debug("Unbinding process %d from IOMMU\n", pasid); pr_debug("Unbinding process %d from IOMMU\n", pasid);
if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) mutex_lock(kfd_get_dbgmgr_mutex());
if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
kfd_dbgmgr_destroy(dev->dbgmgr); kfd_dbgmgr_destroy(dev->dbgmgr);
dev->dbgmgr = NULL;
}
}
mutex_unlock(kfd_get_dbgmgr_mutex());
pdd = kfd_get_process_device_data(dev, p); pdd = kfd_get_process_device_data(dev, p);
if (pdd) if (pdd)
......
...@@ -177,7 +177,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, ...@@ -177,7 +177,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (retval != 0) if (retval != 0)
return retval; return retval;
if (list_empty(&pqm->queues)) { if (list_empty(&pdd->qpd.queues_list) &&
list_empty(&pdd->qpd.priv_queue_list)) {
pdd->qpd.pqm = pqm; pdd->qpd.pqm = pqm;
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
} }
...@@ -248,7 +249,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, ...@@ -248,7 +249,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
err_allocate_pqn: err_allocate_pqn:
/* check if queues list is empty unregister process from device */ /* check if queues list is empty unregister process from device */
clear_bit(*qid, pqm->queue_slot_bitmap); clear_bit(*qid, pqm->queue_slot_bitmap);
if (list_empty(&pqm->queues)) if (list_empty(&pdd->qpd.queues_list) &&
list_empty(&pdd->qpd.priv_queue_list))
dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd); dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
return retval; return retval;
} }
...@@ -302,7 +304,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) ...@@ -302,7 +304,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
kfree(pqn); kfree(pqn);
clear_bit(qid, pqm->queue_slot_bitmap); clear_bit(qid, pqm->queue_slot_bitmap);
if (list_empty(&pqm->queues)) if (list_empty(&pdd->qpd.queues_list) &&
list_empty(&pdd->qpd.priv_queue_list))
dqm->ops.unregister_process(dqm, &pdd->qpd); dqm->ops.unregister_process(dqm, &pdd->qpd);
return retval; return retval;
......
...@@ -102,8 +102,7 @@ radeon-y += \ ...@@ -102,8 +102,7 @@ radeon-y += \
radeon-y += \ radeon-y += \
radeon_vce.o \ radeon_vce.o \
vce_v1_0.o \ vce_v1_0.o \
vce_v2_0.o \ vce_v2_0.o
radeon_kfd.o
radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
radeon-$(CONFIG_ACPI) += radeon_acpi.o radeon-$(CONFIG_ACPI) += radeon_acpi.o
......
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
#include "cik_blit_shaders.h" #include "cik_blit_shaders.h"
#include "radeon_ucode.h" #include "radeon_ucode.h"
#include "clearstate_ci.h" #include "clearstate_ci.h"
#include "radeon_kfd.h"
#define SH_MEM_CONFIG_GFX_DEFAULT \ #define SH_MEM_CONFIG_GFX_DEFAULT \
ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
...@@ -5684,10 +5683,9 @@ int cik_vm_init(struct radeon_device *rdev) ...@@ -5684,10 +5683,9 @@ int cik_vm_init(struct radeon_device *rdev)
/* /*
* number of VMs * number of VMs
* VMID 0 is reserved for System * VMID 0 is reserved for System
* radeon graphics/compute will use VMIDs 1-7 * radeon graphics/compute will use VMIDs 1-15
* amdkfd will use VMIDs 8-15
*/ */
rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS; rdev->vm_manager.nvm = 16;
/* base offset of vram pages */ /* base offset of vram pages */
if (rdev->flags & RADEON_IS_IGP) { if (rdev->flags & RADEON_IS_IGP) {
u64 tmp = RREG32(MC_VM_FB_OFFSET); u64 tmp = RREG32(MC_VM_FB_OFFSET);
...@@ -7589,9 +7587,6 @@ int cik_irq_process(struct radeon_device *rdev) ...@@ -7589,9 +7587,6 @@ int cik_irq_process(struct radeon_device *rdev)
/* wptr/rptr are in bytes! */ /* wptr/rptr are in bytes! */
ring_index = rptr / 4; ring_index = rptr / 4;
radeon_kfd_interrupt(rdev,
(const void *) &rdev->ih.ring[ring_index]);
src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
...@@ -8486,10 +8481,6 @@ static int cik_startup(struct radeon_device *rdev) ...@@ -8486,10 +8481,6 @@ static int cik_startup(struct radeon_device *rdev)
if (r) if (r)
return r; return r;
r = radeon_kfd_resume(rdev);
if (r)
return r;
return 0; return 0;
} }
...@@ -8538,7 +8529,6 @@ int cik_resume(struct radeon_device *rdev) ...@@ -8538,7 +8529,6 @@ int cik_resume(struct radeon_device *rdev)
*/ */
int cik_suspend(struct radeon_device *rdev) int cik_suspend(struct radeon_device *rdev)
{ {
radeon_kfd_suspend(rdev);
radeon_pm_suspend(rdev); radeon_pm_suspend(rdev);
radeon_audio_fini(rdev); radeon_audio_fini(rdev);
radeon_vm_manager_fini(rdev); radeon_vm_manager_fini(rdev);
......
...@@ -30,8 +30,6 @@ ...@@ -30,8 +30,6 @@
#define CIK_RB_BITMAP_WIDTH_PER_SH 2 #define CIK_RB_BITMAP_WIDTH_PER_SH 2
#define HAWAII_RB_BITMAP_WIDTH_PER_SH 4 #define HAWAII_RB_BITMAP_WIDTH_PER_SH 4
#define RADEON_NUM_OF_VMIDS 8
/* DIDT IND registers */ /* DIDT IND registers */
#define DIDT_SQ_CTRL0 0x0 #define DIDT_SQ_CTRL0 0x0
# define DIDT_CTRL_EN (1 << 0) # define DIDT_CTRL_EN (1 << 0)
......
...@@ -2456,9 +2456,6 @@ struct radeon_device { ...@@ -2456,9 +2456,6 @@ struct radeon_device {
u64 vram_pin_size; u64 vram_pin_size;
u64 gart_pin_size; u64 gart_pin_size;
/* amdkfd interface */
struct kfd_dev *kfd;
struct mutex mn_lock; struct mutex mn_lock;
DECLARE_HASHTABLE(mn_hash, 7); DECLARE_HASHTABLE(mn_hash, 7);
}; };
......
...@@ -43,7 +43,6 @@ ...@@ -43,7 +43,6 @@
#include <drm/drm_fb_helper.h> #include <drm/drm_fb_helper.h>
#include <drm/drm_crtc_helper.h> #include <drm/drm_crtc_helper.h>
#include "radeon_kfd.h"
/* /*
* KMS wrapper. * KMS wrapper.
...@@ -338,14 +337,6 @@ static int radeon_pci_probe(struct pci_dev *pdev, ...@@ -338,14 +337,6 @@ static int radeon_pci_probe(struct pci_dev *pdev,
{ {
int ret; int ret;
/*
* Initialize amdkfd before starting radeon. If it was not loaded yet,
* defer radeon probing
*/
ret = radeon_kfd_init();
if (ret == -EPROBE_DEFER)
return ret;
if (vga_switcheroo_client_probe_defer(pdev)) if (vga_switcheroo_client_probe_defer(pdev))
return -EPROBE_DEFER; return -EPROBE_DEFER;
...@@ -645,7 +636,6 @@ static int __init radeon_init(void) ...@@ -645,7 +636,6 @@ static int __init radeon_init(void)
static void __exit radeon_exit(void) static void __exit radeon_exit(void)
{ {
radeon_kfd_fini();
pci_unregister_driver(pdriver); pci_unregister_driver(pdriver);
radeon_unregister_atpx_handler(); radeon_unregister_atpx_handler();
} }
......
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "cikd.h"
#include "cik_reg.h"
#include "radeon_kfd.h"
#include "radeon_ucode.h"
#include <linux/firmware.h>
#include "cik_structs.h"
#define CIK_PIPE_PER_MEC (4)
/*
 * Address-watch register table: four groups of three entries, one group per
 * TCP_WATCH0..TCP_WATCH3, each listing ADDR_H, ADDR_L and CNTL in that order.
 * NOTE(review): presumably indexed as
 * watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset - confirm against the
 * address-watch helpers.
 */
static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL,
TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL,
TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL,
TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL
};
/*
 * Bookkeeping for one buffer handed out by alloc_gtt_mem(). A pointer to
 * this structure is what the caller receives back through mem_obj and later
 * passes to free_gtt_mem().
 */
struct kgd_mem {
struct radeon_bo *bo; /* buffer object created by radeon_bo_create() */
uint64_t gpu_addr; /* GPU address filled in by radeon_bo_pin() */
void *cpu_ptr; /* kernel mapping filled in by radeon_bo_kmap() */
};
static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr);
static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
static uint64_t get_vmem_size(struct kgd_dev *kgd);
static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
static int alloc_pasid(unsigned int bits);
static void free_pasid(unsigned int pasid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
/*
* Register access functions
*/
static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id);
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
unsigned int timeout, uint32_t pipe_id,
uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
unsigned int timeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
unsigned int watch_point_id,
uint32_t cntl_val,
uint32_t addr_hi,
uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
uint32_t gfx_index_val,
uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
unsigned int watch_point_id,
unsigned int reg_offset);
static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
/*
 * Callback table that radeon exposes to amdkfd. Its address is passed to
 * kgd2kfd->probe() in radeon_kfd_device_probe(). Every member points at the
 * static helper in this file that implements the corresponding operation.
 */
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
.free_gtt_mem = free_gtt_mem,
.get_vmem_size = get_vmem_size,
.get_gpu_clock_counter = get_gpu_clock_counter,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.alloc_pasid = alloc_pasid,
.free_pasid = free_pasid,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
.hqd_is_occupied = kgd_hqd_is_occupied,
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
.address_watch_disable = kgd_address_watch_disable,
.address_watch_execute = kgd_address_watch_execute,
.wave_control_execute = kgd_wave_control_execute,
.address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
.write_vmid_invalidate_request = write_vmid_invalidate_request,
.get_fw_version = get_fw_version
};
static const struct kgd2kfd_calls *kgd2kfd;
/*
 * Connect to amdkfd by calling its kgd2kfd_init() entry point.
 *
 * When amdkfd is a module the entry point is looked up with
 * symbol_request(); when it is built in it is called directly. On any
 * failure kgd2kfd is left NULL so the other radeon_kfd_* helpers turn into
 * no-ops.
 *
 * Returns 0 on success, -ENOENT when amdkfd is not configured or its entry
 * point cannot be found, or the error reported by kgd2kfd_init().
 */
int radeon_kfd_init(void)
{
	int err;

#if defined(CONFIG_HSA_AMD_MODULE)
	int (*init_fn)(unsigned, const struct kgd2kfd_calls**);

	init_fn = symbol_request(kgd2kfd_init);
	if (!init_fn)
		return -ENOENT;

	err = init_fn(KFD_INTERFACE_VERSION, &kgd2kfd);
	if (err != 0) {
		/* Drop the reference taken by symbol_request(). */
		symbol_put(kgd2kfd_init);
		kgd2kfd = NULL;
	}
#elif defined(CONFIG_HSA_AMD)
	err = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
	if (err != 0)
		kgd2kfd = NULL;
#else
	err = -ENOENT;
#endif

	return err;
}
/*
 * Tear down the amdkfd connection established by radeon_kfd_init().
 * Does nothing when no connection exists (kgd2kfd is NULL).
 */
void radeon_kfd_fini(void)
{
	if (!kgd2kfd)
		return;

	kgd2kfd->exit();
	symbol_put(kgd2kfd_init);
}
/*
 * Offer @rdev to amdkfd. The handle returned by kgd2kfd->probe() is stored
 * in rdev->kfd. Nothing happens when amdkfd is not connected.
 */
void radeon_kfd_device_probe(struct radeon_device *rdev)
{
	if (!kgd2kfd)
		return;

	rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, rdev->pdev,
				   &kfd2kgd);
}
void radeon_kfd_device_init(struct radeon_device *rdev)
{
int i, queue, pipe, mec;
if (rdev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
.num_pipe_per_mec = 4,
.num_queue_per_pipe = 8
};
bitmap_zero(gpu_resources.queue_bitmap, KGD_MAX_QUEUES);
for (i = 0; i < KGD_MAX_QUEUES; ++i) {
queue = i % gpu_resources.num_queue_per_pipe;
pipe = (i / gpu_resources.num_queue_per_pipe)
% gpu_resources.num_pipe_per_mec;
mec = (i / gpu_resources.num_queue_per_pipe)
/ gpu_resources.num_pipe_per_mec;
if (mec == 0 && pipe > 0)
set_bit(i, gpu_resources.queue_bitmap);
}
radeon_doorbell_get_kfd_info(rdev,
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
kgd2kfd->device_init(rdev->kfd, &gpu_resources);
}
}
/*
 * Release the amdkfd handle attached to @rdev, if any, and clear rdev->kfd
 * so later calls see the device as detached.
 */
void radeon_kfd_device_fini(struct radeon_device *rdev)
{
	if (!rdev->kfd)
		return;

	kgd2kfd->device_exit(rdev->kfd);
	rdev->kfd = NULL;
}
/*
 * Forward one interrupt ring entry to amdkfd. Ignored when @rdev has no
 * kfd handle.
 */
void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry)
{
	if (!rdev->kfd)
		return;

	kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
}
/* Ask amdkfd to suspend @rdev; a no-op when the device has no kfd handle. */
void radeon_kfd_suspend(struct radeon_device *rdev)
{
	if (!rdev->kfd)
		return;

	kgd2kfd->suspend(rdev->kfd);
}
/*
 * Ask amdkfd to resume @rdev.
 *
 * Returns 0 when the device has no kfd handle, otherwise whatever
 * kgd2kfd->resume() returns.
 */
int radeon_kfd_resume(struct radeon_device *rdev)
{
	if (!rdev->kfd)
		return 0;

	return kgd2kfd->resume(rdev->kfd);
}
/*
 * Allocate a GTT buffer for amdkfd, pin it and map it into the kernel.
 *
 * @kgd:      radeon device, passed as an opaque kgd_dev pointer
 * @size:     size of the buffer object to create
 * @mem_obj:  receives the struct kgd_mem tracking the buffer; pass it to
 *            free_gtt_mem() to release everything
 * @gpu_addr: receives the pinned GPU address
 * @cpu_ptr:  receives the kernel mapping of the buffer
 *
 * Returns 0 on success, -ENOMEM when the tracking structure cannot be
 * allocated, or the error returned by the failing radeon_bo_* call. On
 * failure nothing stays allocated and *mem_obj is set to NULL.
 */
static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
			 void **mem_obj, uint64_t *gpu_addr,
			 void **cpu_ptr)
{
	struct radeon_device *rdev = (struct radeon_device *)kgd;
	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
	int r;

	BUG_ON(kgd == NULL);
	BUG_ON(gpu_addr == NULL);
	BUG_ON(cpu_ptr == NULL);

	*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if ((*mem) == NULL)
		return -ENOMEM;

	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
				RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
	if (r) {
		dev_err(rdev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		goto allocate_mem_create_bo_failed;
	}

	/* map the buffer */
	r = radeon_bo_reserve((*mem)->bo, true);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT,
				&(*mem)->gpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}
	*gpu_addr = (*mem)->gpu_addr;

	r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
	if (r) {
		dev_err(rdev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}
	*cpu_ptr = (*mem)->cpu_ptr;

	radeon_bo_unreserve((*mem)->bo);

	return 0;

allocate_mem_kmap_bo_failed:
	radeon_bo_unpin((*mem)->bo);
allocate_mem_pin_bo_failed:
	radeon_bo_unreserve((*mem)->bo);
allocate_mem_reserve_bo_failed:
	radeon_bo_unref(&(*mem)->bo);
allocate_mem_create_bo_failed:
	/* The tracking structure used to be leaked on every error path. */
	kfree(*mem);
	*mem = NULL;

	return r;
}
/*
 * Release a buffer obtained from alloc_gtt_mem(): unmap it from the kernel,
 * unpin it, drop the buffer object reference and free the tracking
 * structure. @mem_obj must not be NULL.
 */
static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
	struct kgd_mem *gtt = mem_obj;

	BUG_ON(gtt == NULL);

	/* Unmap and unpin while the buffer object is reserved. */
	radeon_bo_reserve(gtt->bo, true);
	radeon_bo_kunmap(gtt->bo);
	radeon_bo_unpin(gtt->bo);
	radeon_bo_unreserve(gtt->bo);

	radeon_bo_unref(&gtt->bo);
	kfree(gtt);
}
/* Report the device's real VRAM size (mc.real_vram_size). @kgd must not be NULL. */
static uint64_t get_vmem_size(struct kgd_dev *kgd)
{
	BUG_ON(kgd == NULL);

	return ((struct radeon_device *)kgd)->mc.real_vram_size;
}
/* Read the GPU clock counter through the ASIC-specific callback. */
static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
{
	struct radeon_device *radeon = (struct radeon_device *)kgd;

	return radeon->asic->get_gpu_clock_counter(radeon);
}
/*
 * Return the maximum engine clock on AC power in MHz.
 * The stored sclk value is expressed in units of 10 kHz, hence the division
 * by 100.
 */
static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
	struct radeon_device *radeon = (struct radeon_device *)kgd;

	return radeon->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
}
/*
* PASID manager
*/
static DEFINE_IDA(pasid_ida);
/*
 * Allocate a PASID from pasid_ida.
 *
 * @bits is clamped to 31. Starting from that width and working downwards,
 * ida_simple_get() is called with 1U << (bits - 1) and 1U << bits as its
 * start/end arguments; the search stops at the first result other than
 * -ENOSPC.
 *
 * Returns the value produced by ida_simple_get(), or -EINVAL when @bits
 * is 0.
 */
static int alloc_pasid(unsigned int bits)
{
	int pasid = -EINVAL;

	bits = min(bits, 31U);
	while (bits > 0) {
		pasid = ida_simple_get(&pasid_ida,
				       1U << (bits - 1), 1U << bits,
				       GFP_KERNEL);
		if (pasid != -ENOSPC)
			break;
		bits--;
	}

	return pasid;
}
/* Return @pasid to pasid_ida, the allocator that alloc_pasid() draws from. */
static void free_pasid(unsigned int pasid)
{
ida_simple_remove(&pasid_ida, pasid);
}
/* Reinterpret the opaque kgd_dev handle as the radeon device it really is. */
static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd)
{
	struct radeon_device *rdev = (struct radeon_device *)kgd;

	return rdev;
}
static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value)
{
struct radeon_device *rdev = get_radeon_device(kgd);
writel(value, (void __iomem *)(rdev->rmmio + offset));
}
static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset)
{
struct radeon_device *rdev = get_radeon_device(kgd);
return readl((void __iomem *)(rdev->rmmio + offset));
}
/*
 * Take rdev->srbm_mutex and program SRBM_GFX_CNTL with the given
 * MEC/pipe/queue/VMID selection. Must be paired with unlock_srbm(), which
 * releases the mutex.
 */
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
		      uint32_t queue, uint32_t vmid)
{
	struct radeon_device *radeon = get_radeon_device(kgd);
	uint32_t gfx_cntl;

	gfx_cntl = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

	mutex_lock(&radeon->srbm_mutex);
	write_register(kgd, SRBM_GFX_CNTL, gfx_cntl);
}
/*
 * Reset SRBM_GFX_CNTL to 0 and release rdev->srbm_mutex, which lock_srbm()
 * acquired.
 */
static void unlock_srbm(struct kgd_dev *kgd)
{
	struct radeon_device *radeon = get_radeon_device(kgd);

	write_register(kgd, SRBM_GFX_CNTL, 0);
	mutex_unlock(&radeon->srbm_mutex);
}
/*
 * Select the hardware queue addressed by (@pipe_id, @queue_id) through
 * lock_srbm(), with VMID 0. Pair with release_queue().
 *
 * The pipe index is shifted up by one before it is split into MEC and pipe
 * numbers.
 * NOTE(review): presumably this skips a pipe that is not owned by amdkfd -
 * confirm; kgd_init_interrupts() splits pipe_id without this shift.
 */
static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
			  uint32_t queue_id)
{
	uint32_t shifted_pipe = pipe_id + 1;
	uint32_t mec = (shifted_pipe / CIK_PIPE_PER_MEC) + 1;
	uint32_t pipe = shifted_pipe % CIK_PIPE_PER_MEC;

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}
/*
 * Undo acquire_queue(): clears the SRBM selection and releases the
 * SRBM lock via unlock_srbm().
 */
static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);

	return;
}
/*
 * Program the SH_MEM_* registers for @vmid.
 *
 * The SRBM is pointed at @vmid (MEC, pipe and queue all 0) for the
 * duration of the four writes and released afterwards.
 */
static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	lock_srbm(kgd, 0, 0, 0, vmid);

	write_register(kgd, SH_MEM_CONFIG,     sh_mem_config);
	write_register(kgd, SH_MEM_APE1_BASE,  sh_mem_ape1_base);
	write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
	write_register(kgd, SH_MEM_BASES,      sh_mem_bases);

	unlock_srbm(kgd);
}
/*
 * Bind @vmid to @pasid in the ATC and mirror the mapping into the IH
 * lookup table. A @pasid of 0 writes an all-zero (invalid) mapping.
 *
 * Busy-waits (cpu_relax) until the ATC reports the update for this VMID
 * and then acknowledges it. Always returns 0.
 */
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	uint32_t mapping = 0;

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 either
	 * because a mapping is still in progress or because one finished
	 * and software already cleared the bit.
	 * So the protocol is to always wait and then clear.
	 */
	if (pasid != 0)
		mapping = (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID_MASK;

	write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
			mapping);

	for (;;) {
		uint32_t status = read_register(kgd,
					ATC_VMID_PASID_MAPPING_UPDATE_STATUS);

		if (status & (1U << vmid))
			break;
		cpu_relax();
	}

	/* Acknowledge the update for this VMID */
	write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

	/* The IH block needs the same vmid -> pasid mapping */
	write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t),
			mapping);

	return 0;
}
/*
 * Pipeline initialisation hook. Nothing needs to be done here, so
 * all arguments are ignored and success (0) is returned.
 */
static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
	(void)kgd;
	(void)pipe_id;
	(void)hpd_size;
	(void)hpd_gpu_addr;

	return 0;
}
/*
 * Enable the time-stamp and opcode-error interrupts in CPC_INT_CNTL
 * for the pipe identified by @pipe_id.
 *
 * @pipe_id is split into a MEC number (counted from 1) and a pipe
 * index within that MEC; queue 0 / VMID 0 are selected while the
 * register is written. Always returns 0.
 */
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	uint32_t mec_idx = pipe_id / CIK_PIPE_PER_MEC + 1;
	uint32_t pipe_idx = pipe_id % CIK_PIPE_PER_MEC;

	lock_srbm(kgd, mec_idx, pipe_idx, 0, 0);
	write_register(kgd, CPC_INT_CNTL,
			TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE);
	unlock_srbm(kgd);

	return 0;
}
/*
 * Compute the register offset of the SDMA RLC queue described by @m:
 * the engine id scaled by SDMA1_REGISTER_OFFSET plus the queue id
 * scaled by KFD_CIK_SDMA_QUEUE_OFFSET. The result is also logged at
 * debug level.
 */
static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
{
	const uint32_t base = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
			m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;

	pr_debug("kfd: sdma base address: 0x%x\n", base);

	return base;
}
/* View an untyped MQD pointer as a CIK compute MQD. Pointer cast only. */
static inline struct cik_mqd *get_mqd(void *mqd)
{
	struct cik_mqd *typed = mqd;

	return typed;
}
/* View an untyped MQD pointer as an SDMA RLC register image. Pointer cast only. */
static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
	struct cik_sdma_rlc_registers *typed = mqd;

	return typed;
}
/*
 * Load a compute queue's MQD image into the HQD registers of the queue
 * selected by (@pipe_id, @queue_id).
 *
 * The user-space write pointer at @wptr is read before the queue is
 * selected; CP_HQD_PQ_WPTR is only programmed when that read succeeded
 * (get_user() returned 0). CP_HQD_ACTIVE is written last, after every
 * other register. @wptr_shift, @wptr_mask and @mm are not used here.
 *
 * Always returns 0.
 */
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct cik_mqd *regs = get_mqd(mqd);
	uint32_t user_wptr;
	uint32_t have_user_wptr;

	have_user_wptr = !get_user(user_wptr, wptr);

	acquire_queue(kgd, pipe_id, queue_id);

	/* MQD location and control */
	write_register(kgd, CP_MQD_BASE_ADDR, regs->cp_mqd_base_addr_lo);
	write_register(kgd, CP_MQD_BASE_ADDR_HI, regs->cp_mqd_base_addr_hi);
	write_register(kgd, CP_MQD_CONTROL, regs->cp_mqd_control);

	/* Primary queue ring */
	write_register(kgd, CP_HQD_PQ_BASE, regs->cp_hqd_pq_base_lo);
	write_register(kgd, CP_HQD_PQ_BASE_HI, regs->cp_hqd_pq_base_hi);
	write_register(kgd, CP_HQD_PQ_CONTROL, regs->cp_hqd_pq_control);

	/* Indirect buffer state */
	write_register(kgd, CP_HQD_IB_CONTROL, regs->cp_hqd_ib_control);
	write_register(kgd, CP_HQD_IB_BASE_ADDR, regs->cp_hqd_ib_base_addr_lo);
	write_register(kgd, CP_HQD_IB_BASE_ADDR_HI,
			regs->cp_hqd_ib_base_addr_hi);
	write_register(kgd, CP_HQD_IB_RPTR, regs->cp_hqd_ib_rptr);

	write_register(kgd, CP_HQD_PERSISTENT_STATE,
			regs->cp_hqd_persistent_state);
	write_register(kgd, CP_HQD_SEMA_CMD, regs->cp_hqd_sema_cmd);
	write_register(kgd, CP_HQD_MSG_TYPE, regs->cp_hqd_msg_type);

	/* Atomic pre-op values */
	write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO,
			regs->cp_hqd_atomic0_preop_lo);
	write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI,
			regs->cp_hqd_atomic0_preop_hi);
	write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO,
			regs->cp_hqd_atomic1_preop_lo);
	write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI,
			regs->cp_hqd_atomic1_preop_hi);

	/* Read/write pointer reporting and polling */
	write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR,
			regs->cp_hqd_pq_rptr_report_addr_lo);
	write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
			regs->cp_hqd_pq_rptr_report_addr_hi);
	write_register(kgd, CP_HQD_PQ_RPTR, regs->cp_hqd_pq_rptr);
	write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR,
			regs->cp_hqd_pq_wptr_poll_addr_lo);
	write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI,
			regs->cp_hqd_pq_wptr_poll_addr_hi);

	write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL,
			regs->cp_hqd_pq_doorbell_control);

	/* VMID, scheduling quantum and priorities */
	write_register(kgd, CP_HQD_VMID, regs->cp_hqd_vmid);
	write_register(kgd, CP_HQD_QUANTUM, regs->cp_hqd_quantum);
	write_register(kgd, CP_HQD_PIPE_PRIORITY, regs->cp_hqd_pipe_priority);
	write_register(kgd, CP_HQD_QUEUE_PRIORITY,
			regs->cp_hqd_queue_priority);

	write_register(kgd, CP_HQD_IQ_RPTR, regs->cp_hqd_iq_rptr);

	/* Only seed the write pointer when the user copy could be read */
	if (have_user_wptr)
		write_register(kgd, CP_HQD_PQ_WPTR, user_wptr);

	/* Activation state goes in last */
	write_register(kgd, CP_HQD_ACTIVE, regs->cp_hqd_active);

	release_queue(kgd);

	return 0;
}
/*
 * Load an SDMA queue's register image from @mqd into the SDMA RLC
 * registers of the engine/queue that the image itself identifies.
 *
 * SDMA0_RLC0_RB_CNTL is written last, after the ring base, read-pointer
 * report address and doorbell. Always returns 0.
 */
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
{
	struct cik_sdma_rlc_registers *regs = get_sdma_mqd(mqd);
	uint32_t base = get_sdma_base_addr(regs);

	write_register(kgd, base + SDMA0_RLC0_VIRTUAL_ADDR,
			regs->sdma_rlc_virtual_addr);

	write_register(kgd, base + SDMA0_RLC0_RB_BASE,
			regs->sdma_rlc_rb_base);
	write_register(kgd, base + SDMA0_RLC0_RB_BASE_HI,
			regs->sdma_rlc_rb_base_hi);

	write_register(kgd, base + SDMA0_RLC0_RB_RPTR_ADDR_LO,
			regs->sdma_rlc_rb_rptr_addr_lo);
	write_register(kgd, base + SDMA0_RLC0_RB_RPTR_ADDR_HI,
			regs->sdma_rlc_rb_rptr_addr_hi);

	write_register(kgd, base + SDMA0_RLC0_DOORBELL,
			regs->sdma_rlc_doorbell);

	write_register(kgd, base + SDMA0_RLC0_RB_CNTL,
			regs->sdma_rlc_rb_cntl);

	return 0;
}
/*
 * Tell whether the hardware queue (@pipe_id, @queue_id) is active and
 * currently bound to the ring at @queue_address.
 *
 * The queue counts as occupied only if CP_HQD_ACTIVE reads non-zero and
 * both halves of the programmed PQ base match @queue_address >> 8.
 */
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	bool occupied = false;

	acquire_queue(kgd, pipe_id, queue_id);

	if (read_register(kgd, CP_HQD_ACTIVE)) {
		uint32_t base_lo = lower_32_bits(queue_address >> 8);
		uint32_t base_hi = upper_32_bits(queue_address >> 8);

		if (base_lo == read_register(kgd, CP_HQD_PQ_BASE) &&
		    base_hi == read_register(kgd, CP_HQD_PQ_BASE_HI))
			occupied = true;
	}

	release_queue(kgd);

	return occupied;
}
/*
 * Tell whether the SDMA queue described by @mqd is in use, i.e. whether
 * SDMA_RB_ENABLE is set in its SDMA0_RLC0_RB_CNTL register.
 */
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct cik_sdma_rlc_registers *regs = get_sdma_mqd(mqd);
	uint32_t base = get_sdma_base_addr(regs);
	uint32_t rb_cntl = read_register(kgd, base + SDMA0_RLC0_RB_CNTL);

	return (rb_cntl & SDMA_RB_ENABLE) ? true : false;
}
/*
 * Request a dequeue of the compute queue (@pipe_id, @queue_id) and wait
 * for the hardware to report it inactive.
 *
 * @reset_type is written verbatim to CP_HQD_DEQUEUE_REQUEST after the
 * queue's doorbell has been disabled. CP_HQD_ACTIVE is then polled
 * every 20 ms until bit 0 clears or @timeout (milliseconds) is used up.
 * @mqd is not used.
 *
 * Returns 0 once the queue is inactive, -ETIME if it is still active
 * when the timeout expires. The SRBM lock is released on both paths.
 */
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
				unsigned int timeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	const unsigned int poll_ms = 20;
	const unsigned int requested_ms = timeout;
	uint32_t temp;
	int ret = 0;

	acquire_queue(kgd, pipe_id, queue_id);
	write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0);

	write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type);

	while (true) {
		temp = read_register(kgd, CP_HQD_ACTIVE);
		/* Done once the ACTIVE bit has dropped */
		if (!(temp & 0x1))
			break;
		if (timeout == 0) {
			pr_err("kfd: cp queue preemption time out (%ums)\n",
				requested_ms);
			ret = -ETIME;
			break;
		}
		msleep(poll_ms);
		/*
		 * Saturate at 0: timeout is unsigned, so a plain subtraction
		 * would wrap for values that are not a multiple of poll_ms
		 * and the loop would effectively never time out.
		 */
		timeout = (timeout > poll_ms) ? timeout - poll_ms : 0;
	}

	release_queue(kgd);
	return ret;
}
/*
 * Stop the SDMA queue described by @mqd and clear its ring registers.
 *
 * SDMA_RB_ENABLE is cleared in SDMA0_RLC0_RB_CNTL, then
 * SDMA0_RLC0_CONTEXT_STATUS is polled every 20 ms until SDMA_RLC_IDLE
 * is set or @timeout (milliseconds) is used up. On success the
 * doorbell, read/write pointers and ring base are zeroed.
 *
 * Returns 0 on success, -ETIME if the queue did not go idle in time
 * (in which case the ring registers are left untouched).
 */
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int timeout)
{
	const unsigned int poll_ms = 20;
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_base_addr;
	uint32_t temp;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA_RB_ENABLE;
	write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = read_register(kgd, sdma_base_addr +
						SDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA_RLC_IDLE)
			break;
		if (timeout == 0)
			return -ETIME;
		msleep(poll_ms);
		/*
		 * Saturate at 0: timeout is unsigned, so a plain subtraction
		 * would wrap for values that are not a multiple of poll_ms
		 * and the loop would effectively never time out.
		 */
		timeout = (timeout > poll_ms) ? timeout - poll_ms : 0;
	}

	write_register(kgd, sdma_base_addr + SDMA0_RLC0_DOORBELL, 0);
	write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0);
	write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0);
	write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0);

	return 0;
}
static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
union TCP_WATCH_CNTL_BITS cntl;
unsigned int i;
cntl.u32All = 0;
cntl.bitfields.valid = 0;
cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
cntl.bitfields.atc = 1;
/* Turning off this address until we set all the registers */
for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
write_register(kgd,
watchRegs[i * ADDRESS_WATCH_REG_MAX +
ADDRESS_WATCH_REG_CNTL],
cntl.u32All);
return 0;
}
static int kgd_address_watch_execute(struct kgd_dev *kgd,
unsigned int watch_point_id,
uint32_t cntl_val,
uint32_t addr_hi,
uint32_t addr_lo)
{
union TCP_WATCH_CNTL_BITS cntl;
cntl.u32All = cntl_val;
/* Turning off this watch point until we set all the registers */
cntl.bitfields.valid = 0;
write_register(kgd,
watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
ADDRESS_WATCH_REG_CNTL],
cntl.u32All);
write_register(kgd,
watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
ADDRESS_WATCH_REG_ADDR_HI],
addr_hi);
write_register(kgd,
watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
ADDRESS_WATCH_REG_ADDR_LO],
addr_lo);
/* Enable the watch point */
cntl.bitfields.valid = 1;
write_register(kgd,
watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
ADDRESS_WATCH_REG_CNTL],
cntl.u32All);
return 0;
}
/*
 * Issue the wave control command @sq_cmd to the target selected by
 * @gfx_index_val.
 *
 * Under grbm_idx_mutex, GRBM_GFX_INDEX is pointed at the target, SQ_CMD
 * is written, and GRBM_GFX_INDEX is then put back to broadcast for
 * instance, SH and SE. Always returns 0.
 */
static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct radeon_device *rdev = get_radeon_device(kgd);
	const uint32_t broadcast = INSTANCE_BROADCAST_WRITES |
			SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;

	mutex_lock(&rdev->grbm_idx_mutex);

	write_register(kgd, GRBM_GFX_INDEX, gfx_index_val);
	write_register(kgd, SQ_CMD, sq_cmd);

	/* Put GRBM_GFX_INDEX back into broadcast mode */
	write_register(kgd, GRBM_GFX_INDEX, broadcast);

	mutex_unlock(&rdev->grbm_idx_mutex);

	return 0;
}
/*
 * Look up register @reg_offset of watch point @watch_point_id in
 * watchRegs and return the table value divided by 4.
 *
 * NOTE(review): the division presumably converts a byte offset into a
 * dword offset - confirm against the watchRegs definition.
 */
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	const unsigned int slot =
		watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset;

	return watchRegs[slot] / 4;
}
/*
 * Report whether the ATC mapping register for @vmid has its VALID bit
 * set.
 */
static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid)
{
	/*
	 * rdev is not referenced directly below; presumably RREG32()
	 * picks it up by name, so the local must keep this name.
	 */
	struct radeon_device *rdev = (struct radeon_device *) kgd;
	uint32_t mapping = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);

	return (mapping & ATC_VMID_PASID_MAPPING_VALID_MASK) != 0;
}
/*
 * Return the PASID field of the ATC mapping register for @vmid
 * (the register value masked with ATC_VMID_PASID_MAPPING_PASID_MASK).
 */
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
							uint8_t vmid)
{
	/*
	 * rdev is not referenced directly below; presumably RREG32()
	 * picks it up by name, so the local must keep this name.
	 */
	struct radeon_device *rdev = (struct radeon_device *) kgd;
	uint32_t mapping = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);

	return mapping & ATC_VMID_PASID_MAPPING_PASID_MASK;
}
/*
 * Request an invalidate for @vmid by writing its bit to
 * VM_INVALIDATE_REQUEST.
 */
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
	/*
	 * rdev is not referenced directly below; presumably WREG32()
	 * picks it up by name, so the local must keep this name.
	 */
	struct radeon_device *rdev = (struct radeon_device *) kgd;

	/*
	 * No "return <expr>;" here: this function returns void. The shift
	 * is done on an unsigned constant so that bit 31 is well defined.
	 */
	WREG32(VM_INVALIDATE_REQUEST, 1U << vmid);
}
/*
 * Return the microcode version recorded in the firmware header of the
 * engine selected by @type.
 *
 * Both SDMA engine types read the same sdma_fw image. Returns 0 for an
 * unknown engine type, or when the selected firmware image or its data
 * is missing. BUGs if @kgd is NULL or no MEC firmware is loaded.
 */
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
	struct radeon_device *rdev = (struct radeon_device *) kgd;
	const union radeon_firmware_header *hdr;
	const void *image = NULL;

	BUG_ON(kgd == NULL || rdev->mec_fw == NULL);

	/*
	 * Check each firmware pointer before touching ->data; a missing
	 * image leaves @image NULL and is reported as version 0 below.
	 */
	switch (type) {
	case KGD_ENGINE_PFP:
		if (rdev->pfp_fw)
			image = rdev->pfp_fw->data;
		break;

	case KGD_ENGINE_ME:
		if (rdev->me_fw)
			image = rdev->me_fw->data;
		break;

	case KGD_ENGINE_CE:
		if (rdev->ce_fw)
			image = rdev->ce_fw->data;
		break;

	case KGD_ENGINE_MEC1:
		if (rdev->mec_fw)
			image = rdev->mec_fw->data;
		break;

	case KGD_ENGINE_MEC2:
		if (rdev->mec2_fw)
			image = rdev->mec2_fw->data;
		break;

	case KGD_ENGINE_RLC:
		if (rdev->rlc_fw)
			image = rdev->rlc_fw->data;
		break;

	case KGD_ENGINE_SDMA1:
	case KGD_ENGINE_SDMA2:
		if (rdev->sdma_fw)
			image = rdev->sdma_fw->data;
		break;

	default:
		return 0;
	}

	hdr = image;
	if (hdr == NULL)
		return 0;

	/*
	 * Only 12 bits are in use according to the original note; no
	 * masking is applied here, the value is just narrowed to 16 bits.
	 */
	return hdr->common.ucode_version;
}
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* radeon_kfd.h defines the private interface between the
* AMD kernel graphics drivers and the AMD KFD.
*/
/* Include guard for the radeon <-> KFD private interface header. */
#ifndef RADEON_KFD_H_INCLUDED
#define RADEON_KFD_H_INCLUDED
#include <linux/types.h>
#include "kgd_kfd_interface.h"
/* Forward declaration: this header only passes pointers to the device. */
struct radeon_device;
/*
 * Driver-wide entry points (no device argument).
 * NOTE(review): presumably called once at module load/unload - confirm
 * against the implementation, which is not visible from this header.
 */
int radeon_kfd_init(void);
void radeon_kfd_fini(void);
/* Per-device power-management hooks; resume can report an error code. */
void radeon_kfd_suspend(struct radeon_device *rdev);
int radeon_kfd_resume(struct radeon_device *rdev);
/*
 * Hand one interrupt-handler ring entry to KFD.
 * NOTE(review): layout and lifetime of ih_ring_entry are not visible
 * here - confirm against the caller.
 */
void radeon_kfd_interrupt(struct radeon_device *rdev,
const void *ih_ring_entry);
/* Per-device lifecycle hooks: probe, init, and teardown (fini). */
void radeon_kfd_device_probe(struct radeon_device *rdev);
void radeon_kfd_device_init(struct radeon_device *rdev);
void radeon_kfd_device_fini(struct radeon_device *rdev);
#endif /* RADEON_KFD_H_INCLUDED */
...@@ -34,8 +34,6 @@ ...@@ -34,8 +34,6 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/pm_runtime.h> #include <linux/pm_runtime.h>
#include "radeon_kfd.h"
#if defined(CONFIG_VGA_SWITCHEROO) #if defined(CONFIG_VGA_SWITCHEROO)
bool radeon_has_atpx(void); bool radeon_has_atpx(void);
#else #else
...@@ -68,8 +66,6 @@ void radeon_driver_unload_kms(struct drm_device *dev) ...@@ -68,8 +66,6 @@ void radeon_driver_unload_kms(struct drm_device *dev)
pm_runtime_forbid(dev->dev); pm_runtime_forbid(dev->dev);
} }
radeon_kfd_device_fini(rdev);
radeon_acpi_fini(rdev); radeon_acpi_fini(rdev);
radeon_modeset_fini(rdev); radeon_modeset_fini(rdev);
...@@ -174,9 +170,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) ...@@ -174,9 +170,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
"Error during ACPI methods call\n"); "Error during ACPI methods call\n");
} }
radeon_kfd_device_probe(rdev);
radeon_kfd_device_init(rdev);
if (radeon_is_px(dev)) { if (radeon_is_px(dev)) {
pm_runtime_use_autosuspend(dev->dev); pm_runtime_use_autosuspend(dev->dev);
pm_runtime_set_autosuspend_delay(dev->dev, 5000); pm_runtime_set_autosuspend_delay(dev->dev, 5000);
......
...@@ -169,7 +169,7 @@ struct kfd_ioctl_dbg_wave_control_args { ...@@ -169,7 +169,7 @@ struct kfd_ioctl_dbg_wave_control_args {
#define KFD_IOC_WAIT_RESULT_TIMEOUT 1 #define KFD_IOC_WAIT_RESULT_TIMEOUT 1
#define KFD_IOC_WAIT_RESULT_FAIL 2 #define KFD_IOC_WAIT_RESULT_FAIL 2
#define KFD_SIGNAL_EVENT_LIMIT 256 #define KFD_SIGNAL_EVENT_LIMIT 4096
struct kfd_ioctl_create_event_args { struct kfd_ioctl_create_event_args {
__u64 event_page_offset; /* from KFD */ __u64 event_page_offset; /* from KFD */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment