Merge tag 'drm-amdkfd-next-2017-11-02' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- Usermode Events The current events code implemented some data structures (waitqueue, fifo) that were already implemented in the kernel. The patches below addresses this issue by replacing them with the standard kernel implementation. In addition, they simplify allocation of events IDs and memory for the events. The patches also increase the maximum number of events while maintaining compatibility with the older userspace library. - Remove radeon support Because Kaveri is fully supported in amdgpu and because current and future versions of userspace libraries will only support amdgpu, we removed radeon support from kfd. Current users can move to amdgpu while using the same userspace libraries. - Various bug fixes and cleanups * tag 'drm-amdkfd-next-2017-11-02' of git://people.freedesktop.org/~gabbayo/linux: (26 commits) drm/amdkfd: Minor cleanups drm/amdkfd: Update queue_count before mapping queues drm/amdkfd: Cleanup DQM ASIC-specific ops drm/amdkfd: Register/Deregister process on qpd resolution drm/amdkfd: Fix debug unregister procedure on process termination drm/amdkfd: Avoid calling amd_iommu_unbind_pasid() when suspending drm/amdkfd: Disable CP/SDMA ring/doorbell in MQD drm/amdkfd: Clean up the data structure in kfd_process drm/radeon: deprecate and remove KFD interface drm/amdkfd: use a high priority workqueue for IH work drm/amdkfd: wait only for IH work on IH exit drm/amdkfd: increase IH num entries to 8192 drm/amdkfd: use standard kernel kfifo for IH drm/amdkfd: increase limit of signal events to 4096 per process drm/amdkfd: Make event limit dependent on user mode mapping size drm/amdkfd: Use IH context ID for signal lookup drm/amdkfd: Simplify event ID and signal slot management drm/amdkfd: Simplify events page allocator drm/amdkfd: Use wait_queue_t to implement event waiting drm/amdkfd: remove redundant kfd_event_waiter.input_index ...

Merge tag 'drm-amdkfd-next-2017-11-02' of git://people.freedesktop.org/~gabbayo/linux into drm-next
- Usermode Events The current events code implemented some data structures (waitqueue, fifo) that were already implemented in the kernel. The patches below addresses this issue by replacing them with the standard kernel implementation. In addition, they simplify allocation of events IDs and memory for the events. The patches also increase the maximum number of events while maintaining compatibility with the older userspace library. - Remove radeon support Because Kaveri is fully supported in amdgpu and because current and future versions of userspace libraries will only support amdgpu, we removed radeon support from kfd. Current users can move to amdgpu while using the same userspace libraries. - Various bug fixes and cleanups * tag 'drm-amdkfd-next-2017-11-02' of git://people.freedesktop.org/~gabbayo/linux: (26 commits) drm/amdkfd: Minor cleanups drm/amdkfd: Update queue_count before mapping queues drm/amdkfd: Cleanup DQM ASIC-specific ops drm/amdkfd: Register/Deregister process on qpd resolution drm/amdkfd: Fix debug unregister procedure on process termination drm/amdkfd: Avoid calling amd_iommu_unbind_pasid() when suspending drm/amdkfd: Disable CP/SDMA ring/doorbell in MQD drm/amdkfd: Clean up the data structure in kfd_process drm/radeon: deprecate and remove KFD interface drm/amdkfd: use a high priority workqueue for IH work drm/amdkfd: wait only for IH work on IH exit drm/amdkfd: increase IH num entries to 8192 drm/amdkfd: use standard kernel kfifo for IH drm/amdkfd: increase limit of signal events to 4096 per process drm/amdkfd: Make event limit dependent on user mode mapping size drm/amdkfd: Use IH context ID for signal lookup drm/amdkfd: Simplify event ID and signal slot management drm/amdkfd: Simplify events page allocator drm/amdkfd: Use wait_queue_t to implement event waiting drm/amdkfd: remove redundant kfd_event_waiter.input_index ...
9ad472e3 · Dave Airlie · 85f6e0f6 · 894a8293 · 9ad472e3 · 9ad472e3
Commit 9ad472e3 authored Nov 03, 2017 by Dave Airlie
27 changed files
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -759,8 +759,6 @@ F:	drivers/gpu/drm/amd/amdkfd/
 F:	drivers/gpu/drm/amd/include/cik_structs.h
 F:	drivers/gpu/drm/amd/include/kgd_kfd_interface.h
 F:	drivers/gpu/drm/amd/include/vi_structs.h
-F:	drivers/gpu/drm/radeon/radeon_kfd.c
-F:	drivers/gpu/drm/radeon/radeon_kfd.h
 F:	include/uapi/linux/kfd_ioctl.h

 AMD SEATTLE DEVICE TREE SUPPORT

--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -4,6 +4,6 @@

 config HSA_AMD
 	tristate "HSA kernel driver for AMD GPU devices"
-	depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64
+	depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64
 	help
 	  Enable this if you want to use HSA features on AMD GPU devices.
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -36,6 +36,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
 	/* Do not process in ISR, just request it to be forwarded to WQ. */
 	return (pasid != 0) &&
 		(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
+		ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
 		ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
 		ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
 }
@@ -46,6 +47,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
 	unsigned int pasid;
 	const struct cik_ih_ring_entry *ihre =
 			(const struct cik_ih_ring_entry *)ih_ring_entry;
+	uint32_t context_id = ihre->data & 0xfffffff;

 	pasid = (ihre->ring_id & 0xffff0000) >> 16;

@@ -53,9 +55,11 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
 		return;

 	if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE)
-		kfd_signal_event_interrupt(pasid, 0, 0);
+		kfd_signal_event_interrupt(pasid, context_id, 28);
+	else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP)
+		kfd_signal_event_interrupt(pasid, context_id, 28);
 	else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG)
-		kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8);
+		kfd_signal_event_interrupt(pasid, context_id & 0xff, 8);
 	else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
 		kfd_signal_hw_exception_event(pasid);
 }

--- a/drivers/gpu/drm/amd/amdkfd/cik_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h
@@ -32,9 +32,10 @@ struct cik_ih_ring_entry {
 	uint32_t reserved;
 };

-#define CIK_INTSRC_DEQUEUE_COMPLETE	0xC6
 #define CIK_INTSRC_CP_END_OF_PIPE	0xB5
 #define CIK_INTSRC_CP_BAD_OPCODE	0xB7
+#define CIK_INTSRC_DEQUEUE_COMPLETE	0xC6
+#define CIK_INTSRC_SDMA_TRAP		0xE0
 #define CIK_INTSRC_SQ_INTERRUPT_MSG	0xEF

 #endif

--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -450,8 +450,8 @@ static int kfd_ioctl_dbg_register(struct file *filep,
 		return -EINVAL;
 	}

-	mutex_lock(kfd_get_dbgmgr_mutex());
 	mutex_lock(&p->mutex);
+	mutex_lock(kfd_get_dbgmgr_mutex());

 	/*
 	 * make sure that we have pdd, if this the first queue created for
@@ -479,8 +479,8 @@ static int kfd_ioctl_dbg_register(struct file *filep,
 	}

 out:
-	mutex_unlock(&p->mutex);
 	mutex_unlock(kfd_get_dbgmgr_mutex());
+	mutex_unlock(&p->mutex);

 	return status;
 }
@@ -835,15 +835,12 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
 				void *data)
 {
 	struct kfd_ioctl_wait_events_args *args = data;
-	enum kfd_event_wait_result wait_result;
 	int err;

 	err = kfd_wait_on_events(p, args->num_events,
 			(void __user *)args->events_ptr,
 			(args->wait_for_all != 0),
-			args->timeout, &wait_result);
-
-	args->wait_result = wait_result;
+			args->timeout, &args->wait_result);

 	return err;
 }

--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -403,7 +403,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 	if (kfd->interrupts_active
 	    && interrupt_is_wanted(kfd, ih_ring_entry)
 	    && enqueue_ih_ring_entry(kfd, ih_ring_entry))
-		schedule_work(&kfd->interrupt_work);
+		queue_work(kfd->ih_wq, &kfd->interrupt_work);

 	spin_unlock(&kfd->interrupt_lock);
 }

--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -389,12 +389,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 	if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
 		retval = unmap_queues_cpsch(dqm,
 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
-		if (retval != 0) {
+		if (retval) {
 			pr_err("unmap queue failed\n");
 			goto out_unlock;
 		}
-	} else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
-		   prev_active &&
+	} else if (prev_active &&
 		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
 		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
 		retval = mqd->destroy_mqd(mqd, q->mqd,
@@ -408,24 +407,25 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)

 	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);

-	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
-		retval = map_queues_cpsch(dqm);
-	else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
-		 q->properties.is_active &&
-		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
-		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
-		retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
-				       &q->properties, q->process->mm);
-
 	/*
-	 * check active state vs. the previous state
-	 * and modify counter accordingly
+	 * check active state vs. the previous state and modify
+	 * counter accordingly. map_queues_cpsch uses the
+	 * dqm->queue_count to determine whether a new runlist must be
+	 * uploaded.
 	 */
 	if (q->properties.is_active && !prev_active)
 		dqm->queue_count++;
 	else if (!q->properties.is_active && prev_active)
 		dqm->queue_count--;

+	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
+		retval = map_queues_cpsch(dqm);
+	else if (q->properties.is_active &&
+		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
+		retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
+				       &q->properties, q->process->mm);
+
 out_unlock:
 	mutex_unlock(&dqm->lock);
 	return retval;
@@ -467,7 +467,7 @@ static int register_process(struct device_queue_manager *dqm,
 	mutex_lock(&dqm->lock);
 	list_add(&n->list, &dqm->queues);

-	retval = dqm->ops_asic_specific.register_process(dqm, qpd);
+	retval = dqm->asic_ops.update_qpd(dqm, qpd);

 	dqm->processes_count++;

@@ -629,7 +629,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
 	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

-	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
+	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
 	if (retval)
@@ -696,8 +696,6 @@ static int set_sched_resources(struct device_queue_manager *dqm)

 static int initialize_cpsch(struct device_queue_manager *dqm)
 {
-	int retval;
-
 	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

 	mutex_init(&dqm->lock);
@@ -706,11 +704,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
 	dqm->sdma_queue_count = 0;
 	dqm->active_runlist = false;
 	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
-	retval = dqm->ops_asic_specific.initialize(dqm);
-	if (retval)
-		mutex_destroy(&dqm->lock);

-	return retval;
+	return 0;
 }

 static int start_cpsch(struct device_queue_manager *dqm)
@@ -835,7 +830,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,

 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		retval = allocate_sdma_queue(dqm, &q->sdma_id);
-		if (retval != 0)
+		if (retval)
 			goto out;
 		q->properties.sdma_queue_id =
 			q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
@@ -850,7 +845,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 		goto out;
 	}

-	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
+	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
 	if (retval)
@@ -1095,7 +1090,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
 		qpd->sh_mem_ape1_limit = limit >> 16;
 	}

-	retval = dqm->ops_asic_specific.set_cache_memory_policy(
+	retval = dqm->asic_ops.set_cache_memory_policy(
 			dqm,
 			qpd,
 			default_policy,
@@ -1270,11 +1265,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)

 	switch (dev->device_info->asic_family) {
 	case CHIP_CARRIZO:
-		device_queue_manager_init_vi(&dqm->ops_asic_specific);
+		device_queue_manager_init_vi(&dqm->asic_ops);
 		break;

 	case CHIP_KAVERI:
-		device_queue_manager_init_cik(&dqm->ops_asic_specific);
+		device_queue_manager_init_cik(&dqm->asic_ops);
 		break;
 	default:
 		WARN(1, "Unexpected ASIC family %u",

--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -128,9 +128,8 @@ struct device_queue_manager_ops {
 };

 struct device_queue_manager_asic_ops {
-	int	(*register_process)(struct device_queue_manager *dqm,
+	int	(*update_qpd)(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
-	int	(*initialize)(struct device_queue_manager *dqm);
 	bool	(*set_cache_memory_policy)(struct device_queue_manager *dqm,
 					   struct qcm_process_device *qpd,
 					   enum cache_policy default_policy,
@@ -156,7 +155,7 @@ struct device_queue_manager_asic_ops {

 struct device_queue_manager {
 	struct device_queue_manager_ops ops;
-	struct device_queue_manager_asic_ops ops_asic_specific;
+	struct device_queue_manager_asic_ops asic_ops;

 	struct mqd_manager	*mqds[KFD_MQD_TYPE_MAX];
 	struct packet_manager	packets;
@@ -179,8 +178,10 @@ struct device_queue_manager {
 	bool			active_runlist;
 };

-void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops);
-void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops);
+void device_queue_manager_init_cik(
+		struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_vi(
+		struct device_queue_manager_asic_ops *asic_ops);
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
 unsigned int get_queues_num(struct device_queue_manager *dqm);

--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -32,18 +32,17 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
 				   enum cache_policy alternate_policy,
 				   void __user *alternate_aperture_base,
 				   uint64_t alternate_aperture_size);
-static int register_process_cik(struct device_queue_manager *dqm,
+static int update_qpd_cik(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
-static int initialize_cpsch_cik(struct device_queue_manager *dqm);
 static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
 				struct qcm_process_device *qpd);

-void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops)
+void device_queue_manager_init_cik(
+		struct device_queue_manager_asic_ops *asic_ops)
 {
-	ops->set_cache_memory_policy = set_cache_memory_policy_cik;
-	ops->register_process = register_process_cik;
-	ops->initialize = initialize_cpsch_cik;
-	ops->init_sdma_vm = init_sdma_vm;
+	asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
+	asic_ops->update_qpd = update_qpd_cik;
+	asic_ops->init_sdma_vm = init_sdma_vm;
 }

 static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
@@ -99,7 +98,7 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
 	return true;
 }

-static int register_process_cik(struct device_queue_manager *dqm,
+static int update_qpd_cik(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd)
 {
 	struct kfd_process_device *pdd;
@@ -148,8 +147,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,

 	q->properties.sdma_vm_addr = value;
 }
-
-static int initialize_cpsch_cik(struct device_queue_manager *dqm)
-{
-	return 0;
-}
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -33,18 +33,17 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
 				   enum cache_policy alternate_policy,
 				   void __user *alternate_aperture_base,
 				   uint64_t alternate_aperture_size);
-static int register_process_vi(struct device_queue_manager *dqm,
+static int update_qpd_vi(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
-static int initialize_cpsch_vi(struct device_queue_manager *dqm);
 static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
 				struct qcm_process_device *qpd);

-void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops)
+void device_queue_manager_init_vi(
+		struct device_queue_manager_asic_ops *asic_ops)
 {
-	ops->set_cache_memory_policy = set_cache_memory_policy_vi;
-	ops->register_process = register_process_vi;
-	ops->initialize = initialize_cpsch_vi;
-	ops->init_sdma_vm = init_sdma_vm;
+	asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
+	asic_ops->update_qpd = update_qpd_vi;
+	asic_ops->init_sdma_vm = init_sdma_vm;
 }

 static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
@@ -104,7 +103,7 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
 	return true;
 }

-static int register_process_vi(struct device_queue_manager *dqm,
+static int update_qpd_vi(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd)
 {
 	struct kfd_process_device *pdd;
@@ -160,8 +159,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,

 	q->properties.sdma_vm_addr = value;
 }
-
-static int initialize_cpsch_vi(struct device_queue_manager *dqm)
-{
-	return 0;
-}
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -27,12 +27,17 @@
 #include <linux/hashtable.h>
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/wait.h>
 #include "kfd_priv.h"
 #include <uapi/linux/kfd_ioctl.h>

-#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U
-#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK
-#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX
+/*
+ * IDR supports non-negative integer IDs. Small IDs are used for
+ * signal events to match their signal slot. Use the upper half of the
+ * ID space for non-signal events.
+ */
+#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1)
+#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX

 /*
 * Written into kfd_signal_slot_t to indicate that the event is not signaled.
@@ -46,9 +51,6 @@ struct kfd_event_waiter;
 struct signal_page;

 struct kfd_event {
-	/* All events in process, rooted at kfd_process.events. */
-	struct hlist_node events;
-
 	u32 event_id;

 	bool signaled;
@@ -56,11 +58,9 @@ struct kfd_event {

 	int type;

-	struct list_head waiters; /* List of kfd_event_waiter by waiters. */
+	wait_queue_head_t wq; /* List of event waiters. */

 	/* Only for signal events. */
-	struct signal_page *signal_page;
-	unsigned int signal_slot_index;
 	uint64_t __user *user_signal_address;

 	/* type specific data */

--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -42,26 +42,26 @@

 #include <linux/slab.h>
 #include <linux/device.h>
+#include <linux/kfifo.h>
 #include "kfd_priv.h"

-#define KFD_INTERRUPT_RING_SIZE 1024
+#define KFD_IH_NUM_ENTRIES 8192

 static void interrupt_wq(struct work_struct *);

 int kfd_interrupt_init(struct kfd_dev *kfd)
 {
-	void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE,
-					kfd->device_info->ih_ring_entry_size,
-					GFP_KERNEL);
-	if (!interrupt_ring)
-		return -ENOMEM;
+	int r;

-	kfd->interrupt_ring = interrupt_ring;
-	kfd->interrupt_ring_size =
-		KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size;
-	atomic_set(&kfd->interrupt_ring_wptr, 0);
-	atomic_set(&kfd->interrupt_ring_rptr, 0);
+	r = kfifo_alloc(&kfd->ih_fifo,
+		KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
+		GFP_KERNEL);
+	if (r) {
+		dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
+		return r;
+	}

+	kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
 	spin_lock_init(&kfd->interrupt_lock);

 	INIT_WORK(&kfd->interrupt_work, interrupt_wq);
@@ -92,74 +92,47 @@ void kfd_interrupt_exit(struct kfd_dev *kfd)
 	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);

 	/*
-	 * Flush_scheduled_work ensures that there are no outstanding
+	 * flush_work ensures that there are no outstanding
 	 * work-queue items that will access interrupt_ring. New work items
 	 * can't be created because we stopped interrupt handling above.
 	 */
-	flush_scheduled_work();
+	flush_workqueue(kfd->ih_wq);

-	kfree(kfd->interrupt_ring);
+	kfifo_free(&kfd->ih_fifo);
 }

 /*
- * This assumes that it can't be called concurrently with itself
- * but only with dequeue_ih_ring_entry.
+ * Assumption: single reader/writer. This function is not re-entrant
 */
 bool enqueue_ih_ring_entry(struct kfd_dev *kfd,	const void *ih_ring_entry)
 {
-	unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
-	unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
+	int count;

-	if ((rptr - wptr) % kfd->interrupt_ring_size ==
-					kfd->device_info->ih_ring_entry_size) {
-		/* This is very bad, the system is likely to hang. */
+	count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
+				kfd->device_info->ih_ring_entry_size);
+	if (count != kfd->device_info->ih_ring_entry_size) {
 		dev_err_ratelimited(kfd_chardev(),
-			"Interrupt ring overflow, dropping interrupt.\n");
+			"Interrupt ring overflow, dropping interrupt %d\n",
+			count);
 		return false;
 	}

-	memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
-			kfd->device_info->ih_ring_entry_size);
-
-	wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
-			kfd->interrupt_ring_size;
-	smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
-	atomic_set(&kfd->interrupt_ring_wptr, wptr);
-
 	return true;
 }

 /*
- * This assumes that it can't be called concurrently with itself
- * but only with enqueue_ih_ring_entry.
+ * Assumption: single reader/writer. This function is not re-entrant
 */
 static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
 {
-	/*
-	 * Assume that wait queues have an implicit barrier, i.e. anything that
-	 * happened in the ISR before it queued work is visible.
-	 */
-
-	unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
-	unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
-
-	if (rptr == wptr)
-		return false;
+	int count;

-	memcpy(ih_ring_entry, kfd->interrupt_ring + rptr,
+	count = kfifo_out(&kfd->ih_fifo, ih_ring_entry,
 				kfd->device_info->ih_ring_entry_size);

-	rptr = (rptr + kfd->device_info->ih_ring_entry_size) %
-			kfd->interrupt_ring_size;
+	WARN_ON(count && count != kfd->device_info->ih_ring_entry_size);

-	/*
-	 * Ensure the rptr write update is not visible until
-	 * memcpy has finished reading.
-	 */
-	smp_mb();
-	atomic_set(&kfd->interrupt_ring_rptr, rptr);
-
-	return true;
+	return count == kfd->device_info->ih_ring_entry_size;
 }

 static void interrupt_wq(struct work_struct *work)

--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -189,12 +189,9 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
 	if (q->format == KFD_QUEUE_FORMAT_AQL)
 		m->cp_hqd_pq_control |= NO_UPDATE_RPTR;

-	q->is_active = false;
-	if (q->queue_size > 0 &&
+	q->is_active = (q->queue_size > 0 &&
 			q->queue_address != 0 &&
-			q->queue_percent > 0) {
-		q->is_active = true;
-	}
+			q->queue_percent > 0);

 	return 0;
 }
@@ -215,24 +212,17 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
 	m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
 	m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
 	m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
-	m->sdma_rlc_doorbell = q->doorbell_off <<
-			SDMA0_RLC0_DOORBELL__OFFSET__SHIFT |
-			1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT;
+	m->sdma_rlc_doorbell =
+		q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;

 	m->sdma_rlc_virtual_addr = q->sdma_vm_addr;

 	m->sdma_engine_id = q->sdma_engine_id;
 	m->sdma_queue_id = q->sdma_queue_id;

-	q->is_active = false;
-	if (q->queue_size > 0 &&
+	q->is_active = (q->queue_size > 0 &&
 			q->queue_address != 0 &&
-			q->queue_percent > 0) {
-		m->sdma_rlc_rb_cntl |=
-				1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT;
-
-		q->is_active = true;
-	}
+			q->queue_percent > 0);

 	return 0;
 }
@@ -359,19 +349,13 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
 	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
 	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
-	m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
-					DOORBELL_OFFSET(q->doorbell_off);
+	m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off);

 	m->cp_hqd_vmid = q->vmid;

-	m->cp_hqd_active = 0;
-	q->is_active = false;
-	if (q->queue_size > 0 &&
+	q->is_active = (q->queue_size > 0 &&
 			q->queue_address != 0 &&
-			q->queue_percent > 0) {
-		m->cp_hqd_active = 1;
-		q->is_active = true;
-	}
+			q->queue_percent > 0);

 	return 0;
 }

--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -163,12 +163,9 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
 	}

-	q->is_active = false;
-	if (q->queue_size > 0 &&
+	q->is_active = (q->queue_size > 0 &&
 			q->queue_address != 0 &&
-			q->queue_percent > 0) {
-		q->is_active = true;
-	}
+			q->queue_percent > 0);

 	return 0;
 }

--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -31,6 +31,8 @@
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <linux/kfd_ioctl.h>
+#include <linux/idr.h>
+#include <linux/kfifo.h>
 #include <kgd_kfd_interface.h>

 #include "amd_shared.h"
@@ -181,10 +183,8 @@ struct kfd_dev {
 	unsigned int gtt_sa_num_of_chunks;

 	/* Interrupts */
-	void *interrupt_ring;
-	size_t interrupt_ring_size;
-	atomic_t interrupt_ring_rptr;
-	atomic_t interrupt_ring_wptr;
+	struct kfifo ih_fifo;
+	struct workqueue_struct *ih_wq;
 	struct work_struct interrupt_work;
 	spinlock_t interrupt_lock;

@@ -494,7 +494,12 @@ struct kfd_process {
 	 */
 	struct hlist_node kfd_processes;

-	struct mm_struct *mm;
+	/*
+	 * Opaque pointer to mm_struct. We don't hold a reference to
+	 * it so it should never be dereferenced from here. This is
+	 * only used for looking up processes by their mm.
+	 */
+	void *mm;

 	struct mutex mutex;

@@ -502,6 +507,8 @@ struct kfd_process {
 	 * In any process, the thread that started main() is the lead
 	 * thread and outlives the rest.
 	 * It is here because amd_iommu_bind_pasid wants a task_struct.
+	 * It can also be used for safely getting a reference to the
+	 * mm_struct of the process.
 	 */
 	struct task_struct *lead_thread;

@@ -522,22 +529,16 @@ struct kfd_process {

 	struct process_queue_manager pqm;

-	/* The process's queues. */
-	size_t queue_array_size;
-
-	/* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
-	struct kfd_queue **queues;
-
 	/*Is the user space process 32 bit?*/
 	bool is_32bit_user_mode;

 	/* Event-related data */
 	struct mutex event_mutex;
-	/* All events in process hashed by ID, linked on kfd_event.events. */
-	DECLARE_HASHTABLE(events, 4);
-	/* struct slot_page_header.event_pages */
-	struct list_head signal_event_pages;
-	u32 next_nonsignal_event_id;
+	/* Event ID allocator and lookup */
+	struct idr event_idr;
+	/* Event page */
+	struct kfd_signal_page *signal_page;
+	size_t signal_mapped_size;
 	size_t signal_event_count;
 	bool signal_event_limit_reached;
 };
@@ -721,19 +722,13 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
 extern const struct kfd_device_global_init_class device_global_init_class_cik;

-enum kfd_event_wait_result {
-	KFD_WAIT_COMPLETE,
-	KFD_WAIT_TIMEOUT,
-	KFD_WAIT_ERROR
-};
-
 void kfd_event_init_process(struct kfd_process *p);
 void kfd_event_free_process(struct kfd_process *p);
 int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
 int kfd_wait_on_events(struct kfd_process *p,
 		       uint32_t num_events, void __user *data,
 		       bool all, uint32_t user_timeout_ms,
-		       enum kfd_event_wait_result *wait_result);
+		       uint32_t *wait_result);
 void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 				uint32_t valid_id_bits);
 void kfd_signal_iommu_event(struct kfd_dev *dev,

--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -34,13 +34,6 @@ struct mm_struct;
 #include "kfd_priv.h"
 #include "kfd_dbgmgr.h"

-/*
- * Initial size for the array of queues.
- * The allocated size is doubled each time
- * it is exceeded up to MAX_PROCESS_QUEUES.
- */
-#define INITIAL_QUEUE_ARRAY_SIZE 16
-
 /*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
@@ -187,8 +180,6 @@ static void kfd_process_wq_release(struct work_struct *work)

 	mutex_destroy(&p->mutex);

-	kfree(p->queues);
-
 	kfree(p);

 	kfree(work);
@@ -200,7 +191,6 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu)
 	struct kfd_process *p;

 	p = container_of(rcu, struct kfd_process, rcu);
-	WARN_ON(atomic_read(&p->mm->mm_count) <= 0);

 	mmdrop(p->mm);

@@ -234,17 +224,26 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,

 	mutex_lock(&p->mutex);

+	/* Iterate over all process device data structures and if the
+	 * pdd is in debug mode, we should first force unregistration,
+	 * then we will be able to destroy the queues
+	 */
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+		struct kfd_dev *dev = pdd->dev;
+
+		mutex_lock(kfd_get_dbgmgr_mutex());
+		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
+			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
+				kfd_dbgmgr_destroy(dev->dbgmgr);
+				dev->dbgmgr = NULL;
+			}
+		}
+		mutex_unlock(kfd_get_dbgmgr_mutex());
+	}
+
 	kfd_process_dequeue_from_all_devices(p);
 	pqm_uninit(&p->pqm);

-	/* Iterate over all process device data structure and check
-	 * if we should delete debug managers
-	 */
-	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
-		if ((pdd->dev->dbgmgr) &&
-				(pdd->dev->dbgmgr->pasid == p->pasid))
-			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
-
 	mutex_unlock(&p->mutex);

 	/*
@@ -271,11 +270,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 	if (!process)
 		goto err_alloc_process;

-	process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
-					sizeof(process->queues[0]), GFP_KERNEL);
-	if (!process->queues)
-		goto err_alloc_queues;
-
 	process->pasid = kfd_pasid_alloc();
 	if (process->pasid == 0)
 		goto err_alloc_pasid;
@@ -298,8 +292,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)

 	process->lead_thread = thread->group_leader;

-	process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;
-
 	INIT_LIST_HEAD(&process->per_device_data);

 	kfd_event_init_process(process);
@@ -328,8 +320,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 err_alloc_doorbells:
 	kfd_pasid_free(process->pasid);
 err_alloc_pasid:
-	kfree(process->queues);
-err_alloc_queues:
 	kfree(process);
 err_alloc_process:
 	return ERR_PTR(err);
@@ -426,7 +416,7 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
 		err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
 				p->lead_thread);
 		if (err < 0) {
-			pr_err("unexpected pasid %d binding failure\n",
+			pr_err("Unexpected pasid %d binding failure\n",
 					p->pasid);
 			mutex_unlock(&p->mutex);
 			break;
@@ -442,29 +432,25 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
 }

 /*
- * Temporarily unbind currently bound processes from the device and
- * mark them as PDD_BOUND_SUSPENDED. These processes will be restored
- * to PDD_BOUND state in kfd_bind_processes_to_device.
+ * Mark currently bound processes as PDD_BOUND_SUSPENDED. These
+ * processes will be restored to PDD_BOUND state in
+ * kfd_bind_processes_to_device.
 */
 void kfd_unbind_processes_from_device(struct kfd_dev *dev)
 {
 	struct kfd_process_device *pdd;
 	struct kfd_process *p;
-	unsigned int temp, temp_bound, temp_pasid;
+	unsigned int temp;

 	int idx = srcu_read_lock(&kfd_processes_srcu);

 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
 		mutex_lock(&p->mutex);
 		pdd = kfd_get_process_device_data(dev, p);
-		temp_bound = pdd->bound;
-		temp_pasid = p->pasid;
+
 		if (pdd->bound == PDD_BOUND)
 			pdd->bound = PDD_BOUND_SUSPENDED;
 		mutex_unlock(&p->mutex);
-
-		if (temp_bound == PDD_BOUND)
-			amd_iommu_unbind_pasid(dev->pdev, temp_pasid);
 	}

 	srcu_read_unlock(&kfd_processes_srcu, idx);
@@ -486,8 +472,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)

 	pr_debug("Unbinding process %d from IOMMU\n", pasid);

-	if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
+	mutex_lock(kfd_get_dbgmgr_mutex());
+
+	if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
+		if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
 			kfd_dbgmgr_destroy(dev->dbgmgr);
+			dev->dbgmgr = NULL;
+		}
+	}
+
+	mutex_unlock(kfd_get_dbgmgr_mutex());

 	pdd = kfd_get_process_device_data(dev, p);
 	if (pdd)

--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -177,7 +177,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	if (retval != 0)
 		return retval;

-	if (list_empty(&pqm->queues)) {
+	if (list_empty(&pdd->qpd.queues_list) &&
+	    list_empty(&pdd->qpd.priv_queue_list)) {
 		pdd->qpd.pqm = pqm;
 		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
 	}
@@ -248,7 +249,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 err_allocate_pqn:
 	/* check if queues list is empty unregister process from device */
 	clear_bit(*qid, pqm->queue_slot_bitmap);
-	if (list_empty(&pqm->queues))
+	if (list_empty(&pdd->qpd.queues_list) &&
+	    list_empty(&pdd->qpd.priv_queue_list))
 		dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
 	return retval;
 }
@@ -302,7 +304,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 	kfree(pqn);
 	clear_bit(qid, pqm->queue_slot_bitmap);

-	if (list_empty(&pqm->queues))
+	if (list_empty(&pdd->qpd.queues_list) &&
+	    list_empty(&pdd->qpd.priv_queue_list))
 		dqm->ops.unregister_process(dqm, &pdd->qpd);

 	return retval;

--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -102,8 +102,7 @@ radeon-y += \
 radeon-y += \
 	radeon_vce.o \
 	vce_v1_0.o \
-	vce_v2_0.o \
-	radeon_kfd.o
+	vce_v2_0.o

 radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
 radeon-$(CONFIG_ACPI) += radeon_acpi.o

--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -33,7 +33,6 @@
 #include "cik_blit_shaders.h"
 #include "radeon_ucode.h"
 #include "clearstate_ci.h"
-#include "radeon_kfd.h"

 #define SH_MEM_CONFIG_GFX_DEFAULT \
 	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
@@ -5684,10 +5683,9 @@ int cik_vm_init(struct radeon_device *rdev)
 	/*
 	 * number of VMs
 	 * VMID 0 is reserved for System
-	 * radeon graphics/compute will use VMIDs 1-7
-	 * amdkfd will use VMIDs 8-15
+	 * radeon graphics/compute will use VMIDs 1-15
 	 */
-	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
+	rdev->vm_manager.nvm = 16;
 	/* base offset of vram pages */
 	if (rdev->flags & RADEON_IS_IGP) {
 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
@@ -7589,9 +7587,6 @@ int cik_irq_process(struct radeon_device *rdev)
 		/* wptr/rptr are in bytes! */
 		ring_index = rptr / 4;

-		radeon_kfd_interrupt(rdev,
-				(const void *) &rdev->ih.ring[ring_index]);
-
 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
@@ -8486,10 +8481,6 @@ static int cik_startup(struct radeon_device *rdev)
 	if (r)
 		return r;

-	r = radeon_kfd_resume(rdev);
-	if (r)
-		return r;
-
 	return 0;
 }

@@ -8538,7 +8529,6 @@ int cik_resume(struct radeon_device *rdev)
 */
 int cik_suspend(struct radeon_device *rdev)
 {
-	radeon_kfd_suspend(rdev);
 	radeon_pm_suspend(rdev);
 	radeon_audio_fini(rdev);
 	radeon_vm_manager_fini(rdev);

--- a/drivers/gpu/drm/radeon/cikd.h
+++ b/drivers/gpu/drm/radeon/cikd.h
@@ -30,8 +30,6 @@
 #define CIK_RB_BITMAP_WIDTH_PER_SH     2
 #define HAWAII_RB_BITMAP_WIDTH_PER_SH  4

-#define RADEON_NUM_OF_VMIDS	8
-
 /* DIDT IND registers */
 #define DIDT_SQ_CTRL0                                     0x0
 #       define DIDT_CTRL_EN                               (1 << 0)

--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2456,9 +2456,6 @@ struct radeon_device {
 	u64 vram_pin_size;
 	u64 gart_pin_size;

-	/* amdkfd interface */
-	struct kfd_dev		*kfd;
-
 	struct mutex	mn_lock;
 	DECLARE_HASHTABLE(mn_hash, 7);
 };

--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -43,7 +43,6 @@
 #include <drm/drm_fb_helper.h>

 #include <drm/drm_crtc_helper.h>
-#include "radeon_kfd.h"

 /*
 * KMS wrapper.
@@ -338,14 +337,6 @@ static int radeon_pci_probe(struct pci_dev *pdev,
 {
 	int ret;

-	/*
-	 * Initialize amdkfd before starting radeon. If it was not loaded yet,
-	 * defer radeon probing
-	 */
-	ret = radeon_kfd_init();
-	if (ret == -EPROBE_DEFER)
-		return ret;
-
 	if (vga_switcheroo_client_probe_defer(pdev))
 		return -EPROBE_DEFER;

@@ -645,7 +636,6 @@ static int __init radeon_init(void)

 static void __exit radeon_exit(void)
 {
-	radeon_kfd_fini();
 	pci_unregister_driver(pdriver);
 	radeon_unregister_atpx_handler();
 }

--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
--- a/drivers/gpu/drm/radeon/radeon_kfd.h
+++ b/drivers/gpu/drm/radeon/radeon_kfd.h
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * radeon_kfd.h defines the private interface between the
- * AMD kernel graphics drivers and the AMD KFD.
- */
-
-#ifndef RADEON_KFD_H_INCLUDED
-#define RADEON_KFD_H_INCLUDED
-
-#include <linux/types.h>
-#include "kgd_kfd_interface.h"
-
-struct radeon_device;
-
-int radeon_kfd_init(void);
-void radeon_kfd_fini(void);
-
-void radeon_kfd_suspend(struct radeon_device *rdev);
-int radeon_kfd_resume(struct radeon_device *rdev);
-void radeon_kfd_interrupt(struct radeon_device *rdev,
-			const void *ih_ring_entry);
-void radeon_kfd_device_probe(struct radeon_device *rdev);
-void radeon_kfd_device_init(struct radeon_device *rdev);
-void radeon_kfd_device_fini(struct radeon_device *rdev);
-
-#endif /* RADEON_KFD_H_INCLUDED */
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -34,8 +34,6 @@
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>

-#include "radeon_kfd.h"
-
 #if defined(CONFIG_VGA_SWITCHEROO)
 bool radeon_has_atpx(void);
 #else
@@ -68,8 +66,6 @@ void radeon_driver_unload_kms(struct drm_device *dev)
 		pm_runtime_forbid(dev->dev);
 	}

-	radeon_kfd_device_fini(rdev);
-
 	radeon_acpi_fini(rdev);
 	
 	radeon_modeset_fini(rdev);
@@ -174,9 +170,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
 				"Error during ACPI methods call\n");
 	}

-	radeon_kfd_device_probe(rdev);
-	radeon_kfd_device_init(rdev);
-
 	if (radeon_is_px(dev)) {
 		pm_runtime_use_autosuspend(dev->dev);
 		pm_runtime_set_autosuspend_delay(dev->dev, 5000);

--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -169,7 +169,7 @@ struct kfd_ioctl_dbg_wave_control_args {
 #define KFD_IOC_WAIT_RESULT_TIMEOUT		1
 #define KFD_IOC_WAIT_RESULT_FAIL		2

-#define KFD_SIGNAL_EVENT_LIMIT			256
+#define KFD_SIGNAL_EVENT_LIMIT			4096

 struct kfd_ioctl_create_event_args {
 	__u64 event_page_offset;	/* from KFD */