Commit 64d1c3a4 authored by Felix Kuehling, committed by Oded Gabbay

drm/amdkfd: Centralize IOMMUv2 code and make it conditional

dGPUs work without IOMMUv2. Make IOMMUv2 initialization dependent on
ASIC information. Also allow building KFD without IOMMUv2 support.
This is still useful for dGPUs and prepares for enabling KFD on
architectures that don't support AMD IOMMUv2.

v2:
* Centralize IOMMUv2 code to avoid #ifdefs in too many places

v3:
* Imply AMD_IOMMU_V2 in Kconfig
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 4c660c8f
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
config HSA_AMD config HSA_AMD
tristate "HSA kernel driver for AMD GPU devices" tristate "HSA kernel driver for AMD GPU devices"
depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64 depends on DRM_AMDGPU && X86_64
imply AMD_IOMMU_V2
help help
Enable this if you want to use HSA features on AMD GPU devices. Enable this if you want to use HSA features on AMD GPU devices.
...@@ -37,6 +37,10 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ ...@@ -37,6 +37,10 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
ifneq ($(CONFIG_AMD_IOMMU_V2),)
amdkfd-y += kfd_iommu.o
endif
amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o obj-$(CONFIG_HSA_AMD) += amdkfd.o
...@@ -22,10 +22,10 @@ ...@@ -22,10 +22,10 @@
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/acpi.h> #include <linux/acpi.h>
#include <linux/amd-iommu.h>
#include "kfd_crat.h" #include "kfd_crat.h"
#include "kfd_priv.h" #include "kfd_priv.h"
#include "kfd_topology.h" #include "kfd_topology.h"
#include "kfd_iommu.h"
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created. /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
* GPU processor ID are expressed with Bit[31]=1. * GPU processor ID are expressed with Bit[31]=1.
...@@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, ...@@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
struct crat_subtype_generic *sub_type_hdr; struct crat_subtype_generic *sub_type_hdr;
struct crat_subtype_computeunit *cu; struct crat_subtype_computeunit *cu;
struct kfd_cu_info cu_info; struct kfd_cu_info cu_info;
struct amd_iommu_device_info iommu_info;
int avail_size = *size; int avail_size = *size;
uint32_t total_num_of_cu; uint32_t total_num_of_cu;
int num_of_cache_entries = 0; int num_of_cache_entries = 0;
int cache_mem_filled = 0; int cache_mem_filled = 0;
int ret = 0; int ret = 0;
const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
struct kfd_local_mem_info local_mem_info; struct kfd_local_mem_info local_mem_info;
if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
...@@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, ...@@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
/* Check if this node supports IOMMU. During parsing this flag will /* Check if this node supports IOMMU. During parsing this flag will
* translate to HSA_CAP_ATS_PRESENT * translate to HSA_CAP_ATS_PRESENT
*/ */
iommu_info.flags = 0; if (!kfd_iommu_check_device(kdev))
if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) { cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
if ((iommu_info.flags & required_iommu_flags) ==
required_iommu_flags)
cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
}
crat_table->length += sub_type_hdr->length; crat_table->length += sub_type_hdr->length;
crat_table->total_entries++; crat_table->total_entries++;
......
...@@ -20,7 +20,9 @@ ...@@ -20,7 +20,9 @@
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
#include <linux/amd-iommu.h> #include <linux/amd-iommu.h>
#endif
#include <linux/bsearch.h> #include <linux/bsearch.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/slab.h> #include <linux/slab.h>
...@@ -28,9 +30,11 @@ ...@@ -28,9 +30,11 @@
#include "kfd_device_queue_manager.h" #include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h" #include "kfd_pm4_headers_vi.h"
#include "cwsr_trap_handler_gfx8.asm" #include "cwsr_trap_handler_gfx8.asm"
#include "kfd_iommu.h"
#define MQD_SIZE_ALIGNED 768 #define MQD_SIZE_ALIGNED 768
#ifdef KFD_SUPPORT_IOMMU_V2
static const struct kfd_device_info kaveri_device_info = { static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI, .asic_family = CHIP_KAVERI,
.max_pasid_bits = 16, .max_pasid_bits = 16,
...@@ -41,6 +45,7 @@ static const struct kfd_device_info kaveri_device_info = { ...@@ -41,6 +45,7 @@ static const struct kfd_device_info kaveri_device_info = {
.num_of_watch_points = 4, .num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED, .mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = false, .supports_cwsr = false,
.needs_iommu_device = true,
.needs_pci_atomics = false, .needs_pci_atomics = false,
}; };
...@@ -54,8 +59,10 @@ static const struct kfd_device_info carrizo_device_info = { ...@@ -54,8 +59,10 @@ static const struct kfd_device_info carrizo_device_info = {
.num_of_watch_points = 4, .num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED, .mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = true,
.needs_pci_atomics = false, .needs_pci_atomics = false,
}; };
#endif
static const struct kfd_device_info hawaii_device_info = { static const struct kfd_device_info hawaii_device_info = {
.asic_family = CHIP_HAWAII, .asic_family = CHIP_HAWAII,
...@@ -67,6 +74,7 @@ static const struct kfd_device_info hawaii_device_info = { ...@@ -67,6 +74,7 @@ static const struct kfd_device_info hawaii_device_info = {
.num_of_watch_points = 4, .num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED, .mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = false, .supports_cwsr = false,
.needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
}; };
...@@ -79,6 +87,7 @@ static const struct kfd_device_info tonga_device_info = { ...@@ -79,6 +87,7 @@ static const struct kfd_device_info tonga_device_info = {
.num_of_watch_points = 4, .num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED, .mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = false, .supports_cwsr = false,
.needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
}; };
...@@ -91,6 +100,7 @@ static const struct kfd_device_info tonga_vf_device_info = { ...@@ -91,6 +100,7 @@ static const struct kfd_device_info tonga_vf_device_info = {
.num_of_watch_points = 4, .num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED, .mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = false, .supports_cwsr = false,
.needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
}; };
...@@ -103,6 +113,7 @@ static const struct kfd_device_info fiji_device_info = { ...@@ -103,6 +113,7 @@ static const struct kfd_device_info fiji_device_info = {
.num_of_watch_points = 4, .num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED, .mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
}; };
...@@ -115,6 +126,7 @@ static const struct kfd_device_info fiji_vf_device_info = { ...@@ -115,6 +126,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.num_of_watch_points = 4, .num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED, .mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
}; };
...@@ -128,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = { ...@@ -128,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = {
.num_of_watch_points = 4, .num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED, .mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
}; };
...@@ -140,6 +153,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { ...@@ -140,6 +153,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.num_of_watch_points = 4, .num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED, .mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = false, .needs_pci_atomics = false,
}; };
...@@ -152,6 +166,7 @@ static const struct kfd_device_info polaris11_device_info = { ...@@ -152,6 +166,7 @@ static const struct kfd_device_info polaris11_device_info = {
.num_of_watch_points = 4, .num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED, .mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true, .supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = true, .needs_pci_atomics = true,
}; };
...@@ -162,6 +177,7 @@ struct kfd_deviceid { ...@@ -162,6 +177,7 @@ struct kfd_deviceid {
}; };
static const struct kfd_deviceid supported_devices[] = { static const struct kfd_deviceid supported_devices[] = {
#ifdef KFD_SUPPORT_IOMMU_V2
{ 0x1304, &kaveri_device_info }, /* Kaveri */ { 0x1304, &kaveri_device_info }, /* Kaveri */
{ 0x1305, &kaveri_device_info }, /* Kaveri */ { 0x1305, &kaveri_device_info }, /* Kaveri */
{ 0x1306, &kaveri_device_info }, /* Kaveri */ { 0x1306, &kaveri_device_info }, /* Kaveri */
...@@ -189,6 +205,7 @@ static const struct kfd_deviceid supported_devices[] = { ...@@ -189,6 +205,7 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x9875, &carrizo_device_info }, /* Carrizo */ { 0x9875, &carrizo_device_info }, /* Carrizo */
{ 0x9876, &carrizo_device_info }, /* Carrizo */ { 0x9876, &carrizo_device_info }, /* Carrizo */
{ 0x9877, &carrizo_device_info }, /* Carrizo */ { 0x9877, &carrizo_device_info }, /* Carrizo */
#endif
{ 0x67A0, &hawaii_device_info }, /* Hawaii */ { 0x67A0, &hawaii_device_info }, /* Hawaii */
{ 0x67A1, &hawaii_device_info }, /* Hawaii */ { 0x67A1, &hawaii_device_info }, /* Hawaii */
{ 0x67A2, &hawaii_device_info }, /* Hawaii */ { 0x67A2, &hawaii_device_info }, /* Hawaii */
...@@ -302,77 +319,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, ...@@ -302,77 +319,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
return kfd; return kfd;
} }
static bool device_iommu_pasid_init(struct kfd_dev *kfd)
{
const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
struct amd_iommu_device_info iommu_info;
unsigned int pasid_limit;
int err;
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
if (err < 0) {
dev_err(kfd_device,
"error getting iommu info. is the iommu enabled?\n");
return false;
}
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n",
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
!= 0);
return false;
}
pasid_limit = min_t(unsigned int,
(unsigned int)(1 << kfd->device_info->max_pasid_bits),
iommu_info.max_pasids);
if (!kfd_set_pasid_limit(pasid_limit)) {
dev_err(kfd_device, "error setting pasid limit\n");
return false;
}
return true;
}
static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
{
struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
if (dev)
kfd_process_iommu_unbind_callback(dev, pasid);
}
/*
* This function called by IOMMU driver on PPR failure
*/
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
unsigned long address, u16 flags)
{
struct kfd_dev *dev;
dev_warn(kfd_device,
"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
PCI_BUS_NUM(pdev->devfn),
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn),
pasid,
address,
flags);
dev = kfd_device_by_pci_dev(pdev);
if (!WARN_ON(!dev))
kfd_signal_iommu_event(dev, pasid, address,
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
return AMD_IOMMU_INV_PRI_RSP_INVALID;
}
static void kfd_cwsr_init(struct kfd_dev *kfd) static void kfd_cwsr_init(struct kfd_dev *kfd)
{ {
if (cwsr_enable && kfd->device_info->supports_cwsr) { if (cwsr_enable && kfd->device_info->supports_cwsr) {
...@@ -462,11 +408,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, ...@@ -462,11 +408,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto device_queue_manager_error; goto device_queue_manager_error;
} }
if (!device_iommu_pasid_init(kfd)) { if (kfd_iommu_device_init(kfd)) {
dev_err(kfd_device, dev_err(kfd_device, "Error initializing iommuv2\n");
"Error initializing iommuv2 for device %x:%x\n", goto device_iommu_error;
kfd->pdev->vendor, kfd->pdev->device);
goto device_iommu_pasid_error;
} }
kfd_cwsr_init(kfd); kfd_cwsr_init(kfd);
...@@ -486,7 +430,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, ...@@ -486,7 +430,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto out; goto out;
kfd_resume_error: kfd_resume_error:
device_iommu_pasid_error: device_iommu_error:
device_queue_manager_uninit(kfd->dqm); device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error: device_queue_manager_error:
kfd_interrupt_exit(kfd); kfd_interrupt_exit(kfd);
...@@ -527,11 +471,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) ...@@ -527,11 +471,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
kfd->dqm->ops.stop(kfd->dqm); kfd->dqm->ops.stop(kfd->dqm);
kfd_unbind_processes_from_device(kfd); kfd_iommu_suspend(kfd);
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev);
} }
int kgd2kfd_resume(struct kfd_dev *kfd) int kgd2kfd_resume(struct kfd_dev *kfd)
...@@ -546,19 +486,14 @@ int kgd2kfd_resume(struct kfd_dev *kfd) ...@@ -546,19 +486,14 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
static int kfd_resume(struct kfd_dev *kfd) static int kfd_resume(struct kfd_dev *kfd)
{ {
int err = 0; int err = 0;
unsigned int pasid_limit = kfd_get_pasid_limit();
err = amd_iommu_init_device(kfd->pdev, pasid_limit);
if (err)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback);
amd_iommu_set_invalid_ppr_cb(kfd->pdev,
iommu_invalid_ppr_cb);
err = kfd_bind_processes_to_device(kfd); err = kfd_iommu_resume(kfd);
if (err) if (err) {
goto processes_bind_error; dev_err(kfd_device,
"Failed to resume IOMMU for device %x:%x\n",
kfd->pdev->vendor, kfd->pdev->device);
return err;
}
err = kfd->dqm->ops.start(kfd->dqm); err = kfd->dqm->ops.start(kfd->dqm);
if (err) { if (err) {
...@@ -571,9 +506,7 @@ static int kfd_resume(struct kfd_dev *kfd) ...@@ -571,9 +506,7 @@ static int kfd_resume(struct kfd_dev *kfd)
return err; return err;
dqm_start_error: dqm_start_error:
processes_bind_error: kfd_iommu_suspend(kfd);
amd_iommu_free_device(kfd->pdev);
return err; return err;
} }
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <linux/memory.h> #include <linux/memory.h>
#include "kfd_priv.h" #include "kfd_priv.h"
#include "kfd_events.h" #include "kfd_events.h"
#include "kfd_iommu.h"
#include <linux/device.h> #include <linux/device.h>
/* /*
...@@ -837,6 +838,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, ...@@ -837,6 +838,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
} }
} }
#ifdef KFD_SUPPORT_IOMMU_V2
void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
unsigned long address, bool is_write_requested, unsigned long address, bool is_write_requested,
bool is_execute_requested) bool is_execute_requested)
...@@ -905,6 +907,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, ...@@ -905,6 +907,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
mutex_unlock(&p->event_mutex); mutex_unlock(&p->event_mutex);
kfd_unref_process(p); kfd_unref_process(p);
} }
#endif /* KFD_SUPPORT_IOMMU_V2 */
void kfd_signal_hw_exception_event(unsigned int pasid) void kfd_signal_hw_exception_event(unsigned int pasid)
{ {
......
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/printk.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/amd-iommu.h>
#include "kfd_priv.h"
#include "kfd_dbgmgr.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
/*
 * IOMMU capability flags (ATS, PRI and PASID support) that must all be
 * present in the flags reported by amd_iommu_device_info() before KFD
 * treats the IOMMU as usable for a device.
 */
static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
/** kfd_iommu_check_device - Check whether IOMMU is available for device
*/
int kfd_iommu_check_device(struct kfd_dev *kfd)
{
struct amd_iommu_device_info iommu_info;
int err;
if (!kfd->device_info->needs_iommu_device)
return -ENODEV;
iommu_info.flags = 0;
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
if (err)
return err;
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags)
return -ENODEV;
return 0;
}
/** kfd_iommu_device_init - Initialize IOMMU for device
 *
 * For ASICs that need an IOMMU device, query the IOMMU capabilities,
 * require ATS, PRI and PASID support, and hand the smaller of the ASIC's
 * PASID range (2^max_pasid_bits) and the IOMMU's max_pasids to
 * kfd_set_pasid_limit().
 *
 * Returns 0 on success or when the ASIC does not need the IOMMU,
 * -ENODEV when the IOMMU information cannot be obtained or a required
 * capability is missing, and -EBUSY when the PASID limit cannot be set.
 */
int kfd_iommu_device_init(struct kfd_dev *kfd)
{
	struct amd_iommu_device_info iommu_info;
	unsigned int pasid_limit;
	int err;

	if (!kfd->device_info->needs_iommu_device)
		return 0;

	iommu_info.flags = 0;
	err = amd_iommu_device_info(kfd->pdev, &iommu_info);
	if (err < 0) {
		dev_err(kfd_device,
			"error getting iommu info. is the iommu enabled?\n");
		return -ENODEV;
	}

	if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
		dev_err(kfd_device,
			"error required iommu flags ats %i, pri %i, pasid %i\n",
			(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
			(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
			(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
									!= 0);
		return -ENODEV;
	}

	/*
	 * Shift an unsigned 1: shifting a signed int into its sign bit is
	 * undefined behaviour, which a 31-bit PASID width would trigger.
	 */
	pasid_limit = min_t(unsigned int,
			1U << kfd->device_info->max_pasid_bits,
			iommu_info.max_pasids);

	if (!kfd_set_pasid_limit(pasid_limit)) {
		dev_err(kfd_device, "error setting pasid limit\n");
		return -EBUSY;
	}

	return 0;
}
/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process
 *
 * Binds the process owning @pdd to the device of @pdd using the
 * process PASID, which enables IOMMUv2 address translation for that
 * process on the device.
 *
 * The caller is expected to hold the process mutex.
 *
 * Returns 0 when the device does not need the IOMMU, when the pdd is
 * already bound, or when the bind succeeds; -EINVAL when the pdd is in
 * the PDD_BOUND_SUSPENDED state; otherwise the error returned by
 * amd_iommu_bind_pasid().
 */
int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
{
	struct kfd_process *proc = pdd->process;
	struct kfd_dev *kdev = pdd->dev;
	int ret;

	/* Nothing to do without an IOMMU or for an already-bound pdd */
	if (!kdev->device_info->needs_iommu_device)
		return 0;
	if (pdd->bound == PDD_BOUND)
		return 0;

	if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
		pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
		return -EINVAL;
	}

	ret = amd_iommu_bind_pasid(kdev->pdev, proc->pasid, proc->lead_thread);
	if (ret)
		return ret;

	pdd->bound = PDD_BOUND;
	return 0;
}
/** kfd_iommu_unbind_process - Unbind process from all devices
*
* This removes all IOMMU device bindings of the process. To be used
* before process termination.
*/
void kfd_iommu_unbind_process(struct kfd_process *p)
{
struct kfd_process_device *pdd;
list_for_each_entry(pdd, &p->per_device_data, per_device_list)
if (pdd->bound == PDD_BOUND)
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
}
/*
 * Callback for process shutdown invoked by the IOMMU driver.
 *
 * Unregisters the process from the device's debug manager (destroying
 * the manager if unregistration succeeds) and dequeues the process
 * from the device while its PASID is still bound.
 */
static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
{
	struct kfd_dev *kdev = kfd_device_by_pci_dev(pdev);
	struct kfd_process_device *pdd;
	struct kfd_process *proc;

	if (!kdev)
		return;

	/*
	 * Look for the process that matches the pasid. If there is no such
	 * process, we either released it in amdkfd's own notifier, or there
	 * is a bug. Unfortunately, there is no way to tell...
	 */
	proc = kfd_lookup_process_by_pasid(pasid);
	if (!proc)
		return;

	pr_debug("Unbinding process %d from IOMMU\n", pasid);

	mutex_lock(kfd_get_dbgmgr_mutex());

	/* Only tear down the debug manager if it belongs to this process */
	if (kdev->dbgmgr && kdev->dbgmgr->pasid == proc->pasid &&
	    !kfd_dbgmgr_unregister(kdev->dbgmgr, proc)) {
		kfd_dbgmgr_destroy(kdev->dbgmgr);
		kdev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	mutex_lock(&proc->mutex);

	pdd = kfd_get_process_device_data(kdev, proc);
	if (pdd) {
		/* For GPU relying on IOMMU, we need to dequeue here
		 * when PASID is still bound.
		 */
		kfd_process_dequeue_from_device(pdd);
	}

	mutex_unlock(&proc->mutex);

	/* Presumably balances a reference taken by the lookup above */
	kfd_unref_process(proc);
}
/*
 * Called by the IOMMU driver on PPR (peripheral page request) failure.
 *
 * Logs the faulting device, PASID, address and flags, then forwards the
 * fault to kfd_signal_iommu_event() if the PCI device maps to a KFD
 * device. Always returns AMD_IOMMU_INV_PRI_RSP_INVALID.
 */
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
		unsigned long address, u16 flags)
{
	struct kfd_dev *dev;

	/*
	 * Take the bus number from pdev->bus: devfn only encodes slot and
	 * function, so PCI_BUS_NUM(pdev->devfn) would always print 0.
	 */
	dev_warn(kfd_device,
			"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X\n",
			pdev->bus->number,
			PCI_SLOT(pdev->devfn),
			PCI_FUNC(pdev->devfn),
			pasid,
			address,
			flags);

	dev = kfd_device_by_pci_dev(pdev);
	if (!WARN_ON(!dev))
		kfd_signal_iommu_event(dev, pasid, address,
			flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);

	return AMD_IOMMU_INV_PRI_RSP_INVALID;
}
/*
 * Re-bind to the device every process whose pdd was temporarily marked
 * PDD_BOUND_SUSPENDED by kfd_unbind_processes_from_device.
 *
 * Stops at the first binding failure and returns that error; otherwise
 * returns the result of the last bind attempt (0 if none was needed).
 */
static int kfd_bind_processes_to_device(struct kfd_dev *kfd)
{
	struct kfd_process_device *pdd;
	struct kfd_process *proc;
	unsigned int bkt;
	int err = 0;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, bkt, proc, kfd_processes) {
		mutex_lock(&proc->mutex);

		pdd = kfd_get_process_device_data(kfd, proc);
		if (!WARN_ON(!pdd) && pdd->bound == PDD_BOUND_SUSPENDED) {
			err = amd_iommu_bind_pasid(kfd->pdev, proc->pasid,
						   proc->lead_thread);
			if (err < 0)
				pr_err("Unexpected pasid %d binding failure\n",
				       proc->pasid);
			else
				pdd->bound = PDD_BOUND;
		}

		mutex_unlock(&proc->mutex);

		/* err can only be negative from the bind attempted just now */
		if (err < 0)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, srcu_idx);

	return err;
}
/*
 * Flip the pdd of every process currently bound to the device from
 * PDD_BOUND to PDD_BOUND_SUSPENDED. kfd_bind_processes_to_device
 * restores these pdds to PDD_BOUND.
 */
static void kfd_unbind_processes_from_device(struct kfd_dev *kfd)
{
	struct kfd_process_device *pdd;
	struct kfd_process *proc;
	unsigned int bkt;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, bkt, proc, kfd_processes) {
		mutex_lock(&proc->mutex);

		pdd = kfd_get_process_device_data(kfd, proc);
		if (!WARN_ON(!pdd) && pdd->bound == PDD_BOUND)
			pdd->bound = PDD_BOUND_SUSPENDED;

		mutex_unlock(&proc->mutex);
	}

	srcu_read_unlock(&kfd_processes_srcu, srcu_idx);
}
/** kfd_iommu_suspend - Prepare IOMMU for suspend
 *
 * For ASICs that need an IOMMU device: marks bound processes as
 * suspended, clears the invalidate-context and invalid-PPR callbacks,
 * and frees the IOMMU device. Does nothing for other ASICs.
 */
void kfd_iommu_suspend(struct kfd_dev *kfd)
{
	struct pci_dev *pdev;

	if (!kfd->device_info->needs_iommu_device)
		return;

	kfd_unbind_processes_from_device(kfd);

	pdev = kfd->pdev;
	amd_iommu_set_invalidate_ctx_cb(pdev, NULL);
	amd_iommu_set_invalid_ppr_cb(pdev, NULL);
	amd_iommu_free_device(pdev);
}
/** kfd_iommu_resume - Restore IOMMU after resume
 *
 * For ASICs that need an IOMMU device: re-initializes the IOMMU for
 * the device with the current PASID limit, installs the shutdown and
 * invalid-PPR callbacks, and re-binds previously suspended processes.
 *
 * Returns 0 on success or when the ASIC does not need the IOMMU,
 * -ENXIO when amd_iommu_init_device() fails, or the error from
 * re-binding processes (in which case the callbacks are cleared and
 * the IOMMU device is freed again).
 */
int kfd_iommu_resume(struct kfd_dev *kfd)
{
	int ret;

	if (!kfd->device_info->needs_iommu_device)
		return 0;

	if (amd_iommu_init_device(kfd->pdev, kfd_get_pasid_limit()))
		return -ENXIO;

	amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
					iommu_pasid_shutdown_callback);
	amd_iommu_set_invalid_ppr_cb(kfd->pdev,
				     iommu_invalid_ppr_cb);

	ret = kfd_bind_processes_to_device(kfd);
	if (!ret)
		return 0;

	/* Re-binding failed: undo the initialization done above */
	amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
	amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
	amd_iommu_free_device(kfd->pdev);

	return ret;
}
/*
 * IOMMU performance-counter helpers used by kfd_iommu_add_perf_counters().
 * NOTE(review): declared locally here, presumably because no header
 * included by this file exports them - confirm before relying on it.
 */
extern bool amd_iommu_pc_supported(void);
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology
 *
 * When the topology node advertises HSA_CAP_ATS_PRESENT and the IOMMU
 * supports performance counters, allocates an "iommu" perf-properties
 * entry and appends it to the node's perf_props list.
 *
 * Returns 0 on success or when there is nothing to add, -ENOMEM when
 * the entry cannot be allocated.
 */
int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
{
	struct kfd_perf_properties *perf;

	if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT) ||
	    !amd_iommu_pc_supported())
		return 0;

	perf = kfd_alloc_struct(perf);
	if (!perf)
		return -ENOMEM;

	strcpy(perf->block_name, "iommu");
	/* assume one iommu: counters are queried for device id 0 only */
	perf->max_concurrent = amd_iommu_pc_get_max_banks(0) *
		amd_iommu_pc_get_max_counters(0);
	list_add_tail(&perf->list, &kdev->perf_props);

	return 0;
}
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __KFD_IOMMU_H__
#define __KFD_IOMMU_H__
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
#define KFD_SUPPORT_IOMMU_V2
/*
 * IOMMUv2 entry points, declared only when CONFIG_AMD_IOMMU_V2 is
 * built in or as a module. Inline stubs with the same signatures are
 * provided in the #else branch for builds without IOMMUv2 support.
 */
int kfd_iommu_check_device(struct kfd_dev *kfd);
int kfd_iommu_device_init(struct kfd_dev *kfd);
int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd);
void kfd_iommu_unbind_process(struct kfd_process *p);
void kfd_iommu_suspend(struct kfd_dev *kfd);
int kfd_iommu_resume(struct kfd_dev *kfd);
int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev);
#else
/* Stub without IOMMUv2 support: no device ever has a usable IOMMU. */
static inline int kfd_iommu_check_device(struct kfd_dev *kfd)
{
	return -ENODEV;
}
/* Stub without IOMMUv2 support: nothing to initialize, report success. */
static inline int kfd_iommu_device_init(struct kfd_dev *kfd)
{
	return 0;
}
/* Stub without IOMMUv2 support: binding is a no-op that succeeds. */
static inline int
kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
{
	return 0;
}
/* Stub without IOMMUv2 support: there are no bindings to remove. */
static inline void kfd_iommu_unbind_process(struct kfd_process *p)
{
	/* intentionally empty */
}
/* Stub without IOMMUv2 support: no IOMMU state to tear down. */
static inline void kfd_iommu_suspend(struct kfd_dev *kfd)
{
	/* intentionally empty */
}
/* Stub without IOMMUv2 support: no IOMMU state to restore, report success. */
static inline int kfd_iommu_resume(struct kfd_dev *kfd)
{
	return 0;
}
/* Stub without IOMMUv2 support: no IOMMU counters to add, report success. */
static inline int
kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
{
	return 0;
}
#endif /* defined(CONFIG_AMD_IOMMU_V2) */
#endif /* __KFD_IOMMU_H__ */
...@@ -158,6 +158,7 @@ struct kfd_device_info { ...@@ -158,6 +158,7 @@ struct kfd_device_info {
uint8_t num_of_watch_points; uint8_t num_of_watch_points;
uint16_t mqd_size_aligned; uint16_t mqd_size_aligned;
bool supports_cwsr; bool supports_cwsr;
bool needs_iommu_device;
bool needs_pci_atomics; bool needs_pci_atomics;
}; };
...@@ -517,15 +518,15 @@ struct kfd_process_device { ...@@ -517,15 +518,15 @@ struct kfd_process_device {
uint64_t scratch_base; uint64_t scratch_base;
uint64_t scratch_limit; uint64_t scratch_limit;
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
enum kfd_pdd_bound bound;
/* Flag used to tell the pdd has dequeued from the dqm. /* Flag used to tell the pdd has dequeued from the dqm.
* This is used to prevent dev->dqm->ops.process_termination() from * This is used to prevent dev->dqm->ops.process_termination() from
* being called twice when it is already called in IOMMU callback * being called twice when it is already called in IOMMU callback
* function. * function.
*/ */
bool already_dequeued; bool already_dequeued;
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
enum kfd_pdd_bound bound;
}; };
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
...@@ -590,6 +591,10 @@ struct kfd_process { ...@@ -590,6 +591,10 @@ struct kfd_process {
bool signal_event_limit_reached; bool signal_event_limit_reached;
}; };
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
extern struct srcu_struct kfd_processes_srcu;
/** /**
* Ioctl function type. * Ioctl function type.
* *
...@@ -617,9 +622,6 @@ void kfd_unref_process(struct kfd_process *p); ...@@ -617,9 +622,6 @@ void kfd_unref_process(struct kfd_process *p);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
struct kfd_process *p); struct kfd_process *p);
int kfd_bind_processes_to_device(struct kfd_dev *dev);
void kfd_unbind_processes_from_device(struct kfd_dev *dev);
void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid);
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process *p); struct kfd_process *p);
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
......
...@@ -35,16 +35,16 @@ struct mm_struct; ...@@ -35,16 +35,16 @@ struct mm_struct;
#include "kfd_priv.h" #include "kfd_priv.h"
#include "kfd_dbgmgr.h" #include "kfd_dbgmgr.h"
#include "kfd_iommu.h"
/* /*
* List of struct kfd_process (field kfd_process). * List of struct kfd_process (field kfd_process).
* Unique/indexed by mm_struct* * Unique/indexed by mm_struct*
*/ */
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex); static DEFINE_MUTEX(kfd_processes_mutex);
DEFINE_STATIC_SRCU(kfd_processes_srcu); DEFINE_SRCU(kfd_processes_srcu);
static struct workqueue_struct *kfd_process_wq; static struct workqueue_struct *kfd_process_wq;
...@@ -173,14 +173,8 @@ static void kfd_process_wq_release(struct work_struct *work) ...@@ -173,14 +173,8 @@ static void kfd_process_wq_release(struct work_struct *work)
{ {
struct kfd_process *p = container_of(work, struct kfd_process, struct kfd_process *p = container_of(work, struct kfd_process,
release_work); release_work);
struct kfd_process_device *pdd;
pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid); kfd_iommu_unbind_process(p);
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
if (pdd->bound == PDD_BOUND)
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
}
kfd_process_destroy_pdds(p); kfd_process_destroy_pdds(p);
...@@ -429,133 +423,13 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, ...@@ -429,133 +423,13 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
if (pdd->bound == PDD_BOUND) { err = kfd_iommu_bind_process_to_device(pdd);
return pdd; if (err)
} else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
return ERR_PTR(-EINVAL);
}
err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
if (err < 0)
return ERR_PTR(err); return ERR_PTR(err);
pdd->bound = PDD_BOUND;
return pdd; return pdd;
} }
/*
 * Re-bind to the device every process whose binding was temporarily
 * suspended (PDD_BOUND_SUSPENDED) by kfd_unbind_processes_from_device.
 *
 * The process table is walked under kfd_processes_srcu, and each process
 * mutex is held while its per-device data is inspected or updated. The
 * walk stops at the first amd_iommu_bind_pasid() failure.
 *
 * Returns the result of the last amd_iommu_bind_pasid() call (negative
 * on failure), or 0 if no binding had to be restored.
 */
int kfd_bind_processes_to_device(struct kfd_dev *dev)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p;
	unsigned int bkt;
	int ret = 0;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, bkt, p, kfd_processes) {
		mutex_lock(&p->mutex);
		pdd = kfd_get_process_device_data(dev, p);

		/* Only suspended bindings need to be restored */
		if (!WARN_ON(!pdd) && pdd->bound == PDD_BOUND_SUSPENDED) {
			ret = amd_iommu_bind_pasid(dev->pdev, p->pasid,
						   p->lead_thread);
			if (ret < 0)
				pr_err("Unexpected pasid %d binding failure\n",
				       p->pasid);
			else
				pdd->bound = PDD_BOUND;
		}

		mutex_unlock(&p->mutex);

		/* Give up on the remaining processes after a failure */
		if (ret < 0)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, srcu_idx);

	return ret;
}
/*
 * Flag every process that is currently bound (PDD_BOUND) to the device
 * as PDD_BOUND_SUSPENDED. kfd_bind_processes_to_device moves these
 * processes back to the PDD_BOUND state.
 *
 * The process table is walked under kfd_processes_srcu, and each process
 * mutex is held while its per-device data is updated.
 */
void kfd_unbind_processes_from_device(struct kfd_dev *dev)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p;
	unsigned int bkt;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, bkt, p, kfd_processes) {
		mutex_lock(&p->mutex);
		pdd = kfd_get_process_device_data(dev, p);

		/* A missing pdd is unexpected: warn and leave it alone */
		if (!WARN_ON(!pdd) && pdd->bound == PDD_BOUND)
			pdd->bound = PDD_BOUND_SUSPENDED;

		mutex_unlock(&p->mutex);
	}

	srcu_read_unlock(&kfd_processes_srcu, srcu_idx);
}
/*
 * kfd_process_iommu_unbind_callback - clean up when a PASID is unbound
 * @dev:   device the PASID is being unbound from
 * @pasid: PASID identifying the affected process
 *
 * Looks up the process by @pasid and returns silently if none is found.
 * Otherwise, if the device's debug manager is registered for this
 * process, it is unregistered and, when unregistering returns 0,
 * destroyed. The process's queues are then dequeued from @dev under the
 * process mutex, and kfd_unref_process() is called on the process
 * (presumably balancing a reference taken by the lookup - confirm
 * against kfd_lookup_process_by_pasid).
 */
void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd;

	/*
	 * Look for the process that matches the pasid. If there is no such
	 * process, we either released it in amdkfd's own notifier, or there
	 * is a bug. Unfortunately, there is no way to tell...
	 */
	p = kfd_lookup_process_by_pasid(pasid);
	if (!p)
		return;

	/* pasid is unsigned int, so it is printed with %u */
	pr_debug("Unbinding process %u from IOMMU\n", pasid);

	mutex_lock(kfd_get_dbgmgr_mutex());

	if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
		if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
			kfd_dbgmgr_destroy(dev->dbgmgr);
			dev->dbgmgr = NULL;
		}
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (pdd) {
		/* For GPU relying on IOMMU, we need to dequeue here
		 * when PASID is still bound.
		 */
		kfd_process_dequeue_from_device(pdd);
	}

	mutex_unlock(&p->mutex);

	kfd_unref_process(p);
}
struct kfd_process_device *kfd_get_first_process_device_data( struct kfd_process_device *kfd_get_first_process_device_data(
struct kfd_process *p) struct kfd_process *p)
{ {
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "kfd_crat.h" #include "kfd_crat.h"
#include "kfd_topology.h" #include "kfd_topology.h"
#include "kfd_device_queue_manager.h" #include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
/* topology_device_list - Master list of all topology devices */ /* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list; static struct list_head topology_device_list;
...@@ -875,19 +876,8 @@ static void find_system_memory(const struct dmi_header *dm, ...@@ -875,19 +876,8 @@ static void find_system_memory(const struct dmi_header *dm,
*/ */
static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
{ {
struct kfd_perf_properties *props; /* These are the only counters supported so far */
return kfd_iommu_add_perf_counters(kdev);
if (amd_iommu_pc_supported()) {
props = kfd_alloc_struct(props);
if (!props)
return -ENOMEM;
strcpy(props->block_name, "iommu");
props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
amd_iommu_pc_get_max_counters(0); /* assume one iommu */
list_add_tail(&props->list, &kdev->perf_props);
}
return 0;
} }
/* kfd_add_non_crat_information - Add information that is not currently /* kfd_add_non_crat_information - Add information that is not currently
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/list.h> #include <linux/list.h>
#include "kfd_priv.h" #include "kfd_crat.h"
#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128 #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128
...@@ -183,8 +183,4 @@ struct kfd_topology_device *kfd_create_topology_device( ...@@ -183,8 +183,4 @@ struct kfd_topology_device *kfd_create_topology_device(
struct list_head *device_list); struct list_head *device_list);
void kfd_release_topology_device_list(struct list_head *device_list); void kfd_release_topology_device_list(struct list_head *device_list);
extern bool amd_iommu_pc_supported(void);
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
#endif /* __KFD_TOPOLOGY_H__ */ #endif /* __KFD_TOPOLOGY_H__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment