Commit 143c7bc6 authored by Linus Torvalds

Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd

Pull iommufd updates from Jason Gunthorpe:
 "Some polishing and small fixes for iommufd:

   - Remove IOMMU_CAP_INTR_REMAP, instead rely on the interrupt
     subsystem

   - Use GFP_KERNEL_ACCOUNT inside the iommu_domains

   - Support VFIO_NOIOMMU mode with iommufd

   - Various typos

   - A list corruption bug if HWPTs are used for attach"

* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd:
  iommufd: Do not add the same hwpt to the ioas->hwpt_list twice
  iommufd: Make sure to zero vfio_iommu_type1_info before copying to user
  vfio: Support VFIO_NOIOMMU with iommufd
  iommufd: Add three missing structures in ucmd_buffer
  selftests: iommu: Fix test_cmd_destroy_access() call in user_copy
  iommu: Remove IOMMU_CAP_INTR_REMAP
  irq/s390: Add arch_is_isolated_msi() for s390
  iommu/x86: Replace IOMMU_CAP_INTR_REMAP with IRQ_DOMAIN_FLAG_ISOLATED_MSI
  genirq/msi: Rename IRQ_DOMAIN_MSI_REMAP to IRQ_DOMAIN_ISOLATED_MSI
  genirq/irqdomain: Remove unused irq_domain_check_msi_remap() code
  iommufd: Convert to msi_device_has_isolated_msi()
  vfio/type1: Convert to iommu_group_has_isolated_msi()
  iommu: Add iommu_group_has_isolated_msi()
  genirq/msi: Add msi_device_has_isolated_msi()
parents a13de74e 939204e4
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_MSI_H
#define _ASM_S390_MSI_H
#include <asm-generic/msi.h>
/*
* Work around S390 not using irq_domain at all, which means we can't set
* IRQ_DOMAIN_FLAG_ISOLATED_MSI. For an explanation of how it works, see:
*
* https://lore.kernel.org/r/31af8174-35e9-ebeb-b9ef-74c90d4bfd93@linux.ibm.com/
*
* Note this is less isolated than the ARM/x86 versions as userspace can trigger
* MSI belonging to kernel devices within the same gisa.
*/
#define arch_is_isolated_msi() true
#endif
...@@ -277,7 +277,7 @@ static int usnic_uiom_map_sorted_intervals(struct list_head *intervals, ...@@ -277,7 +277,7 @@ static int usnic_uiom_map_sorted_intervals(struct list_head *intervals,
usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x", usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
va_start, &pa_start, size, flags); va_start, &pa_start, size, flags);
err = iommu_map(pd->domain, va_start, pa_start, err = iommu_map(pd->domain, va_start, pa_start,
size, flags, GFP_KERNEL); size, flags, GFP_ATOMIC);
if (err) { if (err) {
usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
va_start, &pa_start, size, err); va_start, &pa_start, size, err);
...@@ -294,7 +294,7 @@ static int usnic_uiom_map_sorted_intervals(struct list_head *intervals, ...@@ -294,7 +294,7 @@ static int usnic_uiom_map_sorted_intervals(struct list_head *intervals,
usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n", usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
va_start, &pa_start, size, flags); va_start, &pa_start, size, flags);
err = iommu_map(pd->domain, va_start, pa_start, err = iommu_map(pd->domain, va_start, pa_start,
size, flags, GFP_KERNEL); size, flags, GFP_ATOMIC);
if (err) { if (err) {
usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
va_start, &pa_start, size, err); va_start, &pa_start, size, err);
......
...@@ -2278,8 +2278,6 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) ...@@ -2278,8 +2278,6 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) { switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY: case IOMMU_CAP_CACHE_COHERENCY:
return true; return true;
case IOMMU_CAP_INTR_REMAP:
return (irq_remapping_enabled == 1);
case IOMMU_CAP_NOEXEC: case IOMMU_CAP_NOEXEC:
return false; return false;
case IOMMU_CAP_PRE_BOOT_PROTECTION: case IOMMU_CAP_PRE_BOOT_PROTECTION:
...@@ -3682,7 +3680,8 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu) ...@@ -3682,7 +3680,8 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
} }
irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI); irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI);
iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
IRQ_DOMAIN_FLAG_ISOLATED_MSI;
if (amd_iommu_np_cache) if (amd_iommu_np_cache)
iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops; iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops;
......
...@@ -4478,8 +4478,6 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) ...@@ -4478,8 +4478,6 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) { switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY: case IOMMU_CAP_CACHE_COHERENCY:
return true; return true;
case IOMMU_CAP_INTR_REMAP:
return irq_remapping_enabled == 1;
case IOMMU_CAP_PRE_BOOT_PROTECTION: case IOMMU_CAP_PRE_BOOT_PROTECTION:
return dmar_platform_optin(); return dmar_platform_optin();
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
......
...@@ -573,7 +573,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) ...@@ -573,7 +573,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
} }
irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR); irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR);
iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
IRQ_DOMAIN_FLAG_ISOLATED_MSI;
if (cap_caching_mode(iommu->cap)) if (cap_caching_mode(iommu->cap))
iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops; iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops;
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <linux/cc_platform.h> #include <linux/cc_platform.h>
#include <trace/events/iommu.h> #include <trace/events/iommu.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/msi.h>
#include "dma-iommu.h" #include "dma-iommu.h"
...@@ -1904,6 +1905,29 @@ bool device_iommu_capable(struct device *dev, enum iommu_cap cap) ...@@ -1904,6 +1905,29 @@ bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
} }
EXPORT_SYMBOL_GPL(device_iommu_capable); EXPORT_SYMBOL_GPL(device_iommu_capable);
/**
* iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi()
* for a group
* @group: Group to query
*
* IOMMU groups should not have differing values of
* msi_device_has_isolated_msi() for devices in a group. However nothing
* directly prevents this, so ensure mistakes don't result in isolation failures
* by checking that all the devices are the same.
*/
bool iommu_group_has_isolated_msi(struct iommu_group *group)
{
struct group_device *group_dev;
bool ret = true;
mutex_lock(&group->mutex);
list_for_each_entry(group_dev, &group->devices, list)
ret &= msi_device_has_isolated_msi(group_dev->dev);
mutex_unlock(&group->mutex);
return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi);
/** /**
* iommu_set_fault_handler() - set a fault handler for an iommu domain * iommu_set_fault_handler() - set a fault handler for an iommu domain
* @domain: iommu domain * @domain: iommu domain
......
...@@ -23,7 +23,7 @@ config IOMMUFD_VFIO_CONTAINER ...@@ -23,7 +23,7 @@ config IOMMUFD_VFIO_CONTAINER
removed. removed.
IOMMUFD VFIO container emulation is known to lack certain features IOMMUFD VFIO container emulation is known to lack certain features
of the native VFIO container, such as no-IOMMU support, peer-to-peer of the native VFIO container, such as peer-to-peer
DMA mapping, PPC IOMMU support, as well as other potentially DMA mapping, PPC IOMMU support, as well as other potentially
undiscovered gaps. This option is currently intended for the undiscovered gaps. This option is currently intended for the
purpose of testing IOMMUFD with unmodified userspace supporting VFIO purpose of testing IOMMUFD with unmodified userspace supporting VFIO
......
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
#include <linux/iommufd.h> #include <linux/iommufd.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/iommu.h> #include <linux/iommu.h>
#include <linux/irqdomain.h>
#include "io_pagetable.h" #include "io_pagetable.h"
#include "iommufd_private.h" #include "iommufd_private.h"
...@@ -169,8 +168,7 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev, ...@@ -169,8 +168,7 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev,
* operation from the device (eg a simple DMA) cannot trigger an * operation from the device (eg a simple DMA) cannot trigger an
* interrupt outside this iommufd context. * interrupt outside this iommufd context.
*/ */
if (!device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP) && if (!iommu_group_has_isolated_msi(idev->group)) {
!irq_domain_check_msi_remap()) {
if (!allow_unsafe_interrupts) if (!allow_unsafe_interrupts)
return -EPERM; return -EPERM;
...@@ -346,10 +344,6 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) ...@@ -346,10 +344,6 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
rc = iommufd_device_do_attach(idev, hwpt); rc = iommufd_device_do_attach(idev, hwpt);
if (rc) if (rc)
goto out_put_pt_obj; goto out_put_pt_obj;
mutex_lock(&hwpt->ioas->mutex);
list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list);
mutex_unlock(&hwpt->ioas->mutex);
break; break;
} }
case IOMMUFD_OBJ_IOAS: { case IOMMUFD_OBJ_IOAS: {
......
...@@ -18,6 +18,8 @@ struct iommufd_ctx { ...@@ -18,6 +18,8 @@ struct iommufd_ctx {
struct xarray objects; struct xarray objects;
u8 account_mode; u8 account_mode;
/* Compatibility with VFIO no iommu */
u8 no_iommu_mode;
struct iommufd_ioas *vfio_ioas; struct iommufd_ioas *vfio_ioas;
}; };
......
...@@ -252,9 +252,12 @@ union ucmd_buffer { ...@@ -252,9 +252,12 @@ union ucmd_buffer {
struct iommu_destroy destroy; struct iommu_destroy destroy;
struct iommu_ioas_alloc alloc; struct iommu_ioas_alloc alloc;
struct iommu_ioas_allow_iovas allow_iovas; struct iommu_ioas_allow_iovas allow_iovas;
struct iommu_ioas_copy ioas_copy;
struct iommu_ioas_iova_ranges iova_ranges; struct iommu_ioas_iova_ranges iova_ranges;
struct iommu_ioas_map map; struct iommu_ioas_map map;
struct iommu_ioas_unmap unmap; struct iommu_ioas_unmap unmap;
struct iommu_option option;
struct iommu_vfio_ioas vfio_ioas;
#ifdef CONFIG_IOMMUFD_TEST #ifdef CONFIG_IOMMUFD_TEST
struct iommu_test_cmd test; struct iommu_test_cmd test;
#endif #endif
......
...@@ -26,39 +26,84 @@ static struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx) ...@@ -26,39 +26,84 @@ static struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx)
} }
/** /**
* iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use * iommufd_vfio_compat_ioas_get_id - Ensure a compat IOAS exists
* @ictx: Context to operate on
* @out_ioas_id: The IOAS ID of the compatibility IOAS
*
* Return the ID of the current compatibility IOAS. The ID can be passed into
* other functions that take an ioas_id.
*/
int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
{
	struct iommufd_ioas *compat_ioas = get_compat_ioas(ictx);

	/* Hand a failed lookup back to the caller as its errno value */
	if (IS_ERR(compat_ioas))
		return PTR_ERR(compat_ioas);

	/* Copy the ID out first, then put the object */
	*out_ioas_id = compat_ioas->obj.id;
	iommufd_put_object(&compat_ioas->obj);

	return 0;
}
EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO);
/**
 * iommufd_vfio_compat_set_no_iommu - Called when a no-iommu device is attached
 * @ictx: Context to operate on
 *
 * This allows selecting the VFIO_NOIOMMU_IOMMU and blocks normal types. The
 * switch is refused when a compatibility IOAS is already assigned to @ictx.
 *
 * Return: 0 once no_iommu_mode has been set, -EINVAL if @ictx already has a
 * vfio_ioas.
 */
int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
{
	int rc = 0;

	/* vfio_ioas is tested and no_iommu_mode written under the xarray lock */
	xa_lock(&ictx->objects);
	if (ictx->vfio_ioas)
		rc = -EINVAL;
	else
		ictx->no_iommu_mode = 1;
	xa_unlock(&ictx->objects);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_set_no_iommu, IOMMUFD_VFIO);
/**
* iommufd_vfio_compat_ioas_create - Ensure the compat IOAS is created
* @ictx: Context to operate on * @ictx: Context to operate on
* @out_ioas_id: The ioas_id the caller should use
* *
* The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate
* on since they do not have an IOAS ID input in their ABI. Only attaching a * on since they do not have an IOAS ID input in their ABI. Only attaching a
* group should cause a default creation of the internal ioas, this returns the * group should cause a default creation of the internal ioas, this does nothing
* existing ioas if it has already been assigned somehow. * if an existing ioas has already been assigned somehow.
*/ */
int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx)
{ {
struct iommufd_ioas *ioas = NULL; struct iommufd_ioas *ioas = NULL;
struct iommufd_ioas *out_ioas; int ret;
ioas = iommufd_ioas_alloc(ictx); ioas = iommufd_ioas_alloc(ictx);
if (IS_ERR(ioas)) if (IS_ERR(ioas))
return PTR_ERR(ioas); return PTR_ERR(ioas);
xa_lock(&ictx->objects); xa_lock(&ictx->objects);
if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) /*
out_ioas = ictx->vfio_ioas; * VFIO won't allow attaching a container to both iommu and no iommu
else { * operation
out_ioas = ioas; */
ictx->vfio_ioas = ioas; if (ictx->no_iommu_mode) {
ret = -EINVAL;
goto out_abort;
} }
xa_unlock(&ictx->objects);
*out_ioas_id = out_ioas->obj.id; if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) {
if (out_ioas != ioas) { ret = 0;
iommufd_put_object(&out_ioas->obj); iommufd_put_object(&ictx->vfio_ioas->obj);
iommufd_object_abort(ictx, &ioas->obj); goto out_abort;
return 0;
} }
ictx->vfio_ioas = ioas;
xa_unlock(&ictx->objects);
/* /*
* An automatically created compat IOAS is treated as a userspace * An automatically created compat IOAS is treated as a userspace
* created object. Userspace can learn the ID via IOMMU_VFIO_IOAS_GET, * created object. Userspace can learn the ID via IOMMU_VFIO_IOAS_GET,
...@@ -67,8 +112,13 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) ...@@ -67,8 +112,13 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
*/ */
iommufd_object_finalize(ictx, &ioas->obj); iommufd_object_finalize(ictx, &ioas->obj);
return 0; return 0;
out_abort:
xa_unlock(&ictx->objects);
iommufd_object_abort(ictx, &ioas->obj);
return ret;
} }
EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO); EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create, IOMMUFD_VFIO);
int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
{ {
...@@ -235,6 +285,9 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx, ...@@ -235,6 +285,9 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
case VFIO_UNMAP_ALL: case VFIO_UNMAP_ALL:
return 1; return 1;
case VFIO_NOIOMMU_IOMMU:
return IS_ENABLED(CONFIG_VFIO_NOIOMMU);
case VFIO_DMA_CC_IOMMU: case VFIO_DMA_CC_IOMMU:
return iommufd_vfio_cc_iommu(ictx); return iommufd_vfio_cc_iommu(ictx);
...@@ -261,10 +314,24 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx, ...@@ -261,10 +314,24 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
{ {
bool no_iommu_mode = READ_ONCE(ictx->no_iommu_mode);
struct iommufd_ioas *ioas = NULL; struct iommufd_ioas *ioas = NULL;
int rc = 0; int rc = 0;
if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) /*
* Emulation for NOIOMMU is imperfect in that VFIO blocks almost all
* other ioctls. We let them keep working but they mostly fail since no
* IOAS should exist.
*/
if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU &&
no_iommu_mode) {
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
return 0;
}
if ((type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) ||
no_iommu_mode)
return -EINVAL; return -EINVAL;
/* VFIO fails the set_iommu if there is no group */ /* VFIO fails the set_iommu if there is no group */
...@@ -381,7 +448,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, ...@@ -381,7 +448,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
}; };
size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
struct vfio_info_cap_header __user *last_cap = NULL; struct vfio_info_cap_header __user *last_cap = NULL;
struct vfio_iommu_type1_info info; struct vfio_iommu_type1_info info = {};
struct iommufd_ioas *ioas; struct iommufd_ioas *ioas;
size_t total_cap_size; size_t total_cap_size;
int rc; int rc;
......
...@@ -34,8 +34,6 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap) ...@@ -34,8 +34,6 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) { switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY: case IOMMU_CAP_CACHE_COHERENCY:
return true; return true;
case IOMMU_CAP_INTR_REMAP:
return true;
default: default:
return false; return false;
} }
......
...@@ -4692,7 +4692,7 @@ static bool __maybe_unused its_enable_quirk_socionext_synquacer(void *data) ...@@ -4692,7 +4692,7 @@ static bool __maybe_unused its_enable_quirk_socionext_synquacer(void *data)
} }
/* the pre-ITS breaks isolation, so disable MSI remapping */ /* the pre-ITS breaks isolation, so disable MSI remapping */
its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_MSI_REMAP; its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_ISOLATED_MSI;
return true; return true;
} }
return false; return false;
...@@ -5075,7 +5075,7 @@ static int __init its_probe_one(struct resource *res, ...@@ -5075,7 +5075,7 @@ static int __init its_probe_one(struct resource *res,
its->cmd_write = its->cmd_base; its->cmd_write = its->cmd_base;
its->fwnode_handle = handle; its->fwnode_handle = handle;
its->get_msi_base = its_irq_get_msi_base; its->get_msi_base = its_irq_get_msi_base;
its->msi_domain_flags = IRQ_DOMAIN_FLAG_MSI_REMAP; its->msi_domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI;
its_enable_quirks(its); its_enable_quirks(its);
......
...@@ -32,6 +32,7 @@ config VFIO_IOMMU_SPAPR_TCE ...@@ -32,6 +32,7 @@ config VFIO_IOMMU_SPAPR_TCE
tristate tristate
depends on SPAPR_TCE_IOMMU depends on SPAPR_TCE_IOMMU
default VFIO default VFIO
endif
config VFIO_NOIOMMU config VFIO_NOIOMMU
bool "VFIO No-IOMMU support" bool "VFIO No-IOMMU support"
...@@ -46,7 +47,6 @@ config VFIO_NOIOMMU ...@@ -46,7 +47,6 @@ config VFIO_NOIOMMU
this mode since there is no IOMMU to provide DMA translation. this mode since there is no IOMMU to provide DMA translation.
If you don't know what to do here, say N. If you don't know what to do here, say N.
endif
config VFIO_VIRQFD config VFIO_VIRQFD
bool bool
......
...@@ -29,13 +29,6 @@ static struct vfio { ...@@ -29,13 +29,6 @@ static struct vfio {
struct mutex iommu_drivers_lock; struct mutex iommu_drivers_lock;
} vfio; } vfio;
#ifdef CONFIG_VFIO_NOIOMMU
bool vfio_noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
vfio_noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
#endif
static void *vfio_noiommu_open(unsigned long arg) static void *vfio_noiommu_open(unsigned long arg)
{ {
if (arg != VFIO_NOIOMMU_IOMMU) if (arg != VFIO_NOIOMMU_IOMMU)
......
...@@ -133,9 +133,12 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, ...@@ -133,9 +133,12 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
iommufd = iommufd_ctx_from_file(f.file); iommufd = iommufd_ctx_from_file(f.file);
if (!IS_ERR(iommufd)) { if (!IS_ERR(iommufd)) {
u32 ioas_id; if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) &&
group->type == VFIO_NO_IOMMU)
ret = iommufd_vfio_compat_set_no_iommu(iommufd);
else
ret = iommufd_vfio_compat_ioas_create(iommufd);
ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
if (ret) { if (ret) {
iommufd_ctx_put(group->iommufd); iommufd_ctx_put(group->iommufd);
goto out_unlock; goto out_unlock;
......
...@@ -18,6 +18,20 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) ...@@ -18,6 +18,20 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
lockdep_assert_held(&vdev->dev_set->lock); lockdep_assert_held(&vdev->dev_set->lock);
if (vfio_device_is_noiommu(vdev)) {
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
/*
* Require no compat ioas to be assigned to proceed. The basic
* statement is that the user cannot have done something that
* implies they expected translation to exist
*/
if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id))
return -EPERM;
return 0;
}
/* /*
* If the driver doesn't provide this op then it means the device does * If the driver doesn't provide this op then it means the device does
* not do DMA at all. So nothing to do. * not do DMA at all. So nothing to do.
...@@ -29,7 +43,7 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) ...@@ -29,7 +43,7 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
if (ret) if (ret)
return ret; return ret;
ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id);
if (ret) if (ret)
goto err_unbind; goto err_unbind;
ret = vdev->ops->attach_ioas(vdev, &ioas_id); ret = vdev->ops->attach_ioas(vdev, &ioas_id);
...@@ -52,6 +66,9 @@ void vfio_iommufd_unbind(struct vfio_device *vdev) ...@@ -52,6 +66,9 @@ void vfio_iommufd_unbind(struct vfio_device *vdev)
{ {
lockdep_assert_held(&vdev->dev_set->lock); lockdep_assert_held(&vdev->dev_set->lock);
if (vfio_device_is_noiommu(vdev))
return;
if (vdev->ops->unbind_iommufd) if (vdev->ops->unbind_iommufd)
vdev->ops->unbind_iommufd(vdev); vdev->ops->unbind_iommufd(vdev);
} }
......
...@@ -10,10 +10,10 @@ ...@@ -10,10 +10,10 @@
#include <linux/device.h> #include <linux/device.h>
#include <linux/cdev.h> #include <linux/cdev.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/vfio.h>
struct iommufd_ctx; struct iommufd_ctx;
struct iommu_group; struct iommu_group;
struct vfio_device;
struct vfio_container; struct vfio_container;
void vfio_device_put_registration(struct vfio_device *device); void vfio_device_put_registration(struct vfio_device *device);
...@@ -88,6 +88,12 @@ bool vfio_device_has_container(struct vfio_device *device); ...@@ -88,6 +88,12 @@ bool vfio_device_has_container(struct vfio_device *device);
int __init vfio_group_init(void); int __init vfio_group_init(void);
void vfio_group_cleanup(void); void vfio_group_cleanup(void);
static inline bool vfio_device_is_noiommu(struct vfio_device *vdev)
{
return IS_ENABLED(CONFIG_VFIO_NOIOMMU) &&
vdev->group->type == VFIO_NO_IOMMU;
}
#if IS_ENABLED(CONFIG_VFIO_CONTAINER) #if IS_ENABLED(CONFIG_VFIO_CONTAINER)
/* events for the backend driver notify callback */ /* events for the backend driver notify callback */
enum vfio_iommu_notify_type { enum vfio_iommu_notify_type {
......
...@@ -37,7 +37,6 @@ ...@@ -37,7 +37,6 @@
#include <linux/vfio.h> #include <linux/vfio.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/irqdomain.h>
#include "vfio.h" #include "vfio.h"
#define DRIVER_VERSION "0.2" #define DRIVER_VERSION "0.2"
...@@ -2170,12 +2169,6 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu, ...@@ -2170,12 +2169,6 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
list_splice_tail(iova_copy, iova); list_splice_tail(iova_copy, iova);
} }
/* Redundantly walks non-present capabilities to simplify caller */
static int vfio_iommu_device_capable(struct device *dev, void *data)
{
return device_iommu_capable(dev, (enum iommu_cap)data);
}
static int vfio_iommu_domain_alloc(struct device *dev, void *data) static int vfio_iommu_domain_alloc(struct device *dev, void *data)
{ {
struct iommu_domain **domain = data; struct iommu_domain **domain = data;
...@@ -2190,7 +2183,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, ...@@ -2190,7 +2183,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
struct vfio_iommu *iommu = iommu_data; struct vfio_iommu *iommu = iommu_data;
struct vfio_iommu_group *group; struct vfio_iommu_group *group;
struct vfio_domain *domain, *d; struct vfio_domain *domain, *d;
bool resv_msi, msi_remap; bool resv_msi;
phys_addr_t resv_msi_base = 0; phys_addr_t resv_msi_base = 0;
struct iommu_domain_geometry *geo; struct iommu_domain_geometry *geo;
LIST_HEAD(iova_copy); LIST_HEAD(iova_copy);
...@@ -2288,11 +2281,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, ...@@ -2288,11 +2281,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
INIT_LIST_HEAD(&domain->group_list); INIT_LIST_HEAD(&domain->group_list);
list_add(&group->next, &domain->group_list); list_add(&group->next, &domain->group_list);
msi_remap = irq_domain_check_msi_remap() || if (!allow_unsafe_interrupts &&
iommu_group_for_each_dev(iommu_group, (void *)IOMMU_CAP_INTR_REMAP, !iommu_group_has_isolated_msi(iommu_group)) {
vfio_iommu_device_capable);
if (!allow_unsafe_interrupts && !msi_remap) {
pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n", pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
__func__); __func__);
ret = -EPERM; ret = -EPERM;
......
...@@ -45,6 +45,13 @@ static struct vfio { ...@@ -45,6 +45,13 @@ static struct vfio {
struct ida device_ida; struct ida device_ida;
} vfio; } vfio;
#ifdef CONFIG_VFIO_NOIOMMU
/*
 * Backing variable for the "enable_unsafe_noiommu_mode" module parameter.
 * Only built when CONFIG_VFIO_NOIOMMU is set; defaults to false.
 */
bool vfio_noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
vfio_noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
#endif
static DEFINE_XARRAY(vfio_device_set_xa); static DEFINE_XARRAY(vfio_device_set_xa);
int vfio_assign_device_set(struct vfio_device *device, void *set_id) int vfio_assign_device_set(struct vfio_device *device, void *set_id)
......
...@@ -120,7 +120,6 @@ static inline bool iommu_is_dma_domain(struct iommu_domain *domain) ...@@ -120,7 +120,6 @@ static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
enum iommu_cap { enum iommu_cap {
IOMMU_CAP_CACHE_COHERENCY, /* IOMMU_CACHE is supported */ IOMMU_CAP_CACHE_COHERENCY, /* IOMMU_CACHE is supported */
IOMMU_CAP_INTR_REMAP, /* IOMMU supports interrupt isolation */
IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */ IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */
IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for
DMA protection and we should too */ DMA protection and we should too */
...@@ -459,6 +458,7 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) ...@@ -459,6 +458,7 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
extern int bus_iommu_probe(struct bus_type *bus); extern int bus_iommu_probe(struct bus_type *bus);
extern bool iommu_present(struct bus_type *bus); extern bool iommu_present(struct bus_type *bus);
extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus);
extern struct iommu_group *iommu_group_get_by_id(int id); extern struct iommu_group *iommu_group_get_by_id(int id);
extern void iommu_domain_free(struct iommu_domain *domain); extern void iommu_domain_free(struct iommu_domain *domain);
......
...@@ -57,7 +57,9 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, ...@@ -57,7 +57,9 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
unsigned long iova, unsigned long length); unsigned long iova, unsigned long length);
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
void *data, size_t len, unsigned int flags); void *data, size_t len, unsigned int flags);
int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx);
int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx);
#else /* !CONFIG_IOMMUFD */ #else /* !CONFIG_IOMMUFD */
static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
{ {
...@@ -89,8 +91,12 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long ...@@ -89,8 +91,12 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, static inline int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx)
u32 *out_ioas_id) {
return -EOPNOTSUPP;
}
static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
{ {
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
......
...@@ -194,8 +194,10 @@ enum { ...@@ -194,8 +194,10 @@ enum {
/* Irq domain implements MSIs */ /* Irq domain implements MSIs */
IRQ_DOMAIN_FLAG_MSI = (1 << 4), IRQ_DOMAIN_FLAG_MSI = (1 << 4),
/* Irq domain implements MSI remapping */ /*
IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), * Irq domain implements isolated MSI, see msi_device_has_isolated_msi()
*/
IRQ_DOMAIN_FLAG_ISOLATED_MSI = (1 << 5),
/* Irq domain doesn't translate anything */ /* Irq domain doesn't translate anything */
IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6), IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6),
...@@ -278,7 +280,6 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, ...@@ -278,7 +280,6 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode,
void *host_data); void *host_data);
extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
enum irq_domain_bus_token bus_token); enum irq_domain_bus_token bus_token);
extern bool irq_domain_check_msi_remap(void);
extern void irq_set_default_host(struct irq_domain *host); extern void irq_set_default_host(struct irq_domain *host);
extern struct irq_domain *irq_get_default_host(void); extern struct irq_domain *irq_get_default_host(void);
extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
...@@ -561,13 +562,6 @@ static inline bool irq_domain_is_msi(struct irq_domain *domain) ...@@ -561,13 +562,6 @@ static inline bool irq_domain_is_msi(struct irq_domain *domain)
return domain->flags & IRQ_DOMAIN_FLAG_MSI; return domain->flags & IRQ_DOMAIN_FLAG_MSI;
} }
static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
{
return domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP;
}
extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain);
static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) static inline bool irq_domain_is_msi_parent(struct irq_domain *domain)
{ {
return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT; return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT;
...@@ -613,17 +607,6 @@ static inline bool irq_domain_is_msi(struct irq_domain *domain) ...@@ -613,17 +607,6 @@ static inline bool irq_domain_is_msi(struct irq_domain *domain)
return false; return false;
} }
/* Stub variant: @domain is ignored and MSI remapping is never reported. */
static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
{
	return false;
}
/* Stub variant: @domain is ignored and MSI remapping is never reported. */
static inline bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
{
	return false;
}
static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) static inline bool irq_domain_is_msi_parent(struct irq_domain *domain)
{ {
return false; return false;
...@@ -643,10 +626,6 @@ static inline struct irq_domain *irq_find_matching_fwnode( ...@@ -643,10 +626,6 @@ static inline struct irq_domain *irq_find_matching_fwnode(
{ {
return NULL; return NULL;
} }
/* Stub used when CONFIG_IRQ_DOMAIN is disabled: always answers false. */
static inline bool irq_domain_check_msi_remap(void)
{
	return false;
}
#endif /* !CONFIG_IRQ_DOMAIN */ #endif /* !CONFIG_IRQ_DOMAIN */
#endif /* _LINUX_IRQDOMAIN_H */ #endif /* _LINUX_IRQDOMAIN_H */
...@@ -48,6 +48,10 @@ typedef struct arch_msi_msg_data { ...@@ -48,6 +48,10 @@ typedef struct arch_msi_msg_data {
} __attribute__ ((packed)) arch_msi_msg_data_t; } __attribute__ ((packed)) arch_msi_msg_data_t;
#endif #endif
/*
 * Fallback used when the architecture has not defined its own
 * arch_is_isolated_msi(): conservatively report that MSIs are not isolated.
 */
#ifndef arch_is_isolated_msi
#define arch_is_isolated_msi() false
#endif
/** /**
* msi_msg - Representation of a MSI message * msi_msg - Representation of a MSI message
* @address_lo: Low 32 bits of msi message address * @address_lo: Low 32 bits of msi message address
...@@ -649,6 +653,19 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir ...@@ -649,6 +653,19 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir
void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nvec); unsigned int nvec);
void *platform_msi_get_host_data(struct irq_domain *domain); void *platform_msi_get_host_data(struct irq_domain *domain);
bool msi_device_has_isolated_msi(struct device *dev);
#else /* CONFIG_GENERIC_MSI_IRQ */
/*
 * Fallback for builds without CONFIG_GENERIC_MSI_IRQ; @dev is not consulted.
 *
 * A platform that does not enable MSI support could arguably be said to have
 * "isolated MSI", since an interrupt controller that cannot receive MSIs is
 * inherently isolated by our definition. The answer is nevertheless left to
 * arch_is_isolated_msi(), whose default definition is conservative and
 * returns false anyhow.
 */
static inline bool msi_device_has_isolated_msi(struct device *dev)
{
	const bool arch_isolated = arch_is_isolated_msi();

	return arch_isolated;
}
#endif /* CONFIG_GENERIC_MSI_IRQ */ #endif /* CONFIG_GENERIC_MSI_IRQ */
/* PCI specific interfaces */ /* PCI specific interfaces */
......
...@@ -469,31 +469,6 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, ...@@ -469,31 +469,6 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
} }
EXPORT_SYMBOL_GPL(irq_find_matching_fwspec); EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);
/**
 * irq_domain_check_msi_remap - Report whether every MSI irq domain supports
 *				IRQ remapping
 *
 * Walks irq_domain_list under irq_domain_mutex and stops at the first MSI
 * domain for which irq_domain_hierarchical_is_msi_remap() is false.
 *
 * Return: false if at least one MSI irq domain lacks IRQ remapping support,
 * true otherwise (which includes the case of no MSI irq domain at all)
 */
bool irq_domain_check_msi_remap(void)
{
	struct irq_domain *d;
	bool all_remapped = true;

	mutex_lock(&irq_domain_mutex);
	list_for_each_entry(d, &irq_domain_list, link) {
		/* Only MSI domains are relevant to the answer. */
		if (!irq_domain_is_msi(d))
			continue;
		if (irq_domain_hierarchical_is_msi_remap(d))
			continue;

		all_remapped = false;
		break;
	}
	mutex_unlock(&irq_domain_mutex);

	return all_remapped;
}
EXPORT_SYMBOL_GPL(irq_domain_check_msi_remap);
/** /**
* irq_set_default_host() - Set a "default" irq domain * irq_set_default_host() - Set a "default" irq domain
* @domain: default domain pointer * @domain: default domain pointer
...@@ -1890,20 +1865,6 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain) ...@@ -1890,20 +1865,6 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain)
if (domain->ops->alloc) if (domain->ops->alloc)
domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY; domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY;
} }
/**
 * irq_domain_hierarchical_is_msi_remap - Test @domain and each of its
 *					  ancestors for MSI remapping support
 * @domain: domain pointer (a NULL pointer yields false)
 *
 * Return: true as soon as @domain or one of the domains reached through the
 * ->parent links satisfies irq_domain_is_msi_remap(), false if none does.
 */
bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
{
	struct irq_domain *cur = domain;

	while (cur) {
		if (irq_domain_is_msi_remap(cur))
			return true;
		cur = cur->parent;
	}

	return false;
}
#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */
/** /**
* irq_domain_get_irq_data - Get irq_data associated with @virq and @domain * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain
......
...@@ -1627,3 +1627,30 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain) ...@@ -1627,3 +1627,30 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
{ {
return (struct msi_domain_info *)domain->host_data; return (struct msi_domain_info *)domain->host_data;
} }
/**
 * msi_device_has_isolated_msi - True if the device has isolated MSI
 * @dev: The device to check
 *
 * Isolated MSI means that HW modeled by an irq_domain on the path from the
 * initiating device to the CPU validates that an MSI message names an
 * interrupt number the device is authorized to trigger, and blocks the device
 * from triggering any interrupt it is not authorized for. Currently
 * "authorized" means the MSI vector is one assigned to the device.
 *
 * This matters for securing VFIO use cases, where a rogue MSI (eg created by
 * abusing a normal PCI MemWr DMA) must not let the VFIO userspace have an
 * effect outside its security domain, eg userspace triggering interrupts on
 * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM
 * triggering interrupts on another VM.
 *
 * Return: true if the device's MSI domain or any of its parents has
 * IRQ_DOMAIN_FLAG_ISOLATED_MSI set, otherwise the value of
 * arch_is_isolated_msi().
 */
bool msi_device_has_isolated_msi(struct device *dev)
{
	struct irq_domain *d = dev_get_msi_domain(dev);

	while (d) {
		if (d->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI)
			return true;
		d = d->parent;
	}

	/* No domain in the chain claims isolation; defer to the architecture. */
	return arch_is_isolated_msi();
}
EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi);
...@@ -1259,7 +1259,7 @@ TEST_F(iommufd_mock_domain, user_copy) ...@@ -1259,7 +1259,7 @@ TEST_F(iommufd_mock_domain, user_copy)
test_cmd_destroy_access_pages( test_cmd_destroy_access_pages(
access_cmd.id, access_cmd.access_pages.out_access_pages_id); access_cmd.id, access_cmd.access_pages.out_access_pages_id);
test_cmd_destroy_access(access_cmd.id) test_ioctl_destroy(ioas_id); test_cmd_destroy_access(access_cmd.id);
test_ioctl_destroy(ioas_id); test_ioctl_destroy(ioas_id);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment