Commit dcf40ed3, authored by Jason Gunthorpe, committed by Joerg Roedel

iommu: Make __iommu_group_set_domain() handle error unwind

Let's try to have a consistent and clear strategy for error handling
during domain attach failures.

There are two broad categories, the first is callers doing destruction and
trying to set the domain back to a previously good domain. These cases
cannot handle failure during destruction flows and must succeed, or at
least avoid a UAF on the current group->domain which is likely about to be
freed.

Many of the drivers are well behaved here and will not hit the WARN_ON's
or a UAF, but some are doing hypercalls/etc that can fail unpredictably
and don't meet the expectations.

The second case is attaching a domain for the first time in a failable
context. On failure, the attachment should be restored to group->domain using
the above unfailable operation.

Have __iommu_group_set_domain_internal() execute a common algorithm that
tries to achieve this and that, in the worst case, leaves a device
"detached" or assigned to a global blocking domain. This relies on some
existing common driver behaviors: an attach failure also performs a detach,
and true IOMMU_DOMAIN_BLOCKED implementations are never allowed to fail.

Name the first case with __iommu_group_set_domain_nofail() to make it
clear.

Pull all the error handling and WARN_ON generation into
__iommu_group_set_domain_internal().

Avoid the obfuscating use of __iommu_group_for_each_dev() and be more
careful about what should happen during failures by only touching devices
we've already touched.
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/3-v5-1b99ae392328+44574-iommu_err_unwind_jgg@nvidia.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent 3006b15b
...@@ -101,8 +101,26 @@ static int __iommu_attach_device(struct iommu_domain *domain, ...@@ -101,8 +101,26 @@ static int __iommu_attach_device(struct iommu_domain *domain,
struct device *dev); struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain, static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group); struct iommu_group *group);
enum {
IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
};
static int __iommu_group_set_domain_internal(struct iommu_group *group,
struct iommu_domain *new_domain,
unsigned int flags);
static int __iommu_group_set_domain(struct iommu_group *group, static int __iommu_group_set_domain(struct iommu_group *group,
struct iommu_domain *new_domain); struct iommu_domain *new_domain)
{
return __iommu_group_set_domain_internal(group, new_domain, 0);
}
static void __iommu_group_set_domain_nofail(struct iommu_group *group,
struct iommu_domain *new_domain)
{
WARN_ON(__iommu_group_set_domain_internal(
group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
}
static int iommu_create_device_direct_mappings(struct iommu_group *group, static int iommu_create_device_direct_mappings(struct iommu_group *group,
struct device *dev); struct device *dev);
static struct iommu_group *iommu_group_get_for_dev(struct device *dev); static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
...@@ -2022,15 +2040,13 @@ EXPORT_SYMBOL_GPL(iommu_domain_free); ...@@ -2022,15 +2040,13 @@ EXPORT_SYMBOL_GPL(iommu_domain_free);
static void __iommu_group_set_core_domain(struct iommu_group *group) static void __iommu_group_set_core_domain(struct iommu_group *group)
{ {
struct iommu_domain *new_domain; struct iommu_domain *new_domain;
int ret;
if (group->owner) if (group->owner)
new_domain = group->blocking_domain; new_domain = group->blocking_domain;
else else
new_domain = group->default_domain; new_domain = group->default_domain;
ret = __iommu_group_set_domain(group, new_domain); __iommu_group_set_domain_nofail(group, new_domain);
WARN(ret, "iommu driver failed to attach the default/blocking domain");
} }
static int __iommu_attach_device(struct iommu_domain *domain, static int __iommu_attach_device(struct iommu_domain *domain,
...@@ -2215,21 +2231,55 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) ...@@ -2215,21 +2231,55 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
} }
EXPORT_SYMBOL_GPL(iommu_attach_group); EXPORT_SYMBOL_GPL(iommu_attach_group);
static int iommu_group_do_set_platform_dma(struct device *dev, void *data) static int __iommu_device_set_domain(struct iommu_group *group,
struct device *dev,
struct iommu_domain *new_domain,
unsigned int flags)
{ {
const struct iommu_ops *ops = dev_iommu_ops(dev); int ret;
if (!WARN_ON(!ops->set_platform_dma_ops))
ops->set_platform_dma_ops(dev);
ret = __iommu_attach_device(new_domain, dev);
if (ret) {
/*
* If we have a blocking domain then try to attach that in hopes
* of avoiding a UAF. Modern drivers should implement blocking
* domains as global statics that cannot fail.
*/
if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) &&
group->blocking_domain &&
group->blocking_domain != new_domain)
__iommu_attach_device(group->blocking_domain, dev);
return ret;
}
return 0; return 0;
} }
static int __iommu_group_set_domain(struct iommu_group *group, /*
struct iommu_domain *new_domain) * If 0 is returned the group's domain is new_domain. If an error is returned
{ * then the group's domain will be set back to the existing domain unless
* IOMMU_SET_DOMAIN_MUST_SUCCEED is set, in which case an error is returned and
* the group's domain is left inconsistent. It is a driver bug to fail attach
* with a previously good domain. We try to avoid a kernel UAF because of this.
*
* IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU
* API works on domains and devices. Bridge that gap by iterating over the
* devices in a group. Ideally we'd have a single device which represents the
* requestor ID of the group, but we also allow IOMMU drivers to create policy
* defined minimum sets, where the physical hardware may be able to distinguish
* members, but we wish to group them at a higher level (ex. untrusted
* multi-function PCI devices). Thus we attach each device.
*/
static int __iommu_group_set_domain_internal(struct iommu_group *group,
struct iommu_domain *new_domain,
unsigned int flags)
{
struct group_device *last_gdev;
struct group_device *gdev;
int result;
int ret; int ret;
lockdep_assert_held(&group->mutex);
if (group->domain == new_domain) if (group->domain == new_domain)
return 0; return 0;
...@@ -2239,8 +2289,12 @@ static int __iommu_group_set_domain(struct iommu_group *group, ...@@ -2239,8 +2289,12 @@ static int __iommu_group_set_domain(struct iommu_group *group,
* platform specific behavior. * platform specific behavior.
*/ */
if (!new_domain) { if (!new_domain) {
__iommu_group_for_each_dev(group, NULL, for_each_group_device(group, gdev) {
iommu_group_do_set_platform_dma); const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
if (!WARN_ON(!ops->set_platform_dma_ops))
ops->set_platform_dma_ops(gdev->dev);
}
group->domain = NULL; group->domain = NULL;
return 0; return 0;
} }
...@@ -2250,16 +2304,52 @@ static int __iommu_group_set_domain(struct iommu_group *group, ...@@ -2250,16 +2304,52 @@ static int __iommu_group_set_domain(struct iommu_group *group,
* domain. This switch does not have to be atomic and DMA can be * domain. This switch does not have to be atomic and DMA can be
* discarded during the transition. DMA must only be able to access * discarded during the transition. DMA must only be able to access
* either new_domain or group->domain, never something else. * either new_domain or group->domain, never something else.
*
* Note that this is called in error unwind paths, attaching to a
* domain that has already been attached cannot fail.
*/ */
ret = __iommu_group_for_each_dev(group, new_domain, result = 0;
iommu_group_do_attach_device); for_each_group_device(group, gdev) {
if (ret) ret = __iommu_device_set_domain(group, gdev->dev, new_domain,
return ret; flags);
if (ret) {
result = ret;
/*
* Keep trying the other devices in the group. If a
* driver fails attach to an otherwise good domain, and
* does not support blocking domains, it should at least
* drop its reference on the current domain so we don't
* UAF.
*/
if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED)
continue;
goto err_revert;
}
}
group->domain = new_domain; group->domain = new_domain;
return 0; return result;
err_revert:
/*
* This is called in error unwind paths. A well behaved driver should
* always allow us to attach to a domain that was already attached.
*/
last_gdev = gdev;
for_each_group_device(group, gdev) {
const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
/*
* If set_platform_dma_ops is not present a NULL domain can
* happen only for first probe, in which case we leave
* group->domain as NULL and let release clean everything up.
*/
if (group->domain)
WARN_ON(__iommu_device_set_domain(
group, gdev->dev, group->domain,
IOMMU_SET_DOMAIN_MUST_SUCCEED));
else if (ops->set_platform_dma_ops)
ops->set_platform_dma_ops(gdev->dev);
if (gdev == last_gdev)
break;
}
return ret;
} }
void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
...@@ -3176,16 +3266,13 @@ EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); ...@@ -3176,16 +3266,13 @@ EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
static void __iommu_release_dma_ownership(struct iommu_group *group) static void __iommu_release_dma_ownership(struct iommu_group *group)
{ {
int ret;
if (WARN_ON(!group->owner_cnt || !group->owner || if (WARN_ON(!group->owner_cnt || !group->owner ||
!xa_empty(&group->pasid_array))) !xa_empty(&group->pasid_array)))
return; return;
group->owner_cnt = 0; group->owner_cnt = 0;
group->owner = NULL; group->owner = NULL;
ret = __iommu_group_set_domain(group, group->default_domain); __iommu_group_set_domain_nofail(group, group->default_domain);
WARN(ret, "iommu driver failed to attach the default domain");
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment