Commit b022f597 authored by Fenghua Yu, committed by Vinod Koul

dmaengine: idxd: add idxd_copy_cr() to copy user completion record during page fault handling

Define idxd_copy_cr() to copy a completion record to the fault address in
the user address space that is found by work queue (wq) and PASID.

It will be used to write the user's completion record that the hardware
device is not able to write due to user completion record page fault.

An xarray is added to associate the PASID and mm with the
struct idxd_user_context so mm can be found by PASID and wq.

It is called when handling the completion record fault in a kernel thread
context. Switch to the mm using kthread_use_mm() and copy the
completion record to the mm via copy_to_user(). Once the copy is
completed, switch back to the current mm using kthread_unuse_mm().
Suggested-by: Christoph Hellwig <hch@infradead.org>
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Suggested-by: Tony Luck <tony.luck@intel.com>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/20230407203143.2189681-9-fenghua.yu@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
parent c2f156bf
...@@ -11,7 +11,9 @@ ...@@ -11,7 +11,9 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/poll.h> #include <linux/poll.h>
#include <linux/iommu.h> #include <linux/iommu.h>
#include <linux/highmem.h>
#include <uapi/linux/idxd.h> #include <uapi/linux/idxd.h>
#include <linux/xarray.h>
#include "registers.h" #include "registers.h"
#include "idxd.h" #include "idxd.h"
...@@ -34,6 +36,7 @@ struct idxd_user_context { ...@@ -34,6 +36,7 @@ struct idxd_user_context {
struct idxd_wq *wq; struct idxd_wq *wq;
struct task_struct *task; struct task_struct *task;
unsigned int pasid; unsigned int pasid;
struct mm_struct *mm;
unsigned int flags; unsigned int flags;
struct iommu_sva *sva; struct iommu_sva *sva;
}; };
...@@ -68,6 +71,19 @@ static inline struct idxd_wq *inode_wq(struct inode *inode) ...@@ -68,6 +71,19 @@ static inline struct idxd_wq *inode_wq(struct inode *inode)
return idxd_cdev->wq; return idxd_cdev->wq;
} }
/*
 * Clear the upasid_xa slot indexed by @ctx->pasid on the wq that owns @ctx.
 *
 * The slot is only cleared when it currently holds @ctx; if it holds anything
 * else a warning is logged. The exchange is done with wq->uc_lock held.
 */
static void idxd_xa_pasid_remove(struct idxd_user_context *ctx)
{
	struct idxd_wq *wq = ctx->wq;
	void *prev;

	mutex_lock(&wq->uc_lock);

	/* xa_cmpxchg() hands back whatever was stored in the slot. */
	prev = xa_cmpxchg(&wq->upasid_xa, ctx->pasid, ctx, NULL, GFP_KERNEL);
	if (prev != (void *)ctx) {
		dev_warn(&wq->idxd->pdev->dev,
			 "xarray cmpxchg failed for pasid %u\n", ctx->pasid);
	}

	mutex_unlock(&wq->uc_lock);
}
static int idxd_cdev_open(struct inode *inode, struct file *filp) static int idxd_cdev_open(struct inode *inode, struct file *filp)
{ {
struct idxd_user_context *ctx; struct idxd_user_context *ctx;
...@@ -108,20 +124,26 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) ...@@ -108,20 +124,26 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
pasid = iommu_sva_get_pasid(sva); pasid = iommu_sva_get_pasid(sva);
if (pasid == IOMMU_PASID_INVALID) { if (pasid == IOMMU_PASID_INVALID) {
iommu_sva_unbind_device(sva);
rc = -EINVAL; rc = -EINVAL;
goto failed; goto failed_get_pasid;
} }
ctx->sva = sva; ctx->sva = sva;
ctx->pasid = pasid; ctx->pasid = pasid;
ctx->mm = current->mm;
mutex_lock(&wq->uc_lock);
rc = xa_insert(&wq->upasid_xa, pasid, ctx, GFP_KERNEL);
mutex_unlock(&wq->uc_lock);
if (rc < 0)
dev_warn(dev, "PASID entry already exist in xarray.\n");
if (wq_dedicated(wq)) { if (wq_dedicated(wq)) {
rc = idxd_wq_set_pasid(wq, pasid); rc = idxd_wq_set_pasid(wq, pasid);
if (rc < 0) { if (rc < 0) {
iommu_sva_unbind_device(sva); iommu_sva_unbind_device(sva);
dev_err(dev, "wq set pasid failed: %d\n", rc); dev_err(dev, "wq set pasid failed: %d\n", rc);
goto failed; goto failed_set_pasid;
} }
} }
} }
...@@ -130,7 +152,13 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) ...@@ -130,7 +152,13 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
mutex_unlock(&wq->wq_lock); mutex_unlock(&wq->wq_lock);
return 0; return 0;
failed: failed_set_pasid:
if (device_user_pasid_enabled(idxd))
idxd_xa_pasid_remove(ctx);
failed_get_pasid:
if (device_user_pasid_enabled(idxd))
iommu_sva_unbind_device(sva);
failed:
mutex_unlock(&wq->wq_lock); mutex_unlock(&wq->wq_lock);
kfree(ctx); kfree(ctx);
return rc; return rc;
...@@ -161,8 +189,10 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) ...@@ -161,8 +189,10 @@ static int idxd_cdev_release(struct inode *node, struct file *filep)
} }
} }
if (ctx->sva) if (ctx->sva) {
iommu_sva_unbind_device(ctx->sva); iommu_sva_unbind_device(ctx->sva);
idxd_xa_pasid_remove(ctx);
}
kfree(ctx); kfree(ctx);
mutex_lock(&wq->wq_lock); mutex_lock(&wq->wq_lock);
idxd_wq_put(wq); idxd_wq_put(wq);
...@@ -418,3 +448,70 @@ void idxd_cdev_remove(void) ...@@ -418,3 +448,70 @@ void idxd_cdev_remove(void)
ida_destroy(&ictx[i].minor_ida); ida_destroy(&ictx[i].minor_ida);
} }
} }
/**
 * idxd_copy_cr - copy completion record to user address space found by wq and
 *		  PASID
 * @wq:		work queue
 * @pasid:	PASID
 * @addr:	user fault address to write
 * @cr:		completion record
 * @len:	number of bytes to copy
 *
 * This is called by a work that handles completion record fault.
 *
 * The user context is looked up in @wq's upasid_xa by @pasid while holding
 * wq->uc_lock. Everything after the first byte of @cr is copied first; the
 * first byte (the status byte) is written last, so that a user polling for a
 * non-zero status sees the rest of the record already in place.
 *
 * Return: number of bytes copied. 0 is returned when @len is smaller than
 * the status byte, when no user context is registered for @pasid, or when
 * nothing could be copied.
 */
int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr,
		 void *cr, int len)
{
	struct device *dev = &wq->idxd->pdev->dev;
	int left = len, status_size = 1;
	struct idxd_user_context *ctx;
	struct mm_struct *mm;

	/*
	 * A record shorter than the status byte cannot be copied. Bail out
	 * early so that "len - status_size" below never goes negative and
	 * gets passed to copy_to_user() as a huge unsigned size.
	 */
	if (len < status_size)
		return 0;

	mutex_lock(&wq->uc_lock);

	ctx = xa_load(&wq->upasid_xa, pasid);
	if (!ctx) {
		dev_warn(dev, "No user context\n");
		goto out;
	}

	/*
	 * NOTE(review): ctx->mm is used here without taking an extra
	 * reference; presumably the entry being present in upasid_xa under
	 * uc_lock is what keeps it valid - confirm against open/release.
	 */
	mm = ctx->mm;
	/*
	 * The completion record fault handling work is running in kernel
	 * thread context. It temporarily switches to the mm to copy cr
	 * to addr in the mm.
	 */
	kthread_use_mm(mm);
	left = copy_to_user((void __user *)addr + status_size, cr + status_size,
			    len - status_size);
	/*
	 * Copy status only after the rest of completion record is copied
	 * successfully so that the user gets the complete completion record
	 * when a non-zero status is polled.
	 */
	if (!left) {
		u8 status;

		/*
		 * Ensure that the completion record's status field is written
		 * after the rest of the completion record has been written.
		 * This ensures that the user receives the correct completion
		 * record information once polling for a non-zero status.
		 */
		wmb();
		status = *(u8 *)cr;
		if (put_user(status, (u8 __user *)addr))
			left += status_size;
	} else {
		/* The status byte was not attempted; count it as not copied. */
		left += status_size;
	}
	kthread_unuse_mm(mm);

out:
	mutex_unlock(&wq->uc_lock);

	return len - left;
}
...@@ -215,6 +215,10 @@ struct idxd_wq { ...@@ -215,6 +215,10 @@ struct idxd_wq {
char name[WQ_NAME_SIZE + 1]; char name[WQ_NAME_SIZE + 1];
u64 max_xfer_bytes; u64 max_xfer_bytes;
u32 max_batch_size; u32 max_batch_size;
/* Lock to protect upasid_xa access. */
struct mutex uc_lock;
struct xarray upasid_xa;
}; };
struct idxd_engine { struct idxd_engine {
...@@ -702,6 +706,8 @@ void idxd_cdev_remove(void); ...@@ -702,6 +706,8 @@ void idxd_cdev_remove(void);
int idxd_cdev_get_major(struct idxd_device *idxd); int idxd_cdev_get_major(struct idxd_device *idxd);
int idxd_wq_add_cdev(struct idxd_wq *wq); int idxd_wq_add_cdev(struct idxd_wq *wq);
void idxd_wq_del_cdev(struct idxd_wq *wq); void idxd_wq_del_cdev(struct idxd_wq *wq);
int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr,
void *buf, int len);
/* perfmon */ /* perfmon */
#if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON) #if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON)
......
...@@ -199,6 +199,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) ...@@ -199,6 +199,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
} }
bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS);
} }
mutex_init(&wq->uc_lock);
xa_init(&wq->upasid_xa);
idxd->wqs[i] = wq; idxd->wqs[i] = wq;
} }
......
...@@ -1292,6 +1292,7 @@ static void idxd_conf_wq_release(struct device *dev) ...@@ -1292,6 +1292,7 @@ static void idxd_conf_wq_release(struct device *dev)
bitmap_free(wq->opcap_bmap); bitmap_free(wq->opcap_bmap);
kfree(wq->wqcfg); kfree(wq->wqcfg);
xa_destroy(&wq->upasid_xa);
kfree(wq); kfree(wq);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment