Commit 67f269b3 authored by Jason Gunthorpe

RDMA/ucontext: Fix regression with disassociate

When this code was consolidated the intention was that the VMA would
become backed by anonymous zero pages after zap_vma_ptes() - however this
very subtly relied on setting vm_ops = NULL and clearing the VM_SHARED
bits to transform the VMA into an anonymous VMA. Since setting vm_ops to
NULL was removed, this broke.

Now userspace gets a SIGBUS if it touches the VMA after disassociation.

Instead of converting the VMA to anonymous, provide a fault handler that
puts a zeroed page into the VMA when userspace touches it after
disassociation.
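
For illustration, this is roughly what a userspace test would observe. A
minimal sketch, assuming a placeholder uverbs device node, mmap offset and
mapping size (the real values are provider specific and normally come from
the verbs library):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* Placeholder device node and offset; real drivers hand out
	 * specific offsets for doorbell/BAR pages. */
	int fd = open("/dev/infiniband/uverbs0", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	long pgsz = sysconf(_SC_PAGESIZE);

	/* The mapping must be MAP_SHARED; rdma_user_mmap_pre() now
	 * rejects non-shared mappings with -EINVAL. */
	volatile uint32_t *db = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
				     MAP_SHARED, fd, 0);
	if (db == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* ... the device is hot-unplugged / disassociated here ... */

	/* Before this fix the next access raised SIGBUS; with the fault
	 * handler below, reads return zeroes and writes land in a dummy
	 * page, so the process keeps running. */
	printf("read after disassociate: 0x%x\n", db[0]);
	db[0] = 1;

	munmap((void *)db, pgsz);
	close(fd);
	return 0;
}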

Cc: stable@vger.kernel.org
Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Fixes: 5f9794dc ("RDMA/ucontext: Add a core API for mmaping driver IO memory")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent d5e560d3
@@ -160,6 +160,7 @@ struct ib_uverbs_file {
 	struct mutex umap_lock;
 	struct list_head umaps;
+	struct page *disassociate_page;
 	struct idr idr;
 	/* spinlock protects write access to idr */
@@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref *ref)
 		kref_put(&file->async_file->ref,
 			 ib_uverbs_release_async_event_file);
 	put_device(&file->device->dev);
+
+	if (file->disassociate_page)
+		__free_pages(file->disassociate_page, 0);
 	kfree(file);
 }
@@ -877,9 +880,50 @@ static void rdma_umap_close(struct vm_area_struct *vma)
 	kfree(priv);
 }
 
+/*
+ * Once zap_vma_ptes() has been called, touches to the VMA will come here and
+ * we return a dummy writable zero page for all the pfns.
+ */
+static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
+{
+	struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
+	struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
+	vm_fault_t ret = 0;
+
+	if (!priv)
+		return VM_FAULT_SIGBUS;
+
+	/* Read only pages can just use the system zero page. */
+	if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
+		vmf->page = ZERO_PAGE(vmf->address);
+		get_page(vmf->page);
+		return 0;
+	}
+
+	mutex_lock(&ufile->umap_lock);
+	if (!ufile->disassociate_page)
+		ufile->disassociate_page =
+			alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
+
+	if (ufile->disassociate_page) {
+		/*
+		 * This VMA is forced to always be shared so this doesn't have
+		 * to worry about COW.
+		 */
+		vmf->page = ufile->disassociate_page;
+		get_page(vmf->page);
+	} else {
+		ret = VM_FAULT_SIGBUS;
+	}
+	mutex_unlock(&ufile->umap_lock);
+
+	return ret;
+}
+
 static const struct vm_operations_struct rdma_umap_ops = {
 	.open = rdma_umap_open,
 	.close = rdma_umap_close,
+	.fault = rdma_umap_fault,
 };
 
 static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
@@ -889,6 +933,9 @@ static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
 	struct ib_uverbs_file *ufile = ucontext->ufile;
 	struct rdma_umap_priv *priv;
 
+	if (!(vma->vm_flags & VM_SHARED))
+		return ERR_PTR(-EINVAL);
+
 	if (vma->vm_end - vma->vm_start != size)
 		return ERR_PTR(-EINVAL);
@@ -992,7 +1039,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 		 * at a time to get the lock ordering right. Typically there
 		 * will only be one mm, so no big deal.
 		 */
-		down_write(&mm->mmap_sem);
+		down_read(&mm->mmap_sem);
 		mutex_lock(&ufile->umap_lock);
 		list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
 					  list) {
@@ -1004,10 +1051,9 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 			zap_vma_ptes(vma, vma->vm_start,
 				     vma->vm_end - vma->vm_start);
-			vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
 		}
 		mutex_unlock(&ufile->umap_lock);
-		up_write(&mm->mmap_sem);
+		up_read(&mm->mmap_sem);
 		mmput(mm);
 	}
 }
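
Taken together: rdma_user_mmap_pre() now refuses mappings that are not
VM_SHARED, disassociation only zaps the PTEs (so mmap_sem can be taken for
read, the write lock was only needed while vm_flags was being edited), and
any later touch is served by the fault handler from a lazily allocated
zeroed page. A condensed, self-contained sketch of that pattern, using
illustrative names rather than the exact uverbs symbols:

#include <linux/mm.h>
#include <linux/mutex.h>

struct demo_file {
	struct mutex lock;
	struct page *zero_page;	/* lazily allocated on first write fault */
};

/* After zap_vma_ptes() every touch of the mapping ends up here. */
static vm_fault_t demo_fault(struct vm_fault *vmf)
{
	struct demo_file *df = vmf->vma->vm_file->private_data;
	vm_fault_t ret = 0;

	/* Read-only mappings can share the global zero page. */
	if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
		vmf->page = ZERO_PAGE(vmf->address);
		get_page(vmf->page);
		return 0;
	}

	mutex_lock(&df->lock);
	if (!df->zero_page)
		df->zero_page = alloc_page(vmf->gfp_mask | __GFP_ZERO);
	if (df->zero_page) {
		/* The VMA is forced to be shared, so no COW to worry about. */
		vmf->page = df->zero_page;
		get_page(vmf->page);
	} else {
		ret = VM_FAULT_SIGBUS;
	}
	mutex_unlock(&df->lock);

	return ret;
}

static const struct vm_operations_struct demo_vm_ops = {
	.fault = demo_fault,
};

/* Disassociation only tears down PTEs, so the read lock is sufficient. */
static void demo_disassociate(struct mm_struct *mm, struct vm_area_struct *vma)
{
	down_read(&mm->mmap_sem);
	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
	up_read(&mm->mmap_sem);
}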