Commit f7c6a7b5 authored by Roland Dreier

IB/uverbs: Export ib_umem_get()/ib_umem_release() to modules

Export ib_umem_get()/ib_umem_release() and put low-level drivers in
control of when to call ib_umem_get() to pin and DMA map userspace memory,
rather than always calling it in ib_uverbs_reg_mr() before calling the
low-level driver's reg_user_mr method.

Also move these functions to be in the ib_core module instead of
ib_uverbs, so that driver modules using them do not depend on
ib_uverbs.

This has a number of advantages:
 - It is better design from the standpoint of making generic code a
   library that can be used or overridden by device-specific code as
   the details of specific devices dictate.
 - Drivers that do not need to pin userspace memory regions do not
   need to take the performance hit of calling ib_umem_get().  For
   example, although I have not tried to implement it in this patch,
   the ipath driver should be able to avoid pinning memory and just
   use copy_{to,from}_user() to access userspace memory regions.
 - Buffers that need special mapping treatment can be identified by
   the low-level driver.  For example, it may be possible to solve
   some Altix-specific memory ordering issues with mthca CQs in
   userspace by mapping CQ buffers with extra flags.
 - Drivers that need to pin and DMA map userspace memory for things
   other than memory regions can use ib_umem_get() directly, instead
   of hacks using extra parameters to their reg_phys_mr method.  For
   example, the mlx4 driver that is pending being merged needs to pin
   and DMA map QP and CQ buffers, but it does not need to create a
   memory key for these buffers.  So the cleanest solution is for mlx4
   to call ib_umem_get() in the create_qp and create_cq methods.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 36f021b5
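
For context, the driver-side pattern this change enables looks roughly like the
sketch below; the mydrv_* names and struct mydrv_mr are hypothetical and not part
of the patch.  Each reg_user_mr implementation now pins the region itself with
ib_umem_get() and drops it with ib_umem_release() on error or deregistration:

    #include <linux/err.h>
    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <rdma/ib_umem.h>
    #include <rdma/ib_verbs.h>

    struct mydrv_mr {
            struct ib_mr    ibmr;
            struct ib_umem *umem;
    };

    static struct ib_mr *mydrv_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                           u64 virt, int acc, struct ib_udata *udata)
    {
            struct mydrv_mr *mr;
            int err;

            mr = kmalloc(sizeof *mr, GFP_KERNEL);
            if (!mr)
                    return ERR_PTR(-ENOMEM);

            /* Pin and DMA map the userspace region; the uverbs core no longer does this. */
            mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
            if (IS_ERR(mr->umem)) {
                    err = PTR_ERR(mr->umem);
                    kfree(mr);
                    return ERR_PTR(err);
            }

            /* ... walk mr->umem->chunk_list and program the HCA with virt here ... */

            return &mr->ibmr;
    }

    static int mydrv_dereg_mr(struct ib_mr *ibmr)
    {
            struct mydrv_mr *mr = container_of(ibmr, struct mydrv_mr, ibmr);

            /* Unpinning is now also the driver's responsibility. */
            ib_umem_release(mr->umem);
            kfree(mr);
            return 0;
    }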
@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
 	  libibverbs, libibcm and a hardware driver library from
 	  <http://www.openib.org>.
 
+config INFINIBAND_USER_MEM
+	bool
+	depends on INFINIBAND_USER_ACCESS != n
+	default y
+
 config INFINIBAND_ADDR_TRANS
 	bool
 	depends on INFINIBAND && INET
......
@@ -9,6 +9,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
 ib_core-y :=		packer.o ud_header.o verbs.o sysfs.o \
 			device.o fmr_pool.o cache.o
+ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
 
 ib_mad-y :=		mad.o smi.o agent.o mad_rmpp.o
@@ -28,5 +29,4 @@ ib_umad-y :=		user_mad.o
 
 ib_ucm-y :=		ucm.o
 
-ib_uverbs-y :=		uverbs_main.o uverbs_cmd.o uverbs_mem.o \
-			uverbs_marshall.o
+ib_uverbs-y :=		uverbs_main.o uverbs_cmd.o uverbs_marshall.o
@@ -613,6 +613,8 @@ static void __exit ib_core_cleanup(void)
 {
 	ib_cache_cleanup();
 	ib_sysfs_cleanup();
+	/* Make sure that any pending umem accounting work is done. */
+	flush_scheduled_work();
 }
 
 module_init(ib_core_init);
......
@@ -64,35 +64,56 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	}
 }
 
-int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
-		void *addr, size_t size, int write)
+/**
+ * ib_umem_get - Pin and DMA map userspace memory.
+ * @context: userspace context to pin memory for
+ * @addr: userspace virtual address to start at
+ * @size: length of region to pin
+ * @access: IB_ACCESS_xxx flags for memory being pinned
+ */
+struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+			    size_t size, int access)
 {
+	struct ib_umem *umem;
 	struct page **page_list;
 	struct ib_umem_chunk *chunk;
 	unsigned long locked;
 	unsigned long lock_limit;
 	unsigned long cur_base;
 	unsigned long npages;
-	int ret = 0;
+	int ret;
 	int off;
 	int i;
 
 	if (!can_do_mlock())
-		return -EPERM;
+		return ERR_PTR(-EPERM);
 
-	page_list = (struct page **) __get_free_page(GFP_KERNEL);
-	if (!page_list)
-		return -ENOMEM;
+	umem = kmalloc(sizeof *umem, GFP_KERNEL);
+	if (!umem)
+		return ERR_PTR(-ENOMEM);
 
-	mem->user_base = (unsigned long) addr;
-	mem->length    = size;
-	mem->offset    = (unsigned long) addr & ~PAGE_MASK;
-	mem->page_size = PAGE_SIZE;
-	mem->writable  = write;
+	umem->context   = context;
+	umem->length    = size;
+	umem->offset    = addr & ~PAGE_MASK;
+	umem->page_size = PAGE_SIZE;
+	/*
+	 * We ask for writable memory if any access flags other than
+	 * "remote read" are set.  "Local write" and "remote write"
+	 * obviously require write access.  "Remote atomic" can do
+	 * things like fetch and add, which will modify memory, and
+	 * "MW bind" can change permissions by binding a window.
+	 */
+	umem->writable  = !!(access & ~IB_ACCESS_REMOTE_READ);
 
-	INIT_LIST_HEAD(&mem->chunk_list);
+	INIT_LIST_HEAD(&umem->chunk_list);
 
-	npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;
+	page_list = (struct page **) __get_free_page(GFP_KERNEL);
+	if (!page_list) {
+		kfree(umem);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
 
 	down_write(&current->mm->mmap_sem);
@@ -104,13 +125,13 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
 		goto out;
 	}
 
-	cur_base = (unsigned long) addr & PAGE_MASK;
+	cur_base = addr & PAGE_MASK;
 
 	while (npages) {
 		ret = get_user_pages(current, current->mm, cur_base,
 				     min_t(int, npages,
 					   PAGE_SIZE / sizeof (struct page *)),
-				     1, !write, page_list, NULL);
+				     1, !umem->writable, page_list, NULL);
 
 		if (ret < 0)
 			goto out;
@@ -136,7 +157,7 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
 				chunk->page_list[i].length = PAGE_SIZE;
 			}
 
-			chunk->nmap = ib_dma_map_sg(dev,
+			chunk->nmap = ib_dma_map_sg(context->device,
 						    &chunk->page_list[0],
 						    chunk->nents,
 						    DMA_BIDIRECTIONAL);
@@ -151,33 +172,25 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
 			ret -= chunk->nents;
 			off += chunk->nents;
-			list_add_tail(&chunk->list, &mem->chunk_list);
+			list_add_tail(&chunk->list, &umem->chunk_list);
 		}
 
 		ret = 0;
 	}
 
 out:
-	if (ret < 0)
-		__ib_umem_release(dev, mem, 0);
-	else
+	if (ret < 0) {
+		__ib_umem_release(context->device, umem, 0);
+		kfree(umem);
+	} else
 		current->mm->locked_vm = locked;
 
 	up_write(&current->mm->mmap_sem);
 	free_page((unsigned long) page_list);
 
-	return ret;
+	return ret < 0 ? ERR_PTR(ret) : umem;
 }
-
-void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
-{
-	__ib_umem_release(dev, umem, 1);
-
-	down_write(&current->mm->mmap_sem);
-	current->mm->locked_vm -=
-		PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
-	up_write(&current->mm->mmap_sem);
-}
+EXPORT_SYMBOL(ib_umem_get);
 
 static void ib_umem_account(struct work_struct *_work)
 {
@@ -191,26 +204,35 @@ static void ib_umem_account(struct work_struct *_work)
 	kfree(work);
 }
 
-void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
+/**
+ * ib_umem_release - release memory pinned with ib_umem_get
+ * @umem: umem struct to release
+ */
+void ib_umem_release(struct ib_umem *umem)
 {
 	struct ib_umem_account_work *work;
+	struct ib_ucontext *context = umem->context;
 	struct mm_struct *mm;
+	unsigned long diff;
 
-	__ib_umem_release(dev, umem, 1);
+	__ib_umem_release(umem->context->device, umem, 1);
 
 	mm = get_task_mm(current);
 	if (!mm)
 		return;
 
+	diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+	kfree(umem);
+
 	/*
 	 * We may be called with the mm's mmap_sem already held.  This
 	 * can happen when a userspace munmap() is the call that drops
 	 * the last reference to our file and calls our release
 	 * method.  If there are memory regions to destroy, we'll end
-	 * up here and not be able to take the mmap_sem.  Therefore we
-	 * defer the vm_locked accounting to the system workqueue.
+	 * up here and not be able to take the mmap_sem.  In that case
+	 * we defer the vm_locked accounting to the system workqueue.
 	 */
+	if (context->closing && !down_write_trylock(&mm->mmap_sem)) {
 		work = kmalloc(sizeof *work, GFP_KERNEL);
 		if (!work) {
 			mmput(mm);
@@ -219,7 +241,33 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
 		INIT_WORK(&work->work, ib_umem_account);
 		work->mm   = mm;
-		work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+		work->diff = diff;
 
 		schedule_work(&work->work);
+		return;
+	} else
+		down_write(&mm->mmap_sem);
+
+	current->mm->locked_vm -= diff;
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+}
+EXPORT_SYMBOL(ib_umem_release);
+
+int ib_umem_page_count(struct ib_umem *umem)
+{
+	struct ib_umem_chunk *chunk;
+	int shift;
+	int i;
+	int n;
+
+	shift = ilog2(umem->page_size);
+
+	n = 0;
+	list_for_each_entry(chunk, &umem->chunk_list, list)
+		for (i = 0; i < chunk->nmap; ++i)
+			n += sg_dma_len(&chunk->page_list[i]) >> shift;
+
+	return n;
 }
+EXPORT_SYMBOL(ib_umem_page_count);
@@ -45,6 +45,7 @@
 #include <linux/completion.h>
 
 #include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 
 /*
@@ -163,11 +164,6 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
 			     struct ib_event *event);
 
-int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
-		void *addr, size_t size, int write);
-void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
-void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
-
 #define IB_UVERBS_DECLARE_CMD(name)					\
 	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\
 				 const char __user *buf, int in_len,	\
......
 /*
  * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
  * Copyright (c) 2005 PathScale, Inc. All rights reserved.
  * Copyright (c) 2006 Mellanox Technologies. All rights reserved.
  *
@@ -295,6 +295,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	INIT_LIST_HEAD(&ucontext->qp_list);
 	INIT_LIST_HEAD(&ucontext->srq_list);
 	INIT_LIST_HEAD(&ucontext->ah_list);
+	ucontext->closing = 0;
 
 	resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -573,7 +574,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	struct ib_uverbs_reg_mr cmd;
 	struct ib_uverbs_reg_mr_resp resp;
 	struct ib_udata udata;
-	struct ib_umem_object *obj;
+	struct ib_uobject *uobj;
 	struct ib_pd *pd;
 	struct ib_mr *mr;
 	int ret;
@@ -599,35 +600,21 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	    !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
 		return -EINVAL;
 
-	obj = kmalloc(sizeof *obj, GFP_KERNEL);
-	if (!obj)
+	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+	if (!uobj)
 		return -ENOMEM;
 
-	init_uobj(&obj->uobject, 0, file->ucontext, &mr_lock_key);
-	down_write(&obj->uobject.mutex);
-
-	/*
-	 * We ask for writable memory if any access flags other than
-	 * "remote read" are set.  "Local write" and "remote write"
-	 * obviously require write access.  "Remote atomic" can do
-	 * things like fetch and add, which will modify memory, and
-	 * "MW bind" can change permissions by binding a window.
-	 */
-	ret = ib_umem_get(file->device->ib_dev, &obj->umem,
-			  (void *) (unsigned long) cmd.start, cmd.length,
-			  !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
-	if (ret)
-		goto err_free;
-
-	obj->umem.virt_base = cmd.hca_va;
+	init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
+	down_write(&uobj->mutex);
 
 	pd = idr_read_pd(cmd.pd_handle, file->ucontext);
 	if (!pd) {
 		ret = -EINVAL;
-		goto err_release;
+		goto err_free;
 	}
 
-	mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
+	mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
+				     cmd.access_flags, &udata);
 	if (IS_ERR(mr)) {
 		ret = PTR_ERR(mr);
 		goto err_put;
@@ -635,19 +622,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 
 	mr->device  = pd->device;
 	mr->pd      = pd;
-	mr->uobject = &obj->uobject;
+	mr->uobject = uobj;
 	atomic_inc(&pd->usecnt);
 	atomic_set(&mr->usecnt, 0);
 
-	obj->uobject.object = mr;
-	ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject);
+	uobj->object = mr;
+	ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
 	if (ret)
 		goto err_unreg;
 
 	memset(&resp, 0, sizeof resp);
 	resp.lkey      = mr->lkey;
 	resp.rkey      = mr->rkey;
-	resp.mr_handle = obj->uobject.id;
+	resp.mr_handle = uobj->id;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
@@ -658,17 +645,17 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	put_pd_read(pd);
 
 	mutex_lock(&file->mutex);
-	list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
+	list_add_tail(&uobj->list, &file->ucontext->mr_list);
 	mutex_unlock(&file->mutex);
 
-	obj->uobject.live = 1;
+	uobj->live = 1;
 
-	up_write(&obj->uobject.mutex);
+	up_write(&uobj->mutex);
 
 	return in_len;
 
 err_copy:
-	idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject);
+	idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
 
 err_unreg:
 	ib_dereg_mr(mr);
@@ -676,11 +663,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 err_put:
 	put_pd_read(pd);
 
-err_release:
-	ib_umem_release(file->device->ib_dev, &obj->umem);
-
 err_free:
-	put_uobj_write(&obj->uobject);
+	put_uobj_write(uobj);
 	return ret;
 }
 
@@ -691,7 +675,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 	struct ib_uverbs_dereg_mr cmd;
 	struct ib_mr *mr;
 	struct ib_uobject *uobj;
-	struct ib_umem_object *memobj;
 	int ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -701,7 +684,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 	if (!uobj)
 		return -EINVAL;
 
-	memobj = container_of(uobj, struct ib_umem_object, uobject);
 	mr = uobj->object;
 
 	ret = ib_dereg_mr(mr);
@@ -719,8 +701,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 	list_del(&uobj->list);
 	mutex_unlock(&file->mutex);
 
-	ib_umem_release(file->device->ib_dev, &memobj->umem);
-
 	put_uobj(uobj);
 
 	return in_len;
......
@@ -183,6 +183,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	if (!context)
 		return 0;
 
+	context->closing = 1;
+
 	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
 		struct ib_ah *ah = uobj->object;
 
@@ -230,16 +232,10 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
 		struct ib_mr *mr = uobj->object;
-		struct ib_device *mrdev = mr->device;
-		struct ib_umem_object *memobj;
 
 		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
 		ib_dereg_mr(mr);
-
-		memobj = container_of(uobj, struct ib_umem_object, uobject);
-		ib_umem_release_on_close(mrdev, &memobj->umem);
-		kfree(memobj);
+		kfree(uobj);
 	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
@@ -906,7 +902,6 @@ static void __exit ib_uverbs_cleanup(void)
 	unregister_filesystem(&uverbs_event_fs);
 	class_destroy(uverbs_class);
 	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
-	flush_scheduled_work();
 	idr_destroy(&ib_uverbs_pd_idr);
 	idr_destroy(&ib_uverbs_mr_idr);
 	idr_destroy(&ib_uverbs_mw_idr);
......
@@ -56,6 +56,7 @@
 #include <asm/byteorder.h>
 
 #include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 #include "c2.h"
 #include "c2_provider.h"
@@ -396,6 +397,7 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
 	}
 
 	mr->pd = to_c2pd(ib_pd);
+	mr->umem = NULL;
 	pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
 		"*iova_start %llx, first pa %llx, last pa %llx\n",
 		__FUNCTION__, page_shift, pbl_depth, total_len,
@@ -428,8 +430,8 @@ static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
 	return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
 }
 
-static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-				    int acc, struct ib_udata *udata)
+static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				    u64 virt, int acc, struct ib_udata *udata)
 {
 	u64 *pages;
 	u64 kva = 0;
@@ -441,15 +443,23 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	struct c2_mr *c2mr;
 
 	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
-	shift = ffs(region->page_size) - 1;
 
 	c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
 	if (!c2mr)
 		return ERR_PTR(-ENOMEM);
 	c2mr->pd = c2pd;
 
+	c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+	if (IS_ERR(c2mr->umem)) {
+		err = PTR_ERR(c2mr->umem);
+		kfree(c2mr);
+		return ERR_PTR(err);
+	}
+
+	shift = ffs(c2mr->umem->page_size) - 1;
+
 	n = 0;
-	list_for_each_entry(chunk, &region->chunk_list, list)
+	list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
 		n += chunk->nents;
 
 	pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -459,35 +469,34 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	}
 
 	i = 0;
-	list_for_each_entry(chunk, &region->chunk_list, list) {
+	list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
 		for (j = 0; j < chunk->nmap; ++j) {
 			len = sg_dma_len(&chunk->page_list[j]) >> shift;
 			for (k = 0; k < len; ++k) {
 				pages[i++] =
 					sg_dma_address(&chunk->page_list[j]) +
-					(region->page_size * k);
+					(c2mr->umem->page_size * k);
 			}
 		}
 	}
 
-	kva = (u64)region->virt_base;
+	kva = virt;
 	err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
 					 pages,
-					 region->page_size,
+					 c2mr->umem->page_size,
 					 i,
-					 region->length,
-					 region->offset,
+					 length,
+					 c2mr->umem->offset,
 					 &kva,
 					 c2_convert_access(acc),
 					 c2mr);
 	kfree(pages);
-	if (err) {
-		kfree(c2mr);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto err;
 	return &c2mr->ibmr;
 
 err:
+	ib_umem_release(c2mr->umem);
 	kfree(c2mr);
 	return ERR_PTR(err);
 }
@@ -502,8 +511,11 @@ static int c2_dereg_mr(struct ib_mr *ib_mr)
 	err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
 	if (err)
 		pr_debug("c2_stag_dealloc failed: %d\n", err);
-	else
+	else {
+		if (mr->umem)
+			ib_umem_release(mr->umem);
 		kfree(mr);
+	}
 
 	return err;
 }
......
@@ -73,6 +73,7 @@ struct c2_pd {
 struct c2_mr {
 	struct ib_mr ibmr;
 	struct c2_pd *pd;
+	struct ib_umem *umem;
 };
 
 struct c2_av;
......
@@ -47,6 +47,7 @@
 #include <rdma/iw_cm.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 
 #include "cxio_hal.h"
@@ -443,6 +444,8 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
 	remove_handle(rhp, &rhp->mmidr, mmid);
 	if (mhp->kva)
 		kfree((void *) (unsigned long) mhp->kva);
+	if (mhp->umem)
+		ib_umem_release(mhp->umem);
 	PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
 	kfree(mhp);
 	return 0;
@@ -577,8 +580,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
 }
 
-static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-				      int acc, struct ib_udata *udata)
+static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				      u64 virt, int acc, struct ib_udata *udata)
 {
 	__be64 *pages;
 	int shift, n, len;
@@ -591,7 +594,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	struct iwch_reg_user_mr_resp uresp;
 
 	PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
-	shift = ffs(region->page_size) - 1;
 
 	php = to_iwch_pd(pd);
 	rhp = php->rhp;
@@ -599,8 +601,17 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	if (!mhp)
 		return ERR_PTR(-ENOMEM);
 
+	mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+	if (IS_ERR(mhp->umem)) {
+		err = PTR_ERR(mhp->umem);
+		kfree(mhp);
+		return ERR_PTR(err);
+	}
+
+	shift = ffs(mhp->umem->page_size) - 1;
+
 	n = 0;
-	list_for_each_entry(chunk, &region->chunk_list, list)
+	list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
 		n += chunk->nents;
 
 	pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -611,13 +622,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 
 	i = n = 0;
 
-	list_for_each_entry(chunk, &region->chunk_list, list)
+	list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
 		for (j = 0; j < chunk->nmap; ++j) {
 			len = sg_dma_len(&chunk->page_list[j]) >> shift;
 			for (k = 0; k < len; ++k) {
 				pages[i++] = cpu_to_be64(sg_dma_address(
 					&chunk->page_list[j]) +
-					region->page_size * k);
+					mhp->umem->page_size * k);
 			}
 		}
 
@@ -625,9 +636,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	mhp->attr.pdid = php->pdid;
 	mhp->attr.zbva = 0;
 	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
-	mhp->attr.va_fbo = region->virt_base;
+	mhp->attr.va_fbo = virt;
 	mhp->attr.page_size = shift - 12;
-	mhp->attr.len = (u32) region->length;
+	mhp->attr.len = (u32) length;
 	mhp->attr.pbl_size = i;
 	err = iwch_register_mem(rhp, php, mhp, shift, pages);
 	kfree(pages);
@@ -650,6 +661,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	return &mhp->ibmr;
 
 err:
+	ib_umem_release(mhp->umem);
 	kfree(mhp);
 	return ERR_PTR(err);
 }
......
@@ -73,6 +73,7 @@ struct tpt_attributes {
 
 struct iwch_mr {
 	struct ib_mr ibmr;
+	struct ib_umem *umem;
 	struct iwch_dev *rhp;
 	u64 kva;
 	struct tpt_attributes attr;
......
@@ -176,6 +176,7 @@ struct ehca_mr {
 		struct ib_mr ib_mr;	/* must always be first in ehca_mr */
 		struct ib_fmr ib_fmr;	/* must always be first in ehca_mr */
 	} ib;
+	struct ib_umem *umem;
 	spinlock_t mrlock;
 
 	enum ehca_mr_flag flags;
......
@@ -78,8 +78,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
 			       int num_phys_buf,
 			       int mr_access_flags, u64 *iova_start);
 
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
-			       struct ib_umem *region,
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
 			       int mr_access_flags, struct ib_udata *udata);
 
 int ehca_rereg_phys_mr(struct ib_mr *mr,
......
@@ -39,6 +39,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <rdma/ib_umem.h>
+
 #include <asm/current.h>
 
 #include "ehca_iverbs.h"
@@ -238,10 +240,8 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
 
 /*----------------------------------------------------------------------*/
 
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
-			       struct ib_umem *region,
-			       int mr_access_flags,
-			       struct ib_udata *udata)
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
+			       int mr_access_flags, struct ib_udata *udata)
 {
 	struct ib_mr *ib_mr;
 	struct ehca_mr *e_mr;
@@ -257,11 +257,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
 		ehca_gen_err("bad pd=%p", pd);
 		return ERR_PTR(-EFAULT);
 	}
-	if (!region) {
-		ehca_err(pd->device, "bad input values: region=%p", region);
-		ib_mr = ERR_PTR(-EINVAL);
-		goto reg_user_mr_exit0;
-	}
+
 	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
 	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
 	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
@@ -275,17 +271,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
 		ib_mr = ERR_PTR(-EINVAL);
 		goto reg_user_mr_exit0;
 	}
-	if (region->page_size != PAGE_SIZE) {
-		ehca_err(pd->device, "page size not supported, "
-			 "region->page_size=%x", region->page_size);
-		ib_mr = ERR_PTR(-EINVAL);
-		goto reg_user_mr_exit0;
-	}
 
-	if ((region->length == 0) ||
-	    ((region->virt_base + region->length) < region->virt_base)) {
+	if (length == 0 || virt + length < virt) {
 		ehca_err(pd->device, "bad input values: length=%lx "
-			 "virt_base=%lx", region->length, region->virt_base);
+			 "virt_base=%lx", length, virt);
 		ib_mr = ERR_PTR(-EINVAL);
 		goto reg_user_mr_exit0;
 	}
@@ -297,40 +286,55 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
 		goto reg_user_mr_exit0;
 	}
 
+	e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
+				 mr_access_flags);
+	if (IS_ERR(e_mr->umem)) {
+		ib_mr = (void *) e_mr->umem;
+		goto reg_user_mr_exit1;
+	}
+
+	if (e_mr->umem->page_size != PAGE_SIZE) {
+		ehca_err(pd->device, "page size not supported, "
+			 "e_mr->umem->page_size=%x", e_mr->umem->page_size);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit2;
+	}
+
 	/* determine number of MR pages */
-	num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length +
-			 PAGE_SIZE - 1) / PAGE_SIZE);
-	num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length +
-			 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+	num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) /
+			PAGE_SIZE);
+	num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) /
+			EHCA_PAGESIZE);
 
 	/* register MR on HCA */
 	pginfo.type = EHCA_MR_PGI_USER;
 	pginfo.num_pages = num_pages_mr;
 	pginfo.num_4k = num_pages_4k;
-	pginfo.region = region;
-	pginfo.next_4k = region->offset / EHCA_PAGESIZE;
+	pginfo.region = e_mr->umem;
+	pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE;
 	pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
-					       (&region->chunk_list),
+					       (&e_mr->umem->chunk_list),
 					       list);
 
-	ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base,
-			  region->length, mr_access_flags, e_pd, &pginfo,
-			  &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
+	ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd,
+			  &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
 	if (ret) {
 		ib_mr = ERR_PTR(ret);
-		goto reg_user_mr_exit1;
+		goto reg_user_mr_exit2;
 	}
 
 	/* successful registration of all pages */
 	return &e_mr->ib.ib_mr;
 
+reg_user_mr_exit2:
+	ib_umem_release(e_mr->umem);
 reg_user_mr_exit1:
 	ehca_mr_delete(e_mr);
 reg_user_mr_exit0:
 	if (IS_ERR(ib_mr))
-		ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x"
+		ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x"
 			 " udata=%p",
-			 PTR_ERR(ib_mr), pd, region, mr_access_flags, udata);
+			 PTR_ERR(ib_mr), pd, mr_access_flags, udata);
 	return ib_mr;
 } /* end ehca_reg_user_mr() */
 
@@ -596,6 +600,9 @@ int ehca_dereg_mr(struct ib_mr *mr)
 		goto dereg_mr_exit0;
 	}
 
+	if (e_mr->umem)
+		ib_umem_release(e_mr->umem);
+
 	/* successful deregistration */
 	ehca_mr_delete(e_mr);
......
@@ -31,6 +31,7 @@
  * SOFTWARE.
  */
 
+#include <rdma/ib_umem.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_smi.h>
 
@@ -147,6 +148,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
 	mr->mr.offset = 0;
 	mr->mr.access_flags = acc;
 	mr->mr.max_segs = num_phys_buf;
+	mr->umem = NULL;
 
 	m = 0;
 	n = 0;
@@ -170,46 +172,56 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
 /**
  * ipath_reg_user_mr - register a userspace memory region
  * @pd: protection domain for this memory region
- * @region: the user memory region
+ * @start: starting userspace address
+ * @length: length of region to register
+ * @virt_addr: virtual address to use (from HCA's point of view)
  * @mr_access_flags: access flags for this memory region
  * @udata: unused by the InfiniPath driver
 *
 * Returns the memory region on success, otherwise returns an errno.
 */
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-				int mr_access_flags, struct ib_udata *udata)
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				u64 virt_addr, int mr_access_flags,
				struct ib_udata *udata)
 {
 	struct ipath_mr *mr;
+	struct ib_umem *umem;
 	struct ib_umem_chunk *chunk;
 	int n, m, i;
 	struct ib_mr *ret;
 
-	if (region->length == 0) {
+	if (length == 0) {
 		ret = ERR_PTR(-EINVAL);
 		goto bail;
 	}
 
+	umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags);
+	if (IS_ERR(umem))
+		return (void *) umem;
+
 	n = 0;
-	list_for_each_entry(chunk, &region->chunk_list, list)
+	list_for_each_entry(chunk, &umem->chunk_list, list)
 		n += chunk->nents;
 
 	mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
 	if (!mr) {
 		ret = ERR_PTR(-ENOMEM);
+		ib_umem_release(umem);
 		goto bail;
 	}
 
 	mr->mr.pd = pd;
-	mr->mr.user_base = region->user_base;
-	mr->mr.iova = region->virt_base;
-	mr->mr.length = region->length;
-	mr->mr.offset = region->offset;
+	mr->mr.user_base = start;
+	mr->mr.iova = virt_addr;
+	mr->mr.length = length;
+	mr->mr.offset = umem->offset;
 	mr->mr.access_flags = mr_access_flags;
 	mr->mr.max_segs = n;
+	mr->umem = umem;
 
 	m = 0;
 	n = 0;
-	list_for_each_entry(chunk, &region->chunk_list, list) {
+	list_for_each_entry(chunk, &umem->chunk_list, list) {
 		for (i = 0; i < chunk->nents; i++) {
 			void *vaddr;
 
@@ -219,7 +231,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 				goto bail;
 			}
 			mr->mr.map[m]->segs[n].vaddr = vaddr;
-			mr->mr.map[m]->segs[n].length = region->page_size;
+			mr->mr.map[m]->segs[n].length = umem->page_size;
 			n++;
 			if (n == IPATH_SEGSZ) {
 				m++;
@@ -253,6 +265,10 @@ int ipath_dereg_mr(struct ib_mr *ibmr)
 		i--;
 		kfree(mr->mr.map[i]);
 	}
+
+	if (mr->umem)
+		ib_umem_release(mr->umem);
+
 	kfree(mr);
 	return 0;
 }
......
@@ -251,6 +251,7 @@ struct ipath_sge {
 /* Memory region */
 struct ipath_mr {
 	struct ib_mr ibmr;
+	struct ib_umem *umem;
 	struct ipath_mregion mr;	/* must be last */
 };
 
@@ -751,8 +752,8 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
 				struct ib_phys_buf *buffer_list,
 				int num_phys_buf, int acc, u64 *iova_start);
 
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-				int mr_access_flags,
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				u64 virt_addr, int mr_access_flags,
 				struct ib_udata *udata);
 
 int ipath_dereg_mr(struct ib_mr *ibmr);
......
@@ -37,6 +37,7 @@
 */
 
 #include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 #include <linux/mm.h>
 
@@ -908,6 +909,8 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
 		return ERR_PTR(err);
 	}
 
+	mr->umem = NULL;
+
 	return &mr->ibmr;
 }
 
@@ -1003,11 +1006,13 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
 	}
 
 	kfree(page_list);
+	mr->umem = NULL;
+
 	return &mr->ibmr;
 }
 
-static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-				       int acc, struct ib_udata *udata)
+static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				       u64 virt, int acc, struct ib_udata *udata)
 {
 	struct mthca_dev *dev = to_mdev(pd->device);
 	struct ib_umem_chunk *chunk;
@@ -1018,20 +1023,26 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	int err = 0;
 	int write_mtt_size;
 
-	shift = ffs(region->page_size) - 1;
-
 	mr = kmalloc(sizeof *mr, GFP_KERNEL);
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
+	mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+	if (IS_ERR(mr->umem)) {
+		err = PTR_ERR(mr->umem);
+		goto err;
+	}
+
+	shift = ffs(mr->umem->page_size) - 1;
+
 	n = 0;
-	list_for_each_entry(chunk, &region->chunk_list, list)
+	list_for_each_entry(chunk, &mr->umem->chunk_list, list)
 		n += chunk->nents;
 
 	mr->mtt = mthca_alloc_mtt(dev, n);
 	if (IS_ERR(mr->mtt)) {
 		err = PTR_ERR(mr->mtt);
-		goto err;
+		goto err_umem;
 	}
 
 	pages = (u64 *) __get_free_page(GFP_KERNEL);
@@ -1044,12 +1055,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 
 	write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
 
-	list_for_each_entry(chunk, &region->chunk_list, list)
+	list_for_each_entry(chunk, &mr->umem->chunk_list, list)
 		for (j = 0; j < chunk->nmap; ++j) {
 			len = sg_dma_len(&chunk->page_list[j]) >> shift;
 			for (k = 0; k < len; ++k) {
 				pages[i++] = sg_dma_address(&chunk->page_list[j]) +
-					region->page_size * k;
+					mr->umem->page_size * k;
 				/*
 				 * Be friendly to write_mtt and pass it chunks
 				 * of appropriate size.
@@ -1071,8 +1082,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	if (err)
 		goto err_mtt;
 
-	err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
-			     region->length, convert_access(acc), mr);
+	err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
+			     convert_access(acc), mr);
 
 	if (err)
 		goto err_mtt;
@@ -1082,6 +1093,9 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 err_mtt:
 	mthca_free_mtt(dev, mr->mtt);
 
+err_umem:
+	ib_umem_release(mr->umem);
+
 err:
 	kfree(mr);
 	return ERR_PTR(err);
@@ -1090,8 +1104,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 static int mthca_dereg_mr(struct ib_mr *mr)
 {
 	struct mthca_mr *mmr = to_mmr(mr);
+
 	mthca_free_mr(to_mdev(mr->device), mmr);
+	if (mmr->umem)
+		ib_umem_release(mmr->umem);
 	kfree(mmr);
+
 	return 0;
 }
......
@@ -73,6 +73,7 @@ struct mthca_mtt;
 
 struct mthca_mr {
 	struct ib_mr ibmr;
+	struct ib_umem *umem;
 	struct mthca_mtt *mtt;
 };
......
/*
* Copyright (c) 2007 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef IB_UMEM_H
#define IB_UMEM_H

#include <linux/list.h>
#include <linux/scatterlist.h>

struct ib_ucontext;

struct ib_umem {
	struct ib_ucontext *context;
	size_t length;
	int offset;
	int page_size;
	int writable;
	struct list_head chunk_list;
};

struct ib_umem_chunk {
	struct list_head list;
	int nents;
	int nmap;
	struct scatterlist page_list[0];
};

#ifdef CONFIG_INFINIBAND_USER_MEM

struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
			    size_t size, int access);
void ib_umem_release(struct ib_umem *umem);
int ib_umem_page_count(struct ib_umem *umem);

#else /* CONFIG_INFINIBAND_USER_MEM */

#include <linux/err.h>

static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
					  unsigned long addr, size_t size,
					  int access) {
	return ERR_PTR(-EINVAL);
}
static inline void ib_umem_release(struct ib_umem *umem) { }
static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }

#endif /* CONFIG_INFINIBAND_USER_MEM */

#endif /* IB_UMEM_H */
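
A driver that has pinned a region typically walks the umem's chunk list to
collect DMA addresses before programming its hardware, as the driver changes
in this commit do.  A minimal sketch of that pattern follows; build_page_array()
is a hypothetical helper, not part of the patch:

    /* Collect one DMA address per hardware page of a pinned region. */
    static u64 *build_page_array(struct ib_umem *umem, int *npages)
    {
            struct ib_umem_chunk *chunk;
            int shift = ffs(umem->page_size) - 1;
            int i = 0, j, k, len;
            u64 *pages;

            pages = kmalloc(ib_umem_page_count(umem) * sizeof *pages, GFP_KERNEL);
            if (!pages)
                    return NULL;

            list_for_each_entry(chunk, &umem->chunk_list, list)
                    for (j = 0; j < chunk->nmap; ++j) {
                            len = sg_dma_len(&chunk->page_list[j]) >> shift;
                            for (k = 0; k < len; ++k)
                                    pages[i++] = sg_dma_address(&chunk->page_list[j]) +
                                                 umem->page_size * k;
                    }

            *npages = i;
            return pages;
    }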
@@ -5,7 +5,7 @@
  * Copyright (c) 2004 Topspin Corporation. All rights reserved.
  * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -710,6 +710,7 @@ struct ib_ucontext {
 	struct list_head qp_list;
 	struct list_head srq_list;
 	struct list_head ah_list;
+	int closing;
 };
 
 struct ib_uobject {
@@ -723,23 +724,6 @@ struct ib_uobject {
 	int live;
 };
 
-struct ib_umem {
-	unsigned long user_base;
-	unsigned long virt_base;
-	size_t length;
-	int offset;
-	int page_size;
-	int writable;
-	struct list_head chunk_list;
-};
-
-struct ib_umem_chunk {
-	struct list_head list;
-	int nents;
-	int nmap;
-	struct scatterlist page_list[0];
-};
-
 struct ib_udata {
 	void __user *inbuf;
 	void __user *outbuf;
@@ -752,11 +736,6 @@ struct ib_udata {
 	((void *) &((struct ib_umem_chunk *) 0)->page_list[1] -	\
 	 (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
 
-struct ib_umem_object {
-	struct ib_uobject uobject;
-	struct ib_umem umem;
-};
-
 struct ib_pd {
 	struct ib_device *device;
 	struct ib_uobject *uobject;
@@ -1003,7 +982,8 @@ struct ib_device {
 						  int mr_access_flags,
 						  u64 *iova_start);
 	struct ib_mr *             (*reg_user_mr)(struct ib_pd *pd,
-						  struct ib_umem *region,
+						  u64 start, u64 length,
+						  u64 virt_addr,
 						  int mr_access_flags,
 						  struct ib_udata *udata);
 	int                        (*query_mr)(struct ib_mr *mr,
......