Commit a060b562 authored by Christoph Hellwig, committed by Doug Ledford

IB/core: generic RDMA READ/WRITE API

This supports both manual mapping of many SGEs and using MRs from the QP's
MR pool, for iWARP or other cases where that is more optimal.  For now, MRs
are only used for iWARP transports.  The user of the RDMA R/W API must
allocate the QP MR pool and size the SQ accordingly.

Thanks to Steve Wise for testing, fixing and rewriting the iWARP support,
and to Sagi Grimberg for ideas, reviews and fixes.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent d4a85c30
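
For context before reading the diff: a consumer of this API sets
cap.max_rdma_ctxs (and a port number) at QP creation time and lets the core
size the send queue and, on iWARP, the QP's MR pool.  A minimal sketch of
such a caller follows; the function name, queue depths and port number are
illustrative assumptions, not part of this commit.

    #include <rdma/ib_verbs.h>

    /*
     * Hypothetical ULP helper: create an RC QP sized for the RDMA R/W API.
     * The depths (16 sends, 128 recvs, 128 R/W contexts) and port 1 are
     * made-up example values.
     */
    static struct ib_qp *ulp_create_rw_qp(struct ib_pd *pd, struct ib_cq *cq)
    {
    	struct ib_qp_init_attr attr = { };

    	attr.qp_type = IB_QPT_RC;
    	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
    	attr.send_cq = cq;
    	attr.recv_cq = cq;
    	attr.cap.max_send_wr = 16;	/* the ULP's own SEND WRs only */
    	attr.cap.max_send_sge = 1;
    	attr.cap.max_recv_wr = 128;
    	attr.cap.max_recv_sge = 1;

    	/*
    	 * Setting max_rdma_ctxs makes ib_create_qp() call
    	 * rdma_rw_init_qp() to grow the SQ for the READ/WRITE WRs, and
    	 * rdma_rw_init_mrs() to fill the MR pool on iWARP.  A valid
    	 * port_num is required in that case.
    	 */
    	attr.cap.max_rdma_ctxs = 128;
    	attr.port_num = 1;

    	return ib_create_qp(pd, &attr);
    }
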
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
 				$(user_access-y)
 
-ib_core-y :=			packer.o ud_header.o verbs.o cq.o sysfs.o \
+ib_core-y :=			packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
 				device.o fmr_pool.o cache.o netlink.o \
 				roce_gid_mgmt.o mr_pool.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
drivers/infiniband/core/rw.c (new file; diff collapsed in this view)
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -48,6 +48,7 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
 #include <rdma/ib_addr.h>
+#include <rdma/rw.h>
 
 #include "core_priv.h"
@@ -751,6 +752,16 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
 {
 	struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device;
 	struct ib_qp *qp;
+	int ret;
+
+	/*
+	 * If the caller is using the RDMA R/W API, calculate the resources
+	 * needed for the RDMA READ/WRITE operations.
+	 *
+	 * Note that these callers need to pass in a port number.
+	 */
+	if (qp_init_attr->cap.max_rdma_ctxs)
+		rdma_rw_init_qp(device, qp_init_attr);
 
 	qp = device->create_qp(pd, qp_init_attr, NULL);
 	if (IS_ERR(qp))
@@ -764,6 +775,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
 	atomic_set(&qp->usecnt, 0);
 	qp->mrs_used = 0;
 	spin_lock_init(&qp->mr_lock);
+	INIT_LIST_HEAD(&qp->rdma_mrs);
 
 	if (qp_init_attr->qp_type == IB_QPT_XRC_TGT)
 		return ib_create_xrc_qp(qp, qp_init_attr);
@@ -787,6 +799,16 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
 	atomic_inc(&pd->usecnt);
 	atomic_inc(&qp_init_attr->send_cq->usecnt);
 
+	if (qp_init_attr->cap.max_rdma_ctxs) {
+		ret = rdma_rw_init_mrs(qp, qp_init_attr);
+		if (ret) {
+			pr_err("failed to init MR pool ret = %d\n", ret);
+			ib_destroy_qp(qp);
+			qp = ERR_PTR(ret);
+		}
+	}
+
 	return qp;
 }
 EXPORT_SYMBOL(ib_create_qp);
@@ -1271,6 +1293,9 @@ int ib_destroy_qp(struct ib_qp *qp)
 	rcq = qp->recv_cq;
 	srq = qp->srq;
 
+	if (!qp->uobject)
+		rdma_rw_cleanup_mrs(qp);
+
 	ret = qp->device->destroy_qp(qp);
 	if (!ret) {
 		if (pd)
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -931,6 +931,13 @@ struct ib_qp_cap {
 	u32	max_send_sge;
 	u32	max_recv_sge;
 	u32	max_inline_data;
+
+	/*
+	 * Maximum number of rdma_rw_ctx structures in flight at a time.
+	 * ib_create_qp() will calculate the right number of WRs and MRs
+	 * needed based on this.
+	 */
+	u32	max_rdma_ctxs;
 };
 
 enum ib_sig_type {
@@ -1002,7 +1009,11 @@ struct ib_qp_init_attr {
 	enum ib_sig_type	sq_sig_type;
 	enum ib_qp_type		qp_type;
 	enum ib_qp_create_flags	create_flags;
-	u8			port_num; /* special QP types only */
+
+	/*
+	 * Only needed for special QP types, or when using the RW API.
+	 */
+	u8			port_num;
 };
 
 struct ib_qp_open_attr {
@@ -1423,6 +1434,7 @@ struct ib_qp {
 	struct ib_cq	       *recv_cq;
 	spinlock_t		mr_lock;
 	int			mrs_used;
+	struct list_head	rdma_mrs;
 	struct ib_srq	       *srq;
 	struct ib_xrcd	       *xrcd; /* XRC TGT QPs only */
 	struct list_head	xrcd_list;
include/rdma/rw.h (new file):

/*
 * Copyright (c) 2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
#ifndef _RDMA_RW_H
#define _RDMA_RW_H

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/mr_pool.h>

struct rdma_rw_ctx {
	/* number of RDMA READ/WRITE WRs (not counting MR WRs) */
	u32			nr_ops;

	/* tag for the union below: */
	u8			type;

	union {
		/* for mapping a single SGE: */
		struct {
			struct ib_sge		sge;
			struct ib_rdma_wr	wr;
		} single;

		/* for mapping of multiple SGEs: */
		struct {
			struct ib_sge		*sges;
			struct ib_rdma_wr	*wrs;
		} map;

		/* for registering multiple WRs: */
		struct rdma_rw_reg_ctx {
			struct ib_sge		sge;
			struct ib_rdma_wr	wr;
			struct ib_reg_wr	reg_wr;
			struct ib_send_wr	inv_wr;
			struct ib_mr		*mr;
		} *reg;
	};
};

int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir);
void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct scatterlist *sg, u32 sg_cnt,
		enum dma_data_direction dir);

struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
		struct ib_cqe *cqe, struct ib_send_wr *chain_wr);

void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr);
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr);
void rdma_rw_cleanup_mrs(struct ib_qp *qp);

#endif /* _RDMA_RW_H */
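
The header only declares the entry points; a short sketch of how a consumer
might drive a single RDMA READ through them follows.  The ulp_* names and
the per-I/O container are hypothetical; only the rdma_rw_ctx_* calls and the
struct ib_cqe completion plumbing come from this API.

    #include <linux/kernel.h>
    #include <linux/scatterlist.h>
    #include <rdma/ib_verbs.h>
    #include <rdma/rw.h>

    /*
     * Hypothetical per-I/O state.  The rdma_rw_ctx (and the WRs inside it)
     * must stay alive until the completion fires, so it is embedded here
     * rather than placed on the stack.
     */
    struct ulp_io {
    	struct rdma_rw_ctx	rw;
    	struct ib_cqe		cqe;
    	struct ib_qp		*qp;
    	u8			port_num;
    	struct scatterlist	*sg;
    	u32			sg_cnt;
    };

    static void ulp_read_done(struct ib_cq *cq, struct ib_wc *wc)
    {
    	struct ulp_io *io = container_of(wc->wr_cqe, struct ulp_io, cqe);

    	/* a real consumer would also check wc->status here */

    	/* unmap the S/G list and return any MR to the QP's pool */
    	rdma_rw_ctx_destroy(&io->rw, io->qp, io->port_num,
    			io->sg, io->sg_cnt, DMA_FROM_DEVICE);
    }

    static int ulp_issue_rdma_read(struct ulp_io *io, u64 remote_addr, u32 rkey)
    {
    	int ret;

    	/* DMA-map the S/G list and build the READ WRs (MR + READ on iWARP) */
    	ret = rdma_rw_ctx_init(&io->rw, io->qp, io->port_num, io->sg,
    			io->sg_cnt, 0, remote_addr, rkey, DMA_FROM_DEVICE);
    	if (ret < 0)
    		return ret;

    	io->cqe.done = ulp_read_done;

    	/* post all WRs; io->cqe completes with the last WR in the chain */
    	ret = rdma_rw_ctx_post(&io->rw, io->qp, io->port_num, &io->cqe, NULL);
    	if (ret)
    		rdma_rw_ctx_destroy(&io->rw, io->qp, io->port_num,
    				io->sg, io->sg_cnt, DMA_FROM_DEVICE);
    	return ret;
    }

Passing a chain_wr to rdma_rw_ctx_post() (or using rdma_rw_ctx_wrs()) would
instead let the consumer append its own WRs, e.g. a SEND response, after the
READ/WRITE chain.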