Commit 32548870 authored by Wenpeng Liang's avatar Wenpeng Liang Committed by Jason Gunthorpe

RDMA/hns: Add support for XRC on HIP09

The HIP09 supports XRC transport service, it greatly saves the number of
QPs required to connect all processes in a large cluster.

Link: https://lore.kernel.org/r/1614826558-35423-1-git-send-email-liweihang@huawei.comSigned-off-by: default avatarWenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: default avatarWeihang Li <liweihang@huawei.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
parent c33d516a
......@@ -304,6 +304,9 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
{
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
hns_roce_cleanup_xrcd_table(hr_dev);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
hns_roce_cleanup_srq_table(hr_dev);
hns_roce_cleanup_qp_table(hr_dev);
......
......@@ -137,6 +137,7 @@ enum {
SERV_TYPE_UC,
SERV_TYPE_RD,
SERV_TYPE_UD,
SERV_TYPE_XRC = 5,
};
enum hns_roce_qp_state {
......@@ -168,6 +169,8 @@ enum hns_roce_event {
HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12,
HNS_ROCE_EVENT_TYPE_MB = 0x13,
HNS_ROCE_EVENT_TYPE_FLR = 0x15,
HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION = 0x16,
HNS_ROCE_EVENT_TYPE_INVALID_XRCETH = 0x17,
};
#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
......@@ -179,6 +182,7 @@ enum {
HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
HNS_ROCE_CAP_FLAG_SRQ = BIT(5),
HNS_ROCE_CAP_FLAG_XRC = BIT(6),
HNS_ROCE_CAP_FLAG_MW = BIT(7),
HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9),
......@@ -244,6 +248,11 @@ struct hns_roce_pd {
unsigned long pdn;
};
struct hns_roce_xrcd {
struct ib_xrcd ibxrcd;
u32 xrcdn;
};
struct hns_roce_bitmap {
/* Bitmap Traversal last a bit which is 1 */
unsigned long last;
......@@ -467,6 +476,7 @@ struct hns_roce_srq {
u32 rsv_sge;
int wqe_shift;
u32 cqn;
u32 xrcdn;
void __iomem *db_reg_l;
atomic_t refcount;
......@@ -640,6 +650,8 @@ struct hns_roce_qp {
enum hns_roce_event event_type);
unsigned long qpn;
u32 xrcdn;
atomic_t refcount;
struct completion free;
......@@ -766,6 +778,8 @@ struct hns_roce_caps {
int reserved_uars;
int num_pds;
int reserved_pds;
u32 num_xrcds;
u32 reserved_xrcds;
u32 mtt_entry_sz;
u32 cqe_sz;
u32 page_size_cap;
......@@ -963,6 +977,7 @@ struct hns_roce_dev {
struct hns_roce_cmdq cmd;
struct hns_roce_bitmap pd_bitmap;
struct hns_roce_bitmap xrcd_bitmap;
struct hns_roce_uar_table uar_table;
struct hns_roce_mr_table mr_table;
struct hns_roce_cq_table cq_table;
......@@ -1004,6 +1019,11 @@ static inline struct hns_roce_pd *to_hr_pd(struct ib_pd *ibpd)
return container_of(ibpd, struct hns_roce_pd, ibpd);
}
static inline struct hns_roce_xrcd *to_hr_xrcd(struct ib_xrcd *ibxrcd)
{
return container_of(ibxrcd, struct hns_roce_xrcd, ibxrcd);
}
static inline struct hns_roce_ah *to_hr_ah(struct ib_ah *ibah)
{
return container_of(ibah, struct hns_roce_ah, ibah);
......@@ -1136,6 +1156,7 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev);
......@@ -1143,6 +1164,7 @@ void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_xrcd_table(struct hns_roce_dev *hr_dev);
int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
......@@ -1207,6 +1229,9 @@ int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
struct ib_udata *udata);
int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
......
This diff is collapsed.
......@@ -74,6 +74,8 @@
#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_PD_NUM 0x1000000
#define HNS_ROCE_V2_MAX_XRCD_NUM 0x1000000
#define HNS_ROCE_V2_RSV_XRCD_NUM 0
#define HNS_ROCE_V2_MAX_QP_INIT_RDMA 128
#define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128
#define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64
......
......@@ -207,6 +207,9 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA;
}
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
props->device_cap_flags |= IB_DEVICE_XRC;
return 0;
}
......@@ -300,6 +303,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
return -EAGAIN;
resp.qp_tab_size = hr_dev->caps.num_qps;
resp.srq_tab_size = hr_dev->caps.num_srqs;
ret = hns_roce_uar_alloc(hr_dev, &context->uar);
if (ret)
......@@ -461,6 +465,13 @@ static const struct ib_device_ops hns_roce_dev_srq_ops = {
INIT_RDMA_OBJ_SIZE(ib_srq, hns_roce_srq, ibsrq),
};
static const struct ib_device_ops hns_roce_dev_xrcd_ops = {
.alloc_xrcd = hns_roce_alloc_xrcd,
.dealloc_xrcd = hns_roce_dealloc_xrcd,
INIT_RDMA_OBJ_SIZE(ib_xrcd, hns_roce_xrcd, ibxrcd),
};
static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
{
int ret;
......@@ -484,20 +495,20 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR)
ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops);
/* MW */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW)
ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops);
/* FRMR */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops);
/* SRQ */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops);
ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops);
}
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
ib_set_device_ops(ib_dev, &hns_roce_dev_xrcd_ops);
ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
for (i = 0; i < hr_dev->caps.num_ports; i++) {
......@@ -727,10 +738,19 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
goto err_uar_alloc_free;
}
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC) {
ret = hns_roce_init_xrcd_table(hr_dev);
if (ret) {
dev_err(dev, "failed to init xrcd table, ret = %d.\n",
ret);
goto err_pd_table_free;
}
}
ret = hns_roce_init_mr_table(hr_dev);
if (ret) {
dev_err(dev, "Failed to init memory region table.\n");
goto err_pd_table_free;
goto err_xrcd_table_free;
}
hns_roce_init_cq_table(hr_dev);
......@@ -759,6 +779,10 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
hns_roce_cleanup_cq_table(hr_dev);
hns_roce_cleanup_mr_table(hr_dev);
err_xrcd_table_free:
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
hns_roce_cleanup_xrcd_table(hr_dev);
err_pd_table_free:
hns_roce_cleanup_pd_table(hr_dev);
......
......@@ -137,3 +137,54 @@ void hns_roce_cleanup_uar_table(struct hns_roce_dev *hr_dev)
{
hns_roce_bitmap_cleanup(&hr_dev->uar_table.bitmap);
}
static int hns_roce_xrcd_alloc(struct hns_roce_dev *hr_dev, u32 *xrcdn)
{
return hns_roce_bitmap_alloc(&hr_dev->xrcd_bitmap,
(unsigned long *)xrcdn);
}
static void hns_roce_xrcd_free(struct hns_roce_dev *hr_dev,
u32 xrcdn)
{
hns_roce_bitmap_free(&hr_dev->xrcd_bitmap, xrcdn, BITMAP_NO_RR);
}
int hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev)
{
return hns_roce_bitmap_init(&hr_dev->xrcd_bitmap,
hr_dev->caps.num_xrcds,
hr_dev->caps.num_xrcds - 1,
hr_dev->caps.reserved_xrcds, 0);
}
void hns_roce_cleanup_xrcd_table(struct hns_roce_dev *hr_dev)
{
hns_roce_bitmap_cleanup(&hr_dev->xrcd_bitmap);
}
int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_xrcd->device);
struct hns_roce_xrcd *xrcd = to_hr_xrcd(ib_xrcd);
int ret;
if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
return -EINVAL;
ret = hns_roce_xrcd_alloc(hr_dev, &xrcd->xrcdn);
if (ret) {
dev_err(hr_dev->dev, "failed to alloc xrcdn, ret = %d.\n", ret);
return ret;
}
return 0;
}
int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
{
hns_roce_xrcd_free(to_hr_dev(ib_xrcd->device),
to_hr_xrcd(ib_xrcd)->xrcdn);
return 0;
}
......@@ -98,7 +98,9 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 &&
(event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR)) {
event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR ||
event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION ||
event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH)) {
qp->state = IB_QPS_ERR;
if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag))
init_flush_work(hr_dev, qp);
......@@ -142,6 +144,8 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
event.event = IB_EVENT_QP_REQ_ERR;
break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
event.event = IB_EVENT_QP_ACCESS_ERR;
break;
default:
......@@ -366,7 +370,12 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
unsigned long flags;
list_del(&hr_qp->node);
if (hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
list_del(&hr_qp->sq_node);
if (hr_qp->ibqp.qp_type != IB_QPT_XRC_INI &&
hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
list_del(&hr_qp->rq_node);
xa_lock_irqsave(xa, flags);
......@@ -1105,11 +1114,16 @@ static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
bool is_user)
{
switch (type) {
case IB_QPT_XRC_INI:
case IB_QPT_XRC_TGT:
if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
goto out;
break;
case IB_QPT_UD:
if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 &&
is_user)
goto out;
fallthrough;
break;
case IB_QPT_RC:
case IB_QPT_GSI:
break;
......@@ -1129,8 +1143,8 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct ib_device *ibdev = &hr_dev->ib_dev;
struct ib_device *ibdev = pd ? pd->device : init_attr->xrcd->device;
struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
struct hns_roce_qp *hr_qp;
int ret;
......@@ -1142,6 +1156,15 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
if (!hr_qp)
return ERR_PTR(-ENOMEM);
if (init_attr->qp_type == IB_QPT_XRC_INI)
init_attr->recv_cq = NULL;
if (init_attr->qp_type == IB_QPT_XRC_TGT) {
hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn;
init_attr->recv_cq = NULL;
init_attr->send_cq = NULL;
}
if (init_attr->qp_type == IB_QPT_GSI) {
hr_qp->port = init_attr->port_num - 1;
hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
......@@ -1161,20 +1184,20 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
int to_hr_qp_type(int qp_type)
{
int transport_type;
if (qp_type == IB_QPT_RC)
transport_type = SERV_TYPE_RC;
else if (qp_type == IB_QPT_UC)
transport_type = SERV_TYPE_UC;
else if (qp_type == IB_QPT_UD)
transport_type = SERV_TYPE_UD;
else if (qp_type == IB_QPT_GSI)
transport_type = SERV_TYPE_UD;
else
transport_type = -1;
return transport_type;
switch (qp_type) {
case IB_QPT_RC:
return SERV_TYPE_RC;
case IB_QPT_UC:
return SERV_TYPE_UC;
case IB_QPT_UD:
case IB_QPT_GSI:
return SERV_TYPE_UD;
case IB_QPT_XRC_INI:
case IB_QPT_XRC_TGT:
return SERV_TYPE_XRC;
default:
return -1;
}
}
static int check_mtu_validate(struct hns_roce_dev *hr_dev,
......
......@@ -314,6 +314,9 @@ static void set_srq_ext_param(struct hns_roce_srq *srq,
{
srq->cqn = ib_srq_has_cq(init_attr->srq_type) ?
to_hr_cq(init_attr->ext.cq)->cqn : 0;
srq->xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
to_hr_xrcd(init_attr->ext.xrc.xrcd)->xrcdn : 0;
}
static int set_srq_param(struct hns_roce_srq *srq,
......
......@@ -86,6 +86,8 @@ struct hns_roce_ib_create_qp_resp {
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 cqe_size;
__u32 srq_tab_size;
__u32 reserved;
};
struct hns_roce_ib_alloc_pd_resp {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment