Commit 9b17f588 authored by Ka-Cheong Poon, committed by David S. Miller

net/rds: Use DMA memory pool allocation for rds_header

Currently, RDS calls ib_dma_alloc_coherent() to allocate a large piece
of contiguous DMA coherent memory to store struct rds_header for
sending/receiving packets.  The memory allocated is then partitioned
into struct rds_header.  This is not necessary and can be costly at
times when memory is fragmented.  Instead, RDS should use the DMA
memory pool interface to handle this.  The DMA addresses of the pre-
allocated headers are stored in an array.  At send/receive ring
initialization and refill time, this array is de-referenced to get
the DMA addresses.  This array is not accessed at send/receive packet
processing.
Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent df1025fc
/* /*
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU * licenses. You may choose to be licensed under the terms of the GNU
...@@ -107,6 +107,8 @@ static void rds_ib_dev_free(struct work_struct *work) ...@@ -107,6 +107,8 @@ static void rds_ib_dev_free(struct work_struct *work)
rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool);
if (rds_ibdev->pd) if (rds_ibdev->pd)
ib_dealloc_pd(rds_ibdev->pd); ib_dealloc_pd(rds_ibdev->pd);
if (rds_ibdev->rid_hdrs_pool)
dma_pool_destroy(rds_ibdev->rid_hdrs_pool);
list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
list_del(&i_ipaddr->list); list_del(&i_ipaddr->list);
...@@ -179,6 +181,12 @@ static void rds_ib_add_one(struct ib_device *device) ...@@ -179,6 +181,12 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->pd = NULL; rds_ibdev->pd = NULL;
goto put_dev; goto put_dev;
} }
rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name,
device->dma_device,
sizeof(struct rds_header),
L1_CACHE_BYTES, 0);
if (!rds_ibdev->rid_hdrs_pool)
goto put_dev;
rds_ibdev->mr_1m_pool = rds_ibdev->mr_1m_pool =
rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);
......
...@@ -165,8 +165,8 @@ struct rds_ib_connection { ...@@ -165,8 +165,8 @@ struct rds_ib_connection {
/* tx */ /* tx */
struct rds_ib_work_ring i_send_ring; struct rds_ib_work_ring i_send_ring;
struct rm_data_op *i_data_op; struct rm_data_op *i_data_op;
struct rds_header *i_send_hdrs; struct rds_header **i_send_hdrs;
dma_addr_t i_send_hdrs_dma; dma_addr_t *i_send_hdrs_dma;
struct rds_ib_send_work *i_sends; struct rds_ib_send_work *i_sends;
atomic_t i_signaled_sends; atomic_t i_signaled_sends;
...@@ -175,8 +175,8 @@ struct rds_ib_connection { ...@@ -175,8 +175,8 @@ struct rds_ib_connection {
struct rds_ib_work_ring i_recv_ring; struct rds_ib_work_ring i_recv_ring;
struct rds_ib_incoming *i_ibinc; struct rds_ib_incoming *i_ibinc;
u32 i_recv_data_rem; u32 i_recv_data_rem;
struct rds_header *i_recv_hdrs; struct rds_header **i_recv_hdrs;
dma_addr_t i_recv_hdrs_dma; dma_addr_t *i_recv_hdrs_dma;
struct rds_ib_recv_work *i_recvs; struct rds_ib_recv_work *i_recvs;
u64 i_ack_recv; /* last ACK received */ u64 i_ack_recv; /* last ACK received */
struct rds_ib_refill_cache i_cache_incs; struct rds_ib_refill_cache i_cache_incs;
...@@ -246,6 +246,7 @@ struct rds_ib_device { ...@@ -246,6 +246,7 @@ struct rds_ib_device {
struct list_head conn_list; struct list_head conn_list;
struct ib_device *dev; struct ib_device *dev;
struct ib_pd *pd; struct ib_pd *pd;
struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */
bool use_fastreg; bool use_fastreg;
unsigned int max_mrs; unsigned int max_mrs;
...@@ -381,7 +382,11 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, ...@@ -381,7 +382,11 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6); int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6);
void rds_ib_cm_connect_complete(struct rds_connection *conn, void rds_ib_cm_connect_complete(struct rds_connection *conn,
struct rdma_cm_event *event); struct rdma_cm_event *event);
struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
struct dma_pool *pool,
dma_addr_t **dma_addrs, u32 num_hdrs);
void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
dma_addr_t *dma_addrs, u32 num_hdrs);
#define rds_ib_conn_error(conn, fmt...) \ #define rds_ib_conn_error(conn, fmt...) \
__rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
......
/* /*
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU * licenses. You may choose to be licensed under the terms of the GNU
...@@ -439,6 +439,68 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index) ...@@ -439,6 +439,68 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index)
rds_ibdev->vector_load[index]--; rds_ibdev->vector_load[index]--;
} }
/* Build the set of struct rds_header buffers used for sending/receiving
 * packets.  Each header is obtained individually from a DMA memory pool;
 * the CPU pointers and the matching DMA addresses are kept in two
 * parallel arrays of @num_hdrs entries each.
 *
 * @ibdev: the IB device (selects the node used for the two arrays)
 * @pool: the DMA memory pool the headers are taken from
 * @dma_addrs: on success, set to the newly allocated array of DMA addresses
 * @num_hdrs: number of headers to allocate
 *
 * Returns the array of header pointers.  On any allocation failure,
 * everything obtained so far is released, *@dma_addrs is left untouched
 * and NULL is returned.
 */
struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
				       struct dma_pool *pool,
				       dma_addr_t **dma_addrs, u32 num_hdrs)
{
	struct rds_header **hdr_ptrs;
	dma_addr_t *daddrs;
	u32 n;

	/* Array of CPU-side pointers, one slot per header. */
	hdr_ptrs = kvmalloc_node(num_hdrs * sizeof(*hdr_ptrs), GFP_KERNEL,
				 ibdev_to_node(ibdev));
	if (!hdr_ptrs)
		return NULL;

	/* Matching array of DMA addresses. */
	daddrs = kvmalloc_node(num_hdrs * sizeof(*daddrs), GFP_KERNEL,
			       ibdev_to_node(ibdev));
	if (!daddrs) {
		kvfree(hdr_ptrs);
		return NULL;
	}

	for (n = 0; n < num_hdrs; n++) {
		hdr_ptrs[n] = dma_pool_zalloc(pool, GFP_KERNEL, &daddrs[n]);
		if (hdr_ptrs[n])
			continue;

		/* Slot n failed: give back the first n headers together
		 * with both arrays.
		 */
		rds_dma_hdrs_free(pool, hdr_ptrs, daddrs, n);
		return NULL;
	}

	*dma_addrs = daddrs;
	return hdr_ptrs;
}
/* Release struct rds_header buffers and the two arrays that track them.
 *
 * @pool: the DMA memory pool the headers were taken from
 * @hdrs: array of header pointers
 * @dma_addrs: array of the corresponding DMA addresses
 * @num_hdrs: number of headers to return to the pool
 *
 * The first @num_hdrs entries are handed back to @pool, after which both
 * @hdrs and @dma_addrs themselves are freed.
 */
void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
		       dma_addr_t *dma_addrs, u32 num_hdrs)
{
	u32 idx = 0;

	while (idx < num_hdrs) {
		dma_pool_free(pool, hdrs[idx], dma_addrs[idx]);
		idx++;
	}

	kvfree(hdrs);
	kvfree(dma_addrs);
}
/* /*
* This needs to be very careful to not leave IS_ERR pointers around for * This needs to be very careful to not leave IS_ERR pointers around for
* cleanup to trip over. * cleanup to trip over.
...@@ -451,6 +513,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ...@@ -451,6 +513,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
struct ib_cq_init_attr cq_attr = {}; struct ib_cq_init_attr cq_attr = {};
struct rds_ib_device *rds_ibdev; struct rds_ib_device *rds_ibdev;
int ret, fr_queue_space; int ret, fr_queue_space;
struct dma_pool *pool;
/* /*
* It's normal to see a null device if an incoming connection races * It's normal to see a null device if an incoming connection races
...@@ -541,31 +604,28 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ...@@ -541,31 +604,28 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
goto recv_cq_out; goto recv_cq_out;
} }
ic->i_send_hdrs = ib_dma_alloc_coherent(dev, pool = rds_ibdev->rid_hdrs_pool;
ic->i_send_ring.w_nr * ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma,
sizeof(struct rds_header), ic->i_send_ring.w_nr);
&ic->i_send_hdrs_dma, GFP_KERNEL);
if (!ic->i_send_hdrs) { if (!ic->i_send_hdrs) {
ret = -ENOMEM; ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent send failed\n"); rdsdebug("DMA send hdrs alloc failed\n");
goto qp_out; goto qp_out;
} }
ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma,
ic->i_recv_ring.w_nr * ic->i_recv_ring.w_nr);
sizeof(struct rds_header),
&ic->i_recv_hdrs_dma, GFP_KERNEL);
if (!ic->i_recv_hdrs) { if (!ic->i_recv_hdrs) {
ret = -ENOMEM; ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent recv failed\n"); rdsdebug("DMA recv hdrs alloc failed\n");
goto send_hdrs_dma_out; goto send_hdrs_dma_out;
} }
ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL,
&ic->i_ack_dma, GFP_KERNEL); &ic->i_ack_dma);
if (!ic->i_ack) { if (!ic->i_ack) {
ret = -ENOMEM; ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent ack failed\n"); rdsdebug("DMA ack header alloc failed\n");
goto recv_hdrs_dma_out; goto recv_hdrs_dma_out;
} }
...@@ -596,17 +656,23 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ...@@ -596,17 +656,23 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
sends_out: sends_out:
vfree(ic->i_sends); vfree(ic->i_sends);
ack_dma_out: ack_dma_out:
ib_dma_free_coherent(dev, sizeof(struct rds_header), dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
ic->i_ack, ic->i_ack_dma); ic->i_ack = NULL;
recv_hdrs_dma_out: recv_hdrs_dma_out:
ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr * rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma,
sizeof(struct rds_header), ic->i_recv_ring.w_nr);
ic->i_recv_hdrs, ic->i_recv_hdrs_dma); ic->i_recv_hdrs = NULL;
ic->i_recv_hdrs_dma = NULL;
send_hdrs_dma_out: send_hdrs_dma_out:
ib_dma_free_coherent(dev, ic->i_send_ring.w_nr * rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma,
sizeof(struct rds_header), ic->i_send_ring.w_nr);
ic->i_send_hdrs, ic->i_send_hdrs_dma); ic->i_send_hdrs = NULL;
ic->i_send_hdrs_dma = NULL;
qp_out: qp_out:
rdma_destroy_qp(ic->i_cm_id); rdma_destroy_qp(ic->i_cm_id);
recv_cq_out: recv_cq_out:
...@@ -984,8 +1050,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) ...@@ -984,8 +1050,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_cm_id ? ic->i_cm_id->qp : NULL); ic->i_cm_id ? ic->i_cm_id->qp : NULL);
if (ic->i_cm_id) { if (ic->i_cm_id) {
struct ib_device *dev = ic->i_cm_id->device;
rdsdebug("disconnecting cm %p\n", ic->i_cm_id); rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
err = rdma_disconnect(ic->i_cm_id); err = rdma_disconnect(ic->i_cm_id);
if (err) { if (err) {
...@@ -1035,24 +1099,39 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) ...@@ -1035,24 +1099,39 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ib_destroy_cq(ic->i_recv_cq); ib_destroy_cq(ic->i_recv_cq);
} }
/* then free the resources that ib callbacks use */ if (ic->rds_ibdev) {
if (ic->i_send_hdrs) struct dma_pool *pool;
ib_dma_free_coherent(dev,
ic->i_send_ring.w_nr * pool = ic->rds_ibdev->rid_hdrs_pool;
sizeof(struct rds_header),
ic->i_send_hdrs, /* then free the resources that ib callbacks use */
ic->i_send_hdrs_dma); if (ic->i_send_hdrs) {
rds_dma_hdrs_free(pool, ic->i_send_hdrs,
if (ic->i_recv_hdrs) ic->i_send_hdrs_dma,
ib_dma_free_coherent(dev, ic->i_send_ring.w_nr);
ic->i_recv_ring.w_nr * ic->i_send_hdrs = NULL;
sizeof(struct rds_header), ic->i_send_hdrs_dma = NULL;
ic->i_recv_hdrs, }
ic->i_recv_hdrs_dma);
if (ic->i_recv_hdrs) {
if (ic->i_ack) rds_dma_hdrs_free(pool, ic->i_recv_hdrs,
ib_dma_free_coherent(dev, sizeof(struct rds_header), ic->i_recv_hdrs_dma,
ic->i_ack, ic->i_ack_dma); ic->i_recv_ring.w_nr);
ic->i_recv_hdrs = NULL;
ic->i_recv_hdrs_dma = NULL;
}
if (ic->i_ack) {
dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
ic->i_ack = NULL;
}
} else {
WARN_ON(ic->i_send_hdrs);
WARN_ON(ic->i_send_hdrs_dma);
WARN_ON(ic->i_recv_hdrs);
WARN_ON(ic->i_recv_hdrs_dma);
WARN_ON(ic->i_ack);
}
if (ic->i_sends) if (ic->i_sends)
rds_ib_send_clear_ring(ic); rds_ib_send_clear_ring(ic);
...@@ -1071,9 +1150,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) ...@@ -1071,9 +1150,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_pd = NULL; ic->i_pd = NULL;
ic->i_send_cq = NULL; ic->i_send_cq = NULL;
ic->i_recv_cq = NULL; ic->i_recv_cq = NULL;
ic->i_send_hdrs = NULL;
ic->i_recv_hdrs = NULL;
ic->i_ack = NULL;
} }
BUG_ON(ic->rds_ibdev); BUG_ON(ic->rds_ibdev);
......
/* /*
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU * licenses. You may choose to be licensed under the terms of the GNU
...@@ -61,7 +61,7 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic) ...@@ -61,7 +61,7 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
recv->r_wr.num_sge = RDS_IB_RECV_SGE; recv->r_wr.num_sge = RDS_IB_RECV_SGE;
sge = &recv->r_sge[0]; sge = &recv->r_sge[0];
sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); sge->addr = ic->i_recv_hdrs_dma[i];
sge->length = sizeof(struct rds_header); sge->length = sizeof(struct rds_header);
sge->lkey = ic->i_pd->local_dma_lkey; sge->lkey = ic->i_pd->local_dma_lkey;
...@@ -343,7 +343,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, ...@@ -343,7 +343,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
WARN_ON(ret != 1); WARN_ON(ret != 1);
sge = &recv->r_sge[0]; sge = &recv->r_sge[0];
sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];
sge->length = sizeof(struct rds_header); sge->length = sizeof(struct rds_header);
sge = &recv->r_sge[1]; sge = &recv->r_sge[1];
...@@ -861,7 +861,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, ...@@ -861,7 +861,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
} }
data_len -= sizeof(struct rds_header); data_len -= sizeof(struct rds_header);
ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs]; ihdr = ic->i_recv_hdrs[recv - ic->i_recvs];
/* Validate the checksum. */ /* Validate the checksum. */
if (!rds_message_verify_checksum(ihdr)) { if (!rds_message_verify_checksum(ihdr)) {
......
/* /*
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU * licenses. You may choose to be licensed under the terms of the GNU
...@@ -201,7 +201,8 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic) ...@@ -201,7 +201,8 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
send->s_wr.ex.imm_data = 0; send->s_wr.ex.imm_data = 0;
sge = &send->s_sge[0]; sge = &send->s_sge[0];
sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); sge->addr = ic->i_send_hdrs_dma[i];
sge->length = sizeof(struct rds_header); sge->length = sizeof(struct rds_header);
sge->lkey = ic->i_pd->local_dma_lkey; sge->lkey = ic->i_pd->local_dma_lkey;
...@@ -631,11 +632,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, ...@@ -631,11 +632,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
send->s_queued = jiffies; send->s_queued = jiffies;
send->s_op = NULL; send->s_op = NULL;
send->s_sge[0].addr = ic->i_send_hdrs_dma send->s_sge[0].addr = ic->i_send_hdrs_dma[pos];
+ (pos * sizeof(struct rds_header));
send->s_sge[0].length = sizeof(struct rds_header); send->s_sge[0].length = sizeof(struct rds_header);
memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr,
sizeof(struct rds_header));
/* Set up the data, if present */ /* Set up the data, if present */
if (i < work_alloc if (i < work_alloc
...@@ -674,7 +677,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, ...@@ -674,7 +677,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
&send->s_wr, send->s_wr.num_sge, send->s_wr.next); &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
if (ic->i_flowctl && adv_credits) { if (ic->i_flowctl && adv_credits) {
struct rds_header *hdr = &ic->i_send_hdrs[pos]; struct rds_header *hdr = ic->i_send_hdrs[pos];
/* add credit and redo the header checksum */ /* add credit and redo the header checksum */
hdr->h_credit = adv_credits; hdr->h_credit = adv_credits;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment