Commit ae6e843f authored by Bob Pearson, committed by Jason Gunthorpe

RDMA/rxe: Add memory barriers to kernel queues

Earlier patches added memory barriers to protect user space to kernel
space communications. The user space queues were previously shown to have
occasional memory synchronization errors, which were eliminated by adding
smp_load_acquire() and smp_store_release() barriers. This patch extends
that protection to the case where queues are shared between kernel space
threads.
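
As an illustration of the ordering these barriers provide, here is a minimal
sketch of a shared index pair; it is not the rxe implementation, and the
struct and function names are invented for the example. The producer
publishes its index with smp_store_release() only after the element has been
written, and the consumer reads the index with smp_load_acquire() before
touching the element it covers:

#include <linux/types.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <asm/barrier.h>

/* Hypothetical ring layout with a power-of-two element count;
 * not the actual struct rxe_queue_buf.
 */
struct example_queue {
        u32 producer_index;     /* written only by the producer side */
        u32 consumer_index;     /* written only by the consumer side */
        u32 index_mask;         /* num_elem - 1 */
        u32 elem_size;
        u8 data[];
};

static int example_produce(struct example_queue *q, const void *elem)
{
        /* Pairs with the release in example_consume(): the slot must be
         * free before we overwrite it.
         */
        u32 cons = smp_load_acquire(&q->consumer_index);
        u32 prod = q->producer_index;           /* our own index */

        if (prod - cons > q->index_mask)
                return -ENOSPC;                 /* queue is full */

        memcpy(q->data + (prod & q->index_mask) * q->elem_size,
               elem, q->elem_size);

        /* Make the element visible before the new producer index. */
        smp_store_release(&q->producer_index, prod + 1);
        return 0;
}

static int example_consume(struct example_queue *q, void *out)
{
        /* Pairs with the release in example_produce(): see the index
         * only after the data it covers.
         */
        u32 prod = smp_load_acquire(&q->producer_index);
        u32 cons = q->consumer_index;           /* our own index */

        if (cons == prod)
                return -ENOENT;                 /* queue is empty */

        memcpy(out, q->data + (cons & q->index_mask) * q->elem_size,
               q->elem_size);

        /* Return the slot to the producer only after the copy is done. */
        smp_store_release(&q->consumer_index, cons + 1);
        return 0;
}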

This patch also extends the queue types to cover kernel ULP queues, which
access the other end of the queues in kernel verbs calls such as poll_cq
and post_send/recv.
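
The direction in the new type names is relative to the rxe driver and can be
read off the call sites in the diff below. A rough summary, with the caveat
that the authoritative definitions live in rxe_queue.h (whose diff is
collapsed in this view), so the enum sketched here is inferred from the call
sites rather than quoted:

enum queue_type {
        QUEUE_TYPE_TO_CLIENT,   /* driver produces: e.g. rxe_cq_post() filling
                                 * a CQ for user space or a kernel ULP to drain
                                 */
        QUEUE_TYPE_FROM_CLIENT, /* driver consumes: e.g. the requester,
                                 * responder and completer reading SQ/RQ/SRQ
                                 * entries posted by the client
                                 */
        QUEUE_TYPE_TO_DRIVER,   /* kernel verbs caller produces:
                                 * post_one_send(), post_one_recv()
                                 */
        QUEUE_TYPE_FROM_DRIVER, /* kernel verbs caller consumes: rxe_poll_cq(),
                                 * rxe_peek_cq(), rxe_req_notify_cq()
                                 */
};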

Link: https://lore.kernel.org/r/20210914164206.19768-2-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 6bda3914
@@ -142,10 +142,7 @@ static inline enum comp_state get_wqe(struct rxe_qp *qp,
/* we come here whether or not we found a response packet to see if
* there are any posted WQEs
*/
if (qp->is_user)
wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_USER);
else
wqe = queue_head(qp->sq.queue, QUEUE_TYPE_KERNEL);
wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT);
*wqe_p = wqe;
/* no WQE or requester has not started it yet */
@@ -432,10 +429,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
if (post)
make_send_cqe(qp, wqe, &cqe);
if (qp->is_user)
advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_USER);
else
advance_consumer(qp->sq.queue, QUEUE_TYPE_KERNEL);
queue_advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT);
if (post)
rxe_cq_post(qp->scq, &cqe, 0);
@@ -539,7 +533,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
wqe->status = IB_WC_WR_FLUSH_ERR;
do_complete(qp, wqe);
} else {
advance_consumer(q, q->type);
queue_advance_consumer(q, q->type);
}
}
}
@@ -25,11 +25,7 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
}
if (cq) {
if (cq->is_user)
count = queue_count(cq->queue, QUEUE_TYPE_TO_USER);
else
count = queue_count(cq->queue, QUEUE_TYPE_KERNEL);
count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT);
if (cqe < count) {
pr_warn("cqe(%d) < current # elements in queue (%d)",
cqe, count);
@@ -65,7 +61,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
int err;
enum queue_type type;
type = uresp ? QUEUE_TYPE_TO_USER : QUEUE_TYPE_KERNEL;
type = QUEUE_TYPE_TO_CLIENT;
cq->queue = rxe_queue_init(rxe, &cqe,
sizeof(struct rxe_cqe), type);
if (!cq->queue) {
@@ -117,11 +113,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
spin_lock_irqsave(&cq->cq_lock, flags);
if (cq->is_user)
full = queue_full(cq->queue, QUEUE_TYPE_TO_USER);
else
full = queue_full(cq->queue, QUEUE_TYPE_KERNEL);
full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT);
if (unlikely(full)) {
spin_unlock_irqrestore(&cq->cq_lock, flags);
if (cq->ibcq.event_handler) {
@@ -134,17 +126,10 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
return -EBUSY;
}
if (cq->is_user)
addr = producer_addr(cq->queue, QUEUE_TYPE_TO_USER);
else
addr = producer_addr(cq->queue, QUEUE_TYPE_KERNEL);
addr = queue_producer_addr(cq->queue, QUEUE_TYPE_TO_CLIENT);
memcpy(addr, cqe, sizeof(*cqe));
if (cq->is_user)
advance_producer(cq->queue, QUEUE_TYPE_TO_USER);
else
advance_producer(cq->queue, QUEUE_TYPE_KERNEL);
queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
spin_unlock_irqrestore(&cq->cq_lock, flags);
@@ -229,7 +229,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->sq.max_inline = init->cap.max_inline_data = wqe_size;
wqe_size += sizeof(struct rxe_send_wqe);
type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL;
type = QUEUE_TYPE_FROM_CLIENT;
qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr,
wqe_size, type);
if (!qp->sq.queue)
@@ -246,12 +246,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
return err;
}
if (qp->is_user)
qp->req.wqe_index = producer_index(qp->sq.queue,
QUEUE_TYPE_FROM_USER);
else
qp->req.wqe_index = producer_index(qp->sq.queue,
QUEUE_TYPE_KERNEL);
qp->req.wqe_index = queue_get_producer(qp->sq.queue,
QUEUE_TYPE_FROM_CLIENT);
qp->req.state = QP_STATE_RESET;
qp->req.opcode = -1;
@@ -291,7 +287,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n",
qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size);
type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL;
type = QUEUE_TYPE_FROM_CLIENT;
qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
wqe_size, type);
if (!qp->rq.queue)
@@ -111,17 +111,33 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem,
static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q,
unsigned int num_elem)
{
if (!queue_empty(q, q->type) && (num_elem < queue_count(q, q->type)))
enum queue_type type = q->type;
u32 prod;
u32 cons;
if (!queue_empty(q, q->type) && (num_elem < queue_count(q, type)))
return -EINVAL;
while (!queue_empty(q, q->type)) {
memcpy(producer_addr(new_q, new_q->type),
consumer_addr(q, q->type),
new_q->elem_size);
advance_producer(new_q, new_q->type);
advance_consumer(q, q->type);
prod = queue_get_producer(new_q, type);
cons = queue_get_consumer(q, type);
while (!queue_empty(q, type)) {
memcpy(queue_addr_from_index(new_q, prod),
queue_addr_from_index(q, cons), new_q->elem_size);
prod = queue_next_index(new_q, prod);
cons = queue_next_index(q, cons);
}
new_q->buf->producer_index = prod;
q->buf->consumer_index = cons;
/* update private index copies */
if (type == QUEUE_TYPE_TO_CLIENT)
new_q->index = new_q->buf->producer_index;
else
q->index = q->buf->consumer_index;
/* exchange rxe_queue headers */
swap(*q, *new_q);
return 0;
This diff is collapsed.
@@ -49,21 +49,16 @@ static void req_retry(struct rxe_qp *qp)
unsigned int cons;
unsigned int prod;
if (qp->is_user) {
cons = consumer_index(q, QUEUE_TYPE_FROM_USER);
prod = producer_index(q, QUEUE_TYPE_FROM_USER);
} else {
cons = consumer_index(q, QUEUE_TYPE_KERNEL);
prod = producer_index(q, QUEUE_TYPE_KERNEL);
}
cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);
qp->req.wqe_index = cons;
qp->req.psn = qp->comp.psn;
qp->req.opcode = -1;
for (wqe_index = cons; wqe_index != prod;
wqe_index = next_index(q, wqe_index)) {
wqe = addr_from_index(qp->sq.queue, wqe_index);
wqe_index = queue_next_index(q, wqe_index)) {
wqe = queue_addr_from_index(qp->sq.queue, wqe_index);
mask = wr_opcode_mask(wqe->wr.opcode, qp);
if (wqe->state == wqe_state_posted)
@@ -121,15 +116,9 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
unsigned int cons;
unsigned int prod;
if (qp->is_user) {
wqe = queue_head(q, QUEUE_TYPE_FROM_USER);
cons = consumer_index(q, QUEUE_TYPE_FROM_USER);
prod = producer_index(q, QUEUE_TYPE_FROM_USER);
} else {
wqe = queue_head(q, QUEUE_TYPE_KERNEL);
cons = consumer_index(q, QUEUE_TYPE_KERNEL);
prod = producer_index(q, QUEUE_TYPE_KERNEL);
}
wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);
if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
/* check to see if we are drained;
@@ -170,7 +159,7 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
if (index == prod)
return NULL;
wqe = addr_from_index(q, index);
wqe = queue_addr_from_index(q, index);
if (unlikely((qp->req.state == QP_STATE_DRAIN ||
qp->req.state == QP_STATE_DRAINED) &&
@@ -560,7 +549,8 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
qp->req.opcode = pkt->opcode;
if (pkt->mask & RXE_END_MASK)
qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
qp->req.wqe_index = queue_next_index(qp->sq.queue,
qp->req.wqe_index);
qp->need_req_skb = 0;
@@ -614,7 +604,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
qp->sq_sig_type == IB_SIGNAL_ALL_WR)
@@ -645,7 +635,8 @@ int rxe_requester(void *arg)
goto exit;
if (unlikely(qp->req.state == QP_STATE_RESET)) {
qp->req.wqe_index = consumer_index(q, q->type);
qp->req.wqe_index = queue_get_consumer(q,
QUEUE_TYPE_FROM_CLIENT);
qp->req.opcode = -1;
qp->req.need_rd_atomic = 0;
qp->req.wait_psn = 0;
@@ -711,7 +702,7 @@ int rxe_requester(void *arg)
wqe->last_psn = qp->req.psn;
qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
qp->req.opcode = IB_OPCODE_UD_SEND_ONLY;
qp->req.wqe_index = next_index(qp->sq.queue,
qp->req.wqe_index = queue_next_index(qp->sq.queue,
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
@@ -303,10 +303,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
spin_lock_bh(&srq->rq.consumer_lock);
if (qp->is_user)
wqe = queue_head(q, QUEUE_TYPE_FROM_USER);
else
wqe = queue_head(q, QUEUE_TYPE_KERNEL);
wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
if (!wqe) {
spin_unlock_bh(&srq->rq.consumer_lock);
return RESPST_ERR_RNR;
@@ -322,13 +319,8 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
memcpy(&qp->resp.srq_wqe, wqe, size);
qp->resp.wqe = &qp->resp.srq_wqe.wqe;
if (qp->is_user) {
advance_consumer(q, QUEUE_TYPE_FROM_USER);
count = queue_count(q, QUEUE_TYPE_FROM_USER);
} else {
advance_consumer(q, QUEUE_TYPE_KERNEL);
count = queue_count(q, QUEUE_TYPE_KERNEL);
}
queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);
if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
srq->limit = 0;
@@ -357,12 +349,8 @@ static enum resp_states check_resource(struct rxe_qp *qp,
qp->resp.status = IB_WC_WR_FLUSH_ERR;
return RESPST_COMPLETE;
} else if (!srq) {
if (qp->is_user)
qp->resp.wqe = queue_head(qp->rq.queue,
QUEUE_TYPE_FROM_USER);
else
qp->resp.wqe = queue_head(qp->rq.queue,
QUEUE_TYPE_KERNEL);
qp->resp.wqe = queue_head(qp->rq.queue,
QUEUE_TYPE_FROM_CLIENT);
if (qp->resp.wqe) {
qp->resp.status = IB_WC_WR_FLUSH_ERR;
return RESPST_COMPLETE;
@@ -389,12 +377,8 @@ static enum resp_states check_resource(struct rxe_qp *qp,
if (srq)
return get_srq_wqe(qp);
if (qp->is_user)
qp->resp.wqe = queue_head(qp->rq.queue,
QUEUE_TYPE_FROM_USER);
else
qp->resp.wqe = queue_head(qp->rq.queue,
QUEUE_TYPE_KERNEL);
qp->resp.wqe = queue_head(qp->rq.queue,
QUEUE_TYPE_FROM_CLIENT);
return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
}
@@ -936,12 +920,8 @@ static enum resp_states do_complete(struct rxe_qp *qp,
}
/* have copy for srq and reference for !srq */
if (!qp->srq) {
if (qp->is_user)
advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_USER);
else
advance_consumer(qp->rq.queue, QUEUE_TYPE_KERNEL);
}
if (!qp->srq)
queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);
qp->resp.wqe = NULL;
@@ -1213,7 +1193,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
return;
while (!qp->srq && q && queue_head(q, q->type))
advance_consumer(q, q->type);
queue_advance_consumer(q, q->type);
}
int rxe_responder(void *arg)
@@ -93,7 +93,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
spin_lock_init(&srq->rq.producer_lock);
spin_lock_init(&srq->rq.consumer_lock);
type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL;
type = QUEUE_TYPE_FROM_CLIENT;
q = rxe_queue_init(rxe, &srq->rq.max_wr,
srq_wqe_size, type);
if (!q) {
@@ -215,11 +215,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
int num_sge = ibwr->num_sge;
int full;
if (rq->is_user)
full = queue_full(rq->queue, QUEUE_TYPE_FROM_USER);
else
full = queue_full(rq->queue, QUEUE_TYPE_KERNEL);
full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER);
if (unlikely(full)) {
err = -ENOMEM;
goto err1;
@@ -234,11 +230,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
for (i = 0; i < num_sge; i++)
length += ibwr->sg_list[i].length;
if (rq->is_user)
recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_FROM_USER);
else
recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_KERNEL);
recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER);
recv_wqe->wr_id = ibwr->wr_id;
recv_wqe->num_sge = num_sge;
@@ -251,10 +243,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
recv_wqe->dma.cur_sge = 0;
recv_wqe->dma.sge_offset = 0;
if (rq->is_user)
advance_producer(rq->queue, QUEUE_TYPE_FROM_USER);
else
advance_producer(rq->queue, QUEUE_TYPE_KERNEL);
queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER);
return 0;
@@ -630,27 +619,17 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
spin_lock_irqsave(&qp->sq.sq_lock, flags);
if (qp->is_user)
full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER);
else
full = queue_full(sq->queue, QUEUE_TYPE_KERNEL);
full = queue_full(sq->queue, QUEUE_TYPE_TO_DRIVER);
if (unlikely(full)) {
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
return -ENOMEM;
}
if (qp->is_user)
send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER);
else
send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);
send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_TO_DRIVER);
init_send_wqe(qp, ibwr, mask, length, send_wqe);
if (qp->is_user)
advance_producer(sq->queue, QUEUE_TYPE_FROM_USER);
else
advance_producer(sq->queue, QUEUE_TYPE_KERNEL);
queue_advance_producer(sq->queue, QUEUE_TYPE_TO_DRIVER);
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
@@ -842,18 +821,12 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
spin_lock_irqsave(&cq->cq_lock, flags);
for (i = 0; i < num_entries; i++) {
if (cq->is_user)
cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER);
else
cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
cqe = queue_head(cq->queue, QUEUE_TYPE_FROM_DRIVER);
if (!cqe)
break;
memcpy(wc++, &cqe->ibwc, sizeof(*wc));
if (cq->is_user)
advance_consumer(cq->queue, QUEUE_TYPE_TO_USER);
else
advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
queue_advance_consumer(cq->queue, QUEUE_TYPE_FROM_DRIVER);
}
spin_unlock_irqrestore(&cq->cq_lock, flags);
@@ -865,10 +838,7 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
struct rxe_cq *cq = to_rcq(ibcq);
int count;
if (cq->is_user)
count = queue_count(cq->queue, QUEUE_TYPE_TO_USER);
else
count = queue_count(cq->queue, QUEUE_TYPE_KERNEL);
count = queue_count(cq->queue, QUEUE_TYPE_FROM_DRIVER);
return (count > wc_cnt) ? wc_cnt : count;
}
@@ -884,10 +854,7 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = flags & IB_CQ_SOLICITED_MASK;
if (cq->is_user)
empty = queue_empty(cq->queue, QUEUE_TYPE_TO_USER);
else
empty = queue_empty(cq->queue, QUEUE_TYPE_KERNEL);
empty = queue_empty(cq->queue, QUEUE_TYPE_FROM_DRIVER);
if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
ret = 1;