Commit 24b11923 authored by Kaike Wan, committed by Doug Ledford

IB/hfi1: Integrate TID RDMA READ protocol into RC protocol

This patch integrates the TID RDMA READ protocol into the IB RC protocol.
This protocol is an end-to-end protocol between the hfi1 drivers on two
OPA nodes that converts a qualified RDMA READ request into a TID RDMA
READ request to avoid data copying on the requester side. The following
code is added in this patch:
- Send the TID RDMA READ request;
- Complete the TID RDMA READ send request;
- Send the TID RDMA READ response;
- Complete the TID RDMA READ request;
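
A condensed, illustrative sketch of the requester-side path ("Send the TID RDMA READ request") follows. It is not the literal driver code: it only restates the IB_WR_TID_RDMA_READ case added to hfi1_make_rc_req() in the diff below, reuses the helper names from that diff (wqe_to_tid_req(), hfi1_build_tid_rdma_read_req()), and omits the resend, fence, and TID-flow allocation handling; the sketch_* function name is invented for this sketch.

/*
 * Illustrative sketch only, not the driver code: how a work request
 * posted as IB_WR_TID_RDMA_READ is turned into a TID RDMA READ
 * request by the requester's RC send engine. Helper names are taken
 * from the diff below; error, resend, and flow-allocation paths are
 * omitted. Returns 1 when a packet was built, 0 to try again later.
 */
static int sketch_send_tid_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
				    struct ib_other_headers *ohdr,
				    u32 *bth1, u32 *bth2, u32 *hwords)
{
	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
	u32 len;
	int delta;

	/* Obey the same outstanding read/atomic limit as plain RDMA READ */
	if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
		qp->s_flags |= RVT_S_WAIT_RDMAR;
		return 0;
	}

	/* Request one segment (at most seg_len bytes) per READ REQ packet */
	len = min_t(u32, req->seg_len,
		    wqe->length - req->seg_len * req->cur_seg);

	/* Build a TID RDMA READ REQ header instead of an IB RDMA READ */
	delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr, bth1, bth2, &len);
	if (delta <= 0)
		return 0;	/* wait for TID space */

	*hwords += delta;
	qp->s_psn = req->s_next_psn;	/* advance past this segment */
	return 1;
}
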
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 039cd3da
@@ -761,6 +761,7 @@ void quiesce_qp(struct rvt_qp *qp)
 void notify_qp_reset(struct rvt_qp *qp)
 {
+	hfi1_qp_kern_exp_rcv_clear_all(qp);
 	qp->r_adefered = 0;
 	clear_ahg(qp);
...
@@ -112,12 +112,14 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 {
 	struct rvt_ack_entry *e;
 	u32 hwords;
-	u32 len;
-	u32 bth0, bth2;
+	u32 len = 0;
+	u32 bth0 = 0, bth2 = 0;
 	u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
 	int middle = 0;
 	u32 pmtu = qp->pmtu;
 	struct hfi1_qp_priv *priv = qp->priv;
+	bool last_pkt;
+	u32 delta;
 
 	lockdep_assert_held(&qp->s_lock);
 	/* Don't send an ACK if we aren't supposed to. */
@@ -190,6 +192,26 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 			hwords++;
 			qp->s_ack_rdma_psn = e->psn;
 			bth2 = mask_psn(qp->s_ack_rdma_psn++);
+		} else if (e->opcode == TID_OP(READ_REQ)) {
+			/*
+			 * If a TID RDMA read response is being resent and
+			 * we haven't seen the duplicate request yet,
+			 * then stop sending the remaining responses the
+			 * responder has seen until the requester re-sends it.
+			 */
+			len = e->rdma_sge.sge_length;
+			if (len && !e->rdma_sge.mr) {
+				qp->s_tail_ack_queue = qp->r_head_ack_queue;
+				goto bail;
+			}
+			/* Copy SGE state in case we need to resend */
+			ps->s_txreq->mr = e->rdma_sge.mr;
+			if (ps->s_txreq->mr)
+				rvt_get_mr(ps->s_txreq->mr);
+			qp->s_ack_rdma_sge.sge = e->rdma_sge;
+			qp->s_ack_rdma_sge.num_sge = 1;
+			qp->s_ack_state = TID_OP(READ_RESP);
+			goto read_resp;
 		} else {
 			/* COMPARE_SWAP or FETCH_ADD */
 			ps->s_txreq->ss = NULL;
@@ -227,6 +249,28 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 		bth2 = mask_psn(qp->s_ack_rdma_psn++);
 		break;
+
+	case TID_OP(READ_RESP):
+read_resp:
+		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+		ps->s_txreq->ss = &qp->s_ack_rdma_sge;
+		delta = hfi1_build_tid_rdma_read_resp(qp, e, ohdr, &bth0,
+						      &bth1, &bth2, &len,
+						      &last_pkt);
+		if (delta == 0)
+			goto error_qp;
+		hwords += delta;
+		if (last_pkt) {
+			e->sent = 1;
+			/*
+			 * Increment qp->s_tail_ack_queue through s_ack_state
+			 * transition.
+			 */
+			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+		}
+		break;
+	case TID_OP(READ_REQ):
+		goto bail;
 	default:
normal:
 		/*
@@ -256,7 +300,14 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 	ps->s_txreq->hdr_dwords = hwords;
 	hfi1_make_ruc_header(qp, ohdr, bth0, bth1, bth2, middle, ps);
 	return 1;
+error_qp:
+	spin_unlock_irqrestore(&qp->s_lock, ps->flags);
+	spin_lock_irqsave(&qp->r_lock, ps->flags);
+	spin_lock(&qp->s_lock);
+	rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+	spin_unlock(&qp->s_lock);
+	spin_unlock_irqrestore(&qp->r_lock, ps->flags);
+	spin_lock_irqsave(&qp->s_lock, ps->flags);
bail:
 	qp->s_ack_state = OP(ACKNOWLEDGE);
 	/*
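
The make_rc_ack() hunks above implement the responder side ("Send the TID RDMA READ response"). The fragment below is an illustrative condensation of that flow, reusing the helper names from the diff; the resend-detection and error_qp paths are omitted and the sketch_* function name is invented for this sketch.

/*
 * Illustrative condensation of the responder-side additions above,
 * not the literal driver code. 'e' is the s_ack_queue entry holding
 * the incoming TID RDMA READ request; the function builds one
 * TID RDMA READ RESP packet and returns the extra header dwords
 * (0 means the error_qp path would be taken).
 */
static u32 sketch_send_tid_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
				     struct hfi1_pkt_state *ps,
				     struct ib_other_headers *ohdr,
				     u32 *bth0, u32 *bth1, u32 *bth2, u32 *len)
{
	bool last_pkt;
	u32 delta;

	/* Hold a reference on the MR in case the response must be resent */
	ps->s_txreq->mr = e->rdma_sge.mr;
	if (ps->s_txreq->mr)
		rvt_get_mr(ps->s_txreq->mr);
	qp->s_ack_rdma_sge.sge = e->rdma_sge;
	qp->s_ack_rdma_sge.num_sge = 1;
	ps->s_txreq->ss = &qp->s_ack_rdma_sge;

	/* Build the headers for one TID RDMA READ RESP packet */
	delta = hfi1_build_tid_rdma_read_resp(qp, e, ohdr, bth0, bth1,
					      bth2, len, &last_pkt);
	if (delta && last_pkt) {
		/* Advance s_tail_ack_queue via the s_ack_state transition */
		e->sent = 1;
		qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
	}
	return delta;
}
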
@@ -283,16 +334,20 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ib_other_headers *ohdr;
-	struct rvt_sge_state *ss;
+	struct rvt_sge_state *ss = NULL;
 	struct rvt_swqe *wqe;
-	u32 hwords;
-	u32 len;
-	u32 bth0 = 0, bth2;
+	struct hfi1_swqe_priv *wpriv;
+	struct tid_rdma_request *req = NULL;
+	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
+	u32 hwords = 5;
+	u32 len = 0;
+	u32 bth0 = 0, bth2 = 0;
 	u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
 	u32 pmtu = qp->pmtu;
 	char newreq;
 	int middle = 0;
 	int delta;
+	struct tid_rdma_flow *flow = NULL;
 
 	lockdep_assert_held(&qp->s_lock);
 	ps->s_txreq = get_txreq(ps->dev, qp);
@@ -334,7 +389,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 		}
 		clear_ahg(qp);
 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
-		rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
-				  IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
+		hfi1_trdma_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
+					 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
 		/* will get called again */
 		goto done_free_tx;
@@ -354,6 +409,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 	/* Send a request. */
 	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
+check_s_state:
 	switch (qp->s_state) {
 	default:
 		if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
@@ -375,9 +431,13 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 			/*
 			 * If a fence is requested, wait for previous
 			 * RDMA read and atomic operations to finish.
+			 * However, there is no need to guard against
+			 * TID RDMA READ after TID RDMA READ.
 			 */
 			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
-			    qp->s_num_rd_atomic) {
+			    qp->s_num_rd_atomic &&
+			    (wqe->wr.opcode != IB_WR_TID_RDMA_READ ||
+			     priv->pending_tid_r_segs < qp->s_num_rd_atomic)) {
 				qp->s_flags |= RVT_S_WAIT_FENCE;
 				goto bail;
 			}
@@ -526,6 +586,75 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 				qp->s_cur = 0;
 			break;
+
+		case IB_WR_TID_RDMA_READ:
+			wpriv = wqe->priv;
+			req = wqe_to_tid_req(wqe);
+			delta = cmp_psn(qp->s_psn, wqe->psn);
+			/*
+			 * Don't allow more operations to be started
+			 * than the QP limits allow. We could get here under
+			 * three conditions; (1) It's a new request; (2) We are
+			 * sending the second or later segment of a request,
+			 * but the qp->s_state is set to OP(RDMA_READ_REQUEST)
+			 * when the last segment of a previous request is
+			 * received just before this; (3) We are re-sending a
+			 * request.
+			 */
+			if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
+				qp->s_flags |= RVT_S_WAIT_RDMAR;
+				goto bail;
+			}
+			if (newreq) {
+				struct tid_rdma_flow *flow =
+					&req->flows[req->setup_head];
+
+				/*
+				 * Set up s_sge as it is needed for TID
+				 * allocation. However, if the pages have been
+				 * walked and mapped, skip it. An earlier try
+				 * has failed to allocate the TID entries.
+				 */
+				if (!flow->npagesets) {
+					qp->s_sge.sge = wqe->sg_list[0];
+					qp->s_sge.sg_list = wqe->sg_list + 1;
+					qp->s_sge.num_sge = wqe->wr.num_sge;
+					qp->s_sge.total_len = wqe->length;
+					qp->s_len = wqe->length;
+					req->isge = 0;
+					req->clear_tail = req->setup_head;
+					req->flow_idx = req->setup_head;
+					req->state = TID_REQUEST_ACTIVE;
+				}
+			} else if (delta == 0) {
+				/* Re-send a request */
+				req->cur_seg = 0;
+				req->comp_seg = 0;
+				req->ack_pending = 0;
+				req->flow_idx = req->clear_tail;
+				req->state = TID_REQUEST_RESEND;
+			}
+			req->s_next_psn = qp->s_psn;
+			/* Read one segment at a time */
+			len = min_t(u32, req->seg_len,
+				    wqe->length - req->seg_len * req->cur_seg);
+			delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr,
+							     &bth1, &bth2,
+							     &len);
+			if (delta <= 0) {
+				/* Wait for TID space */
+				goto bail;
+			}
+			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
+				qp->s_lsn++;
+			hwords += delta;
+			ss = &wpriv->ss;
+			/* Check if this is the last segment */
+			if (req->cur_seg >= req->total_segs &&
+			    ++qp->s_cur == qp->s_size)
+				qp->s_cur = 0;
+			break;
+
 		case IB_WR_ATOMIC_CMP_AND_SWP:
 		case IB_WR_ATOMIC_FETCH_AND_ADD:
 			/*
@@ -571,11 +700,13 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 		default:
 			goto bail;
 		}
-		qp->s_sge.sge = wqe->sg_list[0];
-		qp->s_sge.sg_list = wqe->sg_list + 1;
-		qp->s_sge.num_sge = wqe->wr.num_sge;
-		qp->s_sge.total_len = wqe->length;
-		qp->s_len = wqe->length;
+		if (wqe->wr.opcode != IB_WR_TID_RDMA_READ) {
+			qp->s_sge.sge = wqe->sg_list[0];
+			qp->s_sge.sg_list = wqe->sg_list + 1;
+			qp->s_sge.num_sge = wqe->wr.num_sge;
+			qp->s_sge.total_len = wqe->length;
+			qp->s_len = wqe->length;
+		}
 		if (newreq) {
 			qp->s_tail++;
 			if (qp->s_tail >= qp->s_size)
@@ -583,6 +714,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 		}
 		if (wqe->wr.opcode == IB_WR_RDMA_READ)
 			qp->s_psn = wqe->lpsn + 1;
+		else if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
+			qp->s_psn = req->s_next_psn;
 		else
 			qp->s_psn++;
 		break;
@@ -699,6 +832,99 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 		if (qp->s_cur == qp->s_size)
 			qp->s_cur = 0;
 		break;
+
+	case TID_OP(READ_RESP):
+		if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
+			goto bail;
+		/* This is used to restart a TID read request */
+		req = wqe_to_tid_req(wqe);
+		wpriv = wqe->priv;
+		/*
+		 * Back down. The field qp->s_psn has been set to the psn with
+		 * which the request should be restarted. It's OK to use
+		 * division as this is on the retry path.
+		 */
+		req->cur_seg = delta_psn(qp->s_psn, wqe->psn) / priv->pkts_ps;
+		/*
+		 * The following function needs to be redefined to return the
+		 * status to make sure that we find the flow. At the same
+		 * time, we can use the req->state change to check if the
+		 * call succeeds or not.
+		 */
+		req->state = TID_REQUEST_RESEND;
+		hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
+		if (req->state != TID_REQUEST_ACTIVE) {
+			/*
+			 * Failed to find the flow. Release all allocated tid
+			 * resources.
+			 */
+			hfi1_kern_exp_rcv_clear_all(req);
+			hfi1_kern_clear_hw_flow(priv->rcd, qp);
+
+			hfi1_trdma_send_complete(qp, wqe, IB_WC_LOC_QP_OP_ERR);
+			goto bail;
+		}
+		req->state = TID_REQUEST_RESEND;
+		len = min_t(u32, req->seg_len,
+			    wqe->length - req->seg_len * req->cur_seg);
+		flow = &req->flows[req->flow_idx];
+		len -= flow->sent;
+		req->s_next_psn = flow->flow_state.ib_lpsn + 1;
+		delta = hfi1_build_tid_rdma_read_packet(wqe, ohdr, &bth1,
+							&bth2, &len);
+		if (delta <= 0) {
+			/* Wait for TID space */
+			goto bail;
+		}
+		hwords += delta;
+		ss = &wpriv->ss;
+		/* Check if this is the last segment */
+		if (req->cur_seg >= req->total_segs &&
+		    ++qp->s_cur == qp->s_size)
+			qp->s_cur = 0;
+		qp->s_psn = req->s_next_psn;
+		break;
+	case TID_OP(READ_REQ):
+		req = wqe_to_tid_req(wqe);
+		delta = cmp_psn(qp->s_psn, wqe->psn);
+		/*
+		 * If the current WR is not TID RDMA READ, or this is the
+		 * start of a new request, we need to change the qp->s_state
+		 * so that the request can be set up properly.
+		 */
+		if (wqe->wr.opcode != IB_WR_TID_RDMA_READ || delta == 0 ||
+		    qp->s_cur == qp->s_tail) {
+			qp->s_state = OP(RDMA_READ_REQUEST);
+			if (delta == 0 || qp->s_cur == qp->s_tail)
+				goto check_s_state;
+			else
+				goto bail;
+		}
+
+		/* Rate limiting */
+		if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
+			qp->s_flags |= RVT_S_WAIT_RDMAR;
+			goto bail;
+		}
+
+		wpriv = wqe->priv;
+		/* Read one segment at a time */
+		len = min_t(u32, req->seg_len,
+			    wqe->length - req->seg_len * req->cur_seg);
+		delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr, &bth1,
+						     &bth2, &len);
+		if (delta <= 0) {
+			/* Wait for TID space */
+			goto bail;
+		}
+		hwords += delta;
+		ss = &wpriv->ss;
+		/* Check if this is the last segment */
+		if (req->cur_seg >= req->total_segs &&
+		    ++qp->s_cur == qp->s_size)
+			qp->s_cur = 0;
+		qp->s_psn = req->s_next_psn;
+		break;
 	}
 	qp->s_sending_hpsn = bth2;
 	delta = delta_psn(bth2, wqe->psn);
@@ -1148,7 +1374,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 			hfi1_kern_clear_hw_flow(priv->rcd, qp);
 		}
-		rvt_send_complete(qp, wqe,
-				  IB_WC_RETRY_EXC_ERR);
+		hfi1_trdma_send_complete(qp, wqe,
+					 IB_WC_RETRY_EXC_ERR);
 		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 	}
@@ -1189,7 +1415,8 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
 	for (;;) {
 		wqe = rvt_get_swqe_ptr(qp, n);
 		if (cmp_psn(psn, wqe->lpsn) <= 0) {
-			if (wqe->wr.opcode == IB_WR_RDMA_READ)
+			if (wqe->wr.opcode == IB_WR_RDMA_READ ||
+			    wqe->wr.opcode == IB_WR_TID_RDMA_READ)
 				qp->s_sending_psn = wqe->lpsn + 1;
 			else
 				qp->s_sending_psn = psn + 1;
@@ -1238,8 +1465,9 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
 	}
 	opcode = ib_bth_get_opcode(ohdr);
-	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
-	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
+	if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
+	     opcode <= OP(ATOMIC_ACKNOWLEDGE)) ||
+	    opcode == TID_OP(READ_RESP)) {
 		WARN_ON(!qp->s_rdma_ack_cnt);
 		qp->s_rdma_ack_cnt--;
 		return;
@@ -1255,8 +1483,12 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
 	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
 	    !(qp->s_flags &
 	      (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
-	    (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
-		rvt_add_retry_timer(qp);
+	    (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+		if (opcode == TID_OP(READ_REQ))
+			rvt_add_retry_timer_ext(qp, priv->timeout_shift);
+		else
+			rvt_add_retry_timer(qp);
+	}
 
 	while (qp->s_last != qp->s_acked) {
 		u32 s_last;
@@ -1265,6 +1497,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
 		if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
 		    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 			break;
+		trdma_clean_swqe(qp, wqe);
 		rvt_qp_wqe_unreserve(qp, wqe);
 		s_last = qp->s_last;
 		trace_hfi1_qp_send_completion(qp, wqe, s_last);
@@ -1317,6 +1550,7 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
 	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
 		u32 s_last;
 
+		trdma_clean_swqe(qp, wqe);
 		rvt_put_swqe(wqe);
 		rvt_qp_wqe_unreserve(qp, wqe);
 		s_last = qp->s_last;
@@ -1393,6 +1627,7 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 {
 	struct hfi1_ibport *ibp;
 	enum ib_wc_status status;
+	struct hfi1_qp_priv *qpriv = qp->priv;
 	struct rvt_swqe *wqe;
 	int ret = 0;
 	u32 ack_psn;
@@ -1439,6 +1674,8 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 	 */
 	if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
 	     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
+	    (wqe->wr.opcode == IB_WR_TID_RDMA_READ &&
+	     (opcode != TID_OP(READ_RESP) || diff != 0)) ||
 	    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 	      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
 	     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
@@ -1492,7 +1729,13 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 	switch (aeth >> IB_AETH_NAK_SHIFT) {
 	case 0: /* ACK */
 		this_cpu_inc(*ibp->rvp.rc_acks);
-		if (qp->s_acked != qp->s_tail) {
+		if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
+			if (wqe_to_tid_req(wqe)->ack_pending)
+				rvt_mod_retry_timer_ext(qp,
+							qpriv->timeout_shift);
+			else
+				rvt_stop_rc_timers(qp);
+		} else if (qp->s_acked != qp->s_tail) {
 			/*
 			 * We are expecting more ACKs so
 			 * mod the retry timer.
@@ -1581,7 +1824,10 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 		ibp->rvp.n_other_naks++;
class_b:
 		if (qp->s_last == qp->s_acked) {
-			rvt_send_complete(qp, wqe, status);
+			if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
+				hfi1_kern_read_tid_flow_free(qp);
+
+			hfi1_trdma_send_complete(qp, wqe, status);
 			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 		}
 		break;
@@ -1622,6 +1868,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
 	while (cmp_psn(psn, wqe->lpsn) > 0) {
 		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
+		    wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
 		    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
 			break;
...
@@ -2796,3 +2796,36 @@ void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
 	req->state = TID_REQUEST_ACTIVE;
 }
+
+void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
+{
+	int i, ret;
+	struct hfi1_qp_priv *qpriv = qp->priv;
+	struct tid_flow_state *fs;
+
+	if (qp->ibqp.qp_type != IB_QPT_RC || !HFI1_CAP_IS_KSET(TID_RDMA))
+		return;
+
+	/*
+	 * First, clear the flow to help prevent any delayed packets from
+	 * being delivered.
+	 */
+	fs = &qpriv->flow_state;
+	if (fs->index != RXE_NUM_TID_FLOWS)
+		hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
+
+	for (i = qp->s_acked; i != qp->s_head;) {
+		struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
+
+		if (++i == qp->s_size)
+			i = 0;
+		/* Free only locally allocated TID entries */
+		if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
+			continue;
+		do {
+			struct hfi1_swqe_priv *priv = wqe->priv;
+
+			ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
+		} while (!ret);
+	}
+}
@@ -209,5 +209,6 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
 			      struct hfi1_packet *packet);
 void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
 			       u32 *bth2);
+void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp);
 
 #endif /* HFI1_TID_RDMA_H */
@@ -165,6 +165,7 @@ const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
 	[IB_WR_SEND] = IB_WC_SEND,
 	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
 	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+	[IB_WR_TID_RDMA_READ] = IB_WC_RDMA_READ,
 	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
 	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
 	[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
...