Commit a0b34f75 authored by Kaike Wan's avatar Kaike Wan Committed by Doug Ledford

IB/hfi1: Add interlock between a TID RDMA request and other requests

This locking mechanism is designed to provent vavious memory corruption
scenarios from occurring when requests are pipelined, especially when
RDMA READ/WRITE requests are interleaved with TID RDMA READ/WRITE
requests:
1. READ-AFTER-READ;
2. READ-AFTER-WRITE;
3. WRITE-AFTER-READ;
When memory corruption is likely, a request will be held back until
previous requests have been completed.
Reviewed-by: default avatarMike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: default avatarMitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: default avatarKaike Wan <kaike.wan@intel.com>
Signed-off-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent 24b11923
......@@ -482,6 +482,15 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
len = wqe->length;
ss = &qp->s_sge;
bth2 = mask_psn(qp->s_psn);
/*
* Interlock between various IB requests and TID RDMA
* if necessary.
*/
if ((priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) ||
hfi1_tid_rdma_wqe_interlock(qp, wqe))
goto bail;
switch (wqe->wr.opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
......@@ -1321,6 +1330,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
qp->s_state = OP(SEND_LAST);
}
done:
priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
qp->s_psn = psn;
/*
* Set RVT_S_WAIT_PSN as rc_complete() may start the timer
......@@ -1540,6 +1550,8 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct rvt_swqe *wqe,
struct hfi1_ibport *ibp)
{
struct hfi1_qp_priv *priv = qp->priv;
lockdep_assert_held(&qp->s_lock);
/*
* Don't decrement refcount and don't generate a
......@@ -1608,6 +1620,10 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
qp->s_draining = 0;
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
}
if (priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) {
priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
hfi1_schedule_send(qp);
}
return wqe;
}
......
......@@ -2829,3 +2829,40 @@ void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
} while (!ret);
}
}
bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
struct rvt_swqe *prev;
struct hfi1_qp_priv *priv = qp->priv;
u32 s_prev;
s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
prev = rvt_get_swqe_ptr(qp, s_prev);
switch (wqe->wr.opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
case IB_WR_SEND_WITH_INV:
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_READ:
break;
case IB_WR_TID_RDMA_READ:
switch (prev->wr.opcode) {
case IB_WR_RDMA_READ:
if (qp->s_acked != qp->s_cur)
goto interlock;
break;
default:
break;
}
default:
break;
}
return false;
interlock:
priv->s_flags |= HFI1_S_TID_WAIT_INTERLCK;
return true;
}
......@@ -17,6 +17,16 @@
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
/*
* Bit definitions for priv->s_flags.
* These bit flags overload the bit flags defined for the QP's s_flags.
* Due to the fact that these bit fields are used only for the QP priv
* s_flags, there are no collisions.
*
* HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
*/
#define HFI1_S_TID_WAIT_INTERLCK BIT(5)
struct tid_rdma_params {
struct rcu_head rcu_head;
u32 qp;
......@@ -210,5 +220,6 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
u32 *bth2);
void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp);
bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe);
#endif /* HFI1_TID_RDMA_H */
......@@ -171,6 +171,9 @@ struct hfi1_qp_priv {
u8 hdr_type; /* 9B or 16B */
unsigned long tid_timer_timeout_jiffies;
/* variables for the TID RDMA SE state machine */
u32 s_flags;
/* For TID RDMA READ */
u32 tid_r_reqs; /* Num of tid reads requested */
u32 tid_r_comp; /* Num of tid reads completed */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment