Commit 0fd7e1d8 authored by Roland Dreier's avatar Roland Dreier

IB/mlx4: Fix memory ordering problem when posting LSO sends

The current work request posting code writes the LSO segment before
writing any data segments.  This leaves a window where the LSO segment
overwrites the stamping in one cacheline that the HCA prefetches
before the rest of the cacheline is filled with the correct data
segments.  When the HCA processes this work request, a local
protection error may result.

Fix this by saving the LSO header size field off and writing it only
after all data segments are written.  This fix is a cleaned-up version
of a patch from Jack Morgenstein <jackm@dev.mellanox.co.il>.

This fixes <https://bugs.openfabrics.org/show_bug.cgi?id=1383>.
Reported-by: default avatarJack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: default avatarRoland Dreier <rolandd@cisco.com>
parent d3b924d9
...@@ -1462,7 +1462,8 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) ...@@ -1462,7 +1462,8 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
} }
static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
struct mlx4_ib_qp *qp, unsigned *lso_seg_len) struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
__be32 *lso_hdr_sz)
{ {
unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
...@@ -1479,12 +1480,8 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, ...@@ -1479,12 +1480,8 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
/* make sure LSO header is written before overwriting stamping */ *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
wmb();
wqe->mss_hdr_size = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
wr->wr.ud.hlen); wr->wr.ud.hlen);
*lso_seg_len = halign; *lso_seg_len = halign;
return 0; return 0;
} }
...@@ -1518,6 +1515,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ...@@ -1518,6 +1515,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int uninitialized_var(stamp); int uninitialized_var(stamp);
int uninitialized_var(size); int uninitialized_var(size);
unsigned uninitialized_var(seglen); unsigned uninitialized_var(seglen);
__be32 dummy;
__be32 *lso_wqe;
__be32 uninitialized_var(lso_hdr_sz);
int i; int i;
spin_lock_irqsave(&qp->sq.lock, flags); spin_lock_irqsave(&qp->sq.lock, flags);
...@@ -1525,6 +1525,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ...@@ -1525,6 +1525,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
ind = qp->sq_next_wqe; ind = qp->sq_next_wqe;
for (nreq = 0; wr; ++nreq, wr = wr->next) { for (nreq = 0; wr; ++nreq, wr = wr->next) {
lso_wqe = &dummy;
if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
err = -ENOMEM; err = -ENOMEM;
*bad_wr = wr; *bad_wr = wr;
...@@ -1606,11 +1608,12 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ...@@ -1606,11 +1608,12 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_datagram_seg) / 16; size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
if (wr->opcode == IB_WR_LSO) { if (wr->opcode == IB_WR_LSO) {
err = build_lso_seg(wqe, wr, qp, &seglen); err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
if (unlikely(err)) { if (unlikely(err)) {
*bad_wr = wr; *bad_wr = wr;
goto out; goto out;
} }
lso_wqe = (__be32 *) wqe;
wqe += seglen; wqe += seglen;
size += seglen / 16; size += seglen / 16;
} }
...@@ -1652,6 +1655,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ...@@ -1652,6 +1655,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
for (i = wr->num_sge - 1; i >= 0; --i, --dseg) for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
set_data_seg(dseg, wr->sg_list + i); set_data_seg(dseg, wr->sg_list + i);
/*
* Possibly overwrite stamping in cacheline with LSO
* segment only after making sure all data segments
* are written.
*/
wmb();
*lso_wqe = lso_hdr_sz;
ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
MLX4_WQE_CTRL_FENCE : 0) | size; MLX4_WQE_CTRL_FENCE : 0) | size;
...@@ -1686,7 +1697,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ...@@ -1686,7 +1697,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
stamp_send_wqe(qp, stamp, size * 16); stamp_send_wqe(qp, stamp, size * 16);
ind = pad_wraparound(qp, ind); ind = pad_wraparound(qp, ind);
} }
} }
out: out:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment