Commit 2d4b6466 authored by Eugenia Emantayev, committed by David S. Miller

net/mlx4_en: Fix BlueFlame race

Fix a race between the BlueFlame flow and WQE stamping in the post-send flow.
Example:
	SW: Build WQE 0 on the TX buffer, except the ownership bit
	SW: Set ownership for WQE 0 on the TX buffer
	SW: Ring doorbell for WQE 0
	SW: Build WQE 1 on the TX buffer, except the ownership bit
	SW: Set ownership for WQE 1 on the TX buffer
	HW: Read WQE 0 and then WQE 1, before doorbell was rung/BF was done for WQE 1
	HW: Produce CQEs for WQE 0 and WQE 1
	SW: Process the CQEs, and stamp WQE 0 and WQE 1 accordingly (on the TX buffer)
	SW: Copy WQE 1 from the TX buffer to the BF register - ALREADY STAMPED!
	HW: CQE error with index 0xFFFF  - the BF WQE's control segment is STAMPED,
		so the BF index is 0xFFFF. Error: Invalid Opcode.
As a result QP enters the error state and no traffic can be sent.

Solution:
When stamping, do not stamp the last completed WQE.
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 73d94e94
...@@ -191,6 +191,39 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, ...@@ -191,6 +191,39 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
} }
/*
 * Overwrite the TX descriptor that starts at TXBB @index with the stamp
 * pattern.
 *
 * One 32-bit stamp word is stored for every STAMP_STRIDE bytes of the
 * descriptor, whose length is tx_info->nr_txbb * TXBB_SIZE bytes.  The
 * stamp word is STAMP_VAL with bit STAMP_SHIFT set when @owner is
 * non-zero, converted to big-endian.
 *
 * @priv:  not referenced by this function
 * @ring:  TX ring whose buffer is stamped
 * @index: TXBB index of the first block of the descriptor
 * @owner: non-zero to set the STAMP_SHIFT bit in the stamp word
 */
static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
			      struct mlx4_en_tx_ring *ring, int index,
			      u8 owner)
{
	struct mlx4_en_tx_info *info = &ring->tx_info[index];
	struct mlx4_en_tx_desc *desc = ring->buf + index * TXBB_SIZE;
	void *buf_end = ring->buf + ring->buf_size;
	__be32 mark = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
	__be32 *slot = (__be32 *)desc;
	int off = 0;

	/* Fast path: the descriptor ends at or before the end of the buffer */
	if (likely((void *)desc + info->nr_txbb * TXBB_SIZE <= buf_end)) {
		while (off < info->nr_txbb * TXBB_SIZE) {
			*slot = mark;
			slot += STAMP_DWORDS;
			off += STAMP_STRIDE;
		}
		return;
	}

	/*
	 * Slow path: the descriptor runs past the end of the buffer.  Once
	 * the write pointer reaches the end, continue from the start of the
	 * buffer with the 0x80000000 bit of the stamp toggled (presumably
	 * the ownership bit -- confirm against STAMP_SHIFT).
	 */
	while (off < info->nr_txbb * TXBB_SIZE) {
		*slot = mark;
		slot += STAMP_DWORDS;
		if ((void *)slot >= buf_end) {
			slot = ring->buf;
			mark ^= cpu_to_be32(0x80000000);
		}
		off += STAMP_STRIDE;
	}
}
static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring, struct mlx4_en_tx_ring *ring,
...@@ -205,8 +238,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, ...@@ -205,8 +238,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
void *end = ring->buf + ring->buf_size; void *end = ring->buf + ring->buf_size;
int frags = skb_shinfo(skb)->nr_frags; int frags = skb_shinfo(skb)->nr_frags;
int i; int i;
__be32 *ptr = (__be32 *)tx_desc;
__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
struct skb_shared_hwtstamps hwts; struct skb_shared_hwtstamps hwts;
if (timestamp) { if (timestamp) {
...@@ -232,12 +263,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, ...@@ -232,12 +263,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
skb_frag_size(frag), PCI_DMA_TODEVICE); skb_frag_size(frag), PCI_DMA_TODEVICE);
} }
} }
/* Stamp the freed descriptor */
for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
*ptr = stamp;
ptr += STAMP_DWORDS;
}
} else { } else {
if (!tx_info->inl) { if (!tx_info->inl) {
if ((void *) data >= end) { if ((void *) data >= end) {
...@@ -263,16 +288,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, ...@@ -263,16 +288,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
++data; ++data;
} }
} }
/* Stamp the freed descriptor */
for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
*ptr = stamp;
ptr += STAMP_DWORDS;
if ((void *) ptr >= end) {
ptr = ring->buf;
stamp ^= cpu_to_be32(0x80000000);
}
}
} }
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
return tx_info->nr_txbb; return tx_info->nr_txbb;
...@@ -318,8 +333,9 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) ...@@ -318,8 +333,9 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
struct mlx4_cqe *cqe; struct mlx4_cqe *cqe;
u16 index; u16 index;
u16 new_index, ring_index; u16 new_index, ring_index, stamp_index;
u32 txbbs_skipped = 0; u32 txbbs_skipped = 0;
u32 txbbs_stamp = 0;
u32 cons_index = mcq->cons_index; u32 cons_index = mcq->cons_index;
int size = cq->size; int size = cq->size;
u32 size_mask = ring->size_mask; u32 size_mask = ring->size_mask;
...@@ -335,6 +351,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) ...@@ -335,6 +351,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
index = cons_index & size_mask; index = cons_index & size_mask;
cqe = &buf[(index << factor) + factor]; cqe = &buf[(index << factor) + factor];
ring_index = ring->cons & size_mask; ring_index = ring->cons & size_mask;
stamp_index = ring_index;
/* Process all completed CQEs */ /* Process all completed CQEs */
while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
...@@ -359,6 +376,12 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) ...@@ -359,6 +376,12 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
priv, ring, ring_index, priv, ring, ring_index,
!!((ring->cons + txbbs_skipped) & !!((ring->cons + txbbs_skipped) &
ring->size), timestamp); ring->size), timestamp);
mlx4_en_stamp_wqe(priv, ring, stamp_index,
!!((ring->cons + txbbs_stamp) &
ring->size));
stamp_index = ring_index;
txbbs_stamp = txbbs_skipped;
packets++; packets++;
bytes += ring->tx_info[ring_index].nr_bytes; bytes += ring->tx_info[ring_index].nr_bytes;
} while (ring_index != new_index); } while (ring_index != new_index);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment