Commit 3eb1a40f authored by Alexander Duyck, committed by Jeff Kirsher

igbvf: Make next_to_watch a pointer and adjust memory barriers to avoid races

This change addresses several races that can occur because next_to_watch may be set to a
value indicating that the descriptor is done when it is not.  To correct this, next_to_watch
is instead made a pointer that is set to NULL during cleanup, and set to the eop_desc only
after the descriptor ring entries have been written.

To enforce proper ordering, the next_to_watch pointer is not set until after a wmb() that
follows the writes to the last descriptor of a transmit.  To guarantee that the descriptor
is not read until after eop_desc, we use read_barrier_depends(), which is only really
necessary on the Alpha architecture.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent e792cd91
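
For illustration only, the ordering described in the commit message can be sketched roughly as
follows. This is a simplified, hypothetical rendering (the struct names, tx_publish/tx_clean_one
helpers, and STAT_DD flag below are placeholders, not the driver's actual definitions); the real
implementation is in the diff that follows.

/* Hypothetical sketch of the next_to_watch publish/consume pattern.
 * Types and names are illustrative placeholders; see the real diff below. */

struct desc { __le32 wb_status; };              /* hardware sets a DD bit here  */
struct buf  { struct desc *next_to_watch; };    /* NULL means nothing to clean  */

#define STAT_DD 0x1                             /* placeholder for the DD flag  */

/* transmit side: publish the pointer only after the descriptors are written */
static void tx_publish(struct buf *first, struct desc *eop_desc)
{
	/* ... descriptor ring entries for the packet are written here ... */
	wmb();                          /* descriptors visible before the pointer */
	first->next_to_watch = eop_desc;
	/* ... tail register write follows ... */
}

/* cleanup side: read the pointer first, then the descriptor status through it */
static bool tx_clean_one(struct buf *buffer_info)
{
	struct desc *eop_desc = buffer_info->next_to_watch;

	if (!eop_desc)                  /* not published yet: no work pending */
		return false;

	read_barrier_depends();         /* order reads through the pointer (Alpha) */

	if (!(eop_desc->wb_status & cpu_to_le32(STAT_DD)))
		return false;           /* hardware has not completed this packet */

	buffer_info->next_to_watch = NULL;   /* prevent false hang detection */
	/* ... unmap buffers and free the skb ... */
	return true;
}

The point of the sketch is that the consumer never dereferences eop_desc before it has been
published by the wmb()-ordered store, and clearing the pointer during cleanup keeps a stale
value from ever indicating completed work.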
@@ -127,8 +127,8 @@ struct igbvf_buffer {
 		/* Tx */
 		struct {
 			unsigned long time_stamp;
+			union e1000_adv_tx_desc *next_to_watch;
 			u16 length;
-			u16 next_to_watch;
 			u16 mapped_as_page;
 		};
 		/* Rx */
@@ -797,20 +797,31 @@ static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
 	struct sk_buff *skb;
 	union e1000_adv_tx_desc *tx_desc, *eop_desc;
 	unsigned int total_bytes = 0, total_packets = 0;
-	unsigned int i, eop, count = 0;
+	unsigned int i, count = 0;
 	bool cleaned = false;
 
 	i = tx_ring->next_to_clean;
-	eop = tx_ring->buffer_info[i].next_to_watch;
-	eop_desc = IGBVF_TX_DESC_ADV(*tx_ring, eop);
+	buffer_info = &tx_ring->buffer_info[i];
+	eop_desc = buffer_info->next_to_watch;
+
+	do {
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		read_barrier_depends();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		buffer_info->next_to_watch = NULL;
 
-	while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
-	       (count < tx_ring->count)) {
-		rmb();	/* read buffer_info after eop_desc status */
 		for (cleaned = false; !cleaned; count++) {
 			tx_desc = IGBVF_TX_DESC_ADV(*tx_ring, i);
-			buffer_info = &tx_ring->buffer_info[i];
-			cleaned = (i == eop);
+			cleaned = (tx_desc == eop_desc);
 			skb = buffer_info->skb;
 
 			if (skb) {
@@ -831,10 +842,12 @@ static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
 			i++;
 			if (i == tx_ring->count)
 				i = 0;
+
+			buffer_info = &tx_ring->buffer_info[i];
 		}
 
-		eop = tx_ring->buffer_info[i].next_to_watch;
-		eop_desc = IGBVF_TX_DESC_ADV(*tx_ring, eop);
-	}
+		eop_desc = buffer_info->next_to_watch;
+	} while (count < tx_ring->count);
 
 	tx_ring->next_to_clean = i;
@@ -1961,7 +1974,6 @@ static int igbvf_tso(struct igbvf_adapter *adapter,
 	context_desc->seqnum_seed = 0;
 	buffer_info->time_stamp = jiffies;
-	buffer_info->next_to_watch = i;
 	buffer_info->dma = 0;
 	i++;
 	if (i == tx_ring->count)
@@ -2021,7 +2033,6 @@ static inline bool igbvf_tx_csum(struct igbvf_adapter *adapter,
 		context_desc->mss_l4len_idx = 0;
 		buffer_info->time_stamp = jiffies;
-		buffer_info->next_to_watch = i;
 		buffer_info->dma = 0;
 		i++;
 		if (i == tx_ring->count)
@@ -2061,8 +2072,7 @@ static int igbvf_maybe_stop_tx(struct net_device *netdev, int size)
 static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 				   struct igbvf_ring *tx_ring,
-				   struct sk_buff *skb,
-				   unsigned int first)
+				   struct sk_buff *skb)
 {
 	struct igbvf_buffer *buffer_info;
 	struct pci_dev *pdev = adapter->pdev;
@@ -2077,7 +2087,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 	buffer_info->length = len;
 	/* set time_stamp *before* dma to help avoid a possible race */
 	buffer_info->time_stamp = jiffies;
-	buffer_info->next_to_watch = i;
 	buffer_info->mapped_as_page = false;
 	buffer_info->dma = dma_map_single(&pdev->dev, skb->data, len,
 					  DMA_TO_DEVICE);
@@ -2100,7 +2109,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 		BUG_ON(len >= IGBVF_MAX_DATA_PER_TXD);
 		buffer_info->length = len;
 		buffer_info->time_stamp = jiffies;
-		buffer_info->next_to_watch = i;
 		buffer_info->mapped_as_page = true;
 		buffer_info->dma = skb_frag_dma_map(&pdev->dev, frag, 0, len,
 						    DMA_TO_DEVICE);
@@ -2109,7 +2117,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 	}
 
 	tx_ring->buffer_info[i].skb = skb;
-	tx_ring->buffer_info[first].next_to_watch = i;
 
 	return ++count;
@@ -2120,7 +2127,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 	buffer_info->dma = 0;
 	buffer_info->time_stamp = 0;
 	buffer_info->length = 0;
-	buffer_info->next_to_watch = 0;
 	buffer_info->mapped_as_page = false;
 	if (count)
 		count--;
@@ -2139,7 +2145,8 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
 				      struct igbvf_ring *tx_ring,
-				      int tx_flags, int count, u32 paylen,
+				      int tx_flags, int count,
+				      unsigned int first, u32 paylen,
 				      u8 hdr_len)
 {
 	union e1000_adv_tx_desc *tx_desc = NULL;
@@ -2189,6 +2196,7 @@ static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
 	 * such as IA-64). */
 	wmb();
 
+	tx_ring->buffer_info[first].next_to_watch = tx_desc;
 	tx_ring->next_to_use = i;
 	writel(i, adapter->hw.hw_addr + tx_ring->tail);
 	/* we need this if more than one processor can write to our tail
@@ -2255,11 +2263,11 @@ static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb,
 	 * count reflects descriptors mapped, if 0 then mapping error
 	 * has occurred and we need to rewind the descriptor queue
 	 */
-	count = igbvf_tx_map_adv(adapter, tx_ring, skb, first);
+	count = igbvf_tx_map_adv(adapter, tx_ring, skb);
 
 	if (count) {
 		igbvf_tx_queue_adv(adapter, tx_ring, tx_flags, count,
-				   skb->len, hdr_len);
+				   first, skb->len, hdr_len);
 		/* Make sure there is space in the ring for the next send. */
 		igbvf_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 4);
 	} else {