Commit a620c163 authored by David Daney's avatar David Daney Committed by Ralf Baechle

Staging: octeon-ethernet: Fix race freeing transmit buffers.

The existing code had the following race:

Thread-1                       Thread-2

inc/read in_use
                               inc/read in_use
inc tx_free_list[qos].len
                               inc tx_free_list[qos].len

The actual in_use value was incremented twice, but thread-1 is going
to free memory based on its stale value, and will free one too many
times.  The result is that memory is freed back to the kernel while
its packet is still in the transmit buffer.  If the memory is
overwritten before it is transmitted, the hardware will put a valid
checksum on it and send it out (just like it does with good packets).
If by chance the TCP flags are clobbered but not the addresses or
ports, the result can be a broken TCP stream.

The fix is to track the number of freed packets in a single location
(a Fetch-and-Add Unit register).  That way it can never get out of sync
with itself.

We try to free up to MAX_SKB_TO_FREE (currently 10) buffers at a time.
If fewer are available we adjust the free count with the difference.
The action of claiming buffers to free is atomic so two threads cannot
claim the same buffers.
Signed-off-by: default avatarDavid Daney <ddaney@caviumnetworks.com>
Signed-off-by: default avatarRalf Baechle <ralf@linux-mips.org>
parent f696a108
...@@ -117,6 +117,8 @@ ...@@ -117,6 +117,8 @@
/* Maximum number of packets to process per interrupt. */ /* Maximum number of packets to process per interrupt. */
#define MAX_RX_PACKETS 120 #define MAX_RX_PACKETS 120
/* Maximum number of SKBs to try to free per xmit packet. */
#define MAX_SKB_TO_FREE 10
#define MAX_OUT_QUEUE_DEPTH 1000 #define MAX_OUT_QUEUE_DEPTH 1000
#ifndef CONFIG_SMP #ifndef CONFIG_SMP
......
...@@ -47,6 +47,7 @@ ...@@ -47,6 +47,7 @@
#include "ethernet-defines.h" #include "ethernet-defines.h"
#include "octeon-ethernet.h" #include "octeon-ethernet.h"
#include "ethernet-tx.h"
#include "ethernet-util.h" #include "ethernet-util.h"
#include "cvmx-wqe.h" #include "cvmx-wqe.h"
...@@ -82,8 +83,10 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -82,8 +83,10 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
uint64_t old_scratch2; uint64_t old_scratch2;
int dropped; int dropped;
int qos; int qos;
int queue_it_up;
struct octeon_ethernet *priv = netdev_priv(dev); struct octeon_ethernet *priv = netdev_priv(dev);
int32_t in_use; int32_t skb_to_free;
int32_t undo;
int32_t buffers_to_free; int32_t buffers_to_free;
#if REUSE_SKBUFFS_WITHOUT_FREE #if REUSE_SKBUFFS_WITHOUT_FREE
unsigned char *fpa_head; unsigned char *fpa_head;
...@@ -120,15 +123,15 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -120,15 +123,15 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
old_scratch2 = cvmx_scratch_read64(CVMX_SCR_SCRATCH + 8); old_scratch2 = cvmx_scratch_read64(CVMX_SCR_SCRATCH + 8);
/* /*
* Assume we're going to be able t osend this * Fetch and increment the number of packets to be
* packet. Fetch and increment the number of pending * freed.
* packets for output.
*/ */
cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH + 8, cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH + 8,
FAU_NUM_PACKET_BUFFERS_TO_FREE, FAU_NUM_PACKET_BUFFERS_TO_FREE,
0); 0);
cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH, cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH,
priv->fau + qos * 4, 1); priv->fau + qos * 4,
MAX_SKB_TO_FREE);
} }
/* /*
...@@ -286,15 +289,29 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -286,15 +289,29 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
if (USE_ASYNC_IOBDMA) { if (USE_ASYNC_IOBDMA) {
/* Get the number of skbuffs in use by the hardware */ /* Get the number of skbuffs in use by the hardware */
CVMX_SYNCIOBDMA; CVMX_SYNCIOBDMA;
in_use = cvmx_scratch_read64(CVMX_SCR_SCRATCH); skb_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
buffers_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH + 8); buffers_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH + 8);
} else { } else {
/* Get the number of skbuffs in use by the hardware */ /* Get the number of skbuffs in use by the hardware */
in_use = cvmx_fau_fetch_and_add32(priv->fau + qos * 4, 1); skb_to_free = cvmx_fau_fetch_and_add32(priv->fau + qos * 4,
MAX_SKB_TO_FREE);
buffers_to_free = buffers_to_free =
cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0); cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
} }
/*
* We try to claim MAX_SKB_TO_FREE buffers. If there were not
* that many available, we have to un-claim (undo) any that
* were in excess. If skb_to_free is positive we will free
* that many buffers.
*/
undo = skb_to_free > 0 ?
MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
if (undo > 0)
cvmx_fau_atomic_add32(priv->fau+qos*4, -undo);
skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ?
MAX_SKB_TO_FREE : -skb_to_free;
/* /*
* If we're sending faster than the receive can free them then * If we're sending faster than the receive can free them then
* don't do the HW free. * don't do the HW free.
...@@ -330,38 +347,31 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -330,38 +347,31 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
cvmx_scratch_write64(CVMX_SCR_SCRATCH + 8, old_scratch2); cvmx_scratch_write64(CVMX_SCR_SCRATCH + 8, old_scratch2);
} }
queue_it_up = 0;
if (unlikely(dropped)) { if (unlikely(dropped)) {
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
cvmx_fau_atomic_add32(priv->fau + qos * 4, -1);
priv->stats.tx_dropped++; priv->stats.tx_dropped++;
} else { } else {
if (USE_SKBUFFS_IN_HW) { if (USE_SKBUFFS_IN_HW) {
/* Put this packet on the queue to be freed later */ /* Put this packet on the queue to be freed later */
if (pko_command.s.dontfree) if (pko_command.s.dontfree)
skb_queue_tail(&priv->tx_free_list[qos], skb); queue_it_up = 1;
else { else
cvmx_fau_atomic_add32 cvmx_fau_atomic_add32
(FAU_NUM_PACKET_BUFFERS_TO_FREE, -1); (FAU_NUM_PACKET_BUFFERS_TO_FREE, -1);
cvmx_fau_atomic_add32(priv->fau + qos * 4, -1);
}
} else { } else {
/* Put this packet on the queue to be freed later */ /* Put this packet on the queue to be freed later */
skb_queue_tail(&priv->tx_free_list[qos], skb); queue_it_up = 1;
} }
} }
/* Free skbuffs not in use by the hardware, possibly two at a time */ if (queue_it_up) {
if (skb_queue_len(&priv->tx_free_list[qos]) > in_use) {
spin_lock(&priv->tx_free_list[qos].lock); spin_lock(&priv->tx_free_list[qos].lock);
/* __skb_queue_tail(&priv->tx_free_list[qos], skb);
* Check again now that we have the lock. It might cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 0);
* have changed.
*/
if (skb_queue_len(&priv->tx_free_list[qos]) > in_use)
dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
if (skb_queue_len(&priv->tx_free_list[qos]) > in_use)
dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
spin_unlock(&priv->tx_free_list[qos].lock); spin_unlock(&priv->tx_free_list[qos].lock);
} else {
cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 1);
} }
return 0; return 0;
......
...@@ -30,3 +30,28 @@ int cvm_oct_xmit_pow(struct sk_buff *skb, struct net_device *dev); ...@@ -30,3 +30,28 @@ int cvm_oct_xmit_pow(struct sk_buff *skb, struct net_device *dev);
int cvm_oct_transmit_qos(struct net_device *dev, void *work_queue_entry, int cvm_oct_transmit_qos(struct net_device *dev, void *work_queue_entry,
int do_free, int qos); int do_free, int qos);
void cvm_oct_tx_shutdown(struct net_device *dev); void cvm_oct_tx_shutdown(struct net_device *dev);
/**
* Free dead transmit skbs.
*
* @priv: The driver data
* @skb_to_free: The number of SKBs to free (free none if negative).
* @qos: The queue to free from.
* @take_lock: If true, acquire the skb list lock.
*/
static inline void cvm_oct_free_tx_skbs(struct octeon_ethernet *priv,
int skb_to_free,
int qos, int take_lock)
{
/* Free skbuffs not in use by the hardware. */
if (skb_to_free > 0) {
if (take_lock)
spin_lock(&priv->tx_free_list[qos].lock);
while (skb_to_free > 0) {
dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
skb_to_free--;
}
if (take_lock)
spin_unlock(&priv->tx_free_list[qos].lock);
}
}
...@@ -37,13 +37,14 @@ ...@@ -37,13 +37,14 @@
#include <asm/octeon/octeon.h> #include <asm/octeon/octeon.h>
#include "ethernet-defines.h" #include "ethernet-defines.h"
#include "octeon-ethernet.h"
#include "ethernet-mem.h" #include "ethernet-mem.h"
#include "ethernet-rx.h" #include "ethernet-rx.h"
#include "ethernet-tx.h" #include "ethernet-tx.h"
#include "ethernet-mdio.h" #include "ethernet-mdio.h"
#include "ethernet-util.h" #include "ethernet-util.h"
#include "ethernet-proc.h" #include "ethernet-proc.h"
#include "octeon-ethernet.h"
#include "cvmx-pip.h" #include "cvmx-pip.h"
#include "cvmx-pko.h" #include "cvmx-pko.h"
...@@ -130,13 +131,25 @@ extern struct semaphore mdio_sem; ...@@ -130,13 +131,25 @@ extern struct semaphore mdio_sem;
*/ */
static void cvm_do_timer(unsigned long arg) static void cvm_do_timer(unsigned long arg)
{ {
static int port; int32_t skb_to_free, undo;
if (port < CVMX_PIP_NUM_INPUT_PORTS) {
if (cvm_oct_device[port]) {
int queues_per_port; int queues_per_port;
int qos; int qos;
struct octeon_ethernet *priv = struct octeon_ethernet *priv;
netdev_priv(cvm_oct_device[port]); static int port;
if (port >= CVMX_PIP_NUM_INPUT_PORTS) {
/*
* All ports have been polled. Start the next
* iteration through the ports in one second.
*/
port = 0;
mod_timer(&cvm_oct_poll_timer, jiffies + HZ);
return;
}
if (!cvm_oct_device[port])
goto out;
priv = netdev_priv(cvm_oct_device[port]);
if (priv->poll) { if (priv->poll) {
/* skip polling if we don't get the lock */ /* skip polling if we don't get the lock */
if (!down_trylock(&mdio_sem)) { if (!down_trylock(&mdio_sem)) {
...@@ -148,35 +161,25 @@ static void cvm_do_timer(unsigned long arg) ...@@ -148,35 +161,25 @@ static void cvm_do_timer(unsigned long arg)
queues_per_port = cvmx_pko_get_num_queues(port); queues_per_port = cvmx_pko_get_num_queues(port);
/* Drain any pending packets in the free list */ /* Drain any pending packets in the free list */
for (qos = 0; qos < queues_per_port; qos++) { for (qos = 0; qos < queues_per_port; qos++) {
if (skb_queue_len(&priv->tx_free_list[qos])) { if (skb_queue_len(&priv->tx_free_list[qos]) == 0)
spin_lock(&priv->tx_free_list[qos]. continue;
lock); skb_to_free = cvmx_fau_fetch_and_add32(priv->fau + qos * 4,
while (skb_queue_len MAX_SKB_TO_FREE);
(&priv->tx_free_list[qos]) > undo = skb_to_free > 0 ?
cvmx_fau_fetch_and_add32(priv-> MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
fau + if (undo > 0)
qos * 4, cvmx_fau_atomic_add32(priv->fau+qos*4, -undo);
0)) skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ?
dev_kfree_skb(__skb_dequeue MAX_SKB_TO_FREE : -skb_to_free;
(&priv-> cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 1);
tx_free_list
[qos]));
spin_unlock(&priv->tx_free_list[qos].
lock);
}
} }
cvm_oct_device[port]->netdev_ops->ndo_get_stats(cvm_oct_device[port]); cvm_oct_device[port]->netdev_ops->ndo_get_stats(cvm_oct_device[port]);
}
out:
port++; port++;
/* Poll the next port in a 50th of a second. /* Poll the next port in a 50th of a second.
This spreads the polling of ports out a little bit */ This spreads the polling of ports out a little bit */
mod_timer(&cvm_oct_poll_timer, jiffies + HZ / 50); mod_timer(&cvm_oct_poll_timer, jiffies + HZ / 50);
} else {
port = 0;
/* All ports have been polled. Start the next iteration through
the ports in one second */
mod_timer(&cvm_oct_poll_timer, jiffies + HZ);
}
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment