Commit 000f42a2 authored by David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net

Jeff Kirsher says:

====================
Intel Wired LAN Driver Updates 2015-01-06

This series contains fixes to i40e only.

Jesse provides a fix for an issue where, when the driver was polling with
interrupts disabled, the hardware would occasionally not write back
descriptors.  His fix causes the driver to detect this situation and force
an interrupt to fire, which flushes the stuck descriptors.
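
As a rough illustration of the idea (a standalone model, not the driver
code: the tx_ring struct and force_writeback() below are invented for
illustration), the hardware only writes back completions on every 4th
descriptor, so when cleanup stops on an index off that stride while
descriptors are still outstanding, the driver arms a forced write-back:

    #include <stdbool.h>
    #include <stdio.h>

    #define WB_STRIDE 0x3            /* hw writes back on every 4th descriptor */

    struct tx_ring {                 /* hypothetical stand-in for i40e_ring */
            unsigned int next_to_clean;  /* index where cleanup stopped */
            unsigned int in_flight;      /* descriptors still owned by hw */
            bool arm_wb;                 /* request a forced write-back */
    };

    /* Stand-in for the register write that triggers a software interrupt. */
    static void force_writeback(struct tx_ring *ring)
    {
            printf("forcing write-back at index %u\n", ring->next_to_clean);
    }

    /* Decide whether stuck descriptors need to be flushed, mirroring the
     * shape of the check added to i40e_clean_tx_irq(): still in polling
     * mode (budget left), cleanup stopped off a stride boundary, and
     * descriptors are still outstanding.
     */
    static void check_stuck_descriptors(struct tx_ring *ring, int budget)
    {
            unsigned int i = ring->next_to_clean;

            ring->arm_wb = budget &&
                           ((i & WB_STRIDE) != WB_STRIDE) &&
                           (ring->in_flight != 0);

            if (ring->arm_wb)
                    force_writeback(ring);
    }

    int main(void)
    {
            struct tx_ring ring = { .next_to_clean = 6, .in_flight = 2 };

            check_stuck_descriptors(&ring, 64);   /* 6 & 3 != 3 -> forces wb */

            ring.next_to_clean = 7;               /* 7 & 3 == 3 -> hw wrote back */
            ring.in_flight = 0;
            check_stuck_descriptors(&ring, 64);   /* no action needed */
            return 0;
    }

In the driver itself this corresponds to the WB_STRIDE check added to
i40e_clean_tx_irq() and the new i40e_force_wb() helper in the diff below,
which triggers a software interrupt via the PFINT_DYN_CTLN register.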

Anjali provides a couple of fixes.  The first corrects an issue where
the receive port checksum error counter was incrementing incorrectly with
UDP-encapsulated tunneled traffic.  The second resolves an issue where
the driver was examining the outer protocol layer to set the inner protocol
layer's checksum offload.  In the case of TCP over IPv6 over an IPv4-based
VXLAN, the inner checksum offloads would be set to look for IPv4/UDP
instead of IPv6/TCP, so the fix makes the driver look at the proper layer
for encapsulation offload settings.
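
As a rough sketch of that selection logic (a standalone model, not the
driver code: the enum and select_inner_offload() helper are invented for
illustration), the offload choice has to follow the inner header's IP
version whenever the frame is encapsulated:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical offload selectors standing in for the driver's
     * context-descriptor flags.
     */
    enum inner_csum_offload {
            INNER_OFFLOAD_IPV4_TCP,
            INNER_OFFLOAD_IPV6_TCP,
    };

    /* Pick the inner checksum offload from the *inner* IP version when the
     * frame is encapsulated (e.g. TCP/IPv6 inside an IPv4-based VXLAN
     * tunnel), instead of reusing the outer header's version -- the mistake
     * the fix removes.
     */
    static enum inner_csum_offload
    select_inner_offload(bool encapsulated, int outer_ip_version, int inner_ip_version)
    {
            int version = encapsulated ? inner_ip_version : outer_ip_version;

            return (version == 6) ? INNER_OFFLOAD_IPV6_TCP : INNER_OFFLOAD_IPV4_TCP;
    }

    int main(void)
    {
            /* TCP over IPv6 carried inside an IPv4-based VXLAN tunnel:
             * the inner offload must be IPv6/TCP, not IPv4/UDP.
             */
            enum inner_csum_offload sel = select_inner_offload(true, 4, 6);

            printf("selected %s\n",
                   sel == INNER_OFFLOAD_IPV6_TCP ? "IPv6/TCP" : "IPv4/TCP");
            return 0;
    }

In the driver this corresponds to i40e_tso() switching on the (possibly
inner) header's version field and i40e_tx_enable_csum() adjusting tx_flags
when the inner header is IPv6, as shown in the diff below.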

v2: fixed a bug in patch 01 of the series, where the interrupt rate change
    impacted 4-port workloads by reducing throughput.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents bdec4196 df23075f
@@ -658,6 +658,8 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
 	return le32_to_cpu(*(volatile __le32 *)head);
 }
 
+#define WB_STRIDE 0x3
+
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring: tx ring to clean
@@ -759,6 +761,18 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_ring->q_vector->tx.total_bytes += total_bytes;
 	tx_ring->q_vector->tx.total_packets += total_packets;
 
+	/* check to see if there are any non-cache aligned descriptors
+	 * waiting to be written back, and kick the hardware to force
+	 * them to be written back in case of napi polling
+	 */
+	if (budget &&
+	    !((i & WB_STRIDE) == WB_STRIDE) &&
+	    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
+	    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
+		tx_ring->arm_wb = true;
+	else
+		tx_ring->arm_wb = false;
+
 	if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
 		/* schedule immediate reset if we believe we hung */
 		dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
@@ -777,13 +791,16 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
 
 		dev_info(tx_ring->dev,
-			 "tx hang detected on queue %d, resetting adapter\n",
+			 "tx hang detected on queue %d, reset requested\n",
 			 tx_ring->queue_index);
 
-		tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev);
+		/* do not fire the reset immediately, wait for the stack to
+		 * decide we are truly stuck, also prevents every queue from
+		 * simultaneously requesting a reset
+		 */
 
-		/* the adapter is about to reset, no point in enabling stuff */
-		return true;
+		/* the adapter is about to reset, no point in enabling polling */
+		budget = 1;
 	}
 
 	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
@@ -806,7 +823,25 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		}
 	}
 
-	return budget > 0;
+	return !!budget;
+}
+
+/**
+ * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
+ * @vsi: the VSI we care about
+ * @q_vector: the vector on which to force writeback
+ *
+ **/
+static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+{
+	u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+		  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+		  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK
+		  /* allow 00 to be written to the index */;
+
+	wr32(&vsi->back->hw,
+	     I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
+	     val);
 }
 
 /**
@@ -1290,9 +1325,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
 	 * so the total length of IPv4 header is IHL*4 bytes
 	 * The UDP_0 bit *may* bet set if the *inner* header is UDP
 	 */
-	if (ipv4_tunnel &&
-	    (decoded.inner_prot != I40E_RX_PTYPE_INNER_PROT_UDP) &&
-	    !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) {
+	if (ipv4_tunnel) {
 		skb->transport_header = skb->mac_header +
 					sizeof(struct ethhdr) +
 					(ip_hdr(skb)->ihl * 4);
@@ -1302,15 +1335,19 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
 			       skb->protocol == htons(ETH_P_8021AD))
 			      ? VLAN_HLEN : 0;
 
-		rx_udp_csum = udp_csum(skb);
-		iph = ip_hdr(skb);
-		csum = csum_tcpudp_magic(
-				iph->saddr, iph->daddr,
-				(skb->len - skb_transport_offset(skb)),
-				IPPROTO_UDP, rx_udp_csum);
+		if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
+		    (udp_hdr(skb)->check != 0)) {
+			rx_udp_csum = udp_csum(skb);
+			iph = ip_hdr(skb);
+			csum = csum_tcpudp_magic(
+					iph->saddr, iph->daddr,
+					(skb->len - skb_transport_offset(skb)),
+					IPPROTO_UDP, rx_udp_csum);
 
-		if (udp_hdr(skb)->check != csum)
-			goto checksum_fail;
+			if (udp_hdr(skb)->check != csum)
+				goto checksum_fail;
+		} /* else its GRE and so no outer UDP header */
 	}
 
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1581,6 +1618,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 	struct i40e_vsi *vsi = q_vector->vsi;
 	struct i40e_ring *ring;
 	bool clean_complete = true;
+	bool arm_wb = false;
 	int budget_per_ring;
 
 	if (test_bit(__I40E_DOWN, &vsi->state)) {
@@ -1591,8 +1629,10 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 	/* Since the actual Tx work is minimal, we can give the Tx a larger
 	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
-	i40e_for_each_ring(ring, q_vector->tx)
+	i40e_for_each_ring(ring, q_vector->tx) {
 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
+		arm_wb |= ring->arm_wb;
+	}
 
 	/* We attempt to distribute budget to each Rx queue fairly, but don't
 	 * allow the budget to go below 1 because that would exit polling early.
@@ -1603,8 +1643,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 		clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);
 
 	/* If work not completed, return budget and polling will return */
-	if (!clean_complete)
+	if (!clean_complete) {
+		if (arm_wb)
+			i40e_force_wb(vsi, q_vector);
 		return budget;
+	}
 
 	/* Work is done so exit the polling mode and re-enable the interrupt */
 	napi_complete(napi);
@@ -1840,17 +1883,16 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	if (protocol == htons(ETH_P_IP)) {
-		iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
+	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
+	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
+	if (iph->version == 4) {
 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
 		iph->tot_len = 0;
 		iph->check = 0;
 		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 						 0, IPPROTO_TCP, 0);
-	} else if (skb_is_gso_v6(skb)) {
-		ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb)
-					   : ipv6_hdr(skb);
+	} else if (ipv6h->version == 6) {
 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
 		ipv6h->payload_len = 0;
 		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
@@ -1946,13 +1988,9 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
 					I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
 		}
 	} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
-		if (tx_flags & I40E_TX_FLAGS_TSO) {
-			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
+		*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
+		if (tx_flags & I40E_TX_FLAGS_TSO)
 			ip_hdr(skb)->check = 0;
-		} else {
-			*cd_tunneling |=
-					I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
-		}
 	}
 
 	/* Now set the ctx descriptor fields */
@@ -1962,7 +2000,10 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
 				       ((skb_inner_network_offset(skb) -
 					 skb_transport_offset(skb)) >> 1) <<
 				       I40E_TXD_CTX_QW0_NATLEN_SHIFT;
-
+		if (this_ip_hdr->version == 6) {
+			tx_flags &= ~I40E_TX_FLAGS_IPV4;
+			tx_flags |= I40E_TX_FLAGS_IPV6;
+		}
 	} else {
 		network_hdr_len = skb_network_header_len(skb);
 		this_ip_hdr = ip_hdr(skb);
@@ -2198,7 +2239,6 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	/* Place RS bit on last descriptor of any packet that spans across the
 	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
 	 */
-#define WB_STRIDE 0x3
 	if (((i & WB_STRIDE) != WB_STRIDE) &&
 	    (first <= &tx_ring->tx_bi[i]) &&
 	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
...
@@ -241,6 +241,7 @@ struct i40e_ring {
 	unsigned long last_rx_timestamp;
 
 	bool ring_active;		/* is ring online or not */
+	bool arm_wb;		/* do something to arm write back */
 
 	/* stats structs */
 	struct i40e_queue_stats stats;
...