Commit 6b5525c8 authored by David S. Miller

Merge branch 'sfc-encap-offloads-on-EF10'

Edward Cree says:

====================
sfc: encap offloads on EF10

EF10 NICs from the 8000 series onwards support TX offloads (checksumming,
 TSO) on VXLAN- and NVGRE-encapsulated packets.  This series adds driver
 support for these offloads.

Changes from v1:
 * Fix 'no TXQ of type' error handling in patch #1 (and clear up the
   misleading comment that inspired the wrong version)
 * Add comment in patch #5 explaining what the deal with TSOv3 is
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 4a681bf3 24b2c375
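
The core idea of the series is that each channel can now expose up to four TX queues, one per combination of outer and inner checksum offload. A small stand-alone sketch of that encoding (illustrative only, not part of the patches; it simply mirrors the EFX_TXQ_TYPE_OUTER_CSUM and EFX_TXQ_TYPE_INNER_CSUM flags the series introduces):

/* Illustration: the two low type bits select one of four per-channel TXQs. */
#include <stdio.h>

#define TXQ_TYPE_OUTER_CSUM 1	/* mirrors EFX_TXQ_TYPE_OUTER_CSUM */
#define TXQ_TYPE_INNER_CSUM 2	/* mirrors EFX_TXQ_TYPE_INNER_CSUM */

int main(void)
{
	unsigned int type;

	for (type = 0; type < 4; type++)
		printf("TXQ type %u: outer csum %s, inner csum %s\n", type,
		       (type & TXQ_TYPE_OUTER_CSUM) ? "on" : "off",
		       (type & TXQ_TYPE_INNER_CSUM) ? "on" : "off");
	return 0;
}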
@@ -601,10 +601,14 @@ static int efx_ef10_probe(struct efx_nic *efx)
 	efx_ef10_read_licensed_features(efx);
 
 	/* We can have one VI for each vi_stride-byte region.
-	 * However, until we use TX option descriptors we need two TX queues
-	 * per channel.
+	 * However, until we use TX option descriptors we need up to four
+	 * TX queues per channel for different checksumming combinations.
 	 */
-	efx->tx_queues_per_channel = 2;
+	if (nic_data->datapath_caps &
+	    (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))
+		efx->tx_queues_per_channel = 4;
+	else
+		efx->tx_queues_per_channel = 2;
 	efx->max_vis = efx_ef10_mem_map_size(efx) / efx->vi_stride;
 	if (!efx->max_vis) {
 		netif_err(efx, drv, efx->net_dev, "error determining max VIs\n");
@@ -1300,6 +1304,7 @@ static void efx_ef10_fini_nic(struct efx_nic *efx)
 static int efx_ef10_init_nic(struct efx_nic *efx)
 {
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	netdev_features_t hw_enc_features = 0;
 	int rc;
 
 	if (nic_data->must_check_datapath_caps) {
@@ -1344,6 +1349,21 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
 		nic_data->must_restore_piobufs = false;
 	}
 
+	/* add encapsulated checksum offload features */
+	if (efx_has_cap(efx, VXLAN_NVGRE) && !efx_ef10_is_vf(efx))
+		hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	/* add encapsulated TSO features */
+	if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) {
+		netdev_features_t encap_tso_features;
+
+		encap_tso_features = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
+			NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM;
+
+		hw_enc_features |= encap_tso_features | NETIF_F_TSO;
+		efx->net_dev->features |= encap_tso_features;
+	}
+	efx->net_dev->hw_enc_features = hw_enc_features;
+
 	/* don't fail init if RSS setup doesn't work */
 	rc = efx->type->rx_push_rss_config(efx, false,
 					   efx->rss_context.rx_indir_table, NULL);
@@ -2146,6 +2166,9 @@ static int efx_ef10_irq_test_generate(struct efx_nic *efx)
 
 static int efx_ef10_tx_probe(struct efx_tx_queue *tx_queue)
 {
+	/* low two bits of label are what we want for type */
+	BUILD_BUG_ON((EFX_TXQ_TYPE_OUTER_CSUM | EFX_TXQ_TYPE_INNER_CSUM) != 3);
+	tx_queue->type = tx_queue->label & 3;
 	return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd.buf,
 				    (tx_queue->ptr_mask + 1) *
 				    sizeof(efx_qword_t),
@@ -2168,15 +2191,15 @@ static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue,
 /* Add Firmware-Assisted TSO v2 option descriptors to a queue.
  */
-static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
-				struct sk_buff *skb,
-				bool *data_mapped)
+int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+			 bool *data_mapped)
 {
 	struct efx_tx_buffer *buffer;
+	u16 inner_ipv4_id = 0;
+	u16 outer_ipv4_id = 0;
 	struct tcphdr *tcp;
 	struct iphdr *ip;
-	u16 ipv4_id;
+	u16 ip_tot_len;
 	u32 seqnum;
 	u32 mss;
@@ -2189,21 +2212,43 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
 		return -EINVAL;
 	}
 
-	ip = ip_hdr(skb);
+	if (skb->encapsulation) {
+		if (!tx_queue->tso_encap)
+			return -EINVAL;
+		ip = ip_hdr(skb);
+		if (ip->version == 4)
+			outer_ipv4_id = ntohs(ip->id);
+
+		ip = inner_ip_hdr(skb);
+		tcp = inner_tcp_hdr(skb);
+	} else {
+		ip = ip_hdr(skb);
+		tcp = tcp_hdr(skb);
+	}
+
+	/* 8000-series EF10 hardware requires that IP Total Length be
+	 * greater than or equal to the value it will have in each segment
+	 * (which is at most mss + 208 + TCP header length), but also less
+	 * than (0x10000 - inner_network_header).  Otherwise the TCP
+	 * checksum calculation will be broken for encapsulated packets.
+	 * We fill in ip->tot_len with 0xff30, which should satisfy the
+	 * first requirement unless the MSS is ridiculously large (which
+	 * should be impossible as the driver max MTU is 9216); it is
+	 * guaranteed to satisfy the second as we only attempt TSO if
+	 * inner_network_header <= 208.
+	 */
+	ip_tot_len = -EFX_TSO2_MAX_HDRLEN;
+	EFX_WARN_ON_ONCE_PARANOID(mss + EFX_TSO2_MAX_HDRLEN +
+				  (tcp->doff << 2u) > ip_tot_len);
+
 	if (ip->version == 4) {
-		/* Modify IPv4 header if needed. */
-		ip->tot_len = 0;
+		ip->tot_len = htons(ip_tot_len);
 		ip->check = 0;
-		ipv4_id = ntohs(ip->id);
+		inner_ipv4_id = ntohs(ip->id);
 	} else {
-		/* Modify IPv6 header if needed. */
-		struct ipv6hdr *ipv6 = ipv6_hdr(skb);
-
-		ipv6->payload_len = 0;
-		ipv4_id = 0;
+		((struct ipv6hdr *)ip)->payload_len = htons(ip_tot_len);
 	}
-	tcp = tcp_hdr(skb);
+
 	seqnum = ntohl(tcp->seq);
 
 	buffer = efx_tx_queue_get_insert_buffer(tx_queue);
@@ -2216,7 +2261,7 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
 			ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
 			ESF_DZ_TX_TSO_OPTION_TYPE,
 			ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A,
-			ESF_DZ_TX_TSO_IP_ID, ipv4_id,
+			ESF_DZ_TX_TSO_IP_ID, inner_ipv4_id,
 			ESF_DZ_TX_TSO_TCP_SEQNO, seqnum
 			);
 	++tx_queue->insert_count;
@@ -2226,11 +2271,12 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
 	buffer->flags = EFX_TX_BUF_OPTION;
 	buffer->len = 0;
 	buffer->unmap_len = 0;
-	EFX_POPULATE_QWORD_4(buffer->option,
+	EFX_POPULATE_QWORD_5(buffer->option,
 			ESF_DZ_TX_DESC_IS_OPT, 1,
 			ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
 			ESF_DZ_TX_TSO_OPTION_TYPE,
 			ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B,
+			ESF_DZ_TX_TSO_OUTER_IPID, outer_ipv4_id,
 			ESF_DZ_TX_TSO_TCP_MSS, mss
 			);
 	++tx_queue->insert_count;
@@ -2254,11 +2300,11 @@ static u32 efx_ef10_tso_versions(struct efx_nic *efx)
 static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 {
-	bool csum_offload = tx_queue->label & EFX_TXQ_TYPE_OFFLOAD;
+	bool csum_offload = tx_queue->type & EFX_TXQ_TYPE_OUTER_CSUM;
+	bool inner_csum = tx_queue->type & EFX_TXQ_TYPE_INNER_CSUM;
 	struct efx_channel *channel = tx_queue->channel;
 	struct efx_nic *efx = tx_queue->efx;
 	struct efx_ef10_nic_data *nic_data;
-	bool tso_v2 = false;
 	efx_qword_t *txd;
 	int rc;
 
@@ -2281,15 +2327,18 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 	 * TSOv2 cannot be used with Hardware timestamping, and is never needed
 	 * for XDP tx.
 	 */
-	if (csum_offload && (nic_data->datapath_caps2 &
-			(1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) &&
-	    !tx_queue->timestamping && !tx_queue->xdp_tx) {
-		tso_v2 = true;
-		netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
-				channel->channel);
+	if (efx_has_cap(efx, TX_TSO_V2)) {
+		if ((csum_offload || inner_csum) &&
+		    !tx_queue->timestamping && !tx_queue->xdp_tx) {
+			tx_queue->tso_version = 2;
+			netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
+				  channel->channel);
+		}
+	} else if (efx_has_cap(efx, TX_TSO)) {
+		tx_queue->tso_version = 1;
 	}
 
-	rc = efx_mcdi_tx_init(tx_queue, tso_v2);
+	rc = efx_mcdi_tx_init(tx_queue);
 	if (rc)
 		goto fail;
 
@@ -2302,22 +2351,19 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 	tx_queue->buffer[0].flags = EFX_TX_BUF_OPTION;
 	tx_queue->insert_count = 1;
 	txd = efx_tx_desc(tx_queue, 0);
-	EFX_POPULATE_QWORD_5(*txd,
+	EFX_POPULATE_QWORD_7(*txd,
 			     ESF_DZ_TX_DESC_IS_OPT, true,
 			     ESF_DZ_TX_OPTION_TYPE,
 			     ESE_DZ_TX_OPTION_DESC_CRC_CSUM,
 			     ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload,
-			     ESF_DZ_TX_OPTION_IP_CSUM, csum_offload,
+			     ESF_DZ_TX_OPTION_IP_CSUM, csum_offload && tx_queue->tso_version != 2,
+			     ESF_DZ_TX_OPTION_INNER_UDP_TCP_CSUM, inner_csum,
+			     ESF_DZ_TX_OPTION_INNER_IP_CSUM, inner_csum && tx_queue->tso_version != 2,
			     ESF_DZ_TX_TIMESTAMP, tx_queue->timestamping);
 	tx_queue->write_count = 1;
 
-	if (tso_v2) {
-		tx_queue->handle_tso = efx_ef10_tx_tso_desc;
-		tx_queue->tso_version = 2;
-	} else if (nic_data->datapath_caps &
-		   (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
-		tx_queue->tso_version = 1;
-	}
+	if (tx_queue->tso_version == 2 && efx_has_cap(efx, TX_TSO_V2_ENCAP))
+		tx_queue->tso_encap = true;
 
 	wmb();
 	efx_ef10_push_tx_desc(tx_queue, txd);
@@ -2880,7 +2926,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
 	/* Get the transmit queue */
 	tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL);
 	tx_queue = efx_channel_get_tx_queue(channel,
-					    tx_ev_q_label % EFX_TXQ_TYPES);
+					    tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
 
 	if (!tx_queue->timestamping) {
 		/* Transmit completion */
...
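
A quick arithmetic check of the 0xff30 value used in efx_ef10_tx_tso_desc() above (stand-alone sketch, not driver code): EFX_TSO2_MAX_HDRLEN is 208, and -208 truncated to 16 bits is 0x10000 - 208 = 0xff30, which comfortably exceeds mss + 208 + TCP header length at the driver's 9216-byte maximum MTU while staying below 0x10000 - inner_network_header whenever the headers fit in 208 bytes.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t ip_tot_len = (uint16_t)-208;	/* same 16-bit value the driver writes */

	printf("ip_tot_len = 0x%04x (%u)\n", ip_tot_len, ip_tot_len);	/* 0xff30, 65328 */
	return 0;
}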
@@ -37,7 +37,14 @@ void ef100_tx_init(struct efx_tx_queue *tx_queue)
 				    tx_queue->channel->channel -
 				    tx_queue->efx->tx_channel_offset);
 
-	if (efx_mcdi_tx_init(tx_queue, false))
+	/* This value is purely documentational; as EF100 never passes through
+	 * the switch statement in tx.c:__efx_enqueue_skb(), that switch does
+	 * not handle case 3.  EF100's TSOv3 descriptors are generated by
+	 * ef100_make_tso_desc().
+	 * Meanwhile, all efx_mcdi_tx_init() cares about is that it's not 2.
+	 */
+	tx_queue->tso_version = 3;
+	if (efx_mcdi_tx_init(tx_queue))
 		netdev_WARN(tx_queue->efx->net_dev,
 			    "failed to initialise TXQ %d\n", tx_queue->queue);
 }
...
@@ -596,6 +596,7 @@ static const struct net_device_ops efx_netdev_ops = {
 	.ndo_set_mac_address	= efx_set_mac_address,
 	.ndo_set_rx_mode	= efx_set_rx_mode,
 	.ndo_set_features	= efx_set_features,
+	.ndo_features_check	= efx_features_check,
 	.ndo_vlan_rx_add_vid	= efx_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= efx_vlan_rx_kill_vid,
 #ifdef CONFIG_SFC_SRIOV
...
@@ -151,7 +151,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
 	 */
 	n_xdp_tx = num_possible_cpus();
-	n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);
+	n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_MAX_TXQ_PER_CHANNEL);
 
 	vec_count = pci_msix_vec_count(efx->pci_dev);
 	if (vec_count < 0)
@@ -179,7 +179,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
 		efx->xdp_tx_queue_count = 0;
 	} else {
 		efx->n_xdp_channels = n_xdp_ev;
-		efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
+		efx->xdp_tx_per_channel = EFX_MAX_TXQ_PER_CHANNEL;
 		efx->xdp_tx_queue_count = n_xdp_tx;
 		n_channels += n_xdp_ev;
 		netif_dbg(efx, drv, efx->net_dev,
@@ -520,7 +520,7 @@ static struct efx_channel *efx_alloc_channel(struct efx_nic *efx, int i)
 	channel->channel = i;
 	channel->type = &efx_default_channel_type;
 
-	for (j = 0; j < EFX_TXQ_TYPES; j++) {
+	for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
 		tx_queue = &channel->tx_queue[j];
 		tx_queue->efx = efx;
 		tx_queue->queue = -1;
@@ -594,7 +594,7 @@ struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
 	channel->napi_str.state = 0;
 	memset(&channel->eventq, 0, sizeof(channel->eventq));
 
-	for (j = 0; j < EFX_TXQ_TYPES; j++) {
+	for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
 		tx_queue = &channel->tx_queue[j];
 		if (tx_queue->channel)
 			tx_queue->channel = channel;
@@ -894,7 +894,7 @@ int efx_set_channels(struct efx_nic *efx)
 					  xdp_queue_number, tx_queue->queue);
 				/* We may have a few left-over XDP TX
 				 * queues owing to xdp_tx_queue_count
-				 * not dividing evenly by EFX_TXQ_TYPES.
+				 * not dividing evenly by EFX_MAX_TXQ_PER_CHANNEL.
 				 * We still allocate and probe those
 				 * TXQs, but never use them.
 				 */
...
@@ -11,6 +11,7 @@
 #include "net_driver.h"
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <net/gre.h>
 #include "efx_common.h"
 #include "efx_channels.h"
 #include "efx.h"
@@ -1287,6 +1288,89 @@ const struct pci_error_handlers efx_err_handlers = {
 	.resume		= efx_io_resume,
 };
 
+/* Determine whether the NIC will be able to handle TX offloads for a given
+ * encapsulated packet.
+ */
+static bool efx_can_encap_offloads(struct efx_nic *efx, struct sk_buff *skb)
+{
+	struct gre_base_hdr *greh;
+	__be16 dst_port;
+	u8 ipproto;
+
+	/* Does the NIC support encap offloads?
+	 * If not, we should never get here, because we shouldn't have
+	 * advertised encap offload feature flags in the first place.
+	 */
+	if (WARN_ON_ONCE(!efx->type->udp_tnl_has_port))
+		return false;
+
+	/* Determine encapsulation protocol in use */
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		ipproto = ip_hdr(skb)->protocol;
+		break;
+	case htons(ETH_P_IPV6):
+		/* If there are extension headers, this will cause us to
+		 * think we can't offload something that we maybe could have.
+		 */
+		ipproto = ipv6_hdr(skb)->nexthdr;
+		break;
+	default:
+		/* Not IP, so can't offload it */
+		return false;
+	}
+	switch (ipproto) {
+	case IPPROTO_GRE:
+		/* We support NVGRE but not IP over GRE or random gretaps.
+		 * Specifically, the NIC will accept GRE as encapsulated if
+		 * the inner protocol is Ethernet, but only handle it
+		 * correctly if the GRE header is 8 bytes long.  Moreover,
+		 * it will not update the Checksum or Sequence Number fields
+		 * if they are present.  (The Routing Present flag,
+		 * GRE_ROUTING, cannot be set else the header would be more
+		 * than 8 bytes long; so we don't have to worry about it.)
+		 */
+		if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
+			return false;
+		if (ntohs(skb->inner_protocol) != ETH_P_TEB)
+			return false;
+		if (skb_inner_mac_header(skb) - skb_transport_header(skb) != 8)
+			return false;
+		greh = (struct gre_base_hdr *)skb_transport_header(skb);
+		return !(greh->flags & (GRE_CSUM | GRE_SEQ));
+	case IPPROTO_UDP:
+		/* If the port is registered for a UDP tunnel, we assume the
+		 * packet is for that tunnel, and the NIC will handle it as
+		 * such.  If not, the NIC won't know what to do with it.
+		 */
+		dst_port = udp_hdr(skb)->dest;
+		return efx->type->udp_tnl_has_port(efx, dst_port);
+	default:
+		return false;
+	}
+}
+
+netdev_features_t efx_features_check(struct sk_buff *skb, struct net_device *dev,
+				     netdev_features_t features)
+{
+	struct efx_nic *efx = netdev_priv(dev);
+
+	if (skb->encapsulation) {
+		if (features & NETIF_F_GSO_MASK)
+			/* Hardware can only do TSO with at most 208 bytes
+			 * of headers.
+			 */
+			if (skb_inner_transport_offset(skb) >
+			    EFX_TSO2_MAX_HDRLEN)
+				features &= ~(NETIF_F_GSO_MASK);
+		if (features & (NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK))
+			if (!efx_can_encap_offloads(efx, skb))
+				features &= ~(NETIF_F_GSO_MASK |
+					      NETIF_F_CSUM_MASK);
+	}
+	return features;
+}
+
 int efx_get_phys_port_id(struct net_device *net_dev,
 			 struct netdev_phys_item_id *ppid)
 {
...
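
A note on the 8-byte check in efx_can_encap_offloads() above: the GRE base header is 4 bytes (flags/version plus protocol type), and NVGRE adds the 4-byte Key field, so the only GRE header length the NIC handles correctly is 8 bytes. A stand-alone sketch of that arithmetic (illustrative only; the struct simply mirrors the layout of the kernel's struct gre_base_hdr):

#include <stdint.h>
#include <stdio.h>

struct gre_base_hdr_layout {	/* mirrors struct gre_base_hdr: flags + protocol */
	uint16_t flags;
	uint16_t protocol;
};

int main(void)
{
	size_t nvgre_hdr_len = sizeof(struct gre_base_hdr_layout) + 4;	/* + 4-byte Key field */

	printf("NVGRE header length: %zu bytes\n", nvgre_hdr_len);	/* prints 8 */
	return 0;
}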
@@ -105,6 +105,9 @@ int efx_change_mtu(struct net_device *net_dev, int new_mtu);
 
 extern const struct pci_error_handlers efx_err_handlers;
 
+netdev_features_t efx_features_check(struct sk_buff *skb, struct net_device *dev,
+				     netdev_features_t features);
+
 int efx_get_phys_port_id(struct net_device *net_dev,
 			 struct netdev_phys_item_id *ppid);
...
@@ -407,7 +407,7 @@ static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings)
 				snprintf(strings, ETH_GSTRING_LEN,
 					 "tx-%u.tx_packets",
 					 channel->tx_queue[0].queue /
-					 EFX_TXQ_TYPES);
+					 EFX_MAX_TXQ_PER_CHANNEL);
 				strings += ETH_GSTRING_LEN;
 			}
...
@@ -372,6 +372,8 @@ int efx_farch_tx_probe(struct efx_tx_queue *tx_queue)
 	struct efx_nic *efx = tx_queue->efx;
 	unsigned entries;
 
+	tx_queue->type = ((tx_queue->label & 1) ? EFX_TXQ_TYPE_OUTER_CSUM : 0) |
+			 ((tx_queue->label & 2) ? EFX_TXQ_TYPE_HIGHPRI : 0);
 	entries = tx_queue->ptr_mask + 1;
 	return efx_alloc_special_buffer(efx, &tx_queue->txd,
 					entries * sizeof(efx_qword_t));
@@ -379,7 +381,7 @@ int efx_farch_tx_probe(struct efx_tx_queue *tx_queue)
 
 void efx_farch_tx_init(struct efx_tx_queue *tx_queue)
 {
-	int csum = tx_queue->label & EFX_TXQ_TYPE_OFFLOAD;
+	int csum = tx_queue->type & EFX_TXQ_TYPE_OUTER_CSUM;
 	struct efx_nic *efx = tx_queue->efx;
 	efx_oword_t reg;
 
@@ -409,10 +411,12 @@ void efx_farch_tx_init(struct efx_tx_queue *tx_queue)
 	EFX_POPULATE_OWORD_1(reg,
 			     FRF_BZ_TX_PACE,
-			     (tx_queue->label & EFX_TXQ_TYPE_HIGHPRI) ?
+			     (tx_queue->type & EFX_TXQ_TYPE_HIGHPRI) ?
 			     FFE_BZ_TX_PACE_OFF :
 			     FFE_BZ_TX_PACE_RESERVED);
 	efx_writeo_table(efx, &reg, FR_BZ_TX_PACE_TBL, tx_queue->queue);
+
+	tx_queue->tso_version = 1;
 }
 
 static void efx_farch_flush_tx_queue(struct efx_tx_queue *tx_queue)
@@ -832,13 +836,13 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
 		tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_DESC_PTR);
 		tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
 		tx_queue = efx_channel_get_tx_queue(
-			channel, tx_ev_q_label % EFX_TXQ_TYPES);
+			channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
 		efx_xmit_done(tx_queue, tx_ev_desc_ptr);
 	} else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) {
 		/* Rewrite the FIFO write pointer */
 		tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
 		tx_queue = efx_channel_get_tx_queue(
-			channel, tx_ev_q_label % EFX_TXQ_TYPES);
+			channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
 
 		netif_tx_lock(efx->net_dev);
 		efx_farch_notify_tx_desc(tx_queue);
@@ -1080,9 +1084,9 @@ efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
 	int qid;
 
 	qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
-	if (qid < EFX_TXQ_TYPES * (efx->n_tx_channels + efx->n_extra_tx_channels)) {
-		tx_queue = efx_get_tx_queue(efx, qid / EFX_TXQ_TYPES,
-					    qid % EFX_TXQ_TYPES);
+	if (qid < EFX_MAX_TXQ_PER_CHANNEL * (efx->n_tx_channels + efx->n_extra_tx_channels)) {
+		tx_queue = efx_get_tx_queue(efx, qid / EFX_MAX_TXQ_PER_CHANNEL,
+					    qid % EFX_MAX_TXQ_PER_CHANNEL);
 		if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0)) {
 			efx_farch_magic_event(tx_queue->channel,
 					      EFX_CHANNEL_MAGIC_TX_DRAIN(tx_queue));
@@ -1675,10 +1679,10 @@ void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
 	 * and the descriptor caches for those channels.
 	 */
 	buftbl_min = ((efx->n_rx_channels * EFX_MAX_DMAQ_SIZE +
-		       total_tx_channels * EFX_TXQ_TYPES * EFX_MAX_DMAQ_SIZE +
+		       total_tx_channels * EFX_MAX_TXQ_PER_CHANNEL * EFX_MAX_DMAQ_SIZE +
 		       efx->n_channels * EFX_MAX_EVQ_SIZE)
 		      * sizeof(efx_qword_t) / EFX_BUF_SIZE);
-	vi_count = max(efx->n_channels, total_tx_channels * EFX_TXQ_TYPES);
+	vi_count = max(efx->n_channels, total_tx_channels * EFX_MAX_TXQ_PER_CHANNEL);
 #ifdef CONFIG_SFC_SRIOV
 	if (efx->type->sriov_wanted) {
...
@@ -160,11 +160,12 @@ void efx_mcdi_ev_fini(struct efx_channel *channel)
 			       outbuf, outlen, rc);
 }
 
-int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2)
+int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue)
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
 						       EFX_BUF_SIZE));
-	bool csum_offload = tx_queue->label & EFX_TXQ_TYPE_OFFLOAD;
+	bool csum_offload = tx_queue->type & EFX_TXQ_TYPE_OUTER_CSUM;
+	bool inner_csum = tx_queue->type & EFX_TXQ_TYPE_INNER_CSUM;
 	size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE;
 	struct efx_channel *channel = tx_queue->channel;
 	struct efx_nic *efx = tx_queue->efx;
@@ -194,22 +195,31 @@
 	inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
 
 	do {
-		MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS,
+		bool tso_v2 = tx_queue->tso_version == 2;
+
+		/* TSOv2 implies IP header checksum offload for TSO frames,
+		 * so we can safely disable IP header checksum offload for
+		 * everything else.  If we don't have TSOv2, then we have to
+		 * enable IP header checksum offload, which is strictly
+		 * incorrect but better than breaking TSO.
+		 */
+		MCDI_POPULATE_DWORD_6(inbuf, INIT_TXQ_IN_FLAGS,
				/* This flag was removed from mcdi_pcol.h for
				 * the non-_EXT version of INIT_TXQ.  However,
				 * firmware still honours it.
				 */
				INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2,
-				INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
+				INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !(csum_offload && tso_v2),
				INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload,
-				INIT_TXQ_EXT_IN_FLAG_TIMESTAMP,
-				tx_queue->timestamping);
+				INIT_TXQ_EXT_IN_FLAG_TIMESTAMP, tx_queue->timestamping,
+				INIT_TXQ_IN_FLAG_INNER_IP_CSUM_EN, inner_csum && !tso_v2,
+				INIT_TXQ_IN_FLAG_INNER_TCP_CSUM_EN, inner_csum);
 
 		rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
					NULL, 0, NULL);
		if (rc == -ENOSPC && tso_v2) {
 			/* Retry without TSOv2 if we're short on contexts. */
-			tso_v2 = false;
+			tx_queue->tso_version = 0;
 			netif_warn(efx, probe, efx->net_dev,
				   "TSOv2 context not available to segment in "
				   "hardware. TCP performance may be reduced.\n"
...
@@ -19,7 +19,7 @@ int efx_mcdi_ev_probe(struct efx_channel *channel);
 int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2);
 void efx_mcdi_ev_remove(struct efx_channel *channel);
 void efx_mcdi_ev_fini(struct efx_channel *channel);
-int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2);
+int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue);
 void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue);
 void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue);
 int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue);
...
@@ -63,10 +63,13 @@
  * queues. */
 #define EFX_MAX_TX_TC		2
 #define EFX_MAX_CORE_TX_QUEUES	(EFX_MAX_TX_TC * EFX_MAX_CHANNELS)
-#define EFX_TXQ_TYPE_OFFLOAD	1	/* flag */
-#define EFX_TXQ_TYPE_HIGHPRI	2	/* flag */
-#define EFX_TXQ_TYPES		4
-#define EFX_MAX_TX_QUEUES	(EFX_TXQ_TYPES * EFX_MAX_CHANNELS)
+#define EFX_TXQ_TYPE_OUTER_CSUM	1	/* Outer checksum offload */
+#define EFX_TXQ_TYPE_INNER_CSUM	2	/* Inner checksum offload */
+#define EFX_TXQ_TYPE_HIGHPRI	4	/* High-priority (for TC) */
+#define EFX_TXQ_TYPES		8
+/* HIGHPRI is Siena-only, and INNER_CSUM is EF10, so no need for both */
+#define EFX_MAX_TXQ_PER_CHANNEL	4
+#define EFX_MAX_TX_QUEUES	(EFX_MAX_TXQ_PER_CHANNEL * EFX_MAX_CHANNELS)
 
 /* Maximum possible MTU the driver supports */
 #define EFX_MAX_MTU (9 * 1024)
@@ -74,6 +77,9 @@
 /* Minimum MTU, from RFC791 (IP) */
 #define EFX_MIN_MTU 68
 
+/* Maximum total header length for TSOv2 */
+#define EFX_TSO2_MAX_HDRLEN	208
+
 /* Size of an RX scatter buffer.  Small enough to pack 2 into a 4K page,
  * and should be a multiple of the cache line size.
  */
@@ -190,7 +196,9 @@ struct efx_tx_buffer {
  * @queue: DMA queue number
  * @label: Label for TX completion events.
  *	Is our index within @channel->tx_queue array.
+ * @type: configuration type of this TX queue.  A bitmask of %EFX_TXQ_TYPE_* flags.
  * @tso_version: Version of TSO in use for this queue.
+ * @tso_encap: Is encapsulated TSO supported?  Supported in TSOv2 on 8000 series.
  * @channel: The associated channel
  * @core_txq: The networking core TX queue structure
  * @buffer: The software buffer ring
@@ -204,8 +212,6 @@ struct efx_tx_buffer {
  * @initialised: Has hardware queue been initialised?
  * @timestamping: Is timestamping enabled for this channel?
  * @xdp_tx: Is this an XDP tx queue?
- * @handle_tso: TSO xmit preparation handler.  Sets up the TSO metadata and
- *	may also map tx data, depending on the nature of the TSO implementation.
  * @read_count: Current read pointer.
  *	This is the number of buffers that have been removed from both rings.
  * @old_write_count: The value of @write_count when last checked.
@@ -254,7 +260,9 @@ struct efx_tx_queue {
 	struct efx_nic *efx ____cacheline_aligned_in_smp;
 	unsigned int queue;
 	unsigned int label;
+	unsigned int type;
 	unsigned int tso_version;
+	bool tso_encap;
 	struct efx_channel *channel;
 	struct netdev_queue *core_txq;
 	struct efx_tx_buffer *buffer;
@@ -267,9 +275,6 @@ struct efx_tx_queue {
 	bool timestamping;
 	bool xdp_tx;
 
-	/* Function pointers used in the fast path. */
-	int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *);
-
 	/* Members used mainly on the completion path */
 	unsigned int read_count ____cacheline_aligned_in_smp;
 	unsigned int old_write_count;
@@ -479,6 +484,7 @@ enum efx_sync_events_state {
 * @rx_list: list of SKBs from current RX, awaiting processing
 * @rx_queue: RX queue for this channel
 * @tx_queue: TX queues for this channel
+ * @tx_queue_by_type: pointers into @tx_queue, or %NULL, indexed by txq type
 * @sync_events_state: Current state of sync events on this channel
 * @sync_timestamp_major: Major part of the last ptp sync event
 * @sync_timestamp_minor: Minor part of the last ptp sync event
@@ -540,7 +546,8 @@ struct efx_channel {
 	struct list_head *rx_list;
 
 	struct efx_rx_queue rx_queue;
-	struct efx_tx_queue tx_queue[EFX_TXQ_TYPES];
+	struct efx_tx_queue tx_queue[EFX_MAX_TXQ_PER_CHANNEL];
+	struct efx_tx_queue *tx_queue_by_type[EFX_TXQ_TYPES];
 
 	enum efx_sync_events_state sync_events_state;
 	u32 sync_timestamp_major;
@@ -1200,7 +1207,7 @@ struct efx_udp_tunnel {
 *	a pointer to the &struct efx_msi_context for the channel.
 * @irq_handle_legacy: Handle legacy interrupt.  The @dev_id argument
 *	is a pointer to the &struct efx_nic.
- * @tx_probe: Allocate resources for TX queue
+ * @tx_probe: Allocate resources for TX queue (and select TXQ type)
 * @tx_init: Initialise TX queue on the NIC
 * @tx_remove: Free resources for TX queue
 * @tx_write: Write TX descriptors and doorbell
@@ -1495,14 +1502,6 @@ efx_get_tx_channel(struct efx_nic *efx, unsigned int index)
 	return efx->channel[efx->tx_channel_offset + index];
 }
 
-static inline struct efx_tx_queue *
-efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type)
-{
-	EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_tx_channels ||
-				  type >= efx->tx_queues_per_channel);
-	return &efx->channel[efx->tx_channel_offset + index]->tx_queue[type];
-}
-
 static inline struct efx_channel *
 efx_get_xdp_channel(struct efx_nic *efx, unsigned int index)
 {
@@ -1529,10 +1528,18 @@ static inline unsigned int efx_channel_num_tx_queues(struct efx_channel *channel
 }
 
 static inline struct efx_tx_queue *
-efx_channel_get_tx_queue(struct efx_channel *channel, unsigned type)
+efx_channel_get_tx_queue(struct efx_channel *channel, unsigned int type)
 {
-	EFX_WARN_ON_ONCE_PARANOID(type >= efx_channel_num_tx_queues(channel));
-	return &channel->tx_queue[type];
+	EFX_WARN_ON_ONCE_PARANOID(type >= EFX_TXQ_TYPES);
+	return channel->tx_queue_by_type[type];
+}
+
+static inline struct efx_tx_queue *
+efx_get_tx_queue(struct efx_nic *efx, unsigned int index, unsigned int type)
+{
+	struct efx_channel *channel = efx_get_tx_channel(efx, index);
+
+	return efx_channel_get_tx_queue(channel, type);
 }
 
 /* Iterate over all TX queues belonging to a channel */
...
@@ -297,6 +297,10 @@ struct efx_ef10_nic_data {
 	u64 licensed_features;
 };
 
+/* TSOv2 */
+int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+			 bool *data_mapped);
+
 int efx_init_sriov(void);
 void efx_fini_sriov(void);
...
@@ -43,6 +43,7 @@
 #include "mcdi_pcol.h"
 #include "io.h"
 #include "farch_regs.h"
+#include "tx.h"
 #include "nic.h" /* indirectly includes ptp.h */
 
 /* Maximum number of events expected to make up a PTP event */
@@ -1082,10 +1083,10 @@ static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings)
 static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
 {
 	struct efx_ptp_data *ptp_data = efx->ptp_data;
+	u8 type = efx_tx_csum_type_skb(skb);
 	struct efx_tx_queue *tx_queue;
-	u8 type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0;
 
-	tx_queue = &ptp_data->channel->tx_queue[type];
+	tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
 	if (tx_queue && tx_queue->timestamping) {
 		efx_enqueue_skb(tx_queue, skb);
 	} else {
...
@@ -656,8 +656,8 @@ static int efx_test_loopbacks(struct efx_nic *efx, struct efx_self_tests *tests,
 
 		/* Test all enabled types of TX queue */
 		efx_for_each_channel_tx_queue(tx_queue, channel) {
-			state->offload_csum = (tx_queue->label &
-					       EFX_TXQ_TYPE_OFFLOAD);
+			state->offload_csum = (tx_queue->type &
+					       EFX_TXQ_TYPE_OUTER_CSUM);
 			rc = efx_test_loopback(tx_queue,
 					       &tests->loopback[mode]);
 			if (rc)
...
@@ -15,8 +15,8 @@
  */
 
 struct efx_loopback_self_tests {
-	int tx_sent[EFX_TXQ_TYPES];
-	int tx_done[EFX_TXQ_TYPES];
+	int tx_sent[EFX_MAX_TXQ_PER_CHANNEL];
+	int tx_done[EFX_MAX_TXQ_PER_CHANNEL];
 	int rx_good;
 	int rx_bad;
 };
...
@@ -338,8 +338,18 @@ netdev_tx_t __efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb
 	 * size limit.
 	 */
 	if (segments) {
-		EFX_WARN_ON_ONCE_PARANOID(!tx_queue->handle_tso);
-		rc = tx_queue->handle_tso(tx_queue, skb, &data_mapped);
+		switch (tx_queue->tso_version) {
+		case 1:
+			rc = efx_enqueue_skb_tso(tx_queue, skb, &data_mapped);
+			break;
+		case 2:
+			rc = efx_ef10_tx_tso_desc(tx_queue, skb, &data_mapped);
+			break;
+		case 0: /* No TSO on this queue, SW fallback needed */
+		default:
+			rc = -EINVAL;
+			break;
+		}
 		if (rc == -EINVAL) {
 			rc = efx_tx_tso_fallback(tx_queue, skb);
 			tx_queue->tso_fallbacks++;
@@ -491,13 +501,10 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
 }
 
 /* Initiate a packet transmission.  We use one channel per CPU
- * (sharing when we have more CPUs than channels).  On Falcon, the TX
- * completion events will be directed back to the CPU that transmitted
- * the packet, which should be cache-efficient.
+ * (sharing when we have more CPUs than channels).
  *
 * Context: non-blocking.
- * Note that returning anything other than NETDEV_TX_OK will cause the
- * OS to free the skb.
+ * Should always return NETDEV_TX_OK and consume the skb.
 */
 netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
 				struct net_device *net_dev)
@@ -509,7 +516,7 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
 	EFX_WARN_ON_PARANOID(!netif_device_present(net_dev));
 
 	index = skb_get_queue_mapping(skb);
-	type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0;
+	type = efx_tx_csum_type_skb(skb);
 	if (index >= efx->n_tx_channels) {
 		index -= efx->n_tx_channels;
 		type |= EFX_TXQ_TYPE_HIGHPRI;
@@ -527,6 +534,20 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
 	}
 
 	tx_queue = efx_get_tx_queue(efx, index, type);
+	if (WARN_ON_ONCE(!tx_queue)) {
+		/* We don't have a TXQ of the right type.
+		 * This should never happen, as we don't advertise offload
+		 * features unless we can support them.
+		 */
+		dev_kfree_skb_any(skb);
+		/* If we're not expecting another transmit and we had something to push
+		 * on this queue or a partner queue then we need to push here to get the
+		 * previous packets out.
+		 */
+		if (!netdev_xmit_more())
+			efx_tx_send_pending(tx_queue->channel);
+		return NETDEV_TX_OK;
+	}
 	return __efx_enqueue_skb(tx_queue, skb);
 }
 
@@ -577,7 +598,7 @@ void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue)
 	tx_queue->core_txq =
 		netdev_get_tx_queue(efx->net_dev,
 				    tx_queue->channel->channel +
-				    ((tx_queue->label & EFX_TXQ_TYPE_HIGHPRI) ?
+				    ((tx_queue->type & EFX_TXQ_TYPE_HIGHPRI) ?
 				     efx->n_tx_channels : 0));
 }
...
@@ -18,4 +18,30 @@ unsigned int efx_tx_limit_len(struct efx_tx_queue *tx_queue,
 u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue,
 				   struct efx_tx_buffer *buffer, size_t len);
 
+/* What TXQ type will satisfy the checksum offloads required for this skb? */
+static inline unsigned int efx_tx_csum_type_skb(struct sk_buff *skb)
+{
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0; /* no checksum offload */
+
+	if (skb->encapsulation &&
+	    skb_checksum_start_offset(skb) == skb_inner_transport_offset(skb)) {
+		/* we only advertise features for IPv4 and IPv6 checksums on
+		 * encapsulated packets, so if the checksum is for the inner
+		 * packet, it must be one of them; no further checking required.
+		 */
+
+		/* Do we also need to offload the outer header checksum? */
+		if (skb_shinfo(skb)->gso_segs > 1 &&
+		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
+		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
+			return EFX_TXQ_TYPE_OUTER_CSUM | EFX_TXQ_TYPE_INNER_CSUM;
+		return EFX_TXQ_TYPE_INNER_CSUM;
+	}
+
+	/* similarly, we only advertise features for IPv4 and IPv6 checksums,
+	 * so it must be one of them.  No need for further checks.
+	 */
+	return EFX_TXQ_TYPE_OUTER_CSUM;
+}
 #endif /* EFX_TX_H */
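
To summarise the helper above, the mapping it is expected to produce for common cases (illustrative comment only; the values are the EFX_TXQ_TYPE_* flags):

/*
 *   ip_summed != CHECKSUM_PARTIAL                        -> 0
 *   plain (non-encapsulated) TCP/UDP checksum            -> OUTER_CSUM (1)
 *   inner checksum of an encapsulated packet, non-GSO    -> INNER_CSUM (2)
 *   GSO with SKB_GSO_UDP_TUNNEL_CSUM and not
 *   SKB_GSO_PARTIAL (outer UDP csum needed per segment)  -> OUTER_CSUM | INNER_CSUM (3)
 */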
@@ -47,11 +47,12 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
 		goto fail1;
 	}
 
-	/* Allocate hardware ring */
+	/* Allocate hardware ring, determine TXQ type */
 	rc = efx_nic_probe_tx(tx_queue);
 	if (rc)
 		goto fail2;
 
+	tx_queue->channel->tx_queue_by_type[tx_queue->type] = tx_queue;
 	return 0;
 
 fail2:
@@ -85,11 +86,7 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
 	tx_queue->completed_timestamp_minor = 0;
 
 	tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
-
-	/* Set up default function pointers. These may get replaced by
-	 * efx_nic_init_tx() based off NIC/queue capabilities.
-	 */
-	tx_queue->handle_tso = efx_enqueue_skb_tso;
+	tx_queue->tso_version = 0;
 
 	/* Set up TX descriptor ring */
 	efx_nic_init_tx(tx_queue);
@@ -141,6 +138,7 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 
 	kfree(tx_queue->buffer);
 	tx_queue->buffer = NULL;
+	tx_queue->channel->tx_queue_by_type[tx_queue->type] = NULL;
 }
 
 void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
...