Commit a43e8e9f authored by Haiyang Zhang's avatar Haiyang Zhang Committed by Paolo Abeni

net: mana: Fix oversized sge0 for GSO packets

Handle the case when GSO SKB linear length is too large.

MANA NIC requires GSO packets to put only the header part to SGE0,
otherwise the TX queue may stop at the HW level.

So, use 2 SGEs for the skb linear part which contains more than the
packet header.

Fixes: ca9c54d2 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
Signed-off-by: default avatarHaiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: default avatarSimon Horman <horms@kernel.org>
Reviewed-by: default avatarShradha Gupta <shradhagupta@linux.microsoft.com>
Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parent 7a54de92
...@@ -91,63 +91,137 @@ static unsigned int mana_checksum_info(struct sk_buff *skb) ...@@ -91,63 +91,137 @@ static unsigned int mana_checksum_info(struct sk_buff *skb)
return 0; return 0;
} }
static void mana_add_sge(struct mana_tx_package *tp, struct mana_skb_head *ash,
int sg_i, dma_addr_t da, int sge_len, u32 gpa_mkey)
{
ash->dma_handle[sg_i] = da;
ash->size[sg_i] = sge_len;
tp->wqe_req.sgl[sg_i].address = da;
tp->wqe_req.sgl[sg_i].mem_key = gpa_mkey;
tp->wqe_req.sgl[sg_i].size = sge_len;
}
static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc,
struct mana_tx_package *tp) struct mana_tx_package *tp, int gso_hs)
{ {
struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
int hsg = 1; /* num of SGEs of linear part */
struct gdma_dev *gd = apc->ac->gdma_dev; struct gdma_dev *gd = apc->ac->gdma_dev;
int skb_hlen = skb_headlen(skb);
int sge0_len, sge1_len = 0;
struct gdma_context *gc; struct gdma_context *gc;
struct device *dev; struct device *dev;
skb_frag_t *frag; skb_frag_t *frag;
dma_addr_t da; dma_addr_t da;
int sg_i;
int i; int i;
gc = gd->gdma_context; gc = gd->gdma_context;
dev = gc->dev; dev = gc->dev;
da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
if (gso_hs && gso_hs < skb_hlen) {
sge0_len = gso_hs;
sge1_len = skb_hlen - gso_hs;
} else {
sge0_len = skb_hlen;
}
da = dma_map_single(dev, skb->data, sge0_len, DMA_TO_DEVICE);
if (dma_mapping_error(dev, da)) if (dma_mapping_error(dev, da))
return -ENOMEM; return -ENOMEM;
ash->dma_handle[0] = da; mana_add_sge(tp, ash, 0, da, sge0_len, gd->gpa_mkey);
ash->size[0] = skb_headlen(skb);
tp->wqe_req.sgl[0].address = ash->dma_handle[0]; if (sge1_len) {
tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey; sg_i = 1;
tp->wqe_req.sgl[0].size = ash->size[0]; da = dma_map_single(dev, skb->data + sge0_len, sge1_len,
DMA_TO_DEVICE);
if (dma_mapping_error(dev, da))
goto frag_err;
mana_add_sge(tp, ash, sg_i, da, sge1_len, gd->gpa_mkey);
hsg = 2;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
sg_i = hsg + i;
frag = &skb_shinfo(skb)->frags[i]; frag = &skb_shinfo(skb)->frags[i];
da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
DMA_TO_DEVICE); DMA_TO_DEVICE);
if (dma_mapping_error(dev, da)) if (dma_mapping_error(dev, da))
goto frag_err; goto frag_err;
ash->dma_handle[i + 1] = da; mana_add_sge(tp, ash, sg_i, da, skb_frag_size(frag),
ash->size[i + 1] = skb_frag_size(frag); gd->gpa_mkey);
tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1];
tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey;
tp->wqe_req.sgl[i + 1].size = ash->size[i + 1];
} }
return 0; return 0;
frag_err: frag_err:
for (i = i - 1; i >= 0; i--) for (i = sg_i - 1; i >= hsg; i--)
dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1], dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
DMA_TO_DEVICE); DMA_TO_DEVICE);
dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE); for (i = hsg - 1; i >= 0; i--)
dma_unmap_single(dev, ash->dma_handle[i], ash->size[i],
DMA_TO_DEVICE);
return -ENOMEM; return -ENOMEM;
} }
/* Handle the case when GSO SKB linear length is too large.
* MANA NIC requires GSO packets to put only the packet header to SGE0.
* So, we need 2 SGEs for the skb linear part which contains more than the
* header.
* Return a positive value for the number of SGEs, or a negative value
* for an error.
*/
static int mana_fix_skb_head(struct net_device *ndev, struct sk_buff *skb,
int gso_hs)
{
int num_sge = 1 + skb_shinfo(skb)->nr_frags;
int skb_hlen = skb_headlen(skb);
if (gso_hs < skb_hlen) {
num_sge++;
} else if (gso_hs > skb_hlen) {
if (net_ratelimit())
netdev_err(ndev,
"TX nonlinear head: hs:%d, skb_hlen:%d\n",
gso_hs, skb_hlen);
return -EINVAL;
}
return num_sge;
}
/* Get the GSO packet's header size */
static int mana_get_gso_hs(struct sk_buff *skb)
{
int gso_hs;
if (skb->encapsulation) {
gso_hs = skb_inner_tcp_all_headers(skb);
} else {
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
gso_hs = skb_transport_offset(skb) +
sizeof(struct udphdr);
} else {
gso_hs = skb_tcp_all_headers(skb);
}
}
return gso_hs;
}
netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{ {
enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT; enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
struct mana_port_context *apc = netdev_priv(ndev); struct mana_port_context *apc = netdev_priv(ndev);
int gso_hs = 0; /* zero for non-GSO pkts */
u16 txq_idx = skb_get_queue_mapping(skb); u16 txq_idx = skb_get_queue_mapping(skb);
struct gdma_dev *gd = apc->ac->gdma_dev; struct gdma_dev *gd = apc->ac->gdma_dev;
bool ipv4 = false, ipv6 = false; bool ipv4 = false, ipv6 = false;
...@@ -159,7 +233,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) ...@@ -159,7 +233,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
struct mana_txq *txq; struct mana_txq *txq;
struct mana_cq *cq; struct mana_cq *cq;
int err, len; int err, len;
u16 ihs;
if (unlikely(!apc->port_is_up)) if (unlikely(!apc->port_is_up))
goto tx_drop; goto tx_drop;
...@@ -209,19 +282,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) ...@@ -209,19 +282,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
pkg.wqe_req.client_data_unit = 0; pkg.wqe_req.client_data_unit = 0;
pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags; pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags;
WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
pkg.wqe_req.sgl = pkg.sgl_array;
} else {
pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge,
sizeof(struct gdma_sge),
GFP_ATOMIC);
if (!pkg.sgl_ptr)
goto tx_drop_count;
pkg.wqe_req.sgl = pkg.sgl_ptr;
}
if (skb->protocol == htons(ETH_P_IP)) if (skb->protocol == htons(ETH_P_IP))
ipv4 = true; ipv4 = true;
...@@ -229,6 +289,26 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) ...@@ -229,6 +289,26 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
ipv6 = true; ipv6 = true;
if (skb_is_gso(skb)) { if (skb_is_gso(skb)) {
int num_sge;
gso_hs = mana_get_gso_hs(skb);
num_sge = mana_fix_skb_head(ndev, skb, gso_hs);
if (num_sge > 0)
pkg.wqe_req.num_sge = num_sge;
else
goto tx_drop_count;
u64_stats_update_begin(&tx_stats->syncp);
if (skb->encapsulation) {
tx_stats->tso_inner_packets++;
tx_stats->tso_inner_bytes += skb->len - gso_hs;
} else {
tx_stats->tso_packets++;
tx_stats->tso_bytes += skb->len - gso_hs;
}
u64_stats_update_end(&tx_stats->syncp);
pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
...@@ -252,26 +332,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) ...@@ -252,26 +332,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
&ipv6_hdr(skb)->daddr, 0, &ipv6_hdr(skb)->daddr, 0,
IPPROTO_TCP, 0); IPPROTO_TCP, 0);
} }
if (skb->encapsulation) {
ihs = skb_inner_tcp_all_headers(skb);
u64_stats_update_begin(&tx_stats->syncp);
tx_stats->tso_inner_packets++;
tx_stats->tso_inner_bytes += skb->len - ihs;
u64_stats_update_end(&tx_stats->syncp);
} else {
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
} else {
ihs = skb_tcp_all_headers(skb);
}
u64_stats_update_begin(&tx_stats->syncp);
tx_stats->tso_packets++;
tx_stats->tso_bytes += skb->len - ihs;
u64_stats_update_end(&tx_stats->syncp);
}
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
csum_type = mana_checksum_info(skb); csum_type = mana_checksum_info(skb);
...@@ -294,11 +354,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) ...@@ -294,11 +354,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
} else { } else {
/* Can't do offload of this type of checksum */ /* Can't do offload of this type of checksum */
if (skb_checksum_help(skb)) if (skb_checksum_help(skb))
goto free_sgl_ptr; goto tx_drop_count;
} }
} }
if (mana_map_skb(skb, apc, &pkg)) { WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
pkg.wqe_req.sgl = pkg.sgl_array;
} else {
pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge,
sizeof(struct gdma_sge),
GFP_ATOMIC);
if (!pkg.sgl_ptr)
goto tx_drop_count;
pkg.wqe_req.sgl = pkg.sgl_ptr;
}
if (mana_map_skb(skb, apc, &pkg, gso_hs)) {
u64_stats_update_begin(&tx_stats->syncp); u64_stats_update_begin(&tx_stats->syncp);
tx_stats->mana_map_err++; tx_stats->mana_map_err++;
u64_stats_update_end(&tx_stats->syncp); u64_stats_update_end(&tx_stats->syncp);
...@@ -1256,11 +1330,16 @@ static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc) ...@@ -1256,11 +1330,16 @@ static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
struct device *dev = gc->dev; struct device *dev = gc->dev;
int i; int hsg, i;
/* Number of SGEs of linear part */
hsg = (skb_is_gso(skb) && skb_headlen(skb) > ash->size[0]) ? 2 : 1;
dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE); for (i = 0; i < hsg; i++)
dma_unmap_single(dev, ash->dma_handle[i], ash->size[i],
DMA_TO_DEVICE);
for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++) for (i = hsg; i < skb_shinfo(skb)->nr_frags + hsg; i++)
dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
DMA_TO_DEVICE); DMA_TO_DEVICE);
} }
......
...@@ -103,9 +103,10 @@ struct mana_txq { ...@@ -103,9 +103,10 @@ struct mana_txq {
/* skb data and frags dma mappings */ /* skb data and frags dma mappings */
struct mana_skb_head { struct mana_skb_head {
dma_addr_t dma_handle[MAX_SKB_FRAGS + 1]; /* GSO pkts may have 2 SGEs for the linear part*/
dma_addr_t dma_handle[MAX_SKB_FRAGS + 2];
u32 size[MAX_SKB_FRAGS + 1]; u32 size[MAX_SKB_FRAGS + 2];
}; };
#define MANA_HEADROOM sizeof(struct mana_skb_head) #define MANA_HEADROOM sizeof(struct mana_skb_head)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment