Commit 9cde9450 authored by Felix Fietkau, committed by David S. Miller

bgmac: implement scatter/gather support

Always use software checksumming, since the hardware does not have any
checksum offload support.
This significantly improves local TCP tx performance.
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 45c9b3c0
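
The diff below advertises NETIF_F_SG plus the checksum features in probe, then makes bgmac_dma_tx_add() map the skb's linear head and each page fragment into its own DMA descriptor, resolving CHECKSUM_PARTIAL in software first because the MAC has no offload engine. As a minimal sketch of that transmit pattern, assuming a hypothetical helper name (sg_tx_map_sketch) and a caller-provided addrs[] array standing in for the ring's per-slot state; the skb/DMA calls themselves are the same kernel APIs the patch uses:

#include <linux/dma-mapping.h>
#include <linux/skbuff.h>

/* Hypothetical sketch, not the driver's code: map one skb for a
 * scatter/gather-capable ring, doing checksums in software because
 * the hardware offers no offload. Descriptor programming is elided;
 * addrs[] stands in for per-slot descriptor state.
 */
static int sg_tx_map_sketch(struct device *dma_dev, struct sk_buff *skb,
			    dma_addr_t *addrs)
{
	int i, nr_frags = skb_shinfo(skb)->nr_frags;

	/* No checksum offload in hardware: resolve CHECKSUM_PARTIAL now. */
	if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
		return -EIO;

	/* Map only the linear head (skb_headlen()); the remaining bytes
	 * live in page fragments, each mapped separately below.
	 */
	addrs[0] = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
				  DMA_TO_DEVICE);
	if (dma_mapping_error(dma_dev, addrs[0]))
		return -ENOMEM;

	for (i = 0; i < nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		addrs[i + 1] = skb_frag_dma_map(dma_dev, frag, 0,
						skb_frag_size(frag),
						DMA_TO_DEVICE);
		if (dma_mapping_error(dma_dev, addrs[i + 1]))
			goto err_unwind;
	}
	return 0;

err_unwind:
	/* Undo fragment mappings in reverse, then the head mapping. */
	while (--i >= 0)
		dma_unmap_page(dma_dev, addrs[i + 1],
			       skb_frag_size(&skb_shinfo(skb)->frags[i]),
			       DMA_TO_DEVICE);
	dma_unmap_single(dma_dev, addrs[0], skb_headlen(skb), DMA_TO_DEVICE);
	return -ENOMEM;
}

Setting the end-of-frame and interrupt flags only on the last mapped buffer, as the patch does with BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC, yields a single completion interrupt per skb rather than one per fragment.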
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct bgmac *bgmac,
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
 }
 
+static void
+bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
+		     int i, int len, u32 ctl0)
+{
+	struct bgmac_slot_info *slot;
+	struct bgmac_dma_desc *dma_desc;
+	u32 ctl1;
+
+	if (i == ring->num_slots - 1)
+		ctl0 |= BGMAC_DESC_CTL0_EOT;
+
+	ctl1 = len & BGMAC_DESC_CTL1_LEN;
+
+	slot = &ring->slots[i];
+	dma_desc = &ring->cpu_base[i];
+	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+	dma_desc->ctl0 = cpu_to_le32(ctl0);
+	dma_desc->ctl1 = cpu_to_le32(ctl1);
+}
+
 static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 				    struct bgmac_dma_ring *ring,
 				    struct sk_buff *skb)
 {
 	struct device *dma_dev = bgmac->core->dma_dev;
 	struct net_device *net_dev = bgmac->net_dev;
-	struct bgmac_dma_desc *dma_desc;
-	struct bgmac_slot_info *slot;
-	u32 ctl0, ctl1;
+	struct bgmac_slot_info *slot = &ring->slots[ring->end];
 	int free_slots;
+	int nr_frags;
+	u32 flags;
+	int index = ring->end;
+	int i;
 
 	if (skb->len > BGMAC_DESC_CTL1_LEN) {
 		bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
-		goto err_stop_drop;
+		goto err_drop;
 	}
 
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		skb_checksum_help(skb);
+
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
 	if (ring->start <= ring->end)
 		free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
 	else
 		free_slots = ring->start - ring->end;
-	if (free_slots == 1) {
+
+	if (free_slots <= nr_frags + 1) {
 		bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
 		netif_stop_queue(net_dev);
 		return NETDEV_TX_BUSY;
 	}
 
-	slot = &ring->slots[ring->end];
-	slot->skb = skb;
-	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
+	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
 					DMA_TO_DEVICE);
-	if (dma_mapping_error(dma_dev, slot->dma_addr)) {
-		bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
-			  ring->mmio_base);
-		goto err_stop_drop;
-	}
+	if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+		goto err_dma_head;
 
-	ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
-	if (ring->end == ring->num_slots - 1)
-		ctl0 |= BGMAC_DESC_CTL0_EOT;
-	ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
+	flags = BGMAC_DESC_CTL0_SOF;
+	if (!nr_frags)
+		flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
 
-	dma_desc = ring->cpu_base;
-	dma_desc += ring->end;
-	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
-	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
-	dma_desc->ctl0 = cpu_to_le32(ctl0);
-	dma_desc->ctl1 = cpu_to_le32(ctl1);
+	bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
+	flags = 0;
+
+	for (i = 0; i < nr_frags; i++) {
+		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+		int len = skb_frag_size(frag);
+
+		index = (index + 1) % BGMAC_TX_RING_SLOTS;
+		slot = &ring->slots[index];
+		slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
+						  len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+			goto err_dma;
+
+		if (i == nr_frags - 1)
+			flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
+
+		bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
+	}
+
+	slot->skb = skb;
 
 	netdev_sent_queue(net_dev, skb->len);
@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 	/* Increase ring->end to point empty slot. We tell hardware the first
 	 * slot it should *not* read.
 	 */
-	if (++ring->end >= BGMAC_TX_RING_SLOTS)
-		ring->end = 0;
+	ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
 		    ring->index_base +
 		    ring->end * sizeof(struct bgmac_dma_desc));
 
-	/* Always keep one slot free to allow detecting bugged calls. */
-	if (--free_slots == 1)
+	free_slots -= nr_frags + 1;
+	if (free_slots < 8)
 		netif_stop_queue(net_dev);
 
 	return NETDEV_TX_OK;
 
-err_stop_drop:
-	netif_stop_queue(net_dev);
+err_dma:
+	dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
+			 DMA_TO_DEVICE);
+
+	while (i > 0) {
+		int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
+		struct bgmac_slot_info *slot = &ring->slots[index];
+		u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
+		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+
+		dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
+	}
+
+err_dma_head:
+	bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+		  ring->mmio_base);
+
+err_drop:
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 	while (ring->start != empty_slot) {
 		struct bgmac_slot_info *slot = &ring->slots[ring->start];
+		u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
+		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
 
-		if (slot->skb) {
+		if (!slot->dma_addr) {
+			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+				  ring->start, ring->end);
+			goto next;
+		}
+
+		if (ctl1 & BGMAC_DESC_CTL0_SOF)
 			/* Unmap no longer used buffer */
-			dma_unmap_single(dma_dev, slot->dma_addr,
-					 slot->skb->len, DMA_TO_DEVICE);
-			slot->dma_addr = 0;
+			dma_unmap_single(dma_dev, slot->dma_addr, len,
+					 DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dma_dev, slot->dma_addr, len,
+				       DMA_TO_DEVICE);
 
+		if (slot->skb) {
 			bytes_compl += slot->skb->len;
 			pkts_compl++;
 
 			/* Free memory! :) */
 			dev_kfree_skb(slot->skb);
 			slot->skb = NULL;
-		} else {
-			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
-				  ring->start, ring->end);
 		}
 
+next:
+		slot->dma_addr = 0;
 		if (++ring->start >= BGMAC_TX_RING_SLOTS)
 			ring->start = 0;
 		freed = true;
 	}
 
+	if (!pkts_compl)
+		return;
+
 	netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
-	if (freed && netif_queue_stopped(bgmac->net_dev))
+	if (netif_queue_stopped(bgmac->net_dev))
 		netif_wake_queue(bgmac->net_dev);
 }
@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
 				   struct bgmac_dma_ring *ring)
 {
 	struct device *dma_dev = bgmac->core->dma_dev;
+	struct bgmac_dma_desc *dma_desc = ring->cpu_base;
 	struct bgmac_slot_info *slot;
 	int i;
 
 	for (i = 0; i < ring->num_slots; i++) {
+		int len = dma_desc[i].ctl1 & BGMAC_DESC_CTL1_LEN;
+
 		slot = &ring->slots[i];
-		if (slot->skb) {
-			if (slot->dma_addr)
-				dma_unmap_single(dma_dev, slot->dma_addr,
-						 slot->skb->len, DMA_TO_DEVICE);
-			dev_kfree_skb(slot->skb);
-		}
+		dev_kfree_skb(slot->skb);
+
+		if (!slot->dma_addr)
+			continue;
+
+		if (slot->skb)
+			dma_unmap_single(dma_dev, slot->dma_addr,
+					 len, DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dma_dev, slot->dma_addr,
+				       len, DMA_TO_DEVICE);
 	}
 }
@@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_device *core)
 		goto err_dma_free;
 	}
 
+	net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	net_dev->hw_features = net_dev->features;
+	net_dev->vlan_features = net_dev->features;
+
 	err = register_netdev(bgmac->net_dev);
 	if (err) {
 		bgmac_err(bgmac, "Cannot register net device\n");