Commit c036cab4 authored by David S. Miller's avatar David S. Miller

Merge branch 'mediatek-various-performance-improvements'

John Crispin says:

====================
net-next: mediatek: various performance improvements

During development we mainly ran testing using iperf doing 1500 byte
tcp frames. It was pointed out recently, that the driver does not perform
very well when using 512 byte udp frames. The biggest problem was that
RPS was not working as no rx queue was being set. fixing this more than
doubled the throughput. Additionally the IRQ mask register is now locked
independently for RX and TX. RX IRQ aggregation is also added. With all
these patches applied we can almost triple the throughput.

While at it we also add PHY status change reporting for GMACs connecting
directly to a PHY.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents cf67bebe a2d5e7b4
......@@ -221,6 +221,9 @@ static void mtk_phy_link_adjust(struct net_device *dev)
netif_carrier_on(dev);
else
netif_carrier_off(dev);
if (!of_phy_is_fixed_link(mac->of_node))
phy_print_status(dev->phydev);
}
static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac,
......@@ -369,28 +372,48 @@ static void mtk_mdio_cleanup(struct mtk_eth *eth)
mdiobus_unregister(eth->mii_bus);
}
static inline void mtk_irq_disable(struct mtk_eth *eth,
unsigned reg, u32 mask)
static inline void mtk_tx_irq_disable(struct mtk_eth *eth, u32 mask)
{
unsigned long flags;
u32 val;
spin_lock_irqsave(&eth->tx_irq_lock, flags);
val = mtk_r32(eth, MTK_QDMA_INT_MASK);
mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK);
spin_unlock_irqrestore(&eth->tx_irq_lock, flags);
}
static inline void mtk_tx_irq_enable(struct mtk_eth *eth, u32 mask)
{
unsigned long flags;
u32 val;
spin_lock_irqsave(&eth->irq_lock, flags);
val = mtk_r32(eth, reg);
mtk_w32(eth, val & ~mask, reg);
spin_unlock_irqrestore(&eth->irq_lock, flags);
spin_lock_irqsave(&eth->tx_irq_lock, flags);
val = mtk_r32(eth, MTK_QDMA_INT_MASK);
mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK);
spin_unlock_irqrestore(&eth->tx_irq_lock, flags);
}
static inline void mtk_irq_enable(struct mtk_eth *eth,
unsigned reg, u32 mask)
static inline void mtk_rx_irq_disable(struct mtk_eth *eth, u32 mask)
{
unsigned long flags;
u32 val;
spin_lock_irqsave(&eth->irq_lock, flags);
val = mtk_r32(eth, reg);
mtk_w32(eth, val | mask, reg);
spin_unlock_irqrestore(&eth->irq_lock, flags);
spin_lock_irqsave(&eth->rx_irq_lock, flags);
val = mtk_r32(eth, MTK_PDMA_INT_MASK);
mtk_w32(eth, val & ~mask, MTK_PDMA_INT_MASK);
spin_unlock_irqrestore(&eth->rx_irq_lock, flags);
}
static inline void mtk_rx_irq_enable(struct mtk_eth *eth, u32 mask)
{
unsigned long flags;
u32 val;
spin_lock_irqsave(&eth->rx_irq_lock, flags);
val = mtk_r32(eth, MTK_PDMA_INT_MASK);
mtk_w32(eth, val | mask, MTK_PDMA_INT_MASK);
spin_unlock_irqrestore(&eth->rx_irq_lock, flags);
}
static int mtk_set_mac_address(struct net_device *dev, void *p)
......@@ -969,6 +992,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
RX_DMA_VID(trxd.rxd3))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
RX_DMA_VID(trxd.rxd3));
skb_record_rx_queue(skb, 0);
napi_gro_receive(napi, skb);
ring->data[idx] = new_data;
......@@ -1098,7 +1122,7 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget)
return budget;
napi_complete(napi);
mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
return tx_done;
}
......@@ -1132,7 +1156,7 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget)
goto poll_again;
}
napi_complete(napi);
mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
return rx_done + budget - remain_budget;
}
......@@ -1667,7 +1691,7 @@ static irqreturn_t mtk_handle_irq_rx(int irq, void *_eth)
if (likely(napi_schedule_prep(&eth->rx_napi))) {
__napi_schedule(&eth->rx_napi);
mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
mtk_rx_irq_disable(eth, MTK_RX_DONE_INT);
}
return IRQ_HANDLED;
......@@ -1679,7 +1703,7 @@ static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth)
if (likely(napi_schedule_prep(&eth->tx_napi))) {
__napi_schedule(&eth->tx_napi);
mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
}
return IRQ_HANDLED;
......@@ -1691,11 +1715,11 @@ static void mtk_poll_controller(struct net_device *dev)
struct mtk_mac *mac = netdev_priv(dev);
struct mtk_eth *eth = mac->hw;
mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
mtk_rx_irq_disable(eth, MTK_RX_DONE_INT);
mtk_handle_irq_rx(eth->irq[2], dev);
mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
}
#endif
......@@ -1736,8 +1760,8 @@ static int mtk_open(struct net_device *dev)
napi_enable(&eth->tx_napi);
napi_enable(&eth->rx_napi);
mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
}
atomic_inc(&eth->dma_refcnt);
......@@ -1782,8 +1806,8 @@ static int mtk_stop(struct net_device *dev)
if (!atomic_dec_and_test(&eth->dma_refcnt))
return 0;
mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
mtk_rx_irq_disable(eth, MTK_RX_DONE_INT);
napi_disable(&eth->tx_napi);
napi_disable(&eth->rx_napi);
......@@ -1858,11 +1882,13 @@ static int mtk_hw_init(struct mtk_eth *eth)
/* Enable RX VLan Offloading */
mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
/* enable interrupt delay for RX */
mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
/* disable delay and normal interrupt */
mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
mtk_w32(eth, 0, MTK_PDMA_DELAY_INT);
mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
mtk_tx_irq_disable(eth, ~0);
mtk_rx_irq_disable(eth, ~0);
mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
mtk_w32(eth, 0, MTK_RST_GL);
......@@ -1933,8 +1959,8 @@ static void mtk_uninit(struct net_device *dev)
phy_disconnect(dev->phydev);
if (of_phy_is_fixed_link(mac->of_node))
of_phy_deregister_fixed_link(mac->of_node);
mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
mtk_tx_irq_disable(eth, ~0);
mtk_rx_irq_disable(eth, ~0);
}
static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
......@@ -2394,7 +2420,8 @@ static int mtk_probe(struct platform_device *pdev)
return PTR_ERR(eth->base);
spin_lock_init(&eth->page_lock);
spin_lock_init(&eth->irq_lock);
spin_lock_init(&eth->tx_irq_lock);
spin_lock_init(&eth->rx_irq_lock);
eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
"mediatek,ethsys");
......
......@@ -125,7 +125,14 @@
#define MTK_PST_DRX_IDX_CFG(x) (MTK_PST_DRX_IDX0 << (x))
/* PDMA Delay Interrupt Register */
#define MTK_PDMA_DELAY_INT 0xa0c
#define MTK_PDMA_DELAY_INT 0xa0c
#define MTK_PDMA_DELAY_RX_EN BIT(15)
#define MTK_PDMA_DELAY_RX_PINT 4
#define MTK_PDMA_DELAY_RX_PINT_SHIFT 8
#define MTK_PDMA_DELAY_RX_PTIME 4
#define MTK_PDMA_DELAY_RX_DELAY \
(MTK_PDMA_DELAY_RX_EN | MTK_PDMA_DELAY_RX_PTIME | \
(MTK_PDMA_DELAY_RX_PINT << MTK_PDMA_DELAY_RX_PINT_SHIFT))
/* PDMA Interrupt Status Register */
#define MTK_PDMA_INT_STATUS 0xa20
......@@ -206,6 +213,7 @@
/* QDMA Interrupt Status Register */
#define MTK_QMTK_INT_STATUS 0x1A18
#define MTK_RX_DONE_DLY BIT(30)
#define MTK_RX_DONE_INT3 BIT(19)
#define MTK_RX_DONE_INT2 BIT(18)
#define MTK_RX_DONE_INT1 BIT(17)
......@@ -214,8 +222,7 @@
#define MTK_TX_DONE_INT2 BIT(2)
#define MTK_TX_DONE_INT1 BIT(1)
#define MTK_TX_DONE_INT0 BIT(0)
#define MTK_RX_DONE_INT (MTK_RX_DONE_INT0 | MTK_RX_DONE_INT1 | \
MTK_RX_DONE_INT2 | MTK_RX_DONE_INT3)
#define MTK_RX_DONE_INT MTK_RX_DONE_DLY
#define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
......@@ -512,6 +519,8 @@ struct mtk_rx_ring {
* @dev: The device pointer
* @base: The mapped register i/o base
* @page_lock: Make sure that register operations are atomic
* @tx_irq__lock: Make sure that IRQ register operations are atomic
* @rx_irq__lock: Make sure that IRQ register operations are atomic
* @dummy_dev: we run 2 netdevs on 1 physical DMA ring and need a
* dummy for NAPI to work
* @netdev: The netdev instances
......@@ -540,7 +549,8 @@ struct mtk_eth {
struct device *dev;
void __iomem *base;
spinlock_t page_lock;
spinlock_t irq_lock;
spinlock_t tx_irq_lock;
spinlock_t rx_irq_lock;
struct net_device dummy_dev;
struct net_device *netdev[MTK_MAX_DEVS];
struct mtk_mac *mac[MTK_MAX_DEVS];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment