Commit 030fae30 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'mlx4-add-support-for-netdev-genl-api'

Joe Damato says:

====================
mlx4: Add support for netdev-genl API

There are no functional changes from v5, which I mistakenly sent right
after net-next was closed (oops). This revision, however, includes
Tariq's Reviewed-by tags of the v5 in each commit message. See the
changelog below.

This series adds support to mlx4 for the netdev-genl API which makes it
much easier for users and user programs to map NAPI IDs back to
ifindexes, queues, and IRQs. This is extremely useful for a number of
use cases, including epoll-based busy poll.

In addition, this series includes a patch to generate per-queue
statistics using the netlink API, as well.

To facilitate the stats, patch 1/3 adds a field "alloc_fail" to the ring
structure. This is incremented by the driver in an appropriate place and
used in patch 3/3 as alloc_fail.

Please note: I do not have access to mlx4 hardware, but I've been
working closely with Martin Karsten from University of Waterloo (CC'd)
who has very graciously tested my patches on their mlx4 hardware (hence
his Tested-by attribution in each commit). His latest research work is
particularly interesting [1] and this series helps to support that (and
future) work.

Martin re-tested v4 using Jakub's suggested tool [2] and the
stats.pkt_byte_sum and stats.qstat_by_ifindex tests passed. He also
adjusted the queue count and re-ran the tests to confirm they still
passed even after the queue count was modified.

[1]: https://dl.acm.org/doi/pdf/10.1145/3626780
[2]: https://lore.kernel.org/lkml/20240423175718.4ad4dc5a@kernel.org/
====================

Link: https://lore.kernel.org/r/20240528181139.515070-1-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 95cd03f3 a5602c6e
......@@ -126,6 +126,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
cq_idx = cq_idx % priv->rx_ring_num;
rx_cq = priv->rx_cq[cq_idx];
cq->vector = rx_cq->vector;
irq = mlx4_eq_get_irq(mdev->dev, cq->vector);
}
if (cq->type == RX)
......@@ -142,18 +143,23 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
if (err)
goto free_eq;
cq->cq_idx = cq_idx;
cq->mcq.event = mlx4_en_cq_event;
switch (cq->type) {
case TX:
cq->mcq.comp = mlx4_en_tx_irq;
netif_napi_add_tx(cq->dev, &cq->napi, mlx4_en_poll_tx_cq);
netif_napi_set_irq(&cq->napi, irq);
napi_enable(&cq->napi);
netif_queue_set_napi(cq->dev, cq_idx, NETDEV_QUEUE_TYPE_TX, &cq->napi);
break;
case RX:
cq->mcq.comp = mlx4_en_rx_irq;
netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq);
netif_napi_set_irq(&cq->napi, irq);
napi_enable(&cq->napi);
netif_queue_set_napi(cq->dev, cq_idx, NETDEV_QUEUE_TYPE_RX, &cq->napi);
break;
case TX_XDP:
/* nothing regarding napi, it's shared with rx ring */
......@@ -189,6 +195,14 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
{
if (cq->type != TX_XDP) {
enum netdev_queue_type qtype;
if (cq->type == RX)
qtype = NETDEV_QUEUE_TYPE_RX;
else
qtype = NETDEV_QUEUE_TYPE_TX;
netif_queue_set_napi(cq->dev, cq->cq_idx, qtype, NULL);
napi_disable(&cq->napi);
netif_napi_del(&cq->napi);
}
......
......@@ -43,6 +43,7 @@
#include <net/vxlan.h>
#include <net/devlink.h>
#include <net/rps.h>
#include <net/netdev_queues.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/device.h>
......@@ -2073,6 +2074,7 @@ static void mlx4_en_clear_stats(struct net_device *dev)
priv->rx_ring[i]->csum_ok = 0;
priv->rx_ring[i]->csum_none = 0;
priv->rx_ring[i]->csum_complete = 0;
priv->rx_ring[i]->alloc_fail = 0;
}
}
......@@ -3099,6 +3101,77 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
last_i += NUM_PHY_STATS;
}
/* netdev_stat_ops: report per-queue RX counters for ring @i.
 * Reads are done under stats_lock; counters are skipped (left as the
 * core's defaults) while the port is down or on a master/PF device.
 */
static void mlx4_get_queue_stats_rx(struct net_device *dev, int i,
				    struct netdev_queue_stats_rx *stats)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);

	spin_lock_bh(&priv->stats_lock);
	if (priv->port_up && !mlx4_is_master(priv->mdev->dev)) {
		const struct mlx4_en_rx_ring *ring = priv->rx_ring[i];

		/* READ_ONCE pairs with lockless updates on the datapath */
		stats->packets = READ_ONCE(ring->packets);
		stats->bytes = READ_ONCE(ring->bytes);
		stats->alloc_fail = READ_ONCE(ring->alloc_fail);
	}
	spin_unlock_bh(&priv->stats_lock);
}
/* netdev_stat_ops: report per-queue TX counters for ring @i.
 * Mirrors mlx4_get_queue_stats_rx: only populated while the port is up
 * and the device is not a master/PF.
 */
static void mlx4_get_queue_stats_tx(struct net_device *dev, int i,
				    struct netdev_queue_stats_tx *stats)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);

	spin_lock_bh(&priv->stats_lock);
	if (priv->port_up && !mlx4_is_master(priv->mdev->dev)) {
		const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];

		stats->packets = READ_ONCE(ring->packets);
		stats->bytes = READ_ONCE(ring->bytes);
	}
	spin_unlock_bh(&priv->stats_lock);
}
/* netdev_stat_ops: base (non-queue) counters.
 * mlx4 keeps all traffic counters in the per-ring structures, so the
 * base contribution is zero; we only zero the fields when the port is
 * up on a non-master device, i.e. in exactly the states where the
 * per-queue callbacks report values.
 */
static void mlx4_get_base_stats(struct net_device *dev,
				struct netdev_queue_stats_rx *rx,
				struct netdev_queue_stats_tx *tx)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);

	spin_lock_bh(&priv->stats_lock);
	if (priv->port_up && !mlx4_is_master(priv->mdev->dev)) {
		if (priv->rx_ring_num) {
			rx->packets = 0;
			rx->bytes = 0;
			rx->alloc_fail = 0;
		}

		if (priv->tx_ring_num[TX]) {
			tx->packets = 0;
			tx->bytes = 0;
		}
	}
	spin_unlock_bh(&priv->stats_lock);
}
/* Hook the driver into the netlink per-queue statistics API
 * (netdev-genl); installed on the net_device in mlx4_en_init_netdev.
 */
static const struct netdev_stat_ops mlx4_stat_ops = {
	.get_queue_stats_rx = mlx4_get_queue_stats_rx,
	.get_queue_stats_tx = mlx4_get_queue_stats_tx,
	.get_base_stats = mlx4_get_base_stats,
};
int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
struct mlx4_en_port_profile *prof)
{
......@@ -3262,6 +3335,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
dev->stat_ops = &mlx4_stat_ops;
dev->ethtool_ops = &mlx4_en_ethtool_ops;
/*
......
......@@ -82,8 +82,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
for (i = 0; i < priv->num_frags; i++, frags++) {
if (!frags->page) {
if (mlx4_alloc_page(priv, frags, gfp))
if (mlx4_alloc_page(priv, frags, gfp)) {
ring->alloc_fail++;
return -ENOMEM;
}
ring->rx_alloc_pages++;
}
rx_desc->data[i].addr = cpu_to_be64(frags->dma +
......
......@@ -355,6 +355,7 @@ struct mlx4_en_rx_ring {
unsigned long xdp_tx;
unsigned long xdp_tx_full;
unsigned long dropped;
unsigned long alloc_fail;
int hwtstamp_rx_filter;
cpumask_var_t affinity_mask;
struct xdp_rxq_info xdp_rxq;
......@@ -379,6 +380,7 @@ struct mlx4_en_cq {
#define MLX4_EN_OPCODE_ERROR 0x1e
const struct cpumask *aff_mask;
int cq_idx;
};
struct mlx4_en_port_profile {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment