Commit 55454e9e authored by David S. Miller

Merge branch 'mlx4-XDP-tx-refactor'

Tariq Toukan says:

====================
mlx4 XDP TX refactor

This patchset refactors the XDP forwarding case so that
its dedicated transmit queues are managed completely
separately from the regular ones.

It also adds ethtool counters for XDP cases.

Series generated against net-next commit:
22ca904a genetlink: fix error return code in genl_register_family()

Thanks,
Tariq.

v3:
* Exposed per ring counters.

v2:
* Added ethtool counters.
* Rebased, now patch 2 reverts Brenden's fix, as the bug no longer exists:
  958b3d39 ("net/mlx4_en: fixup xdp tx irq to match rx")
* Updated commit message of patch 2.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents e4ff952a 15fca2c8
...@@ -65,7 +65,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, ...@@ -65,7 +65,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
cq->buf_size = cq->size * mdev->dev->caps.cqe_size; cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
cq->ring = ring; cq->ring = ring;
cq->is_tx = mode; cq->type = mode;
cq->vector = mdev->dev->caps.num_comp_vectors; cq->vector = mdev->dev->caps.num_comp_vectors;
/* Allocate HW buffers on provided NUMA node. /* Allocate HW buffers on provided NUMA node.
...@@ -104,7 +104,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, ...@@ -104,7 +104,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
*cq->mcq.arm_db = 0; *cq->mcq.arm_db = 0;
memset(cq->buf, 0, cq->buf_size); memset(cq->buf, 0, cq->buf_size);
if (cq->is_tx == RX) { if (cq->type == RX) {
if (!mlx4_is_eq_vector_valid(mdev->dev, priv->port, if (!mlx4_is_eq_vector_valid(mdev->dev, priv->port,
cq->vector)) { cq->vector)) {
cq->vector = cpumask_first(priv->rx_ring[cq->ring]->affinity_mask); cq->vector = cpumask_first(priv->rx_ring[cq->ring]->affinity_mask);
...@@ -127,25 +127,17 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, ...@@ -127,25 +127,17 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
/* For TX we use the same irq per /* For TX we use the same irq per
ring we assigned for the RX */ ring we assigned for the RX */
struct mlx4_en_cq *rx_cq; struct mlx4_en_cq *rx_cq;
int xdp_index;
/* The xdp tx irq must align with the rx ring that forwards to
* it, so reindex these from 0. This should only happen when
* tx_ring_num is not a multiple of rx_ring_num.
*/
xdp_index = (priv->xdp_ring_num - priv->tx_ring_num) + cq_idx;
if (xdp_index >= 0)
cq_idx = xdp_index;
cq_idx = cq_idx % priv->rx_ring_num; cq_idx = cq_idx % priv->rx_ring_num;
rx_cq = priv->rx_cq[cq_idx]; rx_cq = priv->rx_cq[cq_idx];
cq->vector = rx_cq->vector; cq->vector = rx_cq->vector;
} }
if (!cq->is_tx) if (cq->type == RX)
cq->size = priv->rx_ring[cq->ring]->actual_size; cq->size = priv->rx_ring[cq->ring]->actual_size;
if ((cq->is_tx && priv->hwtstamp_config.tx_type) || if ((cq->type != RX && priv->hwtstamp_config.tx_type) ||
(!cq->is_tx && priv->hwtstamp_config.rx_filter)) (cq->type == RX && priv->hwtstamp_config.rx_filter))
timestamp_en = 1; timestamp_en = 1;
err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt,
...@@ -154,10 +146,10 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, ...@@ -154,10 +146,10 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
if (err) if (err)
goto free_eq; goto free_eq;
cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.comp = cq->type != RX ? mlx4_en_tx_irq : mlx4_en_rx_irq;
cq->mcq.event = mlx4_en_cq_event; cq->mcq.event = mlx4_en_cq_event;
if (cq->is_tx) if (cq->type != RX)
netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
NAPI_POLL_WEIGHT); NAPI_POLL_WEIGHT);
else else
...@@ -181,7 +173,7 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) ...@@ -181,7 +173,7 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) && if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) &&
cq->is_tx == RX) cq->type == RX)
mlx4_release_eq(priv->mdev->dev, cq->vector); mlx4_release_eq(priv->mdev->dev, cq->vector);
cq->vector = 0; cq->vector = 0;
cq->buf_size = 0; cq->buf_size = 0;
...@@ -193,7 +185,7 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) ...@@ -193,7 +185,7 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
{ {
napi_disable(&cq->napi); napi_disable(&cq->napi);
if (!cq->is_tx) { if (cq->type == RX) {
napi_hash_del(&cq->napi); napi_hash_del(&cq->napi);
synchronize_rcu(); synchronize_rcu();
} }
......
...@@ -49,18 +49,21 @@ ...@@ -49,18 +49,21 @@
static int mlx4_en_moderation_update(struct mlx4_en_priv *priv) static int mlx4_en_moderation_update(struct mlx4_en_priv *priv)
{ {
int i; int i, t;
int err = 0; int err = 0;
for (i = 0; i < priv->tx_ring_num; i++) { for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
priv->tx_cq[i]->moder_cnt = priv->tx_frames; for (i = 0; i < priv->tx_ring_num[t]; i++) {
priv->tx_cq[i]->moder_time = priv->tx_usecs; priv->tx_cq[t][i]->moder_cnt = priv->tx_frames;
priv->tx_cq[t][i]->moder_time = priv->tx_usecs;
if (priv->port_up) { if (priv->port_up) {
err = mlx4_en_set_cq_moder(priv, priv->tx_cq[i]); err = mlx4_en_set_cq_moder(priv,
priv->tx_cq[t][i]);
if (err) if (err)
return err; return err;
} }
} }
}
if (priv->adaptive_rx_coal) if (priv->adaptive_rx_coal)
return 0; return 0;
...@@ -192,6 +195,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = { ...@@ -192,6 +195,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
"tx_prio_7_packets", "tx_prio_7_bytes", "tx_prio_7_packets", "tx_prio_7_bytes",
"tx_novlan_packets", "tx_novlan_bytes", "tx_novlan_packets", "tx_novlan_bytes",
/* xdp statistics */
"rx_xdp_drop",
"rx_xdp_tx",
"rx_xdp_tx_full",
}; };
static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= { static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
...@@ -336,8 +343,8 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset) ...@@ -336,8 +343,8 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
switch (sset) { switch (sset) {
case ETH_SS_STATS: case ETH_SS_STATS:
return bitmap_iterator_count(&it) + return bitmap_iterator_count(&it) +
(priv->tx_ring_num * 2) + (priv->tx_ring_num[TX] * 2) +
(priv->rx_ring_num * 3); (priv->rx_ring_num * (3 + NUM_XDP_STATS));
case ETH_SS_TEST: case ETH_SS_TEST:
return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags
& MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2; & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2;
...@@ -397,14 +404,21 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, ...@@ -397,14 +404,21 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
if (bitmap_iterator_test(&it)) if (bitmap_iterator_test(&it))
data[index++] = ((unsigned long *)&priv->pkstats)[i]; data[index++] = ((unsigned long *)&priv->pkstats)[i];
for (i = 0; i < priv->tx_ring_num; i++) { for (i = 0; i < NUM_XDP_STATS; i++, bitmap_iterator_inc(&it))
data[index++] = priv->tx_ring[i]->packets; if (bitmap_iterator_test(&it))
data[index++] = priv->tx_ring[i]->bytes; data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
data[index++] = priv->tx_ring[TX][i]->packets;
data[index++] = priv->tx_ring[TX][i]->bytes;
} }
for (i = 0; i < priv->rx_ring_num; i++) { for (i = 0; i < priv->rx_ring_num; i++) {
data[index++] = priv->rx_ring[i]->packets; data[index++] = priv->rx_ring[i]->packets;
data[index++] = priv->rx_ring[i]->bytes; data[index++] = priv->rx_ring[i]->bytes;
data[index++] = priv->rx_ring[i]->dropped; data[index++] = priv->rx_ring[i]->dropped;
data[index++] = priv->rx_ring[i]->xdp_drop;
data[index++] = priv->rx_ring[i]->xdp_tx;
data[index++] = priv->rx_ring[i]->xdp_tx_full;
} }
spin_unlock_bh(&priv->stats_lock); spin_unlock_bh(&priv->stats_lock);
...@@ -467,7 +481,13 @@ static void mlx4_en_get_strings(struct net_device *dev, ...@@ -467,7 +481,13 @@ static void mlx4_en_get_strings(struct net_device *dev,
strcpy(data + (index++) * ETH_GSTRING_LEN, strcpy(data + (index++) * ETH_GSTRING_LEN,
main_strings[strings]); main_strings[strings]);
for (i = 0; i < priv->tx_ring_num; i++) { for (i = 0; i < NUM_XDP_STATS; i++, strings++,
bitmap_iterator_inc(&it))
if (bitmap_iterator_test(&it))
strcpy(data + (index++) * ETH_GSTRING_LEN,
main_strings[strings]);
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
sprintf(data + (index++) * ETH_GSTRING_LEN, sprintf(data + (index++) * ETH_GSTRING_LEN,
"tx%d_packets", i); "tx%d_packets", i);
sprintf(data + (index++) * ETH_GSTRING_LEN, sprintf(data + (index++) * ETH_GSTRING_LEN,
...@@ -480,6 +500,12 @@ static void mlx4_en_get_strings(struct net_device *dev, ...@@ -480,6 +500,12 @@ static void mlx4_en_get_strings(struct net_device *dev,
"rx%d_bytes", i); "rx%d_bytes", i);
sprintf(data + (index++) * ETH_GSTRING_LEN, sprintf(data + (index++) * ETH_GSTRING_LEN,
"rx%d_dropped", i); "rx%d_dropped", i);
sprintf(data + (index++) * ETH_GSTRING_LEN,
"rx%d_xdp_drop", i);
sprintf(data + (index++) * ETH_GSTRING_LEN,
"rx%d_xdp_tx", i);
sprintf(data + (index++) * ETH_GSTRING_LEN,
"rx%d_xdp_tx_full", i);
} }
break; break;
case ETH_SS_PRIV_FLAGS: case ETH_SS_PRIV_FLAGS:
...@@ -1060,7 +1086,7 @@ static int mlx4_en_set_ringparam(struct net_device *dev, ...@@ -1060,7 +1086,7 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size : if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size :
priv->rx_ring[0]->size) && priv->rx_ring[0]->size) &&
tx_size == priv->tx_ring[0]->size) tx_size == priv->tx_ring[TX][0]->size)
return 0; return 0;
tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
...@@ -1105,7 +1131,7 @@ static void mlx4_en_get_ringparam(struct net_device *dev, ...@@ -1105,7 +1131,7 @@ static void mlx4_en_get_ringparam(struct net_device *dev,
param->tx_max_pending = MLX4_EN_MAX_TX_SIZE; param->tx_max_pending = MLX4_EN_MAX_TX_SIZE;
param->rx_pending = priv->port_up ? param->rx_pending = priv->port_up ?
priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size; priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size;
param->tx_pending = priv->tx_ring[0]->size; param->tx_pending = priv->tx_ring[TX][0]->size;
} }
static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev) static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev)
...@@ -1710,7 +1736,7 @@ static void mlx4_en_get_channels(struct net_device *dev, ...@@ -1710,7 +1736,7 @@ static void mlx4_en_get_channels(struct net_device *dev,
channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP; channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP;
channel->rx_count = priv->rx_ring_num; channel->rx_count = priv->rx_ring_num;
channel->tx_count = priv->tx_ring_num / MLX4_EN_NUM_UP; channel->tx_count = priv->tx_ring_num[TX] / MLX4_EN_NUM_UP;
} }
static int mlx4_en_set_channels(struct net_device *dev, static int mlx4_en_set_channels(struct net_device *dev,
...@@ -1721,6 +1747,7 @@ static int mlx4_en_set_channels(struct net_device *dev, ...@@ -1721,6 +1747,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
struct mlx4_en_port_profile new_prof; struct mlx4_en_port_profile new_prof;
struct mlx4_en_priv *tmp; struct mlx4_en_priv *tmp;
int port_up = 0; int port_up = 0;
int xdp_count;
int err = 0; int err = 0;
if (channel->other_count || channel->combined_count || if (channel->other_count || channel->combined_count ||
...@@ -1729,20 +1756,25 @@ static int mlx4_en_set_channels(struct net_device *dev, ...@@ -1729,20 +1756,25 @@ static int mlx4_en_set_channels(struct net_device *dev,
!channel->tx_count || !channel->rx_count) !channel->tx_count || !channel->rx_count)
return -EINVAL; return -EINVAL;
if (channel->tx_count * MLX4_EN_NUM_UP <= priv->xdp_ring_num) {
en_err(priv, "Minimum %d tx channels required with XDP on\n",
priv->xdp_ring_num / MLX4_EN_NUM_UP + 1);
return -EINVAL;
}
tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
if (!tmp) if (!tmp)
return -ENOMEM; return -ENOMEM;
mutex_lock(&mdev->state_lock); mutex_lock(&mdev->state_lock);
xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
if (channel->tx_count * MLX4_EN_NUM_UP + xdp_count > MAX_TX_RINGS) {
err = -EINVAL;
en_err(priv,
"Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
channel->tx_count * MLX4_EN_NUM_UP + xdp_count,
MAX_TX_RINGS);
goto out;
}
memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
new_prof.num_tx_rings_p_up = channel->tx_count; new_prof.num_tx_rings_p_up = channel->tx_count;
new_prof.tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP; new_prof.tx_ring_num[TX] = channel->tx_count * MLX4_EN_NUM_UP;
new_prof.tx_ring_num[TX_XDP] = xdp_count;
new_prof.rx_ring_num = channel->rx_count; new_prof.rx_ring_num = channel->rx_count;
err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
...@@ -1756,14 +1788,13 @@ static int mlx4_en_set_channels(struct net_device *dev, ...@@ -1756,14 +1788,13 @@ static int mlx4_en_set_channels(struct net_device *dev,
mlx4_en_safe_replace_resources(priv, tmp); mlx4_en_safe_replace_resources(priv, tmp);
netif_set_real_num_tx_queues(dev, priv->tx_ring_num - netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
priv->xdp_ring_num);
netif_set_real_num_rx_queues(dev, priv->rx_ring_num); netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
if (dev->num_tc) if (dev->num_tc)
mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP); mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP);
en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num); en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]);
en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num); en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num);
if (port_up) { if (port_up) {
...@@ -1774,8 +1805,8 @@ static int mlx4_en_set_channels(struct net_device *dev, ...@@ -1774,8 +1805,8 @@ static int mlx4_en_set_channels(struct net_device *dev,
err = mlx4_en_moderation_update(priv); err = mlx4_en_moderation_update(priv);
out: out:
kfree(tmp);
mutex_unlock(&mdev->state_lock); mutex_unlock(&mdev->state_lock);
kfree(tmp);
return err; return err;
} }
...@@ -1823,11 +1854,15 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags) ...@@ -1823,11 +1854,15 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags)
int ret = 0; int ret = 0;
if (bf_enabled_new != bf_enabled_old) { if (bf_enabled_new != bf_enabled_old) {
int t;
if (bf_enabled_new) { if (bf_enabled_new) {
bool bf_supported = true; bool bf_supported = true;
for (i = 0; i < priv->tx_ring_num; i++) for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
bf_supported &= priv->tx_ring[i]->bf_alloced; for (i = 0; i < priv->tx_ring_num[t]; i++)
bf_supported &=
priv->tx_ring[t][i]->bf_alloced;
if (!bf_supported) { if (!bf_supported) {
en_err(priv, "BlueFlame is not supported\n"); en_err(priv, "BlueFlame is not supported\n");
...@@ -1839,8 +1874,10 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags) ...@@ -1839,8 +1874,10 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags)
priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME; priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME;
} }
for (i = 0; i < priv->tx_ring_num; i++) for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
priv->tx_ring[i]->bf_enabled = bf_enabled_new; for (i = 0; i < priv->tx_ring_num[t]; i++)
priv->tx_ring[t][i]->bf_enabled =
bf_enabled_new;
en_info(priv, "BlueFlame %s\n", en_info(priv, "BlueFlame %s\n",
bf_enabled_new ? "Enabled" : "Disabled"); bf_enabled_new ? "Enabled" : "Disabled");
......
...@@ -169,7 +169,7 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) ...@@ -169,7 +169,7 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
params->prof[i].tx_ppp = pfctx; params->prof[i].tx_ppp = pfctx;
params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
params->prof[i].tx_ring_num = params->num_tx_rings_p_up * params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up *
MLX4_EN_NUM_UP; MLX4_EN_NUM_UP;
params->prof[i].rss_rings = 0; params->prof[i].rss_rings = 0;
params->prof[i].inline_thold = inline_thold; params->prof[i].inline_thold = inline_thold;
......
...@@ -179,6 +179,9 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) ...@@ -179,6 +179,9 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
priv->port_stats.rx_chksum_good = 0; priv->port_stats.rx_chksum_good = 0;
priv->port_stats.rx_chksum_none = 0; priv->port_stats.rx_chksum_none = 0;
priv->port_stats.rx_chksum_complete = 0; priv->port_stats.rx_chksum_complete = 0;
priv->xdp_stats.rx_xdp_drop = 0;
priv->xdp_stats.rx_xdp_tx = 0;
priv->xdp_stats.rx_xdp_tx_full = 0;
for (i = 0; i < priv->rx_ring_num; i++) { for (i = 0; i < priv->rx_ring_num; i++) {
stats->rx_packets += priv->rx_ring[i]->packets; stats->rx_packets += priv->rx_ring[i]->packets;
stats->rx_bytes += priv->rx_ring[i]->bytes; stats->rx_bytes += priv->rx_ring[i]->bytes;
...@@ -186,6 +189,9 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) ...@@ -186,6 +189,9 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
priv->port_stats.rx_chksum_good += priv->rx_ring[i]->csum_ok; priv->port_stats.rx_chksum_good += priv->rx_ring[i]->csum_ok;
priv->port_stats.rx_chksum_none += priv->rx_ring[i]->csum_none; priv->port_stats.rx_chksum_none += priv->rx_ring[i]->csum_none;
priv->port_stats.rx_chksum_complete += priv->rx_ring[i]->csum_complete; priv->port_stats.rx_chksum_complete += priv->rx_ring[i]->csum_complete;
priv->xdp_stats.rx_xdp_drop += priv->rx_ring[i]->xdp_drop;
priv->xdp_stats.rx_xdp_tx += priv->rx_ring[i]->xdp_tx;
priv->xdp_stats.rx_xdp_tx_full += priv->rx_ring[i]->xdp_tx_full;
} }
stats->tx_packets = 0; stats->tx_packets = 0;
stats->tx_bytes = 0; stats->tx_bytes = 0;
...@@ -196,8 +202,8 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) ...@@ -196,8 +202,8 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
priv->port_stats.tso_packets = 0; priv->port_stats.tso_packets = 0;
priv->port_stats.xmit_more = 0; priv->port_stats.xmit_more = 0;
for (i = 0; i < priv->tx_ring_num; i++) { for (i = 0; i < priv->tx_ring_num[TX]; i++) {
const struct mlx4_en_tx_ring *ring = priv->tx_ring[i]; const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];
stats->tx_packets += ring->packets; stats->tx_packets += ring->packets;
stats->tx_bytes += ring->bytes; stats->tx_bytes += ring->bytes;
......
...@@ -788,7 +788,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud ...@@ -788,7 +788,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
struct bpf_prog *xdp_prog; struct bpf_prog *xdp_prog;
int doorbell_pending; int doorbell_pending;
struct sk_buff *skb; struct sk_buff *skb;
int tx_index;
int index; int index;
int nr; int nr;
unsigned int length; unsigned int length;
...@@ -808,7 +807,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud ...@@ -808,7 +807,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
rcu_read_lock(); rcu_read_lock();
xdp_prog = rcu_dereference(ring->xdp_prog); xdp_prog = rcu_dereference(ring->xdp_prog);
doorbell_pending = 0; doorbell_pending = 0;
tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
* descriptor offset can be deduced from the CQE index instead of * descriptor offset can be deduced from the CQE index instead of
...@@ -877,8 +875,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud ...@@ -877,8 +875,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
*/ */
length = be32_to_cpu(cqe->byte_cnt); length = be32_to_cpu(cqe->byte_cnt);
length -= ring->fcs_del; length -= ring->fcs_del;
ring->bytes += length;
ring->packets++;
l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
(cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
...@@ -904,22 +900,26 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud ...@@ -904,22 +900,26 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
case XDP_PASS: case XDP_PASS:
break; break;
case XDP_TX: case XDP_TX:
if (likely(!mlx4_en_xmit_frame(frags, dev, if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
length, tx_index, length, cq->ring,
&doorbell_pending))) &doorbell_pending)))
goto consumed; goto consumed;
goto xdp_drop; /* Drop on xmit failure */ goto xdp_drop_no_cnt; /* Drop on xmit failure */
default: default:
bpf_warn_invalid_xdp_action(act); bpf_warn_invalid_xdp_action(act);
case XDP_ABORTED: case XDP_ABORTED:
case XDP_DROP: case XDP_DROP:
xdp_drop: ring->xdp_drop++;
xdp_drop_no_cnt:
if (likely(mlx4_en_rx_recycle(ring, frags))) if (likely(mlx4_en_rx_recycle(ring, frags)))
goto consumed; goto consumed;
goto next; goto next;
} }
} }
ring->bytes += length;
ring->packets++;
if (likely(dev->features & NETIF_F_RXCSUM)) { if (likely(dev->features & NETIF_F_RXCSUM)) {
if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
MLX4_CQE_STATUS_UDP)) { MLX4_CQE_STATUS_UDP)) {
...@@ -1082,7 +1082,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud ...@@ -1082,7 +1082,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
out: out:
rcu_read_unlock(); rcu_read_unlock();
if (doorbell_pending) if (doorbell_pending)
mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]); mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
mlx4_cq_set_ci(&cq->mcq); mlx4_cq_set_ci(&cq->mcq);
...@@ -1162,7 +1162,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) ...@@ -1162,7 +1162,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
/* bpf requires buffers to be set up as 1 packet per page. /* bpf requires buffers to be set up as 1 packet per page.
* This only works when num_frags == 1. * This only works when num_frags == 1.
*/ */
if (priv->xdp_ring_num) { if (priv->tx_ring_num[TX_XDP]) {
dma_dir = PCI_DMA_BIDIRECTIONAL; dma_dir = PCI_DMA_BIDIRECTIONAL;
/* This will gain efficient xdp frame recycling at the expense /* This will gain efficient xdp frame recycling at the expense
* of more costly truesize accounting * of more costly truesize accounting
......
...@@ -392,6 +392,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) ...@@ -392,6 +392,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
cnt++; cnt++;
} }
if (ring->tx_queue)
netdev_tx_reset_queue(ring->tx_queue); netdev_tx_reset_queue(ring->tx_queue);
if (cnt) if (cnt)
...@@ -405,7 +406,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, ...@@ -405,7 +406,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
{ {
struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_cq *mcq = &cq->mcq; struct mlx4_cq *mcq = &cq->mcq;
struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
struct mlx4_cqe *cqe; struct mlx4_cqe *cqe;
u16 index; u16 index;
u16 new_index, ring_index, stamp_index; u16 new_index, ring_index, stamp_index;
...@@ -807,7 +808,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -807,7 +808,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
bool bf_ok; bool bf_ok;
tx_ind = skb_get_queue_mapping(skb); tx_ind = skb_get_queue_mapping(skb);
ring = priv->tx_ring[tx_ind]; ring = priv->tx_ring[TX][tx_ind];
if (!priv->port_up) if (!priv->port_up)
goto tx_drop; goto tx_drop;
...@@ -1078,7 +1079,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -1078,7 +1079,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
struct mlx4_en_rx_alloc *frame,
struct net_device *dev, unsigned int length, struct net_device *dev, unsigned int length,
int tx_ind, int *doorbell_pending) int tx_ind, int *doorbell_pending)
{ {
...@@ -1101,7 +1103,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, ...@@ -1101,7 +1103,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE, BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE,
"mlx4_en_xmit_frame requires minimum size tx desc"); "mlx4_en_xmit_frame requires minimum size tx desc");
ring = priv->tx_ring[tx_ind]; ring = priv->tx_ring[TX_XDP][tx_ind];
if (!priv->port_up) if (!priv->port_up)
goto tx_drop; goto tx_drop;
...@@ -1153,8 +1155,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, ...@@ -1153,8 +1155,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
((ring->prod & ring->size) ? ((ring->prod & ring->size) ?
cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
ring->packets++; rx_ring->xdp_tx++;
ring->bytes += tx_info->nr_bytes;
AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length); AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length);
ring->prod += nr_txbb; ring->prod += nr_txbb;
...@@ -1178,7 +1179,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, ...@@ -1178,7 +1179,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
return NETDEV_TX_OK; return NETDEV_TX_OK;
tx_drop_count: tx_drop_count:
ring->tx_dropped++; rx_ring->xdp_tx_full++;
tx_drop: tx_drop:
return NETDEV_TX_BUSY; return NETDEV_TX_BUSY;
} }
...@@ -207,8 +207,11 @@ enum { ...@@ -207,8 +207,11 @@ enum {
*/ */
enum cq_type { enum cq_type {
RX = 0, /* keep tx types first */
TX = 1, TX,
TX_XDP,
#define MLX4_EN_NUM_TX_TYPES (TX_XDP + 1)
RX,
}; };
...@@ -347,6 +350,9 @@ struct mlx4_en_rx_ring { ...@@ -347,6 +350,9 @@ struct mlx4_en_rx_ring {
unsigned long csum_ok; unsigned long csum_ok;
unsigned long csum_none; unsigned long csum_none;
unsigned long csum_complete; unsigned long csum_complete;
unsigned long xdp_drop;
unsigned long xdp_tx;
unsigned long xdp_tx_full;
unsigned long dropped; unsigned long dropped;
int hwtstamp_rx_filter; int hwtstamp_rx_filter;
cpumask_var_t affinity_mask; cpumask_var_t affinity_mask;
...@@ -361,7 +367,7 @@ struct mlx4_en_cq { ...@@ -361,7 +367,7 @@ struct mlx4_en_cq {
int size; int size;
int buf_size; int buf_size;
int vector; int vector;
enum cq_type is_tx; enum cq_type type;
u16 moder_time; u16 moder_time;
u16 moder_cnt; u16 moder_cnt;
struct mlx4_cqe *buf; struct mlx4_cqe *buf;
...@@ -372,7 +378,7 @@ struct mlx4_en_cq { ...@@ -372,7 +378,7 @@ struct mlx4_en_cq {
struct mlx4_en_port_profile { struct mlx4_en_port_profile {
u32 flags; u32 flags;
u32 tx_ring_num; u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES];
u32 rx_ring_num; u32 rx_ring_num;
u32 tx_ring_size; u32 tx_ring_size;
u32 rx_ring_size; u32 rx_ring_size;
...@@ -569,17 +575,16 @@ struct mlx4_en_priv { ...@@ -569,17 +575,16 @@ struct mlx4_en_priv {
u32 flags; u32 flags;
u8 num_tx_rings_p_up; u8 num_tx_rings_p_up;
u32 tx_work_limit; u32 tx_work_limit;
u32 tx_ring_num; u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES];
u32 rx_ring_num; u32 rx_ring_num;
u32 rx_skb_size; u32 rx_skb_size;
struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
u16 num_frags; u16 num_frags;
u16 log_rx_info; u16 log_rx_info;
int xdp_ring_num;
struct mlx4_en_tx_ring **tx_ring; struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES];
struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS];
struct mlx4_en_cq **tx_cq; struct mlx4_en_cq **tx_cq[MLX4_EN_NUM_TX_TYPES];
struct mlx4_en_cq *rx_cq[MAX_RX_RINGS]; struct mlx4_en_cq *rx_cq[MAX_RX_RINGS];
struct mlx4_qp drop_qp; struct mlx4_qp drop_qp;
struct work_struct rx_mode_task; struct work_struct rx_mode_task;
...@@ -597,6 +602,7 @@ struct mlx4_en_priv { ...@@ -597,6 +602,7 @@ struct mlx4_en_priv {
struct mlx4_en_flow_stats_rx rx_flowstats; struct mlx4_en_flow_stats_rx rx_flowstats;
struct mlx4_en_flow_stats_tx tx_flowstats; struct mlx4_en_flow_stats_tx tx_flowstats;
struct mlx4_en_port_stats port_stats; struct mlx4_en_port_stats port_stats;
struct mlx4_en_xdp_stats xdp_stats;
struct mlx4_en_stats_bitmap stats_bitmap; struct mlx4_en_stats_bitmap stats_bitmap;
struct list_head mc_list; struct list_head mc_list;
struct list_head curr_list; struct list_head curr_list;
...@@ -685,7 +691,8 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq); ...@@ -685,7 +691,8 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq);
u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback); void *accel_priv, select_queue_fallback_t fallback);
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
struct mlx4_en_rx_alloc *frame,
struct net_device *dev, unsigned int length, struct net_device *dev, unsigned int length,
int tx_ind, int *doorbell_pending); int tx_ind, int *doorbell_pending);
void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
......
...@@ -55,6 +55,13 @@ struct mlx4_en_perf_stats { ...@@ -55,6 +55,13 @@ struct mlx4_en_perf_stats {
#define NUM_PERF_COUNTERS 6 #define NUM_PERF_COUNTERS 6
}; };
/*
 * Port-wide XDP counters exposed through ethtool.
 *
 * mlx4_en_DUMP_ETH_STATS() zeroes these fields and then accumulates the
 * matching per-RX-ring counters (xdp_drop, xdp_tx, xdp_tx_full) into them.
 *
 * The ethtool stats code reads this struct positionally, as an array of
 * NUM_XDP_STATS unsigned longs, so every member must stay an unsigned long
 * and the member order must match the "rx_xdp_*" entries in main_strings.
 */
struct mlx4_en_xdp_stats {
/* Sum of per-ring xdp_drop: frames counted on the XDP_DROP/XDP_ABORTED/
 * unknown-action path of the RX handler. Frames dropped after a failed
 * XDP_TX transmit skip that increment and are not included here.
 */
unsigned long rx_xdp_drop;
/* Sum of per-ring xdp_tx: frames queued by mlx4_en_xmit_frame(). */
unsigned long rx_xdp_tx;
/* Sum of per-ring xdp_tx_full: incremented at the tx_drop_count label of
 * mlx4_en_xmit_frame(). NOTE(review): presumably the XDP TX ring was
 * full; the triggering condition is not visible here - confirm.
 */
unsigned long rx_xdp_tx_full;
/* Number of members above; keep in sync when adding a counter, since it
 * also sizes NUM_ALL_STATS and the per-ring ethtool stat count.
 */
#define NUM_XDP_STATS		3
};
#define NUM_MAIN_STATS 21 #define NUM_MAIN_STATS 21
#define MLX4_NUM_PRIORITIES 8 #define MLX4_NUM_PRIORITIES 8
...@@ -107,7 +114,8 @@ enum { ...@@ -107,7 +114,8 @@ enum {
}; };
#define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \ #define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS) NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
NUM_XDP_STATS)
#define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \ #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
sizeof(((struct net_device_stats *)0)->n)) sizeof(((struct net_device_stats *)0)->n))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment