Commit bc37b24e authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'mlx5-xsk-updates-part3-2022-09-30'

Saeed Mahameed says:

====================
mlx5 xsk updates part3 2022-09-30

The gist of this 4 part series is in this patchset's last patch

This series contains performance optimizations. XSK starts using the
batching allocator, and XSK data path gets separated from the regular
RX, allowing to drop some branches not relevant for non-XSK use cases.
Some minor optimizations for indirect calls and need_wakeup are also
included.

Other than that, this series adds a few features to the mlx5e
implementation of XSK:

1. XDP metadata support on XSK RQs.

2. RSS contexts support for XSK RQs.

3. Some other optimizations

4. Last but not least, change the queuing scheme, so that XSK RQs no longer
use higher indices, but replace the regular RQs.

Maxim Says:
==========

In the initial implementation of XSK in mlx5e, XSK RQs coexisted with
regular RQs in the same channel. The main idea was to allow RSS work the
same for regular traffic, without need to reconfigure RSS to exclude XSK
queues.

However, this scheme didn't prove to be beneficial, mainly because of
incompatibility with other vendors. Some tools don't properly support
using higher indices for XSK queues, some tools get confused with the
double amount of RQs exposed in sysfs. Some use cases are purely XSK,
and allocating the same amount of unused regular RQs is a waste of
resources.

This commit changes the queuing scheme to the standard one, where XSK
RQs replace regular RQs on the channels where XSK sockets are open. Two
RQs still exist in the channel to allow failsafe disable of XSK, but
only one is exposed at a time. The next commit will achieve the desired
memory save by flushing the buffers when the regular RQ is unused.

As the result of this transition:

1. It's possible to use RSS contexts over XSK RQs.

2. It's possible to dedicate all queues to XSK.

3. When XSK RQs coexist with regular RQs, the admin should make sure no
unwanted traffic goes into XSK RQs by either excluding them from RSS or
settings up the XDP program to return XDP_PASS for non-XSK traffic.

4. When using a mixed fleet of mlx5e devices and other netdevs, the same
configuration can be applied. If the application supports the fallback
to copy mode on unsupported drivers, it will work too.

==========

Part 4 will include some final xsk optimizations and minor improvements

part 1: https://lore.kernel.org/netdev/20220927203611.244301-1-saeed@kernel.org/
part 2: https://lore.kernel.org/netdev/20220929072156.93299-1-saeed@kernel.org/
====================

Link: https://lore.kernel.org/r/20220930162903.62262-1-saeed@kernel.orgSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 5fcc2cfc 3db4c85c
......@@ -181,12 +181,6 @@ do { \
#define mlx5e_state_dereference(priv, p) \
rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))
enum mlx5e_rq_group {
MLX5E_RQ_GROUP_REGULAR,
MLX5E_RQ_GROUP_XSK,
#define MLX5E_NUM_RQ_GROUPS(g) (1 + MLX5E_RQ_GROUP_##g)
};
static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev)
{
if (mlx5_lag_is_lacp_owner(mdev))
......@@ -660,6 +654,7 @@ struct mlx5e_rq_frags_info {
u8 num_frags;
u8 log_num_frags;
u8 wqe_bulk;
u8 wqe_index_mask;
};
struct mlx5e_dma_info {
......@@ -1004,7 +999,6 @@ struct mlx5e_profile {
mlx5e_stats_grp_t *stats_grps;
const struct mlx5e_rx_handlers *rx_handlers;
int max_tc;
u8 rq_groups;
u32 features;
};
......@@ -1096,7 +1090,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state);
void mlx5e_activate_rq(struct mlx5e_rq *rq);
void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
void mlx5e_activate_icosq(struct mlx5e_icosq *icosq);
......
......@@ -10,28 +10,33 @@ unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs)
return chs->num;
}
void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
static struct mlx5e_channel *mlx5e_channels_get(struct mlx5e_channels *chs, unsigned int ix)
{
struct mlx5e_channel *c;
WARN_ON_ONCE(ix >= mlx5e_channels_get_num(chs));
return chs->c[ix];
}
WARN_ON(ix >= mlx5e_channels_get_num(chs));
c = chs->c[ix];
bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix)
{
struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
*rqn = c->rq.rqn;
return test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
}
bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
{
struct mlx5e_channel *c;
struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
WARN_ON(ix >= mlx5e_channels_get_num(chs));
c = chs->c[ix];
*rqn = c->rq.rqn;
}
if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
return false;
void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
{
struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
WARN_ON_ONCE(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state));
*rqn = c->xskrq.rqn;
return true;
}
bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
......
......@@ -9,8 +9,9 @@
struct mlx5e_channels;
unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix);
void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);
#endif /* __MLX5_EN_CHANNELS_H__ */
......@@ -586,7 +586,14 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
info->arr[0].frag_size = byte_count;
info->arr[0].frag_stride = frag_stride;
info->num_frags = 1;
info->wqe_bulk = PAGE_SIZE / frag_stride;
/* N WQEs share the same page, N = PAGE_SIZE / frag_stride. The
* first WQE in the page is responsible for allocation of this
* page, this WQE's index is k*N. If WQEs [k*N+1; k*N+N-1] are
* still not completed, the allocation must stop before k*N.
*/
info->wqe_index_mask = (PAGE_SIZE / frag_stride) - 1;
goto out;
}
......@@ -635,11 +642,40 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
i++;
}
info->num_frags = i;
/* number of different wqes sharing a page */
info->wqe_bulk = 1 + (info->num_frags % 2);
/* The last fragment of WQE with index 2*N may share the page with the
* first fragment of WQE with index 2*N+1 in certain cases. If WQE 2*N+1
* is not completed yet, WQE 2*N must not be allocated, as it's
* responsible for allocating a new page.
*/
if (frag_size_max == PAGE_SIZE) {
/* No WQE can start in the middle of a page. */
info->wqe_index_mask = 0;
} else {
/* PAGE_SIZEs starting from 8192 don't use 2K-sized fragments,
* because there would be more than MLX5E_MAX_RX_FRAGS of them.
*/
WARN_ON(PAGE_SIZE != 2 * DEFAULT_FRAG_SIZE);
/* Odd number of fragments allows to pack the last fragment of
* the previous WQE and the first fragment of the next WQE into
* the same page.
* As long as DEFAULT_FRAG_SIZE is 2048, and MLX5E_MAX_RX_FRAGS
* is 4, the last fragment can be bigger than the rest only if
* it's the fourth one, so WQEs consisting of 3 fragments will
* always share a page.
* When a page is shared, WQE bulk size is 2, otherwise just 1.
*/
info->wqe_index_mask = info->num_frags % 2;
}
out:
info->wqe_bulk = max_t(u8, info->wqe_bulk, 8);
/* Bulking optimization to skip allocation until at least 8 WQEs can be
* allocated in a row. At the same time, never start allocation when
* the page is still used by older WQEs.
*/
info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8);
info->log_num_frags = order_base_2(info->num_frags);
return 0;
......
......@@ -53,38 +53,6 @@ struct mlx5e_create_sq_param {
u8 min_inline_mode;
};
static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params,
u16 qid,
enum mlx5e_rq_group group,
u16 *ix)
{
int nch = params->num_channels;
int ch = qid - nch * group;
if (ch < 0 || ch >= nch)
return false;
*ix = ch;
return true;
}
static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params,
u16 qid,
u16 *ix,
enum mlx5e_rq_group *group)
{
u16 nch = params->num_channels;
*ix = qid % nch;
*group = qid / nch;
}
static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile,
struct mlx5e_params *params, u64 qid)
{
return qid < params->num_channels * profile->rq_groups;
}
/* Striding RQ dynamic parameters */
u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
......
......@@ -134,34 +134,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
return err;
}
static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
{
struct net_device *dev = rq->netdev;
int err;
err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
if (err) {
netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
return err;
}
err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
if (err) {
netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
return err;
}
return 0;
}
static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
{
struct mlx5e_rq *rq = ctx;
int err;
mlx5e_deactivate_rq(rq);
mlx5e_free_rx_descs(rq);
err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR);
err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR);
clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
if (err)
return err;
......
......@@ -24,8 +24,6 @@ struct mlx5e_rx_res {
struct {
struct mlx5e_rqt direct_rqt;
struct mlx5e_tir direct_tir;
struct mlx5e_rqt xsk_rqt;
struct mlx5e_tir xsk_tir;
} *channels;
struct {
......@@ -320,48 +318,8 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res)
mlx5e_tir_builder_clear(builder);
}
if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
goto out;
for (ix = 0; ix < res->max_nch; ix++) {
err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt,
res->mdev, false, res->drop_rqn);
if (err) {
mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n",
err, ix);
goto err_destroy_xsk_rqts;
}
}
for (ix = 0; ix < res->max_nch; ix++) {
mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
inner_ft_support);
mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
mlx5e_tir_builder_build_direct(builder);
err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
if (err) {
mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n",
err, ix);
goto err_destroy_xsk_tirs;
}
mlx5e_tir_builder_clear(builder);
}
goto out;
err_destroy_xsk_tirs:
while (--ix >= 0)
mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
ix = res->max_nch;
err_destroy_xsk_rqts:
while (--ix >= 0)
mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
ix = res->max_nch;
err_destroy_direct_tirs:
while (--ix >= 0)
mlx5e_tir_destroy(&res->channels[ix].direct_tir);
......@@ -420,12 +378,6 @@ static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res)
for (ix = 0; ix < res->max_nch; ix++) {
mlx5e_tir_destroy(&res->channels[ix].direct_tir);
mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
continue;
mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
}
kvfree(res->channels);
......@@ -491,13 +443,6 @@ u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir);
}
u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix)
{
WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK));
return mlx5e_tir_get_tirn(&res->channels[ix].xsk_tir);
}
u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
{
struct mlx5e_rss *rss = res->rss[0];
......@@ -527,26 +472,14 @@ static void mlx5e_rx_res_channel_activate_direct(struct mlx5e_rx_res *res,
struct mlx5e_channels *chs,
unsigned int ix)
{
u32 rqn;
u32 rqn = res->rss_rqns[ix];
int err;
mlx5e_channels_get_regular_rqn(chs, ix, &rqn);
err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
if (err)
mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
rqn, ix, err);
if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
return;
if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
rqn = res->drop_rqn;
err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
if (err)
mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n",
mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
rqn, ix, err);
}
static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res,
......@@ -559,15 +492,6 @@ static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res,
mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
res->drop_rqn, ix, err);
if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
return;
err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
if (err)
mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
res->drop_rqn, ix, err);
}
void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
......@@ -577,8 +501,12 @@ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_chann
nch = mlx5e_channels_get_num(chs);
for (ix = 0; ix < chs->num; ix++)
mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
for (ix = 0; ix < chs->num; ix++) {
if (mlx5e_channels_is_xsk(chs, ix))
mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
else
mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
}
res->rss_nch = chs->num;
mlx5e_rx_res_rss_enable(res);
......@@ -621,33 +549,17 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
}
}
int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
unsigned int ix)
void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
unsigned int ix, bool xsk)
{
u32 rqn;
int err;
if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
return -EINVAL;
err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
if (err)
mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n",
mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
rqn, ix, err);
return err;
}
if (xsk)
mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
else
mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
{
int err;
mlx5e_rx_res_rss_enable(res);
err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
if (err)
mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
res->drop_rqn, ix, err);
return err;
mlx5e_rx_res_channel_activate_direct(res, chs, ix);
}
int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
......
......@@ -17,8 +17,7 @@ struct mlx5e_rss_params_hash;
enum mlx5e_rx_res_features {
MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0),
MLX5E_RX_RES_FEATURE_XSK = BIT(1),
MLX5E_RX_RES_FEATURE_PTP = BIT(2),
MLX5E_RX_RES_FEATURE_PTP = BIT(1),
};
/* Setup */
......@@ -32,7 +31,6 @@ void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
/* TIRN getters for flow steering */
u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix);
u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix);
u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
......@@ -40,9 +38,8 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
/* Activate/deactivate API */
void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
unsigned int ix);
int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix);
void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
unsigned int ix, bool xsk);
/* Configuration API */
void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch);
......
......@@ -452,4 +452,11 @@ static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size
return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room);
}
static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
{
size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe);
return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
}
#endif
......@@ -124,16 +124,10 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
* any Fill Ring entries at the setup stage.
*/
err = mlx5e_rx_res_xsk_activate(priv->rx_res, &priv->channels, ix);
if (unlikely(err))
goto err_deactivate;
mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, true);
return 0;
err_deactivate:
mlx5e_deactivate_xsk(c);
mlx5e_close_xsk(c);
err_remove_pool:
mlx5e_xsk_remove_pool(&priv->xsk, ix);
......@@ -171,7 +165,7 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
goto remove_pool;
c = priv->channels.c[ix];
mlx5e_rx_res_xsk_deactivate(priv->rx_res, ix);
mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false);
mlx5e_deactivate_xsk(c);
mlx5e_close_xsk(c);
......@@ -209,11 +203,10 @@ int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_params *params = &priv->channels.params;
u16 ix;
if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
if (unlikely(qid >= params->num_channels))
return -EINVAL;
return pool ? mlx5e_xsk_enable_pool(priv, pool, ix) :
mlx5e_xsk_disable_pool(priv, ix);
return pool ? mlx5e_xsk_enable_pool(priv, pool, qid) :
mlx5e_xsk_disable_pool(priv, qid);
}
......@@ -8,18 +8,174 @@
/* RX data path */
static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
u32 cqe_bcnt)
int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
struct mlx5e_icosq *icosq = rq->icosq;
struct mlx5_wq_cyc *wq = &icosq->wq;
struct mlx5e_umr_wqe *umr_wqe;
int batch, i;
u32 offset; /* 17-bit value with MTT. */
u16 pi;
if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
goto err;
BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
rq->mpwqe.pages_per_wqe);
/* If batch < pages_per_wqe, either:
* 1. Some (or all) descriptors were invalid.
* 2. dma_need_sync is true, and it fell back to allocating one frame.
* In either case, try to continue allocating frames one by one, until
* the first error, which will mean there are no more valid descriptors.
*/
for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
if (unlikely(!wi->alloc_units[batch].xsk))
goto err_reuse_batch;
}
pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
if (unlikely(rq->mpwqe.unaligned)) {
for (i = 0; i < batch; i++) {
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
.key = rq->mkey_be,
.va = cpu_to_be64(addr),
};
}
} else {
for (i = 0; i < batch; i++) {
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
.ptag = cpu_to_be64(addr | MLX5_EN_WR),
};
}
}
bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
wi->consumed_strides = 0;
umr_wqe->ctrl.opmod_idx_opcode =
cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);
offset = ix * rq->mpwqe.mtts_per_wqe;
if (likely(!rq->mpwqe.unaligned))
offset = MLX5_ALIGNED_MTTS_OCTW(offset);
umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
.wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
.num_wqebbs = rq->mpwqe.umr_wqebbs,
.umr.rq = rq,
};
icosq->pc += rq->mpwqe.umr_wqebbs;
icosq->doorbell_cseg = &umr_wqe->ctrl;
return 0;
err_reuse_batch:
while (--batch >= 0)
xsk_buff_free(wi->alloc_units[batch].xsk);
err:
rq->stats->buff_alloc_err++;
return -ENOMEM;
}
int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
struct xdp_buff **buffs;
u32 contig, alloc;
int i;
/* mlx5e_init_frags_partition creates a 1:1 mapping between
* rq->wqe.frags and rq->wqe.alloc_units, which allows us to
* allocate XDP buffers straight into alloc_units.
*/
BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) !=
sizeof(rq->wqe.alloc_units[0].xsk));
buffs = (struct xdp_buff **)rq->wqe.alloc_units;
contig = mlx5_wq_cyc_get_size(wq) - ix;
if (wqe_bulk <= contig) {
alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
} else {
alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
if (likely(alloc == contig))
alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
}
for (i = 0; i < alloc; i++) {
int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
struct mlx5e_wqe_frag_info *frag;
struct mlx5e_rx_wqe_cyc *wqe;
dma_addr_t addr;
wqe = mlx5_wq_cyc_get_wqe(wq, j);
/* Assumes log_num_frags == 0. */
frag = &rq->wqe.frags[j];
addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
}
return alloc;
}
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
int i;
for (i = 0; i < wqe_bulk; i++) {
int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
struct mlx5e_wqe_frag_info *frag;
struct mlx5e_rx_wqe_cyc *wqe;
dma_addr_t addr;
wqe = mlx5_wq_cyc_get_wqe(wq, j);
/* Assumes log_num_frags == 0. */
frag = &rq->wqe.frags[j];
frag->au->xsk = xsk_buff_alloc(rq->xsk_pool);
if (unlikely(!frag->au->xsk))
return i;
addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
}
return wqe_bulk;
}
static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
{
u32 totallen = xdp->data_end - xdp->data_meta;
u32 metalen = xdp->data - xdp->data_meta;
struct sk_buff *skb;
skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt);
skb = napi_alloc_skb(rq->cq.napi, totallen);
if (unlikely(!skb)) {
rq->stats->buff_alloc_err++;
return NULL;
}
skb_put_data(skb, data, cqe_bcnt);
skb_put_data(skb, xdp->data_meta, totallen);
if (metalen) {
skb_metadata_set(skb, metalen);
__skb_pull(skb, metalen);
}
return skb;
}
......@@ -46,8 +202,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
*/
WARN_ON_ONCE(head_offset);
xdp->data_end = xdp->data + cqe_bcnt;
xdp_set_data_meta_invalid(xdp);
xsk_buff_set_size(xdp, cqe_bcnt);
xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
net_prefetch(xdp->data);
......@@ -76,7 +231,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
* frame. On SKB allocation failure, NULL is returned.
*/
return mlx5e_xsk_construct_skb(rq, xdp->data, xdp->data_end - xdp->data);
return mlx5e_xsk_construct_skb(rq, xdp);
}
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
......@@ -93,8 +248,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
*/
WARN_ON_ONCE(wi->offset);
xdp->data_end = xdp->data + cqe_bcnt;
xdp_set_data_meta_invalid(xdp);
xsk_buff_set_size(xdp, cqe_bcnt);
xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
net_prefetch(xdp->data);
......@@ -103,8 +257,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
return NULL; /* page/packet was consumed by XDP */
/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
* will be handled by mlx5e_put_rx_frag.
* will be handled by mlx5e_free_rx_wqe.
* On SKB allocation failure, NULL is returned.
*/
return mlx5e_xsk_construct_skb(rq, xdp->data, xdp->data_end - xdp->data);
return mlx5e_xsk_construct_skb(rq, xdp);
}
......@@ -9,6 +9,9 @@
/* RX data path */
int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
struct mlx5e_mpw_info *wi,
u16 cqe_bcnt,
......
......@@ -66,7 +66,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
rq->xsk_pool = pool;
rq->stats = &c->priv->channel_stats[c->ix]->xskrq;
rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
rq_xdp_ix = c->ix + params->num_channels * MLX5E_RQ_GROUP_XSK;
rq_xdp_ix = c->ix;
err = mlx5e_rq_set_handlers(rq, params, xsk);
if (err)
return err;
......@@ -154,7 +154,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
void mlx5e_close_xsk(struct mlx5e_channel *c)
{
clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
synchronize_net(); /* Sync with the XSK wakeup and with NAPI. */
synchronize_net(); /* Sync with NAPI. */
mlx5e_close_rq(&c->xskrq);
mlx5e_close_cq(&c->xskrq.cq);
......
......@@ -12,18 +12,14 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_params *params = &priv->channels.params;
struct mlx5e_channel *c;
u16 ix;
if (unlikely(!mlx5e_xdp_is_active(priv)))
return -ENETDOWN;
if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
if (unlikely(qid >= params->num_channels))
return -EINVAL;
c = priv->channels.c[ix];
if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)))
return -EINVAL;
c = priv->channels.c[qid];
if (!napi_if_scheduled_mark_missed(&c->napi)) {
/* To avoid WQE overrun, don't post a NOP if async_icosq is not
......@@ -36,9 +32,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state))
return 0;
spin_lock_bh(&c->async_icosq_lock);
mlx5e_trigger_irq(&c->async_icosq);
spin_unlock_bh(&c->async_icosq_lock);
mlx5e_trigger_napi_icosq(c);
}
return 0;
......
......@@ -451,15 +451,7 @@ static int flow_get_tirn(struct mlx5e_priv *priv,
eth_rule->rss = rss;
mlx5e_rss_refcnt_inc(eth_rule->rss);
} else {
struct mlx5e_params *params = &priv->channels.params;
enum mlx5e_rq_group group;
u16 ix;
mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group);
*tirn = group == MLX5E_RQ_GROUP_XSK ?
mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix) :
mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix);
*tirn = mlx5e_rx_res_get_tirn_direct(priv->rx_res, fs->ring_cookie);
}
return 0;
......@@ -682,8 +674,7 @@ static int validate_flow(struct mlx5e_priv *priv,
return -ENOSPC;
if (fs->ring_cookie != RX_CLS_FLOW_DISC)
if (!mlx5e_qid_validate(priv->profile, &priv->channels.params,
fs->ring_cookie))
if (fs->ring_cookie >= priv->channels.params.num_channels)
return -EINVAL;
switch (flow_type_mask(fs->flow_type)) {
......
......@@ -433,6 +433,13 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
struct mlx5e_wqe_frag_info *prev = NULL;
int i;
if (rq->xsk_pool) {
/* Assumptions used by XSK batched allocator. */
WARN_ON(rq->wqe.info.num_frags != 1);
WARN_ON(rq->wqe.info.log_num_frags != 0);
WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE);
}
next_frag.au = &rq->wqe.alloc_units[0];
for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
......@@ -892,7 +899,7 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
return err;
}
int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
{
struct mlx5_core_dev *mdev = rq->mdev;
......@@ -921,6 +928,32 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
return err;
}
static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
{
struct net_device *dev = rq->netdev;
int err;
err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
if (err) {
netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
return err;
}
err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
if (err) {
netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
return err;
}
return 0;
}
int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
{
mlx5e_free_rx_descs(rq);
return mlx5e_rq_to_ready(rq, curr_state);
}
static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
{
struct mlx5_core_dev *mdev = rq->mdev;
......@@ -2657,7 +2690,7 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq;
struct net_device *netdev = priv->netdev;
int old_num_txqs, old_ntc;
int num_rxqs, nch, ntc;
int nch, ntc;
int err;
int i;
......@@ -2668,7 +2701,6 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
nch = priv->channels.params.num_channels;
ntc = priv->channels.params.mqprio.num_tc;
num_rxqs = nch * priv->profile->rq_groups;
tc_to_txq = priv->channels.params.mqprio.tc_to_txq;
err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq);
......@@ -2677,7 +2709,7 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
err = mlx5e_update_tx_netdev_queues(priv);
if (err)
goto err_tcs;
err = netif_set_real_num_rx_queues(netdev, num_rxqs);
err = netif_set_real_num_rx_queues(netdev, nch);
if (err) {
netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err);
goto err_txqs;
......@@ -5166,7 +5198,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
goto err_destroy_q_counters;
}
features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
features = MLX5E_RX_RES_FEATURE_PTP;
if (priv->channels.params.tunneled_offload_en)
features |= MLX5E_RX_RES_FEATURE_INNER_FT;
err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
......@@ -5357,7 +5389,6 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
.update_carrier = mlx5e_update_carrier,
.rx_handlers = &mlx5e_rx_handlers_nic,
.max_tc = MLX5E_MAX_NUM_TC,
.rq_groups = MLX5E_NUM_RQ_GROUPS(XSK),
.stats_grps = mlx5e_nic_stats_grps,
.stats_grps_num = mlx5e_nic_stats_grps_num,
.features = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) |
......@@ -5390,8 +5421,7 @@ mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev,
max_nch = mlx5e_profile_max_num_channels(mdev, profile);
/* netdev rx queues */
tmp = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1);
max_nch = min_t(unsigned int, max_nch, tmp);
max_nch = min_t(unsigned int, max_nch, netdev->num_rx_queues);
/* netdev tx queues */
tmp = netdev->num_tx_queues;
......@@ -5535,11 +5565,7 @@ static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev,
static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev,
const struct mlx5e_profile *profile)
{
unsigned int nch;
nch = mlx5e_profile_max_num_channels(mdev, profile);
return nch * profile->rq_groups;
return mlx5e_profile_max_num_channels(mdev, profile);
}
struct net_device *
......
......@@ -1224,7 +1224,6 @@ static const struct mlx5e_profile mlx5e_rep_profile = {
.update_stats = mlx5e_stats_update_ndo_stats,
.rx_handlers = &mlx5e_rx_handlers_rep,
.max_tc = 1,
.rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
.stats_grps = mlx5e_rep_stats_grps,
.stats_grps_num = mlx5e_rep_stats_grps_num,
.max_nch_limit = mlx5e_rep_max_nch_limit,
......@@ -1244,8 +1243,6 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
.update_carrier = mlx5e_update_carrier,
.rx_handlers = &mlx5e_rx_handlers_rep,
.max_tc = MLX5E_MAX_NUM_TC,
/* XSK is needed so we can replace profile with NIC netdev */
.rq_groups = MLX5E_NUM_RQ_GROUPS(XSK),
.stats_grps = mlx5e_ul_rep_stats_grps,
.stats_grps_num = mlx5e_ul_rep_stats_grps_num,
};
......
......@@ -463,7 +463,6 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
.update_carrier = NULL, /* no HW update in IB link */
.rx_handlers = &mlx5i_rx_handlers,
.max_tc = MLX5I_MAX_NUM_TC,
.rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
.stats_grps = mlx5i_stats_grps,
.stats_grps_num = mlx5i_stats_grps_num,
};
......
......@@ -349,7 +349,6 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
.update_stats = NULL,
.rx_handlers = &mlx5i_rx_handlers,
.max_tc = MLX5I_MAX_NUM_TC,
.rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
};
const struct mlx5e_profile *mlx5i_pkey_get_profile(void)
......
......@@ -123,7 +123,7 @@ static inline void mlx5_wq_cyc_push(struct mlx5_wq_cyc *wq)
wq->cur_sz++;
}
static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u8 n)
static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u16 n)
{
wq->wqe_ctr += n;
wq->cur_sz += n;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment