Commit 308daa19 authored by David S. Miller's avatar David S. Miller

Merge tag 'mlx5-fixes-2021-02-11' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

mlx5-fixes-2021-02-11

Saeed Mahameed says:

====================
mlx5 fixes 2021-02-11

This series introduces some fixes to mlx5 driver.
Please pull and let me know if there is any problem.

For -stable v5.4
 ('net/mlx5e: E-switch, Fix rate calculation for overflow')i

For -stable v5.10
 ('net/mlx5: Disallow RoCE on multi port slave device')
 ('net/mlx5: Disable devlink reload for multi port slave device')
 ('net/mlx5e: Don't change interrupt moderation params when DIM is enabled')
 ('net/mlx5e: Replace synchronize_rcu with synchronize_net')
 ('net/mlx5e: Enable XDP for Connect-X IPsec capable devices')
 ('net/mlx5e: kTLS, Use refcounts to free kTLS RX priv context')
 ('net/mlx5e: Check tunnel offload is required before setting SWP')
 ('net/mlx5: Fix health error state handling')
 ('net/mlx5: Disable devlink reload for lag devices')
 ('net/mlx5e: CT: manage the lifetime of the ct entry object')
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 9c899aa6 e1c3940c
......@@ -128,6 +128,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
if (mlx5_lag_is_active(dev)) {
NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode\n");
return -EOPNOTSUPP;
}
switch (action) {
case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
mlx5_unload_one(dev, false);
......@@ -273,6 +278,10 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE");
return -EOPNOTSUPP;
}
if (mlx5_core_is_mp_slave(dev) || mlx5_lag_is_active(dev)) {
NL_SET_ERR_MSG_MOD(extack, "Multi port slave/Lag device can't configure RoCE");
return -EOPNOTSUPP;
}
return 0;
}
......
......@@ -83,7 +83,7 @@ static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
/* Let other device's napi(s) and XSK wakeups see our new state. */
synchronize_rcu();
synchronize_net();
}
static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
......
......@@ -111,7 +111,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
void mlx5e_close_xsk(struct mlx5e_channel *c)
{
clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
synchronize_rcu(); /* Sync with the XSK wakeup and with NAPI. */
synchronize_net(); /* Sync with the XSK wakeup and with NAPI. */
mlx5e_close_rq(&c->xskrq);
mlx5e_close_cq(&c->xskrq.cq);
......
......@@ -173,7 +173,7 @@ static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
#endif
#if IS_ENABLED(CONFIG_GENEVE)
if (skb->encapsulation)
if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
mlx5e_tx_tunnel_accel(skb, eseg, ihs);
#endif
......
......@@ -57,6 +57,20 @@ struct mlx5e_ktls_offload_context_rx {
struct mlx5e_ktls_rx_resync_ctx resync;
};
static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx)
{
if (!refcount_dec_and_test(&priv_rx->resync.refcnt))
return false;
kfree(priv_rx);
return true;
}
static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx)
{
refcount_inc(&priv_rx->resync.refcnt);
}
static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn)
{
int err, inlen;
......@@ -326,7 +340,7 @@ static void resync_handle_work(struct work_struct *work)
priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync);
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
refcount_dec(&resync->refcnt);
mlx5e_ktls_priv_rx_put(priv_rx);
return;
}
......@@ -334,7 +348,7 @@ static void resync_handle_work(struct work_struct *work)
sq = &c->async_icosq;
if (resync_post_get_progress_params(sq, priv_rx))
refcount_dec(&resync->refcnt);
mlx5e_ktls_priv_rx_put(priv_rx);
}
static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync,
......@@ -377,7 +391,11 @@ static int resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx
return err;
}
/* Function is called with elevated refcount, it decreases it. */
/* Function can be called with the refcount being either elevated or not.
* It decreases the refcount and may free the kTLS priv context.
* Refcount is not elevated only if tls_dev_del has been called, but GET_PSV was
* already in flight.
*/
void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
struct mlx5e_icosq *sq)
{
......@@ -410,7 +428,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq));
priv_rx->stats->tls_resync_req_end++;
out:
refcount_dec(&resync->refcnt);
mlx5e_ktls_priv_rx_put(priv_rx);
dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
kfree(buf);
}
......@@ -431,9 +449,9 @@ static bool resync_queue_get_psv(struct sock *sk)
return false;
resync = &priv_rx->resync;
refcount_inc(&resync->refcnt);
mlx5e_ktls_priv_rx_get(priv_rx);
if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work)))
refcount_dec(&resync->refcnt);
mlx5e_ktls_priv_rx_put(priv_rx);
return true;
}
......@@ -625,31 +643,6 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
return err;
}
/* Elevated refcount on the resync object means there are
* outstanding operations (uncompleted GET_PSV WQEs) that
* will read the resync / priv_rx objects once completed.
* Wait for them to avoid use-after-free.
*/
static void wait_for_resync(struct net_device *netdev,
struct mlx5e_ktls_rx_resync_ctx *resync)
{
#define MLX5E_KTLS_RX_RESYNC_TIMEOUT 20000 /* msecs */
unsigned long exp_time = jiffies + msecs_to_jiffies(MLX5E_KTLS_RX_RESYNC_TIMEOUT);
unsigned int refcnt;
do {
refcnt = refcount_read(&resync->refcnt);
if (refcnt == 1)
return;
msleep(20);
} while (time_before(jiffies, exp_time));
netdev_warn(netdev,
"Failed waiting for kTLS RX resync refcnt to be released (%u).\n",
refcnt);
}
void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
{
struct mlx5e_ktls_offload_context_rx *priv_rx;
......@@ -663,7 +656,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx);
set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags);
mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL);
synchronize_rcu(); /* Sync with NAPI */
synchronize_net(); /* Sync with NAPI */
if (!cancel_work_sync(&priv_rx->rule.work))
/* completion is needed, as the priv_rx in the add flow
* is maintained on the wqe info (wi), not on the socket.
......@@ -671,8 +664,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
wait_for_completion(&priv_rx->add_ctx);
resync = &priv_rx->resync;
if (cancel_work_sync(&resync->work))
refcount_dec(&resync->refcnt);
wait_for_resync(netdev, resync);
mlx5e_ktls_priv_rx_put(priv_rx);
priv_rx->stats->tls_del++;
if (priv_rx->rule.rule)
......@@ -680,5 +672,9 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
mlx5_core_destroy_tir(mdev, priv_rx->tirn);
mlx5_ktls_destroy_key(mdev, priv_rx->key_id);
kfree(priv_rx);
/* priv_rx should normally be freed here, but if there is an outstanding
* GET_PSV, deallocation will be delayed until the CQE for GET_PSV is
* processed.
*/
mlx5e_ktls_priv_rx_put(priv_rx);
}
......@@ -525,7 +525,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev,
#define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT
static void
mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
{
struct mlx5_core_dev *mdev = priv->mdev;
int tc;
......@@ -540,6 +540,17 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc
coal->tx_coalesce_usecs,
coal->tx_max_coalesced_frames);
}
}
}
static void
mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
{
struct mlx5_core_dev *mdev = priv->mdev;
int i;
for (i = 0; i < priv->channels.num; ++i) {
struct mlx5e_channel *c = priv->channels.c[i];
mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq,
coal->rx_coalesce_usecs,
......@@ -586,21 +597,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
tx_moder->pkts = coal->tx_max_coalesced_frames;
new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;
goto out;
}
/* we are opened */
reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled;
reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled;
if (!reset_rx && !reset_tx) {
mlx5e_set_priv_channels_coalesce(priv, coal);
priv->channels.params = new_channels.params;
goto out;
}
if (reset_rx) {
u8 mode = MLX5E_GET_PFLAG(&new_channels.params,
MLX5E_PFLAG_RX_CQE_BASED_MODER);
......@@ -614,6 +613,20 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
mlx5e_reset_tx_moderation(&new_channels.params, mode);
}
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;
goto out;
}
if (!reset_rx && !reset_tx) {
if (!coal->use_adaptive_rx_coalesce)
mlx5e_set_priv_channels_rx_coalesce(priv, coal);
if (!coal->use_adaptive_tx_coalesce)
mlx5e_set_priv_channels_tx_coalesce(priv, coal);
priv->channels.params = new_channels.params;
goto out;
}
err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
out:
......
......@@ -65,6 +65,7 @@
#include "en/devlink.h"
#include "lib/mlx5.h"
#include "en/ptp.h"
#include "fpga/ipsec.h"
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
......@@ -106,7 +107,7 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
return false;
if (MLX5_IPSEC_DEV(mdev))
if (mlx5_fpga_is_ipsec_device(mdev))
return false;
if (params->xdp_prog) {
......@@ -914,7 +915,7 @@ void mlx5e_activate_rq(struct mlx5e_rq *rq)
void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
{
clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
synchronize_rcu(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
}
void mlx5e_close_rq(struct mlx5e_rq *rq)
......@@ -1348,7 +1349,7 @@ void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
struct mlx5_wq_cyc *wq = &sq->wq;
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
synchronize_rcu(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
mlx5e_tx_disable_queue(sq->txq);
......@@ -1423,7 +1424,7 @@ void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
{
clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
synchronize_rcu(); /* Sync with NAPI. */
synchronize_net(); /* Sync with NAPI. */
}
void mlx5e_close_icosq(struct mlx5e_icosq *sq)
......@@ -1502,7 +1503,7 @@ void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
struct mlx5e_channel *c = sq->channel;
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
synchronize_rcu(); /* Sync with NAPI. */
synchronize_net(); /* Sync with NAPI. */
mlx5e_destroy_sq(c->mdev, sq->sqn);
mlx5e_free_xdpsq_descs(sq);
......@@ -1826,12 +1827,12 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
mlx5e_build_create_cq_param(&ccp, c);
err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp,
err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp,
&c->async_icosq.cq);
if (err)
return err;
err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp,
err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp,
&c->icosq.cq);
if (err)
goto err_close_async_icosq_cq;
......@@ -2069,7 +2070,7 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
int i;
#ifdef CONFIG_MLX5_EN_IPSEC
if (MLX5_IPSEC_DEV(mdev))
if (mlx5_fpga_is_ipsec_device(mdev))
byte_count += MLX5E_METADATA_ETHER_LEN;
#endif
......@@ -4455,8 +4456,9 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
return -EINVAL;
}
if (MLX5_IPSEC_DEV(priv->mdev)) {
netdev_warn(netdev, "can't set XDP with IPSec offload\n");
if (mlx5_fpga_is_ipsec_device(priv->mdev)) {
netdev_warn(netdev,
"XDP is not available on Innova cards with IPsec support\n");
return -EINVAL;
}
......
......@@ -1795,8 +1795,8 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool
rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
#ifdef CONFIG_MLX5_EN_IPSEC
if (MLX5_IPSEC_DEV(mdev)) {
netdev_err(netdev, "MPWQE RQ with IPSec offload not supported\n");
if (mlx5_fpga_is_ipsec_device(mdev)) {
netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n");
return -EINVAL;
}
#endif
......
......@@ -5040,7 +5040,7 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
*/
if (rate) {
rate = (rate * BITS_PER_BYTE) + 500000;
rate_mbps = max_t(u32, do_div(rate, 1000000), 1);
rate_mbps = max_t(u64, do_div(rate, 1000000), 1);
}
err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
......
......@@ -124,7 +124,7 @@ struct mlx5_fpga_ipsec {
struct ida halloc;
};
static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
{
if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
return false;
......
......@@ -43,6 +43,7 @@ u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
const struct mlx5_flow_cmds *
mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
void mlx5_fpga_ipsec_build_fs_cmds(void);
bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev);
#else
static inline
const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev)
......@@ -55,6 +56,7 @@ mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
}
static inline void mlx5_fpga_ipsec_build_fs_cmds(void) {};
static inline bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { return false; }
#endif /* CONFIG_MLX5_FPGA_IPSEC */
#endif /* __MLX5_FPGA_IPSEC_H__ */
......@@ -190,6 +190,16 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
return true;
}
static void enter_error_state(struct mlx5_core_dev *dev, bool force)
{
if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mlx5_cmd_flush(dev);
}
mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
}
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
{
bool err_detected = false;
......@@ -208,12 +218,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
goto unlock;
}
if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mlx5_cmd_flush(dev);
}
mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
enter_error_state(dev, force);
unlock:
mutex_unlock(&dev->intf_state_mutex);
}
......@@ -613,7 +618,7 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
priv = container_of(health, struct mlx5_priv, health);
dev = container_of(priv, struct mlx5_core_dev, priv);
mlx5_enter_error_state(dev, false);
enter_error_state(dev, false);
if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
if (mlx5_health_try_recover(dev))
mlx5_core_err(dev, "health recovery failed\n");
......@@ -707,8 +712,9 @@ static void poll_health(struct timer_list *t)
mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
dev->priv.health.fatal_error = fatal_error;
print_health_info(dev);
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mlx5_trigger_health_work(dev);
goto out;
return;
}
count = ioread32be(health->health_counter);
......
......@@ -1396,6 +1396,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
pci_save_state(pdev);
if (!mlx5_core_is_mp_slave(dev))
devlink_reload_enable(devlink);
return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment