Commit 6c829efe authored by Jakub Kicinski

Merge branch 'support-tunnel-mode-in-mlx5-ipsec-packet-offload'

Leon Romanovsky says:

====================
Support tunnel mode in mlx5 IPsec packet offload

This series extends mlx5 to support tunnel mode in its IPsec packet
offload implementation.

v0: https://lore.kernel.org/all/cover.1681106636.git.leonro@nvidia.com
====================

Link: https://lore.kernel.org/r/cover.1681388425.git.leonro@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 99676a57 c941da23
@@ -35,12 +35,14 @@
#include <crypto/aead.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <net/netevent.h>
#include "en.h"
#include "ipsec.h"
#include "ipsec_rxtx.h"
#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
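/* xarray mark for tunnel-mode packet offload SAs, so that the netevent
 * notifier can iterate only over tunnel SAs in the SADB.
 */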
#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
{
@@ -242,6 +244,57 @@ static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
attrs->lft.numb_rounds_soft = (u64)n;
}
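/* Tunnel mode keeps the outer L2 header in the offloaded rule, so
 * resolve both MAC addresses up front: the local side comes from the
 * device MAC, the remote side from the neighbour entry of the tunnel
 * peer. If the neighbour is not resolved yet, trigger ARP via
 * neigh_event_send() and install the SA with attrs->drop set; the
 * netevent notifier below lifts the drop once the address is known.
 */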
static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_accel_esp_xfrm_attrs *attrs)
{
struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
struct xfrm_state *x = sa_entry->x;
struct net_device *netdev;
struct neighbour *n;
u8 addr[ETH_ALEN];
if (attrs->mode != XFRM_MODE_TUNNEL ||
attrs->type != XFRM_DEV_OFFLOAD_PACKET)
return;
netdev = x->xso.real_dev;
mlx5_query_mac_address(mdev, addr);
switch (attrs->dir) {
case XFRM_DEV_OFFLOAD_IN:
ether_addr_copy(attrs->dmac, addr);
n = neigh_lookup(&arp_tbl, &attrs->saddr.a4, netdev);
if (!n) {
n = neigh_create(&arp_tbl, &attrs->saddr.a4, netdev);
if (IS_ERR(n))
return;
neigh_event_send(n, NULL);
attrs->drop = true;
break;
}
neigh_ha_snapshot(addr, n, netdev);
ether_addr_copy(attrs->smac, addr);
break;
case XFRM_DEV_OFFLOAD_OUT:
ether_addr_copy(attrs->smac, addr);
n = neigh_lookup(&arp_tbl, &attrs->daddr.a4, netdev);
if (!n) {
n = neigh_create(&arp_tbl, &attrs->daddr.a4, netdev);
if (IS_ERR(n))
return;
neigh_event_send(n, NULL);
attrs->drop = true;
break;
}
neigh_ha_snapshot(addr, n, netdev);
ether_addr_copy(attrs->dmac, addr);
break;
default:
return;
}
neigh_release(n);
}
void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_accel_esp_xfrm_attrs *attrs)
{
@@ -297,8 +350,10 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
attrs->upspec.sport = ntohs(x->sel.sport);
attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
attrs->upspec.proto = x->sel.proto;
attrs->mode = x->props.mode;
mlx5e_ipsec_init_limits(sa_entry, attrs);
mlx5e_ipsec_init_macs(sa_entry, attrs);
}
static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
@@ -367,6 +422,11 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
return -EINVAL;
}
switch (x->xso.type) {
case XFRM_DEV_OFFLOAD_CRYPTO:
if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
@@ -374,11 +434,6 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
break;
case XFRM_DEV_OFFLOAD_PACKET:
if (!(mlx5_ipsec_device_caps(mdev) &
@@ -387,8 +442,9 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
if (x->props.mode == XFRM_MODE_TUNNEL &&
!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
return -EINVAL;
}
@@ -458,34 +514,81 @@ static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
}
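/* Runs from the ipsec workqueue once a neighbour update arrives: copy
 * the resolved MAC into the missing smac/dmac slot, clear the drop flag
 * and update the hardware rule.
 */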
static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
{
struct mlx5e_ipsec_work *work =
container_of(_work, struct mlx5e_ipsec_work, work);
struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
struct mlx5e_ipsec_netevent_data *data = work->data;
struct mlx5_accel_esp_xfrm_attrs *attrs;
attrs = &sa_entry->attrs;
switch (attrs->dir) {
case XFRM_DEV_OFFLOAD_IN:
ether_addr_copy(attrs->smac, data->addr);
break;
case XFRM_DEV_OFFLOAD_OUT:
ether_addr_copy(attrs->dmac, data->addr);
break;
default:
WARN_ON_ONCE(true);
}
attrs->drop = false;
mlx5e_accel_ipsec_fs_modify(sa_entry);
}
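/* Pick the deferred work this SA needs: ESN bookkeeping for crypto
 * offload, neighbour (L2 header) updates for tunnel-mode packet
 * offload.
 */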
static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
{
struct xfrm_state *x = sa_entry->x;
struct mlx5e_ipsec_work *work;
void *data = NULL;
switch (x->xso.type) {
case XFRM_DEV_OFFLOAD_CRYPTO:
if (!(x->props.flags & XFRM_STATE_ESN))
return 0;
break;
case XFRM_DEV_OFFLOAD_PACKET:
if (x->props.mode != XFRM_MODE_TUNNEL)
return 0;
break;
default:
break;
}
work = kzalloc(sizeof(*work), GFP_KERNEL);
if (!work)
return -ENOMEM;
switch (x->xso.type) {
case XFRM_DEV_OFFLOAD_CRYPTO:
data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
if (!data)
goto free_work;
INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
break;
case XFRM_DEV_OFFLOAD_PACKET:
data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data),
GFP_KERNEL);
if (!data)
goto free_work;
INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
break;
default:
break;
}
work->data = data;
work->sa_entry = sa_entry;
sa_entry->work = work;
return 0;
free_work:
kfree(work);
return -ENOMEM;
}
static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
@@ -566,6 +669,14 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
if (err)
goto err_hw_ctx;
if (x->props.mode == XFRM_MODE_TUNNEL &&
x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
!mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) {
NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings");
err = -EINVAL;
goto err_add_rule;
}
/* We use *_bh() variant because xfrm_timer_handler(), which runs
* in softirq context, can reach our state delete logic and we need
* xa_erase_bh() there.
@@ -580,6 +691,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
if (sa_entry->dwork)
queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
MLX5_IPSEC_RESCHED);
if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
x->props.mode == XFRM_MODE_TUNNEL)
xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
MLX5E_IPSEC_TUNNEL_SA);
out:
x->xso.offload_handle = (unsigned long)sa_entry;
return 0;
@@ -602,6 +719,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
static void mlx5e_xfrm_del_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
struct mlx5e_ipsec_sa_entry *old;
@@ -610,6 +728,12 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
WARN_ON(old != sa_entry);
if (attrs->mode == XFRM_MODE_TUNNEL &&
attrs->type == XFRM_DEV_OFFLOAD_PACKET)
/* Make sure that no ARP requests are running in parallel */
flush_workqueue(ipsec->wq);
}
static void mlx5e_xfrm_free_state(struct xfrm_state *x)
@@ -634,6 +758,46 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x)
kfree(sa_entry);
}
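/* NETEVENT_NEIGH_UPDATE handler: for every SA marked as a tunnel SA,
 * check whether the updated neighbour matches its tunnel address,
 * snapshot the new MAC and queue the SA work to refresh the rule.
 */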
static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct mlx5_accel_esp_xfrm_attrs *attrs;
struct mlx5e_ipsec_netevent_data *data;
struct mlx5e_ipsec_sa_entry *sa_entry;
struct mlx5e_ipsec *ipsec;
struct neighbour *n = ptr;
struct net_device *netdev;
struct xfrm_state *x;
unsigned long idx;
if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
return NOTIFY_DONE;
ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
attrs = &sa_entry->attrs;
if (attrs->family == AF_INET) {
if (!neigh_key_eq32(n, &attrs->saddr.a4) &&
!neigh_key_eq32(n, &attrs->daddr.a4))
continue;
} else {
if (!neigh_key_eq128(n, &attrs->saddr.a4) &&
!neigh_key_eq128(n, &attrs->daddr.a4))
continue;
}
x = sa_entry->x;
netdev = x->xso.real_dev;
data = sa_entry->work->data;
neigh_ha_snapshot(data->addr, n, netdev);
queue_work(ipsec->wq, &sa_entry->work->work);
}
return NOTIFY_DONE;
}
void mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
struct mlx5e_ipsec *ipsec;
@@ -662,6 +826,13 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
goto err_aso;
}
if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
ret = register_netevent_notifier(&ipsec->netevent_nb);
if (ret)
goto clear_aso;
}
ret = mlx5e_accel_ipsec_fs_init(ipsec);
if (ret)
goto err_fs_init;
@@ -672,6 +843,9 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
return;
err_fs_init:
if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
unregister_netevent_notifier(&ipsec->netevent_nb);
clear_aso:
if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
mlx5e_ipsec_aso_cleanup(ipsec);
err_aso:
@@ -690,6 +864,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
return;
mlx5e_accel_ipsec_fs_cleanup(ipsec);
if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
unregister_netevent_notifier(&ipsec->netevent_nb);
if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
mlx5e_ipsec_aso_cleanup(ipsec);
destroy_workqueue(ipsec->wq);
@@ -77,7 +77,7 @@ struct mlx5_replay_esn {
struct mlx5_accel_esp_xfrm_attrs {
u32 spi;
u32 mode;
struct aes_gcm_keymat aes_gcm;
union {
@@ -99,6 +99,8 @@ struct mlx5_accel_esp_xfrm_attrs {
u32 authsize;
u32 reqid;
struct mlx5_ipsec_lft lft;
u8 smac[ETH_ALEN];
u8 dmac[ETH_ALEN];
};
enum mlx5_ipsec_cap {
@@ -107,6 +109,7 @@ enum mlx5_ipsec_cap {
MLX5_IPSEC_CAP_PACKET_OFFLOAD = 1 << 2,
MLX5_IPSEC_CAP_ROCE = 1 << 3,
MLX5_IPSEC_CAP_PRIO = 1 << 4,
MLX5_IPSEC_CAP_TUNNEL = 1 << 5,
};
struct mlx5e_priv;
@@ -141,6 +144,10 @@ struct mlx5e_ipsec_work {
void *data;
};
struct mlx5e_ipsec_netevent_data {
u8 addr[ETH_ALEN];
};
struct mlx5e_ipsec_dwork {
struct delayed_work dwork;
struct mlx5e_ipsec_sa_entry *sa_entry;
@@ -166,6 +173,7 @@ struct mlx5e_ipsec {
struct mlx5e_ipsec_tx *tx;
struct mlx5e_ipsec_aso *aso;
struct notifier_block nb;
struct notifier_block netevent_nb;
struct mlx5_ipsec_fs *roce;
};
@@ -243,6 +251,7 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry);
int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry);
bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry);
int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
@@ -48,6 +48,12 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) &&
MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level))
caps |= MLX5_IPSEC_CAP_PRIO;
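/* Tunnel mode needs both reformat directions: encapsulating L2 frames
 * into ESP tunnel packets on TX and stripping the outer headers back
 * to L2 on RX.
 */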
if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev,
reformat_l2_to_l3_esp_tunnel) &&
MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
reformat_l3_esp_tunnel_to_l2))
caps |= MLX5_IPSEC_CAP_TUNNEL;
}
if (mlx5_get_roce_state(mdev) &&
@@ -263,6 +263,7 @@ struct mlx5_esw_offload {
const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
u8 inline_mode;
atomic64_t num_flows;
u64 num_block_encap;
enum devlink_eswitch_encap_mode encap;
struct ida vport_metadata_ida;
unsigned int host_number; /* ECPF supports one external host */
@@ -748,6 +749,9 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
struct mlx5_eswitch *slave_esw);
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev);
void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);
static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
{
if (mlx5_esw_allowed(esw))
@@ -761,6 +765,7 @@
{
return esw->fdb_table.offloads.slow_fdb;
}
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -805,6 +810,15 @@ mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
{
return 0;
}
static inline bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
{
return true;
}
static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
{
}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
@@ -3586,6 +3586,47 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
return err;
}
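/* IPsec packet offload in tunnel mode requires eswitch encap to stay
 * disabled. Take a reference that blocks encap mode changes while it
 * is held; fails (returns false) if encap is already enabled on a
 * switchdev eswitch.
 */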
bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
{
struct devlink *devlink = priv_to_devlink(dev);
struct mlx5_eswitch *esw;
devl_lock(devlink);
esw = mlx5_devlink_eswitch_get(devlink);
if (IS_ERR(esw)) {
devl_unlock(devlink);
/* Failure means no eswitch => not possible to change encap */
return true;
}
down_write(&esw->mode_lock);
if (esw->mode != MLX5_ESWITCH_LEGACY &&
esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
up_write(&esw->mode_lock);
devl_unlock(devlink);
return false;
}
esw->offloads.num_block_encap++;
up_write(&esw->mode_lock);
devl_unlock(devlink);
return true;
}
void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
{
struct devlink *devlink = priv_to_devlink(dev);
struct mlx5_eswitch *esw;
esw = mlx5_devlink_eswitch_get(devlink);
if (IS_ERR(esw))
return;
down_write(&esw->mode_lock);
esw->offloads.num_block_encap--;
up_write(&esw->mode_lock);
}
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
enum devlink_eswitch_encap_mode encap,
struct netlink_ext_ack *extack)
@@ -3627,6 +3668,13 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
goto unlock;
}
if (esw->offloads.num_block_encap) {
NL_SET_ERR_MSG_MOD(extack,
"Can't set encapsulation when IPsec SA and/or policies are configured");
err = -EOPNOTSUPP;
goto unlock;
}
esw_destroy_offloads_fdb_tables(esw);
esw->offloads.encap = encap;
......
@@ -463,9 +463,11 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 max_ft_level[0x8];
u8 reformat_add_esp_trasport[0x1];
u8 reformat_l2_to_l3_esp_tunnel[0x1];
u8 reserved_at_42[0x1];
u8 reformat_del_esp_trasport[0x1];
u8 reformat_l3_esp_tunnel_to_l2[0x1];
u8 reserved_at_45[0x1];
u8 execute_aso[0x1];
u8 reserved_at_47[0x19];
@@ -6630,7 +6632,9 @@ enum mlx5_reformat_ctx_type {
MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3,
MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5,
MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL = 0x6,
MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8,
MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2 = 0x9,
MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb,
MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf,
MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10,