Commit de4cc8bd authored by David S. Miller

Merge branch 'mlxsw-Handle-changes-in-GRE-configuration'

Jiri Pirko says:

====================
mlxsw: Handle changes in GRE configuration

Petr says:

Until now, when an IP tunnel was offloaded by the mlxsw driver, the
offload was pretty much static, and changes in Linux configuration were
not reflected in the hardware. That led to discrepancies between traffic
flows in slow path and fast path. The work-around used to be to remove
all routes that forward to the netdevice and re-add them. This is
clearly suboptimal, but actually, as of the decap-only patchset, it's
not even enough anymore, and one needs to go all the way and simply drop
the tunnel and recreate it correctly.

With this patchset, the NETDEV_CHANGE events that are generated for
changes of up'd tunnel netdevices are captured and interpreted to
correctly reconfigure the HW in accordance with changes requested at the
software layer. In addition, NETDEV_CHANGEUPPER, NETDEV_UP and
NETDEV_DOWN are now handled not only for tunnel devices themselves, but
also for their bound devices. Each change is then translated to one or
more of the following updates to the HW configuration:

- refresh of offload of local route that corresponds to tunnel's local
  address
- refresh of the loopback RIF
- refresh of offloads of routes that forward to the changed tunnel
- removal of tunnel offloads

These tools are used to implement the following configuration changes:

- addition of a new offloadable tunnel with a local address that conflicts
  with that of an already-offloaded tunnel (the existing tunnel is
  onloaded, the new one isn't offloaded)
- changes to TTL or TOS that make the tunnel unsuitable for offloading
- changes to ikey, okey, remote
- changes to local, which, when they cause a conflict with another
  tunnel, lead to onloading of both newly-conflicting tunnels
- migration of a bound device of an offloaded tunnel device to a
  different VRF
- changes to what device is bound to a tunnel device (i.e. like what
  "ip tunnel change name g dev another" does)
- changes to up / down state of a bound device. A down bound device
  doesn't forward encapsulated traffic anymore, but decap still works.

This patchset starts with a suite of patches that adapt the existing
code base step by step to facilitate introduction of the offloading
code. The five substantial patches at the end then implement the changes
mentioned above.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 6ee79b6e 44b0fff1
......@@ -4542,8 +4542,12 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
int err = 0;
mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
if (mlxsw_sp_netdev_is_ipip(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_event(mlxsw_sp, dev, event, ptr);
if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
event, ptr);
else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev,
event, ptr);
else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU)
err = mlxsw_sp_netdevice_router_port_event(dev);
else if (mlxsw_sp_is_vrf_event(event, ptr))
......
......@@ -396,13 +396,19 @@ int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
unsigned long event, void *ptr);
int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
struct netdev_notifier_changeupper_info *info);
bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp,
bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev);
bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev);
int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *l3_dev,
unsigned long event,
struct netdev_notifier_info *info);
int
mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *l3_dev,
unsigned long event,
struct netdev_notifier_changeupper_info *info);
struct netdev_notifier_info *info);
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
......
......@@ -36,36 +36,123 @@
#include "spectrum_ipip.h"
static bool
mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
struct ip_tunnel_parm
mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return !!(tun->parms.i_flags & TUNNEL_KEY);
return tun->parms;
}
static bool
mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
static bool mlxsw_sp_ipip_parms_has_ikey(struct ip_tunnel_parm parms)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return !!(parms.i_flags & TUNNEL_KEY);
}
return !!(tun->parms.o_flags & TUNNEL_KEY);
static bool mlxsw_sp_ipip_parms_has_okey(struct ip_tunnel_parm parms)
{
return !!(parms.o_flags & TUNNEL_KEY);
}
static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
static u32 mlxsw_sp_ipip_parms_ikey(struct ip_tunnel_parm parms)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return mlxsw_sp_ipip_parms_has_ikey(parms) ?
be32_to_cpu(parms.i_key) : 0;
}
static u32 mlxsw_sp_ipip_parms_okey(struct ip_tunnel_parm parms)
{
return mlxsw_sp_ipip_parms_has_okey(parms) ?
be32_to_cpu(parms.o_key) : 0;
}
static __be32 mlxsw_sp_ipip_parms_saddr4(struct ip_tunnel_parm parms)
{
return parms.iph.saddr;
}
static union mlxsw_sp_l3addr
mlxsw_sp_ipip_parms_saddr(enum mlxsw_sp_l3proto proto,
struct ip_tunnel_parm parms)
{
switch (proto) {
case MLXSW_SP_L3_PROTO_IPV4:
return (union mlxsw_sp_l3addr) {
.addr4 = mlxsw_sp_ipip_parms_saddr4(parms),
};
case MLXSW_SP_L3_PROTO_IPV6:
break;
}
WARN_ON(1);
return (union mlxsw_sp_l3addr) {
.addr4 = 0,
};
}
static __be32 mlxsw_sp_ipip_parms_daddr4(struct ip_tunnel_parm parms)
{
return parms.iph.daddr;
}
static union mlxsw_sp_l3addr
mlxsw_sp_ipip_parms_daddr(enum mlxsw_sp_l3proto proto,
struct ip_tunnel_parm parms)
{
switch (proto) {
case MLXSW_SP_L3_PROTO_IPV4:
return (union mlxsw_sp_l3addr) {
.addr4 = mlxsw_sp_ipip_parms_daddr4(parms),
};
case MLXSW_SP_L3_PROTO_IPV6:
break;
}
WARN_ON(1);
return (union mlxsw_sp_l3addr) {
.addr4 = 0,
};
}
static bool mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
{
return mlxsw_sp_ipip_parms_has_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
}
return mlxsw_sp_ipip_netdev_has_ikey(ol_dev) ?
be32_to_cpu(tun->parms.i_key) : 0;
static bool mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
{
return mlxsw_sp_ipip_parms_has_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
}
static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
{
return mlxsw_sp_ipip_parms_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
}
static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return mlxsw_sp_ipip_parms_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
}
return mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
be32_to_cpu(tun->parms.o_key) : 0;
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
return mlxsw_sp_ipip_parms_saddr(proto,
mlxsw_sp_ipip_netdev_parms(ol_dev));
}
static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
{
return mlxsw_sp_ipip_parms_daddr4(mlxsw_sp_ipip_netdev_parms(ol_dev));
}
static union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
return mlxsw_sp_ipip_parms_daddr(proto,
mlxsw_sp_ipip_netdev_parms(ol_dev));
}
static int
......@@ -200,6 +287,73 @@ mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
};
}
static int
mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry,
struct netlink_ext_ack *extack)
{
union mlxsw_sp_l3addr old_saddr, new_saddr;
union mlxsw_sp_l3addr old_daddr, new_daddr;
struct ip_tunnel_parm new_parms;
bool update_tunnel = false;
bool update_decap = false;
bool update_nhs = false;
int err = 0;
new_parms = mlxsw_sp_ipip_netdev_parms(ipip_entry->ol_dev);
new_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
new_parms);
old_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
ipip_entry->parms);
new_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
new_parms);
old_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
ipip_entry->parms);
if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) {
u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
/* Since the local address has changed, if there is another
* tunnel with a matching saddr, both need to be demoted.
*/
if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp,
MLXSW_SP_L3_PROTO_IPV4,
new_saddr, ul_tb_id,
ipip_entry)) {
mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
return 0;
}
update_tunnel = true;
} else if ((mlxsw_sp_ipip_parms_okey(ipip_entry->parms) !=
mlxsw_sp_ipip_parms_okey(new_parms)) ||
ipip_entry->parms.link != new_parms.link) {
update_tunnel = true;
} else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) {
update_nhs = true;
} else if (mlxsw_sp_ipip_parms_ikey(ipip_entry->parms) !=
mlxsw_sp_ipip_parms_ikey(new_parms)) {
update_decap = true;
}
if (update_tunnel)
err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
true, true, true,
extack);
else if (update_nhs)
err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
false, false, true,
extack);
else if (update_decap)
err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
false, false, false,
extack);
ipip_entry->parms = new_parms;
return err;
}
static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = {
.dev_type = ARPHRD_IPGRE,
.ul_proto = MLXSW_SP_L3_PROTO_IPV4,
......@@ -207,6 +361,7 @@ static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = {
.fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4,
.can_offload = mlxsw_sp_ipip_can_offload_gre4,
.ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4,
.ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre4,
};
const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = {
......
......@@ -38,6 +38,13 @@
#include "spectrum_router.h"
#include <net/ip_fib.h>
struct ip_tunnel_parm
mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev);
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
enum mlxsw_sp_ipip_type {
MLXSW_SP_IPIP_TYPE_GRE4,
MLXSW_SP_IPIP_TYPE_MAX,
......@@ -49,6 +56,7 @@ struct mlxsw_sp_ipip_entry {
struct mlxsw_sp_rif_ipip_lb *ol_lb;
struct mlxsw_sp_fib_entry *decap_fib_entry;
struct list_head ipip_list_node;
struct ip_tunnel_parm parms;
};
struct mlxsw_sp_ipip_ops {
......@@ -71,6 +79,10 @@ struct mlxsw_sp_ipip_ops {
struct mlxsw_sp_ipip_entry *ipip_entry,
enum mlxsw_reg_ralue_op op,
u32 tunnel_index);
int (*ol_netdev_change)(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry,
struct netlink_ext_ack *extack);
};
extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[];
......
......@@ -943,7 +943,7 @@ __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
return __dev_get_by_index(net, tun->parms.link);
}
static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
{
struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
......@@ -961,7 +961,8 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_sp_ipip_type ipipt,
struct net_device *ol_dev)
struct net_device *ol_dev,
struct netlink_ext_ack *extack)
{
struct mlxsw_sp_rif_params_ipip_lb lb_params;
const struct mlxsw_sp_ipip_ops *ipip_ops;
......@@ -974,7 +975,7 @@ mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
};
rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, NULL);
rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
if (IS_ERR(rif))
return ERR_CAST(rif);
return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
......@@ -993,7 +994,7 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
return ERR_PTR(-ENOMEM);
ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
ol_dev);
ol_dev, NULL);
if (IS_ERR(ipip_entry->ol_lb)) {
ret = ERR_CAST(ipip_entry->ol_lb);
goto err_ol_ipip_lb_create;
......@@ -1001,6 +1002,7 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
ipip_entry->ipipt = ipipt;
ipip_entry->ol_dev = ol_dev;
ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);
return ipip_entry;
......@@ -1016,65 +1018,6 @@ mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
kfree(ipip_entry);
}
static __be32
mlxsw_sp_ipip_netdev_saddr4(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return tun->parms.iph.saddr;
}
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
switch (proto) {
case MLXSW_SP_L3_PROTO_IPV4:
return (union mlxsw_sp_l3addr) {
.addr4 = mlxsw_sp_ipip_netdev_saddr4(ol_dev),
};
case MLXSW_SP_L3_PROTO_IPV6:
break;
};
WARN_ON(1);
return (union mlxsw_sp_l3addr) {
.addr4 = 0,
};
}
__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return tun->parms.iph.daddr;
}
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
switch (proto) {
case MLXSW_SP_L3_PROTO_IPV4:
return (union mlxsw_sp_l3addr) {
.addr4 = mlxsw_sp_ipip_netdev_daddr4(ol_dev),
};
case MLXSW_SP_L3_PROTO_IPV6:
break;
};
WARN_ON(1);
return (union mlxsw_sp_l3addr) {
.addr4 = 0,
};
}
static bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1,
const union mlxsw_sp_l3addr *addr2)
{
return !memcmp(addr1, addr2, sizeof(*addr1));
}
static bool
mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
const enum mlxsw_sp_l3proto ul_proto,
......@@ -1211,24 +1154,7 @@ mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_sp_ipip_type ipipt,
struct net_device *ol_dev)
{
u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
struct mlxsw_sp_router *router = mlxsw_sp->router;
struct mlxsw_sp_ipip_entry *ipip_entry;
enum mlxsw_sp_l3proto ul_proto;
union mlxsw_sp_l3addr saddr;
/* The configuration where several tunnels have the same local address
* in the same underlay table needs special treatment in the HW. That is
* currently not implemented in the driver.
*/
list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
ipip_list_node) {
ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
ul_tb_id, ipip_entry))
return ERR_PTR(-EEXIST);
}
ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
if (IS_ERR(ipip_entry))
......@@ -1306,7 +1232,7 @@ static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
return false;
}
bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp,
bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev)
{
return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
......@@ -1326,28 +1252,75 @@ mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
return NULL;
}
static int mlxsw_sp_netdevice_ipip_reg_event(struct mlxsw_sp *mlxsw_sp,
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *ul_dev,
struct mlxsw_sp_ipip_entry *start)
{
struct mlxsw_sp_ipip_entry *ipip_entry;
ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
ipip_list_node);
list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
ipip_list_node) {
struct net_device *ipip_ul_dev =
__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
if (ipip_ul_dev == ul_dev)
return ipip_entry;
}
return NULL;
}
bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev)
{
return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
}
static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev,
enum mlxsw_sp_ipip_type ipipt)
{
const struct mlxsw_sp_ipip_ops *ops
= mlxsw_sp->router->ipip_ops_arr[ipipt];
/* For deciding whether decap should be offloaded, we don't care about
* overlay protocol, so ask whether either one is supported.
*/
return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
}
static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev)
{
struct mlxsw_sp_router *router = mlxsw_sp->router;
struct mlxsw_sp_ipip_entry *ipip_entry;
enum mlxsw_sp_l3proto ul_proto;
enum mlxsw_sp_ipip_type ipipt;
union mlxsw_sp_l3addr saddr;
u32 ul_tb_id;
mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
if (router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev,
MLXSW_SP_L3_PROTO_IPV4) ||
router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev,
MLXSW_SP_L3_PROTO_IPV6)) {
if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
saddr, ul_tb_id,
NULL)) {
ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
ol_dev);
if (IS_ERR(ipip_entry))
return PTR_ERR(ipip_entry);
}
}
return 0;
}
static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp,
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev)
{
struct mlxsw_sp_ipip_entry *ipip_entry;
......@@ -1357,98 +1330,338 @@ static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
static int mlxsw_sp_netdevice_ipip_up_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev)
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry)
{
struct mlxsw_sp_fib_entry *decap_fib_entry;
struct mlxsw_sp_ipip_entry *ipip_entry;
ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
if (ipip_entry) {
decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp,
ipip_entry);
decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
if (decap_fib_entry)
mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
decap_fib_entry);
}
return 0;
}
static void mlxsw_sp_netdevice_ipip_down_event(struct mlxsw_sp *mlxsw_sp,
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev)
{
struct mlxsw_sp_ipip_entry *ipip_entry;
ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
if (ipip_entry && ipip_entry->decap_fib_entry)
if (ipip_entry)
mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
static void
mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry)
{
if (ipip_entry->decap_fib_entry)
mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
static int mlxsw_sp_netdevice_ipip_vrf_event(struct mlxsw_sp *mlxsw_sp,
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev)
{
struct mlxsw_sp_fib_entry *decap_fib_entry;
struct mlxsw_sp_ipip_entry *ipip_entry;
struct mlxsw_sp_rif_ipip_lb *lb_rif;
ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
if (!ipip_entry)
if (ipip_entry)
mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif);
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry,
bool keep_encap,
struct netlink_ext_ack *extack)
{
struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
ipip_entry->ipipt,
ipip_entry->ol_dev,
extack);
if (IS_ERR(new_lb_rif))
return PTR_ERR(new_lb_rif);
ipip_entry->ol_lb = new_lb_rif;
if (keep_encap) {
list_splice_init(&old_lb_rif->common.nexthop_list,
&new_lb_rif->common.nexthop_list);
mlxsw_sp_nexthop_rif_update(mlxsw_sp, &new_lb_rif->common);
}
mlxsw_sp_rif_destroy(&old_lb_rif->common);
return 0;
}
/**
* Update the offload related to an IPIP entry. This always updates decap, and
* in addition to that it also:
* @recreate_loopback: recreates the associated loopback RIF
* @keep_encap: updates next hops that use the tunnel netdevice. This is only
* relevant when recreate_loopback is true.
* @update_nexthops: updates next hops, keeping the current loopback RIF. This
* is only relevant when recreate_loopback is false.
*/
int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry,
bool recreate_loopback,
bool keep_encap,
bool update_nexthops,
struct netlink_ext_ack *extack)
{
int err;
/* When a tunneling device is moved to a different VRF, we need to
* update the backing loopback. Since RIFs can't be edited, we need to
* destroy and recreate it. That might create a window of opportunity
* where RALUE and RATR registers end up referencing a RIF that's
* already gone. RATRs are handled by the RIF destroy, and to take care
/* RIFs can't be edited, so to update loopback, we need to destroy and
* recreate it. That creates a window of opportunity where RALUE and
* RATR registers end up referencing a RIF that's already gone. RATRs
* are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
* of RALUE, demote the decap route back.
*/
if (ipip_entry->decap_fib_entry)
mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipip_entry->ipipt,
ol_dev);
if (IS_ERR(lb_rif))
return PTR_ERR(lb_rif);
mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
ipip_entry->ol_lb = lb_rif;
if (ol_dev->flags & IFF_UP) {
decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp,
ipip_entry);
if (decap_fib_entry)
mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
decap_fib_entry);
if (recreate_loopback) {
err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
keep_encap, extack);
if (err)
return err;
} else if (update_nexthops) {
mlxsw_sp_nexthop_rif_update(mlxsw_sp,
&ipip_entry->ol_lb->common);
}
if (ipip_entry->ol_dev->flags & IFF_UP)
mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
return 0;
}
static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev,
struct netlink_ext_ack *extack)
{
struct mlxsw_sp_ipip_entry *ipip_entry =
mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
if (!ipip_entry)
return 0;
return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
true, false, false, extack);
}
int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp,
static int
mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry,
struct net_device *ul_dev,
struct netlink_ext_ack *extack)
{
return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
true, true, false, extack);
}
static int
mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry,
struct net_device *ul_dev)
{
return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
false, false, true, NULL);
}
static int
mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry,
struct net_device *ul_dev)
{
/* A down underlay device causes encapsulated packets to not be
* forwarded, but decap still works. So refresh next hops without
* touching anything else.
*/
return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
false, false, true, NULL);
}
static int
mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev,
struct netlink_ext_ack *extack)
{
const struct mlxsw_sp_ipip_ops *ipip_ops;
struct mlxsw_sp_ipip_entry *ipip_entry;
int err;
ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
if (!ipip_entry)
/* A change might make a tunnel eligible for offloading, but
* that is currently not implemented. What falls to slow path
* stays there.
*/
return 0;
/* A change might make a tunnel not eligible for offloading. */
if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
ipip_entry->ipipt)) {
mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
return 0;
}
ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
return err;
}
void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry)
{
struct net_device *ol_dev = ipip_entry->ol_dev;
if (ol_dev->flags & IFF_UP)
mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
/* The configuration where several tunnels have the same local address in the
* same underlay table needs special treatment in the HW. That is currently not
* implemented in the driver. This function finds and demotes the first tunnel
* with a given source address, except the one passed in in the argument
* `except'.
*/
bool
mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_sp_l3proto ul_proto,
union mlxsw_sp_l3addr saddr,
u32 ul_tb_id,
const struct mlxsw_sp_ipip_entry *except)
{
struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
ipip_list_node) {
if (ipip_entry != except &&
mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
ul_tb_id, ipip_entry)) {
mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
return true;
}
}
return false;
}
static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
struct net_device *ul_dev)
{
struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
ipip_list_node) {
struct net_device *ipip_ul_dev =
__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
if (ipip_ul_dev == ul_dev)
mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
}
}
int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev,
unsigned long event,
struct netdev_notifier_changeupper_info *info)
struct netdev_notifier_info *info)
{
struct netdev_notifier_changeupper_info *chup;
struct netlink_ext_ack *extack;
switch (event) {
case NETDEV_REGISTER:
return mlxsw_sp_netdevice_ipip_reg_event(mlxsw_sp, ol_dev);
return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
case NETDEV_UNREGISTER:
mlxsw_sp_netdevice_ipip_unreg_event(mlxsw_sp, ol_dev);
mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
return 0;
case NETDEV_UP:
return mlxsw_sp_netdevice_ipip_up_event(mlxsw_sp, ol_dev);
mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
return 0;
case NETDEV_DOWN:
mlxsw_sp_netdevice_ipip_down_event(mlxsw_sp, ol_dev);
mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
return 0;
case NETDEV_CHANGEUPPER:
if (netif_is_l3_master(info->upper_dev))
return mlxsw_sp_netdevice_ipip_vrf_event(mlxsw_sp,
ol_dev);
chup = container_of(info, typeof(*chup), info);
extack = info->extack;
if (netif_is_l3_master(chup->upper_dev))
return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
ol_dev,
extack);
return 0;
case NETDEV_CHANGE:
extack = info->extack;
return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
ol_dev, extack);
}
return 0;
}
static int
__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry,
struct net_device *ul_dev,
unsigned long event,
struct netdev_notifier_info *info)
{
struct netdev_notifier_changeupper_info *chup;
struct netlink_ext_ack *extack;
switch (event) {
case NETDEV_CHANGEUPPER:
chup = container_of(info, typeof(*chup), info);
extack = info->extack;
if (netif_is_l3_master(chup->upper_dev))
return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
ipip_entry,
ul_dev,
extack);
break;
case NETDEV_UP:
return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
ul_dev);
case NETDEV_DOWN:
return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
ipip_entry,
ul_dev);
}
return 0;
}
int
mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ul_dev,
unsigned long event,
struct netdev_notifier_info *info)
{
struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
int err;
while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
ul_dev,
ipip_entry))) {
err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
ul_dev, event, info);
if (err) {
mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
ul_dev);
return err;
}
}
return 0;
}
struct mlxsw_sp_neigh_key {
struct neighbour *n;
};
......@@ -3114,10 +3327,19 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
neigh_release(n);
}
static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
{
struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
return ul_dev ? (ul_dev->flags & IFF_UP) : true;
}
static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh,
struct net_device *ol_dev)
{
bool removing;
if (!nh->nh_grp->gateway || nh->ipip_entry)
return 0;
......@@ -3125,7 +3347,8 @@ static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
if (!nh->ipip_entry)
return -ENOENT;
__mlxsw_sp_nexthop_neigh_update(nh, false);
removing = !mlxsw_sp_ipip_netdev_ul_up(ol_dev);
__mlxsw_sp_nexthop_neigh_update(nh, removing);
return 0;
}
......@@ -3289,6 +3512,30 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}
static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif)
{
struct mlxsw_sp_nexthop *nh;
bool removing;
list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
switch (nh->type) {
case MLXSW_SP_NEXTHOP_TYPE_ETH:
removing = false;
break;
case MLXSW_SP_NEXTHOP_TYPE_IPIP:
removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
break;
default:
WARN_ON(1);
continue;
}
__mlxsw_sp_nexthop_neigh_update(nh, removing);
mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}
}
static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif)
{
......
......@@ -63,12 +63,14 @@ enum mlxsw_sp_rif_counter_dir {
struct mlxsw_sp_neigh_entry;
struct mlxsw_sp_nexthop;
struct mlxsw_sp_ipip_entry;
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
u16 rif_index);
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev);
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif);
......@@ -103,13 +105,20 @@ mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry,
bool adding);
bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry);
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev);
int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry,
bool recreate_loopback,
bool keep_encap,
bool update_nexthops,
struct netlink_ext_ack *extack);
void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry);
bool
mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_sp_l3proto ul_proto,
union mlxsw_sp_l3addr saddr,
u32 ul_tb_id,
const struct mlxsw_sp_ipip_entry *except);
struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
struct mlxsw_sp_nexthop *nh);
bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh);
......@@ -130,4 +139,10 @@ void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh);
static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1,
const union mlxsw_sp_l3addr *addr2)
{
return !memcmp(addr1, addr2, sizeof(*addr1));
}
#endif /* _MLXSW_ROUTER_H_*/