Commit ccfdf21b authored by David S. Miller's avatar David S. Miller

Merge branch 'mlxsw-Offloading-GRE-tunnels'

Jiri Pirko says:

====================
mlxsw: Offloading GRE tunnels

Petr says:

This patch series introduces to mlxsw driver support for offloading
IP-in-IP tunnels in general, and for (subset of) GRE in particular.

This patchset supports two ways of configuring GRE:

- So called "hierarchical configuration", where the GRE device has a bound
  dummy device, which is in a different VRF. The VRF with host traffic is
  called "overlay", the one with encapsulated traffic is called "underlay".

- So called "flat configuration", where the GRE device doesn't have a bound
  device, and overlay and underlay are both in the same VRF (possibly the
  default one).

Two routes are then interesting: a route that directs traffic to a GRE
device (which would typically be in overlay VRF, but could be in another
one), and a local route for the tunnel's local address (in underlay).
Handling of these two route types is then introduced as patches to support,
respectively, IPv4 and IPv6 encapsulation and IPv4 decapsulation.

The encap and decap routes then reference a loopback device, a new type of
RIF introduced by this patchset for the specific use of offloading tunnels.

The encap and decap code is abstract with respect to the particulars of
individual L3 tunnel types. This patchset introduces support for GRE
tunnels in particular.

Limitations:

- Each tunnel needs to have a different local address (within a given VRF).
  When two tunnels are used that are in conflict, FIB abort is triggered
  and the driver ceases offloading FIBs. Full handling of such
  configurations needs special setup in the hardware, such that the tunnels
  that share an address are dispatched correctly according to their key (or
  lack thereof). That's currently not implemented, and to keep things
  deterministic, the driver triggers FIB abort.

- A next hop that uses an incompletely-specified tunnel (e.g. such that are
  used for LWT) is not offloaded, but doesn't trigger FIB abort like the
  above. If such routes end up being in a de facto conflict with other
  tunnels, then if there already is an offload for that address, the
  traffic for the conflicting tunnel will end up mismatching the
  configuration of the offloaded tunnel, and thus gets to slow path through
  an error trap.

- GRE checksumming and sequence numbers are not supported and TTL and TOS
  need to be set to inherit. Tunnels with a different configuration are not
  offloaded and their traffic is trapping to slow path.

  Note in particular that TOS of inherit is not the default configuration
  and needs to be explicitly specified when the tunnel is created.

- The only feature that is not graciously handled is that if a change is
  made to the tunnel, e.g. through "ip tunnel change", such changes are not
  reflected in the driver. There is currently no notification mechanism for
  these changes. Introduction of this mechanism and its leverage in the
  driver will be subject of follow-up work. For now this limitation can be
  worked around by removing and re-adding the encap route.

---
v1->v2:
-fix order of patch 5
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 45f79291 ee954d1a
......@@ -16,8 +16,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_switchdev.o spectrum_router.o \
spectrum_kvdl.o spectrum_acl_tcam.o \
spectrum_acl.o spectrum_flower.o \
spectrum_cnt.o \
spectrum_fid.o
spectrum_cnt.o spectrum_fid.o \
spectrum_ipip.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
......
This diff is collapsed.
......@@ -3400,6 +3400,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false),
MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
/* PKT Sample trap */
MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
false, SP_IP2ME, DISCARD),
......
......@@ -77,6 +77,7 @@ enum mlxsw_sp_rif_type {
MLXSW_SP_RIF_TYPE_SUBPORT,
MLXSW_SP_RIF_TYPE_VLAN,
MLXSW_SP_RIF_TYPE_FID,
MLXSW_SP_RIF_TYPE_IPIP_LB, /* IP-in-IP loopback. */
MLXSW_SP_RIF_TYPE_MAX,
};
......
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <net/ip_tunnels.h>
#include "spectrum_ipip.h"
static bool
mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return !!(tun->parms.i_flags & TUNNEL_KEY);
}
static bool
mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return !!(tun->parms.o_flags & TUNNEL_KEY);
}
static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return mlxsw_sp_ipip_netdev_has_ikey(ol_dev) ?
be32_to_cpu(tun->parms.i_key) : 0;
}
static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
be32_to_cpu(tun->parms.o_key) : 0;
}
static int
mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_ipip_entry *ipip_entry)
{
u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
__be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev);
char ratr_pl[MLXSW_REG_RATR_LEN];
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
true, MLXSW_REG_RATR_TYPE_IPIP,
adj_index, rif_index);
mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
static int
mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
u32 tunnel_index,
struct mlxsw_sp_ipip_entry *ipip_entry)
{
bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev);
u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev);
char rtdp_pl[MLXSW_REG_RTDP_LEN];
unsigned int type_check;
u32 daddr4;
mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
type_check = has_ikey ?
MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY :
MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE;
/* Linux demuxes tunnels based on packet SIP (which must match tunnel
* remote IP). Thus configure decap so that it filters out packets that
* are not IPv4 or have the wrong SIP. IPIP_DECAP_ERROR trap is
* generated for packets that fail this criterion. Linux then handles
* such packets in slow path and generates ICMP destination unreachable.
*/
daddr4 = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev));
mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, rif_index,
MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
type_check, has_ikey, daddr4, ikey);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
}
static int
mlxsw_sp_ipip_fib_entry_op_gre4_ralue(struct mlxsw_sp *mlxsw_sp,
u32 dip, u8 prefix_len, u16 ul_vr_id,
enum mlxsw_reg_ralue_op op,
u32 tunnel_index)
{
char ralue_pl[MLXSW_REG_RALUE_LEN];
mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4, op,
ul_vr_id, prefix_len, dip);
mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_index);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry,
enum mlxsw_reg_ralue_op op,
u32 tunnel_index)
{
u16 ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb);
__be32 dip;
int err;
err = mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(mlxsw_sp, tunnel_index,
ipip_entry);
if (err)
return err;
dip = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
ipip_entry->ol_dev).addr4;
return mlxsw_sp_ipip_fib_entry_op_gre4_ralue(mlxsw_sp, be32_to_cpu(dip),
32, ul_vr_id, op,
tunnel_index);
}
static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
union mlxsw_sp_l3addr naddr = {0};
/* Tunnels with unset local or remote address are valid in Linux and
* used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
* (NBMA) tunnels. In principle these can be offloaded, but the driver
* currently doesn't support this. So punt.
*/
return memcmp(&saddr, &naddr, sizeof(naddr)) &&
memcmp(&daddr, &naddr, sizeof(naddr));
}
static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev,
enum mlxsw_sp_l3proto ol_proto)
{
struct ip_tunnel *tunnel = netdev_priv(ol_dev);
__be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */
bool inherit_ttl = tunnel->parms.iph.ttl == 0;
bool inherit_tos = tunnel->parms.iph.tos & 0x1;
return (tunnel->parms.i_flags & ~okflags) == 0 &&
(tunnel->parms.o_flags & ~okflags) == 0 &&
inherit_ttl && inherit_tos &&
mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev);
}
static struct mlxsw_sp_rif_ipip_lb_config
mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev)
{
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP :
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP;
return (struct mlxsw_sp_rif_ipip_lb_config){
.lb_ipipt = lb_ipipt,
.okey = mlxsw_sp_ipip_netdev_okey(ol_dev),
.ul_protocol = MLXSW_SP_L3_PROTO_IPV4,
.saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
ol_dev),
};
}
static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = {
.dev_type = ARPHRD_IPGRE,
.ul_proto = MLXSW_SP_L3_PROTO_IPV4,
.nexthop_update = mlxsw_sp_ipip_nexthop_update_gre4,
.fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4,
.can_offload = mlxsw_sp_ipip_can_offload_gre4,
.ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4,
};
const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = {
[MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops,
};
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _MLXSW_IPIP_H_
#define _MLXSW_IPIP_H_
#include "spectrum_router.h"
#include <net/ip_fib.h>
enum mlxsw_sp_ipip_type {
MLXSW_SP_IPIP_TYPE_GRE4,
MLXSW_SP_IPIP_TYPE_MAX,
};
struct mlxsw_sp_ipip_entry {
enum mlxsw_sp_ipip_type ipipt;
struct net_device *ol_dev; /* Overlay. */
struct mlxsw_sp_rif_ipip_lb *ol_lb;
unsigned int ref_count; /* Number of next hops using the tunnel. */
struct mlxsw_sp_fib_entry *decap_fib_entry;
struct list_head ipip_list_node;
};
struct mlxsw_sp_ipip_ops {
int dev_type;
enum mlxsw_sp_l3proto ul_proto; /* Underlay. */
int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_ipip_entry *ipip_entry);
bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev,
enum mlxsw_sp_l3proto ol_proto);
/* Return a configuration for creating an overlay loopback RIF. */
struct mlxsw_sp_rif_ipip_lb_config
(*ol_loopback_config)(struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev);
int (*fib_entry_op)(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry,
enum mlxsw_reg_ralue_op op,
u32 tunnel_index);
};
extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[];
#endif /* _MLXSW_IPIP_H_*/
......@@ -36,6 +36,25 @@
#define _MLXSW_ROUTER_H_
#include "spectrum.h"
#include "reg.h"
enum mlxsw_sp_l3proto {
MLXSW_SP_L3_PROTO_IPV4,
MLXSW_SP_L3_PROTO_IPV6,
};
union mlxsw_sp_l3addr {
__be32 addr4;
struct in6_addr addr6;
};
struct mlxsw_sp_rif_ipip_lb;
struct mlxsw_sp_rif_ipip_lb_config {
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
u32 okey;
enum mlxsw_sp_l3proto ul_protocol; /* Underlay. */
union mlxsw_sp_l3addr saddr;
};
enum mlxsw_sp_rif_counter_dir {
MLXSW_SP_RIF_COUNTER_INGRESS,
......@@ -47,6 +66,8 @@ struct mlxsw_sp_neigh_entry;
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
u16 rif_index);
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif,
......@@ -79,5 +100,12 @@ mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry,
bool adding);
bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry);
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev);
#endif /* _MLXSW_ROUTER_H_*/
......@@ -85,6 +85,7 @@ enum {
MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91,
MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
MLXSW_TRAP_ID_ACL0 = 0x1C0,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment