Commit e4467f2e authored by David S. Miller

Merge branch 'mlxsw-GRE-Offload-decap-without-encap'

Jiri Pirko says:

====================
mlxsw: GRE: Offload decap without encap

Petr says:

The current code doesn't offload GRE decapsulation unless there's a
corresponding encapsulation route as well. While not strictly incorrect (when
the encap route is absent, the decap route traps traffic to the CPU and the
kernel handles it), it's a missed optimization opportunity.

With this patchset, IPIP entries are created as soon as an offloadable
tunneling netdevice is created. This then leads to offloading of the decap
route, if one exists or is added afterwards, even when no encap route is
present.
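
As a rough sketch of the resulting lifecycle (a condensed restatement of the
mlxsw_sp_netdevice_ipip_event() dispatch added in this set; the wrapper name
below is illustrative only, and error handling plus the CHANGEUPPER/VRF case
are elided):

/* Condensed sketch of the tunnel-netdevice lifecycle added by this set.
 * ipip_lifecycle_sketch() is an illustrative wrapper, not a function in the
 * patch; the handlers it calls are the ones added in spectrum_router.c below.
 */
static int ipip_lifecycle_sketch(struct mlxsw_sp *mlxsw_sp,
                                 struct net_device *ol_dev,
                                 unsigned long event)
{
        switch (event) {
        case NETDEV_REGISTER:
                /* An offloadable tunnel appeared: create the IPIP entry now,
                 * without waiting for an encap route to reference it.
                 */
                return mlxsw_sp_netdevice_ipip_reg_event(mlxsw_sp, ol_dev);
        case NETDEV_UP:
                /* Promote a matching local (decap) route, if one exists, to
                 * a hardware decap entry.
                 */
                return mlxsw_sp_netdevice_ipip_up_event(mlxsw_sp, ol_dev);
        case NETDEV_DOWN:
                /* Demote the decap route back to a trap entry. */
                mlxsw_sp_netdevice_ipip_down_event(mlxsw_sp, ol_dev);
                return 0;
        case NETDEV_UNREGISTER:
                /* Tunnel is gone: tear the IPIP entry down. */
                mlxsw_sp_netdevice_ipip_unreg_event(mlxsw_sp, ol_dev);
                return 0;
        }
        return 0;
}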

In Linux, when there is a decap route, matching IP-in-IP packets are always
decapsulated. However, with IPv4 overlays in particular, whether the inner
packet is then forwarded depends on the setting of net.ipv4.conf.*.rp_filter.
When RP filtering is turned on, inner packets aren't forwarded unless there's
a matching encap route. The mlxsw driver doesn't reflect this behavior for
other router interfaces, and thus it isn't implemented for tunnel types
either. Better support for this will be the subject of follow-up work.
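
For illustration, a conceptual sketch of that interaction (a made-up helper,
not kernel or mlxsw code):

/* Conceptual sketch only: with strict RP filtering on the tunnel, the
 * decapsulated inner packet is forwarded only if the reverse-path lookup on
 * its source resolves via the tunnel device, which in practice requires a
 * matching encap route.
 */
static bool inner_packet_forwarded(bool rp_filter_strict,
                                   bool reverse_route_via_tunnel)
{
        if (!rp_filter_strict)
                return true;
        return reverse_route_via_tunnel;
}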
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 36c0a9df 4cccb737
@@ -3667,6 +3667,9 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
}
static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
unsigned long event, void *ptr);
static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
const struct mlxsw_bus_info *mlxsw_bus_info)
{
@@ -3736,6 +3739,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_router_init;
}
/* Initialize netdevice notifier after router is initialized, so that
* the event handler can use router structures.
*/
mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n");
goto err_netdev_notifier;
}
err = mlxsw_sp_span_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
@@ -3769,6 +3782,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
err_acl_init:
mlxsw_sp_span_fini(mlxsw_sp);
err_span_init:
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
err_netdev_notifier:
mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
mlxsw_sp_afa_fini(mlxsw_sp);
@@ -3795,6 +3810,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_dpipe_fini(mlxsw_sp);
mlxsw_sp_acl_fini(mlxsw_sp);
mlxsw_sp_span_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
mlxsw_sp_router_fini(mlxsw_sp);
mlxsw_sp_afa_fini(mlxsw_sp);
mlxsw_sp_counter_pool_fini(mlxsw_sp);
@@ -4481,13 +4497,17 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
return netif_is_l3_master(info->upper_dev);
}
static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct mlxsw_sp *mlxsw_sp;
int err = 0;
if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU)
mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
if (mlxsw_sp_netdev_is_ipip(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_event(mlxsw_sp, dev, event, ptr);
else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU)
err = mlxsw_sp_netdevice_router_port_event(dev);
else if (mlxsw_sp_is_vrf_event(event, ptr))
err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);
@@ -4501,10 +4521,6 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
return notifier_from_errno(err);
}
static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = {
.notifier_call = mlxsw_sp_netdevice_event,
};
static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = {
.notifier_call = mlxsw_sp_inetaddr_event,
.priority = 10, /* Must be called before FIB notifier block */
@@ -4532,7 +4548,6 @@ static int __init mlxsw_sp_module_init(void)
{
int err;
register_netdevice_notifier(&mlxsw_sp_netdevice_nb);
register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
@@ -4553,7 +4568,6 @@ static int __init mlxsw_sp_module_init(void)
unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
return err;
}
@@ -4564,7 +4578,6 @@ static void __exit mlxsw_sp_module_exit(void)
unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
}
module_init(mlxsw_sp_module_init);
......
@@ -161,6 +161,7 @@ struct mlxsw_sp {
struct {
DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE);
} kvdl;
struct notifier_block netdevice_nb;
struct mlxsw_sp_counter_pool *counter_pool;
struct {
@@ -394,6 +395,13 @@ int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
unsigned long event, void *ptr);
int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
struct netdev_notifier_changeupper_info *info);
bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev);
int
mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *l3_dev,
unsigned long event,
struct netdev_notifier_changeupper_info *info);
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
......
@@ -47,7 +47,6 @@ struct mlxsw_sp_ipip_entry {
enum mlxsw_sp_ipip_type ipipt;
struct net_device *ol_dev; /* Overlay. */
struct mlxsw_sp_rif_ipip_lb *ol_lb;
unsigned int ref_count; /* Number of next hops using the tunnel. */
struct mlxsw_sp_fib_entry *decap_fib_entry;
struct list_head ipip_list_node;
};
......
@@ -1002,9 +1002,8 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
}
static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp_ipip_entry *ipip_entry)
mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
WARN_ON(ipip_entry->ref_count > 0);
mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
kfree(ipip_entry);
}
@@ -1200,26 +1199,22 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
}
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_sp_ipip_type ipipt,
struct net_device *ol_dev)
mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_sp_ipip_type ipipt,
struct net_device *ol_dev)
{
u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
struct mlxsw_sp_router *router = mlxsw_sp->router;
struct mlxsw_sp_fib_entry *decap_fib_entry;
struct mlxsw_sp_ipip_entry *ipip_entry;
enum mlxsw_sp_l3proto ul_proto;
union mlxsw_sp_l3addr saddr;
/* The configuration where several tunnels have the same local address
* in the same underlay table needs special treatment in the HW. That is
* currently not implemented in the driver.
*/
list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
ipip_list_node) {
if (ipip_entry->ol_dev == ol_dev)
goto inc_ref_count;
/* The configuration where several tunnels have the same local
* address in the same underlay table needs special treatment in
* the HW. That is currently not implemented in the driver.
*/
ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
@@ -1231,29 +1226,18 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp,
if (IS_ERR(ipip_entry))
return ipip_entry;
decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
if (decap_fib_entry)
mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
decap_fib_entry);
list_add_tail(&ipip_entry->ipip_list_node,
&mlxsw_sp->router->ipip_list);
inc_ref_count:
++ipip_entry->ref_count;
return ipip_entry;
}
static void
mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry)
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry)
{
if (--ipip_entry->ref_count == 0) {
list_del(&ipip_entry->ipip_list_node);
if (ipip_entry->decap_fib_entry)
mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
mlxsw_sp_ipip_entry_destroy(ipip_entry);
}
list_del(&ipip_entry->ipip_list_node);
mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}
static bool
@@ -1295,6 +1279,168 @@ mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
return NULL;
}
static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev,
enum mlxsw_sp_ipip_type *p_type)
{
struct mlxsw_sp_router *router = mlxsw_sp->router;
const struct mlxsw_sp_ipip_ops *ipip_ops;
enum mlxsw_sp_ipip_type ipipt;
for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
ipip_ops = router->ipip_ops_arr[ipipt];
if (dev->type == ipip_ops->dev_type) {
if (p_type)
*p_type = ipipt;
return true;
}
}
return false;
}
bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev)
{
return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev)
{
struct mlxsw_sp_ipip_entry *ipip_entry;
list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
ipip_list_node)
if (ipip_entry->ol_dev == ol_dev)
return ipip_entry;
return NULL;
}
static int mlxsw_sp_netdevice_ipip_reg_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev)
{
struct mlxsw_sp_router *router = mlxsw_sp->router;
struct mlxsw_sp_ipip_entry *ipip_entry;
enum mlxsw_sp_ipip_type ipipt;
mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
if (router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev,
MLXSW_SP_L3_PROTO_IPV4) ||
router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev,
MLXSW_SP_L3_PROTO_IPV6)) {
ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
ol_dev);
if (IS_ERR(ipip_entry))
return PTR_ERR(ipip_entry);
}
return 0;
}
static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev)
{
struct mlxsw_sp_ipip_entry *ipip_entry;
ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
if (ipip_entry)
mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
static int mlxsw_sp_netdevice_ipip_up_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev)
{
struct mlxsw_sp_fib_entry *decap_fib_entry;
struct mlxsw_sp_ipip_entry *ipip_entry;
ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
if (ipip_entry) {
decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp,
ipip_entry);
if (decap_fib_entry)
mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
decap_fib_entry);
}
return 0;
}
static void mlxsw_sp_netdevice_ipip_down_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev)
{
struct mlxsw_sp_ipip_entry *ipip_entry;
ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
if (ipip_entry && ipip_entry->decap_fib_entry)
mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
static int mlxsw_sp_netdevice_ipip_vrf_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev)
{
struct mlxsw_sp_fib_entry *decap_fib_entry;
struct mlxsw_sp_ipip_entry *ipip_entry;
struct mlxsw_sp_rif_ipip_lb *lb_rif;
ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
if (!ipip_entry)
return 0;
/* When a tunneling device is moved to a different VRF, we need to
* update the backing loopback. Since RIFs can't be edited, we need to
* destroy and recreate it. That might create a window of opportunity
* where RALUE and RATR registers end up referencing a RIF that's
* already gone. RATRs are handled by the RIF destroy, and to take care
* of RALUE, demote the decap route back.
*/
if (ipip_entry->decap_fib_entry)
mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipip_entry->ipipt,
ol_dev);
if (IS_ERR(lb_rif))
return PTR_ERR(lb_rif);
mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
ipip_entry->ol_lb = lb_rif;
if (ol_dev->flags & IFF_UP) {
decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp,
ipip_entry);
if (decap_fib_entry)
mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
decap_fib_entry);
}
return 0;
}
int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *ol_dev,
unsigned long event,
struct netdev_notifier_changeupper_info *info)
{
switch (event) {
case NETDEV_REGISTER:
return mlxsw_sp_netdevice_ipip_reg_event(mlxsw_sp, ol_dev);
case NETDEV_UNREGISTER:
mlxsw_sp_netdevice_ipip_unreg_event(mlxsw_sp, ol_dev);
return 0;
case NETDEV_UP:
return mlxsw_sp_netdevice_ipip_up_event(mlxsw_sp, ol_dev);
case NETDEV_DOWN:
mlxsw_sp_netdevice_ipip_down_event(mlxsw_sp, ol_dev);
return 0;
case NETDEV_CHANGEUPPER:
if (netif_is_l3_master(info->upper_dev))
return mlxsw_sp_netdevice_ipip_vrf_event(mlxsw_sp,
ol_dev);
return 0;
}
return 0;
}
struct mlxsw_sp_neigh_key {
struct neighbour *n;
};
@@ -2785,36 +2931,16 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
neigh_release(n);
}
static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev,
enum mlxsw_sp_ipip_type *p_type)
{
struct mlxsw_sp_router *router = mlxsw_sp->router;
const struct mlxsw_sp_ipip_ops *ipip_ops;
enum mlxsw_sp_ipip_type ipipt;
for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
ipip_ops = router->ipip_ops_arr[ipipt];
if (dev->type == ipip_ops->dev_type) {
if (p_type)
*p_type = ipipt;
return true;
}
}
return false;
}
static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_sp_ipip_type ipipt,
struct mlxsw_sp_nexthop *nh,
struct net_device *ol_dev)
{
if (!nh->nh_grp->gateway || nh->ipip_entry)
return 0;
nh->ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev);
if (IS_ERR(nh->ipip_entry))
return PTR_ERR(nh->ipip_entry);
nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
if (!nh->ipip_entry)
return -ENOENT;
__mlxsw_sp_nexthop_neigh_update(nh, false);
return 0;
@@ -2829,7 +2955,6 @@ static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
return;
__mlxsw_sp_nexthop_neigh_update(nh, true);
mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry);
nh->ipip_entry = NULL;
}
@@ -2873,7 +2998,7 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
MLXSW_SP_L3_PROTO_IPV4)) {
nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev);
err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev);
if (err)
return err;
mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);
@@ -4135,7 +4260,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
MLXSW_SP_L3_PROTO_IPV6)) {
nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev);
err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev);
if (err)
return err;
mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);
......