Commit 30c8bd5a, authored by Sridhar Samudrala, committed by David S. Miller

net: Introduce generic failover module

The failover module provides a generic interface for paravirtual drivers
to register a netdev and a set of ops with a failover instance. The ops
are used as event handlers that get called to handle netdev register/
unregister/link change/name change events on slave pci ethernet devices
with the same mac address as the failover netdev.

This enables paravirtual drivers to use a VF as an accelerated low latency
datapath. It also allows migration of VMs with direct attached VFs by
failing over to the paravirtual datapath when the VF is unplugged.
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent cb160394
.. SPDX-License-Identifier: GPL-2.0
========
FAILOVER
========
Overview
========
The failover module provides a generic interface for paravirtual drivers
to register a netdev and a set of ops with a failover instance. The ops
are used as event handlers that get called to handle netdev register/
unregister/link change/name change events on slave pci ethernet devices
with the same mac address as the failover netdev.
This enables paravirtual drivers to use a VF as an accelerated low latency
datapath. It also allows live migration of VMs with direct attached VFs by
failing over to the paravirtual datapath when the VF is unplugged.
...@@ -5411,6 +5411,14 @@ S: Maintained ...@@ -5411,6 +5411,14 @@ S: Maintained
F: Documentation/hwmon/f71805f F: Documentation/hwmon/f71805f
F: drivers/hwmon/f71805f.c F: drivers/hwmon/f71805f.c
FAILOVER MODULE
M: Sridhar Samudrala <sridhar.samudrala@intel.com>
L: netdev@vger.kernel.org
S: Supported
F: net/core/failover.c
F: include/net/failover.h
F: Documentation/networking/failover.rst
FANOTIFY FANOTIFY
M: Jan Kara <jack@suse.cz> M: Jan Kara <jack@suse.cz>
R: Amir Goldstein <amir73il@gmail.com> R: Amir Goldstein <amir73il@gmail.com>
......
...@@ -1425,6 +1425,8 @@ struct net_device_ops { ...@@ -1425,6 +1425,8 @@ struct net_device_ops {
* entity (i.e. the master device for bridged veth) * entity (i.e. the master device for bridged veth)
* @IFF_MACSEC: device is a MACsec device * @IFF_MACSEC: device is a MACsec device
* @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
* @IFF_FAILOVER: device is a failover master device
* @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
*/ */
enum netdev_priv_flags { enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0, IFF_802_1Q_VLAN = 1<<0,
...@@ -1454,6 +1456,8 @@ enum netdev_priv_flags { ...@@ -1454,6 +1456,8 @@ enum netdev_priv_flags {
IFF_PHONY_HEADROOM = 1<<24, IFF_PHONY_HEADROOM = 1<<24,
IFF_MACSEC = 1<<25, IFF_MACSEC = 1<<25,
IFF_NO_RX_HANDLER = 1<<26, IFF_NO_RX_HANDLER = 1<<26,
IFF_FAILOVER = 1<<27,
IFF_FAILOVER_SLAVE = 1<<28,
}; };
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
...@@ -1482,6 +1486,8 @@ enum netdev_priv_flags { ...@@ -1482,6 +1486,8 @@ enum netdev_priv_flags {
#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED
#define IFF_MACSEC IFF_MACSEC #define IFF_MACSEC IFF_MACSEC
#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER
#define IFF_FAILOVER IFF_FAILOVER
#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
/** /**
* struct net_device - The DEVICE structure. * struct net_device - The DEVICE structure.
...@@ -4336,6 +4342,16 @@ static inline bool netif_is_rxfh_configured(const struct net_device *dev) ...@@ -4336,6 +4342,16 @@ static inline bool netif_is_rxfh_configured(const struct net_device *dev)
return dev->priv_flags & IFF_RXFH_CONFIGURED; return dev->priv_flags & IFF_RXFH_CONFIGURED;
} }
/* Tell whether @dev has the IFF_FAILOVER private flag set. */
static inline bool netif_is_failover(const struct net_device *dev)
{
	return (dev->priv_flags & IFF_FAILOVER) != 0;
}
/* Tell whether @dev has the IFF_FAILOVER_SLAVE private flag set. */
static inline bool netif_is_failover_slave(const struct net_device *dev)
{
	return (dev->priv_flags & IFF_FAILOVER_SLAVE) != 0;
}
/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
static inline void netif_keep_dst(struct net_device *dev) static inline void netif_keep_dst(struct net_device *dev)
{ {
......
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2018, Intel Corporation. */
#ifndef _FAILOVER_H
#define _FAILOVER_H
#include <linux/netdevice.h>
/* Event handlers a paravirtual driver supplies when registering a failover
 * instance. Each slave_* callback receives the slave netdev and the failover
 * netdev it was matched to; every callback except slave_handle_frame returns
 * an int where zero means success.
 */
struct failover_ops {
/* Called before a slave is attached; a nonzero return aborts the attach. */
int (*slave_pre_register)(struct net_device *slave_dev,
struct net_device *failover_dev);
/* Called after the rx handler and upper-dev link are set up; a nonzero
 * return causes the failover core to undo both.
 */
int (*slave_register)(struct net_device *slave_dev,
struct net_device *failover_dev);
/* Called before a slave is detached; a nonzero return aborts the detach. */
int (*slave_pre_unregister)(struct net_device *slave_dev,
struct net_device *failover_dev);
/* Called after the rx handler and upper-dev link have been removed. */
int (*slave_unregister)(struct net_device *slave_dev,
struct net_device *failover_dev);
/* Called on NETDEV_UP/NETDEV_DOWN/NETDEV_CHANGE events of a slave, only
 * while the failover netdev is running.
 */
int (*slave_link_change)(struct net_device *slave_dev,
struct net_device *failover_dev);
/* Called on NETDEV_CHANGENAME events of a slave, only while the failover
 * netdev is running.
 */
int (*slave_name_change)(struct net_device *slave_dev,
struct net_device *failover_dev);
/* Installed as the slave's rx_handler, with the failover netdev passed as
 * the rx_handler data.
 */
rx_handler_result_t (*slave_handle_frame)(struct sk_buff **pskb);
};
/* One registered failover instance, allocated by failover_register() and
 * freed by failover_unregister().
 */
struct failover {
/* Linkage in the failover core's list of registered instances. */
struct list_head list;
/* The failover (master) netdev; failover_register() takes a reference on it
 * with dev_hold() and failover_unregister() drops it with dev_put().
 */
struct net_device __rcu *failover_dev;
/* Handlers supplied by the driver at registration time. */
struct failover_ops __rcu *ops;
};
/* Create and register a failover instance for @dev using @ops. Returns the
 * new instance, or an ERR_PTR() value (-EINVAL for a non-ethernet @dev,
 * -ENOMEM on allocation failure).
 */
struct failover *failover_register(struct net_device *dev,
struct failover_ops *ops);
/* Unregister and free an instance obtained from failover_register(). */
void failover_unregister(struct failover *failover);
/* Detach @slave_dev from its failover instance. Returns NOTIFY_OK when the
 * slave_unregister handler ran and returned zero, NOTIFY_DONE otherwise.
 */
int failover_slave_unregister(struct net_device *slave_dev);
#endif /* _FAILOVER_H */
...@@ -432,6 +432,19 @@ config MAY_USE_DEVLINK ...@@ -432,6 +432,19 @@ config MAY_USE_DEVLINK
config PAGE_POOL config PAGE_POOL
bool bool
config FAILOVER
tristate "Generic failover module"
help
The failover module provides a generic interface for paravirtual
drivers to register a netdev and a set of ops with a failover
instance. The ops are used as event handlers that get called to
handle netdev register/unregister/link change/name change events
on slave pci ethernet devices with the same mac address as the
failover netdev. This enables paravirtual drivers to use a
VF as an accelerated low latency datapath. It also allows live
migration of VMs with direct attached VFs by failing over to the
paravirtual datapath when the VF is unplugged.
endif # if NET endif # if NET
# Used by archs to tell that they support BPF JIT compiler plus which flavour. # Used by archs to tell that they support BPF JIT compiler plus which flavour.
......
...@@ -31,3 +31,4 @@ obj-$(CONFIG_DST_CACHE) += dst_cache.o ...@@ -31,3 +31,4 @@ obj-$(CONFIG_DST_CACHE) += dst_cache.o
obj-$(CONFIG_HWBM) += hwbm.o obj-$(CONFIG_HWBM) += hwbm.o
obj-$(CONFIG_NET_DEVLINK) += devlink.o obj-$(CONFIG_NET_DEVLINK) += devlink.o
obj-$(CONFIG_GRO_CELLS) += gro_cells.o obj-$(CONFIG_GRO_CELLS) += gro_cells.o
obj-$(CONFIG_FAILOVER) += failover.o
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, Intel Corporation. */
/* A common module to handle registrations and notifications for paravirtual
* drivers to enable accelerated datapath and support VF live migration.
*
* The notifier and event handling code is based on netvsc driver.
*/
#include <linux/module.h>
#include <linux/etherdevice.h>
#include <uapi/linux/if_arp.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
#include <net/failover.h>
/* All registered failover instances; entries are added by failover_register()
 * and removed by failover_unregister().
 */
static LIST_HEAD(failover_list);
/* Taken around every traversal and modification of failover_list. */
static DEFINE_SPINLOCK(failover_lock);
/* Look up the failover netdev whose permanent MAC address equals @mac.
 *
 * Walks failover_list under failover_lock. On a match, *@ops is set to the
 * matching instance's ops and the failover netdev is returned. When nothing
 * matches, NULL is returned and *@ops is left untouched.
 */
static struct net_device *failover_get_bymac(u8 *mac, struct failover_ops **ops)
{
	struct net_device *found = NULL;
	struct failover *entry;

	spin_lock(&failover_lock);
	list_for_each_entry(entry, &failover_list, list) {
		struct net_device *candidate;

		candidate = rtnl_dereference(entry->failover_dev);
		if (!ether_addr_equal(candidate->perm_addr, mac))
			continue;

		*ops = rtnl_dereference(entry->ops);
		found = candidate;
		break;
	}
	spin_unlock(&failover_lock);

	return found;
}
/**
* failover_slave_register - Register a slave netdev
*
* @slave_dev: slave netdev that is being registered
*
* Registers a slave device to a failover instance. Only ethernet devices
* are supported.
*/
static int failover_slave_register(struct net_device *slave_dev)
{
struct netdev_lag_upper_info lag_upper_info;
struct net_device *failover_dev;
struct failover_ops *fops;
int err;
if (slave_dev->type != ARPHRD_ETHER)
goto done;
ASSERT_RTNL();
failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
if (!failover_dev)
goto done;
if (fops && fops->slave_pre_register &&
fops->slave_pre_register(slave_dev, failover_dev))
goto done;
err = netdev_rx_handler_register(slave_dev, fops->slave_handle_frame,
failover_dev);
if (err) {
netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n",
err);
goto done;
}
lag_upper_info.tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP;
err = netdev_master_upper_dev_link(slave_dev, failover_dev, NULL,
&lag_upper_info, NULL);
if (err) {
netdev_err(slave_dev, "can not set failover device %s (err = %d)\n",
failover_dev->name, err);
goto err_upper_link;
}
slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
if (fops && fops->slave_register &&
!fops->slave_register(slave_dev, failover_dev))
return NOTIFY_OK;
netdev_upper_dev_unlink(slave_dev, failover_dev);
slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
err_upper_link:
netdev_rx_handler_unregister(slave_dev);
done:
return NOTIFY_DONE;
}
/**
 * failover_slave_unregister - Unregister a slave netdev
 *
 * @slave_dev: slave netdev that is being unregistered
 *
 * Detaches a slave device from its failover instance: the rx handler is
 * removed, the upper-dev link is dropped and IFF_FAILOVER_SLAVE is cleared.
 * The slave_pre_unregister handler, when present, may veto the detach by
 * returning nonzero.
 *
 * Return: NOTIFY_OK if a slave_unregister handler ran and returned zero,
 * NOTIFY_DONE in every other case.
 */
int failover_slave_unregister(struct net_device *slave_dev)
{
	struct net_device *master;
	struct failover_ops *ops;

	if (!netif_is_failover_slave(slave_dev))
		return NOTIFY_DONE;

	ASSERT_RTNL();

	master = failover_get_bymac(slave_dev->perm_addr, &ops);
	if (!master)
		return NOTIFY_DONE;

	if (ops && ops->slave_pre_unregister) {
		if (ops->slave_pre_unregister(slave_dev, master))
			return NOTIFY_DONE;
	}

	netdev_rx_handler_unregister(slave_dev);
	netdev_upper_dev_unlink(slave_dev, master);
	slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;

	if (!ops || !ops->slave_unregister)
		return NOTIFY_DONE;

	return ops->slave_unregister(slave_dev, master) ? NOTIFY_DONE
							 : NOTIFY_OK;
}
EXPORT_SYMBOL_GPL(failover_slave_unregister);
/* Forward a link state event of @slave_dev to the slave_link_change handler
 * of its failover instance.
 *
 * Nothing is forwarded unless @slave_dev is flagged as a failover slave, a
 * matching failover netdev exists and that netdev is running. Returns
 * NOTIFY_OK when the handler ran and returned zero, NOTIFY_DONE otherwise.
 */
static int failover_slave_link_change(struct net_device *slave_dev)
{
	struct net_device *master;
	struct failover_ops *ops;

	if (!netif_is_failover_slave(slave_dev))
		return NOTIFY_DONE;

	ASSERT_RTNL();

	master = failover_get_bymac(slave_dev->perm_addr, &ops);
	if (!master || !netif_running(master))
		return NOTIFY_DONE;

	if (!ops || !ops->slave_link_change)
		return NOTIFY_DONE;

	return ops->slave_link_change(slave_dev, master) ? NOTIFY_DONE
							  : NOTIFY_OK;
}
/* Forward a rename event of @slave_dev to the slave_name_change handler of
 * its failover instance.
 *
 * Nothing is forwarded unless @slave_dev is flagged as a failover slave, a
 * matching failover netdev exists and that netdev is running. Returns
 * NOTIFY_OK when the handler ran and returned zero, NOTIFY_DONE otherwise.
 */
static int failover_slave_name_change(struct net_device *slave_dev)
{
	struct net_device *master;
	struct failover_ops *ops;

	if (!netif_is_failover_slave(slave_dev))
		return NOTIFY_DONE;

	ASSERT_RTNL();

	master = failover_get_bymac(slave_dev->perm_addr, &ops);
	if (!master || !netif_running(master))
		return NOTIFY_DONE;

	if (!ops || !ops->slave_name_change)
		return NOTIFY_DONE;

	return ops->slave_name_change(slave_dev, master) ? NOTIFY_DONE
							  : NOTIFY_OK;
}
/* Netdevice notifier callback: route events of potential slave devices to
 * the matching failover_slave_*() helper.
 *
 * Events raised for a failover (master) netdev itself are ignored, as is
 * every event type not listed below.
 */
static int
failover_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	int verdict = NOTIFY_DONE;

	/* Skip parent events */
	if (netif_is_failover(dev))
		return verdict;

	if (event == NETDEV_REGISTER)
		verdict = failover_slave_register(dev);
	else if (event == NETDEV_UNREGISTER)
		verdict = failover_slave_unregister(dev);
	else if (event == NETDEV_UP || event == NETDEV_DOWN ||
		 event == NETDEV_CHANGE)
		verdict = failover_slave_link_change(dev);
	else if (event == NETDEV_CHANGENAME)
		verdict = failover_slave_name_change(dev);

	return verdict;
}
/* Notifier block that delivers netdevice events to failover_event(); it is
 * registered in failover_init() and unregistered in failover_exit().
 */
static struct notifier_block failover_notifier = {
.notifier_call = failover_event,
};
/* Attach slaves that already existed when @failover_dev was registered.
 *
 * Under RTNL, scans every netdev in @failover_dev's network namespace and
 * calls failover_slave_register() on each non-failover device whose
 * permanent MAC address equals that of @failover_dev.
 */
static void
failover_existing_slave_register(struct net_device *failover_dev)
{
	struct net *ns = dev_net(failover_dev);
	struct net_device *candidate;

	rtnl_lock();
	for_each_netdev(ns, candidate) {
		if (!netif_is_failover(candidate) &&
		    ether_addr_equal(failover_dev->perm_addr,
				     candidate->perm_addr))
			failover_slave_register(candidate);
	}
	rtnl_unlock();
}
/**
 * failover_register - Register a failover instance
 *
 * @dev: failover netdev
 * @ops: failover ops
 *
 * Allocates a failover instance for @dev, takes a reference on @dev, flags
 * it IFF_FAILOVER and adds the instance to the global list. Already existing
 * netdevs with the same permanent MAC address are then registered as slaves.
 * @ops provides the handlers for slave device register/unregister/link
 * change/name change events.
 *
 * Return: pointer to the failover instance, ERR_PTR(-EINVAL) if @dev is not
 * an ethernet device, or ERR_PTR(-ENOMEM) if the allocation fails.
 */
struct failover *failover_register(struct net_device *dev,
				   struct failover_ops *ops)
{
	struct failover *instance;

	if (dev->type != ARPHRD_ETHER)
		return ERR_PTR(-EINVAL);

	instance = kzalloc(sizeof(*instance), GFP_KERNEL);
	if (!instance)
		return ERR_PTR(-ENOMEM);

	/* Fill in the instance completely before it becomes visible on the
	 * list.
	 */
	dev_hold(dev);
	dev->priv_flags |= IFF_FAILOVER;
	rcu_assign_pointer(instance->failover_dev, dev);
	rcu_assign_pointer(instance->ops, ops);

	spin_lock(&failover_lock);
	list_add_tail(&instance->list, &failover_list);
	spin_unlock(&failover_lock);

	netdev_info(dev, "failover master:%s registered\n", dev->name);

	failover_existing_slave_register(dev);

	return instance;
}
EXPORT_SYMBOL_GPL(failover_register);
/**
* failover_unregister - Unregister a failover instance
*
* @failover: pointer to failover instance
*
* Unregisters and frees a failover instance.
*/
void failover_unregister(struct failover *failover)
{
struct net_device *failover_dev;
failover_dev = rcu_dereference(failover->failover_dev);
netdev_info(failover_dev, "failover master:%s unregistered\n",
failover_dev->name);
failover_dev->priv_flags &= ~IFF_FAILOVER;
dev_put(failover_dev);
spin_lock(&failover_lock);
list_del(&failover->list);
spin_unlock(&failover_lock);
kfree(failover);
}
EXPORT_SYMBOL_GPL(failover_unregister);
static __init int
failover_init(void)
{
register_netdevice_notifier(&failover_notifier);
return 0;
}
module_init(failover_init);
static __exit
void failover_exit(void)
{
unregister_netdevice_notifier(&failover_notifier);
}
module_exit(failover_exit);
MODULE_DESCRIPTION("Generic failover infrastructure/interface");
MODULE_LICENSE("GPL v2");
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment