Commit 74cc6d18 authored by Jakub Kicinski, committed by David S. Miller

udp_tunnel: add the ability to share port tables

Unfortunately, recent Intel NIC designs share the UDP port table
across netdevs. So far the UDP tunnel port state has been maintained
per netdev; we need to extend that to cater to Intel NICs.

Expect NICs to allocate the info structure dynamically and link
to the shared state from there. All the sharing NICs will record port
offload information in the one instance of the table, so we need
to make sure that the use count can accommodate larger numbers.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 8744c0a8
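
Before the diff, a quick sanity check of the new sizing (our arithmetic, mirroring the BUILD_BUG_ON added below; the macro names are local to this snippet): each sharing device can hold at most two references to a given port entry, one per IP version, so a u16 use count covers the permitted number of sharing devices.

/* Standalone C11 restatement of the bound the patch enforces with
 * BUILD_BUG_ON(): worst case is every sharing device referencing the
 * same port entry for both IPv4 and IPv6.
 */
#include <limits.h>

#define USE_CNT_MAX		USHRT_MAX	/* 65535, what a u16 holds */
#define MAX_SHARING_DEVICES	(USHRT_MAX / 2)	/* 32767, as in the patch */

_Static_assert(MAX_SHARING_DEVICES * 2 <= USE_CNT_MAX,
	       "use count could overflow");	/* 65534 <= 65535 */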
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -200,11 +200,27 @@ enum udp_tunnel_nic_info_flags {
 	UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3),
 };
 
+struct udp_tunnel_nic;
+
+#define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES	(U16_MAX / 2)
+
+struct udp_tunnel_nic_shared {
+	struct udp_tunnel_nic *udp_tunnel_nic_info;
+
+	struct list_head devices;
+};
+
+struct udp_tunnel_nic_shared_node {
+	struct net_device *dev;
+	struct list_head list;
+};
+
 /**
  * struct udp_tunnel_nic_info - driver UDP tunnel offload information
  * @set_port:	callback for adding a new port
  * @unset_port:	callback for removing a port
  * @sync_table:	callback for syncing the entire port table at once
+ * @shared:	reference to device global state (optional)
  * @flags:	device flags from enum udp_tunnel_nic_info_flags
  * @tables:	UDP port tables this device has
  * @tables.n_entries:	number of entries in this table
@@ -213,6 +229,12 @@ enum udp_tunnel_nic_info_flags {
  * Drivers are expected to provide either @set_port and @unset_port callbacks
  * or the @sync_table callback. Callbacks are invoked with rtnl lock held.
  *
+ * Devices which (misguidedly) share the UDP tunnel port table across multiple
+ * netdevs should allocate an instance of struct udp_tunnel_nic_shared and
+ * point @shared at it.
+ * There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices
+ * sharing a table.
+ *
  * Known limitations:
  *  - UDP tunnel port notifications are fundamentally best-effort -
  *    it is likely the driver will both see skbs which use a UDP tunnel port,
@@ -234,6 +256,8 @@ struct udp_tunnel_nic_info {
 	/* all at once */
 	int (*sync_table)(struct net_device *dev, unsigned int table);
 
+	struct udp_tunnel_nic_shared *shared;
+
 	unsigned int flags;
 
 	struct udp_tunnel_nic_table_info {
...
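
To make the driver-facing contract above concrete, here is a rough sketch of how a device whose ports share one UDP port table might use it. This is illustrative only: the my_nic_* identifiers and struct my_nic_asic are invented; only the udp_tunnel_nic_* types and fields come from the patch. As the commit message says, the info structure is allocated dynamically, and every netdev's @shared points at one instance living in device-global state:

/* Sketch: one udp_tunnel_nic_shared per ASIC, zeroed when the ASIC
 * state is allocated; every port netdev points its info at it.
 */
struct my_nic_asic {
	struct udp_tunnel_nic_shared udp_tunnel_shared;
	/* ... other device-global state ... */
};

static int my_nic_init_udp_tunnels(struct my_nic_asic *asic,
				   struct net_device *dev)
{
	struct udp_tunnel_nic_info *info;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	info->set_port	 = my_nic_udp_tunnel_set_port;
	info->unset_port = my_nic_udp_tunnel_unset_port;
	info->shared	 = &asic->udp_tunnel_shared;
	info->tables[0].n_entries    = 8;
	info->tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;

	dev->udp_tunnel_nic_info = info;
	return 0;
}

The core does the rest on registration: the first netdev to register initializes the shared device list and allocates the actual table state, and subsequent netdevs attach to it (see udp_tunnel_nic_register() below).
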
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -19,8 +19,9 @@ enum udp_tunnel_nic_table_entry_flags {
 struct udp_tunnel_nic_table_entry {
 	__be16 port;
 	u8 type;
-	u8 use_cnt;
 	u8 flags;
+	u16 use_cnt;
+#define UDP_TUNNEL_NIC_USE_CNT_MAX	U16_MAX
 	u8 hw_priv;
 };
@@ -370,6 +371,8 @@ udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn,
 	bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
 	unsigned int from, to;
 
+	WARN_ON(entry->use_cnt + (u32)use_cnt_adj > U16_MAX);
+
 	/* If not going from used to unused or vice versa - all done.
 	 * For dodgy entries make sure we try to sync again (queue the entry).
 	 */
@@ -675,6 +678,7 @@ static void
 udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
 {
 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+	struct udp_tunnel_nic_shared_node *node;
 	unsigned int i, j;
 
 	/* Freeze all the ports we are already tracking so that the replay
@@ -686,7 +690,12 @@ udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
 	utn->missed = 0;
 	utn->need_replay = 0;
 
-	udp_tunnel_get_rx_info(dev);
+	if (!info->shared) {
+		udp_tunnel_get_rx_info(dev);
+	} else {
+		list_for_each_entry(node, &info->shared->devices, list)
+			udp_tunnel_get_rx_info(node->dev);
+	}
 
 	for (i = 0; i < utn->n_tables; i++)
 		for (j = 0; j < info->tables[i].n_entries; j++)
@@ -742,20 +751,39 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
 	return NULL;
 }
 
+static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn)
+{
+	unsigned int i;
+
+	for (i = 0; i < utn->n_tables; i++)
+		kfree(utn->entries[i]);
+	kfree(utn->entries);
+	kfree(utn);
+}
+
 static int udp_tunnel_nic_register(struct net_device *dev)
 {
 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+	struct udp_tunnel_nic_shared_node *node = NULL;
 	struct udp_tunnel_nic *utn;
 	unsigned int n_tables, i;
 
 	BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE <
 		     UDP_TUNNEL_NIC_MAX_TABLES);
+	/* Expect use count of at most 2 (IPv4, IPv6) per device */
+	BUILD_BUG_ON(UDP_TUNNEL_NIC_USE_CNT_MAX <
+		     UDP_TUNNEL_NIC_MAX_SHARING_DEVICES * 2);
 
+	/* Check that the driver info is sane */
 	if (WARN_ON(!info->set_port != !info->unset_port) ||
 	    WARN_ON(!info->set_port == !info->sync_table) ||
 	    WARN_ON(!info->tables[0].n_entries))
 		return -EINVAL;
 
+	if (WARN_ON(info->shared &&
+		    info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
+		return -EINVAL;
+
 	n_tables = 1;
 	for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) {
 		if (!info->tables[i].n_entries)
@@ -766,9 +794,33 @@ static int udp_tunnel_nic_register(struct net_device *dev)
 			return -EINVAL;
 	}
 
-	utn = udp_tunnel_nic_alloc(info, n_tables);
-	if (!utn)
-		return -ENOMEM;
+	/* Create UDP tunnel state structures */
+	if (info->shared) {
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		if (!node)
+			return -ENOMEM;
+
+		node->dev = dev;
+	}
+
+	if (info->shared && info->shared->udp_tunnel_nic_info) {
+		utn = info->shared->udp_tunnel_nic_info;
+	} else {
+		utn = udp_tunnel_nic_alloc(info, n_tables);
+		if (!utn) {
+			kfree(node);
+			return -ENOMEM;
+		}
+	}
+
+	if (info->shared) {
+		if (!info->shared->udp_tunnel_nic_info) {
+			INIT_LIST_HEAD(&info->shared->devices);
+			info->shared->udp_tunnel_nic_info = utn;
+		}
+
+		list_add_tail(&node->list, &info->shared->devices);
+	}
 
 	utn->dev = dev;
 	dev_hold(dev);
@@ -783,7 +835,33 @@ static int udp_tunnel_nic_register(struct net_device *dev)
 static void
 udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
 {
-	unsigned int i;
+	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+
+	/* For a shared table remove this dev from the list of sharing devices
+	 * and if there are other devices just detach.
+	 */
+	if (info->shared) {
+		struct udp_tunnel_nic_shared_node *node, *first;
+
+		list_for_each_entry(node, &info->shared->devices, list)
+			if (node->dev == dev)
+				break;
+		if (node->dev != dev)
+			return;
+
+		list_del(&node->list);
+		kfree(node);
+
+		first = list_first_entry_or_null(&info->shared->devices,
+						 typeof(*first), list);
+		if (first) {
+			udp_tunnel_drop_rx_info(dev);
+			utn->dev = first->dev;
+			goto release_dev;
+		}
+
+		info->shared->udp_tunnel_nic_info = NULL;
+	}
 
 	/* Flush before we check work, so we don't waste time adding entries
 	 * from the work which we will boot immediately.
@@ -796,10 +874,8 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
 	if (utn->work_pending)
 		return;
 
-	for (i = 0; i < utn->n_tables; i++)
-		kfree(utn->entries[i]);
-	kfree(utn->entries);
-	kfree(utn);
+	udp_tunnel_nic_free(utn);
+release_dev:
 	dev->udp_tunnel_nic = NULL;
 	dev_put(dev);
 }
...
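
Closing the loop on the sketch: teardown on the driver side stays trivial because of the handoff visible in udp_tunnel_nic_unregister() above. When a sharing netdev goes away while others remain, the core drops that device's RX info and re-points utn->dev at the first remaining sharer; the table state itself is only freed together with the last device. The driver therefore just frees its per-netdev info copy (again hypothetical code):

/* Sketch continued: the shared struct is owned by the ASIC state and
 * outlives every sharing netdev; only the per-netdev info is freed.
 */
static void my_nic_uninit_udp_tunnels(struct net_device *dev)
{
	kfree(dev->udp_tunnel_nic_info);
	dev->udp_tunnel_nic_info = NULL;
}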