Commit 6437b112 authored by David S. Miller

Merge branch 'mlxsw-Align-multipath-hash-parameters-with-kernels'

Jiri Pirko says:

====================
mlxsw: Align multipath hash parameters with kernel's

Ido says:

This set makes sure the device is using the same parameters as the
kernel when it computes the multipath hash during IP forwarding.

First patch adds a new netevent to let interested listeners know that
the multipath hash policy has changed.

The next two patches make small, non-functional changes in the mlxsw
driver.

The last patches configure the multipath hash policy upon driver
initialization and in response to netevents.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents a882d20c 28678f07
......@@ -5844,6 +5844,137 @@ static inline void mlxsw_reg_rigr2_erif_entry_pack(char *payload, int index,
mlxsw_reg_rigr2_erif_entry_erif_set(payload, index, erif);
}
/* RECR-V2 - Router ECMP Configuration Version 2 Register
* ------------------------------------------------------
*/
#define MLXSW_REG_RECR2_ID 0x8025
#define MLXSW_REG_RECR2_LEN 0x38
MLXSW_REG_DEFINE(recr2, MLXSW_REG_RECR2_ID, MLXSW_REG_RECR2_LEN);
/* reg_recr2_pp
* Per-port configuration
* Access: Index
*/
MLXSW_ITEM32(reg, recr2, pp, 0x00, 24, 1);
/* reg_recr2_sh
* Symmetric hash
* Access: RW
*/
MLXSW_ITEM32(reg, recr2, sh, 0x00, 8, 1);
/* reg_recr2_seed
* Seed
* Access: RW
*/
MLXSW_ITEM32(reg, recr2, seed, 0x08, 0, 32);
/* Indices into the reg_recr2_outer_header_enables bit array. Each constant
 * is passed as the bit index when enabling a header class for the hash.
 */
enum {
/* Enable IPv4 fields if packet is not TCP and not UDP */
MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP = 3,
/* Enable IPv4 fields if packet is TCP or UDP */
MLXSW_REG_RECR2_IPV4_EN_TCP_UDP = 4,
/* Enable IPv6 fields if packet is not TCP and not UDP */
MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP = 5,
/* Enable IPv6 fields if packet is TCP or UDP */
MLXSW_REG_RECR2_IPV6_EN_TCP_UDP = 6,
/* Enable TCP/UDP header fields if packet is IPv4 */
MLXSW_REG_RECR2_TCP_UDP_EN_IPV4 = 7,
/* Enable TCP/UDP header fields if packet is IPv6 */
MLXSW_REG_RECR2_TCP_UDP_EN_IPV6 = 8,
};
/* reg_recr2_outer_header_enables
* Bit mask where each bit enables a specific layer to be included in
* the hash calculation.
* Access: RW
*/
MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_enables, 0x10, 0x04, 1);
/* Indices into the reg_recr2_outer_header_fields_enable bit array.
 * For the IPv4 addresses only the first and last index of each run are
 * named (SIP0..SIP3, DIP0..DIP3); the enable helpers iterate over the
 * inclusive range between them. For the IPv6 addresses one index is named
 * *_0_7 and a further run goes from *8 to *15 inclusive.
 * NOTE(review): *_0_7 presumably covers address bytes 0-7 with a single
 * bit - confirm against the register specification.
 */
enum {
/* IPv4 Source IP */
MLXSW_REG_RECR2_IPV4_SIP0 = 9,
MLXSW_REG_RECR2_IPV4_SIP3 = 12,
/* IPv4 Destination IP */
MLXSW_REG_RECR2_IPV4_DIP0 = 13,
MLXSW_REG_RECR2_IPV4_DIP3 = 16,
/* IP Protocol */
MLXSW_REG_RECR2_IPV4_PROTOCOL = 17,
/* IPv6 Source IP */
MLXSW_REG_RECR2_IPV6_SIP0_7 = 21,
MLXSW_REG_RECR2_IPV6_SIP8 = 29,
MLXSW_REG_RECR2_IPV6_SIP15 = 36,
/* IPv6 Destination IP */
MLXSW_REG_RECR2_IPV6_DIP0_7 = 37,
MLXSW_REG_RECR2_IPV6_DIP8 = 45,
MLXSW_REG_RECR2_IPV6_DIP15 = 52,
/* IPv6 Next Header */
MLXSW_REG_RECR2_IPV6_NEXT_HEADER = 53,
/* IPv6 Flow Label */
MLXSW_REG_RECR2_IPV6_FLOW_LABEL = 57,
/* TCP/UDP Source Port */
MLXSW_REG_RECR2_TCP_UDP_SPORT = 74,
/* TCP/UDP Destination Port */
MLXSW_REG_RECR2_TCP_UDP_DPORT = 75,
};
/* reg_recr2_outer_header_fields_enable
* Packet fields to enable for ECMP hash subject to outer_header_enables.
* Access: RW
*/
MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_fields_enable, 0x14, 0x14, 1);
/* Enable every IPv4 source-address entry, from MLXSW_REG_RECR2_IPV4_SIP0
 * through MLXSW_REG_RECR2_IPV4_SIP3 inclusive, in the
 * outer_header_fields_enable bit array of @payload.
 */
static inline void mlxsw_reg_recr2_ipv4_sip_enable(char *payload)
{
	int bit = MLXSW_REG_RECR2_IPV4_SIP0;

	while (bit <= MLXSW_REG_RECR2_IPV4_SIP3) {
		mlxsw_reg_recr2_outer_header_fields_enable_set(payload, bit,
							       true);
		bit++;
	}
}
/* Enable every IPv4 destination-address entry, from
 * MLXSW_REG_RECR2_IPV4_DIP0 through MLXSW_REG_RECR2_IPV4_DIP3 inclusive,
 * in the outer_header_fields_enable bit array of @payload.
 */
static inline void mlxsw_reg_recr2_ipv4_dip_enable(char *payload)
{
	int bit = MLXSW_REG_RECR2_IPV4_DIP0;

	while (bit <= MLXSW_REG_RECR2_IPV4_DIP3) {
		mlxsw_reg_recr2_outer_header_fields_enable_set(payload, bit,
							       true);
		bit++;
	}
}
/* Enable the IPv6 source-address entries in the outer_header_fields_enable
 * bit array of @payload: first the single SIP0_7 entry, then every entry
 * from SIP8 through SIP15 inclusive.
 */
static inline void mlxsw_reg_recr2_ipv6_sip_enable(char *payload)
{
	int bit;

	mlxsw_reg_recr2_outer_header_fields_enable_set(payload,
						       MLXSW_REG_RECR2_IPV6_SIP0_7,
						       true);

	for (bit = MLXSW_REG_RECR2_IPV6_SIP8;
	     bit <= MLXSW_REG_RECR2_IPV6_SIP15; bit++)
		mlxsw_reg_recr2_outer_header_fields_enable_set(payload, bit,
							       true);
}
/* Enable the IPv6 destination-address entries in the
 * outer_header_fields_enable bit array of @payload: first the single
 * DIP0_7 entry, then every entry from DIP8 through DIP15 inclusive.
 */
static inline void mlxsw_reg_recr2_ipv6_dip_enable(char *payload)
{
	int bit;

	mlxsw_reg_recr2_outer_header_fields_enable_set(payload,
						       MLXSW_REG_RECR2_IPV6_DIP0_7,
						       true);

	for (bit = MLXSW_REG_RECR2_IPV6_DIP8;
	     bit <= MLXSW_REG_RECR2_IPV6_DIP15; bit++)
		mlxsw_reg_recr2_outer_header_fields_enable_set(payload, bit,
							       true);
}
/* Prepare a RECR2 register payload.
 * @payload: register payload buffer to initialise.
 * @seed: value stored in the seed field.
 *
 * After MLXSW_REG_ZERO (presumably clears the whole payload - confirm
 * against the macro definition), the pp field is set to false, the sh
 * (symmetric hash) field to true and the seed field to @seed. The header
 * and field enable bit arrays are not written here; callers fill them in
 * afterwards.
 */
static inline void mlxsw_reg_recr2_pack(char *payload, u32 seed)
{
MLXSW_REG_ZERO(recr2, payload);
mlxsw_reg_recr2_pp_set(payload, false);
mlxsw_reg_recr2_sh_set(payload, true);
mlxsw_reg_recr2_seed_set(payload, seed);
}
/* RMFT-V2 - Router Multicast Forwarding Table Version 2 Register
* --------------------------------------------------------------
* The RMFT_V2 register is used to configure and query the multicast table.
......@@ -7313,6 +7444,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(raleu),
MLXSW_REG(rauhtd),
MLXSW_REG(rigr2),
MLXSW_REG(recr2),
MLXSW_REG(rmft2),
MLXSW_REG(mfcr),
MLXSW_REG(mfsc),
......
......@@ -4574,10 +4574,6 @@ static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = {
.notifier_call = mlxsw_sp_inet6addr_event,
};
static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
.notifier_call = mlxsw_sp_router_netevent_event,
};
static const struct pci_device_id mlxsw_sp_pci_id_table[] = {
{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0},
{0, },
......@@ -4596,7 +4592,6 @@ static int __init mlxsw_sp_module_init(void)
register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
err = mlxsw_core_driver_register(&mlxsw_sp_driver);
if (err)
......@@ -4611,7 +4606,6 @@ static int __init mlxsw_sp_module_init(void)
err_pci_driver_register:
mlxsw_core_driver_unregister(&mlxsw_sp_driver);
err_core_driver_register:
unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
......@@ -4623,7 +4617,6 @@ static void __exit mlxsw_sp_module_exit(void)
{
mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver);
mlxsw_core_driver_unregister(&mlxsw_sp_driver);
unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
......
......@@ -385,8 +385,6 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port)
/* spectrum_router.c */
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
unsigned long event, void *ptr);
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev);
int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
unsigned long event, void *ptr);
......
......@@ -47,6 +47,7 @@
#include <linux/socket.h>
#include <linux/route.h>
#include <linux/gcd.h>
#include <linux/random.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
......@@ -96,6 +97,7 @@ struct mlxsw_sp_router {
struct list_head ipip_list;
bool aborted;
struct notifier_block fib_nb;
struct notifier_block netevent_nb;
const struct mlxsw_sp_rif_ops **rif_ops_arr;
const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};
......@@ -2025,7 +2027,7 @@ mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
}
struct mlxsw_sp_neigh_event_work {
struct mlxsw_sp_netevent_work {
struct work_struct work;
struct mlxsw_sp *mlxsw_sp;
struct neighbour *n;
......@@ -2033,11 +2035,11 @@ struct mlxsw_sp_neigh_event_work {
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
struct mlxsw_sp_neigh_event_work *neigh_work =
container_of(work, struct mlxsw_sp_neigh_event_work, work);
struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
struct mlxsw_sp_netevent_work *net_work =
container_of(work, struct mlxsw_sp_netevent_work, work);
struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
struct mlxsw_sp_neigh_entry *neigh_entry;
struct neighbour *n = neigh_work->n;
struct neighbour *n = net_work->n;
unsigned char ha[ETH_ALEN];
bool entry_connected;
u8 nud_state, dead;
......@@ -2073,18 +2075,32 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
out:
rtnl_unlock();
neigh_release(n);
kfree(neigh_work);
kfree(net_work);
}
int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
unsigned long event, void *ptr)
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
/* Work item handler scheduled for NETEVENT_MULTIPATH_HASH_UPDATE.
 * Re-runs the multipath hash configuration for the device recorded in the
 * work item and then frees the work item. The result of
 * mlxsw_sp_mp_hash_init() is not checked here.
 */
static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
{
	struct mlxsw_sp_netevent_work *hash_work;

	hash_work = container_of(work, struct mlxsw_sp_netevent_work, work);
	mlxsw_sp_mp_hash_init(hash_work->mlxsw_sp);
	kfree(hash_work);
}
static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct mlxsw_sp_neigh_event_work *neigh_work;
struct mlxsw_sp_netevent_work *net_work;
struct mlxsw_sp_port *mlxsw_sp_port;
struct mlxsw_sp_router *router;
struct mlxsw_sp *mlxsw_sp;
unsigned long interval;
struct neigh_parms *p;
struct neighbour *n;
struct net *net;
switch (event) {
case NETEVENT_DELAY_PROBE_TIME_UPDATE:
......@@ -2118,24 +2134,39 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
if (!mlxsw_sp_port)
return NOTIFY_DONE;
neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
if (!neigh_work) {
net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
if (!net_work) {
mlxsw_sp_port_dev_put(mlxsw_sp_port);
return NOTIFY_BAD;
}
INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
neigh_work->n = n;
INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
net_work->n = n;
/* Take a reference to ensure the neighbour won't be
* destructed until we drop the reference in delayed
* work.
*/
neigh_clone(n);
mlxsw_core_schedule_work(&neigh_work->work);
mlxsw_core_schedule_work(&net_work->work);
mlxsw_sp_port_dev_put(mlxsw_sp_port);
break;
case NETEVENT_MULTIPATH_HASH_UPDATE:
net = ptr;
if (!net_eq(net, &init_net))
return NOTIFY_DONE;
net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
if (!net_work)
return NOTIFY_BAD;
router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
net_work->mlxsw_sp = router->mlxsw_sp;
mlxsw_core_schedule_work(&net_work->work);
break;
}
return NOTIFY_DONE;
......@@ -6643,6 +6674,64 @@ static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
mlxsw_sp_router_fib_flush(router->mlxsw_sp);
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Mark entry @header of the outer_header_enables bit array in @recr2_pl as
 * enabled. Callers in this file pass one of the MLXSW_REG_RECR2_*_EN_*
 * indices.
 */
static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
{
mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
}
/* Mark entry @field of the outer_header_fields_enable bit array in
 * @recr2_pl as enabled. Callers in this file pass one of the
 * MLXSW_REG_RECR2_* field indices (protocol, ports, flow label, ...).
 */
static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
{
mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
}
/* Select the IPv4 inputs of the multipath hash in @recr2_pl.
 *
 * Source and destination IP are always enabled, for both TCP/UDP and
 * non-TCP/UDP packets. When init_net's fib_multipath_hash_policy sysctl is
 * non-zero, the TCP/UDP header is enabled as well, together with the IP
 * protocol and the source and destination ports.
 */
static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
{
	bool with_l4 = !!init_net.ipv4.sysctl_fib_multipath_hash_policy;

	/* Layer 3 inputs, used regardless of the policy. */
	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);

	if (with_l4) {
		/* Layer 4 inputs, added only for a non-zero policy. */
		mlxsw_sp_mp_hash_header_set(recr2_pl,
					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_IPV4_PROTOCOL);
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
	}
}
/* Select the IPv6 inputs of the multipath hash in @recr2_pl.
 *
 * IPv6 fields are enabled for both TCP/UDP and non-TCP/UDP packets, and the
 * hash covers source IP, destination IP, flow label and next header. No
 * sysctl is consulted here.
 */
static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
{
	/* Header classes for which the IPv6 fields take part in the hash. */
	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl,
						 MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP,
						 true);
	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl,
						 MLXSW_REG_RECR2_IPV6_EN_TCP_UDP,
						 true);

	/* Address fields. */
	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);

	/* Remaining IPv6 header fields. */
	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl,
						       MLXSW_REG_RECR2_IPV6_FLOW_LABEL,
						       true);
	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl,
						       MLXSW_REG_RECR2_IPV6_NEXT_HEADER,
						       true);
}
/* Build and write the RECR2 register that selects the multipath hash inputs.
 * @mlxsw_sp: device instance whose core the register is written through.
 *
 * The payload is packed with a seed, the IPv4 and IPv6 header/field enables
 * are filled in, and the payload is written with mlxsw_reg_write().
 * Returns the value returned by mlxsw_reg_write().
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
char recr2_pl[MLXSW_REG_RECR2_LEN];
u32 seed;
/* A new random seed is drawn on every call, so the seed written to the
 * device changes each time this function runs.
 */
get_random_bytes(&seed, sizeof(seed));
mlxsw_reg_recr2_pack(recr2_pl, seed);
mlxsw_sp_mp4_hash_init(recr2_pl);
mlxsw_sp_mp6_hash_init(recr2_pl);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
}
#else
/* Stub used when CONFIG_IP_ROUTE_MULTIPATH is not set: no register is
 * written and 0 is returned.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
return 0;
}
#endif
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
char rgcr_pl[MLXSW_REG_RGCR_LEN];
......@@ -6720,6 +6809,16 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
if (err)
goto err_neigh_init;
mlxsw_sp->router->netevent_nb.notifier_call =
mlxsw_sp_router_netevent_event;
err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
if (err)
goto err_register_netevent_notifier;
err = mlxsw_sp_mp_hash_init(mlxsw_sp);
if (err)
goto err_mp_hash_init;
mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
mlxsw_sp_router_fib_dump_flush);
......@@ -6729,6 +6828,9 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
return 0;
err_register_fib_notifier:
err_mp_hash_init:
unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
err_register_netevent_notifier:
mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
mlxsw_sp_vrs_fini(mlxsw_sp);
......@@ -6754,6 +6856,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
mlxsw_sp_neigh_fini(mlxsw_sp);
mlxsw_sp_vrs_fini(mlxsw_sp);
mlxsw_sp_mr_fini(mlxsw_sp);
......
......@@ -25,6 +25,7 @@ enum netevent_notif_type {
NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */
NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */
};
int register_netevent_notifier(struct notifier_block *nb);
......
......@@ -25,6 +25,7 @@
#include <net/inet_frag.h>
#include <net/ping.h>
#include <net/protocol.h>
#include <net/netevent.h>
static int zero;
static int one = 1;
......@@ -385,6 +386,23 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
return ret;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
/* sysctl handler for fib_multipath_hash_policy.
 *
 * Delegates parsing and range checking to proc_dointvec_minmax(). After a
 * successful write, NETEVENT_MULTIPATH_HASH_UPDATE is raised with the
 * struct net that owns the sysctl value as argument. Returns the value
 * returned by proc_dointvec_minmax().
 */
static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
					  void __user *buffer, size_t *lenp,
					  loff_t *ppos)
{
	struct net *net;
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!write || ret != 0)
		return ret;

	/* table->data points at the policy field embedded in struct net. */
	net = container_of(table->data, struct net,
			   ipv4.sysctl_fib_multipath_hash_policy);
	call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);

	return ret;
}
#endif
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_max_orphans",
......@@ -907,7 +925,7 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.proc_handler = proc_fib_multipath_hash_policy,
.extra1 = &zero,
.extra2 = &one,
},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment