Commit b33cc2ce authored by David S. Miller

Merge branch 'net-ipv6-Add-support-for-path-selection-using-hash-of-5-tuple'

David Ahern says:

====================
net/ipv6: Add support for path selection using hash of 5-tuple

Hardware supports multipath selection using the standard L4 5-tuple
instead of just L3 and the flow label. In addition, some network
operators prefer IPv6 path selection to use the 5-tuple. To that end,
add support to IPv6 for multipath hash policy similar to
bf4e0a3d ("net: ipv4: add support for ECMP hash policy choice").
The default is still L3 which covers source and destination addresses
along with flow label and IPv6 protocol. This gives users a choice in
hash algorithms if they believe L3 only and the IPv6 flow label are not
sufficient for their use case.

A separate sysctl is added for IPv6, allowing IPv4 and IPv6 to use
different algorithms if desired.

The first 3 patches modify the IPv4 variant so that at the end of the
patch set the ipv4 and ipv6 implementations are direct parallels.

Patch 4 refactors the existing rt6_multipath_hash in preparation for
adding the policy option.

Patch 5 renames the existing netevent to have IPv4 in the name so ipv4
changes can be distinguished from IPv6 if the netevent handler cares.

Patch 6 adds the skb as an argument through the FIB lookup functions
to the multipath selection. Needed for the forwarding case.

Patch 7 adds the L4 hash support.

Patch 8 adds the hook for the netevent to the spectrum driver to update
the ASIC.

Patch 9 removes no longer used code.

Patch 10 adds a testcase for IPv6 multipath with L4 hash.

v3
- comments from Ido:
  - removed fib_info arg in patch 1; left by mistake on rebase to net-next
  - removed __get_hash_from_flowi4 declaration
  - line wrap change to spectrum_router.c to maintain 80 chars

v2
- rebased to top of tree
- added refactor of fib_multipath_hash following recent change
- plumb skb through lookup functions to multipath selection
- fix sysctl setting; was missing the data set in ipv6_sysctl_net_init
- added test case

RFC to v1:
- rebase to top of net-next
- fix addr_type in hash_keys and removed flow label as noticed by Ido
- added a comment to cover letter about choice in algorithms based on
  use case per Or's comments
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents e871cae7 91a5c1ec
...@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN ...@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
FALSE: disabled FALSE: disabled
Default: FALSE Default: FALSE
fib_multipath_hash_policy - INTEGER
Controls which hash policy to use for multipath routes.
Default: 0 (Layer 3)
Possible values:
0 - Layer 3 (source and destination addresses plus flow label)
1 - Layer 4 (standard 5-tuple)
anycast_src_echo_reply - BOOLEAN anycast_src_echo_reply - BOOLEAN
Controls the use of anycast addresses as source addresses for ICMPv6 Controls the use of anycast addresses as source addresses for ICMPv6
echo reply echo reply
......
...@@ -1334,7 +1334,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev, ...@@ -1334,7 +1334,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev,
IPV6_ADDR_LINKLOCAL; IPV6_ADDR_LINKLOCAL;
struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr, struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
&src_addr->sin6_addr, net_dev->ifindex, &src_addr->sin6_addr, net_dev->ifindex,
strict); NULL, strict);
bool ret; bool ret;
if (!rt) if (!rt)
......
...@@ -2430,7 +2430,8 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, ...@@ -2430,7 +2430,8 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
mlxsw_core_schedule_work(&net_work->work); mlxsw_core_schedule_work(&net_work->work);
mlxsw_sp_port_dev_put(mlxsw_sp_port); mlxsw_sp_port_dev_put(mlxsw_sp_port);
break; break;
case NETEVENT_MULTIPATH_HASH_UPDATE: case NETEVENT_IPV4_MPATH_HASH_UPDATE:
case NETEVENT_IPV6_MPATH_HASH_UPDATE:
net = ptr; net = ptr;
if (!net_eq(net, &init_net)) if (!net_eq(net, &init_net))
...@@ -7030,13 +7031,25 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl) ...@@ -7030,13 +7031,25 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
static void mlxsw_sp_mp6_hash_init(char *recr2_pl) static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
{ {
bool only_l3 = !init_net.ipv6.sysctl.multipath_hash_policy;
mlxsw_sp_mp_hash_header_set(recr2_pl, mlxsw_sp_mp_hash_header_set(recr2_pl,
MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP); MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP); mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl); mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl); mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER); mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
if (only_l3) {
mlxsw_sp_mp_hash_field_set(recr2_pl,
MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
} else {
mlxsw_sp_mp_hash_header_set(recr2_pl,
MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
mlxsw_sp_mp_hash_field_set(recr2_pl,
MLXSW_REG_RECR2_TCP_UDP_SPORT);
mlxsw_sp_mp_hash_field_set(recr2_pl,
MLXSW_REG_RECR2_TCP_UDP_DPORT);
}
} }
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
......
...@@ -817,7 +817,8 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, ...@@ -817,7 +817,8 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
}; };
skb_dst_drop(skb); skb_dst_drop(skb);
dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags); dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
skb, flags);
skb_dst_set(skb, dst); skb_dst_set(skb, dst);
break; break;
} }
......
...@@ -941,6 +941,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net, ...@@ -941,6 +941,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
const struct net_device *dev, const struct net_device *dev,
struct flowi6 *fl6, struct flowi6 *fl6,
int ifindex, int ifindex,
const struct sk_buff *skb,
int flags) int flags)
{ {
struct net_vrf *vrf = netdev_priv(dev); struct net_vrf *vrf = netdev_priv(dev);
...@@ -959,7 +960,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net, ...@@ -959,7 +960,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
if (!table) if (!table)
return NULL; return NULL;
return ip6_pol_route(net, table, ifindex, fl6, flags); return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
} }
static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
...@@ -977,7 +978,7 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, ...@@ -977,7 +978,7 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
struct net *net = dev_net(vrf_dev); struct net *net = dev_net(vrf_dev);
struct rt6_info *rt6; struct rt6_info *rt6;
rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE); RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
if (unlikely(!rt6)) if (unlikely(!rt6))
return; return;
...@@ -1110,7 +1111,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, ...@@ -1110,7 +1111,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
if (!ipv6_addr_any(&fl6->saddr)) if (!ipv6_addr_any(&fl6->saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR; flags |= RT6_LOOKUP_F_HAS_SADDR;
rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags);
if (rt) if (rt)
dst = &rt->dst; dst = &rt->dst;
......
...@@ -47,6 +47,7 @@ struct fib_rule { ...@@ -47,6 +47,7 @@ struct fib_rule {
struct fib_lookup_arg { struct fib_lookup_arg {
void *lookup_ptr; void *lookup_ptr;
const void *lookup_data;
void *result; void *result;
struct fib_rule *rule; struct fib_rule *rule;
u32 table; u32 table;
......
...@@ -222,20 +222,4 @@ static inline unsigned int flow_key_size(u16 family) ...@@ -222,20 +222,4 @@ static inline unsigned int flow_key_size(u16 family)
__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
{
struct flow_keys keys;
return __get_hash_from_flowi6(fl6, &keys);
}
__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys);
static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4)
{
struct flow_keys keys;
return __get_hash_from_flowi4(fl4, &keys);
}
#endif #endif
...@@ -350,7 +350,8 @@ struct fib6_table { ...@@ -350,7 +350,8 @@ struct fib6_table {
typedef struct rt6_info *(*pol_lookup_t)(struct net *, typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_table *, struct fib6_table *,
struct flowi6 *, int); struct flowi6 *,
const struct sk_buff *, int);
struct fib6_entry_notifier_info { struct fib6_entry_notifier_info {
struct fib_notifier_info info; /* must be first */ struct fib_notifier_info info; /* must be first */
...@@ -364,6 +365,7 @@ struct fib6_entry_notifier_info { ...@@ -364,6 +365,7 @@ struct fib6_entry_notifier_info {
struct fib6_table *fib6_get_table(struct net *net, u32 id); struct fib6_table *fib6_get_table(struct net *net, u32 id);
struct fib6_table *fib6_new_table(struct net *net, u32 id); struct fib6_table *fib6_new_table(struct net *net, u32 id);
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
const struct sk_buff *skb,
int flags, pol_lookup_t lookup); int flags, pol_lookup_t lookup);
struct fib6_node *fib6_lookup(struct fib6_node *root, struct fib6_node *fib6_lookup(struct fib6_node *root,
......
...@@ -75,7 +75,8 @@ static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt) ...@@ -75,7 +75,8 @@ static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
void ip6_route_input(struct sk_buff *skb); void ip6_route_input(struct sk_buff *skb);
struct dst_entry *ip6_route_input_lookup(struct net *net, struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev, struct net_device *dev,
struct flowi6 *fl6, int flags); struct flowi6 *fl6,
const struct sk_buff *skb, int flags);
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags); struct flowi6 *fl6, int flags);
...@@ -88,9 +89,10 @@ static inline struct dst_entry *ip6_route_output(struct net *net, ...@@ -88,9 +89,10 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
} }
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
int flags); const struct sk_buff *skb, int flags);
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int ifindex, struct flowi6 *fl6, int flags); int ifindex, struct flowi6 *fl6,
const struct sk_buff *skb, int flags);
void ip6_route_init_special_entries(void); void ip6_route_init_special_entries(void);
int ip6_route_init(void); int ip6_route_init(void);
...@@ -126,9 +128,10 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, ...@@ -126,9 +128,10 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
} }
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
const struct in6_addr *saddr, int oif, int flags); const struct in6_addr *saddr, int oif,
u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, const struct sk_buff *skb, int flags);
struct flow_keys *hkeys); u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
const struct sk_buff *skb, struct flow_keys *hkeys);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
......
...@@ -395,7 +395,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local); ...@@ -395,7 +395,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local);
int fib_sync_up(struct net_device *dev, unsigned int nh_flags); int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
#ifdef CONFIG_IP_ROUTE_MULTIPATH #ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
const struct sk_buff *skb, struct flow_keys *flkeys); const struct sk_buff *skb, struct flow_keys *flkeys);
#endif #endif
void fib_select_multipath(struct fib_result *res, int hash); void fib_select_multipath(struct fib_result *res, int hash);
......
...@@ -26,7 +26,8 @@ enum netevent_notif_type { ...@@ -26,7 +26,8 @@ enum netevent_notif_type {
NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */
NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */ NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
}; };
int register_netevent_notifier(struct notifier_block *nb); int register_netevent_notifier(struct notifier_block *nb);
......
...@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { ...@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
int ip6_rt_gc_elasticity; int ip6_rt_gc_elasticity;
int ip6_rt_mtu_expires; int ip6_rt_mtu_expires;
int ip6_rt_min_advmss; int ip6_rt_min_advmss;
int multipath_hash_policy;
int flowlabel_consistency; int flowlabel_consistency;
int auto_flowlabels; int auto_flowlabels;
int icmpv6_time; int icmpv6_time;
......
...@@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys) ...@@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
} }
EXPORT_SYMBOL(__get_hash_from_flowi6); EXPORT_SYMBOL(__get_hash_from_flowi6);
/*
 * Populate *keys from an IPv4 flow descriptor and return the value of
 * flow_hash_from_keys() over it.
 *
 * @fl4:  flow descriptor supplying addresses, ports, GRE key and protocol
 * @keys: caller-provided storage; it is zeroed first, so every field not
 *        assigned below is left as zero
 */
__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
{
	memset(keys, 0, sizeof(*keys));

	/* Mark the address union as IPv4 and copy the L3 addresses. */
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	keys->addrs.v4addrs.dst = fl4->daddr;
	keys->addrs.v4addrs.src = fl4->saddr;

	/* Protocol, ports and GRE key as carried by the flow descriptor. */
	keys->basic.ip_proto = fl4->flowi4_proto;
	keys->ports.dst = fl4->fl4_dport;
	keys->ports.src = fl4->fl4_sport;
	keys->keyid.keyid = fl4->fl4_gre_key;

	return flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(__get_hash_from_flowi4);
static const struct flow_dissector_key flow_keys_dissector_keys[] = { static const struct flow_dissector_key flow_keys_dissector_keys[] = {
{ {
.key_id = FLOW_DISSECTOR_KEY_CONTROL, .key_id = FLOW_DISSECTOR_KEY_CONTROL,
......
...@@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result *res, ...@@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
#ifdef CONFIG_IP_ROUTE_MULTIPATH #ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi->fib_nhs > 1) { if (res->fi->fib_nhs > 1) {
int h = fib_multipath_hash(res->fi, fl4, skb, NULL); int h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h); fib_select_multipath(res, h);
} }
......
...@@ -1748,44 +1748,45 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb, ...@@ -1748,44 +1748,45 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
struct flow_keys *hash_keys) struct flow_keys *hash_keys)
{ {
const struct iphdr *outer_iph = ip_hdr(skb); const struct iphdr *outer_iph = ip_hdr(skb);
const struct iphdr *key_iph = outer_iph;
const struct iphdr *inner_iph; const struct iphdr *inner_iph;
const struct icmphdr *icmph; const struct icmphdr *icmph;
struct iphdr _inner_iph; struct iphdr _inner_iph;
struct icmphdr _icmph; struct icmphdr _icmph;
hash_keys->addrs.v4addrs.src = outer_iph->saddr;
hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
if (likely(outer_iph->protocol != IPPROTO_ICMP)) if (likely(outer_iph->protocol != IPPROTO_ICMP))
return; goto out;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
return; goto out;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph); &_icmph);
if (!icmph) if (!icmph)
return; goto out;
if (icmph->type != ICMP_DEST_UNREACH && if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT && icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED && icmph->type != ICMP_TIME_EXCEEDED &&
icmph->type != ICMP_PARAMETERPROB) icmph->type != ICMP_PARAMETERPROB)
return; goto out;
inner_iph = skb_header_pointer(skb, inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph), outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph); sizeof(_inner_iph), &_inner_iph);
if (!inner_iph) if (!inner_iph)
return; goto out;
hash_keys->addrs.v4addrs.src = inner_iph->saddr;
hash_keys->addrs.v4addrs.dst = inner_iph->daddr; key_iph = inner_iph;
out:
hash_keys->addrs.v4addrs.src = key_iph->saddr;
hash_keys->addrs.v4addrs.dst = key_iph->daddr;
} }
/* if skb is set it will be used and fl4 can be NULL */ /* if skb is set it will be used and fl4 can be NULL */
int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
const struct sk_buff *skb, struct flow_keys *flkeys) const struct sk_buff *skb, struct flow_keys *flkeys)
{ {
struct net *net = fi->fib_net;
struct flow_keys hash_keys; struct flow_keys hash_keys;
u32 mhash; u32 mhash;
...@@ -1809,24 +1810,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, ...@@ -1809,24 +1810,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
/* short-circuit if we already have L4 hash present */ /* short-circuit if we already have L4 hash present */
if (skb->l4_hash) if (skb->l4_hash)
return skb_get_hash_raw(skb) >> 1; return skb_get_hash_raw(skb) >> 1;
memset(&hash_keys, 0, sizeof(hash_keys)); memset(&hash_keys, 0, sizeof(hash_keys));
if (flkeys) { if (!flkeys) {
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
hash_keys.ports.src = flkeys->ports.src;
hash_keys.ports.dst = flkeys->ports.dst;
hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
} else {
skb_flow_dissect_flow_keys(skb, &keys, flag); skb_flow_dissect_flow_keys(skb, &keys, flag);
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; flkeys = &keys;
hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
hash_keys.ports.src = keys.ports.src;
hash_keys.ports.dst = keys.ports.dst;
hash_keys.basic.ip_proto = keys.basic.ip_proto;
} }
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
hash_keys.ports.src = flkeys->ports.src;
hash_keys.ports.dst = flkeys->ports.dst;
hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
} else { } else {
memset(&hash_keys, 0, sizeof(hash_keys)); memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
...@@ -1852,7 +1849,7 @@ static int ip_mkroute_input(struct sk_buff *skb, ...@@ -1852,7 +1849,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
{ {
#ifdef CONFIG_IP_ROUTE_MULTIPATH #ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) { if (res->fi && res->fi->fib_nhs > 1) {
int h = fib_multipath_hash(res->fi, NULL, skb, hkeys); int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h); fib_select_multipath(res, h);
} }
......
...@@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, ...@@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0) if (write && ret == 0)
call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net); call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
return ret; return ret;
} }
......
...@@ -78,7 +78,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) ...@@ -78,7 +78,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) { if (ifindex == 0) {
struct rt6_info *rt; struct rt6_info *rt;
rt = rt6_lookup(net, addr, NULL, 0, 0); rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) { if (rt) {
dev = rt->dst.dev; dev = rt->dst.dev;
ip6_rt_put(rt); ip6_rt_put(rt);
......
...@@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net) ...@@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net)
} }
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
const struct sk_buff *skb,
int flags, pol_lookup_t lookup) int flags, pol_lookup_t lookup)
{ {
if (net->ipv6.fib6_has_custom_rules) { if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = { struct fib_lookup_arg arg = {
.lookup_ptr = lookup, .lookup_ptr = lookup,
.lookup_data = skb,
.flags = FIB_LOOKUP_NOREF, .flags = FIB_LOOKUP_NOREF,
}; };
...@@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, ...@@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
} else { } else {
struct rt6_info *rt; struct rt6_info *rt;
rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags); rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN) if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
return &rt->dst; return &rt->dst;
ip6_rt_put(rt); ip6_rt_put(rt);
rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error != -EAGAIN) if (rt->dst.error != -EAGAIN)
return &rt->dst; return &rt->dst;
ip6_rt_put(rt); ip6_rt_put(rt);
...@@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, ...@@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto out; goto out;
} }
rt = lookup(net, table, flp6, flags); rt = lookup(net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) { if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule; struct fib6_rule *r = (struct fib6_rule *)rule;
......
...@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, ...@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type; fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code; fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL); fl6.flowi6_uid = sock_net_uid(net, NULL);
fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL); fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net); sk = icmpv6_xmit_lock(net);
...@@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, ...@@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
skb_pull(skb2, nhs); skb_pull(skb2, nhs);
skb_reset_network_header(skb2); skb_reset_network_header(skb2);
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
skb, 0);
if (rt && rt->dst.dev) if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev; skb2->dev = rt->dst.dev;
......
...@@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) ...@@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
} }
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
const struct sk_buff *skb,
int flags, pol_lookup_t lookup) int flags, pol_lookup_t lookup)
{ {
struct rt6_info *rt; struct rt6_info *rt;
rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error == -EAGAIN) { if (rt->dst.error == -EAGAIN) {
ip6_rt_put(rt); ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry; rt = net->ipv6.ip6_null_entry;
......
...@@ -1053,7 +1053,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) ...@@ -1053,7 +1053,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
struct rt6_info *rt = rt6_lookup(t->net, struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr, &p->raddr, &p->laddr,
p->link, strict); p->link, NULL, strict);
if (!rt) if (!rt)
return; return;
......
...@@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ...@@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Try to guess incoming interface */ /* Try to guess incoming interface */
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
NULL, 0, 0); NULL, 0, skb2, 0);
if (rt && rt->dst.dev) if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev; skb2->dev = rt->dst.dev;
...@@ -1444,7 +1444,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) ...@@ -1444,7 +1444,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct rt6_info *rt = rt6_lookup(t->net, struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr, &p->raddr, &p->laddr,
p->link, strict); p->link, NULL, strict);
if (!rt) if (!rt)
return; return;
......
...@@ -645,7 +645,7 @@ static void vti6_link_config(struct ip6_tnl *t) ...@@ -645,7 +645,7 @@ static void vti6_link_config(struct ip6_tnl *t)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct rt6_info *rt = rt6_lookup(t->net, struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr, &p->raddr, &p->laddr,
p->link, strict); p->link, NULL, strict);
if (rt) if (rt)
tdev = rt->dst.dev; tdev = rt->dst.dev;
......
...@@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) ...@@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) { if (ifindex == 0) {
struct rt6_info *rt; struct rt6_info *rt;
rt = rt6_lookup(net, addr, NULL, 0, 0); rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) { if (rt) {
dev = rt->dst.dev; dev = rt->dst.dev;
ip6_rt_put(rt); ip6_rt_put(rt);
...@@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, ...@@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
struct inet6_dev *idev = NULL; struct inet6_dev *idev = NULL;
if (ifindex == 0) { if (ifindex == 0) {
struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0); struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
if (rt) { if (rt) {
dev = rt->dst.dev; dev = rt->dst.dev;
......
...@@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, ...@@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
lookup_flags |= RT6_LOOKUP_F_IFACE; lookup_flags |= RT6_LOOKUP_F_IFACE;
} }
rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags); rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
if (rt->dst.error) if (rt->dst.error)
goto out; goto out;
......
...@@ -181,7 +181,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, ...@@ -181,7 +181,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
*dest = 0; *dest = 0;
again: again:
rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags); rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
lookup_flags);
if (rt->dst.error) if (rt->dst.error)
goto put_rt_err; goto put_rt_err;
......
This diff is collapsed.
...@@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, ...@@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
if (!tbl_id) { if (!tbl_id) {
dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags); dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
} else { } else {
struct fib6_table *table; struct fib6_table *table;
...@@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, ...@@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
if (!table) if (!table)
goto out; goto out;
rt = ip6_pol_route(net, table, 0, &fl6, flags); rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
dst = &rt->dst; dst = &rt->dst;
} }
......
...@@ -16,14 +16,31 @@ ...@@ -16,14 +16,31 @@
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/addrconf.h> #include <net/addrconf.h>
#include <net/inet_frag.h> #include <net/inet_frag.h>
#include <net/netevent.h>
#ifdef CONFIG_NETLABEL #ifdef CONFIG_NETLABEL
#include <net/calipso.h> #include <net/calipso.h>
#endif #endif
static int zero;
static int one = 1; static int one = 1;
static int auto_flowlabels_min; static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
/*
 * sysctl handler for the IPv6 multipath hash policy.
 *
 * The value itself is read/written by proc_dointvec_minmax(). After a
 * successful write, netevent listeners are notified with
 * NETEVENT_IPV6_MPATH_HASH_UPDATE and the owning struct net as argument.
 *
 * Returns whatever proc_dointvec_minmax() returned.
 */
static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
					  void __user *buffer, size_t *lenp,
					  loff_t *ppos)
{
	/*
	 * table->data points at multipath_hash_policy embedded in struct net,
	 * so the enclosing namespace can be recovered from it.
	 */
	struct net *net = container_of(table->data, struct net,
				       ipv6.sysctl.multipath_hash_policy);
	int err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	/* Reads and failed writes do not generate an event. */
	if (!write || err != 0)
		return err;

	call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
	return err;
}
static struct ctl_table ipv6_table_template[] = { static struct ctl_table ipv6_table_template[] = {
{ {
...@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = { ...@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
{
.procname = "fib_multipath_hash_policy",
.data = &init_net.ipv6.sysctl.multipath_hash_policy,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_rt6_multipath_hash_policy,
.extra1 = &zero,
.extra2 = &one,
},
{ } { }
}; };
...@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ...@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt; ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len; ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len; ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
ipv6_route_table = ipv6_route_sysctl_init(net); ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table) if (!ipv6_route_table)
......
...@@ -235,6 +235,45 @@ multipath4_test() ...@@ -235,6 +235,45 @@ multipath4_test()
sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
} }
# Check IPv6 multipath distribution with the L4 hash policy selected.
#   $1 - description handed to multipath_eval
#   $2 - weight for the nexthop via $rp12
#   $3 - weight for the nexthop via $rp13
# The value of net.ipv6.fib_multipath_hash_policy found on entry is saved and
# written back before returning.
multipath6_l4_test()
{
local desc="$1"
local weight_rp12=$2
local weight_rp13=$3
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
local hash_policy
# Transmit multiple flows from h1 to h2 and make sure they are
# distributed between both multipath links (rp12 and rp13)
# according to the configured weights.
# Save the current policy, then switch to policy 1 for the duration of the test.
hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
# Install the two-nexthop route in vrf-r1 with the requested weights.
ip route replace 2001:db8:2::/64 vrf vrf-r1 \
nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
# Snapshot the TX packet counters of both links before sending traffic.
t0_rp12=$(link_stats_tx_packets_get $rp12)
t0_rp13=$(link_stats_tx_packets_get $rp13)
# UDP traffic between fixed addresses; the sp=/dp= string gives source port
# 1024 and a destination port range of 0-32768.
# NOTE(review): exact option semantics belong to the $MZ tool - confirm there.
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
-d 1msec -t udp "sp=1024,dp=0-32768"
# Second snapshot; the per-link deltas are what multipath_eval judges.
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
let "packets_rp12 = $t1_rp12 - $t0_rp12"
let "packets_rp13 = $t1_rp13 - $t0_rp13"
multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
# Put the route back without explicit weights and restore the saved policy.
ip route replace 2001:db8:2::/64 vrf vrf-r1 \
nexthop via fe80:2::22 dev $rp12 \
nexthop via fe80:3::23 dev $rp13
sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
}
multipath6_test() multipath6_test()
{ {
local desc="$1" local desc="$1"
...@@ -278,6 +317,11 @@ multipath_test() ...@@ -278,6 +317,11 @@ multipath_test()
multipath6_test "ECMP" 1 1 multipath6_test "ECMP" 1 1
multipath6_test "Weighted MP 2:1" 2 1 multipath6_test "Weighted MP 2:1" 2 1
multipath6_test "Weighted MP 11:45" 11 45 multipath6_test "Weighted MP 11:45" 11 45
log_info "Running IPv6 L4 hash multipath tests"
multipath6_l4_test "ECMP" 1 1
multipath6_l4_test "Weighted MP 2:1" 2 1
multipath6_l4_test "Weighted MP 11:45" 11 45
} }
setup_prepare() setup_prepare()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment