Commit b4bac172 authored by David Ahern, committed by David S. Miller

net/ipv6: Add support for path selection using hash of 5-tuple

Some operators prefer IPv6 path selection to use a standard 5-tuple
hash rather than just an L3 hash with the flow label. To that end
add support to IPv6 for multipath hash policy similar to bf4e0a3d
("net: ipv4: add support for ECMP hash policy choice"). The default
is still L3 which covers source and destination addresses along with
flow label and IPv6 protocol.
Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Tested-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent b75cc8f9
...@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN ...@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
FALSE: disabled FALSE: disabled
Default: FALSE Default: FALSE
fib_multipath_hash_policy - INTEGER
Controls which hash policy to use for multipath routes.
Default: 0 (Layer 3)
Possible values:
0 - Layer 3 (source and destination addresses plus flow label and protocol)
1 - Layer 4 (standard 5-tuple)
anycast_src_echo_reply - BOOLEAN anycast_src_echo_reply - BOOLEAN
Controls the use of anycast addresses as source addresses for ICMPv6 Controls the use of anycast addresses as source addresses for ICMPv6
echo reply echo reply
......
...@@ -130,8 +130,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, ...@@ -130,8 +130,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
const struct in6_addr *saddr, int oif, const struct in6_addr *saddr, int oif,
const struct sk_buff *skb, int flags); const struct sk_buff *skb, int flags);
u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
struct flow_keys *hkeys); const struct sk_buff *skb, struct flow_keys *hkeys);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
......
...@@ -27,6 +27,7 @@ enum netevent_notif_type { ...@@ -27,6 +27,7 @@ enum netevent_notif_type {
NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */
NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */ NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
}; };
int register_netevent_notifier(struct notifier_block *nb); int register_netevent_notifier(struct notifier_block *nb);
......
...@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { ...@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
int ip6_rt_gc_elasticity; int ip6_rt_gc_elasticity;
int ip6_rt_mtu_expires; int ip6_rt_mtu_expires;
int ip6_rt_min_advmss; int ip6_rt_min_advmss;
int multipath_hash_policy;
int flowlabel_consistency; int flowlabel_consistency;
int auto_flowlabels; int auto_flowlabels;
int icmpv6_time; int icmpv6_time;
......
...@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, ...@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type; fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code; fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL); fl6.flowi6_uid = sock_net_uid(net, NULL);
fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL); fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net); sk = icmpv6_xmit_lock(net);
......
...@@ -450,7 +450,8 @@ static bool rt6_check_expired(const struct rt6_info *rt) ...@@ -450,7 +450,8 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false; return false;
} }
static struct rt6_info *rt6_multipath_select(struct rt6_info *match, static struct rt6_info *rt6_multipath_select(const struct net *net,
struct rt6_info *match,
struct flowi6 *fl6, int oif, struct flowi6 *fl6, int oif,
const struct sk_buff *skb, const struct sk_buff *skb,
int strict) int strict)
...@@ -461,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, ...@@ -461,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
* case it will always be non-zero. Otherwise now is the time to do it. * case it will always be non-zero. Otherwise now is the time to do it.
*/ */
if (!fl6->mp_hash) if (!fl6->mp_hash)
fl6->mp_hash = rt6_multipath_hash(fl6, skb, NULL); fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
return match; return match;
...@@ -932,7 +933,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, ...@@ -932,7 +933,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
rt = rt6_device_match(net, rt, &fl6->saddr, rt = rt6_device_match(net, rt, &fl6->saddr,
fl6->flowi6_oif, flags); fl6->flowi6_oif, flags);
if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
skb, flags); skb, flags);
} }
if (rt == net->ipv6.ip6_null_entry) { if (rt == net->ipv6.ip6_null_entry) {
...@@ -1674,7 +1675,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, ...@@ -1674,7 +1675,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
redo_rt6_select: redo_rt6_select:
rt = rt6_select(net, fn, oif, strict); rt = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings) if (rt->rt6i_nsiblings)
rt = rt6_multipath_select(rt, fl6, oif, skb, strict); rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
if (rt == net->ipv6.ip6_null_entry) { if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr); fn = fib6_backtrack(fn, &fl6->saddr);
if (fn) if (fn)
...@@ -1839,21 +1840,56 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb, ...@@ -1839,21 +1840,56 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
} }
/* if skb is set it will be used and fl6 can be NULL */ /* if skb is set it will be used and fl6 can be NULL */
u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
struct flow_keys *flkeys) const struct sk_buff *skb, struct flow_keys *flkeys)
{ {
struct flow_keys hash_keys; struct flow_keys hash_keys;
u32 mhash; u32 mhash;
memset(&hash_keys, 0, sizeof(hash_keys)); switch (net->ipv6.sysctl.multipath_hash_policy) {
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; case 0:
if (skb) { memset(&hash_keys, 0, sizeof(hash_keys));
ip6_multipath_l3_keys(skb, &hash_keys, flkeys); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
} else { if (skb) {
hash_keys.addrs.v6addrs.src = fl6->saddr; ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
hash_keys.addrs.v6addrs.dst = fl6->daddr; } else {
hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; hash_keys.addrs.v6addrs.src = fl6->saddr;
hash_keys.basic.ip_proto = fl6->flowi6_proto; hash_keys.addrs.v6addrs.dst = fl6->daddr;
hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
break;
case 1:
if (skb) {
unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
struct flow_keys keys;
/* short-circuit if we already have L4 hash present */
if (skb->l4_hash)
return skb_get_hash_raw(skb) >> 1;
memset(&hash_keys, 0, sizeof(hash_keys));
if (!flkeys) {
skb_flow_dissect_flow_keys(skb, &keys, flag);
flkeys = &keys;
}
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
hash_keys.ports.src = flkeys->ports.src;
hash_keys.ports.dst = flkeys->ports.dst;
hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
} else {
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
hash_keys.addrs.v6addrs.src = fl6->saddr;
hash_keys.addrs.v6addrs.dst = fl6->daddr;
hash_keys.ports.src = fl6->fl6_sport;
hash_keys.ports.dst = fl6->fl6_dport;
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
break;
} }
mhash = flow_hash_from_keys(&hash_keys); mhash = flow_hash_from_keys(&hash_keys);
...@@ -1884,7 +1920,7 @@ void ip6_route_input(struct sk_buff *skb) ...@@ -1884,7 +1920,7 @@ void ip6_route_input(struct sk_buff *skb)
flkeys = &_flkeys; flkeys = &_flkeys;
if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys); fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
skb_dst_drop(skb); skb_dst_drop(skb);
skb_dst_set(skb, skb_dst_set(skb,
ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
......
...@@ -16,14 +16,31 @@ ...@@ -16,14 +16,31 @@
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/addrconf.h> #include <net/addrconf.h>
#include <net/inet_frag.h> #include <net/inet_frag.h>
#include <net/netevent.h>
#ifdef CONFIG_NETLABEL #ifdef CONFIG_NETLABEL
#include <net/calipso.h> #include <net/calipso.h>
#endif #endif
static int zero;
static int one = 1; static int one = 1;
static int auto_flowlabels_min; static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
/*
 * sysctl handler for the per-netns IPv6 multipath hash policy.
 *
 * The integer read/write itself is delegated to proc_dointvec_minmax().
 * When a write completes without error, netevent listeners are told via
 * NETEVENT_IPV6_MPATH_HASH_UPDATE, with the owning struct net as argument.
 *
 * Returns whatever proc_dointvec_minmax() returned.
 */
static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
					  void __user *buffer, size_t *lenp,
					  loff_t *ppos)
{
	/*
	 * table->data points at the multipath_hash_policy member embedded in
	 * a struct net; step back from it to recover the namespace.
	 */
	struct net *net = container_of(table->data, struct net,
				       ipv6.sysctl.multipath_hash_policy);
	int rc = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	/* Reads and failed writes leave the policy as it was: no event. */
	if (!write || rc != 0)
		return rc;

	call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
	return rc;
}
static struct ctl_table ipv6_table_template[] = { static struct ctl_table ipv6_table_template[] = {
{ {
...@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = { ...@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
{
.procname = "fib_multipath_hash_policy",
.data = &init_net.ipv6.sysctl.multipath_hash_policy,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_rt6_multipath_hash_policy,
.extra1 = &zero,
.extra2 = &one,
},
{ } { }
}; };
...@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ...@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt; ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len; ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len; ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
ipv6_route_table = ipv6_route_sysctl_init(net); ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table) if (!ipv6_route_table)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment