Commit 4253b498 authored by Ido Schimmel, committed by David S. Miller

ipv4: Add custom multipath hash policy

Add a new multipath hash policy where the packet fields used for hash
calculation are determined by user space via the
fib_multipath_hash_fields sysctl that was introduced in the previous
patch.

The current set of available packet fields includes both outer and inner
fields, which requires two invocations of the flow dissector. Avoid
unnecessary dissection of the outer or inner flows by skipping
dissection if none of the outer or inner fields are required.

In accordance with the existing policies, when an skb is not available,
packet fields are extracted from the provided flow key. In that case,
only outer fields are considered.
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent ce5c9c20
......@@ -99,6 +99,8 @@ fib_multipath_hash_policy - INTEGER
- 0 - Layer 3
- 1 - Layer 4
- 2 - Layer 3 or inner Layer 3 if present
- 3 - Custom multipath hash. Fields used for multipath hash calculation
are determined by fib_multipath_hash_fields sysctl
fib_multipath_hash_fields - UNSIGNED INTEGER
When fib_multipath_hash_policy is set to 3 (custom multipath hash), the
......
......@@ -1906,6 +1906,121 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
static u32 fib_multipath_custom_hash_outer(const struct net *net,
const struct sk_buff *skb,
bool *p_has_inner)
{
u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
struct flow_keys keys, hash_keys;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
return 0;
memset(&hash_keys, 0, sizeof(hash_keys));
skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
hash_keys.basic.ip_proto = keys.basic.ip_proto;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
hash_keys.ports.src = keys.ports.src;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
hash_keys.ports.dst = keys.ports.dst;
*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
return flow_hash_from_keys(&hash_keys);
}
static u32 fib_multipath_custom_hash_inner(const struct net *net,
const struct sk_buff *skb,
bool has_inner)
{
u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
struct flow_keys keys, hash_keys;
/* We assume the packet carries an encapsulation, but if none was
* encountered during dissection of the outer flow, then there is no
* point in calling the flow dissector again.
*/
if (!has_inner)
return 0;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK))
return 0;
memset(&hash_keys, 0, sizeof(hash_keys));
skb_flow_dissect_flow_keys(skb, &keys, 0);
if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
return 0;
if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
} else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
hash_keys.tags.flow_label = keys.tags.flow_label;
}
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
hash_keys.basic.ip_proto = keys.basic.ip_proto;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
hash_keys.ports.src = keys.ports.src;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
hash_keys.ports.dst = keys.ports.dst;
return flow_hash_from_keys(&hash_keys);
}
/* Custom multipath hash for a packet: mix the outer and inner hashes.
 *
 * The encapsulation flag starts out true because the outer helper returns
 * before writing it when no outer field is enabled; in that case the inner
 * helper must still get the chance to dissect the packet itself.
 */
static u32 fib_multipath_custom_hash_skb(const struct net *net,
					 const struct sk_buff *skb)
{
	bool encap_seen = true;
	u32 outer_hash = fib_multipath_custom_hash_outer(net, skb, &encap_seen);
	u32 inner_hash = fib_multipath_custom_hash_inner(net, skb, encap_seen);

	return jhash_2words(outer_hash, inner_hash, 0);
}
static u32 fib_multipath_custom_hash_fl4(const struct net *net,
const struct flowi4 *fl4)
{
u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
struct flow_keys hash_keys;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
return 0;
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
hash_keys.addrs.v4addrs.src = fl4->saddr;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
hash_keys.addrs.v4addrs.dst = fl4->daddr;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
hash_keys.basic.ip_proto = fl4->flowi4_proto;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
hash_keys.ports.src = fl4->fl4_sport;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
hash_keys.ports.dst = fl4->fl4_dport;
return flow_hash_from_keys(&hash_keys);
}
/* if skb is set it will be used and fl4 can be NULL */
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
const struct sk_buff *skb, struct flow_keys *flkeys)
......@@ -1991,6 +2106,12 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
}
mhash = flow_hash_from_keys(&hash_keys);
break;
case 3:
if (skb)
mhash = fib_multipath_custom_hash_skb(net, skb);
else
mhash = fib_multipath_custom_hash_fl4(net, fl4);
break;
}
if (multipath_hash)
......
......@@ -30,6 +30,7 @@
#include <net/netevent.h>
/* File-scope integer limits. Sysctl table entries take the address of such
 * a constant in .extra1/.extra2; the entry handled by
 * proc_fib_multipath_hash_policy points .extra2 at 'three' (presumably the
 * highest accepted policy value - confirm against the handler).
 * 'three' carries __maybe_unused, presumably because its only user may be
 * compiled out in some configurations - TODO confirm.
 */
static int two = 2;
static int three __maybe_unused = 3;
static int four = 4;
static int thousand = 1000;
static int tcp_retr1_max = 255;
......@@ -1053,7 +1054,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_fib_multipath_hash_policy,
.extra1 = SYSCTL_ZERO,
.extra2 = &two,
.extra2 = &three,
},
{
.procname = "fib_multipath_hash_fields",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment