Commit 7b8fc010, authored by Jarod Wilson, committed by Jakub Kicinski

bonding: add a vlan+srcmac tx hashing option

This comes from an end-user request, where they're running multiple VMs on
hosts with bonded interfaces connected to some interesting switch topologies,
where 802.3ad isn't an option. They're currently running a proprietary
solution that effectively achieves load-balancing of VMs and bandwidth
utilization improvements with a similar form of transmission algorithm.

Basically, each VM has its own vlan, so it always sends its traffic out
the same interface, unless that interface fails. Traffic gets split
between the interfaces, maintaining a consistent path, with failover still
available if an interface goes down.

Unlike bond_eth_hash(), this hash function is using the full source MAC
address instead of just the last byte, as there are so few components to
the hash, and in the no-vlan case, we would be returning just the last
byte of the source MAC as the hash value. It's entirely possible to have
two NICs in a bond with the same last byte of their MAC, but not the same
MAC, so this adjustment should guarantee distinct hashes in all cases.

This has been rudimentarily tested to provide similar results to the
proprietary solution it is aiming to replace. A patch for iproute2 is also
posted, to properly support the new mode there as well.

Cc: Jay Vosburgh <j.vosburgh@gmail.com>
Cc: Veaceslav Falico <vfalico@gmail.com>
Cc: Andy Gospodarek <andy@greyhouse.net>
Cc: Thomas Davis <tadavis@lbl.gov>
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Link: https://lore.kernel.org/r/20210119010927.1191922-1-jarod@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 00b229f7
...@@ -951,6 +951,19 @@ xmit_hash_policy ...@@ -951,6 +951,19 @@ xmit_hash_policy
packets will be distributed according to the encapsulated packets will be distributed according to the encapsulated
flows. flows.
vlan+srcmac
This policy uses a very rudimentary vlan ID and source mac
hash to load-balance traffic per-vlan, with failover
should one leg fail. The intended use case is for a bond
shared by multiple virtual machines, all configured to
use their own vlan, to give lacp-like functionality
without requiring lacp-capable switching hardware.
The formula for the hash is simply
hash = (vlan ID) XOR (source MAC vendor) XOR (source MAC dev)
The default value is layer2. This option was added in bonding The default value is layer2. This option was added in bonding
version 2.6.3. In earlier versions of bonding, this parameter version 2.6.3. In earlier versions of bonding, this parameter
does not exist, and the layer2 policy is the only policy. The does not exist, and the layer2 policy is the only policy. The
......
...@@ -167,7 +167,7 @@ module_param(xmit_hash_policy, charp, 0); ...@@ -167,7 +167,7 @@ module_param(xmit_hash_policy, charp, 0);
MODULE_PARM_DESC(xmit_hash_policy, "balance-alb, balance-tlb, balance-xor, 802.3ad hashing method; " MODULE_PARM_DESC(xmit_hash_policy, "balance-alb, balance-tlb, balance-xor, 802.3ad hashing method; "
"0 for layer 2 (default), 1 for layer 3+4, " "0 for layer 2 (default), 1 for layer 3+4, "
"2 for layer 2+3, 3 for encap layer 2+3, " "2 for layer 2+3, 3 for encap layer 2+3, "
"4 for encap layer 3+4"); "4 for encap layer 3+4, 5 for vlan+srcmac");
module_param(arp_interval, int, 0); module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0); module_param_array(arp_ip_target, charp, NULL, 0);
...@@ -1457,6 +1457,8 @@ static enum netdev_lag_hash bond_lag_hash_type(struct bonding *bond, ...@@ -1457,6 +1457,8 @@ static enum netdev_lag_hash bond_lag_hash_type(struct bonding *bond,
return NETDEV_LAG_HASH_E23; return NETDEV_LAG_HASH_E23;
case BOND_XMIT_POLICY_ENCAP34: case BOND_XMIT_POLICY_ENCAP34:
return NETDEV_LAG_HASH_E34; return NETDEV_LAG_HASH_E34;
case BOND_XMIT_POLICY_VLAN_SRCMAC:
return NETDEV_LAG_HASH_VLAN_SRCMAC;
default: default:
return NETDEV_LAG_HASH_UNKNOWN; return NETDEV_LAG_HASH_UNKNOWN;
} }
...@@ -3519,6 +3521,27 @@ static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, ...@@ -3519,6 +3521,27 @@ static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
return true; return true;
} }
static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
{
struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
u32 srcmac_vendor = 0, srcmac_dev = 0;
u16 vlan;
int i;
for (i = 0; i < 3; i++)
srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i];
for (i = 3; i < ETH_ALEN; i++)
srcmac_dev = (srcmac_dev << 8) | mac_hdr->h_source[i];
if (!skb_vlan_tag_present(skb))
return srcmac_vendor ^ srcmac_dev;
vlan = skb_vlan_tag_get(skb);
return vlan ^ srcmac_vendor ^ srcmac_dev;
}
/* Extract the appropriate headers based on bond's xmit policy */ /* Extract the appropriate headers based on bond's xmit policy */
static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
struct flow_keys *fk) struct flow_keys *fk)
...@@ -3526,10 +3549,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, ...@@ -3526,10 +3549,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34; bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
int noff, proto = -1; int noff, proto = -1;
if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) { switch (bond->params.xmit_policy) {
case BOND_XMIT_POLICY_ENCAP23:
case BOND_XMIT_POLICY_ENCAP34:
memset(fk, 0, sizeof(*fk)); memset(fk, 0, sizeof(*fk));
return __skb_flow_dissect(NULL, skb, &flow_keys_bonding, return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
fk, NULL, 0, 0, 0, 0); fk, NULL, 0, 0, 0, 0);
default:
break;
} }
fk->ports.ports = 0; fk->ports.ports = 0;
...@@ -3591,6 +3618,9 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) ...@@ -3591,6 +3618,9 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
skb->l4_hash) skb->l4_hash)
return skb->hash; return skb->hash;
if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
return bond_vlan_srcmac_hash(skb);
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
!bond_flow_dissect(bond, skb, &flow)) !bond_flow_dissect(bond, skb, &flow))
return bond_eth_hash(skb); return bond_eth_hash(skb);
......
...@@ -101,6 +101,7 @@ static const struct bond_opt_value bond_xmit_hashtype_tbl[] = { ...@@ -101,6 +101,7 @@ static const struct bond_opt_value bond_xmit_hashtype_tbl[] = {
{ "layer2+3", BOND_XMIT_POLICY_LAYER23, 0}, { "layer2+3", BOND_XMIT_POLICY_LAYER23, 0},
{ "encap2+3", BOND_XMIT_POLICY_ENCAP23, 0}, { "encap2+3", BOND_XMIT_POLICY_ENCAP23, 0},
{ "encap3+4", BOND_XMIT_POLICY_ENCAP34, 0}, { "encap3+4", BOND_XMIT_POLICY_ENCAP34, 0},
{ "vlan+srcmac", BOND_XMIT_POLICY_VLAN_SRCMAC, 0},
{ NULL, -1, 0}, { NULL, -1, 0},
}; };
......
...@@ -2617,6 +2617,7 @@ enum netdev_lag_hash { ...@@ -2617,6 +2617,7 @@ enum netdev_lag_hash {
NETDEV_LAG_HASH_L23, NETDEV_LAG_HASH_L23,
NETDEV_LAG_HASH_E23, NETDEV_LAG_HASH_E23,
NETDEV_LAG_HASH_E34, NETDEV_LAG_HASH_E34,
NETDEV_LAG_HASH_VLAN_SRCMAC,
NETDEV_LAG_HASH_UNKNOWN, NETDEV_LAG_HASH_UNKNOWN,
}; };
......
...@@ -94,6 +94,7 @@ ...@@ -94,6 +94,7 @@
#define BOND_XMIT_POLICY_LAYER23 2 /* layer 2+3 (IP ^ MAC) */ #define BOND_XMIT_POLICY_LAYER23 2 /* layer 2+3 (IP ^ MAC) */
#define BOND_XMIT_POLICY_ENCAP23 3 /* encapsulated layer 2+3 */ #define BOND_XMIT_POLICY_ENCAP23 3 /* encapsulated layer 2+3 */
#define BOND_XMIT_POLICY_ENCAP34 4 /* encapsulated layer 3+4 */ #define BOND_XMIT_POLICY_ENCAP34 4 /* encapsulated layer 3+4 */
#define BOND_XMIT_POLICY_VLAN_SRCMAC 5 /* vlan + source MAC */
/* 802.3ad port state definitions (43.4.2.2 in the 802.3ad standard) */ /* 802.3ad port state definitions (43.4.2.2 in the 802.3ad standard) */
#define LACP_STATE_LACP_ACTIVITY 0x1 #define LACP_STATE_LACP_ACTIVITY 0x1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment