Commit 29f52875 authored by David S. Miller

Merge branch 'ICMP-flow-improvements'

Matteo Croce says:

====================
ICMP flow improvements

This series improves the flow inspector handling of ICMP packets:
The first two patches just add some comments to the code, which would have
saved me a few minutes, and refactor a piece of code.
The third one adds to the flow inspector the capability to extract the
Identifier field, if present, so echo requests and replies are classified
as part of the same flow.
The fourth patch uses the function introduced by the third patch in the
bonding driver, so that echo replies can be balanced across bonding slaves.

v1 -> v2:
 - remove unused struct members
 - add a helper to check for the Id field
 - use a local flow_dissector_key in the bonding driver to avoid
   changing the behaviour of the flow dissector
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents c4917bfc 58deb77c
...@@ -200,6 +200,51 @@ atomic_t netpoll_block_tx = ATOMIC_INIT(0); ...@@ -200,6 +200,51 @@ atomic_t netpoll_block_tx = ATOMIC_INIT(0);
unsigned int bond_net_id __read_mostly; unsigned int bond_net_id __read_mostly;
/* Keys dissected by the bonding driver's own flow dissector
 * (flow_keys_bonding). Each entry pairs a dissector key id with the offset
 * of the struct flow_keys member that receives the dissected value.
 * FLOW_DISSECTOR_KEY_ICMP is listed so that the ICMP key is stored in
 * flow_keys.icmp.
 */
static const struct flow_dissector_key flow_keys_bonding_keys[] = {
	{ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
	  .offset = offsetof(struct flow_keys, control) },
	{ .key_id = FLOW_DISSECTOR_KEY_BASIC,
	  .offset = offsetof(struct flow_keys, basic) },
	{ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
	  .offset = offsetof(struct flow_keys, addrs.v4addrs) },
	{ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
	  .offset = offsetof(struct flow_keys, addrs.v6addrs) },
	{ .key_id = FLOW_DISSECTOR_KEY_TIPC,
	  .offset = offsetof(struct flow_keys, addrs.tipckey) },
	{ .key_id = FLOW_DISSECTOR_KEY_PORTS,
	  .offset = offsetof(struct flow_keys, ports) },
	{ .key_id = FLOW_DISSECTOR_KEY_ICMP,
	  .offset = offsetof(struct flow_keys, icmp) },
	{ .key_id = FLOW_DISSECTOR_KEY_VLAN,
	  .offset = offsetof(struct flow_keys, vlan) },
	{ .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
	  .offset = offsetof(struct flow_keys, tags) },
	{ .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
	  .offset = offsetof(struct flow_keys, keyid) },
};

/* Dissector instance set up from flow_keys_bonding_keys in bonding_init() */
static struct flow_dissector flow_keys_bonding __read_mostly;
/*-------------------------- Forward declarations ---------------------------*/ /*-------------------------- Forward declarations ---------------------------*/
static int bond_init(struct net_device *bond_dev); static int bond_init(struct net_device *bond_dev);
...@@ -3263,10 +3308,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, ...@@ -3263,10 +3308,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
const struct iphdr *iph; const struct iphdr *iph;
int noff, proto = -1; int noff, proto = -1;
if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) {
return skb_flow_dissect_flow_keys(skb, fk, 0); memset(fk, 0, sizeof(*fk));
return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
fk, NULL, 0, 0, 0, 0);
}
fk->ports.ports = 0; fk->ports.ports = 0;
memset(&fk->icmp, 0, sizeof(fk->icmp));
noff = skb_network_offset(skb); noff = skb_network_offset(skb);
if (skb->protocol == htons(ETH_P_IP)) { if (skb->protocol == htons(ETH_P_IP)) {
if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
...@@ -3286,8 +3335,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, ...@@ -3286,8 +3335,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
} else { } else {
return false; return false;
} }
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) {
fk->ports.ports = skb_flow_get_ports(skb, noff, proto); if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)
skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
skb_transport_offset(skb),
skb_headlen(skb));
else
fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
}
return true; return true;
} }
...@@ -3314,10 +3369,14 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) ...@@ -3314,10 +3369,14 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
return bond_eth_hash(skb); return bond_eth_hash(skb);
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
hash = bond_eth_hash(skb); hash = bond_eth_hash(skb);
else } else {
hash = (__force u32)flow.ports.ports; if (flow.icmp.id)
memcpy(&hash, &flow.icmp, sizeof(hash));
else
memcpy(&hash, &flow.ports.ports, sizeof(hash));
}
hash ^= (__force u32)flow_get_u32_dst(&flow) ^ hash ^= (__force u32)flow_get_u32_dst(&flow) ^
(__force u32)flow_get_u32_src(&flow); (__force u32)flow_get_u32_src(&flow);
hash ^= (hash >> 16); hash ^= (hash >> 16);
...@@ -4901,6 +4960,10 @@ static int __init bonding_init(void) ...@@ -4901,6 +4960,10 @@ static int __init bonding_init(void)
goto err; goto err;
} }
skb_flow_dissector_init(&flow_keys_bonding,
flow_keys_bonding_keys,
ARRAY_SIZE(flow_keys_bonding_keys));
register_netdevice_notifier(&bond_netdev_notifier); register_netdevice_notifier(&bond_netdev_notifier);
out: out:
return res; return res;
......
...@@ -6,6 +6,8 @@ ...@@ -6,6 +6,8 @@
#include <linux/in6.h> #include <linux/in6.h>
#include <uapi/linux/if_ether.h> #include <uapi/linux/if_ether.h>
struct sk_buff;
/** /**
* struct flow_dissector_key_control: * struct flow_dissector_key_control:
* @thoff: Transport header offset * @thoff: Transport header offset
...@@ -156,19 +158,16 @@ struct flow_dissector_key_ports { ...@@ -156,19 +158,16 @@ struct flow_dissector_key_ports {
/** /**
* flow_dissector_key_icmp: * flow_dissector_key_icmp:
* @ports: type and code of ICMP header
* icmp: ICMP type (high) and code (low)
* type: ICMP type * type: ICMP type
* code: ICMP code * code: ICMP code
* id: session identifier
*/ */
struct flow_dissector_key_icmp { struct flow_dissector_key_icmp {
union { struct {
__be16 icmp; u8 type;
struct { u8 code;
u8 type;
u8 code;
};
}; };
u16 id;
}; };
/** /**
...@@ -282,6 +281,8 @@ struct flow_keys { ...@@ -282,6 +281,8 @@ struct flow_keys {
struct flow_dissector_key_vlan cvlan; struct flow_dissector_key_vlan cvlan;
struct flow_dissector_key_keyid keyid; struct flow_dissector_key_keyid keyid;
struct flow_dissector_key_ports ports; struct flow_dissector_key_ports ports;
struct flow_dissector_key_icmp icmp;
/* 'addrs' must be the last member */
struct flow_dissector_key_addrs addrs; struct flow_dissector_key_addrs addrs;
}; };
...@@ -315,6 +316,9 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys) ...@@ -315,6 +316,9 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
} }
u32 flow_hash_from_keys(struct flow_keys *keys); u32 flow_hash_from_keys(struct flow_keys *keys);
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
struct flow_dissector_key_icmp *key_icmp,
void *data, int thoff, int hlen);
static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector, static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id) enum flow_dissector_key_id key_id)
......
...@@ -178,27 +178,6 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) ...@@ -178,27 +178,6 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
mutex_unlock(&flow_dissector_mutex); mutex_unlock(&flow_dissector_mutex);
return 0; return 0;
} }
/**
 * skb_flow_get_be16 - extract be16 entity
 * @skb: sk_buff to extract from
 * @poff: offset to extract at
 * @data: raw buffer pointer to the packet
 * @hlen: packet header length
 *
 * The function will try to retrieve a be16 entity at
 * offset poff.
 *
 * Return: the 16-bit value found at @poff, or 0 when
 * __skb_header_pointer() could not provide it.
 */
static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff,
				void *data, int hlen)
{
	__be16 *u, _u;

	/* _u is the on-stack buffer handed to __skb_header_pointer() */
	u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);

	return u ? *u : 0;
}
/** /**
* __skb_flow_get_ports - extract the upper layer ports and return them * __skb_flow_get_ports - extract the upper layer ports and return them
...@@ -234,6 +213,72 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, ...@@ -234,6 +213,72 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
} }
EXPORT_SYMBOL(__skb_flow_get_ports); EXPORT_SYMBOL(__skb_flow_get_ports);
/* Report whether @type is one of the ICMP or ICMPv6 message types listed
 * below, i.e. the ones this file treats as carrying an Identifier field.
 * The same check is applied to both protocols; the caller does not say
 * which one the type belongs to.
 */
static bool icmp_has_id(u8 type)
{
	return type == ICMP_ECHO ||
	       type == ICMP_ECHOREPLY ||
	       type == ICMP_TIMESTAMP ||
	       type == ICMP_TIMESTAMPREPLY ||
	       type == ICMPV6_ECHO_REQUEST ||
	       type == ICMPV6_ECHO_REPLY;
}
/**
 * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields
 * @skb: sk_buff to extract from
 * @key_icmp: struct flow_dissector_key_icmp to fill
 * @data: raw buffer pointer to the packet
 * @thoff: offset to extract at
 * @hlen: packet header length
 *
 * If the ICMP header cannot be obtained at @thoff, @key_icmp is left
 * untouched, so callers that rely on its contents must initialise it
 * beforehand.
 */
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
			   struct flow_dissector_key_icmp *key_icmp,
			   void *data, int thoff, int hlen)
{
	struct icmphdr *ih, _ih;

	ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih);
	if (!ih)
		return;

	key_icmp->type = ih->type;
	key_icmp->code = ih->code;

	/* As we use 0 to signal that the Id field is not present,
	 * avoid confusion with packets without such field
	 */
	if (icmp_has_id(ih->type))
		key_icmp->id = ih->un.echo.id ? : 1;
	else
		key_icmp->id = 0;
}
EXPORT_SYMBOL(skb_flow_get_icmp_tci);
/* Fill the ICMP key of @target_container via skb_flow_get_icmp_tci(),
 * but only when @flow_dissector has FLOW_DISSECTOR_KEY_ICMP enabled;
 * otherwise do nothing.
 */
static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
				    struct flow_dissector *flow_dissector,
				    void *target_container,
				    void *data, int thoff, int hlen)
{
	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP)) {
		struct flow_dissector_key_icmp *icmp_target;

		icmp_target = skb_flow_dissector_target(flow_dissector,
							FLOW_DISSECTOR_KEY_ICMP,
							target_container);
		skb_flow_get_icmp_tci(skb, icmp_target, data, thoff, hlen);
	}
}
void skb_flow_dissect_meta(const struct sk_buff *skb, void skb_flow_dissect_meta(const struct sk_buff *skb,
struct flow_dissector *flow_dissector, struct flow_dissector *flow_dissector,
void *target_container) void *target_container)
...@@ -884,7 +929,6 @@ bool __skb_flow_dissect(const struct net *net, ...@@ -884,7 +929,6 @@ bool __skb_flow_dissect(const struct net *net,
struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan; struct flow_dissector_key_vlan *key_vlan;
struct bpf_prog *attached = NULL; struct bpf_prog *attached = NULL;
...@@ -1329,6 +1373,12 @@ bool __skb_flow_dissect(const struct net *net, ...@@ -1329,6 +1373,12 @@ bool __skb_flow_dissect(const struct net *net,
data, nhoff, hlen); data, nhoff, hlen);
break; break;
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
__skb_flow_dissect_icmp(skb, flow_dissector, target_container,
data, nhoff, hlen);
break;
default: default:
break; break;
} }
...@@ -1342,14 +1392,6 @@ bool __skb_flow_dissect(const struct net *net, ...@@ -1342,14 +1392,6 @@ bool __skb_flow_dissect(const struct net *net,
data, hlen); data, hlen);
} }
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ICMP)) {
key_icmp = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_ICMP,
target_container);
key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
}
/* Process result of IP proto processing */ /* Process result of IP proto processing */
switch (fdret) { switch (fdret) {
case FLOW_DISSECT_RET_PROTO_AGAIN: case FLOW_DISSECT_RET_PROTO_AGAIN:
...@@ -1408,6 +1450,9 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow) ...@@ -1408,6 +1450,9 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
{ {
size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
/* flow.addrs MUST be the last member in struct flow_keys because
* different L3 protocols have different address length
*/
BUILD_BUG_ON(offsetof(typeof(*flow), addrs) != BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
sizeof(*flow) - sizeof(flow->addrs)); sizeof(*flow) - sizeof(flow->addrs));
...@@ -1455,6 +1500,9 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow) ...@@ -1455,6 +1500,9 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow)
} }
EXPORT_SYMBOL(flow_get_u32_dst); EXPORT_SYMBOL(flow_get_u32_dst);
/* Sort the source and destination IP (and the ports if the IP are the same),
* to have consistent hash within the two directions
*/
static inline void __flow_hash_consistentify(struct flow_keys *keys) static inline void __flow_hash_consistentify(struct flow_keys *keys)
{ {
int addr_diff, i; int addr_diff, i;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment