Commit a507c346 authored by David S. Miller's avatar David S. Miller

Merge branch 'openvswitch-Conntrack-integration-improvements'

parents 9878f602 316d4d78
/*
* Copyright (c) 2007-2013 Nicira, Inc.
* Copyright (c) 2007-2017 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
......@@ -331,6 +331,8 @@ enum ovs_key_attr {
OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */
OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */
OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */
#ifdef __KERNEL__
OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */
......@@ -446,9 +448,13 @@ struct ovs_key_nd {
__u8 nd_tll[ETH_ALEN];
};
#define OVS_CT_LABELS_LEN 16
#define OVS_CT_LABELS_LEN_32 4
#define OVS_CT_LABELS_LEN (OVS_CT_LABELS_LEN_32 * sizeof(__u32))
struct ovs_key_ct_labels {
__u8 ct_labels[OVS_CT_LABELS_LEN];
union {
__u8 ct_labels[OVS_CT_LABELS_LEN];
__u32 ct_labels_32[OVS_CT_LABELS_LEN_32];
};
};
/* OVS_KEY_ATTR_CT_STATE flags */
......@@ -468,6 +474,22 @@ struct ovs_key_ct_labels {
#define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)
struct ovs_key_ct_tuple_ipv4 {
__be32 ipv4_src;
__be32 ipv4_dst;
__be16 src_port;
__be16 dst_port;
__u8 ipv4_proto;
};
struct ovs_key_ct_tuple_ipv6 {
__be32 ipv6_src[4];
__be32 ipv6_dst[4];
__be16 src_port;
__be16 dst_port;
__u8 ipv6_proto;
};
/**
* enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
* @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
......@@ -652,6 +674,10 @@ struct ovs_action_hash {
* @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG.
* @OVS_CT_ATTR_NAT: Nested OVS_NAT_ATTR_* for performing L3 network address
* translation (NAT) on the packet.
* @OVS_CT_ATTR_FORCE_COMMIT: Like %OVS_CT_ATTR_COMMIT, but instead of doing
* nothing if the connection is already committed will check that the current
* packet is in conntrack entry's original direction. If directionality does
* not match, will delete the existing conntrack entry and commit a new one.
*/
enum ovs_ct_attr {
OVS_CT_ATTR_UNSPEC,
......@@ -662,6 +688,7 @@ enum ovs_ct_attr {
OVS_CT_ATTR_HELPER, /* netlink helper to assist detection of
related connections. */
OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */
OVS_CT_ATTR_FORCE_COMMIT, /* No argument */
__OVS_CT_ATTR_MAX
};
......
......@@ -1074,6 +1074,8 @@ static int execute_masked_set_action(struct sk_buff *skb,
case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS:
case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
err = -EINVAL;
break;
}
......
This diff is collapsed.
......@@ -32,7 +32,8 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
const struct ovs_conntrack_info *);
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
int ovs_ct_put_key(const struct sw_flow_key *swkey,
const struct sw_flow_key *output, struct sk_buff *skb);
void ovs_ct_free_action(const struct nlattr *a);
#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
......@@ -75,13 +76,18 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
static inline void ovs_ct_fill_key(const struct sk_buff *skb,
struct sw_flow_key *key)
{
key->ct.state = 0;
key->ct.zone = 0;
key->ct_state = 0;
key->ct_zone = 0;
key->ct.mark = 0;
memset(&key->ct.labels, 0, sizeof(key->ct.labels));
/* Clear 'ct_orig_proto' to mark the non-existence of original
* direction key fields.
*/
key->ct_orig_proto = 0;
}
static inline int ovs_ct_put_key(const struct sw_flow_key *key,
static inline int ovs_ct_put_key(const struct sw_flow_key *swkey,
const struct sw_flow_key *output,
struct sk_buff *skb)
{
return 0;
......
......@@ -765,7 +765,7 @@ static int key_extract_mac_proto(struct sk_buff *skb)
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
int res;
int res, err;
/* Extract metadata from packet. */
if (tun_info) {
......@@ -792,7 +792,6 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->phy.priority = skb->priority;
key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
key->phy.skb_mark = skb->mark;
ovs_ct_fill_key(skb, key);
key->ovs_flow_hash = 0;
res = key_extract_mac_proto(skb);
if (res < 0)
......@@ -800,17 +799,26 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->mac_proto = res;
key->recirc_id = 0;
return key_extract(skb, key);
err = key_extract(skb, key);
if (!err)
ovs_ct_fill_key(skb, key); /* Must be after key_extract(). */
return err;
}
int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
struct sk_buff *skb,
struct sw_flow_key *key, bool log)
{
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
u64 attrs = 0;
int err;
err = parse_flow_nlattrs(attr, a, &attrs, log);
if (err)
return -EINVAL;
/* Extract metadata from netlink attributes. */
err = ovs_nla_get_flow_metadata(net, attr, key, log);
err = ovs_nla_get_flow_metadata(net, a, attrs, key, log);
if (err)
return err;
......@@ -824,5 +832,21 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
*/
skb->protocol = key->eth.type;
return key_extract(skb, key);
err = key_extract(skb, key);
if (err)
return err;
/* Check that we have conntrack original direction tuple metadata only
* for packets for which it makes sense. Otherwise the key may be
* corrupted due to overlapping key fields.
*/
if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) &&
key->eth.type != htons(ETH_P_IP))
return -EINVAL;
if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) &&
(key->eth.type != htons(ETH_P_IPV6) ||
sw_flow_key_is_nd(key)))
return -EINVAL;
return 0;
}
/*
* Copyright (c) 2007-2014 Nicira, Inc.
* Copyright (c) 2007-2017 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
......@@ -85,6 +85,11 @@ struct sw_flow_key {
struct vlan_head cvlan;
__be16 type; /* Ethernet frame type. */
} eth;
/* Filling a hole of two bytes. */
u8 ct_state;
u8 ct_orig_proto; /* CT original direction tuple IP
* protocol.
*/
union {
struct {
__be32 top_lse; /* top label stack entry */
......@@ -96,6 +101,7 @@ struct sw_flow_key {
u8 frag; /* One of OVS_FRAG_TYPE_*. */
} ip;
};
u16 ct_zone; /* Conntrack zone. */
struct {
__be16 src; /* TCP/UDP/SCTP source port. */
__be16 dst; /* TCP/UDP/SCTP destination port. */
......@@ -107,10 +113,16 @@ struct sw_flow_key {
__be32 src; /* IP source address. */
__be32 dst; /* IP destination address. */
} addr;
struct {
u8 sha[ETH_ALEN]; /* ARP source hardware address. */
u8 tha[ETH_ALEN]; /* ARP target hardware address. */
} arp;
union {
struct {
__be32 src;
__be32 dst;
} ct_orig; /* Conntrack original direction fields. */
struct {
u8 sha[ETH_ALEN]; /* ARP source hardware address. */
u8 tha[ETH_ALEN]; /* ARP target hardware address. */
} arp;
};
} ipv4;
struct {
struct {
......@@ -118,23 +130,40 @@ struct sw_flow_key {
struct in6_addr dst; /* IPv6 destination address. */
} addr;
__be32 label; /* IPv6 flow label. */
struct {
struct in6_addr target; /* ND target address. */
u8 sll[ETH_ALEN]; /* ND source link layer address. */
u8 tll[ETH_ALEN]; /* ND target link layer address. */
} nd;
union {
struct {
struct in6_addr src;
struct in6_addr dst;
} ct_orig; /* Conntrack original direction fields. */
struct {
struct in6_addr target; /* ND target address. */
u8 sll[ETH_ALEN]; /* ND source link layer address. */
u8 tll[ETH_ALEN]; /* ND target link layer address. */
} nd;
};
} ipv6;
};
struct {
/* Connection tracking fields. */
u16 zone;
/* Connection tracking fields not packed above. */
struct {
__be16 src; /* CT orig tuple tp src port. */
__be16 dst; /* CT orig tuple tp dst port. */
} orig_tp;
u32 mark;
u8 state;
struct ovs_key_ct_labels labels;
} ct;
} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
static inline bool sw_flow_key_is_nd(const struct sw_flow_key *key)
{
return key->eth.type == htons(ETH_P_IPV6) &&
key->ip.proto == NEXTHDR_ICMP &&
key->tp.dst == 0 &&
(key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT));
}
struct sw_flow_key_range {
unsigned short int start;
unsigned short int end;
......
......@@ -129,7 +129,9 @@ static bool match_validate(const struct sw_flow_match *match,
/* The following mask attributes allowed only if they
* pass the validation tests. */
mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
| (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
| (1 << OVS_KEY_ATTR_IPV6)
| (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
| (1 << OVS_KEY_ATTR_TCP)
| (1 << OVS_KEY_ATTR_TCP_FLAGS)
| (1 << OVS_KEY_ATTR_UDP)
......@@ -161,8 +163,10 @@ static bool match_validate(const struct sw_flow_match *match,
if (match->key->eth.type == htons(ETH_P_IP)) {
key_expected |= 1 << OVS_KEY_ATTR_IPV4;
if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
}
if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
if (match->key->ip.proto == IPPROTO_UDP) {
......@@ -196,8 +200,10 @@ static bool match_validate(const struct sw_flow_match *match,
if (match->key->eth.type == htons(ETH_P_IPV6)) {
key_expected |= 1 << OVS_KEY_ATTR_IPV6;
if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
}
if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
if (match->key->ip.proto == IPPROTO_UDP) {
......@@ -230,6 +236,12 @@ static bool match_validate(const struct sw_flow_match *match,
htons(NDISC_NEIGHBOUR_SOLICITATION) ||
match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
key_expected |= 1 << OVS_KEY_ATTR_ND;
/* Original direction conntrack tuple
* uses the same space as the ND fields
* in the key, so both are not allowed
* at the same time.
*/
mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
if (match->mask && (match->mask->key.tp.src == htons(0xff)))
mask_allowed |= 1 << OVS_KEY_ATTR_ND;
}
......@@ -282,7 +294,7 @@ size_t ovs_key_attr_size(void)
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26);
BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
......@@ -295,6 +307,7 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
+ nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
+ nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
+ nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
......@@ -355,6 +368,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
[OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
.len = sizeof(struct ovs_key_ct_tuple_ipv4) },
[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
};
static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
......@@ -430,9 +447,8 @@ static int parse_flow_mask_nlattrs(const struct nlattr *attr,
return __parse_flow_nlattrs(attr, a, attrsp, log, true);
}
static int parse_flow_nlattrs(const struct nlattr *attr,
const struct nlattr *a[], u64 *attrsp,
bool log)
int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
u64 *attrsp, bool log)
{
return __parse_flow_nlattrs(attr, a, attrsp, log, false);
}
......@@ -1056,14 +1072,14 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
return -EINVAL;
}
SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask);
SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
}
if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask);
SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
}
if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
......@@ -1082,6 +1098,34 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
sizeof(*cl), is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
}
if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
const struct ovs_key_ct_tuple_ipv4 *ct;
ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);
SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
}
if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
const struct ovs_key_ct_tuple_ipv6 *ct;
ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);
SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
sizeof(match->key->ipv6.ct_orig.src),
is_mask);
SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
sizeof(match->key->ipv6.ct_orig.dst),
is_mask);
SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
}
/* For layer 3 packets the Ethernet type is provided
* and treated as metadata but no MAC addresses are provided.
......@@ -1493,9 +1537,12 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
/**
* ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
* @key: Receives extracted in_port, priority, tun_key and skb_mark.
* @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
* sequence.
* @net: Network namespace.
* @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
* metadata.
* @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
* attributes.
* @attrs: Bit mask for the netlink attributes included in @a.
* @log: Boolean to allow kernel error logging. Normally true, but when
* probing for feature compatibility this should be passed in as false to
* suppress unnecessary error logging.
......@@ -1504,25 +1551,26 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
* take the same form accepted by flow_from_nlattrs(), but only enough of it to
* get the metadata, that is, the parts of the flow key that cannot be
* extracted from the packet itself.
*
* This must be called before the packet key fields are filled in 'key'.
*/
int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr,
struct sw_flow_key *key,
bool log)
int ovs_nla_get_flow_metadata(struct net *net,
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
u64 attrs, struct sw_flow_key *key, bool log)
{
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
struct sw_flow_match match;
u64 attrs = 0;
int err;
err = parse_flow_nlattrs(attr, a, &attrs, log);
if (err)
return -EINVAL;
memset(&match, 0, sizeof(match));
match.key = key;
key->ct_state = 0;
key->ct_zone = 0;
key->ct_orig_proto = 0;
memset(&key->ct, 0, sizeof(key->ct));
memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
key->phy.in_port = DP_MAX_PORTS;
return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
......@@ -1584,7 +1632,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
goto nla_put_failure;
if (ovs_ct_put_key(output, skb))
if (ovs_ct_put_key(swkey, output, skb))
goto nla_put_failure;
if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
......
......@@ -46,8 +46,11 @@ void ovs_match_init(struct sw_flow_match *match,
int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
int attr, bool is_mask, struct sk_buff *);
int ovs_nla_get_flow_metadata(struct net *, const struct nlattr *,
struct sw_flow_key *, bool log);
int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
u64 *attrsp, bool log);
int ovs_nla_get_flow_metadata(struct net *net,
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
u64 attrs, struct sw_flow_key *key, bool log);
int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment