Commit a507c346 authored by David S. Miller

Merge branch 'openvswitch-Conntrack-integration-improvements'

parents 9878f602 316d4d78
/* /*
* Copyright (c) 2007-2013 Nicira, Inc. * Copyright (c) 2007-2017 Nicira, Inc.
* *
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public * modify it under the terms of version 2 of the GNU General Public
...@@ -331,6 +331,8 @@ enum ovs_key_attr { ...@@ -331,6 +331,8 @@ enum ovs_key_attr {
OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */
OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */ OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */
OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */ OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */
#ifdef __KERNEL__ #ifdef __KERNEL__
OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */
...@@ -446,9 +448,13 @@ struct ovs_key_nd { ...@@ -446,9 +448,13 @@ struct ovs_key_nd {
__u8 nd_tll[ETH_ALEN]; __u8 nd_tll[ETH_ALEN];
}; };
#define OVS_CT_LABELS_LEN 16 #define OVS_CT_LABELS_LEN_32 4
#define OVS_CT_LABELS_LEN (OVS_CT_LABELS_LEN_32 * sizeof(__u32))
struct ovs_key_ct_labels { struct ovs_key_ct_labels {
__u8 ct_labels[OVS_CT_LABELS_LEN]; union {
__u8 ct_labels[OVS_CT_LABELS_LEN];
__u32 ct_labels_32[OVS_CT_LABELS_LEN_32];
};
}; };
/* OVS_KEY_ATTR_CT_STATE flags */ /* OVS_KEY_ATTR_CT_STATE flags */
...@@ -468,6 +474,22 @@ struct ovs_key_ct_labels { ...@@ -468,6 +474,22 @@ struct ovs_key_ct_labels {
#define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT) #define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)
/* Conntrack original-direction tuple for IPv4 connections; this is the
 * payload type named for OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4.
 *
 * NOTE(review): the members add up to 13 bytes, so with 4-byte aligned
 * __be32 members the struct presumably carries 3 bytes of trailing padding.
 * Code that emits sizeof(struct ovs_key_ct_tuple_ipv4) bytes to userspace
 * (see ovs_ct_put_key()) should make sure the padding is zeroed -- confirm.
 */
struct ovs_key_ct_tuple_ipv4 {
__be32 ipv4_src; /* Original direction source address. */
__be32 ipv4_dst; /* Original direction destination address. */
__be16 src_port; /* Original direction source port (ICMP type for ICMP). */
__be16 dst_port; /* Original direction destination port (ICMP code for ICMP). */
__u8 ipv4_proto; /* IP protocol of the original direction tuple. */
};
/* Conntrack original-direction tuple for IPv6 connections; this is the
 * payload type named for OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6.
 *
 * NOTE(review): the members add up to 37 bytes, so with 4-byte aligned
 * __be32 members the struct presumably carries 3 bytes of trailing padding.
 * Code that emits sizeof(struct ovs_key_ct_tuple_ipv6) bytes to userspace
 * (see ovs_ct_put_key()) should make sure the padding is zeroed -- confirm.
 */
struct ovs_key_ct_tuple_ipv6 {
__be32 ipv6_src[4]; /* Original direction source address, four 32-bit words. */
__be32 ipv6_dst[4]; /* Original direction destination address, four 32-bit words. */
__be16 src_port; /* Original direction source port (ICMPv6 type for ICMPv6). */
__be16 dst_port; /* Original direction destination port (ICMPv6 code for ICMPv6). */
__u8 ipv6_proto; /* IP protocol of the original direction tuple. */
};
/** /**
* enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
* @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
...@@ -652,6 +674,10 @@ struct ovs_action_hash { ...@@ -652,6 +674,10 @@ struct ovs_action_hash {
* @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG. * @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG.
* @OVS_CT_ATTR_NAT: Nested OVS_NAT_ATTR_* for performing L3 network address * @OVS_CT_ATTR_NAT: Nested OVS_NAT_ATTR_* for performing L3 network address
* translation (NAT) on the packet. * translation (NAT) on the packet.
* @OVS_CT_ATTR_FORCE_COMMIT: Like %OVS_CT_ATTR_COMMIT, but if the connection is
* already committed, checks that the current packet is in the conntrack
* entry's original direction instead of doing nothing. If the direction does
* not match, deletes the existing conntrack entry and commits a new one.
*/ */
enum ovs_ct_attr { enum ovs_ct_attr {
OVS_CT_ATTR_UNSPEC, OVS_CT_ATTR_UNSPEC,
...@@ -662,6 +688,7 @@ enum ovs_ct_attr { ...@@ -662,6 +688,7 @@ enum ovs_ct_attr {
OVS_CT_ATTR_HELPER, /* netlink helper to assist detection of OVS_CT_ATTR_HELPER, /* netlink helper to assist detection of
related connections. */ related connections. */
OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */ OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */
OVS_CT_ATTR_FORCE_COMMIT, /* No argument */
__OVS_CT_ATTR_MAX __OVS_CT_ATTR_MAX
}; };
......
...@@ -1074,6 +1074,8 @@ static int execute_masked_set_action(struct sk_buff *skb, ...@@ -1074,6 +1074,8 @@ static int execute_masked_set_action(struct sk_buff *skb,
case OVS_KEY_ATTR_CT_ZONE: case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_CT_MARK: case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS: case OVS_KEY_ATTR_CT_LABELS:
case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
err = -EINVAL; err = -EINVAL;
break; break;
} }
......
...@@ -65,6 +65,7 @@ struct ovs_conntrack_info { ...@@ -65,6 +65,7 @@ struct ovs_conntrack_info {
struct nf_conn *ct; struct nf_conn *ct;
u8 commit : 1; u8 commit : 1;
u8 nat : 3; /* enum ovs_ct_nat */ u8 nat : 3; /* enum ovs_ct_nat */
u8 force : 1;
u16 family; u16 family;
struct md_mark mark; struct md_mark mark;
struct md_labels labels; struct md_labels labels;
...@@ -73,6 +74,8 @@ struct ovs_conntrack_info { ...@@ -73,6 +74,8 @@ struct ovs_conntrack_info {
#endif #endif
}; };
static bool labels_nonzero(const struct ovs_key_ct_labels *labels);
static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
static u16 key_to_nfproto(const struct sw_flow_key *key) static u16 key_to_nfproto(const struct sw_flow_key *key)
...@@ -129,21 +132,33 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct) ...@@ -129,21 +132,33 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct)
#endif #endif
} }
/* Guard against conntrack labels max size shrinking below 128 bits. */
#if NF_CT_LABELS_MAX_SIZE < 16
#error NF_CT_LABELS_MAX_SIZE must be at least 16 bytes
#endif
static void ovs_ct_get_labels(const struct nf_conn *ct, static void ovs_ct_get_labels(const struct nf_conn *ct,
struct ovs_key_ct_labels *labels) struct ovs_key_ct_labels *labels)
{ {
struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
if (cl) { if (cl)
size_t len = sizeof(cl->bits); memcpy(labels, cl->bits, OVS_CT_LABELS_LEN);
else
memset(labels, 0, OVS_CT_LABELS_LEN);
}
if (len > OVS_CT_LABELS_LEN) static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key,
len = OVS_CT_LABELS_LEN; const struct nf_conntrack_tuple *orig,
else if (len < OVS_CT_LABELS_LEN) u8 icmp_proto)
memset(labels, 0, OVS_CT_LABELS_LEN); {
memcpy(labels, cl->bits, len); key->ct_orig_proto = orig->dst.protonum;
if (orig->dst.protonum == icmp_proto) {
key->ct.orig_tp.src = htons(orig->dst.u.icmp.type);
key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code);
} else { } else {
memset(labels, 0, OVS_CT_LABELS_LEN); key->ct.orig_tp.src = orig->src.u.all;
key->ct.orig_tp.dst = orig->dst.u.all;
} }
} }
...@@ -151,13 +166,42 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, ...@@ -151,13 +166,42 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
const struct nf_conntrack_zone *zone, const struct nf_conntrack_zone *zone,
const struct nf_conn *ct) const struct nf_conn *ct)
{ {
key->ct.state = state; key->ct_state = state;
key->ct.zone = zone->id; key->ct_zone = zone->id;
key->ct.mark = ovs_ct_get_mark(ct); key->ct.mark = ovs_ct_get_mark(ct);
ovs_ct_get_labels(ct, &key->ct.labels); ovs_ct_get_labels(ct, &key->ct.labels);
if (ct) {
const struct nf_conntrack_tuple *orig;
/* Use the master if we have one. */
if (ct->master)
ct = ct->master;
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* IP version must match with the master connection. */
if (key->eth.type == htons(ETH_P_IP) &&
nf_ct_l3num(ct) == NFPROTO_IPV4) {
key->ipv4.ct_orig.src = orig->src.u3.ip;
key->ipv4.ct_orig.dst = orig->dst.u3.ip;
__ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP);
return;
} else if (key->eth.type == htons(ETH_P_IPV6) &&
!sw_flow_key_is_nd(key) &&
nf_ct_l3num(ct) == NFPROTO_IPV6) {
key->ipv6.ct_orig.src = orig->src.u3.in6;
key->ipv6.ct_orig.dst = orig->dst.u3.in6;
__ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP);
return;
}
}
/* Clear 'ct_orig_proto' to mark the non-existence of conntrack
* original direction key fields.
*/
key->ct_orig_proto = 0;
} }
/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has /* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has
* previously sent the packet to conntrack via the ct action. If * previously sent the packet to conntrack via the ct action. If
* 'keep_nat_flags' is true, the existing NAT flags retained, else they are * 'keep_nat_flags' is true, the existing NAT flags retained, else they are
* initialized from the connection status. * initialized from the connection status.
...@@ -184,7 +228,7 @@ static void ovs_ct_update_key(const struct sk_buff *skb, ...@@ -184,7 +228,7 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
if (ct->master) if (ct->master)
state |= OVS_CS_F_RELATED; state |= OVS_CS_F_RELATED;
if (keep_nat_flags) { if (keep_nat_flags) {
state |= key->ct.state & OVS_CS_F_NAT_MASK; state |= key->ct_state & OVS_CS_F_NAT_MASK;
} else { } else {
if (ct->status & IPS_SRC_NAT) if (ct->status & IPS_SRC_NAT)
state |= OVS_CS_F_SRC_NAT; state |= OVS_CS_F_SRC_NAT;
...@@ -208,44 +252,69 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) ...@@ -208,44 +252,69 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
ovs_ct_update_key(skb, NULL, key, false, false); ovs_ct_update_key(skb, NULL, key, false, false);
} }
int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) #define IN6_ADDR_INITIALIZER(ADDR) \
{ (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \
(ADDR).s6_addr32[2], (ADDR).s6_addr32[3] }
int ovs_ct_put_key(const struct sw_flow_key *swkey,
const struct sw_flow_key *output, struct sk_buff *skb)
{ {
if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct_state))
return -EMSGSIZE; return -EMSGSIZE;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone)) nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct_zone))
return -EMSGSIZE; return -EMSGSIZE;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark)) nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark))
return -EMSGSIZE; return -EMSGSIZE;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels), nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels),
&key->ct.labels)) &output->ct.labels))
return -EMSGSIZE; return -EMSGSIZE;
if (swkey->ct_orig_proto) {
if (swkey->eth.type == htons(ETH_P_IP)) {
struct ovs_key_ct_tuple_ipv4 orig = {
output->ipv4.ct_orig.src,
output->ipv4.ct_orig.dst,
output->ct.orig_tp.src,
output->ct.orig_tp.dst,
output->ct_orig_proto,
};
if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,
sizeof(orig), &orig))
return -EMSGSIZE;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
struct ovs_key_ct_tuple_ipv6 orig = {
IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src),
IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst),
output->ct.orig_tp.src,
output->ct.orig_tp.dst,
output->ct_orig_proto,
};
if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,
sizeof(orig), &orig))
return -EMSGSIZE;
}
}
return 0; return 0;
} }
static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key,
u32 ct_mark, u32 mask) u32 ct_mark, u32 mask)
{ {
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
u32 new_mark; u32 new_mark;
/* The connection could be invalid, in which case set_mark is no-op. */
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return 0;
new_mark = ct_mark | (ct->mark & ~(mask)); new_mark = ct_mark | (ct->mark & ~(mask));
if (ct->mark != new_mark) { if (ct->mark != new_mark) {
ct->mark = new_mark; ct->mark = new_mark;
nf_conntrack_event_cache(IPCT_MARK, ct); if (nf_ct_is_confirmed(ct))
nf_conntrack_event_cache(IPCT_MARK, ct);
key->ct.mark = new_mark; key->ct.mark = new_mark;
} }
...@@ -255,34 +324,80 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, ...@@ -255,34 +324,80 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
#endif #endif
} }
static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct)
const struct ovs_key_ct_labels *labels,
const struct ovs_key_ct_labels *mask)
{ {
enum ip_conntrack_info ctinfo;
struct nf_conn_labels *cl; struct nf_conn_labels *cl;
struct nf_conn *ct;
int err;
/* The connection could be invalid, in which case set_label is no-op.*/
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return 0;
cl = nf_ct_labels_find(ct); cl = nf_ct_labels_find(ct);
if (!cl) { if (!cl) {
nf_ct_labels_ext_add(ct); nf_ct_labels_ext_add(ct);
cl = nf_ct_labels_find(ct); cl = nf_ct_labels_find(ct);
} }
if (!cl || sizeof(cl->bits) < OVS_CT_LABELS_LEN)
return cl;
}
/* Initialize labels for a new, yet to be committed conntrack entry. Note that
* since the new connection is not yet confirmed, and thus no-one else has
* access to it's labels, we simply write them over. Also, we refrain from
* triggering events, as receiving change events before the create event would
* be confusing.
*/
static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key,
const struct ovs_key_ct_labels *labels,
const struct ovs_key_ct_labels *mask)
{
struct nf_conn_labels *cl, *master_cl;
bool have_mask = labels_nonzero(mask);
/* Inherit master's labels to the related connection? */
master_cl = ct->master ? nf_ct_labels_find(ct->master) : NULL;
if (!master_cl && !have_mask)
return 0; /* Nothing to do. */
cl = ovs_ct_get_conn_labels(ct);
if (!cl)
return -ENOSPC;
/* Inherit the master's labels, if any. */
if (master_cl)
*cl = *master_cl;
if (have_mask) {
u32 *dst = (u32 *)cl->bits;
int i;
for (i = 0; i < OVS_CT_LABELS_LEN_32; i++)
dst[i] = (dst[i] & ~mask->ct_labels_32[i]) |
(labels->ct_labels_32[i]
& mask->ct_labels_32[i]);
}
memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN);
return 0;
}
static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key,
const struct ovs_key_ct_labels *labels,
const struct ovs_key_ct_labels *mask)
{
struct nf_conn_labels *cl;
int err;
cl = ovs_ct_get_conn_labels(ct);
if (!cl)
return -ENOSPC; return -ENOSPC;
err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask, err = nf_connlabels_replace(ct, labels->ct_labels_32,
OVS_CT_LABELS_LEN / sizeof(u32)); mask->ct_labels_32,
OVS_CT_LABELS_LEN_32);
if (err) if (err)
return err; return err;
ovs_ct_get_labels(ct, &key->ct.labels); memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN);
return 0; return 0;
} }
...@@ -421,16 +536,16 @@ ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h) ...@@ -421,16 +536,16 @@ ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
/* Find an existing connection which this packet belongs to without /* Find an existing connection which this packet belongs to without
* re-attributing statistics or modifying the connection state. This allows an * re-attributing statistics or modifying the connection state. This allows an
* skb->nfct lost due to an upcall to be recovered during actions execution. * skb->_nfct lost due to an upcall to be recovered during actions execution.
* *
* Must be called with rcu_read_lock. * Must be called with rcu_read_lock.
* *
* On success, populates skb->nfct and skb->nfctinfo, and returns the * On success, populates skb->_nfct and returns the connection. Returns NULL
* connection. Returns NULL if there is no existing entry. * if there is no existing entry.
*/ */
static struct nf_conn * static struct nf_conn *
ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
u8 l3num, struct sk_buff *skb) u8 l3num, struct sk_buff *skb, bool natted)
{ {
struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l3proto *l3proto;
struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_l4proto *l4proto;
...@@ -453,6 +568,17 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, ...@@ -453,6 +568,17 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
return NULL; return NULL;
} }
/* Must invert the tuple if skb has been transformed by NAT. */
if (natted) {
struct nf_conntrack_tuple inverse;
if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) {
pr_debug("ovs_ct_find_existing: Inversion failed!\n");
return NULL;
}
tuple = inverse;
}
/* look for tuple match */ /* look for tuple match */
h = nf_conntrack_find_get(net, zone, &tuple); h = nf_conntrack_find_get(net, zone, &tuple);
if (!h) if (!h)
...@@ -460,11 +586,18 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, ...@@ -460,11 +586,18 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
ct = nf_ct_tuplehash_to_ctrack(h); ct = nf_ct_tuplehash_to_ctrack(h);
/* Inverted packet tuple matches the reverse direction conntrack tuple,
* select the other tuplehash to get the right 'ctinfo' bits for this
* packet.
*/
if (natted)
h = &ct->tuplehash[!h->tuple.dst.dir];
nf_ct_set(skb, ct, ovs_ct_get_info(h)); nf_ct_set(skb, ct, ovs_ct_get_info(h));
return ct; return ct;
} }
/* Determine whether skb->nfct is equal to the result of conntrack lookup. */ /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
static bool skb_nfct_cached(struct net *net, static bool skb_nfct_cached(struct net *net,
const struct sw_flow_key *key, const struct sw_flow_key *key,
const struct ovs_conntrack_info *info, const struct ovs_conntrack_info *info,
...@@ -475,14 +608,19 @@ static bool skb_nfct_cached(struct net *net, ...@@ -475,14 +608,19 @@ static bool skb_nfct_cached(struct net *net,
ct = nf_ct_get(skb, &ctinfo); ct = nf_ct_get(skb, &ctinfo);
/* If no ct, check if we have evidence that an existing conntrack entry /* If no ct, check if we have evidence that an existing conntrack entry
* might be found for this skb. This happens when we lose a skb->nfct * might be found for this skb. This happens when we lose a skb->_nfct
* due to an upcall. If the connection was not confirmed, it is not * due to an upcall. If the connection was not confirmed, it is not
* cached and needs to be run through conntrack again. * cached and needs to be run through conntrack again.
*/ */
if (!ct && key->ct.state & OVS_CS_F_TRACKED && if (!ct && key->ct_state & OVS_CS_F_TRACKED &&
!(key->ct.state & OVS_CS_F_INVALID) && !(key->ct_state & OVS_CS_F_INVALID) &&
key->ct.zone == info->zone.id) key->ct_zone == info->zone.id) {
ct = ovs_ct_find_existing(net, &info->zone, info->family, skb); ct = ovs_ct_find_existing(net, &info->zone, info->family, skb,
!!(key->ct_state
& OVS_CS_F_NAT_MASK));
if (ct)
nf_ct_get(skb, &ctinfo);
}
if (!ct) if (!ct)
return false; return false;
if (!net_eq(net, read_pnet(&ct->ct_net))) if (!net_eq(net, read_pnet(&ct->ct_net)))
...@@ -496,6 +634,18 @@ static bool skb_nfct_cached(struct net *net, ...@@ -496,6 +634,18 @@ static bool skb_nfct_cached(struct net *net,
if (help && rcu_access_pointer(help->helper) != info->helper) if (help && rcu_access_pointer(help->helper) != info->helper)
return false; return false;
} }
/* Force conntrack entry direction to the current packet? */
if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
/* Delete the conntrack entry if confirmed, else just release
* the reference.
*/
if (nf_ct_is_confirmed(ct))
nf_ct_delete(ct, 0, 0);
else
nf_conntrack_put(&ct->ct_general);
nf_ct_set(skb, NULL, 0);
return false;
}
return true; return true;
} }
...@@ -590,7 +740,7 @@ static void ovs_nat_update_key(struct sw_flow_key *key, ...@@ -590,7 +740,7 @@ static void ovs_nat_update_key(struct sw_flow_key *key,
if (maniptype == NF_NAT_MANIP_SRC) { if (maniptype == NF_NAT_MANIP_SRC) {
__be16 src; __be16 src;
key->ct.state |= OVS_CS_F_SRC_NAT; key->ct_state |= OVS_CS_F_SRC_NAT;
if (key->eth.type == htons(ETH_P_IP)) if (key->eth.type == htons(ETH_P_IP))
key->ipv4.addr.src = ip_hdr(skb)->saddr; key->ipv4.addr.src = ip_hdr(skb)->saddr;
else if (key->eth.type == htons(ETH_P_IPV6)) else if (key->eth.type == htons(ETH_P_IPV6))
...@@ -612,7 +762,7 @@ static void ovs_nat_update_key(struct sw_flow_key *key, ...@@ -612,7 +762,7 @@ static void ovs_nat_update_key(struct sw_flow_key *key,
} else { } else {
__be16 dst; __be16 dst;
key->ct.state |= OVS_CS_F_DST_NAT; key->ct_state |= OVS_CS_F_DST_NAT;
if (key->eth.type == htons(ETH_P_IP)) if (key->eth.type == htons(ETH_P_IP))
key->ipv4.addr.dst = ip_hdr(skb)->daddr; key->ipv4.addr.dst = ip_hdr(skb)->daddr;
else if (key->eth.type == htons(ETH_P_IPV6)) else if (key->eth.type == htons(ETH_P_IPV6))
...@@ -699,7 +849,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, ...@@ -699,7 +849,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
* not done already. Update key with new CT state after passing the packet * not done already. Update key with new CT state after passing the packet
* through conntrack. * through conntrack.
* Note that if the packet is deemed invalid by conntrack, skb->nfct will be * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be
* set to NULL and 0 will be returned. * set to NULL and 0 will be returned.
*/ */
static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
...@@ -736,7 +886,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, ...@@ -736,7 +886,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* NAT after the nf_conntrack_in() call. We can actually clear * NAT after the nf_conntrack_in() call. We can actually clear
* the whole state, as it will be re-initialized below. * the whole state, as it will be re-initialized below.
*/ */
key->ct.state = 0; key->ct_state = 0;
/* Update the key, but keep the NAT flags. */ /* Update the key, but keep the NAT flags. */
ovs_ct_update_key(skb, info, key, true, true); ovs_ct_update_key(skb, info, key, true, true);
...@@ -752,9 +902,9 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, ...@@ -752,9 +902,9 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* *
* NAT will be done only if the CT action has NAT, and only * NAT will be done only if the CT action has NAT, and only
* once per packet (per zone), as guarded by the NAT bits in * once per packet (per zone), as guarded by the NAT bits in
* the key->ct.state. * the key->ct_state.
*/ */
if (info->nat && !(key->ct.state & OVS_CS_F_NAT_MASK) && if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) &&
(nf_ct_is_confirmed(ct) || info->commit) && (nf_ct_is_confirmed(ct) || info->commit) &&
ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
return -EINVAL; return -EINVAL;
...@@ -830,8 +980,8 @@ static bool labels_nonzero(const struct ovs_key_ct_labels *labels) ...@@ -830,8 +980,8 @@ static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
{ {
size_t i; size_t i;
for (i = 0; i < sizeof(*labels); i++) for (i = 0; i < OVS_CT_LABELS_LEN_32; i++)
if (labels->ct_labels[i]) if (labels->ct_labels_32[i])
return true; return true;
return false; return false;
...@@ -842,24 +992,36 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, ...@@ -842,24 +992,36 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info, const struct ovs_conntrack_info *info,
struct sk_buff *skb) struct sk_buff *skb)
{ {
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
int err; int err;
err = __ovs_ct_lookup(net, key, info, skb); err = __ovs_ct_lookup(net, key, info, skb);
if (err) if (err)
return err; return err;
/* The connection could be invalid, in which case this is a no-op.*/
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return 0;
/* Apply changes before confirming the connection so that the initial /* Apply changes before confirming the connection so that the initial
* conntrack NEW netlink event carries the values given in the CT * conntrack NEW netlink event carries the values given in the CT
* action. * action.
*/ */
if (info->mark.mask) { if (info->mark.mask) {
err = ovs_ct_set_mark(skb, key, info->mark.value, err = ovs_ct_set_mark(ct, key, info->mark.value,
info->mark.mask); info->mark.mask);
if (err) if (err)
return err; return err;
} }
if (labels_nonzero(&info->labels.mask)) { if (!nf_ct_is_confirmed(ct)) {
err = ovs_ct_set_labels(skb, key, &info->labels.value, err = ovs_ct_init_labels(ct, key, &info->labels.value,
&info->labels.mask);
if (err)
return err;
} else if (labels_nonzero(&info->labels.mask)) {
err = ovs_ct_set_labels(ct, key, &info->labels.value,
&info->labels.mask); &info->labels.mask);
if (err) if (err)
return err; return err;
...@@ -1061,6 +1223,7 @@ static int parse_nat(const struct nlattr *attr, ...@@ -1061,6 +1223,7 @@ static int parse_nat(const struct nlattr *attr,
static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
[OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 },
[OVS_CT_ATTR_FORCE_COMMIT] = { .minlen = 0, .maxlen = 0 },
[OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
.maxlen = sizeof(u16) }, .maxlen = sizeof(u16) },
[OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark),
...@@ -1100,6 +1263,9 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, ...@@ -1100,6 +1263,9 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
} }
switch (type) { switch (type) {
case OVS_CT_ATTR_FORCE_COMMIT:
info->force = true;
/* fall through. */
case OVS_CT_ATTR_COMMIT: case OVS_CT_ATTR_COMMIT:
info->commit = true; info->commit = true;
break; break;
...@@ -1326,7 +1492,9 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, ...@@ -1326,7 +1492,9 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
if (!start) if (!start)
return -EMSGSIZE; return -EMSGSIZE;
if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT)) if (ct_info->commit && nla_put_flag(skb, ct_info->force
? OVS_CT_ATTR_FORCE_COMMIT
: OVS_CT_ATTR_COMMIT))
return -EMSGSIZE; return -EMSGSIZE;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
......
...@@ -32,7 +32,8 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, ...@@ -32,7 +32,8 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
const struct ovs_conntrack_info *); const struct ovs_conntrack_info *);
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); int ovs_ct_put_key(const struct sw_flow_key *swkey,
const struct sw_flow_key *output, struct sk_buff *skb);
void ovs_ct_free_action(const struct nlattr *a); void ovs_ct_free_action(const struct nlattr *a);
#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ #define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
...@@ -75,13 +76,18 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb, ...@@ -75,13 +76,18 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
static inline void ovs_ct_fill_key(const struct sk_buff *skb, static inline void ovs_ct_fill_key(const struct sk_buff *skb,
struct sw_flow_key *key) struct sw_flow_key *key)
{ {
key->ct.state = 0; key->ct_state = 0;
key->ct.zone = 0; key->ct_zone = 0;
key->ct.mark = 0; key->ct.mark = 0;
memset(&key->ct.labels, 0, sizeof(key->ct.labels)); memset(&key->ct.labels, 0, sizeof(key->ct.labels));
/* Clear 'ct_orig_proto' to mark the non-existence of original
* direction key fields.
*/
key->ct_orig_proto = 0;
} }
static inline int ovs_ct_put_key(const struct sw_flow_key *key, static inline int ovs_ct_put_key(const struct sw_flow_key *swkey,
const struct sw_flow_key *output,
struct sk_buff *skb) struct sk_buff *skb)
{ {
return 0; return 0;
......
...@@ -765,7 +765,7 @@ static int key_extract_mac_proto(struct sk_buff *skb) ...@@ -765,7 +765,7 @@ static int key_extract_mac_proto(struct sk_buff *skb)
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key) struct sk_buff *skb, struct sw_flow_key *key)
{ {
int res; int res, err;
/* Extract metadata from packet. */ /* Extract metadata from packet. */
if (tun_info) { if (tun_info) {
...@@ -792,7 +792,6 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, ...@@ -792,7 +792,6 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->phy.priority = skb->priority; key->phy.priority = skb->priority;
key->phy.in_port = OVS_CB(skb)->input_vport->port_no; key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
key->phy.skb_mark = skb->mark; key->phy.skb_mark = skb->mark;
ovs_ct_fill_key(skb, key);
key->ovs_flow_hash = 0; key->ovs_flow_hash = 0;
res = key_extract_mac_proto(skb); res = key_extract_mac_proto(skb);
if (res < 0) if (res < 0)
...@@ -800,17 +799,26 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, ...@@ -800,17 +799,26 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->mac_proto = res; key->mac_proto = res;
key->recirc_id = 0; key->recirc_id = 0;
return key_extract(skb, key); err = key_extract(skb, key);
if (!err)
ovs_ct_fill_key(skb, key); /* Must be after key_extract(). */
return err;
} }
int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
struct sk_buff *skb, struct sk_buff *skb,
struct sw_flow_key *key, bool log) struct sw_flow_key *key, bool log)
{ {
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
u64 attrs = 0;
int err; int err;
err = parse_flow_nlattrs(attr, a, &attrs, log);
if (err)
return -EINVAL;
/* Extract metadata from netlink attributes. */ /* Extract metadata from netlink attributes. */
err = ovs_nla_get_flow_metadata(net, attr, key, log); err = ovs_nla_get_flow_metadata(net, a, attrs, key, log);
if (err) if (err)
return err; return err;
...@@ -824,5 +832,21 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, ...@@ -824,5 +832,21 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
*/ */
skb->protocol = key->eth.type; skb->protocol = key->eth.type;
return key_extract(skb, key); err = key_extract(skb, key);
if (err)
return err;
/* Check that we have conntrack original direction tuple metadata only
* for packets for which it makes sense. Otherwise the key may be
* corrupted due to overlapping key fields.
*/
if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) &&
key->eth.type != htons(ETH_P_IP))
return -EINVAL;
if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) &&
(key->eth.type != htons(ETH_P_IPV6) ||
sw_flow_key_is_nd(key)))
return -EINVAL;
return 0;
} }
/* /*
* Copyright (c) 2007-2014 Nicira, Inc. * Copyright (c) 2007-2017 Nicira, Inc.
* *
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public * modify it under the terms of version 2 of the GNU General Public
...@@ -85,6 +85,11 @@ struct sw_flow_key { ...@@ -85,6 +85,11 @@ struct sw_flow_key {
struct vlan_head cvlan; struct vlan_head cvlan;
__be16 type; /* Ethernet frame type. */ __be16 type; /* Ethernet frame type. */
} eth; } eth;
/* Filling a hole of two bytes. */
u8 ct_state;
u8 ct_orig_proto; /* CT original direction tuple IP
* protocol.
*/
union { union {
struct { struct {
__be32 top_lse; /* top label stack entry */ __be32 top_lse; /* top label stack entry */
...@@ -96,6 +101,7 @@ struct sw_flow_key { ...@@ -96,6 +101,7 @@ struct sw_flow_key {
u8 frag; /* One of OVS_FRAG_TYPE_*. */ u8 frag; /* One of OVS_FRAG_TYPE_*. */
} ip; } ip;
}; };
u16 ct_zone; /* Conntrack zone. */
struct { struct {
__be16 src; /* TCP/UDP/SCTP source port. */ __be16 src; /* TCP/UDP/SCTP source port. */
__be16 dst; /* TCP/UDP/SCTP destination port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */
...@@ -107,10 +113,16 @@ struct sw_flow_key { ...@@ -107,10 +113,16 @@ struct sw_flow_key {
__be32 src; /* IP source address. */ __be32 src; /* IP source address. */
__be32 dst; /* IP destination address. */ __be32 dst; /* IP destination address. */
} addr; } addr;
struct { union {
u8 sha[ETH_ALEN]; /* ARP source hardware address. */ struct {
u8 tha[ETH_ALEN]; /* ARP target hardware address. */ __be32 src;
} arp; __be32 dst;
} ct_orig; /* Conntrack original direction fields. */
struct {
u8 sha[ETH_ALEN]; /* ARP source hardware address. */
u8 tha[ETH_ALEN]; /* ARP target hardware address. */
} arp;
};
} ipv4; } ipv4;
struct { struct {
struct { struct {
...@@ -118,23 +130,40 @@ struct sw_flow_key { ...@@ -118,23 +130,40 @@ struct sw_flow_key {
struct in6_addr dst; /* IPv6 destination address. */ struct in6_addr dst; /* IPv6 destination address. */
} addr; } addr;
__be32 label; /* IPv6 flow label. */ __be32 label; /* IPv6 flow label. */
struct { union {
struct in6_addr target; /* ND target address. */ struct {
u8 sll[ETH_ALEN]; /* ND source link layer address. */ struct in6_addr src;
u8 tll[ETH_ALEN]; /* ND target link layer address. */ struct in6_addr dst;
} nd; } ct_orig; /* Conntrack original direction fields. */
struct {
struct in6_addr target; /* ND target address. */
u8 sll[ETH_ALEN]; /* ND source link layer address. */
u8 tll[ETH_ALEN]; /* ND target link layer address. */
} nd;
};
} ipv6; } ipv6;
}; };
struct { struct {
/* Connection tracking fields. */ /* Connection tracking fields not packed above. */
u16 zone; struct {
__be16 src; /* CT orig tuple tp src port. */
__be16 dst; /* CT orig tuple tp dst port. */
} orig_tp;
u32 mark; u32 mark;
u8 state;
struct ovs_key_ct_labels labels; struct ovs_key_ct_labels labels;
} ct; } ct;
} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
/* Report whether @key passes the Neighbor Discovery test: Ethernet type is
 * IPv6, the IP protocol is NEXTHDR_ICMP, tp.dst is zero, and tp.src equals
 * either the neighbour solicitation or the neighbour advertisement constant.
 *
 * The ND fields share a union with the IPv6 conntrack original direction
 * tuple in struct sw_flow_key, so this tells which of the two a key may
 * legitimately carry.
 */
static inline bool sw_flow_key_is_nd(const struct sw_flow_key *key)
{
	/* ND is only defined for IPv6 frames. */
	if (key->eth.type != htons(ETH_P_IPV6))
		return false;

	if (key->ip.proto != NEXTHDR_ICMP)
		return false;

	/* presumably tp.src/tp.dst hold the ICMPv6 type/code here - confirm
	 * against key_extract().
	 */
	if (key->tp.dst != 0)
		return false;

	return key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
	       key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT);
}
struct sw_flow_key_range { struct sw_flow_key_range {
unsigned short int start; unsigned short int start;
unsigned short int end; unsigned short int end;
......
...@@ -129,7 +129,9 @@ static bool match_validate(const struct sw_flow_match *match, ...@@ -129,7 +129,9 @@ static bool match_validate(const struct sw_flow_match *match,
/* The following mask attributes allowed only if they /* The following mask attributes allowed only if they
* pass the validation tests. */ * pass the validation tests. */
mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
| (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
| (1 << OVS_KEY_ATTR_IPV6) | (1 << OVS_KEY_ATTR_IPV6)
| (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
| (1 << OVS_KEY_ATTR_TCP) | (1 << OVS_KEY_ATTR_TCP)
| (1 << OVS_KEY_ATTR_TCP_FLAGS) | (1 << OVS_KEY_ATTR_TCP_FLAGS)
| (1 << OVS_KEY_ATTR_UDP) | (1 << OVS_KEY_ATTR_UDP)
...@@ -161,8 +163,10 @@ static bool match_validate(const struct sw_flow_match *match, ...@@ -161,8 +163,10 @@ static bool match_validate(const struct sw_flow_match *match,
if (match->key->eth.type == htons(ETH_P_IP)) { if (match->key->eth.type == htons(ETH_P_IP)) {
key_expected |= 1 << OVS_KEY_ATTR_IPV4; key_expected |= 1 << OVS_KEY_ATTR_IPV4;
if (match->mask && (match->mask->key.eth.type == htons(0xffff))) if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
}
if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
if (match->key->ip.proto == IPPROTO_UDP) { if (match->key->ip.proto == IPPROTO_UDP) {
...@@ -196,8 +200,10 @@ static bool match_validate(const struct sw_flow_match *match, ...@@ -196,8 +200,10 @@ static bool match_validate(const struct sw_flow_match *match,
if (match->key->eth.type == htons(ETH_P_IPV6)) { if (match->key->eth.type == htons(ETH_P_IPV6)) {
key_expected |= 1 << OVS_KEY_ATTR_IPV6; key_expected |= 1 << OVS_KEY_ATTR_IPV6;
if (match->mask && (match->mask->key.eth.type == htons(0xffff))) if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
}
if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
if (match->key->ip.proto == IPPROTO_UDP) { if (match->key->ip.proto == IPPROTO_UDP) {
...@@ -230,6 +236,12 @@ static bool match_validate(const struct sw_flow_match *match, ...@@ -230,6 +236,12 @@ static bool match_validate(const struct sw_flow_match *match,
htons(NDISC_NEIGHBOUR_SOLICITATION) || htons(NDISC_NEIGHBOUR_SOLICITATION) ||
match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
key_expected |= 1 << OVS_KEY_ATTR_ND; key_expected |= 1 << OVS_KEY_ATTR_ND;
/* Original direction conntrack tuple
* uses the same space as the ND fields
* in the key, so both are not allowed
* at the same time.
*/
mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
if (match->mask && (match->mask->key.tp.src == htons(0xff))) if (match->mask && (match->mask->key.tp.src == htons(0xff)))
mask_allowed |= 1 << OVS_KEY_ATTR_ND; mask_allowed |= 1 << OVS_KEY_ATTR_ND;
} }
...@@ -282,7 +294,7 @@ size_t ovs_key_attr_size(void) ...@@ -282,7 +294,7 @@ size_t ovs_key_attr_size(void)
/* Whenever adding new OVS_KEY_ FIELDS, we should consider /* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function. * updating this function.
*/ */
BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26); BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
...@@ -295,6 +307,7 @@ size_t ovs_key_attr_size(void) ...@@ -295,6 +307,7 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
+ nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
+ nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
+ nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
...@@ -355,6 +368,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { ...@@ -355,6 +368,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
[OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
.len = sizeof(struct ovs_key_ct_tuple_ipv4) },
[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
}; };
static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
...@@ -430,9 +447,8 @@ static int parse_flow_mask_nlattrs(const struct nlattr *attr, ...@@ -430,9 +447,8 @@ static int parse_flow_mask_nlattrs(const struct nlattr *attr,
return __parse_flow_nlattrs(attr, a, attrsp, log, true); return __parse_flow_nlattrs(attr, a, attrsp, log, true);
} }
/* Parse the nested netlink attributes in @attr as a flow key (as opposed to
 * a mask): forwards to __parse_flow_nlattrs() with its last argument set to
 * false.
 *
 * @attr:   Nested netlink attribute sequence to parse.
 * @a:      Array handed through to __parse_flow_nlattrs(); presumably filled
 *          with the parsed attributes, indexed by attribute type.
 * @attrsp: Handed through to __parse_flow_nlattrs(); presumably receives the
 *          bit mask of the attributes found.
 * @log:    Handed through unchanged; controls kernel error logging.
 *
 * Returns whatever __parse_flow_nlattrs() returns.
 *
 * Not static: the function is also declared in the header so that callers
 * outside this file can parse the attributes once and reuse the result.
 */
int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
		       u64 *attrsp, bool log)
{
	return __parse_flow_nlattrs(attr, a, attrsp, log, false);
}
...@@ -1056,14 +1072,14 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, ...@@ -1056,14 +1072,14 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
return -EINVAL; return -EINVAL;
} }
SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask); SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
} }
if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) && if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) { ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]); u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask); SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
} }
if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) && if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
...@@ -1082,6 +1098,34 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, ...@@ -1082,6 +1098,34 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
sizeof(*cl), is_mask); sizeof(*cl), is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
} }
if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
const struct ovs_key_ct_tuple_ipv4 *ct;
ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);
SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
}
if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
const struct ovs_key_ct_tuple_ipv6 *ct;
ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);
SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
sizeof(match->key->ipv6.ct_orig.src),
is_mask);
SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
sizeof(match->key->ipv6.ct_orig.dst),
is_mask);
SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
}
/* For layer 3 packets the Ethernet type is provided /* For layer 3 packets the Ethernet type is provided
* and treated as metadata but no MAC addresses are provided. * and treated as metadata but no MAC addresses are provided.
...@@ -1493,9 +1537,12 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) ...@@ -1493,9 +1537,12 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
/** /**
* ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
* @key: Receives extracted in_port, priority, tun_key and skb_mark. * @net: Network namespace.
* @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
* sequence. * metadata.
* @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
* attributes.
* @attrs: Bit mask for the netlink attributes included in @a.
* @log: Boolean to allow kernel error logging. Normally true, but when * @log: Boolean to allow kernel error logging. Normally true, but when
* probing for feature compatibility this should be passed in as false to * probing for feature compatibility this should be passed in as false to
* suppress unnecessary error logging. * suppress unnecessary error logging.
...@@ -1504,25 +1551,26 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) ...@@ -1504,25 +1551,26 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
* take the same form accepted by flow_from_nlattrs(), but only enough of it to * take the same form accepted by flow_from_nlattrs(), but only enough of it to
* get the metadata, that is, the parts of the flow key that cannot be * get the metadata, that is, the parts of the flow key that cannot be
* extracted from the packet itself. * extracted from the packet itself.
*
* This must be called before the packet key fields are filled in 'key'.
*/ */
int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr, int ovs_nla_get_flow_metadata(struct net *net,
struct sw_flow_key *key, const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
bool log) u64 attrs, struct sw_flow_key *key, bool log)
{ {
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
struct sw_flow_match match; struct sw_flow_match match;
u64 attrs = 0;
int err;
err = parse_flow_nlattrs(attr, a, &attrs, log);
if (err)
return -EINVAL;
memset(&match, 0, sizeof(match)); memset(&match, 0, sizeof(match));
match.key = key; match.key = key;
key->ct_state = 0;
key->ct_zone = 0;
key->ct_orig_proto = 0;
memset(&key->ct, 0, sizeof(key->ct)); memset(&key->ct, 0, sizeof(key->ct));
memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
key->phy.in_port = DP_MAX_PORTS; key->phy.in_port = DP_MAX_PORTS;
return metadata_from_nlattrs(net, &match, &attrs, a, false, log); return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
...@@ -1584,7 +1632,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, ...@@ -1584,7 +1632,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
goto nla_put_failure; goto nla_put_failure;
if (ovs_ct_put_key(output, skb)) if (ovs_ct_put_key(swkey, output, skb))
goto nla_put_failure; goto nla_put_failure;
if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) { if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
......
...@@ -46,8 +46,11 @@ void ovs_match_init(struct sw_flow_match *match, ...@@ -46,8 +46,11 @@ void ovs_match_init(struct sw_flow_match *match,
int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
int attr, bool is_mask, struct sk_buff *); int attr, bool is_mask, struct sk_buff *);
int ovs_nla_get_flow_metadata(struct net *, const struct nlattr *, int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
struct sw_flow_key *, bool log); u64 *attrsp, bool log);
int ovs_nla_get_flow_metadata(struct net *net,
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
u64 attrs, struct sw_flow_key *key, bool log);
int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment