Commit 2d6d7d6c authored by Paolo Abeni's avatar Paolo Abeni

Merge branch 'net-handle-the-exp-removal-problem-with-ovs-upcall-properly'

Xin Long says:

====================
net: handle the exp removal problem with ovs upcall properly

With the OVS upcall, the original ct in the skb will be dropped, and when
the skb comes back from userspace it has to create a new ct again through
nf_conntrack_in() in either OVS __ovs_ct_lookup() or TC tcf_ct_act().

However, the new ct will not be able to have the exp as the original ct
has taken it away from the hash table in nf_ct_find_expectation(). This
will cause some flow never to be matched, like:

  'ip,ct_state=-trk,in_port=1 actions=ct(zone=1)'
  'ip,ct_state=+trk+new+rel,in_port=1 actions=ct(commit,zone=1)'
  'ip,ct_state=+trk+new+rel,in_port=1 actions=ct(commit,zone=2),normal'

if the 2nd flow triggers the OVS upcall, the 3rd flow will never get
matched.

OVS conntrack works around this by adding its own exp lookup function to
not remove the exp from the hash table and saving the exp and its master
info to the flow keys instead of create a real ct. But this way doesn't
work for TC act_ct.

The patch 1/3 allows nf_ct_find_expectation() not to remove the exp from
the hash table if tmpl is set with IPS_CONFIRMED when doing lookup. This
allows both OVS conntrack and TC act_ct to have a simple and clear fix
for this problem in the patch 2/3 and 3/3.
====================

Link: https://lore.kernel.org/r/cover.1689541664.git.lucien.xin@gmail.comSigned-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents 03b123de 8c8b7332
...@@ -100,7 +100,7 @@ nf_ct_expect_find_get(struct net *net, ...@@ -100,7 +100,7 @@ nf_ct_expect_find_get(struct net *net,
struct nf_conntrack_expect * struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple); const struct nf_conntrack_tuple *tuple, bool unlink);
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
u32 portid, int report); u32 portid, int report);
......
...@@ -1756,7 +1756,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, ...@@ -1756,7 +1756,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
cnet = nf_ct_pernet(net); cnet = nf_ct_pernet(net);
if (cnet->expect_count) { if (cnet->expect_count) {
spin_lock_bh(&nf_conntrack_expect_lock); spin_lock_bh(&nf_conntrack_expect_lock);
exp = nf_ct_find_expectation(net, zone, tuple); exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl));
if (exp) { if (exp) {
/* Welcome, Mr. Bond. We've been expecting you... */ /* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &ct->status); __set_bit(IPS_EXPECTED_BIT, &ct->status);
......
...@@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); ...@@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
struct nf_conntrack_expect * struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple) const struct nf_conntrack_tuple *tuple, bool unlink)
{ {
struct nf_conntrack_net *cnet = nf_ct_pernet(net); struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct nf_conntrack_expect *i, *exp = NULL; struct nf_conntrack_expect *i, *exp = NULL;
...@@ -211,7 +211,7 @@ nf_ct_find_expectation(struct net *net, ...@@ -211,7 +211,7 @@ nf_ct_find_expectation(struct net *net,
!refcount_inc_not_zero(&exp->master->ct_general.use))) !refcount_inc_not_zero(&exp->master->ct_general.use)))
return NULL; return NULL;
if (exp->flags & NF_CT_EXPECT_PERMANENT) { if (exp->flags & NF_CT_EXPECT_PERMANENT || !unlink) {
refcount_inc(&exp->use); refcount_inc(&exp->use);
return exp; return exp;
} else if (del_timer(&exp->timeout)) { } else if (del_timer(&exp->timeout)) {
......
...@@ -262,6 +262,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ...@@ -262,6 +262,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
regs->verdict.code = NF_DROP; regs->verdict.code = NF_DROP;
return; return;
} }
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
} }
nf_ct_set(skb, ct, IP_CT_NEW); nf_ct_set(skb, ct, IP_CT_NEW);
...@@ -368,6 +369,7 @@ static bool nft_ct_tmpl_alloc_pcpu(void) ...@@ -368,6 +369,7 @@ static bool nft_ct_tmpl_alloc_pcpu(void)
return false; return false;
} }
__set_bit(IPS_CONFIRMED_BIT, &tmp->status);
per_cpu(nft_ct_pcpu_template, cpu) = tmp; per_cpu(nft_ct_pcpu_template, cpu) = tmp;
} }
......
...@@ -455,45 +455,6 @@ static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key, ...@@ -455,45 +455,6 @@ static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
return 0; return 0;
} }
static struct nf_conntrack_expect *
ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
u16 proto, const struct sk_buff *skb)
{
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
return NULL;
exp = __nf_ct_expect_find(net, zone, &tuple);
if (exp) {
struct nf_conntrack_tuple_hash *h;
/* Delete existing conntrack entry, if it clashes with the
* expectation. This can happen since conntrack ALGs do not
* check for clashes between (new) expectations and existing
* conntrack entries. nf_conntrack_in() will check the
* expectations only if a conntrack entry can not be found,
* which can lead to OVS finding the expectation (here) in the
* init direction, but which will not be removed by the
* nf_conntrack_in() call, if a matching conntrack entry is
* found instead. In this case all init direction packets
* would be reported as new related packets, while reply
* direction packets would be reported as un-related
* established packets.
*/
h = nf_conntrack_find_get(net, zone, &tuple);
if (h) {
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
nf_ct_delete(ct, 0, 0);
nf_ct_put(ct);
}
}
return exp;
}
/* This replicates logic from nf_conntrack_core.c that is not exported. */ /* This replicates logic from nf_conntrack_core.c that is not exported. */
static enum ip_conntrack_info static enum ip_conntrack_info
ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h) ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
...@@ -852,25 +813,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, ...@@ -852,25 +813,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info, const struct ovs_conntrack_info *info,
struct sk_buff *skb) struct sk_buff *skb)
{ {
struct nf_conntrack_expect *exp;
/* If we pass an expected packet through nf_conntrack_in() the
* expectation is typically removed, but the packet could still be
* lost in upcall processing. To prevent this from happening we
* perform an explicit expectation lookup. Expected connections are
* always new, and will be passed through conntrack only when they are
* committed, as it is OK to remove the expectation at that time.
*/
exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
if (exp) {
u8 state;
/* NOTE: New connections are NATted and Helped only when
* committed, so we are not calling into NAT here.
*/
state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
__ovs_ct_update_key(key, state, &info->zone, exp->master);
} else {
struct nf_conn *ct; struct nf_conn *ct;
int err; int err;
...@@ -881,7 +823,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, ...@@ -881,7 +823,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
ct = (struct nf_conn *)skb_nfct(skb); ct = (struct nf_conn *)skb_nfct(skb);
if (ct) if (ct)
nf_ct_deliver_cached_events(ct); nf_ct_deliver_cached_events(ct);
}
return 0; return 0;
} }
...@@ -1460,6 +1401,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, ...@@ -1460,6 +1401,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (err) if (err)
goto err_free_ct; goto err_free_ct;
if (ct_info.commit)
__set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
return 0; return 0;
err_free_ct: err_free_ct:
......
...@@ -1238,6 +1238,7 @@ static int tcf_ct_fill_params(struct net *net, ...@@ -1238,6 +1238,7 @@ static int tcf_ct_fill_params(struct net *net,
} }
} }
if (p->ct_action & TCA_CT_ACT_COMMIT)
__set_bit(IPS_CONFIRMED_BIT, &tmpl->status); __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
return 0; return 0;
err: err:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment