Commit fd31cb0c authored by Jakub Kicinski

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Fix bogus compiler warning in nfnetlink_queue, from Florian Westphal.

2) Don't run conntrack on vrf with !dflt qdisc, from Nicolas Dichtel.

3) Fix nft_pipapo bucket load in AVX2 lookup routine for six 8-bit
   groups, from Stefano Brivio.

4) Break rule evaluation on malformed TCP options.

5) Use socat instead of nc in selftests/netfilter/nft_zones_many.sh,
   also from Florian.

6) Fix KCSAN data-race in conntrack timeout updates, from Eric Dumazet.

* git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf:
  netfilter: conntrack: annotate data-races around ct->timeout
  selftests: netfilter: switch zone stress to socat
  netfilter: nft_exthdr: break evaluation if setting TCP option fails
  selftests: netfilter: Add correctness test for mac,net set type
  nft_set_pipapo: Fix bucket load in AVX2 lookup routine for six 8-bit groups
  vrf: don't run conntrack on vrf with !dflt qdisc
  netfilter: nfnetlink_queue: silence bogus compiler warning
====================

Link: https://lore.kernel.org/r/20211209000847.102598-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents b5b6b6ba 802a7dc5
...@@ -770,8 +770,6 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, ...@@ -770,8 +770,6 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
skb->dev = vrf_dev; skb->dev = vrf_dev;
vrf_nf_set_untracked(skb);
err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);
...@@ -792,6 +790,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, ...@@ -792,6 +790,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
return skb; return skb;
vrf_nf_set_untracked(skb);
if (qdisc_tx_is_default(vrf_dev) || if (qdisc_tx_is_default(vrf_dev) ||
IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
return vrf_ip6_out_direct(vrf_dev, sk, skb); return vrf_ip6_out_direct(vrf_dev, sk, skb);
...@@ -1000,8 +1000,6 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, ...@@ -1000,8 +1000,6 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
skb->dev = vrf_dev; skb->dev = vrf_dev;
vrf_nf_set_untracked(skb);
err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
skb, NULL, vrf_dev, vrf_ip_out_direct_finish); skb, NULL, vrf_dev, vrf_ip_out_direct_finish);
...@@ -1023,6 +1021,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, ...@@ -1023,6 +1021,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
ipv4_is_lbcast(ip_hdr(skb)->daddr)) ipv4_is_lbcast(ip_hdr(skb)->daddr))
return skb; return skb;
vrf_nf_set_untracked(skb);
if (qdisc_tx_is_default(vrf_dev) || if (qdisc_tx_is_default(vrf_dev) ||
IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
return vrf_ip_out_direct(vrf_dev, sk, skb); return vrf_ip_out_direct(vrf_dev, sk, skb);
......
...@@ -276,14 +276,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) ...@@ -276,14 +276,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
/* jiffies until ct expires, 0 if already expired */ /* jiffies until ct expires, 0 if already expired */
static inline unsigned long nf_ct_expires(const struct nf_conn *ct) static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
{ {
s32 timeout = ct->timeout - nfct_time_stamp; s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
return timeout > 0 ? timeout : 0; return timeout > 0 ? timeout : 0;
} }
static inline bool nf_ct_is_expired(const struct nf_conn *ct) static inline bool nf_ct_is_expired(const struct nf_conn *ct)
{ {
return (__s32)(ct->timeout - nfct_time_stamp) <= 0; return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0;
} }
/* use after obtaining a reference count */ /* use after obtaining a reference count */
...@@ -302,7 +302,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) ...@@ -302,7 +302,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
static inline void nf_ct_offload_timeout(struct nf_conn *ct) static inline void nf_ct_offload_timeout(struct nf_conn *ct)
{ {
if (nf_ct_expires(ct) < NF_CT_DAY / 2) if (nf_ct_expires(ct) < NF_CT_DAY / 2)
ct->timeout = nfct_time_stamp + NF_CT_DAY; WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
} }
struct kernel_param; struct kernel_param;
......
...@@ -684,7 +684,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) ...@@ -684,7 +684,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
tstamp = nf_conn_tstamp_find(ct); tstamp = nf_conn_tstamp_find(ct);
if (tstamp) { if (tstamp) {
s32 timeout = ct->timeout - nfct_time_stamp; s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
tstamp->stop = ktime_get_real_ns(); tstamp->stop = ktime_get_real_ns();
if (timeout < 0) if (timeout < 0)
...@@ -1036,7 +1036,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) ...@@ -1036,7 +1036,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
} }
/* We want the clashing entry to go away real soon: 1 second timeout. */ /* We want the clashing entry to go away real soon: 1 second timeout. */
loser_ct->timeout = nfct_time_stamp + HZ; WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);
/* IPS_NAT_CLASH removes the entry automatically on the first /* IPS_NAT_CLASH removes the entry automatically on the first
* reply. Also prevents UDP tracker from moving the entry to * reply. Also prevents UDP tracker from moving the entry to
...@@ -1560,7 +1560,7 @@ __nf_conntrack_alloc(struct net *net, ...@@ -1560,7 +1560,7 @@ __nf_conntrack_alloc(struct net *net,
/* save hash for reusing when confirming */ /* save hash for reusing when confirming */
*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
ct->status = 0; ct->status = 0;
ct->timeout = 0; WRITE_ONCE(ct->timeout, 0);
write_pnet(&ct->ct_net, net); write_pnet(&ct->ct_net, net);
memset(&ct->__nfct_init_offset, 0, memset(&ct->__nfct_init_offset, 0,
offsetof(struct nf_conn, proto) - offsetof(struct nf_conn, proto) -
......
...@@ -1998,7 +1998,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, ...@@ -1998,7 +1998,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
if (timeout > INT_MAX) if (timeout > INT_MAX)
timeout = INT_MAX; timeout = INT_MAX;
ct->timeout = nfct_time_stamp + (u32)timeout; WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
if (test_bit(IPS_DYING_BIT, &ct->status)) if (test_bit(IPS_DYING_BIT, &ct->status))
return -ETIME; return -ETIME;
......
...@@ -201,8 +201,8 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) ...@@ -201,8 +201,8 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
if (timeout < 0) if (timeout < 0)
timeout = 0; timeout = 0;
if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
ct->timeout = nfct_time_stamp + timeout; WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
} }
static void flow_offload_fixup_ct_state(struct nf_conn *ct) static void flow_offload_fixup_ct_state(struct nf_conn *ct)
......
...@@ -387,7 +387,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, ...@@ -387,7 +387,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct net_device *indev; struct net_device *indev;
struct net_device *outdev; struct net_device *outdev;
struct nf_conn *ct = NULL; struct nf_conn *ct = NULL;
enum ip_conntrack_info ctinfo; enum ip_conntrack_info ctinfo = 0;
struct nfnl_ct_hook *nfnl_ct; struct nfnl_ct_hook *nfnl_ct;
bool csum_verify; bool csum_verify;
char *secdata = NULL; char *secdata = NULL;
......
...@@ -236,7 +236,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, ...@@ -236,7 +236,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!tcph) if (!tcph)
return; goto err;
opt = (u8 *)tcph; opt = (u8 *)tcph;
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
...@@ -251,16 +251,16 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, ...@@ -251,16 +251,16 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
continue; continue;
if (i + optl > tcphdr_len || priv->len + priv->offset > optl) if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
return; goto err;
if (skb_ensure_writable(pkt->skb, if (skb_ensure_writable(pkt->skb,
nft_thoff(pkt) + i + priv->len)) nft_thoff(pkt) + i + priv->len))
return; goto err;
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
&tcphdr_len); &tcphdr_len);
if (!tcph) if (!tcph)
return; goto err;
offset = i + priv->offset; offset = i + priv->offset;
...@@ -303,6 +303,9 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, ...@@ -303,6 +303,9 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
return; return;
} }
return;
err:
regs->verdict.code = NFT_BREAK;
} }
static void nft_exthdr_sctp_eval(const struct nft_expr *expr, static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
......
...@@ -886,7 +886,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, ...@@ -886,7 +886,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill,
NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize); NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize);
NFT_PIPAPO_AVX2_AND(5, 0, 1); NFT_PIPAPO_AVX2_AND(5, 0, 1);
NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 6, pkt[5], bsize); NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 5, pkt[5], bsize);
NFT_PIPAPO_AVX2_AND(7, 2, 3); NFT_PIPAPO_AVX2_AND(7, 2, 3);
/* Stall */ /* Stall */
......
...@@ -150,11 +150,27 @@ EOF ...@@ -150,11 +150,27 @@ EOF
# oifname is the vrf device. # oifname is the vrf device.
test_masquerade_vrf() test_masquerade_vrf()
{ {
local qdisc=$1
if [ "$qdisc" != "default" ]; then
tc -net $ns0 qdisc add dev tvrf root $qdisc
fi
ip netns exec $ns0 conntrack -F 2>/dev/null ip netns exec $ns0 conntrack -F 2>/dev/null
ip netns exec $ns0 nft -f - <<EOF ip netns exec $ns0 nft -f - <<EOF
flush ruleset flush ruleset
table ip nat { table ip nat {
chain rawout {
type filter hook output priority raw;
oif tvrf ct state untracked counter
}
chain postrouting2 {
type filter hook postrouting priority mangle;
oif tvrf ct state untracked counter
}
chain postrouting { chain postrouting {
type nat hook postrouting priority 0; type nat hook postrouting priority 0;
# NB: masquerade should always be combined with 'oif(name) bla', # NB: masquerade should always be combined with 'oif(name) bla',
...@@ -171,13 +187,18 @@ EOF ...@@ -171,13 +187,18 @@ EOF
fi fi
# must also check that nat table was evaluated on second (lower device) iteration. # must also check that nat table was evaluated on second (lower device) iteration.
ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' &&
ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]'
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device" echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
else else
echo "FAIL: vrf masq rule has unexpected counter value" echo "FAIL: vrf rules have unexpected counter value"
ret=1 ret=1
fi fi
if [ "$qdisc" != "default" ]; then
tc -net $ns0 qdisc del dev tvrf root
fi
} }
# add masq rule that gets evaluated w. outif set to veth device. # add masq rule that gets evaluated w. outif set to veth device.
...@@ -213,7 +234,8 @@ EOF ...@@ -213,7 +234,8 @@ EOF
} }
test_ct_zone_in test_ct_zone_in
test_masquerade_vrf test_masquerade_vrf "default"
test_masquerade_vrf "pfifo"
test_masquerade_veth test_masquerade_veth
exit $ret exit $ret
...@@ -23,8 +23,8 @@ TESTS="reported_issues correctness concurrency timeout" ...@@ -23,8 +23,8 @@ TESTS="reported_issues correctness concurrency timeout"
# Set types, defined by TYPE_ variables below # Set types, defined by TYPE_ variables below
TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp
net_port_mac_proto_net" net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below # Reported bugs, also described by TYPE_ variables below
BUGS="flush_remove_add" BUGS="flush_remove_add"
...@@ -277,6 +277,23 @@ perf_entries 1000 ...@@ -277,6 +277,23 @@ perf_entries 1000
perf_proto ipv4 perf_proto ipv4
" "
TYPE_mac_net="
display mac,net
type_spec ether_addr . ipv4_addr
chain_spec ether saddr . ip saddr
dst
src mac addr4
start 1
count 5
src_delta 2000
tools sendip nc bash
proto udp
race_repeat 0
perf_duration 0
"
TYPE_net_mac_icmp=" TYPE_net_mac_icmp="
display net,mac - ICMP display net,mac - ICMP
type_spec ipv4_addr . ether_addr type_spec ipv4_addr . ether_addr
...@@ -984,7 +1001,8 @@ format() { ...@@ -984,7 +1001,8 @@ format() {
fi fi
done done
for f in ${src}; do for f in ${src}; do
__expr="${__expr} . " [ "${__expr}" != "{ " ] && __expr="${__expr} . "
__start="$(eval format_"${f}" "${srcstart}")" __start="$(eval format_"${f}" "${srcstart}")"
__end="$(eval format_"${f}" "${srcend}")" __end="$(eval format_"${f}" "${srcend}")"
......
...@@ -18,11 +18,17 @@ cleanup() ...@@ -18,11 +18,17 @@ cleanup()
ip netns del $ns ip netns del $ns
} }
ip netns add $ns checktool (){
if [ $? -ne 0 ];then if ! $1 > /dev/null 2>&1; then
echo "SKIP: Could not create net namespace $gw" echo "SKIP: Could not $2"
exit $ksft_skip exit $ksft_skip
fi fi
}
checktool "nft --version" "run test without nft tool"
checktool "ip -Version" "run test without ip tool"
checktool "socat -V" "run test without socat tool"
checktool "ip netns add $ns" "create net namespace"
trap cleanup EXIT trap cleanup EXIT
...@@ -71,7 +77,8 @@ EOF ...@@ -71,7 +77,8 @@ EOF
local start=$(date +%s%3N) local start=$(date +%s%3N)
i=$((i + 10000)) i=$((i + 10000))
j=$((j + 1)) j=$((j + 1))
dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null # nft rule in output places each packet in a different zone.
dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
if [ $? -ne 0 ] ;then if [ $? -ne 0 ] ;then
ret=1 ret=1
break break
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment