Commit 4761df52 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Use kfree_rcu(ptr, rcu) variant, using kfree_rcu(ptr) was not
   intentional. From Eric Dumazet.

2) Use-after-free in netfilter hook core, from Eric Dumazet.

3) Missing rcu read lock side for netfilter egress hook,
   from Florian Westphal.

4) nf_queue assume state->sk is full socket while it might not be.
   Invoke sock_gen_put(), from Florian Westphal.

5) Add selftest to exercise the reported KASAN splat in 4)

6) Fix possible use-after-free in nf_queue in case sk_refcnt is 0.
   Also from Florian.

7) Use input interface index only for hardware offload, not for
   the software plane. This breaks tc ct action. Patch from Paul Blakey.

* git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  net/sched: act_ct: Fix flow table lookup failure with no originating ifindex
  netfilter: nf_queue: handle socket prefetch
  netfilter: nf_queue: fix possible use-after-free
  selftests: netfilter: add nfqueue TCP_NEW_SYN_RECV socket race test
  netfilter: nf_queue: don't assume sk is full socket
  netfilter: egress: silence egress hook lockdep splats
  netfilter: fix use-after-free in __nf_register_net_hook()
  netfilter: nf_tables: prefer kfree_rcu(ptr, rcu) variant
====================

Link: https://lore.kernel.org/r/20220301215337.378405-1-pablo@netfilter.orgSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents b8d06ce7 db6140e5
......@@ -101,7 +101,11 @@ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
nf_hook_state_init(&state, NF_NETDEV_EGRESS,
NFPROTO_NETDEV, dev, NULL, NULL,
dev_net(dev), NULL);
/* nf assumes rcu_read_lock, not just read_lock_bh */
rcu_read_lock();
ret = nf_hook_slow(skb, &state, e, 0);
rcu_read_unlock();
if (ret == 1) {
return skb;
......
......@@ -96,6 +96,7 @@ enum flow_offload_xmit_type {
FLOW_OFFLOAD_XMIT_NEIGH,
FLOW_OFFLOAD_XMIT_XFRM,
FLOW_OFFLOAD_XMIT_DIRECT,
FLOW_OFFLOAD_XMIT_TC,
};
#define NF_FLOW_TABLE_ENCAP_MAX 2
......@@ -127,7 +128,7 @@ struct flow_offload_tuple {
struct { } __hash;
u8 dir:2,
xmit_type:2,
xmit_type:3,
encap_num:2,
in_vlan_ingress:2;
u16 mtu;
......@@ -142,6 +143,9 @@ struct flow_offload_tuple {
u8 h_source[ETH_ALEN];
u8 h_dest[ETH_ALEN];
} out;
struct {
u32 iifidx;
} tc;
};
};
......
......@@ -37,7 +37,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh);
void nf_unregister_queue_handler(void);
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_free(struct nf_queue_entry *entry);
static inline void init_hashrandom(u32 *jhash_initval)
......
......@@ -428,14 +428,15 @@ static int __nf_register_net_hook(struct net *net, int pf,
p = nf_entry_dereference(*pp);
new_hooks = nf_hook_entries_grow(p, reg);
if (!IS_ERR(new_hooks))
if (!IS_ERR(new_hooks)) {
hooks_validate(new_hooks);
rcu_assign_pointer(*pp, new_hooks);
}
mutex_unlock(&nf_hook_mutex);
if (IS_ERR(new_hooks))
return PTR_ERR(new_hooks);
hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
if (nf_ingress_hook(reg, pf))
net_inc_ingress_queue();
......
......@@ -110,7 +110,11 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
nf_flow_rule_lwt_match(match, tun_info);
}
key->meta.ingress_ifindex = tuple->iifidx;
if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_TC)
key->meta.ingress_ifindex = tuple->tc.iifidx;
else
key->meta.ingress_ifindex = tuple->iifidx;
mask->meta.ingress_ifindex = 0xffffffff;
if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
......
......@@ -46,6 +46,15 @@ void nf_unregister_queue_handler(void)
}
EXPORT_SYMBOL(nf_unregister_queue_handler);
static void nf_queue_sock_put(struct sock *sk)
{
#ifdef CONFIG_INET
sock_gen_put(sk);
#else
sock_put(sk);
#endif
}
static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
......@@ -54,7 +63,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
dev_put(state->in);
dev_put(state->out);
if (state->sk)
sock_put(state->sk);
nf_queue_sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dev_put(entry->physin);
......@@ -87,19 +96,21 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
}
/* Bump dev refs so they don't vanish while packet is out */
void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
return false;
dev_hold(state->in);
dev_hold(state->out);
if (state->sk)
sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dev_hold(entry->physin);
dev_hold(entry->physout);
#endif
return true;
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
......@@ -169,6 +180,18 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
break;
}
if (skb_sk_is_prefetched(skb)) {
struct sock *sk = skb->sk;
if (!sk_is_refcounted(sk)) {
if (!refcount_inc_not_zero(&sk->sk_refcnt))
return -ENOTCONN;
/* drop refcount on skb_orphan */
skb->destructor = sock_edemux;
}
}
entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
if (!entry)
return -ENOMEM;
......@@ -187,7 +210,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
__nf_queue_entry_init_physdevs(entry);
nf_queue_entry_get_refs(entry);
if (!nf_queue_entry_get_refs(entry)) {
kfree(entry);
return -ENOTCONN;
}
switch (entry->state.pf) {
case AF_INET:
......
......@@ -4502,7 +4502,7 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
list_del_rcu(&catchall->list);
nft_set_elem_destroy(set, catchall->elem, true);
kfree_rcu(catchall);
kfree_rcu(catchall, rcu);
}
}
......@@ -5669,7 +5669,7 @@ static void nft_setelem_catchall_remove(const struct net *net,
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
if (catchall->elem == elem->priv) {
list_del_rcu(&catchall->list);
kfree_rcu(catchall);
kfree_rcu(catchall, rcu);
break;
}
}
......
......@@ -710,9 +710,15 @@ static struct nf_queue_entry *
nf_queue_entry_dup(struct nf_queue_entry *e)
{
struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
if (entry)
nf_queue_entry_get_refs(entry);
return entry;
if (!entry)
return NULL;
if (nf_queue_entry_get_refs(entry))
return entry;
kfree(entry);
return NULL;
}
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
......
......@@ -361,6 +361,13 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
}
}
static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
struct nf_conn_act_ct_ext *act_ct_ext, u8 dir)
{
entry->tuplehash[dir].tuple.xmit_type = FLOW_OFFLOAD_XMIT_TC;
entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir];
}
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
bool tcp)
......@@ -385,10 +392,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
act_ct_ext = nf_conn_act_ct_ext_find(ct);
if (act_ct_ext) {
entry->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
act_ct_ext->ifindex[IP_CT_DIR_ORIGINAL];
entry->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
act_ct_ext->ifindex[IP_CT_DIR_REPLY];
tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL);
tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY);
}
err = flow_offload_add(&ct_ft->nf_ft, entry);
......
# SPDX-License-Identifier: GPL-2.0-only
nf-queue
connect_close
......@@ -9,6 +9,6 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
conntrack_vrf.sh nft_synproxy.sh
LDLIBS = -lmnl
TEST_GEN_FILES = nf-queue
TEST_GEN_FILES = nf-queue connect_close
include ../lib.mk
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#define PORT 12345
#define RUNTIME 10
static struct {
unsigned int timeout;
unsigned int port;
} opts = {
.timeout = RUNTIME,
.port = PORT,
};
static void handler(int sig)
{
_exit(sig == SIGALRM ? 0 : 1);
}
static void set_timeout(void)
{
struct sigaction action = {
.sa_handler = handler,
};
sigaction(SIGALRM, &action, NULL);
alarm(opts.timeout);
}
static void do_connect(const struct sockaddr_in *dst)
{
int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (s >= 0)
fcntl(s, F_SETFL, O_NONBLOCK);
connect(s, (struct sockaddr *)dst, sizeof(*dst));
close(s);
}
static void do_accept(const struct sockaddr_in *src)
{
int c, one = 1, s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (s < 0)
return;
setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
bind(s, (struct sockaddr *)src, sizeof(*src));
listen(s, 16);
c = accept(s, NULL, NULL);
if (c >= 0)
close(c);
close(s);
}
static int accept_loop(void)
{
struct sockaddr_in src = {
.sin_family = AF_INET,
.sin_port = htons(opts.port),
};
inet_pton(AF_INET, "127.0.0.1", &src.sin_addr);
set_timeout();
for (;;)
do_accept(&src);
return 1;
}
static int connect_loop(void)
{
struct sockaddr_in dst = {
.sin_family = AF_INET,
.sin_port = htons(opts.port),
};
inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
set_timeout();
for (;;)
do_connect(&dst);
return 1;
}
static void parse_opts(int argc, char **argv)
{
int c;
while ((c = getopt(argc, argv, "t:p:")) != -1) {
switch (c) {
case 't':
opts.timeout = atoi(optarg);
break;
case 'p':
opts.port = atoi(optarg);
break;
}
}
}
int main(int argc, char *argv[])
{
pid_t p;
parse_opts(argc, argv);
p = fork();
if (p < 0)
return 111;
if (p > 0)
return accept_loop();
return connect_loop();
}
......@@ -113,6 +113,7 @@ table inet $name {
chain output {
type filter hook output priority $prio; policy accept;
tcp dport 12345 queue num 3
tcp sport 23456 queue num 3
jump nfq
}
chain post {
......@@ -296,6 +297,23 @@ test_tcp_localhost()
wait 2>/dev/null
}
test_tcp_localhost_connectclose()
{
tmpfile=$(mktemp) || exit 1
ip netns exec ${nsrouter} ./connect_close -p 23456 -t $timeout &
ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
local nfqpid=$!
sleep 1
rm -f "$tmpfile"
wait $rpid
[ $? -eq 0 ] && echo "PASS: tcp via loopback with connect/close"
wait 2>/dev/null
}
test_tcp_localhost_requeue()
{
ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
......@@ -424,6 +442,7 @@ test_queue 20
test_tcp_forward
test_tcp_localhost
test_tcp_localhost_connectclose
test_tcp_localhost_requeue
test_icmp_vrf
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment