Commit 859191b2 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Incorrect netlink report logic in flowtable and genID.

2) Add a selftest to check that wireguard passes the right sk
   to ip_route_me_harder, from Jason A. Donenfeld.

3) Pass the actual sk to ip_route_me_harder(), also from Jason.

4) Missing expression validation of updates via nft --check.

5) Update byte and packet counters regardless of whether they
   match, from Stefano Brivio.
====================
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 20149e9e 7d10e62c
......@@ -24,6 +24,12 @@ struct nfnl_callback {
const u_int16_t attr_count; /* number of nlattr's */
};
enum nfnl_abort_action {
NFNL_ABORT_NONE = 0,
NFNL_ABORT_AUTOLOAD,
NFNL_ABORT_VALIDATE,
};
struct nfnetlink_subsystem {
const char *name;
__u8 subsys_id; /* nfnetlink subsystem ID */
......@@ -31,7 +37,8 @@ struct nfnetlink_subsystem {
const struct nfnl_callback *cb; /* callback for individual types */
struct module *owner;
int (*commit)(struct net *net, struct sk_buff *skb);
int (*abort)(struct net *net, struct sk_buff *skb, bool autoload);
int (*abort)(struct net *net, struct sk_buff *skb,
enum nfnl_abort_action action);
void (*cleanup)(struct net *net);
bool (*valid_genid)(struct net *net, u32 genid);
};
......
......@@ -16,7 +16,7 @@ struct ip_rt_info {
u_int32_t mark;
};
int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type);
struct nf_queue_entry;
......
......@@ -42,7 +42,7 @@ struct nf_ipv6_ops {
#if IS_MODULE(CONFIG_IPV6)
int (*chk_addr)(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict);
int (*route_me_harder)(struct net *net, struct sk_buff *skb);
int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb);
int (*dev_get_saddr)(struct net *net, const struct net_device *dev,
const struct in6_addr *daddr, unsigned int srcprefs,
struct in6_addr *saddr);
......@@ -143,9 +143,9 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk,
#endif
}
int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb);
static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb)
{
#if IS_MODULE(CONFIG_IPV6)
const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
......@@ -153,9 +153,9 @@ static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
if (!v6_ops)
return -EHOSTUNREACH;
return v6_ops->route_me_harder(net, skb);
return v6_ops->route_me_harder(net, sk, skb);
#elif IS_BUILTIN(CONFIG_IPV6)
return ip6_route_me_harder(net, skb);
return ip6_route_me_harder(net, sk, skb);
#else
return -EHOSTUNREACH;
#endif
......
......@@ -17,17 +17,19 @@
#include <net/netfilter/nf_queue.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type)
int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type)
{
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct flowi4 fl4 = {};
__be32 saddr = iph->saddr;
const struct sock *sk = skb_to_full_sk(skb);
__u8 flags = sk ? inet_sk_flowi_flags(sk) : 0;
__u8 flags;
struct net_device *dev = skb_dst(skb)->dev;
unsigned int hh_len;
sk = sk_to_full_sk(sk);
flags = sk ? inet_sk_flowi_flags(sk) : 0;
if (addr_type == RTN_UNSPEC)
addr_type = inet_addr_type_dev_table(net, dev, saddr);
if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
......
......@@ -62,7 +62,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
iph->daddr != daddr ||
skb->mark != mark ||
iph->tos != tos) {
err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
if (err < 0)
ret = NF_DROP_ERR(err);
}
......
......@@ -145,7 +145,7 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
ip4_dst_hoplimit(skb_dst(nskb)));
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
goto free_nskb;
niph = ip_hdr(nskb);
......
......@@ -20,10 +20,10 @@
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include "../bridge/br_private.h"
int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct sock *sk = sk_to_full_sk(skb->sk);
struct sock *sk = sk_to_full_sk(sk_partial);
unsigned int hh_len;
struct dst_entry *dst;
int strict = (ipv6_addr_type(&iph->daddr) &
......@@ -84,7 +84,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
skb->mark != rt_info->mark)
return ip6_route_me_harder(entry->state.net, skb);
return ip6_route_me_harder(entry->state.net, entry->state.sk, skb);
}
return 0;
}
......
......@@ -57,7 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
skb->mark != mark ||
ipv6_hdr(skb)->hop_limit != hop_limit ||
flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
err = ip6_route_me_harder(state->net, skb);
err = ip6_route_me_harder(state->net, state->sk, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
......
......@@ -637,13 +637,14 @@ ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
if (SET_WITH_COUNTER(set)) {
struct ip_set_counter *counter = ext_counter(data, set);
ip_set_update_counter(counter, ext, flags);
if (flags & IPSET_FLAG_MATCH_COUNTERS &&
!(ip_set_match_counter(ip_set_get_packets(counter),
mext->packets, mext->packets_op) &&
ip_set_match_counter(ip_set_get_bytes(counter),
mext->bytes, mext->bytes_op)))
return false;
ip_set_update_counter(counter, ext, flags);
}
if (SET_WITH_SKBINFO(set))
ip_set_get_skbinfo(ext_skbinfo(data, set),
......
......@@ -742,12 +742,12 @@ static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af,
struct dst_entry *dst = skb_dst(skb);
if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) &&
ip6_route_me_harder(ipvs->net, skb) != 0)
ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0)
return 1;
} else
#endif
if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0)
ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0)
return 1;
return 0;
......
......@@ -715,7 +715,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
if (ct->tuplehash[dir].tuple.dst.u3.ip !=
ct->tuplehash[!dir].tuple.src.u3.ip) {
err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
if (err < 0)
ret = NF_DROP_ERR(err);
}
......@@ -953,7 +953,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
&ct->tuplehash[!dir].tuple.src.u3)) {
err = nf_ip6_route_me_harder(state->net, skb);
err = nf_ip6_route_me_harder(state->net, state->sk, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
......
......@@ -446,7 +446,7 @@ synproxy_send_tcp(struct net *net,
skb_dst_set_noref(nskb, skb_dst(skb));
nskb->protocol = htons(ETH_P_IP);
if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
goto free_nskb;
if (nfct) {
......
......@@ -7137,7 +7137,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
GFP_KERNEL);
kfree(buf);
if (ctx->report &&
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return;
......@@ -7259,7 +7259,7 @@ static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
audit_log_nfcfg("?:0;?:0", 0, net->nft.base_seq,
AUDIT_NFT_OP_GEN_REGISTER, GFP_KERNEL);
if (nlmsg_report(nlh) &&
if (!nlmsg_report(nlh) &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
return;
......@@ -8053,12 +8053,16 @@ static void nf_tables_abort_release(struct nft_trans *trans)
kfree(trans);
}
static int __nf_tables_abort(struct net *net, bool autoload)
static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
{
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
struct nft_hook *hook;
if (action == NFNL_ABORT_VALIDATE &&
nf_tables_validate(net) < 0)
return -EAGAIN;
list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list,
list) {
switch (trans->msg_type) {
......@@ -8190,7 +8194,7 @@ static int __nf_tables_abort(struct net *net, bool autoload)
nf_tables_abort_release(trans);
}
if (autoload)
if (action == NFNL_ABORT_AUTOLOAD)
nf_tables_module_autoload(net);
else
nf_tables_module_autoload_cleanup(net);
......@@ -8203,9 +8207,10 @@ static void nf_tables_cleanup(struct net *net)
nft_validate_state_update(net, NFT_VALIDATE_SKIP);
}
static int nf_tables_abort(struct net *net, struct sk_buff *skb, bool autoload)
static int nf_tables_abort(struct net *net, struct sk_buff *skb,
enum nfnl_abort_action action)
{
int ret = __nf_tables_abort(net, autoload);
int ret = __nf_tables_abort(net, action);
mutex_unlock(&net->nft.commit_mutex);
......@@ -8836,7 +8841,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
{
mutex_lock(&net->nft.commit_mutex);
if (!list_empty(&net->nft.commit_list))
__nf_tables_abort(net, false);
__nf_tables_abort(net, NFNL_ABORT_NONE);
__nft_release_tables(net);
mutex_unlock(&net->nft.commit_mutex);
WARN_ON_ONCE(!list_empty(&net->nft.tables));
......
......@@ -333,7 +333,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
return netlink_ack(skb, nlh, -EINVAL, NULL);
replay:
status = 0;
replay_abort:
skb = netlink_skb_clone(oskb, GFP_KERNEL);
if (!skb)
return netlink_ack(oskb, nlh, -ENOMEM, NULL);
......@@ -499,7 +499,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
}
done:
if (status & NFNL_BATCH_REPLAY) {
ss->abort(net, oskb, true);
ss->abort(net, oskb, NFNL_ABORT_AUTOLOAD);
nfnl_err_reset(&err_list);
kfree_skb(skb);
module_put(ss->owner);
......@@ -510,11 +510,25 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
status |= NFNL_BATCH_REPLAY;
goto done;
} else if (err) {
ss->abort(net, oskb, false);
ss->abort(net, oskb, NFNL_ABORT_NONE);
netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
}
} else {
ss->abort(net, oskb, false);
enum nfnl_abort_action abort_action;
if (status & NFNL_BATCH_FAILURE)
abort_action = NFNL_ABORT_NONE;
else
abort_action = NFNL_ABORT_VALIDATE;
err = ss->abort(net, oskb, abort_action);
if (err == -EAGAIN) {
nfnl_err_reset(&err_list);
kfree_skb(skb);
module_put(ss->owner);
status |= NFNL_BATCH_FAILURE;
goto replay_abort;
}
}
if (ss->cleanup)
ss->cleanup(net);
......
......@@ -42,7 +42,7 @@ static unsigned int nf_route_table_hook4(void *priv,
iph->daddr != daddr ||
skb->mark != mark ||
iph->tos != tos) {
err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
if (err < 0)
ret = NF_DROP_ERR(err);
}
......@@ -92,7 +92,7 @@ static unsigned int nf_route_table_hook6(void *priv,
skb->mark != mark ||
ipv6_hdr(skb)->hop_limit != hop_limit ||
flowlabel != *((u32 *)ipv6_hdr(skb)))) {
err = nf_ip6_route_me_harder(state->net, skb);
err = nf_ip6_route_me_harder(state->net, state->sk, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
......
......@@ -191,8 +191,8 @@ static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry
skb->mark == rt_info->mark &&
iph->daddr == rt_info->daddr &&
iph->saddr == rt_info->saddr))
return ip_route_me_harder(entry->state.net, skb,
RTN_UNSPEC);
return ip_route_me_harder(entry->state.net, entry->state.sk,
skb, RTN_UNSPEC);
}
#endif
return 0;
......
......@@ -316,6 +316,14 @@ pp sleep 3
n2 ping -W 1 -c 1 192.168.241.1
n1 wg set wg0 peer "$pub2" persistent-keepalive 0
# Test that sk_bound_dev_if works
n1 ping -I wg0 -c 1 -W 1 192.168.241.2
# What about when the mark changes and the packet must be rerouted?
n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1
n1 ping -c 1 -W 1 192.168.241.2 # First the boring case
n1 ping -I wg0 -c 1 -W 1 192.168.241.2 # Then the sk_bound_dev_if case
n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1
# Test that onion routing works, even when it loops
n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5
ip1 addr add 192.168.242.1/24 dev wg0
......
......@@ -18,10 +18,12 @@ CONFIG_NF_NAT=y
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_NAT=y
CONFIG_NETFILTER_XT_MATCH_LENGTH=y
CONFIG_NETFILTER_XT_MARK=y
CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_NF_NAT_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_MANGLE=y
CONFIG_IP_NF_NAT=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment