Commit 1ffdd8e1 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for your net-next
tree, the most relevant things in this batch are:

1) Compile masquerade infrastructure into NAT module, from Florian Westphal.
   Same thing with the redirection support.

2) Abort transaction if early initialization of the commit phase fails.
   Also from Florian.

3) Get rid of synchronize_rcu() by using rule array in nf_tables, from
   Florian.

4) Abort nf_tables batch if fatal signal is pending, from Florian.

5) Use .call_rcu nfnetlink from nf_tables to make dumps fully lockless.
   From Florian Westphal.

6) Support to match transparent sockets from nf_tables, from Máté Eckl.

7) Audit support for nf_tables, from Phil Sutter.

8) Validate chain dependencies from commit phase, fall back to fine-grained
   validation only in case of errors.

9) Attach dst to skbuff from netfilter flowtable packet path, from
   Jason A. Donenfeld.

10) Use artificial maximum attribute cap to remove VLA from nfnetlink.
    Patch from Kees Cook.

11) Add extension to allow forwarding packets through the neighbour layer.

12) Add IPv6 conntrack helper support to IPVS, from Julian Anastasov.

13) Add IPv6 FTP conntrack support to IPVS, from Julian Anastasov.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents f39c6b29 d12e1229
...@@ -31,6 +31,7 @@ struct nfnetlink_subsystem { ...@@ -31,6 +31,7 @@ struct nfnetlink_subsystem {
const struct nfnl_callback *cb; /* callback for individual types */ const struct nfnl_callback *cb; /* callback for individual types */
int (*commit)(struct net *net, struct sk_buff *skb); int (*commit)(struct net *net, struct sk_buff *skb);
int (*abort)(struct net *net, struct sk_buff *skb); int (*abort)(struct net *net, struct sk_buff *skb);
void (*cleanup)(struct net *net);
bool (*valid_genid)(struct net *net, u32 genid); bool (*valid_genid)(struct net *net, u32 genid);
}; };
......
...@@ -763,14 +763,14 @@ struct ip_vs_app { ...@@ -763,14 +763,14 @@ struct ip_vs_app {
* 2=Mangled but checksum was not updated * 2=Mangled but checksum was not updated
*/ */
int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
struct sk_buff *, int *diff); struct sk_buff *, int *diff, struct ip_vs_iphdr *ipvsh);
/* input hook: Process packet in outin direction, diff set for TCP. /* input hook: Process packet in outin direction, diff set for TCP.
* Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
* 2=Mangled but checksum was not updated * 2=Mangled but checksum was not updated
*/ */
int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *,
struct sk_buff *, int *diff); struct sk_buff *, int *diff, struct ip_vs_iphdr *ipvsh);
/* ip_vs_app initializer */ /* ip_vs_app initializer */
int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *); int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *);
...@@ -1328,8 +1328,10 @@ int register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 ...@@ -1328,8 +1328,10 @@ int register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16
int ip_vs_app_inc_get(struct ip_vs_app *inc); int ip_vs_app_inc_get(struct ip_vs_app *inc);
void ip_vs_app_inc_put(struct ip_vs_app *inc); void ip_vs_app_inc_put(struct ip_vs_app *inc);
int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb,
int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); struct ip_vs_iphdr *ipvsh);
int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb,
struct ip_vs_iphdr *ipvsh);
int register_ip_vs_pe(struct ip_vs_pe *pe); int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe); int unregister_ip_vs_pe(struct ip_vs_pe *pe);
......
...@@ -2,10 +2,8 @@ ...@@ -2,10 +2,8 @@
#ifndef _NF_SOCK_H_ #ifndef _NF_SOCK_H_
#define _NF_SOCK_H_ #define _NF_SOCK_H_
struct net_device; #include <net/sock.h>
struct sk_buff; #include <net/inet_timewait_sock.h>
struct sock;
struct net;
static inline bool nf_sk_is_transparent(struct sock *sk) static inline bool nf_sk_is_transparent(struct sock *sk)
{ {
......
...@@ -858,6 +858,8 @@ enum nft_chain_flags { ...@@ -858,6 +858,8 @@ enum nft_chain_flags {
* @name: name of the chain * @name: name of the chain
*/ */
struct nft_chain { struct nft_chain {
struct nft_rule *__rcu *rules_gen_0;
struct nft_rule *__rcu *rules_gen_1;
struct list_head rules; struct list_head rules;
struct list_head list; struct list_head list;
struct nft_table *table; struct nft_table *table;
...@@ -867,8 +869,13 @@ struct nft_chain { ...@@ -867,8 +869,13 @@ struct nft_chain {
u8 flags:6, u8 flags:6,
genmask:2; genmask:2;
char *name; char *name;
/* Only used during control plane commit phase: */
struct nft_rule **rules_next;
}; };
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
enum nft_chain_types { enum nft_chain_types {
NFT_CHAIN_T_DEFAULT = 0, NFT_CHAIN_T_DEFAULT = 0,
NFT_CHAIN_T_ROUTE, NFT_CHAIN_T_ROUTE,
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
#ifndef _NET_NF_TABLES_CORE_H #ifndef _NET_NF_TABLES_CORE_H
#define _NET_NF_TABLES_CORE_H #define _NET_NF_TABLES_CORE_H
#include <net/netfilter/nf_tables.h>
extern struct nft_expr_type nft_imm_type; extern struct nft_expr_type nft_imm_type;
extern struct nft_expr_type nft_cmp_type; extern struct nft_expr_type nft_cmp_type;
extern struct nft_expr_type nft_lookup_type; extern struct nft_expr_type nft_lookup_type;
...@@ -23,6 +25,12 @@ struct nft_cmp_fast_expr { ...@@ -23,6 +25,12 @@ struct nft_cmp_fast_expr {
u8 len; u8 len;
}; };
/* State of the nf_tables "immediate" expression.
 * NOTE(review): field roles below are inferred from their names only;
 * confirm against the expression's implementation before relying on them.
 */
struct nft_immediate_expr {
struct nft_data data; /* presumably the immediate value itself - TODO confirm */
enum nft_registers dreg:8; /* presumably the destination register; stored in an 8-bit bitfield */
u8 dlen; /* presumably the length of @data - TODO confirm units */
};
/* Calculate the mask for the nft_cmp_fast expression. On big endian the /* Calculate the mask for the nft_cmp_fast expression. On big endian the
* mask needs to include the *upper* bytes when interpreting that data as * mask needs to include the *upper* bytes when interpreting that data as
* something smaller than the full u32, therefore a cpu_to_le32 is done. * something smaller than the full u32, therefore a cpu_to_le32 is done.
......
...@@ -9,6 +9,7 @@ struct netns_nftables { ...@@ -9,6 +9,7 @@ struct netns_nftables {
struct list_head commit_list; struct list_head commit_list;
unsigned int base_seq; unsigned int base_seq;
u8 gencursor; u8 gencursor;
u8 validate_state;
}; };
#endif #endif
...@@ -904,6 +904,31 @@ enum nft_rt_attributes { ...@@ -904,6 +904,31 @@ enum nft_rt_attributes {
}; };
#define NFTA_RT_MAX (__NFTA_RT_MAX - 1) #define NFTA_RT_MAX (__NFTA_RT_MAX - 1)
/**
* enum nft_socket_attributes - nf_tables socket expression netlink attributes
*
* @NFTA_SOCKET_KEY: socket key to match
* @NFTA_SOCKET_DREG: destination register
*/
enum nft_socket_attributes {
NFTA_SOCKET_UNSPEC, /* first enumerator, value 0 */
NFTA_SOCKET_KEY,
NFTA_SOCKET_DREG,
__NFTA_SOCKET_MAX /* sentinel, one past the last attribute; NFTA_SOCKET_MAX is derived from it */
};
#define NFTA_SOCKET_MAX (__NFTA_SOCKET_MAX - 1)
/**
* enum nft_socket_keys - nf_tables socket expression keys
*
* @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option
*/
enum nft_socket_keys {
NFT_SOCKET_TRANSPARENT,
__NFT_SOCKET_MAX
};
#define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1)
/** /**
* enum nft_ct_keys - nf_tables ct expression keys * enum nft_ct_keys - nf_tables ct expression keys
* *
...@@ -1055,6 +1080,11 @@ enum nft_log_attributes { ...@@ -1055,6 +1080,11 @@ enum nft_log_attributes {
}; };
#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) #define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1)
/**
* LOGLEVEL_AUDIT - a pseudo log level enabling audit logging
*/
#define LOGLEVEL_AUDIT 8
/** /**
* enum nft_queue_attributes - nf_tables queue expression netlink attributes * enum nft_queue_attributes - nf_tables queue expression netlink attributes
* *
...@@ -1230,10 +1260,14 @@ enum nft_dup_attributes { ...@@ -1230,10 +1260,14 @@ enum nft_dup_attributes {
* enum nft_fwd_attributes - nf_tables fwd expression netlink attributes * enum nft_fwd_attributes - nf_tables fwd expression netlink attributes
* *
* @NFTA_FWD_SREG_DEV: source register of output interface (NLA_U32: nft_register) * @NFTA_FWD_SREG_DEV: source register of output interface (NLA_U32: nft_register)
* @NFTA_FWD_SREG_ADDR: source register of destination address (NLA_U32: nft_register)
* @NFTA_FWD_NFPROTO: layer 3 family of source register address (NLA_U32: enum nfproto)
*/ */
enum nft_fwd_attributes { enum nft_fwd_attributes {
NFTA_FWD_UNSPEC, NFTA_FWD_UNSPEC,
NFTA_FWD_SREG_DEV, NFTA_FWD_SREG_DEV,
NFTA_FWD_SREG_ADDR,
NFTA_FWD_NFPROTO,
__NFTA_FWD_MAX __NFTA_FWD_MAX
}; };
#define NFTA_FWD_MAX (__NFTA_FWD_MAX - 1) #define NFTA_FWD_MAX (__NFTA_FWD_MAX - 1)
......
...@@ -129,10 +129,7 @@ config NFT_CHAIN_NAT_IPV4 ...@@ -129,10 +129,7 @@ config NFT_CHAIN_NAT_IPV4
source and destination ports. source and destination ports.
config NF_NAT_MASQUERADE_IPV4 config NF_NAT_MASQUERADE_IPV4
tristate "IPv4 masquerade support" bool
help
This is the kernel functionality to provide NAT in the masquerade
flavour (automatic source address selection).
config NFT_MASQ_IPV4 config NFT_MASQ_IPV4
tristate "IPv4 masquerading support for nf_tables" tristate "IPv4 masquerading support for nf_tables"
......
...@@ -10,6 +10,7 @@ nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o ...@@ -10,6 +10,7 @@ nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
# defrag # defrag
...@@ -32,9 +33,6 @@ nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o ...@@ -32,9 +33,6 @@ nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o
$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h $(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
# NAT protocols (nf_nat) # NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
......
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
*/ */
#include <linux/types.h> #include <linux/types.h>
#include <linux/module.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/inetdevice.h> #include <linux/inetdevice.h>
#include <linux/ip.h> #include <linux/ip.h>
...@@ -157,6 +156,3 @@ void nf_nat_masquerade_ipv4_unregister_notifier(void) ...@@ -157,6 +156,3 @@ void nf_nat_masquerade_ipv4_unregister_notifier(void)
unregister_inetaddr_notifier(&masq_inet_notifier); unregister_inetaddr_notifier(&masq_inet_notifier);
} }
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier); EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
...@@ -136,10 +136,7 @@ config NF_NAT_IPV6 ...@@ -136,10 +136,7 @@ config NF_NAT_IPV6
if NF_NAT_IPV6 if NF_NAT_IPV6
config NF_NAT_MASQUERADE_IPV6 config NF_NAT_MASQUERADE_IPV6
tristate "IPv6 masquerade support" bool
help
This is the kernel functionality to provide NAT in the masquerade
flavour (automatic source address selection) for IPv6.
endif # NF_NAT_IPV6 endif # NF_NAT_IPV6
......
...@@ -18,8 +18,8 @@ nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o ...@@ -18,8 +18,8 @@ nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
# defrag # defrag
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
......
...@@ -10,7 +10,6 @@ ...@@ -10,7 +10,6 @@
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/module.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/ipv6.h> #include <linux/ipv6.h>
...@@ -186,6 +185,3 @@ void nf_nat_masquerade_ipv6_unregister_notifier(void) ...@@ -186,6 +185,3 @@ void nf_nat_masquerade_ipv6_unregister_notifier(void)
unregister_netdevice_notifier(&masq_dev_notifier); unregister_netdevice_notifier(&masq_dev_notifier);
} }
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier); EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
...@@ -433,11 +433,7 @@ config NF_NAT_TFTP ...@@ -433,11 +433,7 @@ config NF_NAT_TFTP
default NF_NAT && NF_CONNTRACK_TFTP default NF_NAT && NF_CONNTRACK_TFTP
config NF_NAT_REDIRECT config NF_NAT_REDIRECT
tristate "IPv4/IPv6 redirect support" bool
depends on NF_NAT
help
This is the kernel functionality to redirect packets to local
machine through NAT.
config NETFILTER_SYNPROXY config NETFILTER_SYNPROXY
tristate tristate
...@@ -617,6 +613,15 @@ config NFT_FIB_INET ...@@ -617,6 +613,15 @@ config NFT_FIB_INET
The lookup will be delegated to the IPv4 or IPv6 FIB depending The lookup will be delegated to the IPv4 or IPv6 FIB depending
on the protocol of the packet. on the protocol of the packet.
config NFT_SOCKET
tristate "Netfilter nf_tables socket match support"
depends on IPV6 || IPV6=n
select NF_SOCKET_IPV4
select NF_SOCKET_IPV6 if IPV6
help
This option allows matching for the presence or absence of a
corresponding socket and its attributes.
if NF_TABLES_NETDEV if NF_TABLES_NETDEV
config NF_DUP_NETDEV config NF_DUP_NETDEV
......
...@@ -55,7 +55,7 @@ obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o ...@@ -55,7 +55,7 @@ obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o
obj-$(CONFIG_NF_NAT) += nf_nat.o obj-$(CONFIG_NF_NAT) += nf_nat.o
obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
# NAT helpers # NAT helpers
obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
...@@ -102,6 +102,7 @@ obj-$(CONFIG_NFT_FIB) += nft_fib.o ...@@ -102,6 +102,7 @@ obj-$(CONFIG_NFT_FIB) += nft_fib.o
obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
obj-$(CONFIG_NF_OSF) += nf_osf.o obj-$(CONFIG_NF_OSF) += nf_osf.o
obj-$(CONFIG_NFT_SOCKET) += nft_socket.o
# nf_tables netdev # nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
......
...@@ -355,7 +355,8 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, ...@@ -355,7 +355,8 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
} }
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
struct ip_vs_app *app) struct ip_vs_app *app,
struct ip_vs_iphdr *ipvsh)
{ {
int diff; int diff;
const unsigned int tcp_offset = ip_hdrlen(skb); const unsigned int tcp_offset = ip_hdrlen(skb);
...@@ -386,7 +387,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, ...@@ -386,7 +387,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
if (app->pkt_out == NULL) if (app->pkt_out == NULL)
return 1; return 1;
if (!app->pkt_out(app, cp, skb, &diff)) if (!app->pkt_out(app, cp, skb, &diff, ipvsh))
return 0; return 0;
/* /*
...@@ -404,7 +405,8 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, ...@@ -404,7 +405,8 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
* called by ipvs packet handler, assumes previously checked cp!=NULL * called by ipvs packet handler, assumes previously checked cp!=NULL
* returns false if it can't handle packet (oom) * returns false if it can't handle packet (oom)
*/ */
int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
struct ip_vs_iphdr *ipvsh)
{ {
struct ip_vs_app *app; struct ip_vs_app *app;
...@@ -417,7 +419,7 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) ...@@ -417,7 +419,7 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
/* TCP is complicated */ /* TCP is complicated */
if (cp->protocol == IPPROTO_TCP) if (cp->protocol == IPPROTO_TCP)
return app_tcp_pkt_out(cp, skb, app); return app_tcp_pkt_out(cp, skb, app, ipvsh);
/* /*
* Call private output hook function * Call private output hook function
...@@ -425,12 +427,13 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) ...@@ -425,12 +427,13 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
if (app->pkt_out == NULL) if (app->pkt_out == NULL)
return 1; return 1;
return app->pkt_out(app, cp, skb, NULL); return app->pkt_out(app, cp, skb, NULL, ipvsh);
} }
static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
struct ip_vs_app *app) struct ip_vs_app *app,
struct ip_vs_iphdr *ipvsh)
{ {
int diff; int diff;
const unsigned int tcp_offset = ip_hdrlen(skb); const unsigned int tcp_offset = ip_hdrlen(skb);
...@@ -461,7 +464,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, ...@@ -461,7 +464,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
if (app->pkt_in == NULL) if (app->pkt_in == NULL)
return 1; return 1;
if (!app->pkt_in(app, cp, skb, &diff)) if (!app->pkt_in(app, cp, skb, &diff, ipvsh))
return 0; return 0;
/* /*
...@@ -479,7 +482,8 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, ...@@ -479,7 +482,8 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
* called by ipvs packet handler, assumes previously checked cp!=NULL. * called by ipvs packet handler, assumes previously checked cp!=NULL.
* returns false if can't handle packet (oom). * returns false if can't handle packet (oom).
*/ */
int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
struct ip_vs_iphdr *ipvsh)
{ {
struct ip_vs_app *app; struct ip_vs_app *app;
...@@ -492,7 +496,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) ...@@ -492,7 +496,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
/* TCP is complicated */ /* TCP is complicated */
if (cp->protocol == IPPROTO_TCP) if (cp->protocol == IPPROTO_TCP)
return app_tcp_pkt_in(cp, skb, app); return app_tcp_pkt_in(cp, skb, app, ipvsh);
/* /*
* Call private input hook function * Call private input hook function
...@@ -500,7 +504,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) ...@@ -500,7 +504,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
if (app->pkt_in == NULL) if (app->pkt_in == NULL)
return 1; return 1;
return app->pkt_in(app, cp, skb, NULL); return app->pkt_in(app, cp, skb, NULL, ipvsh);
} }
......
This diff is collapsed.
...@@ -67,15 +67,20 @@ ...@@ -67,15 +67,20 @@
#include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_zones.h>
#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" #define FMT_TUPLE "%s:%u->%s:%u/%u"
#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ #define ARG_TUPLE(T) IP_VS_DBG_ADDR((T)->src.l3num, &(T)->src.u3), \
&(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ ntohs((T)->src.u.all), \
IP_VS_DBG_ADDR((T)->src.l3num, &(T)->dst.u3), \
ntohs((T)->dst.u.all), \
(T)->dst.protonum (T)->dst.protonum
#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" #define FMT_CONN "%s:%u->%s:%u->%s:%u/%u:%u"
#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ #define ARG_CONN(C) IP_VS_DBG_ADDR((C)->af, &((C)->caddr)), \
&((C)->vaddr.ip), ntohs((C)->vport), \ ntohs((C)->cport), \
&((C)->daddr.ip), ntohs((C)->dport), \ IP_VS_DBG_ADDR((C)->af, &((C)->vaddr)), \
ntohs((C)->vport), \
IP_VS_DBG_ADDR((C)->daf, &((C)->daddr)), \
ntohs((C)->dport), \
(C)->protocol, (C)->state (C)->protocol, (C)->state
void void
...@@ -127,13 +132,17 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) ...@@ -127,13 +132,17 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
new_tuple.dst.protonum != IPPROTO_ICMPV6) new_tuple.dst.protonum != IPPROTO_ICMPV6)
new_tuple.dst.u.tcp.port = cp->vport; new_tuple.dst.u.tcp.port = cp->vport;
} }
IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, " IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
"ctinfo=%d, old reply=" FMT_TUPLE "ctinfo=%d, old reply=" FMT_TUPLE "\n",
", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n", __func__, ct, ct->status, ctinfo,
__func__, ct, ct->status, ctinfo, ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple));
ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple), IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
ARG_TUPLE(&new_tuple), ARG_CONN(cp)); "ctinfo=%d, new reply=" FMT_TUPLE "\n",
__func__, ct, ct->status, ctinfo,
ARG_TUPLE(&new_tuple));
nf_conntrack_alter_reply(ct, &new_tuple); nf_conntrack_alter_reply(ct, &new_tuple);
IP_VS_DBG_BUF(7, "%s: Updated conntrack ct=%p for cp=" FMT_CONN "\n",
__func__, ct, ARG_CONN(cp));
} }
int ip_vs_confirm_conntrack(struct sk_buff *skb) int ip_vs_confirm_conntrack(struct sk_buff *skb)
...@@ -152,9 +161,6 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, ...@@ -152,9 +161,6 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
struct net *net = nf_ct_net(ct); struct net *net = nf_ct_net(ct);
if (exp->tuple.src.l3num != PF_INET)
return;
/* /*
* We assume that no NF locks are held before this callback. * We assume that no NF locks are held before this callback.
* ip_vs_conn_out_get and ip_vs_conn_in_get should match their * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
...@@ -171,19 +177,15 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, ...@@ -171,19 +177,15 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
cp = ip_vs_conn_out_get(&p); cp = ip_vs_conn_out_get(&p);
if (cp) { if (cp) {
/* Change reply CLIENT->RS to CLIENT->VS */ /* Change reply CLIENT->RS to CLIENT->VS */
IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found inout cp="
FMT_CONN "\n",
__func__, ct, ct->status, ARG_CONN(cp));
new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple="
FMT_TUPLE ", found inout cp=" FMT_CONN "\n", FMT_TUPLE "\n",
__func__, ct, ct->status, __func__, ct, ARG_TUPLE(&new_reply));
ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
ARG_CONN(cp));
new_reply.dst.u3 = cp->vaddr; new_reply.dst.u3 = cp->vaddr;
new_reply.dst.u.tcp.port = cp->vport; new_reply.dst.u.tcp.port = cp->vport;
IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
", inout cp=" FMT_CONN "\n",
__func__, ct,
ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
ARG_CONN(cp));
goto alter; goto alter;
} }
...@@ -191,25 +193,21 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, ...@@ -191,25 +193,21 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
cp = ip_vs_conn_in_get(&p); cp = ip_vs_conn_in_get(&p);
if (cp) { if (cp) {
/* Change reply VS->CLIENT to RS->CLIENT */ /* Change reply VS->CLIENT to RS->CLIENT */
IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found outin cp="
FMT_CONN "\n",
__func__, ct, ct->status, ARG_CONN(cp));
new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple="
FMT_TUPLE ", found outin cp=" FMT_CONN "\n", FMT_TUPLE "\n",
__func__, ct, ct->status, __func__, ct, ARG_TUPLE(&new_reply));
ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
ARG_CONN(cp));
new_reply.src.u3 = cp->daddr; new_reply.src.u3 = cp->daddr;
new_reply.src.u.tcp.port = cp->dport; new_reply.src.u.tcp.port = cp->dport;
IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", "
FMT_TUPLE ", outin cp=" FMT_CONN "\n",
__func__, ct,
ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
ARG_CONN(cp));
goto alter; goto alter;
} }
IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE IP_VS_DBG_BUF(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
" - unknown expect\n", " - unknown expect\n",
__func__, ct, ct->status, ARG_TUPLE(orig)); __func__, ct, ct->status, ARG_TUPLE(orig));
return; return;
alter: alter:
...@@ -247,8 +245,8 @@ void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, ...@@ -247,8 +245,8 @@ void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
exp->expectfn = ip_vs_nfct_expect_callback; exp->expectfn = ip_vs_nfct_expect_callback;
IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
__func__, ct, ARG_TUPLE(&exp->tuple)); __func__, ct, ARG_TUPLE(&exp->tuple));
nf_ct_expect_related(exp); nf_ct_expect_related(exp);
nf_ct_expect_put(exp); nf_ct_expect_put(exp);
} }
...@@ -274,26 +272,25 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) ...@@ -274,26 +272,25 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
tuple.dst.u3 = cp->vaddr; tuple.dst.u3 = cp->vaddr;
tuple.dst.u.all = cp->vport; tuple.dst.u.all = cp->vport;
IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE IP_VS_DBG_BUF(7, "%s: dropping conntrack for conn " FMT_CONN "\n",
" for conn " FMT_CONN "\n", __func__, ARG_CONN(cp));
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple); h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
if (h) { if (h) {
ct = nf_ct_tuplehash_to_ctrack(h); ct = nf_ct_tuplehash_to_ctrack(h);
if (nf_ct_kill(ct)) { if (nf_ct_kill(ct)) {
IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple=" IP_VS_DBG_BUF(7, "%s: ct=%p deleted for tuple="
FMT_TUPLE "\n", FMT_TUPLE "\n",
__func__, ct, ARG_TUPLE(&tuple)); __func__, ct, ARG_TUPLE(&tuple));
} else { } else {
IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" IP_VS_DBG_BUF(7, "%s: ct=%p, no conntrack for tuple="
FMT_TUPLE "\n", FMT_TUPLE "\n",
__func__, ct, ARG_TUPLE(&tuple)); __func__, ct, ARG_TUPLE(&tuple));
} }
nf_ct_put(ct); nf_ct_put(ct);
} else { } else {
IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", IP_VS_DBG_BUF(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
__func__, ARG_TUPLE(&tuple)); __func__, ARG_TUPLE(&tuple));
} }
} }
...@@ -109,7 +109,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -109,7 +109,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0; return 0;
/* Call application helper if needed */ /* Call application helper if needed */
ret = ip_vs_app_pkt_out(cp, skb); ret = ip_vs_app_pkt_out(cp, skb, iph);
if (ret == 0) if (ret == 0)
return 0; return 0;
/* ret=2: csum update is needed after payload mangling */ /* ret=2: csum update is needed after payload mangling */
...@@ -156,7 +156,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -156,7 +156,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0; return 0;
/* Call application helper if needed */ /* Call application helper if needed */
ret = ip_vs_app_pkt_in(cp, skb); ret = ip_vs_app_pkt_in(cp, skb, iph);
if (ret == 0) if (ret == 0)
return 0; return 0;
/* ret=2: csum update is needed after payload mangling */ /* ret=2: csum update is needed after payload mangling */
......
...@@ -170,7 +170,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -170,7 +170,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0; return 0;
/* Call application helper if needed */ /* Call application helper if needed */
if (!(ret = ip_vs_app_pkt_out(cp, skb))) if (!(ret = ip_vs_app_pkt_out(cp, skb, iph)))
return 0; return 0;
/* ret=2: csum update is needed after payload mangling */ /* ret=2: csum update is needed after payload mangling */
if (ret == 1) if (ret == 1)
...@@ -251,7 +251,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -251,7 +251,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
* Attempt ip_vs_app call. * Attempt ip_vs_app call.
* It will fix ip_vs_conn and iph ack_seq stuff * It will fix ip_vs_conn and iph ack_seq stuff
*/ */
if (!(ret = ip_vs_app_pkt_in(cp, skb))) if (!(ret = ip_vs_app_pkt_in(cp, skb, iph)))
return 0; return 0;
/* ret=2: csum update is needed after payload mangling */ /* ret=2: csum update is needed after payload mangling */
if (ret == 1) if (ret == 1)
......
...@@ -162,7 +162,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -162,7 +162,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
/* /*
* Call application helper if needed * Call application helper if needed
*/ */
if (!(ret = ip_vs_app_pkt_out(cp, skb))) if (!(ret = ip_vs_app_pkt_out(cp, skb, iph)))
return 0; return 0;
/* ret=2: csum update is needed after payload mangling */ /* ret=2: csum update is needed after payload mangling */
if (ret == 1) if (ret == 1)
...@@ -246,7 +246,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -246,7 +246,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
* Attempt ip_vs_app call. * Attempt ip_vs_app call.
* It will fix ip_vs_conn * It will fix ip_vs_conn
*/ */
if (!(ret = ip_vs_app_pkt_in(cp, skb))) if (!(ret = ip_vs_app_pkt_in(cp, skb, iph)))
return 0; return 0;
/* ret=2: csum update is needed after payload mangling */ /* ret=2: csum update is needed after payload mangling */
if (ret == 1) if (ret == 1)
......
...@@ -220,7 +220,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, ...@@ -220,7 +220,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
enum flow_offload_tuple_dir dir; enum flow_offload_tuple_dir dir;
struct flow_offload *flow; struct flow_offload *flow;
struct net_device *outdev; struct net_device *outdev;
const struct rtable *rt; struct rtable *rt;
unsigned int thoff; unsigned int thoff;
struct iphdr *iph; struct iphdr *iph;
__be32 nexthop; __be32 nexthop;
...@@ -241,7 +241,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, ...@@ -241,7 +241,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
dir = tuplehash->tuple.dir; dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache; rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) && if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
(ip_hdr(skb)->frag_off & htons(IP_DF)) != 0) (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
...@@ -264,6 +264,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, ...@@ -264,6 +264,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
skb->dev = outdev; skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
skb_dst_set_noref(skb, &rt->dst);
neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
return NF_STOLEN; return NF_STOLEN;
...@@ -480,6 +481,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, ...@@ -480,6 +481,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
skb->dev = outdev; skb->dev = outdev;
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
skb_dst_set_noref(skb, &rt->dst);
neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
return NF_STOLEN; return NF_STOLEN;
......
...@@ -1036,7 +1036,7 @@ static struct pernet_operations nat_net_ops = { ...@@ -1036,7 +1036,7 @@ static struct pernet_operations nat_net_ops = {
.size = sizeof(struct nat_net), .size = sizeof(struct nat_net),
}; };
struct nf_nat_hook nat_hook = { static struct nf_nat_hook nat_hook = {
.parse_nat_setup = nfnetlink_parse_nat_setup, .parse_nat_setup = nfnetlink_parse_nat_setup,
#ifdef CONFIG_XFRM #ifdef CONFIG_XFRM
.decode_session = __nf_nat_decode_session, .decode_session = __nf_nat_decode_session,
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
#include <linux/inetdevice.h> #include <linux/inetdevice.h>
#include <linux/ip.h> #include <linux/ip.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/netfilter.h> #include <linux/netfilter.h>
#include <linux/types.h> #include <linux/types.h>
...@@ -124,6 +123,3 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, ...@@ -124,6 +123,3 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST);
} }
EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
This diff is collapsed.
...@@ -23,22 +23,6 @@ ...@@ -23,22 +23,6 @@
#include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h> #include <net/netfilter/nf_log.h>
static const char *const comments[__NFT_TRACETYPE_MAX] = {
[NFT_TRACETYPE_POLICY] = "policy",
[NFT_TRACETYPE_RETURN] = "return",
[NFT_TRACETYPE_RULE] = "rule",
};
static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
.level = LOGLEVEL_WARNING,
.logflags = NF_LOG_DEFAULT_MASK,
},
},
};
static noinline void __nft_trace_packet(struct nft_traceinfo *info, static noinline void __nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain, const struct nft_chain *chain,
enum nft_trace_types type) enum nft_trace_types type)
...@@ -133,7 +117,7 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain, ...@@ -133,7 +117,7 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain,
struct nft_jumpstack { struct nft_jumpstack {
const struct nft_chain *chain; const struct nft_chain *chain;
const struct nft_rule *rule; struct nft_rule *const *rules;
}; };
unsigned int unsigned int
...@@ -141,27 +125,29 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) ...@@ -141,27 +125,29 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
{ {
const struct nft_chain *chain = priv, *basechain = chain; const struct nft_chain *chain = priv, *basechain = chain;
const struct net *net = nft_net(pkt); const struct net *net = nft_net(pkt);
struct nft_rule *const *rules;
const struct nft_rule *rule; const struct nft_rule *rule;
const struct nft_expr *expr, *last; const struct nft_expr *expr, *last;
struct nft_regs regs; struct nft_regs regs;
unsigned int stackptr = 0; unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
unsigned int gencursor = nft_genmask_cur(net); bool genbit = READ_ONCE(net->nft.gencursor);
struct nft_traceinfo info; struct nft_traceinfo info;
info.trace = false; info.trace = false;
if (static_branch_unlikely(&nft_trace_enabled)) if (static_branch_unlikely(&nft_trace_enabled))
nft_trace_init(&info, pkt, &regs.verdict, basechain); nft_trace_init(&info, pkt, &regs.verdict, basechain);
do_chain: do_chain:
rule = list_entry(&chain->rules, struct nft_rule, list); if (genbit)
rules = rcu_dereference(chain->rules_gen_1);
else
rules = rcu_dereference(chain->rules_gen_0);
next_rule: next_rule:
rule = *rules;
regs.verdict.code = NFT_CONTINUE; regs.verdict.code = NFT_CONTINUE;
list_for_each_entry_continue_rcu(rule, &chain->rules, list) { for (; *rules ; rules++) {
rule = *rules;
/* This rule is not active, skip. */
if (unlikely(rule->genmask & gencursor))
continue;
nft_rule_for_each_expr(expr, last, rule) { nft_rule_for_each_expr(expr, last, rule) {
if (expr->ops == &nft_cmp_fast_ops) if (expr->ops == &nft_cmp_fast_ops)
nft_cmp_fast_eval(expr, &regs); nft_cmp_fast_eval(expr, &regs);
...@@ -199,7 +185,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) ...@@ -199,7 +185,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
case NFT_JUMP: case NFT_JUMP:
BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
jumpstack[stackptr].chain = chain; jumpstack[stackptr].chain = chain;
jumpstack[stackptr].rule = rule; jumpstack[stackptr].rules = rules + 1;
stackptr++; stackptr++;
/* fall through */ /* fall through */
case NFT_GOTO: case NFT_GOTO:
...@@ -221,7 +207,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) ...@@ -221,7 +207,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
if (stackptr > 0) { if (stackptr > 0) {
stackptr--; stackptr--;
chain = jumpstack[stackptr].chain; chain = jumpstack[stackptr].chain;
rule = jumpstack[stackptr].rule; rules = jumpstack[stackptr].rules;
goto next_rule; goto next_rule;
} }
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <net/sock.h> #include <net/sock.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/sched/signal.h>
#include <net/netlink.h> #include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink.h>
...@@ -37,6 +38,8 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); ...@@ -37,6 +38,8 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
rcu_dereference_protected(table[(id)].subsys, \ rcu_dereference_protected(table[(id)].subsys, \
lockdep_nfnl_is_held((id))) lockdep_nfnl_is_held((id)))
#define NFNL_MAX_ATTR_COUNT 32
static struct { static struct {
struct mutex mutex; struct mutex mutex;
const struct nfnetlink_subsystem __rcu *subsys; const struct nfnetlink_subsystem __rcu *subsys;
...@@ -76,6 +79,13 @@ EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held); ...@@ -76,6 +79,13 @@ EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held);
int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
{ {
u8 cb_id;
/* Sanity-check attr_count size to avoid stack buffer overflow. */
for (cb_id = 0; cb_id < n->cb_count; cb_id++)
if (WARN_ON(n->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT))
return -EINVAL;
nfnl_lock(n->subsys_id); nfnl_lock(n->subsys_id);
if (table[n->subsys_id].subsys) { if (table[n->subsys_id].subsys) {
nfnl_unlock(n->subsys_id); nfnl_unlock(n->subsys_id);
...@@ -185,11 +195,17 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -185,11 +195,17 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
{ {
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1];
struct nlattr *attr = (void *)nlh + min_len; struct nlattr *attr = (void *)nlh + min_len;
int attrlen = nlh->nlmsg_len - min_len; int attrlen = nlh->nlmsg_len - min_len;
__u8 subsys_id = NFNL_SUBSYS_ID(type); __u8 subsys_id = NFNL_SUBSYS_ID(type);
/* Sanity-check NFNL_MAX_ATTR_COUNT */
if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) {
rcu_read_unlock();
return -ENOMEM;
}
err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen,
ss->cb[cb_id].policy, extack); ss->cb[cb_id].policy, extack);
if (err < 0) { if (err < 0) {
...@@ -330,6 +346,13 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -330,6 +346,13 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
while (skb->len >= nlmsg_total_size(0)) { while (skb->len >= nlmsg_total_size(0)) {
int msglen, type; int msglen, type;
if (fatal_signal_pending(current)) {
nfnl_err_reset(&err_list);
err = -EINTR;
status = NFNL_BATCH_FAILURE;
goto done;
}
memset(&extack, 0, sizeof(extack)); memset(&extack, 0, sizeof(extack));
nlh = nlmsg_hdr(skb); nlh = nlmsg_hdr(skb);
err = 0; err = 0;
...@@ -379,10 +402,16 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -379,10 +402,16 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
{ {
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1];
struct nlattr *attr = (void *)nlh + min_len; struct nlattr *attr = (void *)nlh + min_len;
int attrlen = nlh->nlmsg_len - min_len; int attrlen = nlh->nlmsg_len - min_len;
/* Sanity-check NFNL_MAX_ATTR_COUNT */
if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) {
err = -ENOMEM;
goto ack;
}
err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, err = nla_parse(cda, ss->cb[cb_id].attr_count, attr,
attrlen, ss->cb[cb_id].policy, NULL); attrlen, ss->cb[cb_id].policy, NULL);
if (err < 0) if (err < 0)
...@@ -441,10 +470,19 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -441,10 +470,19 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
kfree_skb(skb); kfree_skb(skb);
goto replay; goto replay;
} else if (status == NFNL_BATCH_DONE) { } else if (status == NFNL_BATCH_DONE) {
ss->commit(net, oskb); err = ss->commit(net, oskb);
if (err == -EAGAIN) {
status |= NFNL_BATCH_REPLAY;
goto done;
} else if (err) {
ss->abort(net, oskb);
netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
}
} else { } else {
ss->abort(net, oskb); ss->abort(net, oskb);
} }
if (ss->cleanup)
ss->cleanup(net);
nfnl_err_deliver(&err_list, oskb); nfnl_err_deliver(&err_list, oskb);
nfnl_unlock(subsys_id); nfnl_unlock(subsys_id);
......
...@@ -611,10 +611,10 @@ nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ...@@ -611,10 +611,10 @@ nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
return -1; return -1;
} }
static int nfnl_compat_get(struct net *net, struct sock *nfnl, static int nfnl_compat_get_rcu(struct net *net, struct sock *nfnl,
struct sk_buff *skb, const struct nlmsghdr *nlh, struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const tb[], const struct nlattr * const tb[],
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
int ret = 0, target; int ret = 0, target;
struct nfgenmsg *nfmsg; struct nfgenmsg *nfmsg;
...@@ -653,16 +653,21 @@ static int nfnl_compat_get(struct net *net, struct sock *nfnl, ...@@ -653,16 +653,21 @@ static int nfnl_compat_get(struct net *net, struct sock *nfnl,
return -EINVAL; return -EINVAL;
} }
if (!try_module_get(THIS_MODULE))
return -EINVAL;
rcu_read_unlock();
try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name, try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name,
rev, target, &ret), rev, target, &ret),
fmt, name); fmt, name);
if (ret < 0) if (ret < 0)
return ret; goto out_put;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (skb2 == NULL) if (skb2 == NULL) {
return -ENOMEM; ret = -ENOMEM;
goto out_put;
}
/* include the best revision for this extension in the message */ /* include the best revision for this extension in the message */
if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid, if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid,
...@@ -672,14 +677,16 @@ static int nfnl_compat_get(struct net *net, struct sock *nfnl, ...@@ -672,14 +677,16 @@ static int nfnl_compat_get(struct net *net, struct sock *nfnl,
nfmsg->nfgen_family, nfmsg->nfgen_family,
name, ret, target) <= 0) { name, ret, target) <= 0) {
kfree_skb(skb2); kfree_skb(skb2);
return -ENOSPC; goto out_put;
} }
ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
MSG_DONTWAIT); MSG_DONTWAIT);
if (ret > 0) if (ret > 0)
ret = 0; ret = 0;
out_put:
rcu_read_lock();
module_put(THIS_MODULE);
return ret == -EAGAIN ? -ENOBUFS : ret; return ret == -EAGAIN ? -ENOBUFS : ret;
} }
...@@ -691,7 +698,7 @@ static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = { ...@@ -691,7 +698,7 @@ static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
}; };
static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = { static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = {
[NFNL_MSG_COMPAT_GET] = { .call = nfnl_compat_get, [NFNL_MSG_COMPAT_GET] = { .call_rcu = nfnl_compat_get_rcu,
.attr_count = NFTA_COMPAT_MAX, .attr_count = NFTA_COMPAT_MAX,
.policy = nfnl_compat_policy_get }, .policy = nfnl_compat_policy_get },
}; };
......
...@@ -12,8 +12,12 @@ ...@@ -12,8 +12,12 @@
#include <linux/netlink.h> #include <linux/netlink.h>
#include <linux/netfilter.h> #include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h> #include <linux/netfilter/nf_tables.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_dup_netdev.h> #include <net/netfilter/nf_dup_netdev.h>
#include <net/neighbour.h>
#include <net/ip.h>
struct nft_fwd_netdev { struct nft_fwd_netdev {
enum nft_registers sreg_dev:8; enum nft_registers sreg_dev:8;
...@@ -32,6 +36,8 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr, ...@@ -32,6 +36,8 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr,
static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = { static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = {
[NFTA_FWD_SREG_DEV] = { .type = NLA_U32 }, [NFTA_FWD_SREG_DEV] = { .type = NLA_U32 },
[NFTA_FWD_SREG_ADDR] = { .type = NLA_U32 },
[NFTA_FWD_NFPROTO] = { .type = NLA_U32 },
}; };
static int nft_fwd_netdev_init(const struct nft_ctx *ctx, static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
...@@ -62,7 +68,133 @@ static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) ...@@ -62,7 +68,133 @@ static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
return -1; return -1;
} }
/* Private data of the neighbour-layer variant of the "fwd" expression. */
struct nft_fwd_neigh {
/* Register whose value nft_fwd_neigh_eval() reads as the output ifindex. */
enum nft_registers sreg_dev:8;
/* Register whose storage is handed to neigh_xmit() as the next-hop address. */
enum nft_registers sreg_addr:8;
/* NFPROTO_IPV4 or NFPROTO_IPV6; anything else is rejected at init time. */
u8 nfproto;
};
static void nft_fwd_neigh_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_fwd_neigh *priv = nft_expr_priv(expr);
void *addr = &regs->data[priv->sreg_addr];
int oif = regs->data[priv->sreg_dev];
unsigned int verdict = NF_STOLEN;
struct sk_buff *skb = pkt->skb;
struct net_device *dev;
int neigh_table;
switch (priv->nfproto) {
case NFPROTO_IPV4: {
struct iphdr *iph;
if (skb->protocol != htons(ETH_P_IP)) {
verdict = NFT_BREAK;
goto out;
}
if (skb_try_make_writable(skb, sizeof(*iph))) {
verdict = NF_DROP;
goto out;
}
iph = ip_hdr(skb);
ip_decrease_ttl(iph);
neigh_table = NEIGH_ARP_TABLE;
break;
}
case NFPROTO_IPV6: {
struct ipv6hdr *ip6h;
if (skb->protocol != htons(ETH_P_IPV6)) {
verdict = NFT_BREAK;
goto out;
}
if (skb_try_make_writable(skb, sizeof(*ip6h))) {
verdict = NF_DROP;
goto out;
}
ip6h = ipv6_hdr(skb);
ip6h->hop_limit--;
neigh_table = NEIGH_ND_TABLE;
break;
}
default:
verdict = NFT_BREAK;
goto out;
}
dev = dev_get_by_index_rcu(nft_net(pkt), oif);
if (dev == NULL)
return;
skb->dev = dev;
neigh_xmit(neigh_table, dev, addr, skb);
out:
regs->verdict.code = verdict;
}
/*
 * Parse the netlink attributes of a neighbour-based "fwd" expression.
 *
 * All of NFTA_FWD_SREG_DEV, NFTA_FWD_SREG_ADDR and NFTA_FWD_NFPROTO are
 * mandatory (-EINVAL otherwise). Only IPv4 and IPv6 are accepted as the
 * protocol (-EOPNOTSUPP otherwise). The device register must be loadable
 * as an int and the address register as an address of the chosen family.
 */
static int nft_fwd_neigh_init(const struct nft_ctx *ctx,
			      const struct nft_expr *expr,
			      const struct nlattr * const tb[])
{
	struct nft_fwd_neigh *priv = nft_expr_priv(expr);
	unsigned int alen;
	int ret;

	if (!(tb[NFTA_FWD_SREG_DEV] &&
	      tb[NFTA_FWD_SREG_ADDR] &&
	      tb[NFTA_FWD_NFPROTO]))
		return -EINVAL;

	priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]);
	priv->sreg_addr = nft_parse_register(tb[NFTA_FWD_SREG_ADDR]);
	priv->nfproto = ntohl(nla_get_be32(tb[NFTA_FWD_NFPROTO]));

	/* The address register must hold a full address of this family. */
	if (priv->nfproto == NFPROTO_IPV4)
		alen = sizeof(struct in_addr);
	else if (priv->nfproto == NFPROTO_IPV6)
		alen = sizeof(struct in6_addr);
	else
		return -EOPNOTSUPP;

	ret = nft_validate_register_load(priv->sreg_dev, sizeof(int));
	if (ret < 0)
		return ret;

	return nft_validate_register_load(priv->sreg_addr, alen);
}
static const struct nft_expr_ops nft_fwd_netdev_ingress_ops;
/*
 * Emit the expression's configuration (device register, address register,
 * protocol family) as netlink attributes. Returns 0 on success, or -1 as
 * soon as one attribute cannot be added to @skb.
 */
static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
	struct nft_fwd_neigh *priv = nft_expr_priv(expr);

	if (nft_dump_register(skb, NFTA_FWD_SREG_DEV, priv->sreg_dev))
		return -1;
	if (nft_dump_register(skb, NFTA_FWD_SREG_ADDR, priv->sreg_addr))
		return -1;
	if (nla_put_be32(skb, NFTA_FWD_NFPROTO, htonl(priv->nfproto)))
		return -1;

	return 0;
}
static struct nft_expr_type nft_fwd_netdev_type; static struct nft_expr_type nft_fwd_netdev_type;
/*
 * Expression ops for the neighbour-layer flavour of "fwd"; returned by
 * nft_fwd_select_ops() when NFTA_FWD_SREG_ADDR is present.
 */
static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = {
.type = &nft_fwd_netdev_type,
/* private area is a struct nft_fwd_neigh */
.size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_neigh)),
.eval = nft_fwd_neigh_eval,
.init = nft_fwd_neigh_init,
.dump = nft_fwd_neigh_dump,
};
static const struct nft_expr_ops nft_fwd_netdev_ops = { static const struct nft_expr_ops nft_fwd_netdev_ops = {
.type = &nft_fwd_netdev_type, .type = &nft_fwd_netdev_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_netdev)), .size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_netdev)),
...@@ -71,10 +203,22 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { ...@@ -71,10 +203,22 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
.dump = nft_fwd_netdev_dump, .dump = nft_fwd_netdev_dump,
}; };
/*
 * Choose the "fwd" implementation from the supplied attributes: an address
 * register selects the neighbour-layer variant, otherwise a device register
 * selects the plain netdev variant. With neither attribute present the
 * result is ERR_PTR(-EOPNOTSUPP).
 */
static const struct nft_expr_ops *
nft_fwd_select_ops(const struct nft_ctx *ctx,
		   const struct nlattr * const tb[])
{
	const struct nft_expr_ops *ops = ERR_PTR(-EOPNOTSUPP);

	if (tb[NFTA_FWD_SREG_ADDR])
		ops = &nft_fwd_neigh_netdev_ops;
	else if (tb[NFTA_FWD_SREG_DEV])
		ops = &nft_fwd_netdev_ops;

	return ops;
}
static struct nft_expr_type nft_fwd_netdev_type __read_mostly = { static struct nft_expr_type nft_fwd_netdev_type __read_mostly = {
.family = NFPROTO_NETDEV, .family = NFPROTO_NETDEV,
.name = "fwd", .name = "fwd",
.ops = &nft_fwd_netdev_ops, .select_ops = nft_fwd_select_ops,
.policy = nft_fwd_netdev_policy, .policy = nft_fwd_netdev_policy,
.maxattr = NFTA_FWD_MAX, .maxattr = NFTA_FWD_MAX,
.owner = THIS_MODULE, .owner = THIS_MODULE,
......
...@@ -177,10 +177,7 @@ static int nft_jhash_map_init(const struct nft_ctx *ctx, ...@@ -177,10 +177,7 @@ static int nft_jhash_map_init(const struct nft_ctx *ctx,
priv->map = nft_set_lookup_global(ctx->net, ctx->table, priv->map = nft_set_lookup_global(ctx->net, ctx->table,
tb[NFTA_HASH_SET_NAME], tb[NFTA_HASH_SET_NAME],
tb[NFTA_HASH_SET_ID], genmask); tb[NFTA_HASH_SET_ID], genmask);
if (IS_ERR(priv->map)) return PTR_ERR_OR_ZERO(priv->map);
return PTR_ERR(priv->map);
return 0;
} }
static int nft_symhash_init(const struct nft_ctx *ctx, static int nft_symhash_init(const struct nft_ctx *ctx,
...@@ -220,10 +217,7 @@ static int nft_symhash_map_init(const struct nft_ctx *ctx, ...@@ -220,10 +217,7 @@ static int nft_symhash_map_init(const struct nft_ctx *ctx,
priv->map = nft_set_lookup_global(ctx->net, ctx->table, priv->map = nft_set_lookup_global(ctx->net, ctx->table,
tb[NFTA_HASH_SET_NAME], tb[NFTA_HASH_SET_NAME],
tb[NFTA_HASH_SET_ID], genmask); tb[NFTA_HASH_SET_ID], genmask);
if (IS_ERR(priv->map)) return PTR_ERR_OR_ZERO(priv->map);
return PTR_ERR(priv->map);
return 0;
} }
static int nft_jhash_dump(struct sk_buff *skb, static int nft_jhash_dump(struct sk_buff *skb,
......
...@@ -17,12 +17,6 @@ ...@@ -17,12 +17,6 @@
#include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h>
struct nft_immediate_expr {
struct nft_data data;
enum nft_registers dreg:8;
u8 dlen;
};
static void nft_immediate_eval(const struct nft_expr *expr, static void nft_immediate_eval(const struct nft_expr *expr,
struct nft_regs *regs, struct nft_regs *regs,
const struct nft_pktinfo *pkt) const struct nft_pktinfo *pkt)
...@@ -101,12 +95,27 @@ static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) ...@@ -101,12 +95,27 @@ static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
static int nft_immediate_validate(const struct nft_ctx *ctx, static int nft_immediate_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr, const struct nft_expr *expr,
const struct nft_data **data) const struct nft_data **d)
{ {
const struct nft_immediate_expr *priv = nft_expr_priv(expr); const struct nft_immediate_expr *priv = nft_expr_priv(expr);
const struct nft_data *data;
int err;
if (priv->dreg == NFT_REG_VERDICT) if (priv->dreg != NFT_REG_VERDICT)
*data = &priv->data; return 0;
data = &priv->data;
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
err = nft_chain_validate(ctx, data->verdict.chain);
if (err < 0)
return err;
break;
default:
break;
}
return 0; return 0;
} }
......
...@@ -9,12 +9,15 @@ ...@@ -9,12 +9,15 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/) * Development of this code funded by Astaro AG (http://www.astaro.com/)
*/ */
#include <linux/audit.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/netlink.h> #include <linux/netlink.h>
#include <linux/netfilter.h> #include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h> #include <linux/netfilter/nf_tables.h>
#include <net/ipv6.h>
#include <net/ip.h>
#include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h> #include <net/netfilter/nf_log.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
...@@ -26,12 +29,93 @@ struct nft_log { ...@@ -26,12 +29,93 @@ struct nft_log {
char *prefix; char *prefix;
}; };
static bool audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
{
struct iphdr _iph;
const struct iphdr *ih;
ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_iph), &_iph);
if (!ih)
return false;
audit_log_format(ab, " saddr=%pI4 daddr=%pI4 proto=%hhu",
&ih->saddr, &ih->daddr, ih->protocol);
return true;
}
static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
{
struct ipv6hdr _ip6h;
const struct ipv6hdr *ih;
u8 nexthdr;
__be16 frag_off;
ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
if (!ih)
return false;
nexthdr = ih->nexthdr;
ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), &nexthdr, &frag_off);
audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
&ih->saddr, &ih->daddr, nexthdr);
return true;
}
static void nft_log_eval_audit(const struct nft_pktinfo *pkt)
{
struct sk_buff *skb = pkt->skb;
struct audit_buffer *ab;
int fam = -1;
if (!audit_enabled)
return;
ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
if (!ab)
return;
audit_log_format(ab, "mark=%#x", skb->mark);
switch (nft_pf(pkt)) {
case NFPROTO_BRIDGE:
switch (eth_hdr(skb)->h_proto) {
case htons(ETH_P_IP):
fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1;
break;
case htons(ETH_P_IPV6):
fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1;
break;
}
break;
case NFPROTO_IPV4:
fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1;
break;
case NFPROTO_IPV6:
fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1;
break;
}
if (fam == -1)
audit_log_format(ab, " saddr=? daddr=? proto=-1");
audit_log_end(ab);
}
static void nft_log_eval(const struct nft_expr *expr, static void nft_log_eval(const struct nft_expr *expr,
struct nft_regs *regs, struct nft_regs *regs,
const struct nft_pktinfo *pkt) const struct nft_pktinfo *pkt)
{ {
const struct nft_log *priv = nft_expr_priv(expr); const struct nft_log *priv = nft_expr_priv(expr);
if (priv->loginfo.type == NF_LOG_TYPE_LOG &&
priv->loginfo.u.log.level == LOGLEVEL_AUDIT) {
nft_log_eval_audit(pkt);
return;
}
nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb, nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb,
nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s", nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s",
priv->prefix); priv->prefix);
...@@ -84,7 +168,7 @@ static int nft_log_init(const struct nft_ctx *ctx, ...@@ -84,7 +168,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
} else { } else {
li->u.log.level = LOGLEVEL_WARNING; li->u.log.level = LOGLEVEL_WARNING;
} }
if (li->u.log.level > LOGLEVEL_DEBUG) { if (li->u.log.level > LOGLEVEL_AUDIT) {
err = -EINVAL; err = -EINVAL;
goto err1; goto err1;
} }
...@@ -112,6 +196,9 @@ static int nft_log_init(const struct nft_ctx *ctx, ...@@ -112,6 +196,9 @@ static int nft_log_init(const struct nft_ctx *ctx,
break; break;
} }
if (li->u.log.level == LOGLEVEL_AUDIT)
return 0;
err = nf_logger_find_get(ctx->family, li->type); err = nf_logger_find_get(ctx->family, li->type);
if (err < 0) if (err < 0)
goto err1; goto err1;
...@@ -133,6 +220,9 @@ static void nft_log_destroy(const struct nft_ctx *ctx, ...@@ -133,6 +220,9 @@ static void nft_log_destroy(const struct nft_ctx *ctx,
if (priv->prefix != nft_log_null_prefix) if (priv->prefix != nft_log_null_prefix)
kfree(priv->prefix); kfree(priv->prefix);
if (li->u.log.level == LOGLEVEL_AUDIT)
return;
nf_logger_put(ctx->family, li->type); nf_logger_put(ctx->family, li->type);
} }
......
...@@ -149,6 +149,52 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) ...@@ -149,6 +149,52 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
return -1; return -1;
} }
static int nft_lookup_validate_setelem(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
const struct nft_data *data;
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
return 0;
data = nft_set_ext_data(ext);
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
return nft_chain_validate(ctx, data->verdict.chain);
default:
return 0;
}
}
static int nft_lookup_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **d)
{
const struct nft_lookup *priv = nft_expr_priv(expr);
struct nft_set_iter iter;
if (!(priv->set->flags & NFT_SET_MAP) ||
priv->set->dtype != NFT_DATA_VERDICT)
return 0;
iter.genmask = nft_genmask_next(ctx->net);
iter.skip = 0;
iter.count = 0;
iter.err = 0;
iter.fn = nft_lookup_validate_setelem;
priv->set->ops->walk(ctx, priv->set, &iter);
if (iter.err < 0)
return iter.err;
return 0;
}
static const struct nft_expr_ops nft_lookup_ops = { static const struct nft_expr_ops nft_lookup_ops = {
.type = &nft_lookup_type, .type = &nft_lookup_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
...@@ -156,6 +202,7 @@ static const struct nft_expr_ops nft_lookup_ops = { ...@@ -156,6 +202,7 @@ static const struct nft_expr_ops nft_lookup_ops = {
.init = nft_lookup_init, .init = nft_lookup_init,
.destroy = nft_lookup_destroy, .destroy = nft_lookup_destroy,
.dump = nft_lookup_dump, .dump = nft_lookup_dump,
.validate = nft_lookup_validate,
}; };
struct nft_expr_type nft_lookup_type __read_mostly = { struct nft_expr_type nft_lookup_type __read_mostly = {
......
...@@ -114,10 +114,7 @@ static int nft_ng_inc_map_init(const struct nft_ctx *ctx, ...@@ -114,10 +114,7 @@ static int nft_ng_inc_map_init(const struct nft_ctx *ctx,
tb[NFTA_NG_SET_NAME], tb[NFTA_NG_SET_NAME],
tb[NFTA_NG_SET_ID], genmask); tb[NFTA_NG_SET_ID], genmask);
if (IS_ERR(priv->map)) return PTR_ERR_OR_ZERO(priv->map);
return PTR_ERR(priv->map);
return 0;
} }
static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
......
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/module.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_socket.h>
#include <net/inet_sock.h>
/* Private data of the "socket" expression. */
struct nft_socket {
/* Socket property to report; this file only handles NFT_SOCKET_TRANSPARENT. */
enum nft_socket_keys key:8;
union {
/* Destination register that receives the result. */
enum nft_registers dreg:8;
};
};
/*
 * Evaluate a "socket" expression: find the socket the packet belongs to and
 * store the requested property in the destination register.
 *
 * If the skb already carries a socket it is used as-is. Otherwise a lookup
 * is done according to the hook's protocol family; an unsupported family
 * breaks rule evaluation. When no socket is found, 0 is stored.
 *
 * The looked-up socket is deliberately NOT attached to the skb: setting
 * skb->sk without a matching skb->destructor trips the BUG() check in
 * skb_orphan(). Instead, the reference obtained from the lookup is dropped
 * before returning. A socket that was already on the skb is left alone,
 * since its reference is not ours.
 */
static void nft_socket_eval(const struct nft_expr *expr,
			    struct nft_regs *regs,
			    const struct nft_pktinfo *pkt)
{
	const struct nft_socket *priv = nft_expr_priv(expr);
	struct sk_buff *skb = pkt->skb;
	struct sock *sk = skb->sk;
	u32 *dest = &regs->data[priv->dreg];

	if (!sk) {
		switch (nft_pf(pkt)) {
		case NFPROTO_IPV4:
			sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
			break;
#if IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
		case NFPROTO_IPV6:
			sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
			break;
#endif
		default:
			WARN_ON_ONCE(1);
			regs->verdict.code = NFT_BREAK;
			return;
		}
	}

	if (!sk) {
		nft_reg_store8(dest, 0);
		return;
	}

	switch (priv->key) {
	case NFT_SOCKET_TRANSPARENT:
		nft_reg_store8(dest, nf_sk_is_transparent(sk));
		break;
	default:
		WARN_ON(1);
		regs->verdict.code = NFT_BREAK;
	}

	/* Release the reference taken by the slow-path lookup above. */
	if (sk != skb->sk)
		sock_gen_put(sk);
}
/* Netlink attribute policy: both the key and the destination register are 32-bit values. */
static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = {
[NFTA_SOCKET_KEY] = { .type = NLA_U32 },
[NFTA_SOCKET_DREG] = { .type = NLA_U32 },
};
/*
 * Parse the netlink attributes of a "socket" expression.
 *
 * Both NFTA_SOCKET_DREG and NFTA_SOCKET_KEY are mandatory (-EINVAL
 * otherwise). The expression is only available in the ip, ip6 (when IPv6
 * socket lookup support is built) and inet families, and only for keys
 * this file knows how to evaluate (-EOPNOTSUPP otherwise). Returns the
 * result of validating the destination register for a store of the key's
 * result size.
 */
static int nft_socket_init(const struct nft_ctx *ctx,
			   const struct nft_expr *expr,
			   const struct nlattr * const tb[])
{
	struct nft_socket *priv = nft_expr_priv(expr);
	unsigned int len;

	if (!tb[NFTA_SOCKET_DREG] || !tb[NFTA_SOCKET_KEY])
		return -EINVAL;

	switch (ctx->family) {
	case NFPROTO_IPV4:
#if IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
	case NFPROTO_IPV6:
#endif
	case NFPROTO_INET:
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* The key is big-endian on the wire: fetch it as __be32 before ntohl(). */
	priv->key = ntohl(nla_get_be32(tb[NFTA_SOCKET_KEY]));
	switch (priv->key) {
	case NFT_SOCKET_TRANSPARENT:
		len = sizeof(u8);
		break;
	default:
		return -EOPNOTSUPP;
	}

	priv->dreg = nft_parse_register(tb[NFTA_SOCKET_DREG]);
	return nft_validate_register_store(ctx, priv->dreg, NULL,
					   NFT_DATA_VALUE, len);
}
/*
 * Emit the expression's configuration (key and destination register) as
 * netlink attributes. Returns 0 on success, -1 if an attribute cannot be
 * added to @skb.
 */
static int nft_socket_dump(struct sk_buff *skb,
			   const struct nft_expr *expr)
{
	const struct nft_socket *priv = nft_expr_priv(expr);

	/* htonl() yields a __be32, so use the big-endian put helper. */
	if (nla_put_be32(skb, NFTA_SOCKET_KEY, htonl(priv->key)))
		return -1;
	if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
		return -1;

	return 0;
}
static struct nft_expr_type nft_socket_type;
/* Expression ops of "socket": wires up the eval/init/dump handlers defined above. */
static const struct nft_expr_ops nft_socket_ops = {
.type = &nft_socket_type,
/* private area is a struct nft_socket */
.size = NFT_EXPR_SIZE(sizeof(struct nft_socket)),
.eval = nft_socket_eval,
.init = nft_socket_init,
.dump = nft_socket_dump,
};
/*
 * Expression type registered under the name "socket". No .family is set
 * here; the supported families are enforced in nft_socket_init().
 */
static struct nft_expr_type nft_socket_type __read_mostly = {
.name = "socket",
.ops = &nft_socket_ops,
.policy = nft_socket_policy,
.maxattr = NFTA_SOCKET_MAX,
.owner = THIS_MODULE,
};
/* Module entry point: register the "socket" expression type; returns nft_register_expr()'s result. */
static int __init nft_socket_module_init(void)
{
return nft_register_expr(&nft_socket_type);
}
/* Module exit point: unregister the "socket" expression type. */
static void __exit nft_socket_module_exit(void)
{
nft_unregister_expr(&nft_socket_type);
}
module_init(nft_socket_module_init);
module_exit(nft_socket_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Máté Eckl");
MODULE_DESCRIPTION("nf_tables socket match module");
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment