Commit 89d5e232 authored by David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Conflicts:
	net/netfilter/nf_conntrack_proto_tcp.c

The conflict had to do with overlapping changes dealing with
fixing the use of an "s32" to hold the value returned by
NAT_OFFSET().

Pablo Neira Ayuso says:

====================
The following batch contains Netfilter/IPVS updates for your net-next tree.
More specifically, they are:

* Trivial typo fix in xt_addrtype, from Phil Oester.

* Remove net_ratelimit in the conntrack logging for consistency with other
  logging subsystems, from Patrick McHardy.

* Remove unneeded includes from the recently added xt_connlabel support, from
  Florian Westphal.

* Allow to update conntracks via nfqueue, don't need NFQA_CFG_F_CONNTRACK for
  this, from Florian Westphal.

* Remove tproxy core, now that we have socket early demux, from Florian
  Westphal.

* A couple of patches to refactor conntrack event reporting to save a good
  bunch of lines, from Florian Westphal.

* Fix missing locking in NAT sequence adjustment; it has not manifested in
  any known bug so far, from Patrick McHardy.

* Change sequence number adjustment variable to 32 bits, to delay the
  possible early overflow in long standing connections, also from Patrick.

* Cosmetic cleanups for IPVS, from Dragos Foianu.

* Fix possible null dereference in IPVS in the SH scheduler, from Daniel
  Borkmann.

* Allow to attach conntrack expectations via nfqueue. Before this patch, you
  had to use ctnetlink instead, thus, we save the conntrack lookup.

* Export xt_rpfilter and xt_HMARK header files, from Nicolas Dichtel.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 15ec80f5 38c67328
...@@ -2,9 +2,8 @@ Transparent proxy support ...@@ -2,9 +2,8 @@ Transparent proxy support
========================= =========================
This feature adds Linux 2.2-like transparent proxy support to current kernels. This feature adds Linux 2.2-like transparent proxy support to current kernels.
To use it, enable NETFILTER_TPROXY, the socket match and the TPROXY target in To use it, enable the socket match and the TPROXY target in your kernel config.
your kernel config. You will need policy routing too, so be sure to enable that You will need policy routing too, so be sure to enable that as well.
as well.
1. Making non-local sockets work 1. Making non-local sockets work
......
...@@ -314,8 +314,8 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) ...@@ -314,8 +314,8 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
#endif /*CONFIG_NETFILTER*/ #endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu; extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); extern void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu; extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
struct nf_conn; struct nf_conn;
...@@ -325,12 +325,14 @@ struct nfq_ct_hook { ...@@ -325,12 +325,14 @@ struct nfq_ct_hook {
size_t (*build_size)(const struct nf_conn *ct); size_t (*build_size)(const struct nf_conn *ct);
int (*build)(struct sk_buff *skb, struct nf_conn *ct); int (*build)(struct sk_buff *skb, struct nf_conn *ct);
int (*parse)(const struct nlattr *attr, struct nf_conn *ct); int (*parse)(const struct nlattr *attr, struct nf_conn *ct);
int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct,
u32 portid, u32 report);
}; };
extern struct nfq_ct_hook __rcu *nfq_ct_hook; extern struct nfq_ct_hook __rcu *nfq_ct_hook;
struct nfq_ct_nat_hook { struct nfq_ct_nat_hook {
void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct,
u32 ctinfo, int off); u32 ctinfo, s32 off);
}; };
extern struct nfq_ct_nat_hook __rcu *nfq_ct_nat_hook; extern struct nfq_ct_nat_hook __rcu *nfq_ct_nat_hook;
#else #else
......
...@@ -181,8 +181,7 @@ __nf_conntrack_find(struct net *net, u16 zone, ...@@ -181,8 +181,7 @@ __nf_conntrack_find(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple); const struct nf_conntrack_tuple *tuple);
extern int nf_conntrack_hash_check_insert(struct nf_conn *ct); extern int nf_conntrack_hash_check_insert(struct nf_conn *ct);
extern void nf_ct_delete_from_lists(struct nf_conn *ct); bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report);
extern void nf_ct_dying_timeout(struct nf_conn *ct);
extern void nf_conntrack_flush_report(struct net *net, u32 portid, int report); extern void nf_conntrack_flush_report(struct net *net, u32 portid, int report);
...@@ -235,7 +234,7 @@ static inline bool nf_ct_kill(struct nf_conn *ct) ...@@ -235,7 +234,7 @@ static inline bool nf_ct_kill(struct nf_conn *ct)
} }
/* These are for NAT. Icky. */ /* These are for NAT. Icky. */
extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
enum ip_conntrack_dir dir, enum ip_conntrack_dir dir,
u32 seq); u32 seq);
...@@ -249,7 +248,9 @@ extern void nf_ct_untracked_status_or(unsigned long bits); ...@@ -249,7 +248,9 @@ extern void nf_ct_untracked_status_or(unsigned long bits);
/* Iterate over all conntracks: if iter returns true, it's deleted. */ /* Iterate over all conntracks: if iter returns true, it's deleted. */
extern void extern void
nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data); nf_ct_iterate_cleanup(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report);
extern void nf_conntrack_free(struct nf_conn *ct); extern void nf_conntrack_free(struct nf_conn *ct);
extern struct nf_conn * extern struct nf_conn *
nf_conntrack_alloc(struct net *net, u16 zone, nf_conntrack_alloc(struct net *net, u16 zone,
......
...@@ -148,17 +148,10 @@ extern int nf_ct_port_nlattr_tuple_size(void); ...@@ -148,17 +148,10 @@ extern int nf_ct_port_nlattr_tuple_size(void);
extern const struct nla_policy nf_ct_port_nla_policy[]; extern const struct nla_policy nf_ct_port_nla_policy[];
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
#ifdef DEBUG_INVALID_PACKETS
#define LOG_INVALID(net, proto) \ #define LOG_INVALID(net, proto) \
((net)->ct.sysctl_log_invalid == (proto) || \ ((net)->ct.sysctl_log_invalid == (proto) || \
(net)->ct.sysctl_log_invalid == IPPROTO_RAW) (net)->ct.sysctl_log_invalid == IPPROTO_RAW)
#else #else
#define LOG_INVALID(net, proto) \
(((net)->ct.sysctl_log_invalid == (proto) || \
(net)->ct.sysctl_log_invalid == IPPROTO_RAW) \
&& net_ratelimit())
#endif
#else
static inline int LOG_INVALID(struct net *net, int proto) { return 0; } static inline int LOG_INVALID(struct net *net, int proto) { return 0; }
#endif /* CONFIG_SYSCTL */ #endif /* CONFIG_SYSCTL */
......
...@@ -19,7 +19,7 @@ struct nf_nat_seq { ...@@ -19,7 +19,7 @@ struct nf_nat_seq {
u_int32_t correction_pos; u_int32_t correction_pos;
/* sequence number offset before and after last modification */ /* sequence number offset before and after last modification */
int16_t offset_before, offset_after; int32_t offset_before, offset_after;
}; };
#include <linux/list.h> #include <linux/list.h>
......
...@@ -41,7 +41,7 @@ extern int nf_nat_mangle_udp_packet(struct sk_buff *skb, ...@@ -41,7 +41,7 @@ extern int nf_nat_mangle_udp_packet(struct sk_buff *skb,
extern void nf_nat_set_seq_adjust(struct nf_conn *ct, extern void nf_nat_set_seq_adjust(struct nf_conn *ct,
enum ip_conntrack_info ctinfo, enum ip_conntrack_info ctinfo,
__be32 seq, s16 off); __be32 seq, s32 off);
extern int nf_nat_seq_adjust(struct sk_buff *skb, extern int nf_nat_seq_adjust(struct sk_buff *skb,
struct nf_conn *ct, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, enum ip_conntrack_info ctinfo,
...@@ -56,11 +56,11 @@ extern int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, ...@@ -56,11 +56,11 @@ extern int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
extern void nf_nat_follow_master(struct nf_conn *ct, extern void nf_nat_follow_master(struct nf_conn *ct,
struct nf_conntrack_expect *this); struct nf_conntrack_expect *this);
extern s16 nf_nat_get_offset(const struct nf_conn *ct, extern s32 nf_nat_get_offset(const struct nf_conn *ct,
enum ip_conntrack_dir dir, enum ip_conntrack_dir dir,
u32 seq); u32 seq);
extern void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, extern void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
u32 dir, int off); u32 dir, s32 off);
#endif #endif
#ifndef _NF_TPROXY_CORE_H
#define _NF_TPROXY_CORE_H
#include <linux/types.h>
#include <linux/in.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/tcp.h>
/*
 * Lookup modes passed as the lookup_type argument of
 * nf_tproxy_get_sock_v4() / nf_tproxy_get_sock_v6().
 */
/* TCP: generic lookup; UDP: the socket found is returned unfiltered. */
#define NFT_LOOKUP_ANY 0
/* TCP: listener lookup only; UDP: connected sockets are rejected. */
#define NFT_LOOKUP_LISTENER 1
/* TCP: established lookup only; UDP: unconnected or wildcard-bound sockets are rejected. */
#define NFT_LOOKUP_ESTABLISHED 2
/* look up and get a reference to a matching socket */
/* This function is used by the 'TPROXY' target and the 'socket'
* match. The following lookups are supported:
*
* Explicit TProxy target rule
* ===========================
*
* This is used when the user wants to intercept a connection matching
* an explicit iptables rule. In this case the sockets are assumed
* matching in preference order:
*
* - match: if there's a fully established connection matching the
* _packet_ tuple, it is returned, assuming the redirection
* already took place and we process a packet belonging to an
* established connection
*
* - match: if there's a listening socket matching the redirection
* (e.g. on-port & on-ip of the connection), it is returned,
* regardless if it was bound to 0.0.0.0 or an explicit
* address. The reasoning is that if there's an explicit rule, it
* does not really matter if the listener is bound to an interface
* or to 0. The user already stated that he wants redirection
* (since he added the rule).
*
* "socket" match based redirection (no specific rule)
* ===================================================
*
* There are connections with dynamic endpoints (e.g. FTP data
* connection) that the user is unable to add explicit rules
* for. These are taken care of by a generic "socket" rule. It is
* assumed that the proxy application is trusted to open such
* connections without explicit iptables rule (except of course the
* generic 'socket' rule). In this case the following sockets are
* matched in preference order:
*
* - match: if there's a fully established connection matching the
* _packet_ tuple
*
* - match: if there's a non-zero bound listener (possibly with a
* non-local address) We don't accept zero-bound listeners, since
* then local services could intercept traffic going through the
* box.
*
* Please note that there's an overlap between what a TPROXY target
* and a socket match will match. Normally if you have both rules the
* "socket" match will be the first one, effectively all packets
* belonging to established connections going through that one.
*/
/*
 * IPv4 socket lookup shared by the TPROXY target and the socket match.
 *
 * @net:         network namespace to search in
 * @protocol:    IPPROTO_TCP or IPPROTO_UDP; anything else triggers a warning
 * @saddr/sport: source address and port of the packet (network byte order)
 * @daddr/dport: destination address and port (network byte order)
 * @in:          input device; its ifindex is handed to the lookup helpers
 * @lookup_type: one of the NFT_LOOKUP_* modes
 *
 * Returns the socket found by the selected lookup, or NULL when nothing
 * matched or the protocol / lookup type is not supported.
 */
static inline struct sock *
nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
		      const __be32 saddr, const __be32 daddr,
		      const __be16 sport, const __be16 dport,
		      const struct net_device *in, int lookup_type)
{
	struct sock *found = NULL;

	if (protocol == IPPROTO_TCP) {
		if (lookup_type == NFT_LOOKUP_ANY) {
			found = __inet_lookup(net, &tcp_hashinfo,
					      saddr, sport, daddr, dport,
					      in->ifindex);
		} else if (lookup_type == NFT_LOOKUP_LISTENER) {
			/*
			 * Listeners bound to 0.0.0.0 are returned as well:
			 * xt_TPROXY needs them, and xt_socket filters them
			 * out on its own.
			 */
			found = inet_lookup_listener(net, &tcp_hashinfo,
						     saddr, sport,
						     daddr, dport,
						     in->ifindex);
		} else if (lookup_type == NFT_LOOKUP_ESTABLISHED) {
			found = inet_lookup_established(net, &tcp_hashinfo,
							saddr, sport,
							daddr, dport,
							in->ifindex);
		} else {
			/* unknown lookup type */
			WARN_ON(1);
		}
	} else if (protocol == IPPROTO_UDP) {
		found = udp4_lib_lookup(net, saddr, sport, daddr, dport,
					in->ifindex);
		if (found && lookup_type != NFT_LOOKUP_ANY) {
			int connected = (found->sk_state == TCP_ESTABLISHED);
			int wildcard = (inet_sk(found)->inet_rcv_saddr == 0);
			int mismatch;

			/*
			 * As for TCP, 0.0.0.0-bound listeners are kept here
			 * and left for xt_socket to filter, because
			 * xt_TPROXY needs them.
			 */
			if (lookup_type == NFT_LOOKUP_ESTABLISHED)
				mismatch = !connected || wildcard;
			else
				mismatch = (lookup_type == NFT_LOOKUP_LISTENER) &&
					   connected;

			if (mismatch) {
				/* wrong kind of socket: drop our reference */
				sock_put(found);
				found = NULL;
			}
		}
	} else {
		/* only TCP and UDP are supported */
		WARN_ON(1);
	}

	pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
		 protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, found);

	return found;
}
#if IS_ENABLED(CONFIG_IPV6)
/*
 * IPv6 counterpart of the IPv4 tproxy socket lookup.
 *
 * @net:         network namespace to search in
 * @protocol:    IPPROTO_TCP or IPPROTO_UDP; anything else triggers a warning
 * @saddr/sport: source address and port of the packet (port in network order)
 * @daddr/dport: destination address and port (port in network order)
 * @in:          input device; its ifindex is handed to the lookup helpers
 * @lookup_type: one of the NFT_LOOKUP_* modes
 *
 * Returns the socket found by the selected lookup, or NULL when nothing
 * matched or the protocol / lookup type is not supported.
 */
static inline struct sock *
nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
		      const struct in6_addr *saddr, const struct in6_addr *daddr,
		      const __be16 sport, const __be16 dport,
		      const struct net_device *in, int lookup_type)
{
	struct sock *found = NULL;

	if (protocol == IPPROTO_TCP) {
		if (lookup_type == NFT_LOOKUP_ANY) {
			found = inet6_lookup(net, &tcp_hashinfo,
					     saddr, sport, daddr, dport,
					     in->ifindex);
		} else if (lookup_type == NFT_LOOKUP_LISTENER) {
			/*
			 * Wildcard-bound listeners are returned as well:
			 * xt_TPROXY needs them, and xt_socket filters them
			 * out on its own.  The destination port is passed
			 * in host byte order here.
			 */
			found = inet6_lookup_listener(net, &tcp_hashinfo,
						      saddr, sport,
						      daddr, ntohs(dport),
						      in->ifindex);
		} else if (lookup_type == NFT_LOOKUP_ESTABLISHED) {
			/* destination port is passed in host byte order */
			found = __inet6_lookup_established(net, &tcp_hashinfo,
							   saddr, sport,
							   daddr, ntohs(dport),
							   in->ifindex);
		} else {
			/* unknown lookup type */
			WARN_ON(1);
		}
	} else if (protocol == IPPROTO_UDP) {
		found = udp6_lib_lookup(net, saddr, sport, daddr, dport,
					in->ifindex);
		if (found && lookup_type != NFT_LOOKUP_ANY) {
			int connected = (found->sk_state == TCP_ESTABLISHED);
			int wildcard = ipv6_addr_any(&inet6_sk(found)->rcv_saddr);
			int mismatch;

			/*
			 * As for TCP, wildcard-bound listeners are kept here
			 * and left for xt_socket to filter, because
			 * xt_TPROXY needs them.
			 */
			if (lookup_type == NFT_LOOKUP_ESTABLISHED)
				mismatch = !connected || wildcard;
			else
				mismatch = (lookup_type == NFT_LOOKUP_LISTENER) &&
					   connected;

			if (mismatch) {
				/* wrong kind of socket: drop our reference */
				sock_put(found);
				found = NULL;
			}
		}
	} else {
		/* only TCP and UDP are supported */
		WARN_ON(1);
	}

	pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
		 protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, found);

	return found;
}
#endif
/* assign a socket to the skb -- consumes sk */
void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk);
#endif
...@@ -15,6 +15,8 @@ int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, ...@@ -15,6 +15,8 @@ int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo); enum ip_conntrack_info ctinfo);
void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, int diff); enum ip_conntrack_info ctinfo, int diff);
int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
u32 portid, u32 report);
#else #else
inline struct nf_conn * inline struct nf_conn *
nfqnl_ct_get(struct sk_buff *entskb, size_t *size, enum ip_conntrack_info *ctinfo) nfqnl_ct_get(struct sk_buff *entskb, size_t *size, enum ip_conntrack_info *ctinfo)
...@@ -39,5 +41,11 @@ inline void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, ...@@ -39,5 +41,11 @@ inline void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, int diff) enum ip_conntrack_info ctinfo, int diff)
{ {
} }
inline int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
u32 portid, u32 report)
{
return 0;
}
#endif /* NF_CONNTRACK */ #endif /* NF_CONNTRACK */
#endif #endif
...@@ -22,6 +22,7 @@ header-y += xt_CONNMARK.h ...@@ -22,6 +22,7 @@ header-y += xt_CONNMARK.h
header-y += xt_CONNSECMARK.h header-y += xt_CONNSECMARK.h
header-y += xt_CT.h header-y += xt_CT.h
header-y += xt_DSCP.h header-y += xt_DSCP.h
header-y += xt_HMARK.h
header-y += xt_IDLETIMER.h header-y += xt_IDLETIMER.h
header-y += xt_LED.h header-y += xt_LED.h
header-y += xt_LOG.h header-y += xt_LOG.h
...@@ -68,6 +69,7 @@ header-y += xt_quota.h ...@@ -68,6 +69,7 @@ header-y += xt_quota.h
header-y += xt_rateest.h header-y += xt_rateest.h
header-y += xt_realm.h header-y += xt_realm.h
header-y += xt_recent.h header-y += xt_recent.h
header-y += xt_rpfilter.h
header-y += xt_sctp.h header-y += xt_sctp.h
header-y += xt_set.h header-y += xt_set.h
header-y += xt_socket.h header-y += xt_socket.h
......
...@@ -46,6 +46,7 @@ enum nfqnl_attr_type { ...@@ -46,6 +46,7 @@ enum nfqnl_attr_type {
NFQA_CT_INFO, /* enum ip_conntrack_info */ NFQA_CT_INFO, /* enum ip_conntrack_info */
NFQA_CAP_LEN, /* __u32 length of captured packet */ NFQA_CAP_LEN, /* __u32 length of captured packet */
NFQA_SKB_INFO, /* __u32 skb meta information */ NFQA_SKB_INFO, /* __u32 skb meta information */
NFQA_EXP, /* nf_conntrack_netlink.h */
__NFQA_MAX __NFQA_MAX
}; };
......
...@@ -118,7 +118,7 @@ static int masq_device_event(struct notifier_block *this, ...@@ -118,7 +118,7 @@ static int masq_device_event(struct notifier_block *this,
NF_CT_ASSERT(dev->ifindex != 0); NF_CT_ASSERT(dev->ifindex != 0);
nf_ct_iterate_cleanup(net, device_cmp, nf_ct_iterate_cleanup(net, device_cmp,
(void *)(long)dev->ifindex); (void *)(long)dev->ifindex, 0, 0);
} }
return NOTIFY_DONE; return NOTIFY_DONE;
......
...@@ -76,7 +76,7 @@ static int masq_device_event(struct notifier_block *this, ...@@ -76,7 +76,7 @@ static int masq_device_event(struct notifier_block *this,
if (event == NETDEV_DOWN) if (event == NETDEV_DOWN)
nf_ct_iterate_cleanup(net, device_cmp, nf_ct_iterate_cleanup(net, device_cmp,
(void *)(long)dev->ifindex); (void *)(long)dev->ifindex, 0, 0);
return NOTIFY_DONE; return NOTIFY_DONE;
} }
......
...@@ -410,20 +410,6 @@ config NF_NAT_TFTP ...@@ -410,20 +410,6 @@ config NF_NAT_TFTP
endif # NF_CONNTRACK endif # NF_CONNTRACK
# transparent proxy support
config NETFILTER_TPROXY
tristate "Transparent proxying support"
depends on IP_NF_MANGLE
depends on NETFILTER_ADVANCED
help
This option enables transparent proxying support, that is,
support for handling non-locally bound IPv4 TCP and UDP sockets.
For it to work you will have to configure certain iptables rules
and use policy routing. For more information on how to set it up
see Documentation/networking/tproxy.txt.
To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XTABLES config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)" tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n default m if NETFILTER_ADVANCED=n
...@@ -720,10 +706,10 @@ config NETFILTER_XT_TARGET_TEE ...@@ -720,10 +706,10 @@ config NETFILTER_XT_TARGET_TEE
this clone be rerouted to another nexthop. this clone be rerouted to another nexthop.
config NETFILTER_XT_TARGET_TPROXY config NETFILTER_XT_TARGET_TPROXY
tristate '"TPROXY" target support' tristate '"TPROXY" target transparent proxying support'
depends on NETFILTER_TPROXY
depends on NETFILTER_XTABLES depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED depends on NETFILTER_ADVANCED
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help help
...@@ -731,6 +717,9 @@ config NETFILTER_XT_TARGET_TPROXY ...@@ -731,6 +717,9 @@ config NETFILTER_XT_TARGET_TPROXY
REDIRECT. It can only be used in the mangle table and is useful REDIRECT. It can only be used in the mangle table and is useful
to redirect traffic to a transparent proxy. It does _not_ depend to redirect traffic to a transparent proxy. It does _not_ depend
on Netfilter connection tracking and NAT, unlike REDIRECT. on Netfilter connection tracking and NAT, unlike REDIRECT.
For it to work you will have to configure certain iptables rules
and use policy routing. For more information on how to set it up
see Documentation/networking/tproxy.txt.
To compile it as a module, choose M here. If unsure, say N. To compile it as a module, choose M here. If unsure, say N.
...@@ -1180,7 +1169,6 @@ config NETFILTER_XT_MATCH_SCTP ...@@ -1180,7 +1169,6 @@ config NETFILTER_XT_MATCH_SCTP
config NETFILTER_XT_MATCH_SOCKET config NETFILTER_XT_MATCH_SOCKET
tristate '"socket" match support' tristate '"socket" match support'
depends on NETFILTER_TPROXY
depends on NETFILTER_XTABLES depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED depends on NETFILTER_ADVANCED
depends on !NF_CONNTRACK || NF_CONNTRACK depends on !NF_CONNTRACK || NF_CONNTRACK
......
...@@ -61,9 +61,6 @@ obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o ...@@ -61,9 +61,6 @@ obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
# transparent proxy support
obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
# generic X tables # generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
......
...@@ -234,12 +234,13 @@ EXPORT_SYMBOL(skb_make_writable); ...@@ -234,12 +234,13 @@ EXPORT_SYMBOL(skb_make_writable);
/* This does not belong here, but locally generated errors need it if connection /* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence tracking in use: without this, connection may not be in hash table, and hence
manufactured ICMP or RST packets will not be associated with it. */ manufactured ICMP or RST packets will not be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly; void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
__rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach); EXPORT_SYMBOL(ip_ct_attach);
void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
{ {
void (*attach)(struct sk_buff *, struct sk_buff *); void (*attach)(struct sk_buff *, const struct sk_buff *);
if (skb->nfct) { if (skb->nfct) {
rcu_read_lock(); rcu_read_lock();
......
...@@ -414,7 +414,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc) ...@@ -414,7 +414,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
spin_lock_bh(&svc->sched_lock); spin_lock_bh(&svc->sched_lock);
tbl->dead = 1; tbl->dead = 1;
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblcr_free(en); ip_vs_lblcr_free(en);
} }
...@@ -440,7 +440,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) ...@@ -440,7 +440,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
struct ip_vs_lblcr_entry *en; struct ip_vs_lblcr_entry *en;
struct hlist_node *next; struct hlist_node *next;
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK; j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
spin_lock(&svc->sched_lock); spin_lock(&svc->sched_lock);
...@@ -495,7 +495,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) ...@@ -495,7 +495,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
if (goal > tbl->max_size/2) if (goal > tbl->max_size/2)
goal = tbl->max_size/2; goal = tbl->max_size/2;
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK; j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
spin_lock(&svc->sched_lock); spin_lock(&svc->sched_lock);
...@@ -536,7 +536,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) ...@@ -536,7 +536,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
/* /*
* Initialize the hash buckets * Initialize the hash buckets
*/ */
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
INIT_HLIST_HEAD(&tbl->bucket[i]); INIT_HLIST_HEAD(&tbl->bucket[i]);
} }
tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
......
...@@ -269,14 +269,20 @@ ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) ...@@ -269,14 +269,20 @@ ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
switch (iph->protocol) { switch (iph->protocol) {
case IPPROTO_TCP: case IPPROTO_TCP:
th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
if (unlikely(th == NULL))
return 0;
port = th->source; port = th->source;
break; break;
case IPPROTO_UDP: case IPPROTO_UDP:
uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
if (unlikely(uh == NULL))
return 0;
port = uh->source; port = uh->source;
break; break;
case IPPROTO_SCTP: case IPPROTO_SCTP:
sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
if (unlikely(sh == NULL))
return 0;
port = sh->source; port = sh->source;
break; break;
default: default:
......
...@@ -238,7 +238,7 @@ destroy_conntrack(struct nf_conntrack *nfct) ...@@ -238,7 +238,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
nf_conntrack_free(ct); nf_conntrack_free(ct);
} }
void nf_ct_delete_from_lists(struct nf_conn *ct) static void nf_ct_delete_from_lists(struct nf_conn *ct)
{ {
struct net *net = nf_ct_net(ct); struct net *net = nf_ct_net(ct);
...@@ -253,7 +253,6 @@ void nf_ct_delete_from_lists(struct nf_conn *ct) ...@@ -253,7 +253,6 @@ void nf_ct_delete_from_lists(struct nf_conn *ct)
&net->ct.dying); &net->ct.dying);
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_lock);
} }
EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
static void death_by_event(unsigned long ul_conntrack) static void death_by_event(unsigned long ul_conntrack)
{ {
...@@ -275,7 +274,7 @@ static void death_by_event(unsigned long ul_conntrack) ...@@ -275,7 +274,7 @@ static void death_by_event(unsigned long ul_conntrack)
nf_ct_put(ct); nf_ct_put(ct);
} }
void nf_ct_dying_timeout(struct nf_conn *ct) static void nf_ct_dying_timeout(struct nf_conn *ct)
{ {
struct net *net = nf_ct_net(ct); struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
...@@ -288,27 +287,33 @@ void nf_ct_dying_timeout(struct nf_conn *ct) ...@@ -288,27 +287,33 @@ void nf_ct_dying_timeout(struct nf_conn *ct)
(prandom_u32() % net->ct.sysctl_events_retry_timeout); (prandom_u32() % net->ct.sysctl_events_retry_timeout);
add_timer(&ecache->timeout); add_timer(&ecache->timeout);
} }
EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
static void death_by_timeout(unsigned long ul_conntrack) bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{ {
struct nf_conn *ct = (void *)ul_conntrack;
struct nf_conn_tstamp *tstamp; struct nf_conn_tstamp *tstamp;
tstamp = nf_conn_tstamp_find(ct); tstamp = nf_conn_tstamp_find(ct);
if (tstamp && tstamp->stop == 0) if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_to_ns(ktime_get_real()); tstamp->stop = ktime_to_ns(ktime_get_real());
if (!test_bit(IPS_DYING_BIT, &ct->status) && if (!nf_ct_is_dying(ct) &&
unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct,
portid, report) < 0)) {
/* destroy event was not delivered */ /* destroy event was not delivered */
nf_ct_delete_from_lists(ct); nf_ct_delete_from_lists(ct);
nf_ct_dying_timeout(ct); nf_ct_dying_timeout(ct);
return; return false;
} }
set_bit(IPS_DYING_BIT, &ct->status); set_bit(IPS_DYING_BIT, &ct->status);
nf_ct_delete_from_lists(ct); nf_ct_delete_from_lists(ct);
nf_ct_put(ct); nf_ct_put(ct);
return true;
}
EXPORT_SYMBOL_GPL(nf_ct_delete);
static void death_by_timeout(unsigned long ul_conntrack)
{
nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
} }
/* /*
...@@ -643,10 +648,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) ...@@ -643,10 +648,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
return dropped; return dropped;
if (del_timer(&ct->timeout)) { if (del_timer(&ct->timeout)) {
death_by_timeout((unsigned long)ct); if (nf_ct_delete(ct, 0, 0)) {
/* Check if we indeed killed this entry. Reliable event
delivery may have inserted it into the dying list. */
if (test_bit(IPS_DYING_BIT, &ct->status)) {
dropped = 1; dropped = 1;
NF_CT_STAT_INC_ATOMIC(net, early_drop); NF_CT_STAT_INC_ATOMIC(net, early_drop);
} }
...@@ -1192,7 +1194,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); ...@@ -1192,7 +1194,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
#endif #endif
/* Used by ipt_REJECT and ip6t_REJECT. */ /* Used by ipt_REJECT and ip6t_REJECT. */
static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
{ {
struct nf_conn *ct; struct nf_conn *ct;
enum ip_conntrack_info ctinfo; enum ip_conntrack_info ctinfo;
...@@ -1244,7 +1246,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), ...@@ -1244,7 +1246,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
void nf_ct_iterate_cleanup(struct net *net, void nf_ct_iterate_cleanup(struct net *net,
int (*iter)(struct nf_conn *i, void *data), int (*iter)(struct nf_conn *i, void *data),
void *data) void *data, u32 portid, int report)
{ {
struct nf_conn *ct; struct nf_conn *ct;
unsigned int bucket = 0; unsigned int bucket = 0;
...@@ -1252,7 +1254,8 @@ void nf_ct_iterate_cleanup(struct net *net, ...@@ -1252,7 +1254,8 @@ void nf_ct_iterate_cleanup(struct net *net,
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */ /* Time to push up daises... */
if (del_timer(&ct->timeout)) if (del_timer(&ct->timeout))
death_by_timeout((unsigned long)ct); nf_ct_delete(ct, portid, report);
/* ... else the timer will get him soon. */ /* ... else the timer will get him soon. */
nf_ct_put(ct); nf_ct_put(ct);
...@@ -1260,30 +1263,6 @@ void nf_ct_iterate_cleanup(struct net *net, ...@@ -1260,30 +1263,6 @@ void nf_ct_iterate_cleanup(struct net *net,
} }
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
struct __nf_ct_flush_report {
u32 portid;
int report;
};
static int kill_report(struct nf_conn *i, void *data)
{
struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
struct nf_conn_tstamp *tstamp;
tstamp = nf_conn_tstamp_find(i);
if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_to_ns(ktime_get_real());
/* If we fail to deliver the event, death_by_timeout() will retry */
if (nf_conntrack_event_report(IPCT_DESTROY, i,
fr->portid, fr->report) < 0)
return 1;
/* Avoid the delivery of the destroy event in death_by_timeout(). */
set_bit(IPS_DYING_BIT, &i->status);
return 1;
}
static int kill_all(struct nf_conn *i, void *data) static int kill_all(struct nf_conn *i, void *data)
{ {
return 1; return 1;
...@@ -1301,11 +1280,7 @@ EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); ...@@ -1301,11 +1280,7 @@ EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
void nf_conntrack_flush_report(struct net *net, u32 portid, int report) void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
{ {
struct __nf_ct_flush_report fr = { nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report);
.portid = portid,
.report = report,
};
nf_ct_iterate_cleanup(net, kill_report, &fr);
} }
EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
...@@ -1386,7 +1361,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) ...@@ -1386,7 +1361,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
i_see_dead_people: i_see_dead_people:
busy = 0; busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) { list_for_each_entry(net, net_exit_list, exit_list) {
nf_ct_iterate_cleanup(net, kill_all, NULL); nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
nf_ct_release_dying_list(net); nf_ct_release_dying_list(net);
if (atomic_read(&net->ct.count) != 0) if (atomic_read(&net->ct.count) != 0)
busy = 1; busy = 1;
...@@ -1692,7 +1667,7 @@ int nf_conntrack_init_net(struct net *net) ...@@ -1692,7 +1667,7 @@ int nf_conntrack_init_net(struct net *net)
return ret; return ret;
} }
s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
enum ip_conntrack_dir dir, enum ip_conntrack_dir dir,
u32 seq); u32 seq);
EXPORT_SYMBOL_GPL(nf_ct_nat_offset); EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
...@@ -8,12 +8,8 @@ ...@@ -8,12 +8,8 @@
* published by the Free Software Foundation. * published by the Free Software Foundation.
*/ */
#include <linux/ctype.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/jhash.h>
#include <linux/spinlock.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/slab.h>
#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_conntrack_labels.h>
......
...@@ -1038,21 +1038,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, ...@@ -1038,21 +1038,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
} }
} }
if (del_timer(&ct->timeout)) { if (del_timer(&ct->timeout))
if (nf_conntrack_event_report(IPCT_DESTROY, ct, nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
NETLINK_CB(skb).portid,
nlmsg_report(nlh)) < 0) {
nf_ct_delete_from_lists(ct);
/* we failed to report the event, try later */
nf_ct_dying_timeout(ct);
nf_ct_put(ct);
return 0;
}
/* death_by_timeout would report the event again */
set_bit(IPS_DYING_BIT, &ct->status);
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
}
nf_ct_put(ct); nf_ct_put(ct);
return 0; return 0;
...@@ -1999,6 +1987,27 @@ ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb, ...@@ -1999,6 +1987,27 @@ ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb,
return err == -EAGAIN ? -ENOBUFS : err; return err == -EAGAIN ? -ENOBUFS : err;
} }
/*
 * Validation policy for the CTA_EXPECT_* attributes of an expectation
 * message: nested tuples, u32/u16 scalars and NUL-terminated strings.
 * ctnetlink_nfqueue_attach_expect() hands it to nla_parse_nested().
 */
static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
[CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
[CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
[CTA_EXPECT_MASK] = { .type = NLA_NESTED },
[CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
[CTA_EXPECT_ID] = { .type = NLA_U32 },
/* helper name is bounded by NF_CT_HELPER_NAME_LEN - 1 */
[CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN - 1 },
[CTA_EXPECT_ZONE] = { .type = NLA_U16 },
[CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
[CTA_EXPECT_CLASS] = { .type = NLA_U32 },
[CTA_EXPECT_NAT] = { .type = NLA_NESTED },
[CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
};
/*
 * Forward declaration: the nfqueue attach path below calls
 * ctnetlink_alloc_expect() before its definition appears in this file.
 * The result is checked with IS_ERR() by its callers.
 */
static struct nf_conntrack_expect *
ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
struct nf_conntrack_helper *helper,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask);
#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT #ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
static size_t static size_t
ctnetlink_nfqueue_build_size(const struct nf_conn *ct) ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
...@@ -2139,10 +2148,69 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) ...@@ -2139,10 +2148,69 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
return ret; return ret;
} }
/*
 * Parse the expectation tuple and its mask out of @cda, using the layer 3
 * protocol number of @ct for both.
 *
 * Returns the (negative) result of the CTA_EXPECT_TUPLE parse when it
 * fails, otherwise whatever the CTA_EXPECT_MASK parse returns.
 */
static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
				       const struct nf_conn *ct,
				       struct nf_conntrack_tuple *tuple,
				       struct nf_conntrack_tuple *mask)
{
	int ret = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE,
					nf_ct_l3num(ct));

	if (ret >= 0)
		ret = ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK,
					    nf_ct_l3num(ct));
	return ret;
}
/*
 * Attach an expectation to conntrack @ct from an nfqueue verdict.
 *
 * @attr:   nested attribute carrying the CTA_EXPECT_* description
 * @ct:     master conntrack the expectation is created for
 * @portid: netlink port id forwarded to nf_ct_expect_related_report()
 * @report: report flag forwarded to nf_ct_expect_related_report()
 *
 * Returns 0 on success, -EOPNOTSUPP when the named helper is not
 * registered, or the negative error produced by attribute parsing,
 * expectation allocation or expectation registration.
 */
static int
ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
				u32 portid, u32 report)
{
	struct nlattr *cda[CTA_EXPECT_MAX+1];
	struct nf_conntrack_tuple tuple, mask;
	/*
	 * Must start out NULL: the helper is only looked up when
	 * CTA_EXPECT_HELP_NAME is present, yet it is always passed on to
	 * ctnetlink_alloc_expect().
	 */
	struct nf_conntrack_helper *helper = NULL;
	struct nf_conntrack_expect *exp;
	int err;

	err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy);
	if (err < 0)
		return err;

	err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda,
					  ct, &tuple, &mask);
	if (err < 0)
		return err;

	if (cda[CTA_EXPECT_HELP_NAME]) {
		const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);

		helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
						    nf_ct_protonum(ct));
		if (helper == NULL)
			return -EOPNOTSUPP;
	}

	exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct,
				     helper, &tuple, &mask);
	if (IS_ERR(exp))
		return PTR_ERR(exp);

	err = nf_ct_expect_related_report(exp, portid, report);
	/*
	 * The reference returned by ctnetlink_alloc_expect() is ours and
	 * has to be dropped whether or not the registration succeeded.
	 */
	nf_ct_expect_put(exp);

	return err < 0 ? err : 0;
}
static struct nfq_ct_hook ctnetlink_nfqueue_hook = { static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
.build_size = ctnetlink_nfqueue_build_size, .build_size = ctnetlink_nfqueue_build_size,
.build = ctnetlink_nfqueue_build, .build = ctnetlink_nfqueue_build,
.parse = ctnetlink_nfqueue_parse, .parse = ctnetlink_nfqueue_parse,
.attach_expect = ctnetlink_nfqueue_attach_expect,
}; };
#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */ #endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */
...@@ -2510,21 +2578,6 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, ...@@ -2510,21 +2578,6 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
return err; return err;
} }
/*
 * Validation policy for CTA_EXPECT_* attributes: nested tuples, u32/u16
 * scalars and NUL-terminated strings.
 */
static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
[CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
[CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
[CTA_EXPECT_MASK] = { .type = NLA_NESTED },
[CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
[CTA_EXPECT_ID] = { .type = NLA_U32 },
/* helper name is bounded by NF_CT_HELPER_NAME_LEN - 1 */
[CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN - 1 },
[CTA_EXPECT_ZONE] = { .type = NLA_U16 },
[CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
[CTA_EXPECT_CLASS] = { .type = NLA_U32 },
[CTA_EXPECT_NAT] = { .type = NLA_NESTED },
[CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
};
static int static int
ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlmsghdr *nlh,
...@@ -2747,76 +2800,26 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, ...@@ -2747,76 +2800,26 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
#endif #endif
} }
static int static struct nf_conntrack_expect *
ctnetlink_create_expect(struct net *net, u16 zone, ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
const struct nlattr * const cda[], struct nf_conntrack_helper *helper,
u_int8_t u3, struct nf_conntrack_tuple *tuple,
u32 portid, int report) struct nf_conntrack_tuple *mask)
{ {
struct nf_conntrack_tuple tuple, mask, master_tuple; u_int32_t class = 0;
struct nf_conntrack_tuple_hash *h = NULL;
struct nf_conntrack_expect *exp; struct nf_conntrack_expect *exp;
struct nf_conn *ct;
struct nf_conn_help *help; struct nf_conn_help *help;
struct nf_conntrack_helper *helper = NULL; int err;
u_int32_t class = 0;
int err = 0;
/* caller guarantees that those three CTA_EXPECT_* exist */
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
if (err < 0)
return err;
err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
if (err < 0)
return err;
err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
if (err < 0)
return err;
/* Look for master conntrack of this expectation */
h = nf_conntrack_find_get(net, zone, &master_tuple);
if (!h)
return -ENOENT;
ct = nf_ct_tuplehash_to_ctrack(h);
/* Look for helper of this expectation */
if (cda[CTA_EXPECT_HELP_NAME]) {
const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper == NULL) {
#ifdef CONFIG_MODULES
if (request_module("nfct-helper-%s", helpname) < 0) {
err = -EOPNOTSUPP;
goto out;
}
helper = __nf_conntrack_helper_find(helpname,
nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper) {
err = -EAGAIN;
goto out;
}
#endif
err = -EOPNOTSUPP;
goto out;
}
}
if (cda[CTA_EXPECT_CLASS] && helper) { if (cda[CTA_EXPECT_CLASS] && helper) {
class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS])); class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS]));
if (class > helper->expect_class_max) { if (class > helper->expect_class_max)
err = -EINVAL; return ERR_PTR(-EINVAL);
goto out;
}
} }
exp = nf_ct_expect_alloc(ct); exp = nf_ct_expect_alloc(ct);
if (!exp) { if (!exp)
err = -ENOMEM; return ERR_PTR(-ENOMEM);
goto out;
}
help = nfct_help(ct); help = nfct_help(ct);
if (!help) { if (!help) {
if (!cda[CTA_EXPECT_TIMEOUT]) { if (!cda[CTA_EXPECT_TIMEOUT]) {
...@@ -2854,21 +2857,89 @@ ctnetlink_create_expect(struct net *net, u16 zone, ...@@ -2854,21 +2857,89 @@ ctnetlink_create_expect(struct net *net, u16 zone,
exp->class = class; exp->class = class;
exp->master = ct; exp->master = ct;
exp->helper = helper; exp->helper = helper;
memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); exp->tuple = *tuple;
memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3)); exp->mask.src.u3 = mask->src.u3;
exp->mask.src.u.all = mask.src.u.all; exp->mask.src.u.all = mask->src.u.all;
if (cda[CTA_EXPECT_NAT]) { if (cda[CTA_EXPECT_NAT]) {
err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT], err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT],
exp, u3); exp, nf_ct_l3num(ct));
if (err < 0) if (err < 0)
goto err_out; goto err_out;
} }
err = nf_ct_expect_related_report(exp, portid, report); return exp;
err_out: err_out:
nf_ct_expect_put(exp); nf_ct_expect_put(exp);
out: return ERR_PTR(err);
nf_ct_put(nf_ct_tuplehash_to_ctrack(h)); }
/*
 * Create an expectation from a ctnetlink request.
 *
 * @net:    network namespace used for the master conntrack lookup
 * @zone:   conntrack zone used for the master conntrack lookup
 * @cda:    parsed CTA_EXPECT_* attributes; the caller guarantees that
 *          CTA_EXPECT_TUPLE, CTA_EXPECT_MASK and CTA_EXPECT_MASTER exist
 * @u3:     layer 3 protocol number used for tuple parsing and helper lookup
 * @portid: netlink port id forwarded to nf_ct_expect_related_report()
 * @report: report flag forwarded to nf_ct_expect_related_report()
 *
 * Returns 0 on success or a negative errno: a tuple parse error, -ENOENT
 * when the master conntrack does not exist, -EOPNOTSUPP when the helper is
 * unavailable, -EAGAIN when the helper only became available after a module
 * load, or the error from expectation allocation/registration.
 */
static int
ctnetlink_create_expect(struct net *net, u16 zone,
			const struct nlattr * const cda[],
			u_int8_t u3, u32 portid, int report)
{
	struct nf_conntrack_tuple tuple, mask, master_tuple;
	struct nf_conntrack_tuple_hash *h = NULL;
	struct nf_conntrack_helper *helper = NULL;
	struct nf_conntrack_expect *exp;
	struct nf_conn *ct;
	int err;

	/* caller guarantees that those three CTA_EXPECT_* exist */
	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
	if (err < 0)
		return err;
	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
	if (err < 0)
		return err;
	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
	if (err < 0)
		return err;

	/* Look for master conntrack of this expectation */
	h = nf_conntrack_find_get(net, zone, &master_tuple);
	if (!h)
		return -ENOENT;
	ct = nf_ct_tuplehash_to_ctrack(h);

	/* Look for helper of this expectation */
	if (cda[CTA_EXPECT_HELP_NAME]) {
		const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);

		helper = __nf_conntrack_helper_find(helpname, u3,
						    nf_ct_protonum(ct));
		if (helper == NULL) {
#ifdef CONFIG_MODULES
			if (request_module("nfct-helper-%s", helpname) < 0) {
				err = -EOPNOTSUPP;
				goto err_ct;
			}
			helper = __nf_conntrack_helper_find(helpname, u3,
							    nf_ct_protonum(ct));
			if (helper) {
				/* helper is loaded now: ask the caller to retry */
				err = -EAGAIN;
				goto err_ct;
			}
#endif
			err = -EOPNOTSUPP;
			goto err_ct;
		}
	}

	exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask);
	if (IS_ERR(exp)) {
		err = PTR_ERR(exp);
		goto err_ct;
	}

	err = nf_ct_expect_related_report(exp, portid, report);
	if (err >= 0)
		err = 0;

	/*
	 * Both the expectation reference from ctnetlink_alloc_expect() and
	 * the master conntrack reference from nf_conntrack_find_get() belong
	 * to this function; release them on success as well as on failure.
	 */
	nf_ct_expect_put(exp);
err_ct:
	nf_ct_put(ct);
	return err;
}
......
...@@ -281,7 +281,7 @@ void nf_ct_l3proto_pernet_unregister(struct net *net, ...@@ -281,7 +281,7 @@ void nf_ct_l3proto_pernet_unregister(struct net *net,
nf_ct_l3proto_unregister_sysctl(net, proto); nf_ct_l3proto_unregister_sysctl(net, proto);
/* Remove all contrack entries for this protocol */ /* Remove all contrack entries for this protocol */
nf_ct_iterate_cleanup(net, kill_l3proto, proto); nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
} }
EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister); EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
...@@ -476,7 +476,7 @@ void nf_ct_l4proto_pernet_unregister(struct net *net, ...@@ -476,7 +476,7 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
/* Remove all contrack entries for this protocol */ /* Remove all contrack entries for this protocol */
nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
} }
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
......
...@@ -496,7 +496,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, ...@@ -496,7 +496,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
} }
#ifdef CONFIG_NF_NAT_NEEDED #ifdef CONFIG_NF_NAT_NEEDED
static inline s16 nat_offset(const struct nf_conn *ct, static inline s32 nat_offset(const struct nf_conn *ct,
enum ip_conntrack_dir dir, enum ip_conntrack_dir dir,
u32 seq) u32 seq)
{ {
...@@ -525,7 +525,7 @@ static bool tcp_in_window(const struct nf_conn *ct, ...@@ -525,7 +525,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
struct ip_ct_tcp_state *receiver = &state->seen[!dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
__u32 seq, ack, sack, end, win, swin; __u32 seq, ack, sack, end, win, swin;
s16 receiver_offset; s32 receiver_offset;
bool res, in_recv_win; bool res, in_recv_win;
/* /*
......
...@@ -497,7 +497,7 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) ...@@ -497,7 +497,7 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
rtnl_lock(); rtnl_lock();
for_each_net(net) for_each_net(net)
nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
rtnl_unlock(); rtnl_unlock();
} }
...@@ -511,7 +511,7 @@ static void nf_nat_l3proto_clean(u8 l3proto) ...@@ -511,7 +511,7 @@ static void nf_nat_l3proto_clean(u8 l3proto)
rtnl_lock(); rtnl_lock();
for_each_net(net) for_each_net(net)
nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
rtnl_unlock(); rtnl_unlock();
} }
...@@ -749,7 +749,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) ...@@ -749,7 +749,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
{ {
struct nf_nat_proto_clean clean = {}; struct nf_nat_proto_clean clean = {};
nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean); nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0);
synchronize_rcu(); synchronize_rcu();
nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
} }
......
...@@ -30,8 +30,6 @@ ...@@ -30,8 +30,6 @@
pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
x->offset_before, x->offset_after, x->correction_pos); x->offset_before, x->offset_after, x->correction_pos);
static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
/* Setup TCP sequence correction given this change at this sequence */ /* Setup TCP sequence correction given this change at this sequence */
static inline void static inline void
adjust_tcp_sequence(u32 seq, adjust_tcp_sequence(u32 seq,
...@@ -49,7 +47,7 @@ adjust_tcp_sequence(u32 seq, ...@@ -49,7 +47,7 @@ adjust_tcp_sequence(u32 seq,
pr_debug("adjust_tcp_sequence: Seq_offset before: "); pr_debug("adjust_tcp_sequence: Seq_offset before: ");
DUMP_OFFSET(this_way); DUMP_OFFSET(this_way);
spin_lock_bh(&nf_nat_seqofs_lock); spin_lock_bh(&ct->lock);
/* SYN adjust. If it's uninitialized, or this is after last /* SYN adjust. If it's uninitialized, or this is after last
* correction, record it: we don't handle more than one * correction, record it: we don't handle more than one
...@@ -61,31 +59,26 @@ adjust_tcp_sequence(u32 seq, ...@@ -61,31 +59,26 @@ adjust_tcp_sequence(u32 seq,
this_way->offset_before = this_way->offset_after; this_way->offset_before = this_way->offset_after;
this_way->offset_after += sizediff; this_way->offset_after += sizediff;
} }
spin_unlock_bh(&nf_nat_seqofs_lock); spin_unlock_bh(&ct->lock);
pr_debug("adjust_tcp_sequence: Seq_offset after: "); pr_debug("adjust_tcp_sequence: Seq_offset after: ");
DUMP_OFFSET(this_way); DUMP_OFFSET(this_way);
} }
/* Get the offset value, for conntrack */ /* Get the offset value, for conntrack. Caller must have the conntrack locked */
s16 nf_nat_get_offset(const struct nf_conn *ct, s32 nf_nat_get_offset(const struct nf_conn *ct,
enum ip_conntrack_dir dir, enum ip_conntrack_dir dir,
u32 seq) u32 seq)
{ {
struct nf_conn_nat *nat = nfct_nat(ct); struct nf_conn_nat *nat = nfct_nat(ct);
struct nf_nat_seq *this_way; struct nf_nat_seq *this_way;
s16 offset;
if (!nat) if (!nat)
return 0; return 0;
this_way = &nat->seq[dir]; this_way = &nat->seq[dir];
spin_lock_bh(&nf_nat_seqofs_lock); return after(seq, this_way->correction_pos)
offset = after(seq, this_way->correction_pos)
? this_way->offset_after : this_way->offset_before; ? this_way->offset_after : this_way->offset_before;
spin_unlock_bh(&nf_nat_seqofs_lock);
return offset;
} }
/* Frobs data inside this packet, which is linear. */ /* Frobs data inside this packet, which is linear. */
...@@ -143,7 +136,7 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) ...@@ -143,7 +136,7 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
} }
void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
__be32 seq, s16 off) __be32 seq, s32 off)
{ {
if (!off) if (!off)
return; return;
...@@ -370,9 +363,10 @@ nf_nat_seq_adjust(struct sk_buff *skb, ...@@ -370,9 +363,10 @@ nf_nat_seq_adjust(struct sk_buff *skb,
struct tcphdr *tcph; struct tcphdr *tcph;
int dir; int dir;
__be32 newseq, newack; __be32 newseq, newack;
s16 seqoff, ackoff; s32 seqoff, ackoff;
struct nf_conn_nat *nat = nfct_nat(ct); struct nf_conn_nat *nat = nfct_nat(ct);
struct nf_nat_seq *this_way, *other_way; struct nf_nat_seq *this_way, *other_way;
int res;
dir = CTINFO2DIR(ctinfo); dir = CTINFO2DIR(ctinfo);
...@@ -383,6 +377,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, ...@@ -383,6 +377,7 @@ nf_nat_seq_adjust(struct sk_buff *skb,
return 0; return 0;
tcph = (void *)skb->data + protoff; tcph = (void *)skb->data + protoff;
spin_lock_bh(&ct->lock);
if (after(ntohl(tcph->seq), this_way->correction_pos)) if (after(ntohl(tcph->seq), this_way->correction_pos))
seqoff = this_way->offset_after; seqoff = this_way->offset_after;
else else
...@@ -407,7 +402,10 @@ nf_nat_seq_adjust(struct sk_buff *skb, ...@@ -407,7 +402,10 @@ nf_nat_seq_adjust(struct sk_buff *skb,
tcph->seq = newseq; tcph->seq = newseq;
tcph->ack_seq = newack; tcph->ack_seq = newack;
return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo); res = nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
spin_unlock_bh(&ct->lock);
return res;
} }
/* Setup NAT on this expected conntrack so it follows master. */ /* Setup NAT on this expected conntrack so it follows master. */
......
/*
* Transparent proxy support for Linux/iptables
*
* Copyright (c) 2006-2007 BalaBit IT Ltd.
* Author: Balazs Scheidler, Krisztian Kovacs
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#include <linux/module.h>
#include <linux/net.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <net/udp.h>
#include <net/netfilter/nf_tproxy_core.h>
/*
 * skb destructor installed by nf_tproxy_assign_sock(): detaches the socket
 * from the skb and drops the reference that was held on it.
 */
static void
nf_tproxy_destructor(struct sk_buff *skb)
{
	struct sock *owner = skb->sk;

	skb->destructor = NULL;
	skb->sk = NULL;

	if (owner != NULL)
		sock_put(owner);
}
/*
 * Hand @sk over to @skb; the caller's reference on @sk is consumed.
 *
 * TIME_WAIT sockets are never attached: most skb->sk->X checks would have
 * to test sk->sk_state first, so the timewait reference is simply released.
 */
void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
{
	if (sk->sk_state != TCP_TIME_WAIT) {
		skb_orphan(skb);
		skb->destructor = nf_tproxy_destructor;
		skb->sk = sk;
		return;
	}

	inet_twsk_put(inet_twsk(sk));
}
EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
/*
 * Module init: prints the version and copyright banner and reports
 * success; nothing else is set up here.
 */
static int __init nf_tproxy_init(void)
{
pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n");
pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n");
return 0;
}
/* register nf_tproxy_init() as the module's init function */
module_init(nf_tproxy_init);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Krisztian Kovacs");
MODULE_DESCRIPTION("Transparent proxy support core routines");
...@@ -862,6 +862,7 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { ...@@ -862,6 +862,7 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
[NFQA_MARK] = { .type = NLA_U32 }, [NFQA_MARK] = { .type = NLA_U32 },
[NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, [NFQA_PAYLOAD] = { .type = NLA_UNSPEC },
[NFQA_CT] = { .type = NLA_UNSPEC }, [NFQA_CT] = { .type = NLA_UNSPEC },
[NFQA_EXP] = { .type = NLA_UNSPEC },
}; };
static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
...@@ -990,9 +991,14 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, ...@@ -990,9 +991,14 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if (entry == NULL) if (entry == NULL)
return -ENOENT; return -ENOENT;
rcu_read_lock(); if (nfqa[NFQA_CT]) {
if (nfqa[NFQA_CT] && (queue->flags & NFQA_CFG_F_CONNTRACK))
ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo); ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo);
if (ct && nfqa[NFQA_EXP]) {
nfqnl_attach_expect(ct, nfqa[NFQA_EXP],
NETLINK_CB(skb).portid,
nlmsg_report(nlh));
}
}
if (nfqa[NFQA_PAYLOAD]) { if (nfqa[NFQA_PAYLOAD]) {
u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]); u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]);
...@@ -1005,7 +1011,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, ...@@ -1005,7 +1011,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if (ct) if (ct)
nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff); nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff);
} }
rcu_read_unlock();
if (nfqa[NFQA_MARK]) if (nfqa[NFQA_MARK])
entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
......
...@@ -96,3 +96,18 @@ void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, ...@@ -96,3 +96,18 @@ void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
if ((ct->status & IPS_NAT_MASK) && diff) if ((ct->status & IPS_NAT_MASK) && diff)
nfq_nat_ct->seq_adjust(skb, ct, ctinfo, diff); nfq_nat_ct->seq_adjust(skb, ct, ctinfo, diff);
} }
/*
 * Forward an expectation attach request to the registered nfqueue
 * conntrack hook.
 *
 * Returns 0 without doing anything for untracked conntracks, -EOPNOTSUPP
 * when no hook is registered, otherwise the hook's attach_expect() result.
 */
int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
			u32 portid, u32 report)
{
	struct nfq_ct_hook *hook;

	if (nf_ct_is_untracked(ct))
		return 0;

	hook = rcu_dereference(nfq_ct_hook);
	if (hook != NULL)
		return hook->attach_expect(attr, ct, portid, report);

	return -EOPNOTSUPP;
}
...@@ -15,7 +15,9 @@ ...@@ -15,7 +15,9 @@
#include <linux/ip.h> #include <linux/ip.h>
#include <net/checksum.h> #include <net/checksum.h>
#include <net/udp.h> #include <net/udp.h>
#include <net/tcp.h>
#include <net/inet_sock.h> #include <net/inet_sock.h>
#include <net/inet_hashtables.h>
#include <linux/inetdevice.h> #include <linux/inetdevice.h>
#include <linux/netfilter/x_tables.h> #include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ip_tables.h>
...@@ -26,13 +28,18 @@ ...@@ -26,13 +28,18 @@
#define XT_TPROXY_HAVE_IPV6 1 #define XT_TPROXY_HAVE_IPV6 1
#include <net/if_inet6.h> #include <net/if_inet6.h>
#include <net/addrconf.h> #include <net/addrconf.h>
#include <net/inet6_hashtables.h>
#include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif #endif
#include <net/netfilter/nf_tproxy_core.h>
#include <linux/netfilter/xt_TPROXY.h> #include <linux/netfilter/xt_TPROXY.h>
/* Kind of socket the nf_tproxy_get_sock_v4() lookup should return. */
enum nf_tproxy_lookup_t {
/* TCP: listener lookup; UDP: connected sockets are rejected */
NFT_LOOKUP_LISTENER,
/* TCP: established lookup; UDP: only connected, non-wildcard sockets */
NFT_LOOKUP_ESTABLISHED,
};
static bool tproxy_sk_is_transparent(struct sock *sk) static bool tproxy_sk_is_transparent(struct sock *sk)
{ {
if (sk->sk_state != TCP_TIME_WAIT) { if (sk->sk_state != TCP_TIME_WAIT) {
...@@ -68,6 +75,157 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) ...@@ -68,6 +75,157 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
return laddr ? laddr : daddr; return laddr ? laddr : daddr;
} }
/*
* This is used when the user wants to intercept a connection matching
* an explicit iptables rule. In this case the sockets are assumed
* matching in preference order:
*
* - match: if there's a fully established connection matching the
* _packet_ tuple, it is returned, assuming the redirection
* already took place and we process a packet belonging to an
* established connection
*
* - match: if there's a listening socket matching the redirection
* (e.g. on-port & on-ip of the connection), it is returned,
* regardless if it was bound to 0.0.0.0 or an explicit
* address. The reasoning is that if there's an explicit rule, it
* does not really matter if the listener is bound to an interface
* or to 0. The user already stated that he wants redirection
* (since he added the rule).
*
* Please note that there's an overlap between what a TPROXY target
* and a socket match will match. Normally if you have both rules the
* "socket" match will be the first one, effectively all packets
* belonging to established connections going through that one.
*/
static inline struct sock *
nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
sk = inet_lookup_listener(net, &tcp_hashinfo,
saddr, sport,
daddr, dport,
in->ifindex);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
break;
case NFT_LOOKUP_ESTABLISHED:
sk = inet_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, dport,
in->ifindex);
break;
default:
BUG();
}
break;
case IPPROTO_UDP:
sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
(lookup_type == NFT_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
return sk;
}
#ifdef XT_TPROXY_HAVE_IPV6
static inline struct sock *
nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
sk = inet6_lookup_listener(net, &tcp_hashinfo,
saddr, sport,
daddr, ntohs(dport),
in->ifindex);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
break;
case NFT_LOOKUP_ESTABLISHED:
sk = __inet6_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, ntohs(dport),
in->ifindex);
break;
default:
BUG();
}
break;
case IPPROTO_UDP:
sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
(lookup_type == NFT_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
return sk;
}
#endif
/** /**
* tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections * tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
* @skb: The skb being processed. * @skb: The skb being processed.
...@@ -117,6 +275,15 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, ...@@ -117,6 +275,15 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
return sk; return sk;
} }
/*
 * Attach @sk to @skb, consuming the caller's reference on @sk.
 * Any previous owner is dropped first via skb_orphan(); sock_edemux is
 * installed as the skb destructor.
 */
static void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);
	skb->destructor = sock_edemux;
	skb->sk = sk;
}
static unsigned int static unsigned int
tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
u_int32_t mark_mask, u_int32_t mark_value) u_int32_t mark_mask, u_int32_t mark_value)
......
...@@ -202,7 +202,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) ...@@ -202,7 +202,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
return -EINVAL; return -EINVAL;
} }
if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) { if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n"); pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
return -EINVAL; return -EINVAL;
} }
if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) { if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
......
...@@ -19,12 +19,12 @@ ...@@ -19,12 +19,12 @@
#include <net/icmp.h> #include <net/icmp.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/inet_sock.h> #include <net/inet_sock.h>
#include <net/netfilter/nf_tproxy_core.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h> #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
#define XT_SOCKET_HAVE_IPV6 1 #define XT_SOCKET_HAVE_IPV6 1
#include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/inet6_hashtables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif #endif
...@@ -101,6 +101,43 @@ extract_icmp4_fields(const struct sk_buff *skb, ...@@ -101,6 +101,43 @@ extract_icmp4_fields(const struct sk_buff *skb,
return 0; return 0;
} }
/* "socket" match based redirection (no specific rule)
* ===================================================
*
* There are connections with dynamic endpoints (e.g. FTP data
* connection) that the user is unable to add explicit rules
* for. These are taken care of by a generic "socket" rule. It is
* assumed that the proxy application is trusted to open such
* connections without explicit iptables rule (except of course the
* generic 'socket' rule). In this case the following sockets are
* matched in preference order:
*
* - match: if there's a fully established connection matching the
* _packet_ tuple
*
* - match: if there's a non-zero bound listener (possibly with a
* non-local address) We don't accept zero-bound listeners, since
* then local services could intercept traffic going through the
* box.
*/
/*
 * Find the socket for an IPv4 tuple, choosing the lookup by protocol.
 *
 * TCP is resolved with __inet_lookup() against tcp_hashinfo and UDP with
 * udp4_lib_lookup(); both receive the tuple in (saddr, sport, daddr, dport)
 * order together with in->ifindex.  Any other protocol yields NULL.
 *
 * @in is dereferenced for TCP and UDP, so it must not be NULL in those
 * cases.
 */
static struct sock *
xt_socket_get_sock_v4(struct net *net, const u8 protocol,
		      const __be32 saddr, const __be32 daddr,
		      const __be16 sport, const __be16 dport,
		      const struct net_device *in)
{
	struct sock *found = NULL;

	if (protocol == IPPROTO_TCP)
		found = __inet_lookup(net, &tcp_hashinfo,
				      saddr, sport, daddr, dport,
				      in->ifindex);
	else if (protocol == IPPROTO_UDP)
		found = udp4_lib_lookup(net, saddr, sport, daddr, dport,
					in->ifindex);

	return found;
}
static bool static bool
socket_match(const struct sk_buff *skb, struct xt_action_param *par, socket_match(const struct sk_buff *skb, struct xt_action_param *par,
const struct xt_socket_mtinfo1 *info) const struct xt_socket_mtinfo1 *info)
...@@ -156,9 +193,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, ...@@ -156,9 +193,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
#endif #endif
if (!sk) if (!sk)
sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
saddr, daddr, sport, dport, saddr, daddr, sport, dport,
par->in, NFT_LOOKUP_ANY); par->in);
if (sk) { if (sk) {
bool wildcard; bool wildcard;
bool transparent = true; bool transparent = true;
...@@ -265,6 +302,25 @@ extract_icmp6_fields(const struct sk_buff *skb, ...@@ -265,6 +302,25 @@ extract_icmp6_fields(const struct sk_buff *skb,
return 0; return 0;
} }
/*
 * Find the socket for an IPv6 tuple, choosing the lookup by protocol.
 *
 * TCP is resolved with inet6_lookup() against tcp_hashinfo and UDP with
 * udp6_lib_lookup(); both receive the tuple in (saddr, sport, daddr, dport)
 * order together with in->ifindex.  Any other protocol yields NULL.
 *
 * @in is dereferenced for TCP and UDP, so it must not be NULL in those
 * cases.
 */
static struct sock *
xt_socket_get_sock_v6(struct net *net, const u8 protocol,
		      const struct in6_addr *saddr, const struct in6_addr *daddr,
		      const __be16 sport, const __be16 dport,
		      const struct net_device *in)
{
	struct sock *found = NULL;

	if (protocol == IPPROTO_TCP)
		found = inet6_lookup(net, &tcp_hashinfo,
				     saddr, sport, daddr, dport,
				     in->ifindex);
	else if (protocol == IPPROTO_UDP)
		found = udp6_lib_lookup(net, saddr, sport, daddr, dport,
					in->ifindex);

	return found;
}
static bool static bool
socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
{ {
...@@ -302,9 +358,9 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) ...@@ -302,9 +358,9 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
} }
if (!sk) if (!sk)
sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
saddr, daddr, sport, dport, saddr, daddr, sport, dport,
par->in, NFT_LOOKUP_ANY); par->in);
if (sk) { if (sk) {
bool wildcard; bool wildcard;
bool transparent = true; bool transparent = true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment