Commit 4cb160d0 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for your net-next tree:

1) Get rid of nf_sk_is_transparent(), use inet_sk_transparent() instead.
   From Máté Eckl.

2) Move shared tproxy infrastructure to nf_tproxy_ipv4 and nf_tproxy_ipv6.
   Also from Máté.

3) Add hashtable to speed up chain lookups by name, from Florian Westphal.

4) Patch series to add connlimit support reusing part of the
   nf_conncount infrastructure. This includes preparation changes such
   passing context to the object and expression destroy interface;
   garbage collection for expressions embedded into set elements, and
   the introduction of the clone_destroy interface for expressions.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 1ffdd8e1 1b2470e5
......@@ -13,4 +13,15 @@ unsigned int nf_conncount_count(struct net *net,
const u32 *key,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone);
unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
bool *addit);
bool nf_conncount_add(struct hlist_head *head,
const struct nf_conntrack_tuple *tuple);
void nf_conncount_cache_free(struct hlist_head *hhead);
#endif
......@@ -3,19 +3,6 @@
#define _NF_SOCK_H_
#include <net/sock.h>
#include <net/inet_timewait_sock.h>
static inline bool nf_sk_is_transparent(struct sock *sk)
{
switch (sk->sk_state) {
case TCP_TIME_WAIT:
return inet_twsk(sk)->tw_transparent;
case TCP_NEW_SYN_RECV:
return inet_rsk(inet_reqsk(sk))->no_srccheck;
default:
return inet_sk(sk)->transparent;
}
}
struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
const struct net_device *indev);
......
......@@ -9,6 +9,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/u64_stats_sync.h>
#include <linux/rhashtable.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netlink.h>
......@@ -342,6 +343,7 @@ struct nft_set_ops {
const struct nft_set_desc *desc,
const struct nlattr * const nla[]);
void (*destroy)(const struct nft_set *set);
void (*gc_init)(const struct nft_set *set);
unsigned int elemsize;
};
......@@ -370,6 +372,8 @@ void nft_unregister_set(struct nft_set_type *type);
*
* @list: table set list node
* @bindings: list of set bindings
* @table: table this set belongs to
* @net: netnamespace this set belongs to
* @name: name of the set
* @handle: unique handle of the set
* @ktype: key type (numeric type defined by userspace, not used in the kernel)
......@@ -393,6 +397,8 @@ void nft_unregister_set(struct nft_set_type *type);
struct nft_set {
struct list_head list;
struct list_head bindings;
struct nft_table *table;
possible_net_t net;
char *name;
u64 handle;
u32 ktype;
......@@ -708,6 +714,7 @@ struct nft_expr_type {
};
#define NFT_EXPR_STATEFUL 0x1
#define NFT_EXPR_GC 0x2
/**
* struct nft_expr_ops - nf_tables expression operations
......@@ -739,11 +746,15 @@ struct nft_expr_ops {
const struct nft_expr *expr);
void (*destroy)(const struct nft_ctx *ctx,
const struct nft_expr *expr);
void (*destroy_clone)(const struct nft_ctx *ctx,
const struct nft_expr *expr);
int (*dump)(struct sk_buff *skb,
const struct nft_expr *expr);
int (*validate)(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data);
bool (*gc)(struct net *net,
const struct nft_expr *expr);
const struct nft_expr_type *type;
void *data;
};
......@@ -850,6 +861,7 @@ enum nft_chain_flags {
*
* @rules: list of rules in the chain
* @list: used internally
* @rhlhead: used internally
* @table: table that this chain belongs to
* @handle: chain handle
* @use: number of jump references to this chain
......@@ -862,6 +874,7 @@ struct nft_chain {
struct nft_rule *__rcu *rules_gen_1;
struct list_head rules;
struct list_head list;
struct rhlist_head rhlhead;
struct nft_table *table;
u64 handle;
u32 use;
......@@ -955,7 +968,8 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
* struct nft_table - nf_tables table
*
* @list: used internally
* @chains: chains in the table
* @chains_ht: chains in the table
* @chains: same, for stable walks
* @sets: sets in the table
* @objects: stateful objects in the table
* @flowtables: flow tables in the table
......@@ -969,6 +983,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
*/
struct nft_table {
struct list_head list;
struct rhltable chains_ht;
struct list_head chains;
struct list_head sets;
struct list_head objects;
......@@ -1070,7 +1085,8 @@ struct nft_object_ops {
int (*init)(const struct nft_ctx *ctx,
const struct nlattr *const tb[],
struct nft_object *obj);
void (*destroy)(struct nft_object *obj);
void (*destroy)(const struct nft_ctx *ctx,
struct nft_object *obj);
int (*dump)(struct sk_buff *skb,
struct nft_object *obj,
bool reset);
......
#ifndef _NF_TPROXY_H_
#define _NF_TPROXY_H_
#include <net/tcp.h>
enum nf_tproxy_lookup_t {
NF_TPROXY_LOOKUP_LISTENER,
NF_TPROXY_LOOKUP_ESTABLISHED,
};
static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
{
if (inet_sk_transparent(sk))
return true;
sock_gen_put(sk);
return false;
}
__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
/**
* nf_tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
* @skb: The skb being processed.
* @laddr: IPv4 address to redirect to or zero.
* @lport: TCP port to redirect to or zero.
* @sk: The TIME_WAIT TCP socket found by the lookup.
*
* We have to handle SYN packets arriving to TIME_WAIT sockets
* differently: instead of reopening the connection we should rather
* redirect the new connection to the proxy if there's a listener
* socket present.
*
* nf_tproxy_handle_time_wait4() consumes the socket reference passed in.
*
* Returns the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
struct sock *
nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
__be32 laddr, __be16 lport, struct sock *sk);
/*
* This is used when the user wants to intercept a connection matching
* an explicit iptables rule. In this case the sockets are assumed
* matching in preference order:
*
* - match: if there's a fully established connection matching the
* _packet_ tuple, it is returned, assuming the redirection
* already took place and we process a packet belonging to an
* established connection
*
* - match: if there's a listening socket matching the redirection
* (e.g. on-port & on-ip of the connection), it is returned,
* regardless if it was bound to 0.0.0.0 or an explicit
* address. The reasoning is that if there's an explicit rule, it
* does not really matter if the listener is bound to an interface
* or to 0. The user already stated that he wants redirection
* (since he added the rule).
*
* Please note that there's an overlap between what a TPROXY target
* and a socket match will match. Normally if you have both rules the
* "socket" match will be the first one, effectively all packets
* belonging to established connections going through that one.
*/
struct sock *
nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type);
const struct in6_addr *
nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
const struct in6_addr *daddr);
/**
* nf_tproxy_handle_time_wait6 - handle IPv6 TCP TIME_WAIT reopen redirections
* @skb: The skb being processed.
* @tproto: Transport protocol.
* @thoff: Transport protocol header offset.
* @net: Network namespace.
* @laddr: IPv6 address to redirect to.
* @lport: TCP port to redirect to or zero.
* @sk: The TIME_WAIT TCP socket found by the lookup.
*
* We have to handle SYN packets arriving to TIME_WAIT sockets
* differently: instead of reopening the connection we should rather
* redirect the new connection to the proxy if there's a listener
* socket present.
*
* nf_tproxy_handle_time_wait6() consumes the socket reference passed in.
*
* Returns the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
struct sock *
nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
struct net *net,
const struct in6_addr *laddr,
const __be16 lport,
struct sock *sk);
struct sock *
nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type);
#endif /* _NF_TPROXY_H_ */
......@@ -1043,6 +1043,24 @@ enum nft_limit_attributes {
};
#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1)
enum nft_connlimit_flags {
NFT_CONNLIMIT_F_INV = (1 << 0),
};
/**
* enum nft_connlimit_attributes - nf_tables connlimit expression netlink attributes
*
* @NFTA_CONNLIMIT_COUNT: number of connections (NLA_U32)
* @NFTA_CONNLIMIT_FLAGS: flags (NLA_U32: enum nft_connlimit_flags)
*/
enum nft_connlimit_attributes {
NFTA_CONNLIMIT_UNSPEC,
NFTA_CONNLIMIT_COUNT,
NFTA_CONNLIMIT_FLAGS,
__NFTA_CONNLIMIT_MAX
};
#define NFTA_CONNLIMIT_MAX (__NFTA_CONNLIMIT_MAX - 1)
/**
* enum nft_counter_attributes - nf_tables counter expression netlink attributes
*
......@@ -1357,7 +1375,8 @@ enum nft_ct_helper_attributes {
#define NFT_OBJECT_QUOTA 2
#define NFT_OBJECT_CT_HELPER 3
#define NFT_OBJECT_LIMIT 4
#define __NFT_OBJECT_MAX 5
#define NFT_OBJECT_CONNLIMIT 5
#define __NFT_OBJECT_MAX 6
#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1)
/**
......
......@@ -29,7 +29,10 @@ config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support"
help
This option enables the IPv4 socket lookup infrastructure. This is
is required by the iptables socket match.
is required by the {ip,nf}tables socket match.
config NF_TPROXY_IPV4
tristate "IPv4 tproxy support"
if NF_TABLES
......
......@@ -17,6 +17,7 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o
obj-$(CONFIG_NF_TPROXY_IPV4) += nf_tproxy_ipv4.o
# logging
obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
......
/*
* Copyright (C) 2007-2008 BalaBit IT Ltd.
* Author: Krisztian Kovacs
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#include <net/netfilter/nf_tproxy.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/inet_sock.h>
#include <linux/ip.h>
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>
#include <linux/inetdevice.h>
struct sock *
nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
__be32 laddr, __be16 lport, struct sock *sk)
{
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr _hdr, *hp;
hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
if (hp == NULL) {
inet_twsk_put(inet_twsk(sk));
return NULL;
}
if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
/* SYN to a TIME_WAIT socket, we'd rather redirect it
* to a listener socket if there's one */
struct sock *sk2;
sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr ? laddr : iph->daddr,
hp->source, lport ? lport : hp->dest,
skb->dev, NF_TPROXY_LOOKUP_LISTENER);
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
}
}
return sk;
}
EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait4);
__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
{
struct in_device *indev;
__be32 laddr;
if (user_laddr)
return user_laddr;
laddr = 0;
indev = __in_dev_get_rcu(skb->dev);
for_primary_ifa(indev) {
laddr = ifa->ifa_local;
break;
} endfor_ifa(indev);
return laddr ? laddr : daddr;
}
EXPORT_SYMBOL_GPL(nf_tproxy_laddr4);
struct sock *
nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
tcph = hp;
sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
ip_hdrlen(skb) +
__tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
break;
case NF_TPROXY_LOOKUP_ESTABLISHED:
sk = inet_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, dport,
in->ifindex);
break;
default:
BUG();
}
break;
case IPPROTO_UDP:
sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
if ((lookup_type == NF_TPROXY_LOOKUP_ESTABLISHED &&
(!connected || wildcard)) ||
(lookup_type == NF_TPROXY_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
return sk;
}
EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support");
......@@ -29,7 +29,10 @@ config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support"
help
This option enables the IPv6 socket lookup infrastructure. This
is used by the ip6tables socket match.
is used by the {ip6,nf}tables socket match.
config NF_TPROXY_IPV6
tristate "IPv6 tproxy support"
if NF_TABLES
......
......@@ -26,6 +26,7 @@ nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o
obj-$(CONFIG_NF_TPROXY_IPV6) += nf_tproxy_ipv6.o
# logging
obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
......
#include <net/netfilter/nf_tproxy.h>
#include <linux/module.h>
#include <net/inet6_hashtables.h>
#include <net/addrconf.h>
#include <net/udp.h>
#include <net/tcp.h>
const struct in6_addr *
nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
const struct in6_addr *daddr)
{
struct inet6_dev *indev;
struct inet6_ifaddr *ifa;
struct in6_addr *laddr;
if (!ipv6_addr_any(user_laddr))
return user_laddr;
laddr = NULL;
indev = __in6_dev_get(skb->dev);
if (indev) {
read_lock_bh(&indev->lock);
list_for_each_entry(ifa, &indev->addr_list, if_list) {
if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
continue;
laddr = &ifa->addr;
break;
}
read_unlock_bh(&indev->lock);
}
return laddr ? laddr : daddr;
}
EXPORT_SYMBOL_GPL(nf_tproxy_laddr6);
struct sock *
nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
struct net *net,
const struct in6_addr *laddr,
const __be16 lport,
struct sock *sk)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct tcphdr _hdr, *hp;
hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
if (hp == NULL) {
inet_twsk_put(inet_twsk(sk));
return NULL;
}
if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
/* SYN to a TIME_WAIT socket, we'd rather redirect it
* to a listener socket if there's one */
struct sock *sk2;
sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, hp, tproto,
&iph->saddr,
nf_tproxy_laddr6(skb, laddr, &iph->daddr),
hp->source,
lport ? lport : hp->dest,
skb->dev, NF_TPROXY_LOOKUP_LISTENER);
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
}
}
return sk;
}
EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait6);
struct sock *
nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
tcph = hp;
sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
break;
case NF_TPROXY_LOOKUP_ESTABLISHED:
sk = __inet6_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, ntohs(dport),
in->ifindex, 0);
break;
default:
BUG();
}
break;
case IPPROTO_UDP:
sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
if ((lookup_type == NF_TPROXY_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
(lookup_type == NF_TPROXY_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
return sk;
}
EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v6);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support");
......@@ -517,6 +517,15 @@ config NFT_COUNTER
This option adds the "counter" expression that you can use to
include packet and byte counters in a rule.
config NFT_CONNLIMIT
tristate "Netfilter nf_tables connlimit module"
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NETFILTER_CONNCOUNT
help
This option adds the "connlimit" expression that you can use to
ratelimit rule matchings per connections.
config NFT_LOG
tristate "Netfilter nf_tables log module"
help
......@@ -989,6 +998,8 @@ config NETFILTER_XT_TARGET_TPROXY
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
select NF_TPROXY_IPV4
select NF_TPROXY_IPV6 if IP6_NF_IPTABLES
help
This option adds a `TPROXY' target, which is somewhat similar to
REDIRECT. It can only be used in the mangle table and is useful
......
......@@ -80,6 +80,7 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
obj-$(CONFIG_NF_TABLES) += nf_tables.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
......
......@@ -79,7 +79,7 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
return memcmp(a, b, klen * sizeof(u32));
}
static bool add_hlist(struct hlist_head *head,
bool nf_conncount_add(struct hlist_head *head,
const struct nf_conntrack_tuple *tuple)
{
struct nf_conncount_tuple *conn;
......@@ -91,12 +91,12 @@ static bool add_hlist(struct hlist_head *head,
hlist_add_head(&conn->node, head);
return true;
}
EXPORT_SYMBOL_GPL(nf_conncount_add);
static unsigned int check_hlist(struct net *net,
struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
bool *addit)
unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
bool *addit)
{
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn;
......@@ -141,6 +141,7 @@ static unsigned int check_hlist(struct net *net,
return length;
}
EXPORT_SYMBOL_GPL(nf_conncount_lookup);
static void tree_nodes_free(struct rb_root *root,
struct nf_conncount_rb *gc_nodes[],
......@@ -187,13 +188,15 @@ count_tree(struct net *net, struct rb_root *root,
} else {
/* same source network -> be counted! */
unsigned int count;
count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
count = nf_conncount_lookup(net, &rbconn->hhead, tuple,
zone, &addit);
tree_nodes_free(root, gc_nodes, gc_count);
if (!addit)
return count;
if (!add_hlist(&rbconn->hhead, tuple))
if (!nf_conncount_add(&rbconn->hhead, tuple))
return 0; /* hotdrop */
return count + 1;
......@@ -203,7 +206,7 @@ count_tree(struct net *net, struct rb_root *root,
continue;
/* only used for GC on hhead, retval and 'addit' ignored */
check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
nf_conncount_lookup(net, &rbconn->hhead, tuple, zone, &addit);
if (hlist_empty(&rbconn->hhead))
gc_nodes[gc_count++] = rbconn;
}
......@@ -303,11 +306,19 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
}
EXPORT_SYMBOL_GPL(nf_conncount_init);
static void destroy_tree(struct rb_root *r)
void nf_conncount_cache_free(struct hlist_head *hhead)
{
struct nf_conncount_tuple *conn;
struct nf_conncount_rb *rbconn;
struct hlist_node *n;
hlist_for_each_entry_safe(conn, n, hhead, node)
kmem_cache_free(conncount_conn_cachep, conn);
}
EXPORT_SYMBOL_GPL(nf_conncount_cache_free);
static void destroy_tree(struct rb_root *r)
{
struct nf_conncount_rb *rbconn;
struct rb_node *node;
while ((node = rb_first(r)) != NULL) {
......@@ -315,8 +326,7 @@ static void destroy_tree(struct rb_root *r)
rb_erase(node, r);
hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
kmem_cache_free(conncount_conn_cachep, conn);
nf_conncount_cache_free(&rbconn->hhead);
kmem_cache_free(conncount_rb_cachep, rbconn);
}
......
......@@ -34,6 +34,20 @@ enum {
NFT_VALIDATE_DO,
};
static u32 nft_chain_hash(const void *data, u32 len, u32 seed);
static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed);
static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *);
static const struct rhashtable_params nft_chain_ht_params = {
.head_offset = offsetof(struct nft_chain, rhlhead),
.key_offset = offsetof(struct nft_chain, name),
.hashfn = nft_chain_hash,
.obj_hashfn = nft_chain_hash_obj,
.obj_cmpfn = nft_chain_hash_cmp,
.locks_mul = 1,
.automatic_shrinking = true,
};
static void nft_validate_state_update(struct net *net, u8 new_validate_state)
{
switch (net->nft.validate_state) {
......@@ -720,6 +734,29 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
return ret;
}
static u32 nft_chain_hash(const void *data, u32 len, u32 seed)
{
const char *name = data;
return jhash(name, strlen(name), seed);
}
static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed)
{
const struct nft_chain *chain = data;
return nft_chain_hash(chain->name, 0, seed);
}
static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
const struct nft_chain *chain = ptr;
const char *name = arg->key;
return strcmp(chain->name, name);
}
static int nf_tables_newtable(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
......@@ -766,6 +803,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (table->name == NULL)
goto err_strdup;
err = rhltable_init(&table->chains_ht, &nft_chain_ht_params);
if (err)
goto err_chain_ht;
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
......@@ -782,6 +823,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
list_add_tail_rcu(&table->list, &net->nft.tables);
return 0;
err_trans:
rhltable_destroy(&table->chains_ht);
err_chain_ht:
kfree(table->name);
err_strdup:
kfree(table);
......@@ -922,6 +965,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
BUG_ON(ctx->table->use > 0);
rhltable_destroy(&ctx->table->chains_ht);
kfree(ctx->table->name);
kfree(ctx->table);
}
......@@ -967,21 +1011,35 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
return ERR_PTR(-ENOENT);
}
static struct nft_chain *nft_chain_lookup(const struct nft_table *table,
static struct nft_chain *nft_chain_lookup(struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
char search[NFT_CHAIN_MAXNAMELEN + 1];
struct rhlist_head *tmp, *list;
struct nft_chain *chain;
if (nla == NULL)
return ERR_PTR(-EINVAL);
list_for_each_entry_rcu(chain, &table->chains, list) {
if (!nla_strcmp(nla, chain->name) &&
nft_active_genmask(chain, genmask))
return chain;
}
nla_strlcpy(search, nla, sizeof(search));
return ERR_PTR(-ENOENT);
WARN_ON(!rcu_read_lock_held() &&
!lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
chain = ERR_PTR(-ENOENT);
rcu_read_lock();
list = rhltable_lookup(&table->chains_ht, search, nft_chain_ht_params);
if (!list)
goto out_unlock;
rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) {
if (nft_active_genmask(chain, genmask))
goto out_unlock;
}
chain = ERR_PTR(-ENOENT);
out_unlock:
rcu_read_unlock();
return chain;
}
static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
......@@ -1185,8 +1243,8 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
const struct nft_table *table;
const struct nft_chain *chain;
struct nft_table *table;
struct sk_buff *skb2;
int family = nfmsg->nfgen_family;
int err;
......@@ -1504,9 +1562,17 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (err < 0)
goto err1;
err = rhltable_insert_key(&table->chains_ht, chain->name,
&chain->rhlhead, nft_chain_ht_params);
if (err)
goto err2;
err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (err < 0)
if (err < 0) {
rhltable_remove(&table->chains_ht, &chain->rhlhead,
nft_chain_ht_params);
goto err2;
}
table->use++;
list_add_tail_rcu(&chain->list, &table->chains);
......@@ -2206,9 +2272,9 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
const struct nft_table *table;
const struct nft_chain *chain;
const struct nft_rule *rule;
struct nft_table *table;
struct sk_buff *skb2;
int family = nfmsg->nfgen_family;
int err;
......@@ -3359,6 +3425,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
}
INIT_LIST_HEAD(&set->bindings);
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
set->ktype = ktype;
set->klen = desc.klen;
......@@ -4036,12 +4104,24 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
struct nft_ctx ctx = {
.net = read_pnet(&set->net),
.family = set->table->family,
};
nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) {
struct nft_expr *expr = nft_set_ext_expr(ext);
if (expr->ops->destroy_clone) {
expr->ops->destroy_clone(&ctx, expr);
module_put(expr->ops->type->owner);
} else {
nf_tables_expr_destroy(&ctx, expr);
}
}
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
(*nft_set_ext_obj(ext))->use--;
kfree(elem);
......@@ -4051,12 +4131,13 @@ EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
/* Only called from commit path, nft_set_elem_deactivate() already deals with
* the refcounting from the preparation phase.
*/
static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
const struct nft_set *set, void *elem)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext));
kfree(elem);
}
......@@ -4787,7 +4868,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
kfree(obj->name);
err2:
if (obj->ops->destroy)
obj->ops->destroy(obj);
obj->ops->destroy(&ctx, obj);
kfree(obj);
err1:
module_put(type->owner);
......@@ -4997,10 +5078,10 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
return err;
}
static void nft_obj_destroy(struct nft_object *obj)
static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
{
if (obj->ops->destroy)
obj->ops->destroy(obj);
obj->ops->destroy(ctx, obj);
module_put(obj->ops->type->owner);
kfree(obj->name);
......@@ -5966,8 +6047,16 @@ static void nft_chain_commit_update(struct nft_trans *trans)
{
struct nft_base_chain *basechain;
if (nft_trans_chain_name(trans))
if (nft_trans_chain_name(trans)) {
rhltable_remove(&trans->ctx.table->chains_ht,
&trans->ctx.chain->rhlhead,
nft_chain_ht_params);
swap(trans->ctx.chain->name, nft_trans_chain_name(trans));
rhltable_insert_key(&trans->ctx.table->chains_ht,
trans->ctx.chain->name,
&trans->ctx.chain->rhlhead,
nft_chain_ht_params);
}
if (!nft_is_base_chain(trans->ctx.chain))
return;
......@@ -5999,11 +6088,12 @@ static void nft_commit_release(struct nft_trans *trans)
nft_set_destroy(nft_trans_set(trans));
break;
case NFT_MSG_DELSETELEM:
nf_tables_set_elem_destroy(nft_trans_elem_set(trans),
nf_tables_set_elem_destroy(&trans->ctx,
nft_trans_elem_set(trans),
nft_trans_elem(trans).priv);
break;
case NFT_MSG_DELOBJ:
nft_obj_destroy(nft_trans_obj(trans));
nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
break;
case NFT_MSG_DELFLOWTABLE:
nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
......@@ -6143,6 +6233,15 @@ static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *cha
nf_tables_commit_chain_free_rules_old(g0);
}
static void nft_chain_del(struct nft_chain *chain)
{
struct nft_table *table = chain->table;
WARN_ON_ONCE(rhltable_remove(&table->chains_ht, &chain->rhlhead,
nft_chain_ht_params));
list_del_rcu(&chain->list);
}
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nft_trans *trans, *next;
......@@ -6217,7 +6316,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELCHAIN:
list_del_rcu(&trans->ctx.chain->list);
nft_chain_del(trans->ctx.chain);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table,
......@@ -6328,7 +6427,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nft_trans_elem(trans).priv, true);
break;
case NFT_MSG_NEWOBJ:
nft_obj_destroy(nft_trans_obj(trans));
nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
break;
case NFT_MSG_NEWFLOWTABLE:
nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
......@@ -6368,7 +6467,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
} else {
trans->ctx.table->use--;
list_del_rcu(&trans->ctx.chain->list);
nft_chain_del(trans->ctx.chain);
nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table,
trans->ctx.chain);
......@@ -6970,7 +7069,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
ctx->chain->use--;
nf_tables_rule_release(ctx, rule);
}
list_del(&ctx->chain->list);
nft_chain_del(ctx->chain);
ctx->table->use--;
nf_tables_chain_destroy(ctx);
......@@ -7022,11 +7121,11 @@ static void __nft_release_tables(struct net *net)
list_for_each_entry_safe(obj, ne, &table->objects, list) {
list_del(&obj->list);
table->use--;
nft_obj_destroy(obj);
nft_obj_destroy(&ctx, obj);
}
list_for_each_entry_safe(chain, nc, &table->chains, list) {
ctx.chain = chain;
list_del(&chain->list);
nft_chain_del(chain);
table->use--;
nf_tables_chain_destroy(&ctx);
}
......
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_count.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>
struct nft_connlimit {
spinlock_t lock;
struct hlist_head hhead;
u32 limit;
bool invert;
};
static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt,
const struct nft_set_ext *ext)
{
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
const struct nf_conntrack_tuple *tuple_ptr;
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
unsigned int count;
bool addit;
tuple_ptr = &tuple;
ct = nf_ct_get(pkt->skb, &ctinfo);
if (ct != NULL) {
tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
zone = nf_ct_zone(ct);
} else if (!nf_ct_get_tuplepr(pkt->skb, skb_network_offset(pkt->skb),
nft_pf(pkt), nft_net(pkt), &tuple)) {
regs->verdict.code = NF_DROP;
return;
}
spin_lock_bh(&priv->lock);
count = nf_conncount_lookup(nft_net(pkt), &priv->hhead, tuple_ptr, zone,
&addit);
if (!addit)
goto out;
if (!nf_conncount_add(&priv->hhead, tuple_ptr)) {
regs->verdict.code = NF_DROP;
spin_unlock_bh(&priv->lock);
return;
}
count++;
out:
spin_unlock_bh(&priv->lock);
if ((count > priv->limit) ^ priv->invert) {
regs->verdict.code = NFT_BREAK;
return;
}
}
static int nft_connlimit_do_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[],
struct nft_connlimit *priv)
{
bool invert = false;
u32 flags, limit;
if (!tb[NFTA_CONNLIMIT_COUNT])
return -EINVAL;
limit = ntohl(nla_get_be32(tb[NFTA_CONNLIMIT_COUNT]));
if (tb[NFTA_CONNLIMIT_FLAGS]) {
flags = ntohl(nla_get_be32(tb[NFTA_CONNLIMIT_FLAGS]));
if (flags & ~NFT_CONNLIMIT_F_INV)
return -EOPNOTSUPP;
if (flags & NFT_CONNLIMIT_F_INV)
invert = true;
}
spin_lock_init(&priv->lock);
INIT_HLIST_HEAD(&priv->hhead);
priv->limit = limit;
priv->invert = invert;
return nf_ct_netns_get(ctx->net, ctx->family);
}
static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
struct nft_connlimit *priv)
{
nf_ct_netns_put(ctx->net, ctx->family);
nf_conncount_cache_free(&priv->hhead);
}
static int nft_connlimit_do_dump(struct sk_buff *skb,
struct nft_connlimit *priv)
{
if (nla_put_be32(skb, NFTA_CONNLIMIT_COUNT, htonl(priv->limit)))
goto nla_put_failure;
if (priv->invert &&
nla_put_be32(skb, NFTA_CONNLIMIT_FLAGS, htonl(NFT_CONNLIMIT_F_INV)))
goto nla_put_failure;
return 0;
nla_put_failure:
return -1;
}
static inline void nft_connlimit_obj_eval(struct nft_object *obj,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_connlimit *priv = nft_obj_data(obj);
nft_connlimit_do_eval(priv, regs, pkt, NULL);
}
static int nft_connlimit_obj_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[],
struct nft_object *obj)
{
struct nft_connlimit *priv = nft_obj_data(obj);
return nft_connlimit_do_init(ctx, tb, priv);
}
static void nft_connlimit_obj_destroy(const struct nft_ctx *ctx,
struct nft_object *obj)
{
struct nft_connlimit *priv = nft_obj_data(obj);
nft_connlimit_do_destroy(ctx, priv);
}
static int nft_connlimit_obj_dump(struct sk_buff *skb,
struct nft_object *obj, bool reset)
{
struct nft_connlimit *priv = nft_obj_data(obj);
return nft_connlimit_do_dump(skb, priv);
}
static const struct nla_policy nft_connlimit_policy[NFTA_CONNLIMIT_MAX + 1] = {
[NFTA_CONNLIMIT_COUNT] = { .type = NLA_U32 },
[NFTA_CONNLIMIT_FLAGS] = { .type = NLA_U32 },
};
static struct nft_object_type nft_connlimit_obj_type;
static const struct nft_object_ops nft_connlimit_obj_ops = {
.type = &nft_connlimit_obj_type,
.size = sizeof(struct nft_connlimit),
.eval = nft_connlimit_obj_eval,
.init = nft_connlimit_obj_init,
.destroy = nft_connlimit_obj_destroy,
.dump = nft_connlimit_obj_dump,
};
static struct nft_object_type nft_connlimit_obj_type __read_mostly = {
.type = NFT_OBJECT_CONNLIMIT,
.ops = &nft_connlimit_obj_ops,
.maxattr = NFTA_CONNLIMIT_MAX,
.policy = nft_connlimit_policy,
.owner = THIS_MODULE,
};
static void nft_connlimit_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_connlimit *priv = nft_expr_priv(expr);
nft_connlimit_do_eval(priv, regs, pkt, NULL);
}
static int nft_connlimit_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_connlimit *priv = nft_expr_priv(expr);
return nft_connlimit_do_dump(skb, priv);
}
static int nft_connlimit_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_connlimit *priv = nft_expr_priv(expr);
return nft_connlimit_do_init(ctx, tb, priv);
}
static void nft_connlimit_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_connlimit *priv = nft_expr_priv(expr);
nft_connlimit_do_destroy(ctx, priv);
}
static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
{
struct nft_connlimit *priv_dst = nft_expr_priv(dst);
struct nft_connlimit *priv_src = nft_expr_priv(src);
spin_lock_init(&priv_dst->lock);
INIT_HLIST_HEAD(&priv_dst->hhead);
priv_dst->limit = priv_src->limit;
priv_dst->invert = priv_src->invert;
return 0;
}
static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_connlimit *priv = nft_expr_priv(expr);
nf_conncount_cache_free(&priv->hhead);
}
static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
{
struct nft_connlimit *priv = nft_expr_priv(expr);
bool addit, ret;
spin_lock_bh(&priv->lock);
nf_conncount_lookup(net, &priv->hhead, NULL, &nf_ct_zone_dflt, &addit);
ret = hlist_empty(&priv->hhead);
spin_unlock_bh(&priv->lock);
return ret;
}
static struct nft_expr_type nft_connlimit_type;
static const struct nft_expr_ops nft_connlimit_ops = {
.type = &nft_connlimit_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_connlimit)),
.eval = nft_connlimit_eval,
.init = nft_connlimit_init,
.destroy = nft_connlimit_destroy,
.clone = nft_connlimit_clone,
.destroy_clone = nft_connlimit_destroy_clone,
.dump = nft_connlimit_dump,
.gc = nft_connlimit_gc,
};
static struct nft_expr_type nft_connlimit_type __read_mostly = {
.name = "connlimit",
.ops = &nft_connlimit_ops,
.policy = nft_connlimit_policy,
.maxattr = NFTA_CONNLIMIT_MAX,
.flags = NFT_EXPR_STATEFUL | NFT_EXPR_GC,
.owner = THIS_MODULE,
};
static int __init nft_connlimit_module_init(void)
{
int err;
err = nft_register_obj(&nft_connlimit_obj_type);
if (err < 0)
return err;
err = nft_register_expr(&nft_connlimit_type);
if (err < 0)
goto err1;
return 0;
err1:
nft_unregister_obj(&nft_connlimit_obj_type);
return err;
}
static void __exit nft_connlimit_module_exit(void)
{
nft_unregister_expr(&nft_connlimit_type);
nft_unregister_obj(&nft_connlimit_obj_type);
}
module_init(nft_connlimit_module_init);
module_exit(nft_connlimit_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso");
MODULE_ALIAS_NFT_EXPR("connlimit");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CONNLIMIT);
......@@ -96,7 +96,8 @@ static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv)
free_percpu(priv->counter);
}
static void nft_counter_obj_destroy(struct nft_object *obj)
static void nft_counter_obj_destroy(const struct nft_ctx *ctx,
struct nft_object *obj)
{
struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
......@@ -257,6 +258,7 @@ static const struct nft_expr_ops nft_counter_ops = {
.eval = nft_counter_eval,
.init = nft_counter_init,
.destroy = nft_counter_destroy,
.destroy_clone = nft_counter_destroy,
.dump = nft_counter_dump,
.clone = nft_counter_clone,
};
......
......@@ -826,7 +826,8 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
return 0;
}
static void nft_ct_helper_obj_destroy(struct nft_object *obj)
static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx,
struct nft_object *obj)
{
struct nft_ct_helper_obj *priv = nft_obj_data(obj);
......
......@@ -195,6 +195,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
err = -EOPNOTSUPP;
if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL))
goto err1;
if (priv->expr->ops->type->flags & NFT_EXPR_GC) {
if (set->flags & NFT_SET_TIMEOUT)
goto err1;
if (!set->ops->gc_init)
goto err1;
set->ops->gc_init(set);
}
} else if (set->flags & NFT_SET_EVAL)
return -EINVAL;
......
......@@ -311,8 +311,16 @@ static void nft_rhash_gc(struct work_struct *work)
continue;
}
if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPR)) {
struct nft_expr *expr = nft_set_ext_expr(&he->ext);
if (expr->ops->gc &&
expr->ops->gc(read_pnet(&set->net), expr))
goto gc;
}
if (!nft_set_elem_expired(&he->ext))
continue;
gc:
if (nft_set_elem_mark_busy(&he->ext))
continue;
......@@ -339,6 +347,14 @@ static unsigned int nft_rhash_privsize(const struct nlattr * const nla[],
return sizeof(struct nft_rhash);
}
static void nft_rhash_gc_init(const struct nft_set *set)
{
struct nft_rhash *priv = nft_set_priv(set);
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
static int nft_rhash_init(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const tb[])
......@@ -356,8 +372,8 @@ static int nft_rhash_init(const struct nft_set *set,
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
if (set->flags & NFT_SET_TIMEOUT)
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
nft_rhash_gc_init(set);
return 0;
}
......@@ -647,6 +663,7 @@ static struct nft_set_type nft_rhash_type __read_mostly = {
.elemsize = offsetof(struct nft_rhash_elem, ext),
.estimate = nft_rhash_estimate,
.init = nft_rhash_init,
.gc_init = nft_rhash_gc_init,
.destroy = nft_rhash_destroy,
.insert = nft_rhash_insert,
.activate = nft_rhash_activate,
......
......@@ -5,6 +5,7 @@
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_socket.h>
#include <net/inet_sock.h>
#include <net/tcp.h>
struct nft_socket {
enum nft_socket_keys key:8;
......@@ -48,7 +49,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
switch(priv->key) {
case NFT_SOCKET_TRANSPARENT:
nft_reg_store8(dest, nf_sk_is_transparent(sk));
nft_reg_store8(dest, inet_sk_transparent(sk));
break;
default:
WARN_ON(1);
......
......@@ -33,264 +33,9 @@
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
#include <net/netfilter/nf_tproxy.h>
#include <linux/netfilter/xt_TPROXY.h>
enum nf_tproxy_lookup_t {
NFT_LOOKUP_LISTENER,
NFT_LOOKUP_ESTABLISHED,
};
static bool tproxy_sk_is_transparent(struct sock *sk)
{
switch (sk->sk_state) {
case TCP_TIME_WAIT:
if (inet_twsk(sk)->tw_transparent)
return true;
break;
case TCP_NEW_SYN_RECV:
if (inet_rsk(inet_reqsk(sk))->no_srccheck)
return true;
break;
default:
if (inet_sk(sk)->transparent)
return true;
}
sock_gen_put(sk);
return false;
}
static inline __be32
tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
{
struct in_device *indev;
__be32 laddr;
if (user_laddr)
return user_laddr;
laddr = 0;
indev = __in_dev_get_rcu(skb->dev);
for_primary_ifa(indev) {
laddr = ifa->ifa_local;
break;
} endfor_ifa(indev);
return laddr ? laddr : daddr;
}
/*
* This is used when the user wants to intercept a connection matching
* an explicit iptables rule. In this case the sockets are assumed
* matching in preference order:
*
* - match: if there's a fully established connection matching the
* _packet_ tuple, it is returned, assuming the redirection
* already took place and we process a packet belonging to an
* established connection
*
* - match: if there's a listening socket matching the redirection
* (e.g. on-port & on-ip of the connection), it is returned,
* regardless if it was bound to 0.0.0.0 or an explicit
* address. The reasoning is that if there's an explicit rule, it
* does not really matter if the listener is bound to an interface
* or to 0. The user already stated that he wants redirection
* (since he added the rule).
*
* Please note that there's an overlap between what a TPROXY target
* and a socket match will match. Normally if you have both rules the
* "socket" match will be the first one, effectively all packets
* belonging to established connections going through that one.
*/
static inline struct sock *
nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
tcph = hp;
sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
ip_hdrlen(skb) +
__tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
break;
case NFT_LOOKUP_ESTABLISHED:
sk = inet_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, dport,
in->ifindex);
break;
default:
BUG();
}
break;
case IPPROTO_UDP:
sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
(lookup_type == NFT_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
return sk;
}
#ifdef XT_TPROXY_HAVE_IPV6
static inline struct sock *
nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
tcph = hp;
sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
break;
case NFT_LOOKUP_ESTABLISHED:
sk = __inet6_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, ntohs(dport),
in->ifindex, 0);
break;
default:
BUG();
}
break;
case IPPROTO_UDP:
sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
if (sk) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
* xt_socket, since xt_TPROXY needs 0 bound
* listeners too
*/
if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
(lookup_type == NFT_LOOKUP_LISTENER && connected)) {
sock_put(sk);
sk = NULL;
}
}
break;
default:
WARN_ON(1);
sk = NULL;
}
pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
return sk;
}
#endif
/**
* tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
* @skb: The skb being processed.
* @laddr: IPv4 address to redirect to or zero.
* @lport: TCP port to redirect to or zero.
* @sk: The TIME_WAIT TCP socket found by the lookup.
*
* We have to handle SYN packets arriving to TIME_WAIT sockets
* differently: instead of reopening the connection we should rather
* redirect the new connection to the proxy if there's a listener
* socket present.
*
* tproxy_handle_time_wait4() consumes the socket reference passed in.
*
* Returns the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
static struct sock *
tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
__be32 laddr, __be16 lport, struct sock *sk)
{
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr _hdr, *hp;
hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
if (hp == NULL) {
inet_twsk_put(inet_twsk(sk));
return NULL;
}
if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
/* SYN to a TIME_WAIT socket, we'd rather redirect it
* to a listener socket if there's one */
struct sock *sk2;
sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr ? laddr : iph->daddr,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
}
}
return sk;
}
/* assign a socket to the skb -- consumes sk */
static void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
......@@ -319,26 +64,26 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
skb->dev, NFT_LOOKUP_ESTABLISHED);
skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
laddr = tproxy_laddr4(skb, laddr, iph->daddr);
laddr = nf_tproxy_laddr4(skb, laddr, iph->daddr);
if (!lport)
lport = hp->dest;
/* UDP has no TCP_TIME_WAIT state, so we never enter here */
if (sk && sk->sk_state == TCP_TIME_WAIT)
/* reopening a TIME_WAIT connection needs special handling */
sk = tproxy_handle_time_wait4(net, skb, laddr, lport, sk);
sk = nf_tproxy_handle_time_wait4(net, skb, laddr, lport, sk);
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
skb->dev, NFT_LOOKUP_LISTENER);
skb->dev, NF_TPROXY_LOOKUP_LISTENER);
/* NOTE: assign_sock consumes our sk reference */
if (sk && tproxy_sk_is_transparent(sk)) {
if (sk && nf_tproxy_sk_is_transparent(sk)) {
/* This should be in a separate target, but we don't do multiple
targets on the same rule yet */
skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
......@@ -377,87 +122,6 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
#ifdef XT_TPROXY_HAVE_IPV6
static inline const struct in6_addr *
tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
const struct in6_addr *daddr)
{
struct inet6_dev *indev;
struct inet6_ifaddr *ifa;
struct in6_addr *laddr;
if (!ipv6_addr_any(user_laddr))
return user_laddr;
laddr = NULL;
indev = __in6_dev_get(skb->dev);
if (indev) {
read_lock_bh(&indev->lock);
list_for_each_entry(ifa, &indev->addr_list, if_list) {
if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
continue;
laddr = &ifa->addr;
break;
}
read_unlock_bh(&indev->lock);
}
return laddr ? laddr : daddr;
}
/**
* tproxy_handle_time_wait6 - handle IPv6 TCP TIME_WAIT reopen redirections
* @skb: The skb being processed.
* @tproto: Transport protocol.
* @thoff: Transport protocol header offset.
* @par: Iptables target parameters.
* @sk: The TIME_WAIT TCP socket found by the lookup.
*
* We have to handle SYN packets arriving to TIME_WAIT sockets
* differently: instead of reopening the connection we should rather
* redirect the new connection to the proxy if there's a listener
* socket present.
*
* tproxy_handle_time_wait6() consumes the socket reference passed in.
*
* Returns the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
static struct sock *
tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
const struct xt_action_param *par,
struct sock *sk)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct tcphdr _hdr, *hp;
const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
if (hp == NULL) {
inet_twsk_put(inet_twsk(sk));
return NULL;
}
if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
/* SYN to a TIME_WAIT socket, we'd rather redirect it
* to a listener socket if there's one */
struct sock *sk2;
sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
&iph->saddr,
tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
hp->source,
tgi->lport ? tgi->lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
}
}
return sk;
}
static unsigned int
tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
{
......@@ -489,25 +153,31 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
xt_in(par), NFT_LOOKUP_ESTABLISHED);
xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED);
laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
laddr = nf_tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
lport = tgi->lport ? tgi->lport : hp->dest;
/* UDP has no TCP_TIME_WAIT state, so we never enter here */
if (sk && sk->sk_state == TCP_TIME_WAIT)
if (sk && sk->sk_state == TCP_TIME_WAIT) {
const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
/* reopening a TIME_WAIT connection needs special handling */
sk = tproxy_handle_time_wait6(skb, tproto, thoff, par, sk);
sk = nf_tproxy_handle_time_wait6(skb, tproto, thoff,
xt_net(par),
&tgi->laddr.in6,
tgi->lport,
sk);
}
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp,
tproto, &iph->saddr, laddr,
hp->source, lport,
xt_in(par), NFT_LOOKUP_LISTENER);
xt_in(par), NF_TPROXY_LOOKUP_LISTENER);
/* NOTE: assign_sock consumes our sk reference */
if (sk && tproxy_sk_is_transparent(sk)) {
if (sk && nf_tproxy_sk_is_transparent(sk)) {
/* This should be in a separate target, but we don't do multiple
targets on the same rule yet */
skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
......
......@@ -73,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
* if XT_SOCKET_TRANSPARENT is used
*/
if (info->flags & XT_SOCKET_TRANSPARENT)
transparent = nf_sk_is_transparent(sk);
transparent = inet_sk_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
......@@ -130,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
* if XT_SOCKET_TRANSPARENT is used
*/
if (info->flags & XT_SOCKET_TRANSPARENT)
transparent = nf_sk_is_transparent(sk);
transparent = inet_sk_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment