Commit c6dd940b, authored by Florian Westphal, committed by Pablo Neira Ayuso

netfilter: allow early drop of assured conntracks

If insertion of a new conntrack fails because the table is full, the kernel
searches the next buckets of the hash slot where the new connection
was supposed to be inserted, looking for an entry that hasn't seen traffic
in reply direction (non-assured). If it finds one, that entry is
dropped and the new connection entry is allocated.

Allow the conntrack gc worker to also remove *assured* conntracks if
resources are low.

Do this by querying the l4 tracker, e.g. tcp connections are now dropped
if they are no longer established (e.g. in finwait).

This could be refined further, e.g. by adding 'soft' established timeout
(i.e., a timeout that is only used once we get close to resource
exhaustion).

Cc: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
parent b3a5db10
...@@ -58,6 +58,9 @@ struct nf_conntrack_l4proto { ...@@ -58,6 +58,9 @@ struct nf_conntrack_l4proto {
unsigned int dataoff, unsigned int dataoff,
u_int8_t pf, unsigned int hooknum); u_int8_t pf, unsigned int hooknum);
/* called by gc worker if table is full */
bool (*can_early_drop)(const struct nf_conn *ct);
/* Print out the per-protocol part of the tuple. Return like seq_* */ /* Print out the per-protocol part of the tuple. Return like seq_* */
void (*print_tuple)(struct seq_file *s, void (*print_tuple)(struct seq_file *s,
const struct nf_conntrack_tuple *); const struct nf_conntrack_tuple *);
......
...@@ -76,6 +76,7 @@ struct conntrack_gc_work { ...@@ -76,6 +76,7 @@ struct conntrack_gc_work {
struct delayed_work dwork; struct delayed_work dwork;
u32 last_bucket; u32 last_bucket;
bool exiting; bool exiting;
bool early_drop;
long next_gc_run; long next_gc_run;
}; };
...@@ -951,10 +952,30 @@ static noinline int early_drop(struct net *net, unsigned int _hash) ...@@ -951,10 +952,30 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
return false; return false;
} }
static bool gc_worker_skip_ct(const struct nf_conn *ct)
{
return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct);
}
/*
 * Decide whether the gc worker may evict @ct ahead of its timeout.
 *
 * An entry whose status lacks IPS_ASSURED_BIT may always be dropped.
 * For an assured entry the decision is delegated to the l4 protocol
 * tracker's optional can_early_drop() callback; when the tracker does
 * not provide one, the entry is kept.
 *
 * Returns true if the entry may be dropped, false otherwise.
 */
static bool gc_worker_can_early_drop(const struct nf_conn *ct)
{
	const struct nf_conntrack_l4proto *proto;

	if (test_bit(IPS_ASSURED_BIT, &ct->status)) {
		/*
		 * NOTE(review): the lookup result is dereferenced without a
		 * NULL check -- presumably __nf_ct_l4proto_find() always
		 * returns a valid tracker; confirm against its definition.
		 */
		proto = __nf_ct_l4proto_find(nf_ct_l3num(ct),
					     nf_ct_protonum(ct));
		if (!proto->can_early_drop)
			return false;

		return proto->can_early_drop(ct);
	}

	return true;
}
static void gc_worker(struct work_struct *work) static void gc_worker(struct work_struct *work)
{ {
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
unsigned int i, goal, buckets = 0, expired_count = 0; unsigned int i, goal, buckets = 0, expired_count = 0;
unsigned int nf_conntrack_max95 = 0;
struct conntrack_gc_work *gc_work; struct conntrack_gc_work *gc_work;
unsigned int ratio, scanned = 0; unsigned int ratio, scanned = 0;
unsigned long next_run; unsigned long next_run;
...@@ -963,6 +984,8 @@ static void gc_worker(struct work_struct *work) ...@@ -963,6 +984,8 @@ static void gc_worker(struct work_struct *work)
goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
i = gc_work->last_bucket; i = gc_work->last_bucket;
if (gc_work->early_drop)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
do { do {
struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple_hash *h;
...@@ -979,6 +1002,8 @@ static void gc_worker(struct work_struct *work) ...@@ -979,6 +1002,8 @@ static void gc_worker(struct work_struct *work)
i = 0; i = 0;
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
struct net *net;
tmp = nf_ct_tuplehash_to_ctrack(h); tmp = nf_ct_tuplehash_to_ctrack(h);
scanned++; scanned++;
...@@ -987,6 +1012,27 @@ static void gc_worker(struct work_struct *work) ...@@ -987,6 +1012,27 @@ static void gc_worker(struct work_struct *work)
expired_count++; expired_count++;
continue; continue;
} }
if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
continue;
net = nf_ct_net(tmp);
if (atomic_read(&net->ct.count) < nf_conntrack_max95)
continue;
/* need to take reference to avoid possible races */
if (!atomic_inc_not_zero(&tmp->ct_general.use))
continue;
if (gc_worker_skip_ct(tmp)) {
nf_ct_put(tmp);
continue;
}
if (gc_worker_can_early_drop(tmp))
nf_ct_kill(tmp);
nf_ct_put(tmp);
} }
/* could check get_nulls_value() here and restart if ct /* could check get_nulls_value() here and restart if ct
...@@ -1032,6 +1078,7 @@ static void gc_worker(struct work_struct *work) ...@@ -1032,6 +1078,7 @@ static void gc_worker(struct work_struct *work)
next_run = gc_work->next_gc_run; next_run = gc_work->next_gc_run;
gc_work->last_bucket = i; gc_work->last_bucket = i;
gc_work->early_drop = false;
queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
} }
...@@ -1057,6 +1104,8 @@ __nf_conntrack_alloc(struct net *net, ...@@ -1057,6 +1104,8 @@ __nf_conntrack_alloc(struct net *net,
if (nf_conntrack_max && if (nf_conntrack_max &&
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
if (!early_drop(net, hash)) { if (!early_drop(net, hash)) {
if (!conntrack_gc_work.early_drop)
conntrack_gc_work.early_drop = true;
atomic_dec(&net->ct.count); atomic_dec(&net->ct.count);
net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
......
...@@ -609,6 +609,20 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, ...@@ -609,6 +609,20 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
return -NF_ACCEPT; return -NF_ACCEPT;
} }
/*
 * can_early_drop() callback for the DCCP tracker.
 *
 * Returns true when the tracked DCCP state is CLOSEREQ, CLOSING or
 * TIMEWAIT; every other state returns false.
 */
static bool dccp_can_early_drop(const struct nf_conn *ct)
{
	switch (ct->proto.dccp.state) {
	case CT_DCCP_CLOSEREQ:
	case CT_DCCP_CLOSING:
	case CT_DCCP_TIMEWAIT:
		return true;
	default:
		return false;
	}
}
static void dccp_print_tuple(struct seq_file *s, static void dccp_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple) const struct nf_conntrack_tuple *tuple)
{ {
...@@ -868,6 +882,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { ...@@ -868,6 +882,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.packet = dccp_packet, .packet = dccp_packet,
.get_timeouts = dccp_get_timeouts, .get_timeouts = dccp_get_timeouts,
.error = dccp_error, .error = dccp_error,
.can_early_drop = dccp_can_early_drop,
.print_tuple = dccp_print_tuple, .print_tuple = dccp_print_tuple,
.print_conntrack = dccp_print_conntrack, .print_conntrack = dccp_print_conntrack,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
...@@ -902,6 +917,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { ...@@ -902,6 +917,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.packet = dccp_packet, .packet = dccp_packet,
.get_timeouts = dccp_get_timeouts, .get_timeouts = dccp_get_timeouts,
.error = dccp_error, .error = dccp_error,
.can_early_drop = dccp_can_early_drop,
.print_tuple = dccp_print_tuple, .print_tuple = dccp_print_tuple,
.print_conntrack = dccp_print_conntrack, .print_conntrack = dccp_print_conntrack,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
......
...@@ -535,6 +535,20 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, ...@@ -535,6 +535,20 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
return -NF_ACCEPT; return -NF_ACCEPT;
} }
/*
 * can_early_drop() callback for the SCTP tracker.
 *
 * Returns true when the tracked SCTP state is SHUTDOWN_SENT,
 * SHUTDOWN_RECD or SHUTDOWN_ACK_SENT; every other state returns false.
 */
static bool sctp_can_early_drop(const struct nf_conn *ct)
{
	bool droppable = false;

	switch (ct->proto.sctp.state) {
	case SCTP_CONNTRACK_SHUTDOWN_SENT:
	case SCTP_CONNTRACK_SHUTDOWN_RECD:
	case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT:
		droppable = true;
		break;
	default:
		break;
	}

	return droppable;
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink.h>
...@@ -783,6 +797,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { ...@@ -783,6 +797,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.get_timeouts = sctp_get_timeouts, .get_timeouts = sctp_get_timeouts,
.new = sctp_new, .new = sctp_new,
.error = sctp_error, .error = sctp_error,
.can_early_drop = sctp_can_early_drop,
.me = THIS_MODULE, .me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = sctp_to_nlattr, .to_nlattr = sctp_to_nlattr,
...@@ -818,6 +833,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { ...@@ -818,6 +833,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.get_timeouts = sctp_get_timeouts, .get_timeouts = sctp_get_timeouts,
.new = sctp_new, .new = sctp_new,
.error = sctp_error, .error = sctp_error,
.can_early_drop = sctp_can_early_drop,
.me = THIS_MODULE, .me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = sctp_to_nlattr, .to_nlattr = sctp_to_nlattr,
......
...@@ -1172,6 +1172,22 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, ...@@ -1172,6 +1172,22 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true; return true;
} }
/*
 * can_early_drop() callback for the TCP tracker.
 *
 * Returns true when the tracked TCP state is FIN_WAIT, LAST_ACK,
 * TIME_WAIT, CLOSE or CLOSE_WAIT; every other state returns false.
 */
static bool tcp_can_early_drop(const struct nf_conn *ct)
{
	switch (ct->proto.tcp.state) {
	case TCP_CONNTRACK_FIN_WAIT:
	case TCP_CONNTRACK_LAST_ACK:
	case TCP_CONNTRACK_TIME_WAIT:
	case TCP_CONNTRACK_CLOSE:
	case TCP_CONNTRACK_CLOSE_WAIT:
		return true;
	default:
		return false;
	}
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink.h>
...@@ -1549,6 +1565,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = ...@@ -1549,6 +1565,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
.get_timeouts = tcp_get_timeouts, .get_timeouts = tcp_get_timeouts,
.new = tcp_new, .new = tcp_new,
.error = tcp_error, .error = tcp_error,
.can_early_drop = tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = tcp_to_nlattr, .to_nlattr = tcp_to_nlattr,
.nlattr_size = tcp_nlattr_size, .nlattr_size = tcp_nlattr_size,
...@@ -1586,6 +1603,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = ...@@ -1586,6 +1603,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
.get_timeouts = tcp_get_timeouts, .get_timeouts = tcp_get_timeouts,
.new = tcp_new, .new = tcp_new,
.error = tcp_error, .error = tcp_error,
.can_early_drop = tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = tcp_to_nlattr, .to_nlattr = tcp_to_nlattr,
.nlattr_size = tcp_nlattr_size, .nlattr_size = tcp_nlattr_size,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment