Commit 4d80ecdb authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for you net tree, they
are:

1) Restore __GFP_NORETRY in xt_table allocations to mitigate effects of
   large memory allocation requests, from Michal Hocko.

2) Release IPv6 fragment queue in case of error in fragmentation header,
   this is a follow up to amend patch 83f1999c, from Subash Abhinov
   Kasiviswanathan.

3) Flowtable infrastructure depends on NETFILTER_INGRESS as it registers
   a hook for each flowtable, reported by John Crispin.

4) Missing initialization of info->priv in xt_cgroup version 1, from
   Cong Wang.

5) Give a chance to garbage collector to run after scheduling flowtable
   cleanup.

6) Releasing flowtable content on nft_flow_offload module removal is
   not required at all, there is not dependencies between this module
   and flowtables, remove it.

7) Fix missing xt_rateest_mutex grabbing for hash insertions, also from
   Cong Wang.

8) Move nf_flow_table_cleanup() routine to flowtable core, this patch is
   a dependency for the next patch in this list.

9) Flowtable resources are not properly released on removal from the
   control plane. Fix this resource leak by scheduling removal of all
   entries and explicit call to the garbage collector.

10) nf_ct_nat_offset() declaration is dead code, this function prototype
    is not used anywhere, remove it. From Taehee Yoo.

11) Fix another flowtable resource leak on entry insertion failures,
    this patch also fixes a possible use-after-free. Patch from Felix
    Fietkau.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents a2e5790d 0ff90b6c
......@@ -213,11 +213,6 @@ static inline bool nf_ct_kill(struct nf_conn *ct)
return nf_ct_delete(ct, 0, 0);
}
/* These are for NAT. Icky. */
extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
enum ip_conntrack_dir dir,
u32 seq);
/* Set all unconfirmed conntrack as dying */
void nf_ct_unconfirmed_destroy(struct net *);
......
......@@ -14,6 +14,7 @@ struct nf_flowtable_type {
struct list_head list;
int family;
void (*gc)(struct work_struct *work);
void (*free)(struct nf_flowtable *ft);
const struct rhashtable_params *params;
nf_hookfn *hook;
struct module *owner;
......@@ -89,12 +90,15 @@ struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
void flow_offload_free(struct flow_offload *flow);
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow);
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple);
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
void (*iter)(struct flow_offload *flow, void *data),
void *data);
void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
void nf_flow_table_free(struct nf_flowtable *flow_table);
void nf_flow_offload_work_gc(struct work_struct *work);
extern const struct rhashtable_params nf_flow_offload_rhash_params;
......
......@@ -80,8 +80,7 @@ endif # NF_TABLES
config NF_FLOW_TABLE_IPV4
tristate "Netfilter flow table IPv4 module"
depends on NF_CONNTRACK && NF_TABLES
select NF_FLOW_TABLE
depends on NF_FLOW_TABLE
help
This option adds the flow table IPv4 support.
......
......@@ -260,6 +260,7 @@ static struct nf_flowtable_type flowtable_ipv4 = {
.family = NFPROTO_IPV4,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ip_hook,
.owner = THIS_MODULE,
};
......
......@@ -73,8 +73,7 @@ endif # NF_TABLES
config NF_FLOW_TABLE_IPV6
tristate "Netfilter flow table IPv6 module"
depends on NF_CONNTRACK && NF_TABLES
select NF_FLOW_TABLE
depends on NF_FLOW_TABLE
help
This option adds the flow table IPv6 support.
......
......@@ -264,6 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
inet_frag_kill(&fq->q, &nf_frags);
return -EPROTO;
}
if (end > fq->q.len) {
......
......@@ -253,6 +253,7 @@ static struct nf_flowtable_type flowtable_ipv6 = {
.family = NFPROTO_IPV6,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ipv6_hook,
.owner = THIS_MODULE,
};
......
......@@ -666,8 +666,8 @@ endif # NF_TABLES
config NF_FLOW_TABLE_INET
tristate "Netfilter flow table mixed IPv4/IPv6 module"
depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6
select NF_FLOW_TABLE
depends on NF_FLOW_TABLE_IPV4
depends on NF_FLOW_TABLE_IPV6
help
This option adds the flow table mixed IPv4/IPv6 support.
......@@ -675,7 +675,9 @@ config NF_FLOW_TABLE_INET
config NF_FLOW_TABLE
tristate "Netfilter flow table module"
depends on NF_CONNTRACK && NF_TABLES
depends on NETFILTER_INGRESS
depends on NF_CONNTRACK
depends on NF_TABLES
help
This option adds the flow table core infrastructure.
......
......@@ -4,6 +4,7 @@
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
......@@ -124,7 +125,9 @@ void flow_offload_free(struct flow_offload *flow)
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
e = container_of(flow, struct flow_offload_entry, flow);
kfree(e);
nf_ct_delete(e->ct, 0, 0);
nf_ct_put(e->ct);
kfree_rcu(e, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);
......@@ -148,11 +151,9 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
}
EXPORT_SYMBOL_GPL(flow_offload_add);
void flow_offload_del(struct nf_flowtable *flow_table,
struct flow_offload *flow)
static void flow_offload_del(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
struct flow_offload_entry *e;
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
*flow_table->type->params);
......@@ -160,10 +161,8 @@ void flow_offload_del(struct nf_flowtable *flow_table,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
*flow_table->type->params);
e = container_of(flow, struct flow_offload_entry, flow);
kfree_rcu(e, rcu_head);
flow_offload_free(flow);
}
EXPORT_SYMBOL_GPL(flow_offload_del);
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
......@@ -174,15 +173,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
static void nf_flow_release_ct(const struct flow_offload *flow)
{
struct flow_offload_entry *e;
e = container_of(flow, struct flow_offload_entry, flow);
nf_ct_delete(e->ct, 0, 0);
nf_ct_put(e->ct);
}
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
void (*iter)(struct flow_offload *flow, void *data),
void *data)
......@@ -231,19 +221,16 @@ static inline bool nf_flow_is_dying(const struct flow_offload *flow)
return flow->flags & FLOW_OFFLOAD_DYING;
}
void nf_flow_offload_work_gc(struct work_struct *work)
static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
{
struct flow_offload_tuple_rhash *tuplehash;
struct nf_flowtable *flow_table;
struct rhashtable_iter hti;
struct flow_offload *flow;
int err;
flow_table = container_of(work, struct nf_flowtable, gc_work.work);
err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
if (err)
goto schedule;
return 0;
rhashtable_walk_start(&hti);
......@@ -261,15 +248,22 @@ void nf_flow_offload_work_gc(struct work_struct *work)
flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
if (nf_flow_has_expired(flow) ||
nf_flow_is_dying(flow)) {
nf_flow_is_dying(flow))
flow_offload_del(flow_table, flow);
nf_flow_release_ct(flow);
}
}
out:
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
schedule:
return 1;
}
void nf_flow_offload_work_gc(struct work_struct *work)
{
struct nf_flowtable *flow_table;
flow_table = container_of(work, struct nf_flowtable, gc_work.work);
nf_flow_offload_gc_step(flow_table);
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
......@@ -425,5 +419,35 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
struct net_device *dev = data;
if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
return;
flow_offload_dead(flow);
}
static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
void *data)
{
nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
flush_delayed_work(&flowtable->gc_work);
}
void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
{
nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
WARN_ON(!nf_flow_offload_gc_step(flow_table));
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
......@@ -24,6 +24,7 @@ static struct nf_flowtable_type flowtable_inet = {
.family = NFPROTO_INET,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
.free = nf_flow_table_free,
.hook = nf_flow_offload_inet_hook,
.owner = THIS_MODULE,
};
......
......@@ -5006,13 +5006,13 @@ void nft_flow_table_iterate(struct net *net,
struct nft_flowtable *flowtable;
const struct nft_table *table;
rcu_read_lock();
list_for_each_entry_rcu(table, &net->nft.tables, list) {
list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
nfnl_lock(NFNL_SUBSYS_NFTABLES);
list_for_each_entry(table, &net->nft.tables, list) {
list_for_each_entry(flowtable, &table->flowtables, list) {
iter(&flowtable->data, data);
}
}
rcu_read_unlock();
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
......@@ -5399,17 +5399,12 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
static void nft_flowtable_destroy(void *ptr, void *arg)
{
kfree(ptr);
}
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
cancel_delayed_work_sync(&flowtable->data.gc_work);
kfree(flowtable->name);
rhashtable_free_and_destroy(&flowtable->data.rhashtable,
nft_flowtable_destroy, NULL);
flowtable->data.type->free(&flowtable->data);
rhashtable_destroy(&flowtable->data.rhashtable);
module_put(flowtable->data.type->owner);
}
......
......@@ -194,22 +194,6 @@ static struct nft_expr_type nft_flow_offload_type __read_mostly = {
.owner = THIS_MODULE,
};
static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data)
{
struct net_device *dev = data;
if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
return;
flow_offload_dead(flow);
}
static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable,
void *data)
{
nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
}
static int flow_offload_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
......@@ -218,7 +202,7 @@ static int flow_offload_netdev_event(struct notifier_block *this,
if (event != NETDEV_DOWN)
return NOTIFY_DONE;
nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev);
nf_flow_table_cleanup(dev_net(dev), dev);
return NOTIFY_DONE;
}
......@@ -246,14 +230,8 @@ static int __init nft_flow_offload_module_init(void)
static void __exit nft_flow_offload_module_exit(void)
{
struct net *net;
nft_unregister_expr(&nft_flow_offload_type);
unregister_netdevice_notifier(&flow_offload_netdev_notifier);
rtnl_lock();
for_each_net(net)
nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL);
rtnl_unlock();
}
module_init(nft_flow_offload_module_init);
......
......@@ -1008,7 +1008,12 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
if ((size >> PAGE_SHIFT) + 2 > totalram_pages)
return NULL;
info = kvmalloc(sz, GFP_KERNEL);
/* __GFP_NORETRY is not fully supported by kvmalloc but it should
* work reasonably well if sz is too large and bail out rather
* than shoot all processes down before realizing there is nothing
* more to reclaim.
*/
info = kvmalloc(sz, GFP_KERNEL | __GFP_NORETRY);
if (!info)
return NULL;
......
......@@ -39,23 +39,31 @@ static void xt_rateest_hash_insert(struct xt_rateest *est)
hlist_add_head(&est->list, &rateest_hash[h]);
}
struct xt_rateest *xt_rateest_lookup(const char *name)
static struct xt_rateest *__xt_rateest_lookup(const char *name)
{
struct xt_rateest *est;
unsigned int h;
h = xt_rateest_hash(name);
mutex_lock(&xt_rateest_mutex);
hlist_for_each_entry(est, &rateest_hash[h], list) {
if (strcmp(est->name, name) == 0) {
est->refcnt++;
mutex_unlock(&xt_rateest_mutex);
return est;
}
}
mutex_unlock(&xt_rateest_mutex);
return NULL;
}
struct xt_rateest *xt_rateest_lookup(const char *name)
{
struct xt_rateest *est;
mutex_lock(&xt_rateest_mutex);
est = __xt_rateest_lookup(name);
mutex_unlock(&xt_rateest_mutex);
return est;
}
EXPORT_SYMBOL_GPL(xt_rateest_lookup);
void xt_rateest_put(struct xt_rateest *est)
......@@ -100,8 +108,10 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
net_get_random_once(&jhash_rnd, sizeof(jhash_rnd));
est = xt_rateest_lookup(info->name);
mutex_lock(&xt_rateest_mutex);
est = __xt_rateest_lookup(info->name);
if (est) {
mutex_unlock(&xt_rateest_mutex);
/*
* If estimator parameters are specified, they must match the
* existing estimator.
......@@ -139,11 +149,13 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
info->est = est;
xt_rateest_hash_insert(est);
mutex_unlock(&xt_rateest_mutex);
return 0;
err2:
kfree(est);
err1:
mutex_unlock(&xt_rateest_mutex);
return ret;
}
......
......@@ -52,6 +52,7 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
return -EINVAL;
}
info->priv = NULL;
if (info->has_path) {
cgrp = cgroup_get_from_path(info->path);
if (IS_ERR(cgrp)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment