Commit 439cd39e authored by Stefano Brivio, committed by Pablo Neira Ayuso

netfilter: ipset: list:set: Decrease refcount synchronously on deletion and replace

Commit 45040978 ("netfilter: ipset: Fix set:list type crash
when flush/dump set in parallel") postponed decreasing set
reference counters to the RCU callback.

An 'ipset del' command can terminate before the RCU grace period
has elapsed, and if sets are listed before then, the reference
counter shown in userspace will be wrong:

 # ipset create h hash:ip; ipset create l list:set; ipset add l h
 # ipset del l h; ipset list h
 Name: h
 Type: hash:ip
 Revision: 4
 Header: family inet hashsize 1024 maxelem 65536
 Size in memory: 88
 References: 1
 Number of entries: 0
 Members:
 # sleep 1; ipset list h
 Name: h
 Type: hash:ip
 Revision: 4
 Header: family inet hashsize 1024 maxelem 65536
 Size in memory: 88
 References: 0
 Number of entries: 0
 Members:

Fix this by making the reference count update synchronous again.

As a result, when sets are listed, ip_set_name_byindex() might
now fetch a set whose reference count is already zero. Instead
of relying on the reference count to protect against concurrent
set renaming, grab ip_set_ref_lock as reader and copy the name,
while holding the same lock in ip_set_rename() as writer
instead.
Reported-by: Li Shuang <shuali@redhat.com>
Fixes: 45040978 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
parent 4269fea7
...@@ -314,7 +314,7 @@ enum { ...@@ -314,7 +314,7 @@ enum {
extern ip_set_id_t ip_set_get_byname(struct net *net, extern ip_set_id_t ip_set_get_byname(struct net *net,
const char *name, struct ip_set **set); const char *name, struct ip_set **set);
extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index); extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index); extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
......
...@@ -693,21 +693,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index) ...@@ -693,21 +693,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
EXPORT_SYMBOL_GPL(ip_set_put_byindex); EXPORT_SYMBOL_GPL(ip_set_put_byindex);
/* Get the name of a set behind a set index. /* Get the name of a set behind a set index.
* We assume the set is referenced, so it does exist and * Set itself is protected by RCU, but its name isn't: to protect against
* can't be destroyed. The set cannot be renamed due to * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the
* the referencing either. * name.
*
*/ */
const char * void
ip_set_name_byindex(struct net *net, ip_set_id_t index) ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
{ {
const struct ip_set *set = ip_set_rcu_get(net, index); struct ip_set *set = ip_set_rcu_get(net, index);
BUG_ON(!set); BUG_ON(!set);
BUG_ON(set->ref == 0);
/* Referenced, so it's safe */ read_lock_bh(&ip_set_ref_lock);
return set->name; strncpy(name, set->name, IPSET_MAXNAMELEN);
read_unlock_bh(&ip_set_ref_lock);
} }
EXPORT_SYMBOL_GPL(ip_set_name_byindex); EXPORT_SYMBOL_GPL(ip_set_name_byindex);
...@@ -1153,7 +1152,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, ...@@ -1153,7 +1152,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
if (!set) if (!set)
return -ENOENT; return -ENOENT;
read_lock_bh(&ip_set_ref_lock); write_lock_bh(&ip_set_ref_lock);
if (set->ref != 0) { if (set->ref != 0) {
ret = -IPSET_ERR_REFERENCED; ret = -IPSET_ERR_REFERENCED;
goto out; goto out;
...@@ -1170,7 +1169,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, ...@@ -1170,7 +1169,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
strncpy(set->name, name2, IPSET_MAXNAMELEN); strncpy(set->name, name2, IPSET_MAXNAMELEN);
out: out:
read_unlock_bh(&ip_set_ref_lock); write_unlock_bh(&ip_set_ref_lock);
return ret; return ret;
} }
......
...@@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu) ...@@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
{ {
struct set_elem *e = container_of(rcu, struct set_elem, rcu); struct set_elem *e = container_of(rcu, struct set_elem, rcu);
struct ip_set *set = e->set; struct ip_set *set = e->set;
struct list_set *map = set->data;
ip_set_put_byindex(map->net, e->id);
ip_set_ext_destroy(set, e); ip_set_ext_destroy(set, e);
kfree(e); kfree(e);
} }
...@@ -158,15 +156,21 @@ __list_set_del_rcu(struct rcu_head * rcu) ...@@ -158,15 +156,21 @@ __list_set_del_rcu(struct rcu_head * rcu)
static inline void static inline void
list_set_del(struct ip_set *set, struct set_elem *e) list_set_del(struct ip_set *set, struct set_elem *e)
{ {
struct list_set *map = set->data;
set->elements--; set->elements--;
list_del_rcu(&e->list); list_del_rcu(&e->list);
ip_set_put_byindex(map->net, e->id);
call_rcu(&e->rcu, __list_set_del_rcu); call_rcu(&e->rcu, __list_set_del_rcu);
} }
static inline void static inline void
list_set_replace(struct set_elem *e, struct set_elem *old) list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
{ {
struct list_set *map = set->data;
list_replace_rcu(&old->list, &e->list); list_replace_rcu(&old->list, &e->list);
ip_set_put_byindex(map->net, old->id);
call_rcu(&old->rcu, __list_set_del_rcu); call_rcu(&old->rcu, __list_set_del_rcu);
} }
...@@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, ...@@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
INIT_LIST_HEAD(&e->list); INIT_LIST_HEAD(&e->list);
list_set_init_extensions(set, ext, e); list_set_init_extensions(set, ext, e);
if (n) if (n)
list_set_replace(e, n); list_set_replace(set, e, n);
else if (next) else if (next)
list_add_tail_rcu(&e->list, &next->list); list_add_tail_rcu(&e->list, &next->list);
else if (prev) else if (prev)
...@@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set, ...@@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set,
const struct list_set *map = set->data; const struct list_set *map = set->data;
struct nlattr *atd, *nested; struct nlattr *atd, *nested;
u32 i = 0, first = cb->args[IPSET_CB_ARG0]; u32 i = 0, first = cb->args[IPSET_CB_ARG0];
char name[IPSET_MAXNAMELEN];
struct set_elem *e; struct set_elem *e;
int ret = 0; int ret = 0;
...@@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set, ...@@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set,
nested = ipset_nest_start(skb, IPSET_ATTR_DATA); nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested) if (!nested)
goto nla_put_failure; goto nla_put_failure;
if (nla_put_string(skb, IPSET_ATTR_NAME, ip_set_name_byindex(map->net, e->id, name);
ip_set_name_byindex(map->net, e->id))) if (nla_put_string(skb, IPSET_ATTR_NAME, name))
goto nla_put_failure; goto nla_put_failure;
if (ip_set_put_extensions(skb, set, e, true)) if (ip_set_put_extensions(skb, set, e, true))
goto nla_put_failure; goto nla_put_failure;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment