Commit d57609fa authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Use signed integer in ipv6_skip_exthdr() called from nf_confirm().
   Reported by static analysis tooling, patch from Florian Westphal.

2) Missing set type checks in nf_tables: Validate that set declaration
   matches the existing set type, otherwise bail out with EEXIST.
   Currently, nf_tables silently accepts the re-declaration with a
   different type but it bails out later with EINVAL when the user adds
   entries to the set. This fix is relatively large because it requires
   two preparation patches that are included in this batch.

3) Do not ignore updates of timeout and gc_interval parameters in
   existing sets.

4) Fix a hang when a 0/0 subnet is added to a hash:net,port,net type of
   ipset. Except hash:net,port,net and hash:net,iface, the set types don't
   support 0/0 and the auxiliary functions rely on this fact. So 0/0 needs
   special handling in hash:net,port,net, which was missing (hash:net,iface
   was not affected by this bug), from Jozsef Kadlecsik.

5) When adding/deleting a large number of elements in one step in ipset,
   it can take a considerable amount of time and can result in soft lockup
   errors. This patch is a complete rework of the previous version in order
   to use a smaller internal batch limit and at the same time removing
   the external hard limit to add arbitrary number of elements in one step.
   Also from Jozsef Kadlecsik.

Except for patch #1, which fixes a bug introduced in the previous net-next
development cycle, anything else has been broken for several releases.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 588ab2dc 5e29dc36
......@@ -197,7 +197,7 @@ struct ip_set_region {
};
/* Max range where every element is added/deleted in one step */
#define IPSET_MAX_RANGE (1<<20)
#define IPSET_MAX_RANGE (1<<14)
/* The max revision number supported by any set type + 1 */
#define IPSET_REVISION_MAX 9
......
......@@ -312,17 +312,29 @@ struct nft_set_iter {
/**
* struct nft_set_desc - description of set elements
*
* @ktype: key type
* @klen: key length
* @dtype: data type
* @dlen: data length
* @objtype: object type
* @flags: flags
* @size: number of set elements
* @policy: set policy
* @gc_int: garbage collector interval
* @field_len: length of each field in concatenation, bytes
* @field_count: number of concatenated fields in element
* @expr: set must support expressions
*/
struct nft_set_desc {
u32 ktype;
unsigned int klen;
u32 dtype;
unsigned int dlen;
u32 objtype;
unsigned int size;
u32 policy;
u32 gc_int;
u64 timeout;
u8 field_len[NFT_REG32_COUNT];
u8 field_count;
bool expr;
......@@ -585,7 +597,9 @@ void *nft_set_catchall_gc(const struct nft_set *set);
/*
 * Return the garbage-collection interval of @set in jiffies.
 *
 * set->gc_int is converted with msecs_to_jiffies(); a value of zero
 * selects the fallback interval HZ.
 */
static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
{
/*
 * NOTE(review): this listing looks like a diff whose +/- markers were
 * stripped; the first return below appears to be the pre-patch line
 * that the two statements after it replace - confirm against the tree.
 */
return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ;
/*
 * Take a single snapshot of gc_int: nf_tables_commit() stores a new
 * value with WRITE_ONCE() when an existing set is updated.
 */
u32 gc_int = READ_ONCE(set->gc_int);
return gc_int ? msecs_to_jiffies(gc_int) : HZ;
}
/**
......@@ -1558,6 +1572,9 @@ struct nft_trans_rule {
/*
 * Transaction payload for set operations; accessed through the
 * nft_trans_set*() macros via trans->data.
 */
struct nft_trans_set {
/* set this transaction operates on */
struct nft_set *set;
/* filled from NFTA_SET_ID when a new set is added with that attribute */
u32 set_id;
/* requested gc interval, copied from the set description on update */
u32 gc_int;
/* requested timeout, copied from the set description on update */
u64 timeout;
/*
 * true when the transaction updates an existing set: commit then only
 * stores timeout/gc_int, and abort only destroys the transaction
 */
bool update;
/* checked on the NFT_MSG_NEWSET abort path; presumably "set was bound" - TODO confirm */
bool bound;
};
......@@ -1567,6 +1584,12 @@ struct nft_trans_set {
(((struct nft_trans_set *)trans->data)->set_id)
/*
 * Field accessors for struct nft_trans_set stored in trans->data.
 * Each expands to an lvalue, so it can be read and assigned.
 */
#define nft_trans_set_bound(trans) \
(((struct nft_trans_set *)trans->data)->bound)
/* true when the transaction updates an already existing set */
#define nft_trans_set_update(trans) \
(((struct nft_trans_set *)trans->data)->update)
/* timeout to store in the set when an update is committed */
#define nft_trans_set_timeout(trans) \
(((struct nft_trans_set *)trans->data)->timeout)
/* gc interval to store in the set when an update is committed */
#define nft_trans_set_gc_int(trans) \
(((struct nft_trans_set *)trans->data)->gc_int)
struct nft_trans_chain {
bool update;
......
......@@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
ip_set_unlock(set);
retried = true;
} while (ret == -EAGAIN &&
} while (ret == -ERANGE ||
(ret == -EAGAIN &&
set->variant->resize &&
(ret = set->variant->resize(set, retried)) == 0);
(ret = set->variant->resize(set, retried)) == 0));
if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
return 0;
......
......@@ -100,11 +100,11 @@ static int
hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_ip4 *h = set->data;
struct hash_ip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip4_elem e = { 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, hosts;
u32 ip = 0, ip_to = 0, hosts, i = 0;
int ret = 0;
if (tb[IPSET_ATTR_LINENO])
......@@ -149,14 +149,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
/* 64bit division is not allowed on 32bit */
if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to;) {
for (; ip <= ip_to; i++) {
e.ip = htonl(ip);
if (i > IPSET_MAX_RANGE) {
hash_ip4_data_next(&h->next, &e);
return -ERANGE;
}
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
......
......@@ -97,11 +97,11 @@ static int
hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_ipmark4 *h = set->data;
struct hash_ipmark4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipmark4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip, ip_to = 0;
u32 ip, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
......@@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr);
}
if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
for (; ip <= ip_to; ip++, i++) {
e.ip = htonl(ip);
if (i > IPSET_MAX_RANGE) {
hash_ipmark4_data_next(&h->next, &e);
return -ERANGE;
}
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
......
......@@ -112,11 +112,11 @@ static int
hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_ipport4 *h = set->data;
struct hash_ipport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip, ip_to = 0, p = 0, port, port_to;
u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
bool with_ports = false;
int ret;
......@@ -184,17 +184,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++) {
for (; p <= port_to; p++, i++) {
e.ip = htonl(ip);
e.port = htons(p);
if (i > IPSET_MAX_RANGE) {
hash_ipport4_data_next(&h->next, &e);
return -ERANGE;
}
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
......
......@@ -108,11 +108,11 @@ static int
hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_ipportip4 *h = set->data;
struct hash_ipportip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip, ip_to = 0, p = 0, port, port_to;
u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
bool with_ports = false;
int ret;
......@@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++) {
for (; p <= port_to; p++, i++) {
e.ip = htonl(ip);
e.port = htons(p);
if (i > IPSET_MAX_RANGE) {
hash_ipportip4_data_next(&h->next, &e);
return -ERANGE;
}
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
......
......@@ -160,12 +160,12 @@ static int
hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_ipportnet4 *h = set->data;
struct hash_ipportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
u32 ip2_from = 0, ip2_to = 0, ip2;
u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
bool with_ports = false;
u8 cidr;
int ret;
......@@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
return -ERANGE;
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
......@@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
for (; p <= port_to; p++) {
e.port = htons(p);
do {
i++;
e.ip2 = htonl(ip2);
ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
e.cidr = cidr - 1;
if (i > IPSET_MAX_RANGE) {
hash_ipportnet4_data_next(&h->next,
&e);
return -ERANGE;
}
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
......
......@@ -136,11 +136,11 @@ static int
hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_net4 *h = set->data;
struct hash_net4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, ipn, n = 0;
u32 ip = 0, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
......@@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
}
ipn = ip;
do {
ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
n++;
} while (ipn++ < ip_to);
if (n > IPSET_MAX_RANGE)
return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
do {
i++;
e.ip = htonl(ip);
if (i > IPSET_MAX_RANGE) {
hash_net4_data_next(&h->next, &e);
return -ERANGE;
}
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
......
......@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, ipn, n = 0;
u32 ip = 0, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
......@@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr);
}
ipn = ip;
do {
ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
n++;
} while (ipn++ < ip_to);
if (n > IPSET_MAX_RANGE)
return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
do {
i++;
e.ip = htonl(ip);
if (i > IPSET_MAX_RANGE) {
hash_netiface4_data_next(&h->next, &e);
return -ERANGE;
}
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
......
......@@ -166,13 +166,12 @@ static int
hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_netnet4 *h = set->data;
struct hash_netnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0;
u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
u64 n = 0, m = 0;
u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
......@@ -248,19 +247,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
ipn = ip;
do {
ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
n++;
} while (ipn++ < ip_to);
ipn = ip2_from;
do {
ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
m++;
} while (ipn++ < ip2_to);
if (n*m > IPSET_MAX_RANGE)
return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
......@@ -273,7 +259,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip[0] = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
do {
i++;
e.ip[1] = htonl(ip2);
if (i > IPSET_MAX_RANGE) {
hash_netnet4_data_next(&h->next, &e);
return -ERANGE;
}
ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
......
......@@ -154,12 +154,11 @@ static int
hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_netport4 *h = set->data;
struct hash_netport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
u64 n = 0;
u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0;
bool with_ports = false;
u8 cidr;
int ret;
......@@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
}
ipn = ip;
do {
ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
n++;
} while (ipn++ < ip_to);
if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip);
......@@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
e.cidr = cidr - 1;
for (; p <= port_to; p++) {
for (; p <= port_to; p++, i++) {
e.port = htons(p);
if (i > IPSET_MAX_RANGE) {
hash_netport4_data_next(&h->next, &e);
return -ERANGE;
}
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
......
......@@ -173,17 +173,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
/*
 * Range-to-CIDR conversion for hash:net,port,net.
 *
 * The complete IPv4 range (0 .. UINT_MAX, i.e. 0/0) is handled here:
 * *cidr is set to 0 and @to is returned. Every other range is passed
 * on unchanged to ip_set_range_to_cidr().
 */
static u32
hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr)
{
	if (from != 0 || to != UINT_MAX)
		return ip_set_range_to_cidr(from, to, cidr);

	*cidr = 0;
	return to;
}
static int
hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_netportnet4 *h = set->data;
struct hash_netportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
u64 n = 0, m = 0;
u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
bool with_ports = false;
int ret;
......@@ -285,19 +294,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
ipn = ip;
do {
ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
n++;
} while (ipn++ < ip_to);
ipn = ip2_from;
do {
ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
m++;
} while (ipn++ < ip2_to);
if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
......@@ -310,13 +306,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
do {
e.ip[0] = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]);
for (; p <= port_to; p++) {
e.port = htons(p);
do {
i++;
e.ip[1] = htonl(ip2);
ip2 = ip_set_range_to_cidr(ip2, ip2_to,
&e.cidr[1]);
if (i > IPSET_MAX_RANGE) {
hash_netportnet4_data_next(&h->next,
&e);
return -ERANGE;
}
ip2 = hash_netportnet4_range_to_cidr(ip2,
ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
......
......@@ -141,6 +141,7 @@ unsigned int nf_confirm(void *priv,
struct nf_conn *ct;
bool seqadj_needed;
__be16 frag_off;
int start;
u8 pnum;
ct = nf_ct_get(skb, &ctinfo);
......@@ -163,9 +164,11 @@ unsigned int nf_confirm(void *priv,
break;
case NFPROTO_IPV6:
pnum = ipv6_hdr(skb)->nexthdr;
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
start = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off);
if (start < 0 || (frag_off & htons(~0x7)) != 0)
return nf_conntrack_confirm(skb);
protoff = start;
break;
default:
return nf_conntrack_confirm(skb);
......
......@@ -465,8 +465,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
return 0;
}
static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
struct nft_set *set)
static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
struct nft_set *set,
const struct nft_set_desc *desc)
{
struct nft_trans *trans;
......@@ -474,17 +475,28 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
if (trans == NULL)
return -ENOMEM;
if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
nft_trans_set_id(trans) =
ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
nft_activate_next(ctx->net, set);
}
nft_trans_set(trans) = set;
if (desc) {
nft_trans_set_update(trans) = true;
nft_trans_set_gc_int(trans) = desc->gc_int;
nft_trans_set_timeout(trans) = desc->timeout;
}
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
/*
 * Queue a set transaction that carries no set description: forwards to
 * __nft_trans_set_add() with a NULL desc, so the transaction is not
 * flagged as an update of an existing set.
 *
 * Returns the result of __nft_trans_set_add().
 */
static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
struct nft_set *set)
{
return __nft_trans_set_add(ctx, msg_type, set, NULL);
}
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
......@@ -3780,8 +3792,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
static const struct nft_set_ops *
nft_select_set_ops(const struct nft_ctx *ctx,
const struct nlattr * const nla[],
const struct nft_set_desc *desc,
enum nft_set_policies policy)
const struct nft_set_desc *desc)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nft_set_ops *ops, *bops;
......@@ -3810,7 +3821,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
if (!ops->estimate(desc, flags, &est))
continue;
switch (policy) {
switch (desc->policy) {
case NFT_SET_POL_PERFORMANCE:
if (est.lookup < best.lookup)
break;
......@@ -4045,8 +4056,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb,
static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
const struct nft_set *set, u16 event, u16 flags)
{
struct nlmsghdr *nlh;
u64 timeout = READ_ONCE(set->timeout);
u32 gc_int = READ_ONCE(set->gc_int);
u32 portid = ctx->portid;
struct nlmsghdr *nlh;
struct nlattr *nest;
u32 seq = ctx->seq;
int i;
......@@ -4082,13 +4095,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype)))
goto nla_put_failure;
if (set->timeout &&
if (timeout &&
nla_put_be64(skb, NFTA_SET_TIMEOUT,
nf_jiffies64_to_msecs(set->timeout),
nf_jiffies64_to_msecs(timeout),
NFTA_SET_PAD))
goto nla_put_failure;
if (set->gc_int &&
nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
if (gc_int &&
nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int)))
goto nla_put_failure;
if (set->policy != NFT_SET_POL_PERFORMANCE) {
......@@ -4389,15 +4402,94 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
return err;
}
/*
 * Allocate the expressions requested for a set from netlink attributes.
 *
 * @ctx:       nf_tables context handed to the expression helpers
 * @set:       set the expressions are allocated for
 * @nla:       attribute table; NFTA_SET_EXPR (single expression) takes
 *             precedence over NFTA_SET_EXPRESSIONS (nested list)
 * @exprs:     output array, filled starting at index 0
 * @num_exprs: incremented once per stored expression
 * @flags:     set flags; the list form requires NFT_SET_EXPR
 *
 * Returns 0 on success (including when neither attribute is present).
 * On failure returns -EINVAL (list without NFT_SET_EXPR, or a nested
 * attribute that is not NFTA_LIST_ELEM), -E2BIG (more than
 * NFT_SET_EXPR_MAX entries) or the error from nft_set_elem_expr_alloc();
 * the first *num_exprs entries of @exprs are destroyed before returning.
 */
static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set,
			      const struct nlattr * const *nla,
			      struct nft_expr **exprs, int *num_exprs,
			      u32 flags)
{
	struct nft_expr *expr;
	struct nlattr *attr;
	int err, i, rem;

	/* Single-expression form. */
	if (nla[NFTA_SET_EXPR]) {
		expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]);
		if (IS_ERR(expr)) {
			err = PTR_ERR(expr);
			goto err_release;
		}
		exprs[0] = expr;
		(*num_exprs)++;
		return 0;
	}

	/* Nothing requested at all. */
	if (!nla[NFTA_SET_EXPRESSIONS])
		return 0;

	/* List form is only valid for sets flagged NFT_SET_EXPR. */
	if (!(flags & NFT_SET_EXPR)) {
		err = -EINVAL;
		goto err_release;
	}

	i = 0;
	nla_for_each_nested(attr, nla[NFTA_SET_EXPRESSIONS], rem) {
		if (i == NFT_SET_EXPR_MAX) {
			err = -E2BIG;
			goto err_release;
		}
		if (nla_type(attr) != NFTA_LIST_ELEM) {
			err = -EINVAL;
			goto err_release;
		}
		expr = nft_set_elem_expr_alloc(ctx, set, attr);
		if (IS_ERR(expr)) {
			err = PTR_ERR(expr);
			goto err_release;
		}
		exprs[i++] = expr;
		(*num_exprs)++;
	}

	return 0;

err_release:
	/* Undo every allocation counted so far. */
	for (i = 0; i < *num_exprs; i++)
		nft_expr_destroy(ctx, exprs[i]);

	return err;
}
/*
 * Check whether a re-declared set matches an existing one.
 *
 * Compares key/data type and length, flags, the concatenation layout
 * (field count and each field length) and the expressions (count and
 * the ops pointer of each expression) of @set against the requested
 * @desc, @flags and @exprs.
 *
 * Returns true only if all of them match.
 */
static bool nft_set_is_same(const struct nft_set *set,
			    const struct nft_set_desc *desc,
			    struct nft_expr *exprs[], u32 num_exprs, u32 flags)
{
	u32 idx;

	/* Key and data description. */
	if (set->ktype != desc->ktype || set->dtype != desc->dtype)
		return false;
	if (set->klen != desc->klen || set->dlen != desc->dlen)
		return false;

	if (set->flags != flags)
		return false;

	/* Counts must agree before the per-entry comparisons below. */
	if (set->field_count != desc->field_count)
		return false;
	if (set->num_exprs != num_exprs)
		return false;

	for (idx = 0; idx < desc->field_count; idx++) {
		if (set->field_len[idx] != desc->field_len[idx])
			return false;
	}

	for (idx = 0; idx < num_exprs; idx++) {
		if (set->exprs[idx]->ops != exprs[idx]->ops)
			return false;
	}

	return true;
}
static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
u32 ktype, dtype, flags, policy, gc_int, objtype;
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
u8 family = info->nfmsg->nfgen_family;
const struct nft_set_ops *ops;
struct nft_expr *expr = NULL;
struct net *net = info->net;
struct nft_set_desc desc;
struct nft_table *table;
......@@ -4405,10 +4497,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
struct nft_set *set;
struct nft_ctx ctx;
size_t alloc_size;
u64 timeout;
int num_exprs = 0;
char *name;
int err, i;
u16 udlen;
u32 flags;
u64 size;
if (nla[NFTA_SET_TABLE] == NULL ||
......@@ -4419,10 +4512,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
memset(&desc, 0, sizeof(desc));
ktype = NFT_DATA_VALUE;
desc.ktype = NFT_DATA_VALUE;
if (nla[NFTA_SET_KEY_TYPE] != NULL) {
ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
return -EINVAL;
}
......@@ -4447,17 +4540,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
return -EOPNOTSUPP;
}
dtype = 0;
desc.dtype = 0;
if (nla[NFTA_SET_DATA_TYPE] != NULL) {
if (!(flags & NFT_SET_MAP))
return -EINVAL;
dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
dtype != NFT_DATA_VERDICT)
desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
desc.dtype != NFT_DATA_VERDICT)
return -EINVAL;
if (dtype != NFT_DATA_VERDICT) {
if (desc.dtype != NFT_DATA_VERDICT) {
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
......@@ -4472,34 +4565,34 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(flags & NFT_SET_OBJECT))
return -EINVAL;
objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
if (objtype == NFT_OBJECT_UNSPEC ||
objtype > NFT_OBJECT_MAX)
desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
if (desc.objtype == NFT_OBJECT_UNSPEC ||
desc.objtype > NFT_OBJECT_MAX)
return -EOPNOTSUPP;
} else if (flags & NFT_SET_OBJECT)
return -EINVAL;
else
objtype = NFT_OBJECT_UNSPEC;
desc.objtype = NFT_OBJECT_UNSPEC;
timeout = 0;
desc.timeout = 0;
if (nla[NFTA_SET_TIMEOUT] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
if (err)
return err;
}
gc_int = 0;
desc.gc_int = 0;
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}
policy = NFT_SET_POL_PERFORMANCE;
desc.policy = NFT_SET_POL_PERFORMANCE;
if (nla[NFTA_SET_POLICY] != NULL)
policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
if (nla[NFTA_SET_DESC] != NULL) {
err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
......@@ -4531,6 +4624,8 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
return PTR_ERR(set);
}
} else {
struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {};
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return -EEXIST;
......@@ -4538,13 +4633,29 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
return 0;
err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags);
if (err < 0)
return err;
err = 0;
if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
err = -EEXIST;
}
for (i = 0; i < num_exprs; i++)
nft_expr_destroy(&ctx, exprs[i]);
if (err < 0)
return err;
return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc);
}
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
ops = nft_select_set_ops(&ctx, nla, &desc, policy);
ops = nft_select_set_ops(&ctx, nla, &desc);
if (IS_ERR(ops))
return PTR_ERR(ops);
......@@ -4584,18 +4695,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
set->ktype = ktype;
set->ktype = desc.ktype;
set->klen = desc.klen;
set->dtype = dtype;
set->objtype = objtype;
set->dtype = desc.dtype;
set->objtype = desc.objtype;
set->dlen = desc.dlen;
set->flags = flags;
set->size = desc.size;
set->policy = policy;
set->policy = desc.policy;
set->udlen = udlen;
set->udata = udata;
set->timeout = timeout;
set->gc_int = gc_int;
set->timeout = desc.timeout;
set->gc_int = desc.gc_int;
set->field_count = desc.field_count;
for (i = 0; i < desc.field_count; i++)
......@@ -4605,43 +4716,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (err < 0)
goto err_set_init;
if (nla[NFTA_SET_EXPR]) {
expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
if (IS_ERR(expr)) {
err = PTR_ERR(expr);
goto err_set_expr_alloc;
}
set->exprs[0] = expr;
set->num_exprs++;
} else if (nla[NFTA_SET_EXPRESSIONS]) {
struct nft_expr *expr;
struct nlattr *tmp;
int left;
if (!(flags & NFT_SET_EXPR)) {
err = -EINVAL;
goto err_set_expr_alloc;
}
i = 0;
nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
if (i == NFT_SET_EXPR_MAX) {
err = -E2BIG;
goto err_set_expr_alloc;
}
if (nla_type(tmp) != NFTA_LIST_ELEM) {
err = -EINVAL;
goto err_set_expr_alloc;
}
expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
if (IS_ERR(expr)) {
err = PTR_ERR(expr);
goto err_set_expr_alloc;
}
set->exprs[i++] = expr;
set->num_exprs++;
}
}
err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags);
if (err < 0)
goto err_set_destroy;
set->num_exprs = num_exprs;
set->handle = nf_tables_alloc_handle(table);
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
......@@ -4655,7 +4734,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
err_set_expr_alloc:
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(&ctx, set->exprs[i]);
err_set_destroy:
ops->destroy(set);
err_set_init:
kfree(set->name);
......@@ -6008,7 +6087,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
} else if (set->flags & NFT_SET_TIMEOUT &&
!(flags & NFT_SET_ELEM_INTERVAL_END)) {
timeout = set->timeout;
timeout = READ_ONCE(set->timeout);
}
expiration = 0;
......@@ -6109,7 +6188,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err_parse_key_end;
if (timeout != set->timeout) {
if (timeout != READ_ONCE(set->timeout)) {
err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
if (err < 0)
goto err_parse_key_end;
......@@ -9031,6 +9110,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_NEWSET:
if (nft_trans_set_update(trans)) {
struct nft_set *set = nft_trans_set(trans);
WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans));
WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans));
} else {
nft_clear(net, nft_trans_set(trans));
/* This avoids hitting -EBUSY when deleting the table
* from the transaction.
......@@ -9038,7 +9123,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
if (nft_set_is_anonymous(nft_trans_set(trans)) &&
!list_empty(&nft_trans_set(trans)->bindings))
trans->ctx.table->use--;
}
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_NEWSET, GFP_KERNEL);
nft_trans_destroy(trans);
......@@ -9260,6 +9345,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
if (nft_trans_set_update(trans)) {
nft_trans_destroy(trans);
break;
}
trans->ctx.table->use--;
if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment