Commit e86e180b authored by David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for net-next,
most relevantly they are:

* Cleanup to remove a double semicolon, from Stephen Hemminger.

* Calm down a sparse warning in xt_ipcomp, from Fan Du.

* nf_ct_labels support for nf_tables, from Florian Westphal.

* new macros to simplify rcu dereferences in the scope of nfnetlink
  and nf_tables, from Patrick McHardy.

* Accept queue and drop verdicts (including the reason for the drop) in
  nf_tables verdict parsing, also from Patrick.

* Remove unused random seed initialization in nfnetlink_log, from
  Florian Westphal.

* Allow attaching user-specific information to nf_tables rules, useful
  for attaching user comments to a rule, from me.

* Return errors in ipset according to the manpage documentation, from
  Jozsef Kadlecsik.

* Fix coccinelle warnings related to incorrect bool type usage for ipset,
  from Fengguang Wu.

* Add hash:ip,mark set type to ipset, from Vytas Dauksa.

* Fix the protocol message being printed by ipset for each netns that is
  created, from Ilia Mirkin.

* Add forceadd option to ipset, which evicts a random entry from the set
  if it becomes full, from Josh Hunt.

* Minor IPVS cleanups and fixes from Andi Kleen and Tingwei Liu.

* Improve conntrack scalability by removing a central spinlock, original
  work from Eric Dumazet. Jesper Dangaard Brouer took the patches over to
  address remaining issues. Several patches preparing this change come
  first.

* Rework nft_hash to resolve bugs (leaking chain, missing rcu synchronization
  on element removal, etc.), from Patrick McHardy.

* Restore context in the rule deletion path, as we now release rule objects
  synchronously, from Patrick McHardy. This gets back event notification for
  anonymous sets.

* Fix NAT family validation in nft_nat, also from Patrick.

* Improve scalability of xt_connlimit by using an array of spinlocks and
  by introducing a rb-tree of hashtables for faster lookup of accounted
  objects per network. This patch was preceded by several patches and
  refactorings to accommodate this change, including the use of kmem_cache,
  from Florian Westphal.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents e7ef085d 7d084877
...@@ -39,11 +39,13 @@ enum ip_set_feature { ...@@ -39,11 +39,13 @@ enum ip_set_feature {
IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG), IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG),
IPSET_TYPE_IFACE_FLAG = 5, IPSET_TYPE_IFACE_FLAG = 5,
IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG), IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG),
IPSET_TYPE_NOMATCH_FLAG = 6, IPSET_TYPE_MARK_FLAG = 6,
IPSET_TYPE_MARK = (1 << IPSET_TYPE_MARK_FLAG),
IPSET_TYPE_NOMATCH_FLAG = 7,
IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG), IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG),
/* Strictly speaking not a feature, but a flag for dumping: /* Strictly speaking not a feature, but a flag for dumping:
* this settype must be dumped last */ * this settype must be dumped last */
IPSET_DUMP_LAST_FLAG = 7, IPSET_DUMP_LAST_FLAG = 8,
IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG), IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG),
}; };
...@@ -63,6 +65,7 @@ enum ip_set_extension { ...@@ -63,6 +65,7 @@ enum ip_set_extension {
#define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT)
#define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER) #define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER)
#define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT) #define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT)
#define SET_WITH_FORCEADD(s) ((s)->flags & IPSET_CREATE_FLAG_FORCEADD)
/* Extension id, in size order */ /* Extension id, in size order */
enum ip_set_ext_id { enum ip_set_ext_id {
...@@ -171,8 +174,6 @@ struct ip_set_type { ...@@ -171,8 +174,6 @@ struct ip_set_type {
char name[IPSET_MAXNAMELEN]; char name[IPSET_MAXNAMELEN];
/* Protocol version */ /* Protocol version */
u8 protocol; u8 protocol;
/* Set features to control swapping */
u8 features;
/* Set type dimension */ /* Set type dimension */
u8 dimension; u8 dimension;
/* /*
...@@ -182,6 +183,8 @@ struct ip_set_type { ...@@ -182,6 +183,8 @@ struct ip_set_type {
u8 family; u8 family;
/* Type revisions */ /* Type revisions */
u8 revision_min, revision_max; u8 revision_min, revision_max;
/* Set features to control swapping */
u16 features;
/* Create set */ /* Create set */
int (*create)(struct net *net, struct ip_set *set, int (*create)(struct net *net, struct ip_set *set,
...@@ -217,6 +220,8 @@ struct ip_set { ...@@ -217,6 +220,8 @@ struct ip_set {
u8 revision; u8 revision;
/* Extensions */ /* Extensions */
u8 extensions; u8 extensions;
/* Create flags */
u8 flags;
/* Default timeout value, if enabled */ /* Default timeout value, if enabled */
u32 timeout; u32 timeout;
/* Element data size */ /* Element data size */
...@@ -251,6 +256,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) ...@@ -251,6 +256,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
cadt_flags |= IPSET_FLAG_WITH_COUNTERS; cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
if (SET_WITH_COMMENT(set)) if (SET_WITH_COMMENT(set))
cadt_flags |= IPSET_FLAG_WITH_COMMENT; cadt_flags |= IPSET_FLAG_WITH_COMMENT;
if (SET_WITH_FORCEADD(set))
cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
if (!cadt_flags) if (!cadt_flags)
return 0; return 0;
......
...@@ -44,6 +44,27 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, ...@@ -44,6 +44,27 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
void nfnl_lock(__u8 subsys_id); void nfnl_lock(__u8 subsys_id);
void nfnl_unlock(__u8 subsys_id); void nfnl_unlock(__u8 subsys_id);
#ifdef CONFIG_PROVE_LOCKING
int lockdep_nfnl_is_held(__u8 subsys_id);
#else
/* Stub for builds without CONFIG_PROVE_LOCKING: ignores subsys_id and always returns 1. */
static inline int lockdep_nfnl_is_held(__u8 subsys_id)
{
return 1;
}
#endif /* CONFIG_PROVE_LOCKING */
/*
 * nfnl_dereference - fetch RCU pointer when updates are prevented by subsys mutex
 *
 * @p: The pointer to read, prior to dereferencing
 * @ss: The nfnetlink subsystem ID
 *
 * Return the value of the specified RCU-protected pointer, but omit
 * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
 * caller holds the NFNL subsystem mutex.
 *
 * The condition handed to rcu_dereference_protected() is
 * lockdep_nfnl_is_held(ss).
 */
#define nfnl_dereference(p, ss) \
rcu_dereference_protected(p, lockdep_nfnl_is_held(ss))
#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
......
...@@ -73,10 +73,17 @@ struct nf_conn_help { ...@@ -73,10 +73,17 @@ struct nf_conn_help {
struct nf_conn { struct nf_conn {
/* Usage count in here is 1 for hash table/destruct timer, 1 per skb, /* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
plus 1 for any connection(s) we are `master' for */ * plus 1 for any connection(s) we are `master' for
*
* Hint, SKB address this struct and refcnt via skb->nfct and
* helpers nf_conntrack_get() and nf_conntrack_put().
* Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt,
* beware nf_ct_get() is different and don't inc refcnt.
*/
struct nf_conntrack ct_general; struct nf_conntrack ct_general;
spinlock_t lock; spinlock_t lock;
u16 cpu;
/* XXX should I move this to the tail ? - Y.K */ /* XXX should I move this to the tail ? - Y.K */
/* These are my tuples; original and reply */ /* These are my tuples; original and reply */
......
...@@ -77,6 +77,13 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, ...@@ -77,6 +77,13 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *proto); const struct nf_conntrack_l4proto *proto);
extern spinlock_t nf_conntrack_lock ; #ifdef CONFIG_LOCKDEP
# define CONNTRACK_LOCKS 8
#else
# define CONNTRACK_LOCKS 1024
#endif
extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
extern spinlock_t nf_conntrack_expect_lock;
#endif /* _NF_CONNTRACK_CORE_H */ #endif /* _NF_CONNTRACK_CORE_H */
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
#include <uapi/linux/netfilter/xt_connlabel.h> #include <uapi/linux/netfilter/xt_connlabel.h>
#define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
struct nf_conn_labels { struct nf_conn_labels {
u8 words; u8 words;
unsigned long bits[]; unsigned long bits[];
...@@ -29,7 +31,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) ...@@ -29,7 +31,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
u8 words; u8 words;
words = ACCESS_ONCE(net->ct.label_words); words = ACCESS_ONCE(net->ct.label_words);
if (words == 0 || WARN_ON_ONCE(words > 8)) if (words == 0)
return NULL; return NULL;
cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/netfilter.h> #include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/x_tables.h> #include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nf_tables.h> #include <linux/netfilter/nf_tables.h>
#include <net/netlink.h> #include <net/netlink.h>
...@@ -288,7 +289,8 @@ struct nft_expr_ops { ...@@ -288,7 +289,8 @@ struct nft_expr_ops {
int (*init)(const struct nft_ctx *ctx, int (*init)(const struct nft_ctx *ctx,
const struct nft_expr *expr, const struct nft_expr *expr,
const struct nlattr * const tb[]); const struct nlattr * const tb[]);
void (*destroy)(const struct nft_expr *expr); void (*destroy)(const struct nft_ctx *ctx,
const struct nft_expr *expr);
int (*dump)(struct sk_buff *skb, int (*dump)(struct sk_buff *skb,
const struct nft_expr *expr); const struct nft_expr *expr);
int (*validate)(const struct nft_ctx *ctx, int (*validate)(const struct nft_ctx *ctx,
...@@ -325,13 +327,15 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) ...@@ -325,13 +327,15 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
* @handle: rule handle * @handle: rule handle
* @genmask: generation mask * @genmask: generation mask
* @dlen: length of expression data * @dlen: length of expression data
* @ulen: length of user data (used for comments)
* @data: expression data * @data: expression data
*/ */
struct nft_rule { struct nft_rule {
struct list_head list; struct list_head list;
u64 handle:46, u64 handle:42,
genmask:2, genmask:2,
dlen:16; dlen:12,
ulen:8;
unsigned char data[] unsigned char data[]
__attribute__((aligned(__alignof__(struct nft_expr)))); __attribute__((aligned(__alignof__(struct nft_expr))));
}; };
...@@ -340,19 +344,13 @@ struct nft_rule { ...@@ -340,19 +344,13 @@ struct nft_rule {
* struct nft_rule_trans - nf_tables rule update in transaction * struct nft_rule_trans - nf_tables rule update in transaction
* *
* @list: used internally * @list: used internally
* @ctx: rule context
* @rule: rule that needs to be updated * @rule: rule that needs to be updated
* @chain: chain that this rule belongs to
* @table: table for which this chain applies
* @nlh: netlink header of the message that contain this update
* @family: family expressesed as AF_*
*/ */
struct nft_rule_trans { struct nft_rule_trans {
struct list_head list; struct list_head list;
struct nft_ctx ctx;
struct nft_rule *rule; struct nft_rule *rule;
const struct nft_chain *chain;
const struct nft_table *table;
const struct nlmsghdr *nlh;
u8 family;
}; };
static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
...@@ -370,6 +368,11 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) ...@@ -370,6 +368,11 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
return (struct nft_expr *)&rule->data[rule->dlen]; return (struct nft_expr *)&rule->data[rule->dlen];
} }
/*
 * nft_userdata - locate the area that follows a rule's expression data
 *
 * @rule: rule to inspect
 *
 * Returns the address rule->dlen bytes into rule->data. The const
 * qualifier of @rule is dropped from the returned pointer.
 */
static inline void *nft_userdata(const struct nft_rule *rule)
{
	const unsigned char *past_exprs = rule->data + rule->dlen;

	return (void *)past_exprs;
}
/* /*
* The last pointer isn't really necessary, but the compiler isn't able to * The last pointer isn't really necessary, but the compiler isn't able to
* determine that the result of nft_expr_last() is always the same since it * determine that the result of nft_expr_last() is always the same since it
...@@ -521,6 +524,9 @@ void nft_unregister_chain_type(const struct nf_chain_type *); ...@@ -521,6 +524,9 @@ void nft_unregister_chain_type(const struct nf_chain_type *);
int nft_register_expr(struct nft_expr_type *); int nft_register_expr(struct nft_expr_type *);
void nft_unregister_expr(struct nft_expr_type *); void nft_unregister_expr(struct nft_expr_type *);
/* nfnl_dereference() with the subsystem fixed to NFNL_SUBSYS_NFTABLES */
#define nft_dereference(p) \
nfnl_dereference(p, NFNL_SUBSYS_NFTABLES)
#define MODULE_ALIAS_NFT_FAMILY(family) \ #define MODULE_ALIAS_NFT_FAMILY(family) \
MODULE_ALIAS("nft-afinfo-" __stringify(family)) MODULE_ALIAS("nft-afinfo-" __stringify(family))
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <linux/list_nulls.h> #include <linux/list_nulls.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/netfilter/nf_conntrack_tcp.h> #include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/seqlock.h>
struct ctl_table_header; struct ctl_table_header;
struct nf_conntrack_ecache; struct nf_conntrack_ecache;
...@@ -62,6 +63,13 @@ struct nf_ip_net { ...@@ -62,6 +63,13 @@ struct nf_ip_net {
#endif #endif
}; };
/*
 * Three conntrack list heads (unconfirmed, dying, tmpl) bundled with a
 * spinlock. struct netns_ct references this type through a __percpu
 * pointer named pcpu_lists.
 */
struct ct_pcpu {
/* NOTE(review): presumably guards the three lists below - confirm against the users */
spinlock_t lock;
struct hlist_nulls_head unconfirmed;
struct hlist_nulls_head dying;
struct hlist_nulls_head tmpl;
};
struct netns_ct { struct netns_ct {
atomic_t count; atomic_t count;
unsigned int expect_count; unsigned int expect_count;
...@@ -83,12 +91,11 @@ struct netns_ct { ...@@ -83,12 +91,11 @@ struct netns_ct {
int sysctl_checksum; int sysctl_checksum;
unsigned int htable_size; unsigned int htable_size;
seqcount_t generation;
struct kmem_cache *nf_conntrack_cachep; struct kmem_cache *nf_conntrack_cachep;
struct hlist_nulls_head *hash; struct hlist_nulls_head *hash;
struct hlist_head *expect_hash; struct hlist_head *expect_hash;
struct hlist_nulls_head unconfirmed; struct ct_pcpu __percpu *pcpu_lists;
struct hlist_nulls_head dying;
struct hlist_nulls_head tmpl;
struct ip_conntrack_stat __percpu *stat; struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_exp_event_notifier __rcu *nf_expect_event_cb; struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
......
...@@ -82,6 +82,8 @@ enum { ...@@ -82,6 +82,8 @@ enum {
IPSET_ATTR_PROTO, /* 7 */ IPSET_ATTR_PROTO, /* 7 */
IPSET_ATTR_CADT_FLAGS, /* 8 */ IPSET_ATTR_CADT_FLAGS, /* 8 */
IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */ IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */
IPSET_ATTR_MARK, /* 10 */
IPSET_ATTR_MARKMASK, /* 11 */
/* Reserve empty slots */ /* Reserve empty slots */
IPSET_ATTR_CADT_MAX = 16, IPSET_ATTR_CADT_MAX = 16,
/* Create-only specific attributes */ /* Create-only specific attributes */
...@@ -144,6 +146,7 @@ enum ipset_errno { ...@@ -144,6 +146,7 @@ enum ipset_errno {
IPSET_ERR_IPADDR_IPV6, IPSET_ERR_IPADDR_IPV6,
IPSET_ERR_COUNTER, IPSET_ERR_COUNTER,
IPSET_ERR_COMMENT, IPSET_ERR_COMMENT,
IPSET_ERR_INVALID_MARKMASK,
/* Type specific error codes */ /* Type specific error codes */
IPSET_ERR_TYPE_SPECIFIC = 4352, IPSET_ERR_TYPE_SPECIFIC = 4352,
...@@ -182,9 +185,18 @@ enum ipset_cadt_flags { ...@@ -182,9 +185,18 @@ enum ipset_cadt_flags {
IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS), IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS),
IPSET_FLAG_BIT_WITH_COMMENT = 4, IPSET_FLAG_BIT_WITH_COMMENT = 4,
IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT), IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT),
IPSET_FLAG_BIT_WITH_FORCEADD = 5,
IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD),
IPSET_FLAG_CADT_MAX = 15, IPSET_FLAG_CADT_MAX = 15,
}; };
/* The flag bits which correspond to the non-extension create flags */
enum ipset_create_flags {
/* Bit number of the forceadd create flag, followed by its mask value */
IPSET_CREATE_FLAG_BIT_FORCEADD = 0,
IPSET_CREATE_FLAG_FORCEADD = (1 << IPSET_CREATE_FLAG_BIT_FORCEADD),
/* NOTE(review): presumably the highest usable bit number (fits a u8 flags field) - confirm */
IPSET_CREATE_FLAG_BIT_MAX = 7,
};
/* Commands with settype-specific attributes */ /* Commands with settype-specific attributes */
enum ipset_adt { enum ipset_adt {
IPSET_ADD, IPSET_ADD,
......
#ifndef _LINUX_NF_TABLES_H #ifndef _LINUX_NF_TABLES_H
#define _LINUX_NF_TABLES_H #define _LINUX_NF_TABLES_H
#define NFT_CHAIN_MAXNAMELEN 32 #define NFT_CHAIN_MAXNAMELEN 32
#define NFT_USERDATA_MAXLEN 256
enum nft_registers { enum nft_registers {
NFT_REG_VERDICT, NFT_REG_VERDICT,
...@@ -156,6 +157,7 @@ enum nft_chain_attributes { ...@@ -156,6 +157,7 @@ enum nft_chain_attributes {
* @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes)
* @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes)
* @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64)
* @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN)
*/ */
enum nft_rule_attributes { enum nft_rule_attributes {
NFTA_RULE_UNSPEC, NFTA_RULE_UNSPEC,
...@@ -165,6 +167,7 @@ enum nft_rule_attributes { ...@@ -165,6 +167,7 @@ enum nft_rule_attributes {
NFTA_RULE_EXPRESSIONS, NFTA_RULE_EXPRESSIONS,
NFTA_RULE_COMPAT, NFTA_RULE_COMPAT,
NFTA_RULE_POSITION, NFTA_RULE_POSITION,
NFTA_RULE_USERDATA,
__NFTA_RULE_MAX __NFTA_RULE_MAX
}; };
#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1)
...@@ -601,6 +604,7 @@ enum nft_ct_keys { ...@@ -601,6 +604,7 @@ enum nft_ct_keys {
NFT_CT_PROTOCOL, NFT_CT_PROTOCOL,
NFT_CT_PROTO_SRC, NFT_CT_PROTO_SRC,
NFT_CT_PROTO_DST, NFT_CT_PROTO_DST,
NFT_CT_LABELS,
}; };
/** /**
......
...@@ -61,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) ...@@ -61,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
skb_dst_set(skb, NULL); skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
if (IS_ERR(dst)) if (IS_ERR(dst))
return PTR_ERR(dst);; return PTR_ERR(dst);
skb_dst_set(skb, dst); skb_dst_set(skb, dst);
} }
#endif #endif
......
...@@ -61,6 +61,15 @@ config IP_SET_HASH_IP ...@@ -61,6 +61,15 @@ config IP_SET_HASH_IP
To compile it as a module, choose M here. If unsure, say N. To compile it as a module, choose M here. If unsure, say N.
config IP_SET_HASH_IPMARK
tristate "hash:ip,mark set support"
depends on IP_SET
help
This option adds the hash:ip,mark set type support, by which one
can store IPv4/IPv6 address and mark pairs.
To compile it as a module, choose M here. If unsure, say N.
config IP_SET_HASH_IPPORT config IP_SET_HASH_IPPORT
tristate "hash:ip,port set support" tristate "hash:ip,port set support"
depends on IP_SET depends on IP_SET
......
...@@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o ...@@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o
# hash types # hash types
obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
......
...@@ -54,10 +54,10 @@ MODULE_DESCRIPTION("core IP set support"); ...@@ -54,10 +54,10 @@ MODULE_DESCRIPTION("core IP set support");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
/* When the nfnl mutex is held: */ /* When the nfnl mutex is held: */
#define nfnl_dereference(p) \ #define ip_set_dereference(p) \
rcu_dereference_protected(p, 1) rcu_dereference_protected(p, 1)
#define nfnl_set(inst, id) \ #define ip_set(inst, id) \
nfnl_dereference((inst)->ip_set_list)[id] ip_set_dereference((inst)->ip_set_list)[id]
/* /*
* The set types are implemented in modules and registered set types * The set types are implemented in modules and registered set types
...@@ -368,6 +368,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) ...@@ -368,6 +368,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
if (tb[IPSET_ATTR_CADT_FLAGS]) if (tb[IPSET_ATTR_CADT_FLAGS])
cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
set->flags |= IPSET_CREATE_FLAG_FORCEADD;
for (id = 0; id < IPSET_EXT_ID_MAX; id++) { for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb)) if (!add_extension(id, cadt_flags, tb))
continue; continue;
...@@ -510,7 +512,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, ...@@ -510,7 +512,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
if (opt->dim < set->type->dimension || if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC)) !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return 0; return -IPSET_ERR_TYPE_MISMATCH;
write_lock_bh(&set->lock); write_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
...@@ -533,7 +535,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, ...@@ -533,7 +535,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
if (opt->dim < set->type->dimension || if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC)) !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return 0; return -IPSET_ERR_TYPE_MISMATCH;
write_lock_bh(&set->lock); write_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
...@@ -640,7 +642,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) ...@@ -640,7 +642,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
return IPSET_INVALID_ID; return IPSET_INVALID_ID;
nfnl_lock(NFNL_SUBSYS_IPSET); nfnl_lock(NFNL_SUBSYS_IPSET);
set = nfnl_set(inst, index); set = ip_set(inst, index);
if (set) if (set)
__ip_set_get(set); __ip_set_get(set);
else else
...@@ -666,7 +668,7 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index) ...@@ -666,7 +668,7 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index)
nfnl_lock(NFNL_SUBSYS_IPSET); nfnl_lock(NFNL_SUBSYS_IPSET);
if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
set = nfnl_set(inst, index); set = ip_set(inst, index);
if (set != NULL) if (set != NULL)
__ip_set_put(set); __ip_set_put(set);
} }
...@@ -734,7 +736,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) ...@@ -734,7 +736,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
*id = IPSET_INVALID_ID; *id = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) { for (i = 0; i < inst->ip_set_max; i++) {
set = nfnl_set(inst, i); set = ip_set(inst, i);
if (set != NULL && STREQ(set->name, name)) { if (set != NULL && STREQ(set->name, name)) {
*id = i; *id = i;
break; break;
...@@ -760,7 +762,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, ...@@ -760,7 +762,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
*index = IPSET_INVALID_ID; *index = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) { for (i = 0; i < inst->ip_set_max; i++) {
s = nfnl_set(inst, i); s = ip_set(inst, i);
if (s == NULL) { if (s == NULL) {
if (*index == IPSET_INVALID_ID) if (*index == IPSET_INVALID_ID)
*index = i; *index = i;
...@@ -883,7 +885,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, ...@@ -883,7 +885,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
if (!list) if (!list)
goto cleanup; goto cleanup;
/* nfnl mutex is held, both lists are valid */ /* nfnl mutex is held, both lists are valid */
tmp = nfnl_dereference(inst->ip_set_list); tmp = ip_set_dereference(inst->ip_set_list);
memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
rcu_assign_pointer(inst->ip_set_list, list); rcu_assign_pointer(inst->ip_set_list, list);
/* Make sure all current packets have passed through */ /* Make sure all current packets have passed through */
...@@ -900,7 +902,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, ...@@ -900,7 +902,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
* Finally! Add our shiny new set to the list, and be done. * Finally! Add our shiny new set to the list, and be done.
*/ */
pr_debug("create: '%s' created with index %u!\n", set->name, index); pr_debug("create: '%s' created with index %u!\n", set->name, index);
nfnl_set(inst, index) = set; ip_set(inst, index) = set;
return ret; return ret;
...@@ -925,10 +927,10 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { ...@@ -925,10 +927,10 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
static void static void
ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index) ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index)
{ {
struct ip_set *set = nfnl_set(inst, index); struct ip_set *set = ip_set(inst, index);
pr_debug("set: %s\n", set->name); pr_debug("set: %s\n", set->name);
nfnl_set(inst, index) = NULL; ip_set(inst, index) = NULL;
/* Must call it without holding any lock */ /* Must call it without holding any lock */
set->variant->destroy(set); set->variant->destroy(set);
...@@ -962,7 +964,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, ...@@ -962,7 +964,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
read_lock_bh(&ip_set_ref_lock); read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) { if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) { for (i = 0; i < inst->ip_set_max; i++) {
s = nfnl_set(inst, i); s = ip_set(inst, i);
if (s != NULL && s->ref) { if (s != NULL && s->ref) {
ret = -IPSET_ERR_BUSY; ret = -IPSET_ERR_BUSY;
goto out; goto out;
...@@ -970,7 +972,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, ...@@ -970,7 +972,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
} }
read_unlock_bh(&ip_set_ref_lock); read_unlock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) { for (i = 0; i < inst->ip_set_max; i++) {
s = nfnl_set(inst, i); s = ip_set(inst, i);
if (s != NULL) if (s != NULL)
ip_set_destroy_set(inst, i); ip_set_destroy_set(inst, i);
} }
...@@ -1020,7 +1022,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, ...@@ -1020,7 +1022,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
if (!attr[IPSET_ATTR_SETNAME]) { if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) { for (i = 0; i < inst->ip_set_max; i++) {
s = nfnl_set(inst, i); s = ip_set(inst, i);
if (s != NULL) if (s != NULL)
ip_set_flush_set(s); ip_set_flush_set(s);
} }
...@@ -1074,7 +1076,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, ...@@ -1074,7 +1076,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
for (i = 0; i < inst->ip_set_max; i++) { for (i = 0; i < inst->ip_set_max; i++) {
s = nfnl_set(inst, i); s = ip_set(inst, i);
if (s != NULL && STREQ(s->name, name2)) { if (s != NULL && STREQ(s->name, name2)) {
ret = -IPSET_ERR_EXIST_SETNAME2; ret = -IPSET_ERR_EXIST_SETNAME2;
goto out; goto out;
...@@ -1134,8 +1136,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, ...@@ -1134,8 +1136,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
write_lock_bh(&ip_set_ref_lock); write_lock_bh(&ip_set_ref_lock);
swap(from->ref, to->ref); swap(from->ref, to->ref);
nfnl_set(inst, from_id) = to; ip_set(inst, from_id) = to;
nfnl_set(inst, to_id) = from; ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock); write_unlock_bh(&ip_set_ref_lock);
return 0; return 0;
...@@ -1157,7 +1159,7 @@ ip_set_dump_done(struct netlink_callback *cb) ...@@ -1157,7 +1159,7 @@ ip_set_dump_done(struct netlink_callback *cb)
struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
if (cb->args[IPSET_CB_ARG0]) { if (cb->args[IPSET_CB_ARG0]) {
pr_debug("release set %s\n", pr_debug("release set %s\n",
nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name); ip_set(inst, cb->args[IPSET_CB_INDEX])->name);
__ip_set_put_byindex(inst, __ip_set_put_byindex(inst,
(ip_set_id_t) cb->args[IPSET_CB_INDEX]); (ip_set_id_t) cb->args[IPSET_CB_INDEX]);
} }
...@@ -1254,7 +1256,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -1254,7 +1256,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
set = nfnl_set(inst, index); set = ip_set(inst, index);
if (set == NULL) { if (set == NULL) {
if (dump_type == DUMP_ONE) { if (dump_type == DUMP_ONE) {
ret = -ENOENT; ret = -ENOENT;
...@@ -1332,7 +1334,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -1332,7 +1334,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
release_refcount: release_refcount:
/* If there was an error or set is done, release set */ /* If there was an error or set is done, release set */
if (ret || !cb->args[IPSET_CB_ARG0]) { if (ret || !cb->args[IPSET_CB_ARG0]) {
pr_debug("release set %s\n", nfnl_set(inst, index)->name); pr_debug("release set %s\n", ip_set(inst, index)->name);
__ip_set_put_byindex(inst, index); __ip_set_put_byindex(inst, index);
cb->args[IPSET_CB_ARG0] = 0; cb->args[IPSET_CB_ARG0] = 0;
} }
...@@ -1887,7 +1889,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ...@@ -1887,7 +1889,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
find_set_and_id(inst, req_get->set.name, &id); find_set_and_id(inst, req_get->set.name, &id);
req_get->set.index = id; req_get->set.index = id;
if (id != IPSET_INVALID_ID) if (id != IPSET_INVALID_ID)
req_get->family = nfnl_set(inst, id)->family; req_get->family = ip_set(inst, id)->family;
nfnl_unlock(NFNL_SUBSYS_IPSET); nfnl_unlock(NFNL_SUBSYS_IPSET);
goto copy; goto copy;
} }
...@@ -1901,7 +1903,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ...@@ -1901,7 +1903,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
goto done; goto done;
} }
nfnl_lock(NFNL_SUBSYS_IPSET); nfnl_lock(NFNL_SUBSYS_IPSET);
set = nfnl_set(inst, req_get->set.index); set = ip_set(inst, req_get->set.index);
strncpy(req_get->set.name, set ? set->name : "", strncpy(req_get->set.name, set ? set->name : "",
IPSET_MAXNAMELEN); IPSET_MAXNAMELEN);
nfnl_unlock(NFNL_SUBSYS_IPSET); nfnl_unlock(NFNL_SUBSYS_IPSET);
...@@ -1945,7 +1947,6 @@ ip_set_net_init(struct net *net) ...@@ -1945,7 +1947,6 @@ ip_set_net_init(struct net *net)
return -ENOMEM; return -ENOMEM;
inst->is_deleted = 0; inst->is_deleted = 0;
rcu_assign_pointer(inst->ip_set_list, list); rcu_assign_pointer(inst->ip_set_list, list);
pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
return 0; return 0;
} }
...@@ -1960,7 +1961,7 @@ ip_set_net_exit(struct net *net) ...@@ -1960,7 +1961,7 @@ ip_set_net_exit(struct net *net)
inst->is_deleted = 1; /* flag for ip_set_nfnl_put */ inst->is_deleted = 1; /* flag for ip_set_nfnl_put */
for (i = 0; i < inst->ip_set_max; i++) { for (i = 0; i < inst->ip_set_max; i++) {
set = nfnl_set(inst, i); set = ip_set(inst, i);
if (set != NULL) if (set != NULL)
ip_set_destroy_set(inst, i); ip_set_destroy_set(inst, i);
} }
...@@ -1996,6 +1997,7 @@ ip_set_init(void) ...@@ -1996,6 +1997,7 @@ ip_set_init(void)
nfnetlink_subsys_unregister(&ip_set_netlink_subsys); nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
return ret; return ret;
} }
pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL);
return 0; return 0;
} }
......
...@@ -263,6 +263,9 @@ struct htype { ...@@ -263,6 +263,9 @@ struct htype {
u32 maxelem; /* max elements in the hash */ u32 maxelem; /* max elements in the hash */
u32 elements; /* current element (vs timeout) */ u32 elements; /* current element (vs timeout) */
u32 initval; /* random jhash init value */ u32 initval; /* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
u32 markmask; /* markmask value for mark mask to store */
#endif
struct timer_list gc; /* garbage collection when timeout enabled */ struct timer_list gc; /* garbage collection when timeout enabled */
struct mtype_elem next; /* temporary storage for uadd */ struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_MULTI #ifdef IP_SET_HASH_WITH_MULTI
...@@ -453,6 +456,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) ...@@ -453,6 +456,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
a->timeout == b->timeout && a->timeout == b->timeout &&
#ifdef IP_SET_HASH_WITH_NETMASK #ifdef IP_SET_HASH_WITH_NETMASK
x->netmask == y->netmask && x->netmask == y->netmask &&
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
x->markmask == y->markmask &&
#endif #endif
a->extensions == b->extensions; a->extensions == b->extensions;
} }
...@@ -627,6 +633,18 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, ...@@ -627,6 +633,18 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
bool flag_exist = flags & IPSET_FLAG_EXIST; bool flag_exist = flags & IPSET_FLAG_EXIST;
u32 key, multi = 0; u32 key, multi = 0;
if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) {
rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
key = HKEY(value, h->initval, t->htable_bits);
n = hbucket(t,key);
if (n->pos) {
/* Choosing the first entry in the array to replace */
j = 0;
goto reuse_slot;
}
rcu_read_unlock_bh();
}
if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
/* FIXME: when set is full, we slow down here */ /* FIXME: when set is full, we slow down here */
mtype_expire(set, h, NLEN(set->family), set->dsize); mtype_expire(set, h, NLEN(set->family), set->dsize);
...@@ -907,6 +925,10 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) ...@@ -907,6 +925,10 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
if (h->netmask != HOST_MASK && if (h->netmask != HOST_MASK &&
nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask)) nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
goto nla_put_failure; goto nla_put_failure;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
goto nla_put_failure;
#endif #endif
if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
...@@ -1016,6 +1038,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, ...@@ -1016,6 +1038,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
struct nlattr *tb[], u32 flags) struct nlattr *tb[], u32 flags)
{ {
u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
#ifdef IP_SET_HASH_WITH_MARKMASK
u32 markmask;
#endif
u8 hbits; u8 hbits;
#ifdef IP_SET_HASH_WITH_NETMASK #ifdef IP_SET_HASH_WITH_NETMASK
u8 netmask; u8 netmask;
...@@ -1026,6 +1051,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, ...@@ -1026,6 +1051,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
return -IPSET_ERR_INVALID_FAMILY; return -IPSET_ERR_INVALID_FAMILY;
#ifdef IP_SET_HASH_WITH_MARKMASK
markmask = 0xffffffff;
#endif
#ifdef IP_SET_HASH_WITH_NETMASK #ifdef IP_SET_HASH_WITH_NETMASK
netmask = set->family == NFPROTO_IPV4 ? 32 : 128; netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
pr_debug("Create set %s with family %s\n", pr_debug("Create set %s with family %s\n",
...@@ -1034,6 +1063,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, ...@@ -1034,6 +1063,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
#ifdef IP_SET_HASH_WITH_MARKMASK
!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK) ||
#endif
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL; return -IPSET_ERR_PROTOCOL;
...@@ -1057,6 +1089,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, ...@@ -1057,6 +1089,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
return -IPSET_ERR_INVALID_NETMASK; return -IPSET_ERR_INVALID_NETMASK;
} }
#endif #endif
#ifdef IP_SET_HASH_WITH_MARKMASK
if (tb[IPSET_ATTR_MARKMASK]) {
markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK]));
if ((markmask > 4294967295u) || markmask == 0)
return -IPSET_ERR_INVALID_MARKMASK;
}
#endif
hsize = sizeof(*h); hsize = sizeof(*h);
#ifdef IP_SET_HASH_WITH_NETS #ifdef IP_SET_HASH_WITH_NETS
...@@ -1070,6 +1110,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, ...@@ -1070,6 +1110,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
h->maxelem = maxelem; h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK #ifdef IP_SET_HASH_WITH_NETMASK
h->netmask = netmask; h->netmask = netmask;
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
h->markmask = markmask;
#endif #endif
get_random_bytes(&h->initval, sizeof(h->initval)); get_random_bytes(&h->initval, sizeof(h->initval));
set->timeout = IPSET_NO_TIMEOUT; set->timeout = IPSET_NO_TIMEOUT;
......
...@@ -25,7 +25,8 @@ ...@@ -25,7 +25,8 @@
#define IPSET_TYPE_REV_MIN 0 #define IPSET_TYPE_REV_MIN 0
/* 1 Counters support */ /* 1 Counters support */
#define IPSET_TYPE_REV_MAX 2 /* Comments support */ /* 2 Comments support */
#define IPSET_TYPE_REV_MAX 3 /* Forceadd support */
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
......
/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
* Copyright (C) 2013 Smoothwall Ltd. <vytas.dauksa@smoothwall.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
/* Kernel module implementing an IP set type: the hash:ip,mark type */
#include <linux/jhash.h>
#include <linux/module.h>
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/random.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/netlink.h>
#include <net/tcp.h>
#include <linux/netfilter.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
/* Lowest and highest set type revision implemented by this module;
 * both are advertised through IP_SET_MODULE_DESC() and hash_ipmark_type. */
#define IPSET_TYPE_REV_MIN 0
#define IPSET_TYPE_REV_MAX 1 /* Forceadd support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
IP_SET_MODULE_DESC("hash:ip,mark", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip,mark");
/* Type specific function prefix */
#define HTYPE hash_ipmark
/* Selects the markmask code guarded by #ifdef IP_SET_HASH_WITH_MARKMASK
 * (presumably in ip_set_hash_gen.h, which is included further down);
 * the kadt/uadt functions in this file read h->markmask. */
#define IP_SET_HASH_WITH_MARKMASK
/* IPv4 variant */
/* Member elements */
/* One IPv4 member of a hash:ip,mark set: an address paired with a mark. */
struct hash_ipmark4_elem {
__be32 ip; /* IPv4 address; the uadt path converts it with ntohl()/htonl() */
__u32 mark; /* mark value; kadt/uadt AND it with the set's markmask before use */
};
/* Common functions */
/*
 * Compare two IPv4 elements: they are equal only when both the address
 * and the mark match. @multi belongs to the common callback signature
 * and is not touched here.
 */
static inline bool
hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1,
			const struct hash_ipmark4_elem *ip2,
			u32 *multi)
{
	if (ip1->ip != ip2->ip)
		return false;

	return ip1->mark == ip2->mark;
}
/*
 * Append one IPv4 element (address and mark) to the netlink message @skb.
 *
 * The mark is emitted through htonl()/nla_put_net32().
 *
 * Returns false on success and true when either nla_put call reports
 * failure. The function is declared bool, so it returns true/false rather
 * than 1/0.
 */
static bool
hash_ipmark4_data_list(struct sk_buff *skb,
		       const struct hash_ipmark4_elem *data)
{
	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
	    nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
		goto nla_put_failure;
	return false;

nla_put_failure:
	return true;
}
/*
 * Copy the address of @d into @next. Only the ip field is copied, the
 * mark is left alone. hash_ipmark4_uadt() reads h->next.ip to resume an
 * address range when it is called with retried set.
 */
static inline void
hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
const struct hash_ipmark4_elem *d)
{
next->ip = d->ip;
}
/* Parameters for the IPv4 instantiation of the generic hash code;
 * presumably consumed by ip_set_hash_gen.h. They are #undef'd and
 * redefined before the header is included again for IPv6. */
#define MTYPE hash_ipmark4
#define PF 4
#define HOST_MASK 32
#define HKEY_DATALEN sizeof(struct hash_ipmark4_elem)
#include "ip_set_hash_gen.h"
/*
 * Packet-path add/del/test for the IPv4 variant.
 *
 * Builds the element from @skb: the address is filled in by ip4addrptr()
 * according to IPSET_DIM_ONE_SRC in opt->flags, and the mark is skb->mark
 * ANDed with the set's markmask. The element is then handed to the
 * handler that set->variant->adt[] provides for @adt, whose return value
 * is passed back unchanged.
 */
static int
hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb,
		  const struct xt_action_param *par,
		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
	const struct hash_ipmark *h = set->data;
	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
	struct hash_ipmark4_elem e = { };

	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
	e.mark = skb->mark & h->markmask;

	return set->variant->adt[adt](set, &e, &ext, &opt->ext, opt->cmdflags);
}
/*
 * Userspace (netlink) add/del/test for the IPv4 variant.
 *
 * @set:     set being operated on; set->data holds the markmask
 * @tb:      parsed netlink attributes of the request
 * @adt:     operation, used to pick the handler from set->variant->adt[]
 * @lineno:  filled from IPSET_ATTR_LINENO when that attribute is present
 * @flags:   command flags, passed to the handler and to ip_set_eexist()
 * @retried: when true, a range operation restarts from h->next.ip
 *
 * IPSET_ATTR_IP and IPSET_ATTR_MARK are mandatory. A single element is
 * processed for IPSET_TEST or when neither IPSET_ATTR_IP_TO nor
 * IPSET_ATTR_CIDR is given; otherwise every address of the range is passed
 * to the handler with the same (masked) mark.
 *
 * Returns 0 on success, -IPSET_ERR_PROTOCOL for a malformed request,
 * -IPSET_ERR_INVALID_CIDR for a CIDR of 0 or above 32, or the error code
 * reported by an attribute helper or the handler.
 */
static int
hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
	const struct hash_ipmark *h = set->data;
	ipset_adtfn adtfn = set->variant->adt[adt];
	struct hash_ipmark4_elem e = { };
	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
	u32 ip, ip_to = 0;
	int ret;

	if (unlikely(!tb[IPSET_ATTR_IP] ||
		     !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
		return -IPSET_ERR_PROTOCOL;

	if (tb[IPSET_ATTR_LINENO])
		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);

	/*
	 * Check each helper separately so its own error code is returned;
	 * "a() || b()" would collapse every failure to the value 1.
	 */
	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
	if (ret)
		return ret;

	ret = ip_set_get_extensions(set, tb, &ext);
	if (ret)
		return ret;

	e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
	e.mark &= h->markmask;

	/* Single element: no range requested, or a plain membership test */
	if (adt == IPSET_TEST ||
	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
		ret = adtfn(set, &e, &ext, &ext, flags);
		return ip_set_eexist(ret, flags) ? 0 : ret;
	}

	ip_to = ip = ntohl(e.ip);
	if (tb[IPSET_ATTR_IP_TO]) {
		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
		if (ret)
			return ret;
		/* Accept the range bounds in either order */
		if (ip > ip_to)
			swap(ip, ip_to);
	} else if (tb[IPSET_ATTR_CIDR]) {
		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);

		if (!cidr || cidr > 32)
			return -IPSET_ERR_INVALID_CIDR;
		ip_set_mask_from_to(ip, ip_to, cidr);
	}

	/* On a retry, continue from the address saved in h->next */
	if (retried)
		ip = ntohl(h->next.ip);

	for (; !before(ip_to, ip); ip++) {
		e.ip = htonl(ip);
		ret = adtfn(set, &e, &ext, &ext, flags);

		if (ret && !ip_set_eexist(ret, flags))
			return ret;
		/* Errors accepted by ip_set_eexist() do not abort the range */
		ret = 0;
	}
	return ret;
}
/* IPv6 variant */
/* One IPv6 member of a hash:ip,mark set: an address paired with a mark. */
struct hash_ipmark6_elem {
union nf_inet_addr ip; /* address; this file only accesses the in6 member */
__u32 mark; /* mark value; kadt/uadt AND it with the set's markmask before use */
};
/* Common functions */
/*
 * Compare two IPv6 elements: they are equal only when both the address
 * and the mark match. @multi belongs to the common callback signature
 * and is not touched here.
 */
static inline bool
hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1,
			const struct hash_ipmark6_elem *ip2,
			u32 *multi)
{
	if (!ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6))
		return false;

	return ip1->mark == ip2->mark;
}
/*
 * Append one IPv6 element (address and mark) to the netlink message @skb.
 *
 * The mark is emitted through htonl()/nla_put_net32().
 *
 * Returns false on success and true when either nla_put call reports
 * failure. The function is declared bool, so it returns true/false rather
 * than 1/0.
 */
static bool
hash_ipmark6_data_list(struct sk_buff *skb,
		       const struct hash_ipmark6_elem *data)
{
	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
	    nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
		goto nla_put_failure;
	return false;

nla_put_failure:
	return true;
}
/*
 * IPv6 counterpart of hash_ipmark4_data_next(). It is intentionally empty:
 * hash_ipmark6_uadt() rejects IPSET_ATTR_IP_TO and IPSET_ATTR_CIDR, so
 * there is never an address range to resume.
 *
 * @next is an IPv6 element; the original declared it as the IPv4 element
 * type, which does not match the IPv6 instantiation.
 */
static inline void
hash_ipmark6_data_next(struct hash_ipmark6_elem *next,
		       const struct hash_ipmark6_elem *d)
{
}
/* Drop the IPv4 parameters and set up the IPv6 ones before including
 * ip_set_hash_gen.h a second time. */
#undef MTYPE
#undef PF
#undef HOST_MASK
#undef HKEY_DATALEN
#define MTYPE hash_ipmark6
#define PF 6
#define HOST_MASK 128
#define HKEY_DATALEN sizeof(struct hash_ipmark6_elem)
/* Presumably makes this inclusion also emit hash_ipmark_create(), which
 * hash_ipmark_type.create refers to — confirm in ip_set_hash_gen.h */
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
/*
 * Packet-path add/del/test for the IPv6 variant.
 *
 * Builds the element from @skb: the address is filled in by ip6addrptr()
 * according to IPSET_DIM_ONE_SRC in opt->flags, and the mark is skb->mark
 * ANDed with the set's markmask. The element is then handed to the
 * handler that set->variant->adt[] provides for @adt, whose return value
 * is passed back unchanged.
 */
static int
hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
		  const struct xt_action_param *par,
		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
	const struct hash_ipmark *h = set->data;
	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
	struct hash_ipmark6_elem e = { };

	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
	e.mark = skb->mark & h->markmask;

	return set->variant->adt[adt](set, &e, &ext, &opt->ext, opt->cmdflags);
}
/*
 * Userspace (netlink) add/del/test for the IPv6 variant.
 *
 * @set:     set being operated on; set->data holds the markmask
 * @tb:      parsed netlink attributes of the request
 * @adt:     operation, used to pick the handler from set->variant->adt[]
 * @lineno:  filled from IPSET_ATTR_LINENO when that attribute is present
 * @flags:   command flags, passed to the handler and to ip_set_eexist()
 * @retried: not used here, only single elements are handled
 *
 * IPSET_ATTR_IP and IPSET_ATTR_MARK are mandatory; IPSET_ATTR_IP_TO and
 * IPSET_ATTR_CIDR are rejected, so exactly one element is processed.
 *
 * Returns 0 on success, -IPSET_ERR_PROTOCOL for a malformed request, or
 * the error code reported by an attribute helper or the handler.
 */
static int
hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
	const struct hash_ipmark *h = set->data;
	ipset_adtfn adtfn = set->variant->adt[adt];
	struct hash_ipmark6_elem e = { };
	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
	int ret;

	if (unlikely(!tb[IPSET_ATTR_IP] ||
		     !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
		     tb[IPSET_ATTR_IP_TO] ||
		     tb[IPSET_ATTR_CIDR]))
		return -IPSET_ERR_PROTOCOL;

	if (tb[IPSET_ATTR_LINENO])
		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);

	/*
	 * Check each helper separately so its own error code is returned;
	 * "a() || b()" would collapse every failure to the value 1.
	 */
	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
	if (ret)
		return ret;

	ret = ip_set_get_extensions(set, tb, &ext);
	if (ret)
		return ret;

	e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
	e.mark &= h->markmask;

	/*
	 * The test and add/del paths of the original were duplicates with
	 * the same outcome: an error accepted by ip_set_eexist() becomes 0,
	 * anything else is returned as is.
	 */
	ret = adtfn(set, &e, &ext, &ext, flags);
	return ip_set_eexist(ret, flags) ? 0 : ret;
}
/*
 * Set type descriptor for "hash:ip,mark". It is handed to
 * ip_set_type_register()/ip_set_type_unregister() by the module init and
 * exit functions.
 */
static struct ip_set_type hash_ipmark_type __read_mostly = {
.name = "hash:ip,mark",
.protocol = IPSET_PROTOCOL,
.features = IPSET_TYPE_IP | IPSET_TYPE_MARK,
.dimension = IPSET_DIM_TWO,
.family = NFPROTO_UNSPEC,
.revision_min = IPSET_TYPE_REV_MIN,
.revision_max = IPSET_TYPE_REV_MAX,
.create = hash_ipmark_create,
/* Attribute types for create requests (presumably used for netlink
 * validation by the ipset core) */
.create_policy = {
[IPSET_ATTR_MARKMASK] = { .type = NLA_U32 },
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
[IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
[IPSET_ATTR_PROBES] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
/* Attribute types for add/del/test requests; the uadt functions above
 * read IP, IP_TO, MARK, CIDR and LINENO from the parsed table */
.adt_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
[IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
[IPSET_ATTR_MARK] = { .type = NLA_U32 },
[IPSET_ATTR_CIDR] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
},
.me = THIS_MODULE,
};
/* Module entry point: register the hash:ip,mark set type and return the
 * result of ip_set_type_register() unchanged. */
static int __init
hash_ipmark_init(void)
{
return ip_set_type_register(&hash_ipmark_type);
}
/* Module exit point: unregister the hash:ip,mark set type. */
static void __exit
hash_ipmark_fini(void)
{
ip_set_type_unregister(&hash_ipmark_type);
}
/* Hook the two functions above into module load and unload */
module_init(hash_ipmark_init);
module_exit(hash_ipmark_fini);
...@@ -27,7 +27,8 @@ ...@@ -27,7 +27,8 @@
#define IPSET_TYPE_REV_MIN 0 #define IPSET_TYPE_REV_MIN 0
/* 1 SCTP and UDPLITE support added */ /* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */ /* 2 Counters support added */
#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ /* 3 Comments support added */
#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
......
...@@ -27,7 +27,8 @@ ...@@ -27,7 +27,8 @@
#define IPSET_TYPE_REV_MIN 0 #define IPSET_TYPE_REV_MIN 0
/* 1 SCTP and UDPLITE support added */ /* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */ /* 2 Counters support added */
#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ /* 3 Comments support added */
#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
......
...@@ -29,7 +29,8 @@ ...@@ -29,7 +29,8 @@
/* 2 Range as input support for IPv4 added */ /* 2 Range as input support for IPv4 added */
/* 3 nomatch flag support added */ /* 3 nomatch flag support added */
/* 4 Counters support added */ /* 4 Counters support added */
#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ /* 5 Comments support added */
#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
......
...@@ -26,7 +26,8 @@ ...@@ -26,7 +26,8 @@
/* 1 Range as input support for IPv4 added */ /* 1 Range as input support for IPv4 added */
/* 2 nomatch flag support added */ /* 2 nomatch flag support added */
/* 3 Counters support added */ /* 3 Counters support added */
#define IPSET_TYPE_REV_MAX 4 /* Comments support added */ /* 4 Comments support added */
#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
......
...@@ -27,7 +27,8 @@ ...@@ -27,7 +27,8 @@
/* 1 nomatch flag support added */ /* 1 nomatch flag support added */
/* 2 /0 support added */ /* 2 /0 support added */
/* 3 Counters support added */ /* 3 Counters support added */
#define IPSET_TYPE_REV_MAX 4 /* Comments support added */ /* 4 Comments support added */
#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#include <linux/netfilter/ipset/ip_set_hash.h> #include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0 #define IPSET_TYPE_REV_MIN 0
#define IPSET_TYPE_REV_MAX 0 #define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
...@@ -112,10 +112,10 @@ hash_netnet4_data_list(struct sk_buff *skb, ...@@ -112,10 +112,10 @@ hash_netnet4_data_list(struct sk_buff *skb,
(flags && (flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure; goto nla_put_failure;
return 0; return false;
nla_put_failure: nla_put_failure:
return 1; return true;
} }
static inline void static inline void
...@@ -334,10 +334,10 @@ hash_netnet6_data_list(struct sk_buff *skb, ...@@ -334,10 +334,10 @@ hash_netnet6_data_list(struct sk_buff *skb,
(flags && (flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure; goto nla_put_failure;
return 0; return false;
nla_put_failure: nla_put_failure:
return 1; return true;
} }
static inline void static inline void
......
...@@ -28,7 +28,8 @@ ...@@ -28,7 +28,8 @@
/* 2 Range as input support for IPv4 added */ /* 2 Range as input support for IPv4 added */
/* 3 nomatch flag support added */ /* 3 nomatch flag support added */
/* 4 Counters support added */ /* 4 Counters support added */
#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ /* 5 Comments support added */
#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
......
...@@ -25,7 +25,8 @@ ...@@ -25,7 +25,8 @@
#include <linux/netfilter/ipset/ip_set_hash.h> #include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0 #define IPSET_TYPE_REV_MIN 0
#define IPSET_TYPE_REV_MAX 0 /* Comments support added */ /* 0 Comments support added */
#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
......
...@@ -7,8 +7,8 @@ ...@@ -7,8 +7,8 @@
#define E(a, b, c, d) \ #define E(a, b, c, d) \
{.ip6 = { \ {.ip6 = { \
__constant_htonl(a), __constant_htonl(b), \ htonl(a), htonl(b), \
__constant_htonl(c), __constant_htonl(d), \ htonl(c), htonl(d), \
} } } }
/* /*
......
...@@ -3580,7 +3580,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) ...@@ -3580,7 +3580,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
} }
static const struct genl_ops ip_vs_genl_ops[] __read_mostly = { static const struct genl_ops ip_vs_genl_ops[] = {
{ {
.cmd = IPVS_CMD_NEW_SERVICE, .cmd = IPVS_CMD_NEW_SERVICE,
.flags = GENL_ADMIN_PERM, .flags = GENL_ADMIN_PERM,
......
...@@ -238,7 +238,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) ...@@ -238,7 +238,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
spin_lock_bh(&svc->sched_lock); spin_lock_bh(&svc->sched_lock);
tbl->dead = 1; tbl->dead = 1;
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblc_del(en); ip_vs_lblc_del(en);
atomic_dec(&tbl->entries); atomic_dec(&tbl->entries);
...@@ -265,7 +265,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) ...@@ -265,7 +265,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
unsigned long now = jiffies; unsigned long now = jiffies;
int i, j; int i, j;
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK; j = (j + 1) & IP_VS_LBLC_TAB_MASK;
spin_lock(&svc->sched_lock); spin_lock(&svc->sched_lock);
...@@ -321,7 +321,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) ...@@ -321,7 +321,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
if (goal > tbl->max_size/2) if (goal > tbl->max_size/2)
goal = tbl->max_size/2; goal = tbl->max_size/2;
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK; j = (j + 1) & IP_VS_LBLC_TAB_MASK;
spin_lock(&svc->sched_lock); spin_lock(&svc->sched_lock);
...@@ -340,7 +340,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) ...@@ -340,7 +340,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
tbl->rover = j; tbl->rover = j;
out: out:
mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL); mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
} }
...@@ -363,7 +363,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) ...@@ -363,7 +363,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
/* /*
* Initialize the hash buckets * Initialize the hash buckets
*/ */
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) {
INIT_HLIST_HEAD(&tbl->bucket[i]); INIT_HLIST_HEAD(&tbl->bucket[i]);
} }
tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
...@@ -536,8 +536,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, ...@@ -536,8 +536,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
/* /*
* IPVS LBLC Scheduler structure * IPVS LBLC Scheduler structure
*/ */
static struct ip_vs_scheduler ip_vs_lblc_scheduler = static struct ip_vs_scheduler ip_vs_lblc_scheduler = {
{
.name = "lblc", .name = "lblc",
.refcnt = ATOMIC_INIT(0), .refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE, .module = THIS_MODULE,
......
This diff is collapsed.
...@@ -66,9 +66,9 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect) ...@@ -66,9 +66,9 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{ {
struct nf_conntrack_expect *exp = (void *)ul_expect; struct nf_conntrack_expect *exp = (void *)ul_expect;
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
nf_ct_unlink_expect(exp); nf_ct_unlink_expect(exp);
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_put(exp); nf_ct_expect_put(exp);
} }
...@@ -155,6 +155,18 @@ nf_ct_find_expectation(struct net *net, u16 zone, ...@@ -155,6 +155,18 @@ nf_ct_find_expectation(struct net *net, u16 zone,
if (!nf_ct_is_confirmed(exp->master)) if (!nf_ct_is_confirmed(exp->master))
return NULL; return NULL;
/* Avoid race with other CPUs, that for exp->master ct, is
* about to invoke ->destroy(), or nf_ct_delete() via timeout
* or early_drop().
*
* The atomic_inc_not_zero() check tells: If that fails, we
* know that the ct is being destroyed. If it succeeds, we
* can be sure the ct cannot disappear underneath.
*/
if (unlikely(nf_ct_is_dying(exp->master) ||
!atomic_inc_not_zero(&exp->master->ct_general.use)))
return NULL;
if (exp->flags & NF_CT_EXPECT_PERMANENT) { if (exp->flags & NF_CT_EXPECT_PERMANENT) {
atomic_inc(&exp->use); atomic_inc(&exp->use);
return exp; return exp;
...@@ -162,6 +174,8 @@ nf_ct_find_expectation(struct net *net, u16 zone, ...@@ -162,6 +174,8 @@ nf_ct_find_expectation(struct net *net, u16 zone,
nf_ct_unlink_expect(exp); nf_ct_unlink_expect(exp);
return exp; return exp;
} }
/* Undo exp->master refcnt increase, if del_timer() failed */
nf_ct_put(exp->master);
return NULL; return NULL;
} }
...@@ -177,12 +191,14 @@ void nf_ct_remove_expectations(struct nf_conn *ct) ...@@ -177,12 +191,14 @@ void nf_ct_remove_expectations(struct nf_conn *ct)
if (!help) if (!help)
return; return;
spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if (del_timer(&exp->timeout)) { if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp); nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp); nf_ct_expect_put(exp);
} }
} }
spin_unlock_bh(&nf_conntrack_expect_lock);
} }
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations); EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
...@@ -217,12 +233,12 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, ...@@ -217,12 +233,12 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
/* Generally a bad idea to call this: could have matched already. */ /* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{ {
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
if (del_timer(&exp->timeout)) { if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp); nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp); nf_ct_expect_put(exp);
} }
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
} }
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related); EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
...@@ -335,7 +351,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) ...@@ -335,7 +351,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
setup_timer(&exp->timeout, nf_ct_expectation_timed_out, setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp); (unsigned long)exp);
helper = rcu_dereference_protected(master_help->helper, helper = rcu_dereference_protected(master_help->helper,
lockdep_is_held(&nf_conntrack_lock)); lockdep_is_held(&nf_conntrack_expect_lock));
if (helper) { if (helper) {
exp->timeout.expires = jiffies + exp->timeout.expires = jiffies +
helper->expect_policy[exp->class].timeout * HZ; helper->expect_policy[exp->class].timeout * HZ;
...@@ -395,7 +411,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) ...@@ -395,7 +411,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
} }
/* Will be over limit? */ /* Will be over limit? */
helper = rcu_dereference_protected(master_help->helper, helper = rcu_dereference_protected(master_help->helper,
lockdep_is_held(&nf_conntrack_lock)); lockdep_is_held(&nf_conntrack_expect_lock));
if (helper) { if (helper) {
p = &helper->expect_policy[expect->class]; p = &helper->expect_policy[expect->class];
if (p->max_expected && if (p->max_expected &&
...@@ -417,12 +433,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) ...@@ -417,12 +433,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
return ret; return ret;
} }
int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
u32 portid, int report) u32 portid, int report)
{ {
int ret; int ret;
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
ret = __nf_ct_expect_check(expect); ret = __nf_ct_expect_check(expect);
if (ret <= 0) if (ret <= 0)
goto out; goto out;
...@@ -430,11 +446,11 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, ...@@ -430,11 +446,11 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
ret = nf_ct_expect_insert(expect); ret = nf_ct_expect_insert(expect);
if (ret < 0) if (ret < 0)
goto out; goto out;
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
return ret; return ret;
out: out:
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
......
...@@ -1476,7 +1476,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, ...@@ -1476,7 +1476,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_refresh(ct, skb, info->timeout * HZ); nf_ct_refresh(ct, skb, info->timeout * HZ);
/* Set expect timeout */ /* Set expect timeout */
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3, exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3,
info->sig_port[!dir]); info->sig_port[!dir]);
if (exp) { if (exp) {
...@@ -1486,7 +1486,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, ...@@ -1486,7 +1486,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_dump_tuple(&exp->tuple); nf_ct_dump_tuple(&exp->tuple);
set_expect_timeout(exp, info->timeout); set_expect_timeout(exp, info->timeout);
} }
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
} }
return 0; return 0;
......
...@@ -250,16 +250,14 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, ...@@ -250,16 +250,14 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
} }
EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
/* appropriate ct lock protecting must be taken by caller */
static inline int unhelp(struct nf_conntrack_tuple_hash *i, static inline int unhelp(struct nf_conntrack_tuple_hash *i,
const struct nf_conntrack_helper *me) const struct nf_conntrack_helper *me)
{ {
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
struct nf_conn_help *help = nfct_help(ct); struct nf_conn_help *help = nfct_help(ct);
if (help && rcu_dereference_protected( if (help && rcu_dereference_raw(help->helper) == me) {
help->helper,
lockdep_is_held(&nf_conntrack_lock)
) == me) {
nf_conntrack_event(IPCT_HELPER, ct); nf_conntrack_event(IPCT_HELPER, ct);
RCU_INIT_POINTER(help->helper, NULL); RCU_INIT_POINTER(help->helper, NULL);
} }
...@@ -284,17 +282,17 @@ static LIST_HEAD(nf_ct_helper_expectfn_list); ...@@ -284,17 +282,17 @@ static LIST_HEAD(nf_ct_helper_expectfn_list);
void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n) void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n)
{ {
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
list_add_rcu(&n->head, &nf_ct_helper_expectfn_list); list_add_rcu(&n->head, &nf_ct_helper_expectfn_list);
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
} }
EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register); EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register);
void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n)
{ {
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
list_del_rcu(&n->head); list_del_rcu(&n->head);
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
} }
EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister);
...@@ -396,15 +394,17 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, ...@@ -396,15 +394,17 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
const struct hlist_node *next; const struct hlist_node *next;
const struct hlist_nulls_node *nn; const struct hlist_nulls_node *nn;
unsigned int i; unsigned int i;
int cpu;
/* Get rid of expectations */ /* Get rid of expectations */
spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) { for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next, hlist_for_each_entry_safe(exp, next,
&net->ct.expect_hash[i], hnode) { &net->ct.expect_hash[i], hnode) {
struct nf_conn_help *help = nfct_help(exp->master); struct nf_conn_help *help = nfct_help(exp->master);
if ((rcu_dereference_protected( if ((rcu_dereference_protected(
help->helper, help->helper,
lockdep_is_held(&nf_conntrack_lock) lockdep_is_held(&nf_conntrack_expect_lock)
) == me || exp->helper == me) && ) == me || exp->helper == me) &&
del_timer(&exp->timeout)) { del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp); nf_ct_unlink_expect(exp);
...@@ -412,14 +412,27 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, ...@@ -412,14 +412,27 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
} }
} }
} }
spin_unlock_bh(&nf_conntrack_expect_lock);
/* Get rid of expecteds, set helpers to NULL. */ /* Get rid of expecteds, set helpers to NULL. */
hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) for_each_possible_cpu(cpu) {
unhelp(h, me); struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
for (i = 0; i < net->ct.htable_size; i++) {
hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) spin_lock_bh(&pcpu->lock);
hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
unhelp(h, me); unhelp(h, me);
spin_unlock_bh(&pcpu->lock);
}
local_bh_disable();
for (i = 0; i < net->ct.htable_size; i++) {
spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
if (i < net->ct.htable_size) {
hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
unhelp(h, me);
}
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
} }
local_bh_enable();
} }
void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
...@@ -437,10 +450,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) ...@@ -437,10 +450,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
synchronize_rcu(); synchronize_rcu();
rtnl_lock(); rtnl_lock();
spin_lock_bh(&nf_conntrack_lock);
for_each_net(net) for_each_net(net)
__nf_conntrack_helper_unregister(me, net); __nf_conntrack_helper_unregister(me, net);
spin_unlock_bh(&nf_conntrack_lock);
rtnl_unlock(); rtnl_unlock();
} }
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
......
...@@ -764,14 +764,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -764,14 +764,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family; u_int8_t l3proto = nfmsg->nfgen_family;
int res; int res;
spinlock_t *lockp;
#ifdef CONFIG_NF_CONNTRACK_MARK #ifdef CONFIG_NF_CONNTRACK_MARK
const struct ctnetlink_dump_filter *filter = cb->data; const struct ctnetlink_dump_filter *filter = cb->data;
#endif #endif
spin_lock_bh(&nf_conntrack_lock);
last = (struct nf_conn *)cb->args[1]; last = (struct nf_conn *)cb->args[1];
local_bh_disable();
for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
restart: restart:
lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
spin_lock(lockp);
if (cb->args[0] >= net->ct.htable_size) {
spin_unlock(lockp);
goto out;
}
hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]], hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]],
hnnode) { hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
...@@ -803,16 +812,18 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -803,16 +812,18 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
if (res < 0) { if (res < 0) {
nf_conntrack_get(&ct->ct_general); nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct; cb->args[1] = (unsigned long)ct;
spin_unlock(lockp);
goto out; goto out;
} }
} }
spin_unlock(lockp);
if (cb->args[1]) { if (cb->args[1]) {
cb->args[1] = 0; cb->args[1] = 0;
goto restart; goto restart;
} }
} }
out: out:
spin_unlock_bh(&nf_conntrack_lock); local_bh_enable();
if (last) if (last)
nf_ct_put(last); nf_ct_put(last);
...@@ -966,7 +977,6 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, ...@@ -966,7 +977,6 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name,
return 0; return 0;
} }
#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
[CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, [CTA_TUPLE_ORIG] = { .type = NLA_NESTED },
[CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, [CTA_TUPLE_REPLY] = { .type = NLA_NESTED },
...@@ -984,9 +994,9 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { ...@@ -984,9 +994,9 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
[CTA_ZONE] = { .type = NLA_U16 }, [CTA_ZONE] = { .type = NLA_U16 },
[CTA_MARK_MASK] = { .type = NLA_U32 }, [CTA_MARK_MASK] = { .type = NLA_U32 },
[CTA_LABELS] = { .type = NLA_BINARY, [CTA_LABELS] = { .type = NLA_BINARY,
.len = __CTA_LABELS_MAX_LENGTH }, .len = NF_CT_LABELS_MAX_SIZE },
[CTA_LABELS_MASK] = { .type = NLA_BINARY, [CTA_LABELS_MASK] = { .type = NLA_BINARY,
.len = __CTA_LABELS_MAX_LENGTH }, .len = NF_CT_LABELS_MAX_SIZE },
}; };
static int static int
...@@ -1138,50 +1148,65 @@ static int ctnetlink_done_list(struct netlink_callback *cb) ...@@ -1138,50 +1148,65 @@ static int ctnetlink_done_list(struct netlink_callback *cb)
} }
static int static int
ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
struct hlist_nulls_head *list)
{ {
struct nf_conn *ct, *last; struct nf_conn *ct, *last = NULL;
struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n; struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family; u_int8_t l3proto = nfmsg->nfgen_family;
int res; int res;
int cpu;
struct hlist_nulls_head *list;
struct net *net = sock_net(skb->sk);
if (cb->args[2]) if (cb->args[2])
return 0; return 0;
spin_lock_bh(&nf_conntrack_lock); if (cb->args[0] == nr_cpu_ids)
last = (struct nf_conn *)cb->args[1]; return 0;
restart:
hlist_nulls_for_each_entry(h, n, list, hnnode) { for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
ct = nf_ct_tuplehash_to_ctrack(h); struct ct_pcpu *pcpu;
if (l3proto && nf_ct_l3num(ct) != l3proto)
if (!cpu_possible(cpu))
continue; continue;
if (cb->args[1]) {
if (ct != last) pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
spin_lock_bh(&pcpu->lock);
last = (struct nf_conn *)cb->args[1];
list = dying ? &pcpu->dying : &pcpu->unconfirmed;
restart:
hlist_nulls_for_each_entry(h, n, list, hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (l3proto && nf_ct_l3num(ct) != l3proto)
continue; continue;
cb->args[1] = 0; if (cb->args[1]) {
} if (ct != last)
rcu_read_lock(); continue;
res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->args[1] = 0;
cb->nlh->nlmsg_seq, }
NFNL_MSG_TYPE(cb->nlh->nlmsg_type), rcu_read_lock();
ct); res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
rcu_read_unlock(); cb->nlh->nlmsg_seq,
if (res < 0) { NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
nf_conntrack_get(&ct->ct_general); ct);
cb->args[1] = (unsigned long)ct; rcu_read_unlock();
goto out; if (res < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
spin_unlock_bh(&pcpu->lock);
goto out;
}
} }
if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
} else
cb->args[2] = 1;
spin_unlock_bh(&pcpu->lock);
} }
if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
} else
cb->args[2] = 1;
out: out:
spin_unlock_bh(&nf_conntrack_lock);
if (last) if (last)
nf_ct_put(last); nf_ct_put(last);
...@@ -1191,9 +1216,7 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, ...@@ -1191,9 +1216,7 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb,
static int static int
ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
{ {
struct net *net = sock_net(skb->sk); return ctnetlink_dump_list(skb, cb, true);
return ctnetlink_dump_list(skb, cb, &net->ct.dying);
} }
static int static int
...@@ -1215,9 +1238,7 @@ ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb, ...@@ -1215,9 +1238,7 @@ ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb,
static int static int
ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
{ {
struct net *net = sock_net(skb->sk); return ctnetlink_dump_list(skb, cb, false);
return ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed);
} }
static int static int
...@@ -1361,14 +1382,14 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) ...@@ -1361,14 +1382,14 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
nf_ct_protonum(ct)); nf_ct_protonum(ct));
if (helper == NULL) { if (helper == NULL) {
#ifdef CONFIG_MODULES #ifdef CONFIG_MODULES
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
if (request_module("nfct-helper-%s", helpname) < 0) { if (request_module("nfct-helper-%s", helpname) < 0) {
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct)); nf_ct_protonum(ct));
if (helper) if (helper)
...@@ -1804,9 +1825,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, ...@@ -1804,9 +1825,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
err = -EEXIST; err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h); ct = nf_ct_tuplehash_to_ctrack(h);
if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
err = ctnetlink_change_conntrack(ct, cda); err = ctnetlink_change_conntrack(ct, cda);
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
if (err == 0) { if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) | nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
(1 << IPCT_ASSURED) | (1 << IPCT_ASSURED) |
...@@ -2135,9 +2156,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) ...@@ -2135,9 +2156,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
if (ret < 0) if (ret < 0)
return ret; return ret;
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
return ret; return ret;
} }
...@@ -2692,13 +2713,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, ...@@ -2692,13 +2713,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
} }
/* after list removal, usage count == 1 */ /* after list removal, usage count == 1 */
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
if (del_timer(&exp->timeout)) { if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
nlmsg_report(nlh)); nlmsg_report(nlh));
nf_ct_expect_put(exp); nf_ct_expect_put(exp);
} }
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
/* have to put what we 'get' above. /* have to put what we 'get' above.
* after this line usage count == 0 */ * after this line usage count == 0 */
nf_ct_expect_put(exp); nf_ct_expect_put(exp);
...@@ -2707,7 +2728,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, ...@@ -2707,7 +2728,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
struct nf_conn_help *m_help; struct nf_conn_help *m_help;
/* delete all expectations for this helper */ /* delete all expectations for this helper */
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) { for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next, hlist_for_each_entry_safe(exp, next,
&net->ct.expect_hash[i], &net->ct.expect_hash[i],
...@@ -2722,10 +2743,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, ...@@ -2722,10 +2743,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
} }
} }
} }
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
} else { } else {
/* This basically means we have to flush everything*/ /* This basically means we have to flush everything*/
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) { for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next, hlist_for_each_entry_safe(exp, next,
&net->ct.expect_hash[i], &net->ct.expect_hash[i],
...@@ -2738,7 +2759,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, ...@@ -2738,7 +2759,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
} }
} }
} }
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
} }
return 0; return 0;
...@@ -2964,11 +2985,11 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, ...@@ -2964,11 +2985,11 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0) if (err < 0)
return err; return err;
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
exp = __nf_ct_expect_find(net, zone, &tuple); exp = __nf_ct_expect_find(net, zone, &tuple);
if (!exp) { if (!exp) {
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
err = -ENOENT; err = -ENOENT;
if (nlh->nlmsg_flags & NLM_F_CREATE) { if (nlh->nlmsg_flags & NLM_F_CREATE) {
err = ctnetlink_create_expect(net, zone, cda, err = ctnetlink_create_expect(net, zone, cda,
...@@ -2982,7 +3003,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, ...@@ -2982,7 +3003,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
err = -EEXIST; err = -EEXIST;
if (!(nlh->nlmsg_flags & NLM_F_EXCL)) if (!(nlh->nlmsg_flags & NLM_F_EXCL))
err = ctnetlink_change_expect(exp, cda); err = ctnetlink_change_expect(exp, cda);
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
return err; return err;
} }
......
...@@ -800,7 +800,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct, ...@@ -800,7 +800,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct,
struct hlist_node *next; struct hlist_node *next;
int found = 0; int found = 0;
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if (exp->class != SIP_EXPECT_SIGNALLING || if (exp->class != SIP_EXPECT_SIGNALLING ||
!nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) || !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) ||
...@@ -815,7 +815,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct, ...@@ -815,7 +815,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct,
found = 1; found = 1;
break; break;
} }
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
return found; return found;
} }
...@@ -825,7 +825,7 @@ static void flush_expectations(struct nf_conn *ct, bool media) ...@@ -825,7 +825,7 @@ static void flush_expectations(struct nf_conn *ct, bool media)
struct nf_conntrack_expect *exp; struct nf_conntrack_expect *exp;
struct hlist_node *next; struct hlist_node *next;
spin_lock_bh(&nf_conntrack_lock); spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media) if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media)
continue; continue;
...@@ -836,7 +836,7 @@ static void flush_expectations(struct nf_conn *ct, bool media) ...@@ -836,7 +836,7 @@ static void flush_expectations(struct nf_conn *ct, bool media)
if (!media) if (!media)
break; break;
} }
spin_unlock_bh(&nf_conntrack_lock); spin_unlock_bh(&nf_conntrack_expect_lock);
} }
static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
......
...@@ -794,9 +794,8 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) ...@@ -794,9 +794,8 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
if (chain->stats) { if (chain->stats) {
/* nfnl_lock is held, add some nfnl function for this, later */
struct nft_stats __percpu *oldstats = struct nft_stats __percpu *oldstats =
rcu_dereference_protected(chain->stats, 1); nft_dereference(chain->stats);
rcu_assign_pointer(chain->stats, newstats); rcu_assign_pointer(chain->stats, newstats);
synchronize_rcu(); synchronize_rcu();
...@@ -1254,10 +1253,11 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx, ...@@ -1254,10 +1253,11 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx,
return err; return err;
} }
static void nf_tables_expr_destroy(struct nft_expr *expr) static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
struct nft_expr *expr)
{ {
if (expr->ops->destroy) if (expr->ops->destroy)
expr->ops->destroy(expr); expr->ops->destroy(ctx, expr);
module_put(expr->ops->type->owner); module_put(expr->ops->type->owner);
} }
...@@ -1296,6 +1296,8 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { ...@@ -1296,6 +1296,8 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
[NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
[NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, [NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
[NFTA_RULE_POSITION] = { .type = NLA_U64 }, [NFTA_RULE_POSITION] = { .type = NLA_U64 },
[NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
}; };
static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
...@@ -1348,6 +1350,10 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, ...@@ -1348,6 +1350,10 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
} }
nla_nest_end(skb, list); nla_nest_end(skb, list);
if (rule->ulen &&
nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule)))
goto nla_put_failure;
return nlmsg_end(skb, nlh); return nlmsg_end(skb, nlh);
nla_put_failure: nla_put_failure:
...@@ -1531,7 +1537,8 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, ...@@ -1531,7 +1537,8 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
return err; return err;
} }
static void nf_tables_rule_destroy(struct nft_rule *rule) static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
struct nft_rule *rule)
{ {
struct nft_expr *expr; struct nft_expr *expr;
...@@ -1541,7 +1548,7 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) ...@@ -1541,7 +1548,7 @@ static void nf_tables_rule_destroy(struct nft_rule *rule)
*/ */
expr = nft_expr_first(rule); expr = nft_expr_first(rule);
while (expr->ops && expr != nft_expr_last(rule)) { while (expr->ops && expr != nft_expr_last(rule)) {
nf_tables_expr_destroy(expr); nf_tables_expr_destroy(ctx, expr);
expr = nft_expr_next(expr); expr = nft_expr_next(expr);
} }
kfree(rule); kfree(rule);
...@@ -1552,7 +1559,7 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) ...@@ -1552,7 +1559,7 @@ static void nf_tables_rule_destroy(struct nft_rule *rule)
static struct nft_expr_info *info; static struct nft_expr_info *info;
static struct nft_rule_trans * static struct nft_rule_trans *
nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx) nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule)
{ {
struct nft_rule_trans *rupd; struct nft_rule_trans *rupd;
...@@ -1560,11 +1567,8 @@ nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx) ...@@ -1560,11 +1567,8 @@ nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx)
if (rupd == NULL) if (rupd == NULL)
return NULL; return NULL;
rupd->chain = ctx->chain; rupd->ctx = *ctx;
rupd->table = ctx->table;
rupd->rule = rule; rupd->rule = rule;
rupd->family = ctx->afi->family;
rupd->nlh = ctx->nlh;
list_add_tail(&rupd->list, &ctx->net->nft.commit_list); list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
return rupd; return rupd;
...@@ -1584,7 +1588,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, ...@@ -1584,7 +1588,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
struct nft_expr *expr; struct nft_expr *expr;
struct nft_ctx ctx; struct nft_ctx ctx;
struct nlattr *tmp; struct nlattr *tmp;
unsigned int size, i, n; unsigned int size, i, n, ulen = 0;
int err, rem; int err, rem;
bool create; bool create;
u64 handle, pos_handle; u64 handle, pos_handle;
...@@ -1650,8 +1654,11 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, ...@@ -1650,8 +1654,11 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
} }
} }
if (nla[NFTA_RULE_USERDATA])
ulen = nla_len(nla[NFTA_RULE_USERDATA]);
err = -ENOMEM; err = -ENOMEM;
rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL); rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL);
if (rule == NULL) if (rule == NULL)
goto err1; goto err1;
...@@ -1659,6 +1666,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, ...@@ -1659,6 +1666,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
rule->handle = handle; rule->handle = handle;
rule->dlen = size; rule->dlen = size;
rule->ulen = ulen;
if (ulen)
nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen);
expr = nft_expr_first(rule); expr = nft_expr_first(rule);
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
...@@ -1671,7 +1682,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, ...@@ -1671,7 +1682,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_REPLACE) { if (nlh->nlmsg_flags & NLM_F_REPLACE) {
if (nft_rule_is_active_next(net, old_rule)) { if (nft_rule_is_active_next(net, old_rule)) {
repl = nf_tables_trans_add(old_rule, &ctx); repl = nf_tables_trans_add(&ctx, old_rule);
if (repl == NULL) { if (repl == NULL) {
err = -ENOMEM; err = -ENOMEM;
goto err2; goto err2;
...@@ -1694,7 +1705,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, ...@@ -1694,7 +1705,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
list_add_rcu(&rule->list, &chain->rules); list_add_rcu(&rule->list, &chain->rules);
} }
if (nf_tables_trans_add(rule, &ctx) == NULL) { if (nf_tables_trans_add(&ctx, rule) == NULL) {
err = -ENOMEM; err = -ENOMEM;
goto err3; goto err3;
} }
...@@ -1709,7 +1720,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, ...@@ -1709,7 +1720,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
kfree(repl); kfree(repl);
} }
err2: err2:
nf_tables_rule_destroy(rule); nf_tables_rule_destroy(&ctx, rule);
err1: err1:
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
if (info[i].ops != NULL) if (info[i].ops != NULL)
...@@ -1723,7 +1734,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) ...@@ -1723,7 +1734,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
{ {
/* You cannot delete the same rule twice */ /* You cannot delete the same rule twice */
if (nft_rule_is_active_next(ctx->net, rule)) { if (nft_rule_is_active_next(ctx->net, rule)) {
if (nf_tables_trans_add(rule, ctx) == NULL) if (nf_tables_trans_add(ctx, rule) == NULL)
return -ENOMEM; return -ENOMEM;
nft_rule_disactivate_next(ctx->net, rule); nft_rule_disactivate_next(ctx->net, rule);
return 0; return 0;
...@@ -1819,10 +1830,10 @@ static int nf_tables_commit(struct sk_buff *skb) ...@@ -1819,10 +1830,10 @@ static int nf_tables_commit(struct sk_buff *skb)
*/ */
if (nft_rule_is_active(net, rupd->rule)) { if (nft_rule_is_active(net, rupd->rule)) {
nft_rule_clear(net, rupd->rule); nft_rule_clear(net, rupd->rule);
nf_tables_rule_notify(skb, rupd->nlh, rupd->table, nf_tables_rule_notify(skb, rupd->ctx.nlh,
rupd->chain, rupd->rule, rupd->ctx.table, rupd->ctx.chain,
NFT_MSG_NEWRULE, 0, rupd->rule, NFT_MSG_NEWRULE, 0,
rupd->family); rupd->ctx.afi->family);
list_del(&rupd->list); list_del(&rupd->list);
kfree(rupd); kfree(rupd);
continue; continue;
...@@ -1830,9 +1841,10 @@ static int nf_tables_commit(struct sk_buff *skb) ...@@ -1830,9 +1841,10 @@ static int nf_tables_commit(struct sk_buff *skb)
/* This rule is in the past, get rid of it */ /* This rule is in the past, get rid of it */
list_del_rcu(&rupd->rule->list); list_del_rcu(&rupd->rule->list);
nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain, nf_tables_rule_notify(skb, rupd->ctx.nlh,
rupd->ctx.table, rupd->ctx.chain,
rupd->rule, NFT_MSG_DELRULE, 0, rupd->rule, NFT_MSG_DELRULE, 0,
rupd->family); rupd->ctx.afi->family);
} }
/* Make sure we don't see any packet traversing old rules */ /* Make sure we don't see any packet traversing old rules */
...@@ -1840,7 +1852,7 @@ static int nf_tables_commit(struct sk_buff *skb) ...@@ -1840,7 +1852,7 @@ static int nf_tables_commit(struct sk_buff *skb)
/* Now we can safely release unused old rules */ /* Now we can safely release unused old rules */
list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
nf_tables_rule_destroy(rupd->rule); nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
list_del(&rupd->list); list_del(&rupd->list);
kfree(rupd); kfree(rupd);
} }
...@@ -1869,7 +1881,7 @@ static int nf_tables_abort(struct sk_buff *skb) ...@@ -1869,7 +1881,7 @@ static int nf_tables_abort(struct sk_buff *skb)
synchronize_rcu(); synchronize_rcu();
list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
nf_tables_rule_destroy(rupd->rule); nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
list_del(&rupd->list); list_del(&rupd->list);
kfree(rupd); kfree(rupd);
} }
...@@ -2430,8 +2442,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, ...@@ -2430,8 +2442,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{ {
list_del(&set->list); list_del(&set->list);
if (!(set->flags & NFT_SET_ANONYMOUS)) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
set->ops->destroy(set); set->ops->destroy(set);
module_put(set->ops->owner); module_put(set->ops->owner);
...@@ -3175,9 +3186,16 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, ...@@ -3175,9 +3186,16 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict) { switch (data->verdict) {
case NF_ACCEPT: default:
case NF_DROP: switch (data->verdict & NF_VERDICT_MASK) {
case NF_QUEUE: case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
break;
default:
return -EINVAL;
}
/* fall through */
case NFT_CONTINUE: case NFT_CONTINUE:
case NFT_BREAK: case NFT_BREAK:
case NFT_RETURN: case NFT_RETURN:
...@@ -3198,8 +3216,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, ...@@ -3198,8 +3216,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->chain = chain; data->chain = chain;
desc->len = sizeof(data); desc->len = sizeof(data);
break; break;
default:
return -EINVAL;
} }
desc->type = NFT_DATA_VERDICT; desc->type = NFT_DATA_VERDICT;
......
...@@ -61,6 +61,14 @@ void nfnl_unlock(__u8 subsys_id) ...@@ -61,6 +61,14 @@ void nfnl_unlock(__u8 subsys_id)
} }
EXPORT_SYMBOL_GPL(nfnl_unlock); EXPORT_SYMBOL_GPL(nfnl_unlock);
#ifdef CONFIG_PROVE_LOCKING
int lockdep_nfnl_is_held(u8 subsys_id)
{
return lockdep_is_held(&table[subsys_id].mutex);
}
EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held);
#endif
int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
{ {
nfnl_lock(n->subsys_id); nfnl_lock(n->subsys_id);
......
...@@ -28,8 +28,6 @@ ...@@ -28,8 +28,6 @@
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/netfilter/nf_log.h> #include <net/netfilter/nf_log.h>
...@@ -75,7 +73,6 @@ struct nfulnl_instance { ...@@ -75,7 +73,6 @@ struct nfulnl_instance {
}; };
#define INSTANCE_BUCKETS 16 #define INSTANCE_BUCKETS 16
static unsigned int hash_init;
static int nfnl_log_net_id __read_mostly; static int nfnl_log_net_id __read_mostly;
...@@ -1067,11 +1064,6 @@ static int __init nfnetlink_log_init(void) ...@@ -1067,11 +1064,6 @@ static int __init nfnetlink_log_init(void)
{ {
int status = -ENOMEM; int status = -ENOMEM;
/* it's not really all that important to have a random value, so
* we can do this from the init function, even if there hasn't
* been that much entropy yet */
get_random_bytes(&hash_init, sizeof(hash_init));
netlink_register_notifier(&nfulnl_rtnl_notifier); netlink_register_notifier(&nfulnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfulnl_subsys); status = nfnetlink_subsys_register(&nfulnl_subsys);
if (status < 0) { if (status < 0) {
......
...@@ -192,7 +192,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, ...@@ -192,7 +192,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
} }
static void static void
nft_target_destroy(const struct nft_expr *expr) nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{ {
struct xt_target *target = expr->ops->data; struct xt_target *target = expr->ops->data;
...@@ -379,7 +379,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, ...@@ -379,7 +379,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
} }
static void static void
nft_match_destroy(const struct nft_expr *expr) nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{ {
struct xt_match *match = expr->ops->data; struct xt_match *match = expr->ops->data;
......
...@@ -19,15 +19,15 @@ ...@@ -19,15 +19,15 @@
#include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>
struct nft_ct { struct nft_ct {
enum nft_ct_keys key:8; enum nft_ct_keys key:8;
enum ip_conntrack_dir dir:8; enum ip_conntrack_dir dir:8;
union{ union {
enum nft_registers dreg:8; enum nft_registers dreg:8;
enum nft_registers sreg:8; enum nft_registers sreg:8;
}; };
uint8_t family;
}; };
static void nft_ct_get_eval(const struct nft_expr *expr, static void nft_ct_get_eval(const struct nft_expr *expr,
...@@ -97,6 +97,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr, ...@@ -97,6 +97,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
goto err; goto err;
strncpy((char *)dest->data, helper->name, sizeof(dest->data)); strncpy((char *)dest->data, helper->name, sizeof(dest->data));
return; return;
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS: {
struct nf_conn_labels *labels = nf_ct_labels_find(ct);
unsigned int size;
if (!labels) {
memset(dest->data, 0, sizeof(dest->data));
return;
}
BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > sizeof(dest->data));
size = labels->words * sizeof(long);
memcpy(dest->data, labels->bits, size);
if (size < sizeof(dest->data))
memset(((char *) dest->data) + size, 0,
sizeof(dest->data) - size);
return;
}
#endif
} }
tuple = &ct->tuplehash[priv->dir].tuple; tuple = &ct->tuplehash[priv->dir].tuple;
...@@ -220,6 +240,9 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr, ...@@ -220,6 +240,9 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr,
#endif #endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK #ifdef CONFIG_NF_CONNTRACK_SECMARK
case NFT_CT_SECMARK: case NFT_CT_SECMARK:
#endif
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS:
#endif #endif
case NFT_CT_EXPIRATION: case NFT_CT_EXPIRATION:
case NFT_CT_HELPER: case NFT_CT_HELPER:
...@@ -292,16 +315,13 @@ static int nft_ct_init(const struct nft_ctx *ctx, ...@@ -292,16 +315,13 @@ static int nft_ct_init(const struct nft_ctx *ctx,
if (err < 0) if (err < 0)
return err; return err;
priv->family = ctx->afi->family;
return 0; return 0;
} }
static void nft_ct_destroy(const struct nft_expr *expr) static void nft_ct_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{ {
struct nft_ct *priv = nft_expr_priv(expr); nft_ct_l3proto_module_put(ctx->afi->family);
nft_ct_l3proto_module_put(priv->family);
} }
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
......
This diff is collapsed.
...@@ -70,7 +70,8 @@ static int nft_immediate_init(const struct nft_ctx *ctx, ...@@ -70,7 +70,8 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
return err; return err;
} }
static void nft_immediate_destroy(const struct nft_expr *expr) static void nft_immediate_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{ {
const struct nft_immediate_expr *priv = nft_expr_priv(expr); const struct nft_immediate_expr *priv = nft_expr_priv(expr);
return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
......
...@@ -74,7 +74,8 @@ static int nft_log_init(const struct nft_ctx *ctx, ...@@ -74,7 +74,8 @@ static int nft_log_init(const struct nft_ctx *ctx,
return 0; return 0;
} }
static void nft_log_destroy(const struct nft_expr *expr) static void nft_log_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{ {
struct nft_log *priv = nft_expr_priv(expr); struct nft_log *priv = nft_expr_priv(expr);
......
...@@ -89,11 +89,12 @@ static int nft_lookup_init(const struct nft_ctx *ctx, ...@@ -89,11 +89,12 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return 0; return 0;
} }
static void nft_lookup_destroy(const struct nft_expr *expr) static void nft_lookup_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{ {
struct nft_lookup *priv = nft_expr_priv(expr); struct nft_lookup *priv = nft_expr_priv(expr);
nf_tables_unbind_set(NULL, priv->set, &priv->binding); nf_tables_unbind_set(ctx, priv->set, &priv->binding);
} }
static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
......
...@@ -31,8 +31,8 @@ struct nft_nat { ...@@ -31,8 +31,8 @@ struct nft_nat {
enum nft_registers sreg_addr_max:8; enum nft_registers sreg_addr_max:8;
enum nft_registers sreg_proto_min:8; enum nft_registers sreg_proto_min:8;
enum nft_registers sreg_proto_max:8; enum nft_registers sreg_proto_max:8;
int family; enum nf_nat_manip_type type:8;
enum nf_nat_manip_type type; u8 family;
}; };
static void nft_nat_eval(const struct nft_expr *expr, static void nft_nat_eval(const struct nft_expr *expr,
...@@ -88,6 +88,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, ...@@ -88,6 +88,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[]) const struct nlattr * const tb[])
{ {
struct nft_nat *priv = nft_expr_priv(expr); struct nft_nat *priv = nft_expr_priv(expr);
u32 family;
int err; int err;
if (tb[NFTA_NAT_TYPE] == NULL) if (tb[NFTA_NAT_TYPE] == NULL)
...@@ -107,9 +108,12 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, ...@@ -107,9 +108,12 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (tb[NFTA_NAT_FAMILY] == NULL) if (tb[NFTA_NAT_FAMILY] == NULL)
return -EINVAL; return -EINVAL;
priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
if (priv->family != AF_INET && priv->family != AF_INET6) if (family != AF_INET && family != AF_INET6)
return -EINVAL; return -EAFNOSUPPORT;
if (family != ctx->afi->family)
return -EOPNOTSUPP;
priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) { if (tb[NFTA_NAT_REG_ADDR_MIN]) {
priv->sreg_addr_min = ntohl(nla_get_be32( priv->sreg_addr_min = ntohl(nla_get_be32(
...@@ -202,13 +206,7 @@ static struct nft_expr_type nft_nat_type __read_mostly = { ...@@ -202,13 +206,7 @@ static struct nft_expr_type nft_nat_type __read_mostly = {
static int __init nft_nat_module_init(void) static int __init nft_nat_module_init(void)
{ {
int err; return nft_register_expr(&nft_nat_type);
err = nft_register_expr(&nft_nat_type);
if (err < 0)
return err;
return 0;
} }
static void __exit nft_nat_module_exit(void) static void __exit nft_nat_module_exit(void)
......
...@@ -146,11 +146,11 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -146,11 +146,11 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (par->family == NFPROTO_BRIDGE) { if (par->family == NFPROTO_BRIDGE) {
switch (eth_hdr(skb)->h_proto) { switch (eth_hdr(skb)->h_proto) {
case __constant_htons(ETH_P_IP): case htons(ETH_P_IP):
audit_ip4(ab, skb); audit_ip4(ab, skb);
break; break;
case __constant_htons(ETH_P_IPV6): case htons(ETH_P_IPV6):
audit_ip6(ab, skb); audit_ip6(ab, skb);
break; break;
} }
......
This diff is collapsed.
...@@ -60,7 +60,7 @@ static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par) ...@@ -60,7 +60,7 @@ static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par)
} }
return spi_match(compinfo->spis[0], compinfo->spis[1], return spi_match(compinfo->spis[0], compinfo->spis[1],
ntohl(chdr->cpi << 16), ntohs(chdr->cpi),
!!(compinfo->invflags & XT_IPCOMP_INV_SPI)); !!(compinfo->invflags & XT_IPCOMP_INV_SPI));
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment