Commit c42d7121 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for net-next,
they are:

1) Count pre-established connections as active in "least connection"
   schedulers such that pre-established connections to avoid overloading
   backend servers on peak demands, from Michal Kubecek via Simon Horman.

2) Address a race condition when resizing the conntrack table by caching
   the bucket size when fulling iterating over the hashtable in these
   three possible scenarios: 1) dump via /proc/net/nf_conntrack,
   2) unlinking userspace helper and 3) unlinking custom conntrack timeout.
   From Liping Zhang.

3) Revisit early_drop() path to perform lockless traversal on conntrack
   eviction under stress, use del_timer() as synchronization point to
   avoid two CPUs evicting the same entry, from Florian Westphal.

4) Move NAT hlist_head to nf_conn object, this simplifies the existing
   NAT extension and it doesn't increase size since recent patches to
   align nf_conn, from Florian.

5) Use rhashtable for the by-source NAT hashtable, also from Florian.

6) Don't allow --physdev-is-out from OUTPUT chain, just like
   --physdev-out is not either, from Hangbin Liu.

7) Automagically set on nf_conntrack counters if the user tries to
   match ct bytes/packets from nftables, from Liping Zhang.

8) Remove possible_net_t fields in nf_tables set objects since we just
   simply pass the net pointer to the backend set type implementations.

9) Fix possible off-by-one in h323, from Toby DiPasquale.

10) early_drop() may be called from ctnetlink patch, so we must hold
    rcu read size lock from them too, this amends Florian's patch #3
    coming in this batch, from Liping Zhang.

11) Use binary search to validate jump offset in x_tables, this
    addresses the O(n!) validation that was introduced recently
    resolve security issues with unpriviledge namespaces, from Florian.

12) Fix reference leak to connlabel in error path of nft_ct, from Zhang.

13) Three updates for nft_log: Fix log prefix leak in error path. Bail
    out on loglevel larger than debug in nft_log and set on the new
    NF_LOG_F_COPY_LEN flag when snaplen is specified. Again from Zhang.

14) Allow to filter rule dumps in nf_tables based on table and chain
    names.

15) Simplify connlabel to always use 128 bits to store labels and
    get rid of unused function in xt_connlabel, from Florian.

16) Replace set_expect_timeout() by mod_timer() from the h323 conntrack
    helper, by Gao Feng.

17) Put back x_tables module reference in nft_compat on error, from
    Liping Zhang.

18) Add a reference count to the x_tables extensions cache in
    nft_compat, so we can remove them when unused and avoid a crash
    if the extensions are rmmod, again from Zhang.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents de0ba9a0 4b512e1c
......@@ -250,6 +250,10 @@ int xt_check_entry_offsets(const void *base, const char *elems,
unsigned int target_offset,
unsigned int next_offset);
unsigned int *xt_alloc_entry_offsets(unsigned int size);
bool xt_find_jump_offset(const unsigned int *offsets,
unsigned int target, unsigned int size);
int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto,
bool inv_proto);
int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto,
......
......@@ -17,6 +17,7 @@
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/atomic.h>
#include <linux/rhashtable.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/nf_conntrack_dccp.h>
......@@ -117,6 +118,9 @@ struct nf_conn {
/* Extensions */
struct nf_ct_ext *ext;
#if IS_ENABLED(CONFIG_NF_NAT)
struct rhash_head nat_bysource;
#endif
/* Storage reserved for other modules, must be the last member */
union nf_conntrack_proto proto;
};
......@@ -266,12 +270,12 @@ static inline int nf_ct_is_template(const struct nf_conn *ct)
}
/* It's confirmed if it is, or has been in the hash table. */
static inline int nf_ct_is_confirmed(struct nf_conn *ct)
static inline int nf_ct_is_confirmed(const struct nf_conn *ct)
{
return test_bit(IPS_CONFIRMED_BIT, &ct->status);
}
static inline int nf_ct_is_dying(struct nf_conn *ct)
static inline int nf_ct_is_dying(const struct nf_conn *ct)
{
return test_bit(IPS_DYING_BIT, &ct->status);
}
......@@ -309,6 +313,7 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl);
#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v))
#define MODULE_ALIAS_NFCT_HELPER(helper) \
MODULE_ALIAS("nfct-helper-" helper)
......
......@@ -51,6 +51,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto);
void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize);
/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net,
......
......@@ -99,9 +99,6 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
struct nf_ct_ext_type {
/* Destroys relationships (can be NULL). */
void (*destroy)(struct nf_conn *ct);
/* Called when realloacted (can be NULL).
Contents has already been moved. */
void (*move)(void *new, void *old);
enum nf_ct_ext_id id;
......
......@@ -58,10 +58,25 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name,
struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name,
u16 l3num,
u8 protonum);
void nf_ct_helper_init(struct nf_conntrack_helper *helper,
u16 l3num, u16 protonum, const char *name,
u16 default_port, u16 spec_port, u32 id,
const struct nf_conntrack_expect_policy *exp_pol,
u32 expect_class_max, u32 data_len,
int (*help)(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo),
int (*from_nlattr)(struct nlattr *attr,
struct nf_conn *ct),
struct module *module);
int nf_conntrack_helper_register(struct nf_conntrack_helper *);
void nf_conntrack_helper_unregister(struct nf_conntrack_helper *);
int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int);
void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *,
unsigned int);
struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct,
struct nf_conntrack_helper *helper,
gfp_t gfp);
......
......@@ -10,8 +10,7 @@
#define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
struct nf_conn_labels {
u8 words;
unsigned long bits[];
unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)];
};
static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct)
......@@ -26,27 +25,18 @@ static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct)
static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_LABELS
struct nf_conn_labels *cl_ext;
struct net *net = nf_ct_net(ct);
u8 words;
words = ACCESS_ONCE(net->ct.label_words);
if (words == 0)
if (net->ct.labels_used == 0)
return NULL;
cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
words * sizeof(long), GFP_ATOMIC);
if (cl_ext != NULL)
cl_ext->words = words;
return cl_ext;
return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
sizeof(struct nf_conn_labels), GFP_ATOMIC);
#else
return NULL;
#endif
}
int nf_connlabel_set(struct nf_conn *ct, u16 bit);
int nf_connlabels_replace(struct nf_conn *ct,
const u32 *data, const u32 *mask, unsigned int words);
......
#ifndef _NF_NAT_H
#define _NF_NAT_H
#include <linux/rhashtable.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/nf_nat.h>
#include <net/netfilter/nf_conntrack_tuple.h>
......@@ -29,8 +30,6 @@ struct nf_conn;
/* The structure embedded in the conntrack structure. */
struct nf_conn_nat {
struct hlist_node bysource;
struct nf_conn *ct;
union nf_conntrack_nat_help help;
#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
......
......@@ -236,7 +236,8 @@ struct nft_expr;
* @features: features supported by the implementation
*/
struct nft_set_ops {
bool (*lookup)(const struct nft_set *set,
bool (*lookup)(const struct net *net,
const struct nft_set *set,
const u32 *key,
const struct nft_set_ext **ext);
bool (*update)(struct nft_set *set,
......@@ -248,11 +249,14 @@ struct nft_set_ops {
struct nft_regs *regs,
const struct nft_set_ext **ext);
int (*insert)(const struct nft_set *set,
int (*insert)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
void (*activate)(const struct nft_set *set,
void (*activate)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
void * (*deactivate)(const struct nft_set *set,
void * (*deactivate)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
void (*remove)(const struct nft_set *set,
const struct nft_set_elem *elem);
......@@ -295,7 +299,6 @@ void nft_unregister_set(struct nft_set_ops *ops);
* @udlen: user data length
* @udata: user data
* @ops: set ops
* @pnet: network namespace
* @flags: set flags
* @genmask: generation mask
* @klen: key length
......@@ -318,7 +321,6 @@ struct nft_set {
unsigned char *udata;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
possible_net_t pnet;
u16 flags:14,
genmask:2;
u8 klen;
......@@ -804,7 +806,6 @@ struct nft_stats {
* struct nft_base_chain - nf_tables base chain
*
* @ops: netfilter hook ops
* @pnet: net namespace that this chain belongs to
* @type: chain type
* @policy: default policy
* @stats: per-cpu chain stats
......@@ -813,7 +814,6 @@ struct nft_stats {
*/
struct nft_base_chain {
struct nf_hook_ops ops[NFT_HOOK_OPS_MAX];
possible_net_t pnet;
const struct nf_chain_type *type;
u8 policy;
u8 flags;
......@@ -1009,10 +1009,11 @@ static inline bool nft_set_elem_active(const struct nft_set_ext *ext,
return !(ext->genmask & genmask);
}
static inline void nft_set_elem_change_active(const struct nft_set *set,
static inline void nft_set_elem_change_active(const struct net *net,
const struct nft_set *set,
struct nft_set_ext *ext)
{
ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet));
ext->genmask ^= nft_genmask_next(net);
}
/*
......
......@@ -299,23 +299,12 @@ static inline bool unconditional(const struct arpt_entry *e)
memcmp(&e->arp, &uncond, sizeof(uncond)) == 0;
}
static bool find_jump_target(const struct xt_table_info *t,
const struct arpt_entry *target)
{
struct arpt_entry *iter;
xt_entry_foreach(iter, t->entries, t->size) {
if (iter == target)
return true;
}
return false;
}
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
*/
static int mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
unsigned int valid_hooks, void *entry0,
unsigned int *offsets)
{
unsigned int hook;
......@@ -388,10 +377,11 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
/* This a jump; chase it. */
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
e = (struct arpt_entry *)
(entry0 + newpos);
if (!find_jump_target(newinfo, e))
return 0;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
......@@ -543,6 +533,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
const struct arpt_replace *repl)
{
struct arpt_entry *iter;
unsigned int *offsets;
unsigned int i;
int ret = 0;
......@@ -555,6 +546,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
newinfo->underflow[i] = 0xFFFFFFFF;
}
offsets = xt_alloc_entry_offsets(newinfo->number);
if (!offsets)
return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
......@@ -565,17 +559,20 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
break;
goto out_free;
if (i < repl->num_entries)
offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(arpt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
if (ret != 0)
return ret;
goto out_free;
ret = -EINVAL;
if (i != repl->num_entries)
return -EINVAL;
goto out_free;
/* Check hooks all assigned */
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
......@@ -583,13 +580,16 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (!(repl->valid_hooks & (1 << i)))
continue;
if (newinfo->hook_entry[i] == 0xFFFFFFFF)
return -EINVAL;
goto out_free;
if (newinfo->underflow[i] == 0xFFFFFFFF)
return -EINVAL;
goto out_free;
}
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
return -ELOOP;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
goto out_free;
}
kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
......@@ -609,6 +609,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
return ret;
}
return ret;
out_free:
kvfree(offsets);
return ret;
}
......
......@@ -373,23 +373,12 @@ ipt_do_table(struct sk_buff *skb,
else return verdict;
}
static bool find_jump_target(const struct xt_table_info *t,
const struct ipt_entry *target)
{
struct ipt_entry *iter;
xt_entry_foreach(iter, t->entries, t->size) {
if (iter == target)
return true;
}
return false;
}
/* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
unsigned int valid_hooks, void *entry0,
unsigned int *offsets)
{
unsigned int hook;
......@@ -458,10 +447,11 @@ mark_source_chains(const struct xt_table_info *newinfo,
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
/* This a jump; chase it. */
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
e = (struct ipt_entry *)
(entry0 + newpos);
if (!find_jump_target(newinfo, e))
return 0;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
......@@ -694,6 +684,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ipt_replace *repl)
{
struct ipt_entry *iter;
unsigned int *offsets;
unsigned int i;
int ret = 0;
......@@ -706,6 +697,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
newinfo->underflow[i] = 0xFFFFFFFF;
}
offsets = xt_alloc_entry_offsets(newinfo->number);
if (!offsets)
return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
......@@ -715,15 +709,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
return ret;
goto out_free;
if (i < repl->num_entries)
offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(ipt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
ret = -EINVAL;
if (i != repl->num_entries)
return -EINVAL;
goto out_free;
/* Check hooks all assigned */
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
......@@ -731,13 +728,16 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (!(repl->valid_hooks & (1 << i)))
continue;
if (newinfo->hook_entry[i] == 0xFFFFFFFF)
return -EINVAL;
goto out_free;
if (newinfo->underflow[i] == 0xFFFFFFFF)
return -EINVAL;
goto out_free;
}
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
return -ELOOP;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
goto out_free;
}
kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
......@@ -757,6 +757,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
return ret;
}
return ret;
out_free:
kvfree(offsets);
return ret;
}
......
......@@ -26,6 +26,8 @@
struct ct_iter_state {
struct seq_net_private p;
struct hlist_nulls_head *hash;
unsigned int htable_size;
unsigned int bucket;
};
......@@ -35,10 +37,10 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
struct hlist_nulls_node *n;
for (st->bucket = 0;
st->bucket < nf_conntrack_htable_size;
st->bucket < st->htable_size;
st->bucket++) {
n = rcu_dereference(
hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
hlist_nulls_first_rcu(&st->hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
......@@ -53,11 +55,11 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
if (++st->bucket >= nf_conntrack_htable_size)
if (++st->bucket >= st->htable_size)
return NULL;
}
head = rcu_dereference(
hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
hlist_nulls_first_rcu(&st->hash[st->bucket]));
}
return head;
}
......@@ -75,7 +77,11 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
struct ct_iter_state *st = seq->private;
rcu_read_lock();
nf_conntrack_get_ht(&st->hash, &st->htable_size);
return ct_get_idx(seq, *pos);
}
......
......@@ -402,23 +402,12 @@ ip6t_do_table(struct sk_buff *skb,
else return verdict;
}
static bool find_jump_target(const struct xt_table_info *t,
const struct ip6t_entry *target)
{
struct ip6t_entry *iter;
xt_entry_foreach(iter, t->entries, t->size) {
if (iter == target)
return true;
}
return false;
}
/* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
unsigned int valid_hooks, void *entry0,
unsigned int *offsets)
{
unsigned int hook;
......@@ -487,10 +476,11 @@ mark_source_chains(const struct xt_table_info *newinfo,
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
/* This a jump; chase it. */
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
e = (struct ip6t_entry *)
(entry0 + newpos);
if (!find_jump_target(newinfo, e))
return 0;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
......@@ -724,6 +714,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ip6t_replace *repl)
{
struct ip6t_entry *iter;
unsigned int *offsets;
unsigned int i;
int ret = 0;
......@@ -736,6 +727,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
newinfo->underflow[i] = 0xFFFFFFFF;
}
offsets = xt_alloc_entry_offsets(newinfo->number);
if (!offsets)
return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
......@@ -745,15 +739,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
return ret;
goto out_free;
if (i < repl->num_entries)
offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(ip6t_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
ret = -EINVAL;
if (i != repl->num_entries)
return -EINVAL;
goto out_free;
/* Check hooks all assigned */
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
......@@ -761,13 +758,16 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (!(repl->valid_hooks & (1 << i)))
continue;
if (newinfo->hook_entry[i] == 0xFFFFFFFF)
return -EINVAL;
goto out_free;
if (newinfo->underflow[i] == 0xFFFFFFFF)
return -EINVAL;
goto out_free;
}
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
return -ELOOP;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
goto out_free;
}
kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
......@@ -787,6 +787,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
return ret;
}
return ret;
out_free:
kvfree(offsets);
return ret;
}
......
......@@ -395,6 +395,20 @@ static const char *const tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
[IP_VS_TCP_S_LAST] = "BUG!",
};
static const bool tcp_state_active_table[IP_VS_TCP_S_LAST] = {
[IP_VS_TCP_S_NONE] = false,
[IP_VS_TCP_S_ESTABLISHED] = true,
[IP_VS_TCP_S_SYN_SENT] = true,
[IP_VS_TCP_S_SYN_RECV] = true,
[IP_VS_TCP_S_FIN_WAIT] = false,
[IP_VS_TCP_S_TIME_WAIT] = false,
[IP_VS_TCP_S_CLOSE] = false,
[IP_VS_TCP_S_CLOSE_WAIT] = false,
[IP_VS_TCP_S_LAST_ACK] = false,
[IP_VS_TCP_S_LISTEN] = false,
[IP_VS_TCP_S_SYNACK] = true,
};
#define sNO IP_VS_TCP_S_NONE
#define sES IP_VS_TCP_S_ESTABLISHED
#define sSS IP_VS_TCP_S_SYN_SENT
......@@ -418,6 +432,13 @@ static const char * tcp_state_name(int state)
return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
}
static bool tcp_state_active(int state)
{
if (state >= IP_VS_TCP_S_LAST)
return false;
return tcp_state_active_table[state];
}
static struct tcp_states_t tcp_states [] = {
/* INPUT */
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
......@@ -540,12 +561,12 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
(new_state != IP_VS_TCP_S_ESTABLISHED)) {
!tcp_state_active(new_state)) {
atomic_dec(&dest->activeconns);
atomic_inc(&dest->inactconns);
cp->flags |= IP_VS_CONN_F_INACTIVE;
} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
(new_state == IP_VS_TCP_S_ESTABLISHED)) {
tcp_state_active(new_state)) {
atomic_inc(&dest->activeconns);
atomic_dec(&dest->inactconns);
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
......
......@@ -460,6 +460,23 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
net_eq(net, nf_ct_net(ct));
}
/* must be called with rcu read lock held */
void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
{
struct hlist_nulls_head *hptr;
unsigned int sequence, hsz;
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hsz = nf_conntrack_htable_size;
hptr = nf_conntrack_hash;
} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
*hash = hptr;
*hsize = hsz;
}
EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
/*
* Warning :
* - Caller must take a reference on returned object
......@@ -818,67 +835,69 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
static noinline int early_drop(struct net *net, unsigned int _hash)
static unsigned int early_drop_list(struct net *net,
struct hlist_nulls_head *head)
{
/* Use oldest entry, which is roughly LRU */
struct nf_conntrack_tuple_hash *h;
struct nf_conn *tmp;
struct hlist_nulls_node *n;
unsigned int i, hash, sequence;
struct nf_conn *ct = NULL;
spinlock_t *lockp;
bool ret = false;
unsigned int drops = 0;
struct nf_conn *tmp;
i = 0;
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
local_bh_disable();
restart:
sequence = read_seqcount_begin(&nf_conntrack_generation);
for (; i < NF_CT_EVICTION_RANGE; i++) {
hash = scale_hash(_hash++);
lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
nf_conntrack_lock(lockp);
if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
spin_unlock(lockp);
goto restart;
}
hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
!net_eq(nf_ct_net(tmp), net) ||
nf_ct_is_dying(tmp))
continue;
if (atomic_inc_not_zero(&tmp->ct_general.use)) {
ct = tmp;
break;
}
}
if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
!net_eq(nf_ct_net(tmp), net) ||
nf_ct_is_dying(tmp))
continue;
spin_unlock(lockp);
if (ct)
break;
if (!atomic_inc_not_zero(&tmp->ct_general.use))
continue;
/* kill only if still in same netns -- might have moved due to
* SLAB_DESTROY_BY_RCU rules.
*
* We steal the timer reference. If that fails timer has
* already fired or someone else deleted it. Just drop ref
* and move to next entry.
*/
if (net_eq(nf_ct_net(tmp), net) &&
nf_ct_is_confirmed(tmp) &&
del_timer(&tmp->timeout) &&
nf_ct_delete(tmp, 0, 0))
drops++;
nf_ct_put(tmp);
}
local_bh_enable();
return drops;
}
if (!ct)
return false;
static noinline int early_drop(struct net *net, unsigned int _hash)
{
unsigned int i;
/* kill only if in same netns -- might have moved due to
* SLAB_DESTROY_BY_RCU rules
*/
if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
if (nf_ct_delete(ct, 0, 0)) {
NF_CT_STAT_INC_ATOMIC(net, early_drop);
ret = true;
for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
struct hlist_nulls_head *ct_hash;
unsigned hash, sequence, drops;
rcu_read_lock();
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = scale_hash(_hash++);
ct_hash = nf_conntrack_hash;
} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
drops = early_drop_list(net, &ct_hash[hash]);
rcu_read_unlock();
if (drops) {
NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops);
return true;
}
}
nf_ct_put(ct);
return ret;
return false;
}
static struct nf_conn *
......
......@@ -73,7 +73,7 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
size_t var_alloc_len, gfp_t gfp)
{
struct nf_ct_ext *old, *new;
int i, newlen, newoff;
int newlen, newoff;
struct nf_ct_ext_type *t;
/* Conntrack must not be confirmed to avoid races on reallocation. */
......@@ -99,19 +99,8 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
return NULL;
if (new != old) {
for (i = 0; i < NF_CT_EXT_NUM; i++) {
if (!__nf_ct_ext_exist(old, i))
continue;
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[i]);
if (t && t->move)
t->move((void *)new + new->offset[i],
(void *)old + old->offset[i]);
rcu_read_unlock();
}
kfree_rcu(old, rcu);
ct->ext = new;
rcu_assign_pointer(ct->ext, new);
}
new->offset[id] = newoff;
......
......@@ -572,7 +572,7 @@ static int nf_ct_ftp_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
return 0;
}
static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly;
static struct nf_conntrack_helper ftp[MAX_PORTS * 2] __read_mostly;
static const struct nf_conntrack_expect_policy ftp_exp_policy = {
.max_expected = 1,
......@@ -582,24 +582,13 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = {
/* don't make this __exit, since it's called from __init ! */
static void nf_conntrack_ftp_fini(void)
{
int i, j;
for (i = 0; i < ports_c; i++) {
for (j = 0; j < 2; j++) {
if (ftp[i][j].me == NULL)
continue;
pr_debug("unregistering helper for pf: %d port: %d\n",
ftp[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_helper_unregister(&ftp[i][j]);
}
}
nf_conntrack_helpers_unregister(ftp, ports_c * 2);
kfree(ftp_buffer);
}
static int __init nf_conntrack_ftp_init(void)
{
int i, j = -1, ret = 0;
int i, ret = 0;
ftp_buffer = kmalloc(65536, GFP_KERNEL);
if (!ftp_buffer)
......@@ -611,32 +600,21 @@ static int __init nf_conntrack_ftp_init(void)
/* FIXME should be configurable whether IPv4 and IPv6 FTP connections
are tracked or not - YK */
for (i = 0; i < ports_c; i++) {
ftp[i][0].tuple.src.l3num = PF_INET;
ftp[i][1].tuple.src.l3num = PF_INET6;
for (j = 0; j < 2; j++) {
ftp[i][j].data_len = sizeof(struct nf_ct_ftp_master);
ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]);
ftp[i][j].tuple.dst.protonum = IPPROTO_TCP;
ftp[i][j].expect_policy = &ftp_exp_policy;
ftp[i][j].me = THIS_MODULE;
ftp[i][j].help = help;
ftp[i][j].from_nlattr = nf_ct_ftp_from_nlattr;
if (ports[i] == FTP_PORT)
sprintf(ftp[i][j].name, "ftp");
else
sprintf(ftp[i][j].name, "ftp-%d", ports[i]);
pr_debug("registering helper for pf: %d port: %d\n",
ftp[i][j].tuple.src.l3num, ports[i]);
ret = nf_conntrack_helper_register(&ftp[i][j]);
if (ret) {
pr_err("failed to register helper for pf: %d port: %d\n",
ftp[i][j].tuple.src.l3num, ports[i]);
ports_c = i;
nf_conntrack_ftp_fini();
return ret;
}
}
nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP, "ftp",
FTP_PORT, ports[i], ports[i], &ftp_exp_policy,
0, sizeof(struct nf_ct_ftp_master), help,
nf_ct_ftp_from_nlattr, THIS_MODULE);
nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP, "ftp",
FTP_PORT, ports[i], ports[i], &ftp_exp_policy,
0, sizeof(struct nf_ct_ftp_master), help,
nf_ct_ftp_from_nlattr, THIS_MODULE);
}
ret = nf_conntrack_helpers_register(ftp, ports_c * 2);
if (ret < 0) {
pr_err("failed to register helpers\n");
kfree(ftp_buffer);
return ret;
}
return 0;
......
......@@ -846,9 +846,10 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
sz -= len;
/* Message Type */
if (sz < 1)
if (sz < 2)
return H323_ERROR_BOUND;
q931->MessageType = *p++;
sz--;
PRINT("MessageType = %02X\n", q931->MessageType);
if (*p & 0x80) {
p++;
......
......@@ -1272,19 +1272,6 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
return NULL;
}
/****************************************************************************/
static int set_expect_timeout(struct nf_conntrack_expect *exp,
unsigned int timeout)
{
if (!exp || !del_timer(&exp->timeout))
return 0;
exp->timeout.expires = jiffies + timeout * HZ;
add_timer(&exp->timeout);
return 1;
}
/****************************************************************************/
static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
......@@ -1486,7 +1473,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
"timeout to %u seconds for",
info->timeout);
nf_ct_dump_tuple(&exp->tuple);
set_expect_timeout(exp, info->timeout);
mod_timer(&exp->timeout, jiffies + info->timeout * HZ);
}
spin_unlock_bh(&nf_conntrack_expect_lock);
}
......
......@@ -409,6 +409,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
struct nf_conntrack_expect *exp;
const struct hlist_node *next;
const struct hlist_nulls_node *nn;
unsigned int last_hsize;
spinlock_t *lock;
struct net *net;
unsigned int i;
......@@ -446,18 +448,80 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
rtnl_unlock();
local_bh_disable();
for (i = 0; i < nf_conntrack_htable_size; i++) {
nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
if (i < nf_conntrack_htable_size) {
hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
unhelp(h, me);
restart:
last_hsize = nf_conntrack_htable_size;
for (i = 0; i < last_hsize; i++) {
lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS];
nf_conntrack_lock(lock);
if (last_hsize != nf_conntrack_htable_size) {
spin_unlock(lock);
goto restart;
}
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
unhelp(h, me);
spin_unlock(lock);
}
local_bh_enable();
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
void nf_ct_helper_init(struct nf_conntrack_helper *helper,
u16 l3num, u16 protonum, const char *name,
u16 default_port, u16 spec_port, u32 id,
const struct nf_conntrack_expect_policy *exp_pol,
u32 expect_class_max, u32 data_len,
int (*help)(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo),
int (*from_nlattr)(struct nlattr *attr,
struct nf_conn *ct),
struct module *module)
{
helper->tuple.src.l3num = l3num;
helper->tuple.dst.protonum = protonum;
helper->tuple.src.u.all = htons(spec_port);
helper->expect_policy = exp_pol;
helper->expect_class_max = expect_class_max;
helper->data_len = data_len;
helper->help = help;
helper->from_nlattr = from_nlattr;
helper->me = module;
if (spec_port == default_port)
snprintf(helper->name, sizeof(helper->name), "%s", name);
else
snprintf(helper->name, sizeof(helper->name), "%s-%u", name, id);
}
EXPORT_SYMBOL_GPL(nf_ct_helper_init);
int nf_conntrack_helpers_register(struct nf_conntrack_helper *helper,
unsigned int n)
{
unsigned int i;
int err = 0;
for (i = 0; i < n; i++) {
err = nf_conntrack_helper_register(&helper[i]);
if (err < 0)
goto err;
}
return err;
err:
if (i > 0)
nf_conntrack_helpers_unregister(helper, i);
return err;
}
EXPORT_SYMBOL_GPL(nf_conntrack_helpers_register);
void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper,
unsigned int n)
{
while (n-- > 0)
nf_conntrack_helper_unregister(&helper[n]);
}
EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister);
static struct nf_ct_ext_type helper_extend __read_mostly = {
.len = sizeof(struct nf_conn_help),
.align = __alignof__(struct nf_conn_help),
......
......@@ -255,27 +255,18 @@ static int __init nf_conntrack_irc_init(void)
ports[ports_c++] = IRC_PORT;
for (i = 0; i < ports_c; i++) {
irc[i].tuple.src.l3num = AF_INET;
irc[i].tuple.src.u.tcp.port = htons(ports[i]);
irc[i].tuple.dst.protonum = IPPROTO_TCP;
irc[i].expect_policy = &irc_exp_policy;
irc[i].me = THIS_MODULE;
irc[i].help = help;
if (ports[i] == IRC_PORT)
sprintf(irc[i].name, "irc");
else
sprintf(irc[i].name, "irc-%u", i);
ret = nf_conntrack_helper_register(&irc[i]);
if (ret) {
pr_err("failed to register helper for pf: %u port: %u\n",
irc[i].tuple.src.l3num, ports[i]);
ports_c = i;
nf_conntrack_irc_fini();
return ret;
}
nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, "irc",
IRC_PORT, ports[i], i, &irc_exp_policy,
0, 0, help, NULL, THIS_MODULE);
}
ret = nf_conntrack_helpers_register(&irc[0], ports_c);
if (ret) {
pr_err("failed to register helpers\n");
kfree(irc_buffer);
return ret;
}
return 0;
}
......@@ -283,10 +274,7 @@ static int __init nf_conntrack_irc_init(void)
* it is needed by the init function */
static void nf_conntrack_irc_fini(void)
{
int i;
for (i = 0; i < ports_c; i++)
nf_conntrack_helper_unregister(&irc[i]);
nf_conntrack_helpers_unregister(irc, ports_c);
kfree(irc_buffer);
}
......
......@@ -16,23 +16,6 @@
static spinlock_t nf_connlabels_lock;
int nf_connlabel_set(struct nf_conn *ct, u16 bit)
{
struct nf_conn_labels *labels = nf_ct_labels_find(ct);
if (!labels || BIT_WORD(bit) >= labels->words)
return -ENOSPC;
if (test_bit(bit, labels->bits))
return 0;
if (!test_and_set_bit(bit, labels->bits))
nf_conntrack_event_cache(IPCT_LABEL, ct);
return 0;
}
EXPORT_SYMBOL_GPL(nf_connlabel_set);
static int replace_u32(u32 *address, u32 mask, u32 new)
{
u32 old, tmp;
......@@ -60,7 +43,7 @@ int nf_connlabels_replace(struct nf_conn *ct,
if (!labels)
return -ENOSPC;
size = labels->words * sizeof(long);
size = sizeof(labels->bits);
if (size < (words32 * sizeof(u32)))
words32 = size / sizeof(u32);
......@@ -80,16 +63,11 @@ EXPORT_SYMBOL_GPL(nf_connlabels_replace);
int nf_connlabels_get(struct net *net, unsigned int bits)
{
size_t words;
words = BIT_WORD(bits) + 1;
if (words > NF_CT_LABELS_MAX_SIZE / sizeof(long))
if (BIT_WORD(bits) >= NF_CT_LABELS_MAX_SIZE / sizeof(long))
return -ERANGE;
spin_lock(&nf_connlabels_lock);
net->ct.labels_used++;
if (words > net->ct.label_words)
net->ct.label_words = words;
spin_unlock(&nf_connlabels_lock);
return 0;
......@@ -100,8 +78,6 @@ void nf_connlabels_put(struct net *net)
{
spin_lock(&nf_connlabels_lock);
net->ct.labels_used--;
if (net->ct.labels_used == 0)
net->ct.label_words = 0;
spin_unlock(&nf_connlabels_lock);
}
EXPORT_SYMBOL_GPL(nf_connlabels_put);
......
......@@ -346,25 +346,25 @@ static inline int ctnetlink_label_size(const struct nf_conn *ct)
if (!labels)
return 0;
return nla_total_size(labels->words * sizeof(long));
return nla_total_size(sizeof(labels->bits));
}
static int
ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct)
{
struct nf_conn_labels *labels = nf_ct_labels_find(ct);
unsigned int len, i;
unsigned int i;
if (!labels)
return 0;
len = labels->words * sizeof(long);
i = 0;
do {
if (labels->bits[i] != 0)
return nla_put(skb, CTA_LABELS, len, labels->bits);
return nla_put(skb, CTA_LABELS, sizeof(labels->bits),
labels->bits);
i++;
} while (i < labels->words);
} while (i < ARRAY_SIZE(labels->bits));
return 0;
}
......
......@@ -166,7 +166,7 @@ static int help(struct sk_buff *skb,
return ret;
}
static struct nf_conntrack_helper sane[MAX_PORTS][2] __read_mostly;
static struct nf_conntrack_helper sane[MAX_PORTS * 2] __read_mostly;
static const struct nf_conntrack_expect_policy sane_exp_policy = {
.max_expected = 1,
......@@ -176,22 +176,13 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = {
/* don't make this __exit, since it's called from __init ! */
static void nf_conntrack_sane_fini(void)
{
int i, j;
for (i = 0; i < ports_c; i++) {
for (j = 0; j < 2; j++) {
pr_debug("unregistering helper for pf: %d port: %d\n",
sane[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_helper_unregister(&sane[i][j]);
}
}
nf_conntrack_helpers_unregister(sane, ports_c * 2);
kfree(sane_buffer);
}
static int __init nf_conntrack_sane_init(void)
{
int i, j = -1, ret = 0;
int i, ret = 0;
sane_buffer = kmalloc(65536, GFP_KERNEL);
if (!sane_buffer)
......@@ -203,31 +194,23 @@ static int __init nf_conntrack_sane_init(void)
/* FIXME should be configurable whether IPv4 and IPv6 connections
are tracked or not - YK */
for (i = 0; i < ports_c; i++) {
sane[i][0].tuple.src.l3num = PF_INET;
sane[i][1].tuple.src.l3num = PF_INET6;
for (j = 0; j < 2; j++) {
sane[i][j].data_len = sizeof(struct nf_ct_sane_master);
sane[i][j].tuple.src.u.tcp.port = htons(ports[i]);
sane[i][j].tuple.dst.protonum = IPPROTO_TCP;
sane[i][j].expect_policy = &sane_exp_policy;
sane[i][j].me = THIS_MODULE;
sane[i][j].help = help;
if (ports[i] == SANE_PORT)
sprintf(sane[i][j].name, "sane");
else
sprintf(sane[i][j].name, "sane-%d", ports[i]);
pr_debug("registering helper for pf: %d port: %d\n",
sane[i][j].tuple.src.l3num, ports[i]);
ret = nf_conntrack_helper_register(&sane[i][j]);
if (ret) {
pr_err("failed to register helper for pf: %d port: %d\n",
sane[i][j].tuple.src.l3num, ports[i]);
ports_c = i;
nf_conntrack_sane_fini();
return ret;
}
}
nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP, "sane",
SANE_PORT, ports[i], ports[i],
&sane_exp_policy, 0,
sizeof(struct nf_ct_sane_master), help, NULL,
THIS_MODULE);
nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP, "sane",
SANE_PORT, ports[i], ports[i],
&sane_exp_policy, 0,
sizeof(struct nf_ct_sane_master), help, NULL,
THIS_MODULE);
}
ret = nf_conntrack_helpers_register(sane, ports_c * 2);
if (ret < 0) {
pr_err("failed to register helpers\n");
kfree(sane_buffer);
return ret;
}
return 0;
......
......@@ -1589,7 +1589,7 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff,
return process_sip_msg(skb, ct, protoff, dataoff, &dptr, &datalen);
}
static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly;
static struct nf_conntrack_helper sip[MAX_PORTS * 4] __read_mostly;
static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = {
[SIP_EXPECT_SIGNALLING] = {
......@@ -1616,20 +1616,12 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1
static void nf_conntrack_sip_fini(void)
{
int i, j;
for (i = 0; i < ports_c; i++) {
for (j = 0; j < ARRAY_SIZE(sip[i]); j++) {
if (sip[i][j].me == NULL)
continue;
nf_conntrack_helper_unregister(&sip[i][j]);
}
}
nf_conntrack_helpers_unregister(sip, ports_c * 4);
}
static int __init nf_conntrack_sip_init(void)
{
int i, j, ret;
int i, ret;
if (ports_c == 0)
ports[ports_c++] = SIP_PORT;
......@@ -1637,43 +1629,32 @@ static int __init nf_conntrack_sip_init(void)
for (i = 0; i < ports_c; i++) {
memset(&sip[i], 0, sizeof(sip[i]));
sip[i][0].tuple.src.l3num = AF_INET;
sip[i][0].tuple.dst.protonum = IPPROTO_UDP;
sip[i][0].help = sip_help_udp;
sip[i][1].tuple.src.l3num = AF_INET;
sip[i][1].tuple.dst.protonum = IPPROTO_TCP;
sip[i][1].help = sip_help_tcp;
sip[i][2].tuple.src.l3num = AF_INET6;
sip[i][2].tuple.dst.protonum = IPPROTO_UDP;
sip[i][2].help = sip_help_udp;
sip[i][3].tuple.src.l3num = AF_INET6;
sip[i][3].tuple.dst.protonum = IPPROTO_TCP;
sip[i][3].help = sip_help_tcp;
for (j = 0; j < ARRAY_SIZE(sip[i]); j++) {
sip[i][j].data_len = sizeof(struct nf_ct_sip_master);
sip[i][j].tuple.src.u.udp.port = htons(ports[i]);
sip[i][j].expect_policy = sip_exp_policy;
sip[i][j].expect_class_max = SIP_EXPECT_MAX;
sip[i][j].me = THIS_MODULE;
if (ports[i] == SIP_PORT)
sprintf(sip[i][j].name, "sip");
else
sprintf(sip[i][j].name, "sip-%u", i);
pr_debug("port #%u: %u\n", i, ports[i]);
nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip",
SIP_PORT, ports[i], i, sip_exp_policy,
SIP_EXPECT_MAX,
sizeof(struct nf_ct_sip_master), sip_help_udp,
NULL, THIS_MODULE);
nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP, "sip",
SIP_PORT, ports[i], i, sip_exp_policy,
SIP_EXPECT_MAX,
sizeof(struct nf_ct_sip_master), sip_help_tcp,
NULL, THIS_MODULE);
nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP, "sip",
SIP_PORT, ports[i], i, sip_exp_policy,
SIP_EXPECT_MAX,
sizeof(struct nf_ct_sip_master), sip_help_udp,
NULL, THIS_MODULE);
nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP, "sip",
SIP_PORT, ports[i], i, sip_exp_policy,
SIP_EXPECT_MAX,
sizeof(struct nf_ct_sip_master), sip_help_tcp,
NULL, THIS_MODULE);
}
ret = nf_conntrack_helper_register(&sip[i][j]);
if (ret) {
pr_err("failed to register helper for pf: %u port: %u\n",
sip[i][j].tuple.src.l3num, ports[i]);
ports_c = i;
nf_conntrack_sip_fini();
return ret;
}
}
ret = nf_conntrack_helpers_register(sip, ports_c * 4);
if (ret < 0) {
pr_err("failed to register helpers\n");
return ret;
}
return 0;
}
......
......@@ -48,6 +48,8 @@ EXPORT_SYMBOL_GPL(print_tuple);
struct ct_iter_state {
struct seq_net_private p;
struct hlist_nulls_head *hash;
unsigned int htable_size;
unsigned int bucket;
u_int64_t time_now;
};
......@@ -58,9 +60,10 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
struct hlist_nulls_node *n;
for (st->bucket = 0;
st->bucket < nf_conntrack_htable_size;
st->bucket < st->htable_size;
st->bucket++) {
n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
n = rcu_dereference(
hlist_nulls_first_rcu(&st->hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
......@@ -75,12 +78,11 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
if (++st->bucket >= nf_conntrack_htable_size)
if (++st->bucket >= st->htable_size)
return NULL;
}
head = rcu_dereference(
hlist_nulls_first_rcu(
&nf_conntrack_hash[st->bucket]));
hlist_nulls_first_rcu(&st->hash[st->bucket]));
}
return head;
}
......@@ -102,6 +104,8 @@ static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
st->time_now = ktime_get_real_ns();
rcu_read_lock();
nf_conntrack_get_ht(&st->hash, &st->htable_size);
return ct_get_idx(seq, *pos);
}
......
......@@ -97,7 +97,7 @@ static int tftp_help(struct sk_buff *skb,
return ret;
}
static struct nf_conntrack_helper tftp[MAX_PORTS][2] __read_mostly;
static struct nf_conntrack_helper tftp[MAX_PORTS * 2] __read_mostly;
static const struct nf_conntrack_expect_policy tftp_exp_policy = {
.max_expected = 1,
......@@ -106,47 +106,29 @@ static const struct nf_conntrack_expect_policy tftp_exp_policy = {
static void nf_conntrack_tftp_fini(void)
{
int i, j;
for (i = 0; i < ports_c; i++) {
for (j = 0; j < 2; j++)
nf_conntrack_helper_unregister(&tftp[i][j]);
}
nf_conntrack_helpers_unregister(tftp, ports_c * 2);
}
static int __init nf_conntrack_tftp_init(void)
{
int i, j, ret;
int i, ret;
if (ports_c == 0)
ports[ports_c++] = TFTP_PORT;
for (i = 0; i < ports_c; i++) {
memset(&tftp[i], 0, sizeof(tftp[i]));
tftp[i][0].tuple.src.l3num = AF_INET;
tftp[i][1].tuple.src.l3num = AF_INET6;
for (j = 0; j < 2; j++) {
tftp[i][j].tuple.dst.protonum = IPPROTO_UDP;
tftp[i][j].tuple.src.u.udp.port = htons(ports[i]);
tftp[i][j].expect_policy = &tftp_exp_policy;
tftp[i][j].me = THIS_MODULE;
tftp[i][j].help = tftp_help;
if (ports[i] == TFTP_PORT)
sprintf(tftp[i][j].name, "tftp");
else
sprintf(tftp[i][j].name, "tftp-%u", i);
ret = nf_conntrack_helper_register(&tftp[i][j]);
if (ret) {
pr_err("failed to register helper for pf: %u port: %u\n",
tftp[i][j].tuple.src.l3num, ports[i]);
ports_c = i;
nf_conntrack_tftp_fini();
return ret;
}
}
nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP, "tftp",
TFTP_PORT, ports[i], i, &tftp_exp_policy,
0, 0, tftp_help, NULL, THIS_MODULE);
nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP, "tftp",
TFTP_PORT, ports[i], i, &tftp_exp_policy,
0, 0, tftp_help, NULL, THIS_MODULE);
}
ret = nf_conntrack_helpers_register(tftp, ports_c * 2);
if (ret < 0) {
pr_err("failed to register helpers\n");
return ret;
}
return 0;
}
......
......@@ -30,17 +30,19 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
static DEFINE_SPINLOCK(nf_nat_lock);
static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
__read_mostly;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
static struct hlist_head *nf_nat_bysource __read_mostly;
static unsigned int nf_nat_htable_size __read_mostly;
static unsigned int nf_nat_hash_rnd __read_mostly;
struct nf_nat_conn_key {
const struct net *net;
const struct nf_conntrack_tuple *tuple;
const struct nf_conntrack_zone *zone;
};
static struct rhashtable nf_nat_bysource_table;
inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
......@@ -119,19 +121,17 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
EXPORT_SYMBOL(nf_xfrm_me_harder);
#endif /* CONFIG_XFRM */
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed)
{
unsigned int hash;
get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
const struct nf_conntrack_tuple *t;
const struct nf_conn *ct = data;
t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* Original src, to ensure we map it consistently if poss. */
hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
return reciprocal_scale(hash, nf_nat_htable_size);
seed ^= net_hash_mix(nf_ct_net(ct));
return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32),
t->dst.protonum ^ seed);
}
/* Is this tuple already taken? (not by us) */
......@@ -187,6 +187,26 @@ same_src(const struct nf_conn *ct,
t->src.u.all == tuple->src.u.all);
}
static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
const void *obj)
{
const struct nf_nat_conn_key *key = arg->key;
const struct nf_conn *ct = obj;
return same_src(ct, key->tuple) &&
net_eq(nf_ct_net(ct), key->net) &&
nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL);
}
static struct rhashtable_params nf_nat_bysource_params = {
.head_offset = offsetof(struct nf_conn, nat_bysource),
.obj_hashfn = nf_nat_bysource_hash,
.obj_cmpfn = nf_nat_bysource_cmp,
.nelem_hint = 256,
.min_size = 1024,
.nulls_base = (1U << RHT_BASE_SHIFT),
};
/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net,
......@@ -197,25 +217,23 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
unsigned int h = hash_by_src(net, tuple);
const struct nf_conn_nat *nat;
const struct nf_conn *ct;
struct nf_nat_conn_key key = {
.net = net,
.tuple = tuple,
.zone = zone
};
hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) {
ct = nat->ct;
if (same_src(ct, tuple) &&
net_eq(net, nf_ct_net(ct)) &&
nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
/* Copy source part from reply tuple. */
nf_ct_invert_tuplepr(result,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
result->dst = tuple->dst;
if (in_range(l3proto, l4proto, result, range))
return 1;
}
}
return 0;
ct = rhashtable_lookup_fast(&nf_nat_bysource_table, &key,
nf_nat_bysource_params);
if (!ct)
return 0;
nf_ct_invert_tuplepr(result,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
result->dst = tuple->dst;
return in_range(l3proto, l4proto, result, range);
}
/* For [FUTURE] fragmentation handling, we want the least-used
......@@ -387,7 +405,6 @@ nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple curr_tuple, new_tuple;
struct nf_conn_nat *nat;
......@@ -428,17 +445,13 @@ nf_nat_setup_info(struct nf_conn *ct,
}
if (maniptype == NF_NAT_MANIP_SRC) {
unsigned int srchash;
srchash = hash_by_src(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
spin_lock_bh(&nf_nat_lock);
/* nf_conntrack_alter_reply might re-allocate extension aera */
nat = nfct_nat(ct);
nat->ct = ct;
hlist_add_head_rcu(&nat->bysource,
&nf_nat_bysource[srchash]);
spin_unlock_bh(&nf_nat_lock);
int err;
err = rhashtable_insert_fast(&nf_nat_bysource_table,
&ct->nat_bysource,
nf_nat_bysource_params);
if (err)
return NF_DROP;
}
/* It's done. */
......@@ -543,7 +556,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
if (nf_nat_proto_remove(ct, data))
return 1;
if (!nat || !nat->ct)
if (!nat)
return 0;
/* This netns is being destroyed, and conntrack has nat null binding.
......@@ -555,11 +568,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
if (!del_timer(&ct->timeout))
return 1;
spin_lock_bh(&nf_nat_lock);
hlist_del_rcu(&nat->bysource);
ct->status &= ~IPS_NAT_DONE_MASK;
nat->ct = NULL;
spin_unlock_bh(&nf_nat_lock);
rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
nf_nat_bysource_params);
add_timer(&ct->timeout);
......@@ -688,35 +700,17 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
if (nat == NULL || nat->ct == NULL)
return;
NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
spin_lock_bh(&nf_nat_lock);
hlist_del_rcu(&nat->bysource);
spin_unlock_bh(&nf_nat_lock);
}
static void nf_nat_move_storage(void *new, void *old)
{
struct nf_conn_nat *new_nat = new;
struct nf_conn_nat *old_nat = old;
struct nf_conn *ct = old_nat->ct;
if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
if (!nat)
return;
spin_lock_bh(&nf_nat_lock);
hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
spin_unlock_bh(&nf_nat_lock);
rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
nf_nat_bysource_params);
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
.len = sizeof(struct nf_conn_nat),
.align = __alignof__(struct nf_conn_nat),
.destroy = nf_nat_cleanup_conntrack,
.move = nf_nat_move_storage,
.id = NF_CT_EXT_NAT,
.flags = NF_CT_EXT_F_PREALLOC,
};
......@@ -845,16 +839,13 @@ static int __init nf_nat_init(void)
{
int ret;
/* Leave them the same for the moment. */
nf_nat_htable_size = nf_conntrack_htable_size;
nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
if (!nf_nat_bysource)
return -ENOMEM;
ret = rhashtable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
if (ret)
return ret;
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
rhashtable_destroy(&nf_nat_bysource_table);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
......@@ -878,7 +869,7 @@ static int __init nf_nat_init(void)
return 0;
cleanup_extend:
nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
rhashtable_destroy(&nf_nat_bysource_table);
nf_ct_extend_unregister(&nat_extend);
return ret;
}
......@@ -896,8 +887,8 @@ static void __exit nf_nat_cleanup(void)
#endif
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
synchronize_net();
nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
rhashtable_destroy(&nf_nat_bysource_table);
}
MODULE_LICENSE("GPL");
......
......@@ -1405,7 +1405,6 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
rcu_assign_pointer(basechain->stats, stats);
}
write_pnet(&basechain->pnet, net);
basechain->type = type;
chain = &basechain->chain;
......@@ -1860,10 +1859,16 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx,
return err;
}
struct nft_rule_dump_ctx {
char table[NFT_TABLE_MAXNAMELEN];
char chain[NFT_CHAIN_MAXNAMELEN];
};
static int nf_tables_dump_rules(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
const struct nft_rule_dump_ctx *ctx = cb->data;
const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
......@@ -1880,7 +1885,15 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
continue;
list_for_each_entry_rcu(table, &afi->tables, list) {
if (ctx && ctx->table[0] &&
strcmp(ctx->table, table->name) != 0)
continue;
list_for_each_entry_rcu(chain, &table->chains, list) {
if (ctx && ctx->chain[0] &&
strcmp(ctx->chain, chain->name) != 0)
continue;
list_for_each_entry_rcu(rule, &chain->rules, list) {
if (!nft_is_active(net, rule))
goto cont;
......@@ -1910,6 +1923,12 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
return skb->len;
}
static int nf_tables_dump_rules_done(struct netlink_callback *cb)
{
kfree(cb->data);
return 0;
}
static int nf_tables_getrule(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
......@@ -1927,7 +1946,25 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_rules,
.done = nf_tables_dump_rules_done,
};
if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) {
struct nft_rule_dump_ctx *ctx;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
if (nla[NFTA_RULE_TABLE])
nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE],
sizeof(ctx->table));
if (nla[NFTA_RULE_CHAIN])
nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN],
sizeof(ctx->chain));
c.data = ctx;
}
return netlink_dump_start(nlsk, skb, nlh, &c);
}
......@@ -2843,7 +2880,6 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
}
INIT_LIST_HEAD(&set->bindings);
write_pnet(&set->pnet, net);
set->ops = ops;
set->ktype = ktype;
set->klen = desc.klen;
......@@ -3522,7 +3558,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err4;
ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
err = set->ops->insert(set, &elem);
err = set->ops->insert(ctx->net, set, &elem);
if (err < 0)
goto err5;
......@@ -3646,7 +3682,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
goto err3;
}
priv = set->ops->deactivate(set, &elem);
priv = set->ops->deactivate(ctx->net, set, &elem);
if (priv == NULL) {
err = -ENOENT;
goto err4;
......@@ -4020,7 +4056,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = (struct nft_trans_elem *)trans->data;
te->set->ops->activate(te->set, &te->elem);
te->set->ops->activate(net, te->set, &te->elem);
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
NFT_MSG_NEWSETELEM, 0);
......@@ -4145,7 +4181,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
case NFT_MSG_DELSETELEM:
te = (struct nft_trans_elem *)trans->data;
te->set->ops->activate(te->set, &te->elem);
te->set->ops->activate(net, te->set, &te->elem);
te->set->ndeact--;
nft_trans_destroy(trans);
......
......@@ -303,16 +303,24 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
{
struct nf_conntrack_tuple_hash *h;
const struct hlist_nulls_node *nn;
unsigned int last_hsize;
spinlock_t *lock;
int i;
local_bh_disable();
for (i = 0; i < nf_conntrack_htable_size; i++) {
nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
if (i < nf_conntrack_htable_size) {
hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
untimeout(h, timeout);
restart:
last_hsize = nf_conntrack_htable_size;
for (i = 0; i < last_hsize; i++) {
lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS];
nf_conntrack_lock(lock);
if (last_hsize != nf_conntrack_htable_size) {
spin_unlock(lock);
goto restart;
}
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
untimeout(h, timeout);
spin_unlock(lock);
}
local_bh_enable();
}
......
......@@ -23,6 +23,20 @@
#include <linux/netfilter_arp/arp_tables.h>
#include <net/netfilter/nf_tables.h>
struct nft_xt {
struct list_head head;
struct nft_expr_ops ops;
unsigned int refcnt;
};
static void nft_xt_put(struct nft_xt *xt)
{
if (--xt->refcnt == 0) {
list_del(&xt->head);
kfree(xt);
}
}
static int nft_compat_chain_validate_dependency(const char *tablename,
const struct nft_chain *chain)
{
......@@ -260,6 +274,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
if (par.target->destroy != NULL)
par.target->destroy(&par);
nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
module_put(target->me);
}
......@@ -442,6 +457,7 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
if (par.match->destroy != NULL)
par.match->destroy(&par);
nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
module_put(match->me);
}
......@@ -612,11 +628,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = {
static LIST_HEAD(nft_match_list);
struct nft_xt {
struct list_head head;
struct nft_expr_ops ops;
};
static struct nft_expr_type nft_match_type;
static bool nft_match_cmp(const struct xt_match *match,
......@@ -634,6 +645,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
struct xt_match *match;
char *mt_name;
u32 rev, family;
int err;
if (tb[NFTA_MATCH_NAME] == NULL ||
tb[NFTA_MATCH_REV] == NULL ||
......@@ -652,6 +664,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
if (!try_module_get(match->me))
return ERR_PTR(-ENOENT);
nft_match->refcnt++;
return &nft_match->ops;
}
}
......@@ -660,14 +673,19 @@ nft_match_select_ops(const struct nft_ctx *ctx,
if (IS_ERR(match))
return ERR_PTR(-ENOENT);
if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO]))
return ERR_PTR(-EINVAL);
if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO])) {
err = -EINVAL;
goto err;
}
/* This is the first time we use this match, allocate operations */
nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
if (nft_match == NULL)
return ERR_PTR(-ENOMEM);
if (nft_match == NULL) {
err = -ENOMEM;
goto err;
}
nft_match->refcnt = 1;
nft_match->ops.type = &nft_match_type;
nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
nft_match->ops.eval = nft_match_eval;
......@@ -680,14 +698,9 @@ nft_match_select_ops(const struct nft_ctx *ctx,
list_add(&nft_match->head, &nft_match_list);
return &nft_match->ops;
}
static void nft_match_release(void)
{
struct nft_xt *nft_match, *tmp;
list_for_each_entry_safe(nft_match, tmp, &nft_match_list, head)
kfree(nft_match);
err:
module_put(match->me);
return ERR_PTR(err);
}
static struct nft_expr_type nft_match_type __read_mostly = {
......@@ -717,6 +730,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
struct xt_target *target;
char *tg_name;
u32 rev, family;
int err;
if (tb[NFTA_TARGET_NAME] == NULL ||
tb[NFTA_TARGET_REV] == NULL ||
......@@ -735,6 +749,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
if (!try_module_get(target->me))
return ERR_PTR(-ENOENT);
nft_target->refcnt++;
return &nft_target->ops;
}
}
......@@ -743,14 +758,19 @@ nft_target_select_ops(const struct nft_ctx *ctx,
if (IS_ERR(target))
return ERR_PTR(-ENOENT);
if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO]))
return ERR_PTR(-EINVAL);
if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) {
err = -EINVAL;
goto err;
}
/* This is the first time we use this target, allocate operations */
nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
if (nft_target == NULL)
return ERR_PTR(-ENOMEM);
if (nft_target == NULL) {
err = -ENOMEM;
goto err;
}
nft_target->refcnt = 1;
nft_target->ops.type = &nft_target_type;
nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
nft_target->ops.init = nft_target_init;
......@@ -767,14 +787,9 @@ nft_target_select_ops(const struct nft_ctx *ctx,
list_add(&nft_target->head, &nft_target_list);
return &nft_target->ops;
}
static void nft_target_release(void)
{
struct nft_xt *nft_target, *tmp;
list_for_each_entry_safe(nft_target, tmp, &nft_target_list, head)
kfree(nft_target);
err:
module_put(target->me);
return ERR_PTR(err);
}
static struct nft_expr_type nft_target_type __read_mostly = {
......@@ -819,8 +834,6 @@ static void __exit nft_compat_module_exit(void)
nfnetlink_subsys_unregister(&nfnl_compat_subsys);
nft_unregister_expr(&nft_target_type);
nft_unregister_expr(&nft_match_type);
nft_match_release();
nft_target_release();
}
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);
......
......@@ -109,18 +109,11 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS: {
struct nf_conn_labels *labels = nf_ct_labels_find(ct);
unsigned int size;
if (!labels) {
if (labels)
memcpy(dest, labels->bits, NF_CT_LABELS_MAX_SIZE);
else
memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
return;
}
size = labels->words * sizeof(long);
memcpy(dest, labels->bits, size);
if (size < NF_CT_LABELS_MAX_SIZE)
memset(((char *) dest) + size, 0,
NF_CT_LABELS_MAX_SIZE - size);
return;
}
#endif
......@@ -351,6 +344,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
if (priv->key == NFT_CT_BYTES || priv->key == NFT_CT_PKTS)
nf_ct_set_acct(ctx->net, true);
return 0;
}
......@@ -359,6 +355,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
bool label_got = false;
unsigned int len;
int err;
......@@ -377,6 +374,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
if (err)
return err;
label_got = true;
break;
#endif
default:
......@@ -386,17 +384,28 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
return err;
goto err1;
err = nft_ct_l3proto_try_module_get(ctx->afi->family);
if (err < 0)
return err;
goto err1;
return 0;
err1:
if (label_got)
nf_connlabels_put(ctx->net);
return err;
}
static void nft_ct_get_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
nft_ct_l3proto_module_put(ctx->afi->family);
}
static void nft_ct_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
static void nft_ct_set_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_ct *priv = nft_expr_priv(expr);
......@@ -468,7 +477,7 @@ static const struct nft_expr_ops nft_ct_get_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval = nft_ct_get_eval,
.init = nft_ct_get_init,
.destroy = nft_ct_destroy,
.destroy = nft_ct_get_destroy,
.dump = nft_ct_get_dump,
};
......@@ -477,7 +486,7 @@ static const struct nft_expr_ops nft_ct_set_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval = nft_ct_set_eval,
.init = nft_ct_set_init,
.destroy = nft_ct_destroy,
.destroy = nft_ct_set_destroy,
.dump = nft_ct_set_dump,
};
......
......@@ -71,13 +71,13 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
return 0;
}
static bool nft_hash_lookup(const struct nft_set *set, const u32 *key,
const struct nft_set_ext **ext)
static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_hash *priv = nft_set_priv(set);
const struct nft_hash_elem *he;
struct nft_hash_cmp_arg arg = {
.genmask = nft_genmask_cur(read_pnet(&set->pnet)),
.genmask = nft_genmask_cur(net),
.set = set,
.key = key,
};
......@@ -125,13 +125,13 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
return false;
}
static int nft_hash_insert(const struct nft_set *set,
static int nft_hash_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he = elem->priv;
struct nft_hash_cmp_arg arg = {
.genmask = nft_genmask_next(read_pnet(&set->pnet)),
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
};
......@@ -140,20 +140,20 @@ static int nft_hash_insert(const struct nft_set *set,
nft_hash_params);
}
static void nft_hash_activate(const struct nft_set *set,
static void nft_hash_activate(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash_elem *he = elem->priv;
nft_set_elem_change_active(set, &he->ext);
nft_set_elem_change_active(net, set, &he->ext);
nft_set_elem_clear_busy(&he->ext);
}
static void *nft_hash_deactivate(const struct nft_set *set,
static void *nft_hash_deactivate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->pnet);
struct nft_hash_elem *he;
struct nft_hash_cmp_arg arg = {
.genmask = nft_genmask_next(net),
......@@ -166,7 +166,7 @@ static void *nft_hash_deactivate(const struct nft_set *set,
if (he != NULL) {
if (!nft_set_elem_mark_busy(&he->ext) ||
!nft_is_active(net, &he->ext))
nft_set_elem_change_active(set, &he->ext);
nft_set_elem_change_active(net, set, &he->ext);
else
he = NULL;
}
......
......@@ -52,6 +52,14 @@ static int nft_log_init(const struct nft_ctx *ctx,
struct nft_log *priv = nft_expr_priv(expr);
struct nf_loginfo *li = &priv->loginfo;
const struct nlattr *nla;
int err;
li->type = NF_LOG_TYPE_LOG;
if (tb[NFTA_LOG_LEVEL] != NULL &&
tb[NFTA_LOG_GROUP] != NULL)
return -EINVAL;
if (tb[NFTA_LOG_GROUP] != NULL)
li->type = NF_LOG_TYPE_ULOG;
nla = tb[NFTA_LOG_PREFIX];
if (nla != NULL) {
......@@ -63,13 +71,6 @@ static int nft_log_init(const struct nft_ctx *ctx,
priv->prefix = (char *)nft_log_null_prefix;
}
li->type = NF_LOG_TYPE_LOG;
if (tb[NFTA_LOG_LEVEL] != NULL &&
tb[NFTA_LOG_GROUP] != NULL)
return -EINVAL;
if (tb[NFTA_LOG_GROUP] != NULL)
li->type = NF_LOG_TYPE_ULOG;
switch (li->type) {
case NF_LOG_TYPE_LOG:
if (tb[NFTA_LOG_LEVEL] != NULL) {
......@@ -78,6 +79,11 @@ static int nft_log_init(const struct nft_ctx *ctx,
} else {
li->u.log.level = LOGLEVEL_WARNING;
}
if (li->u.log.level > LOGLEVEL_DEBUG) {
err = -EINVAL;
goto err1;
}
if (tb[NFTA_LOG_FLAGS] != NULL) {
li->u.log.logflags =
ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS]));
......@@ -86,6 +92,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
case NF_LOG_TYPE_ULOG:
li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
if (tb[NFTA_LOG_SNAPLEN] != NULL) {
li->u.ulog.flags |= NF_LOG_F_COPY_LEN;
li->u.ulog.copy_len =
ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
}
......@@ -96,7 +103,16 @@ static int nft_log_init(const struct nft_ctx *ctx,
break;
}
return nf_logger_find_get(ctx->afi->family, li->type);
err = nf_logger_find_get(ctx->afi->family, li->type);
if (err < 0)
goto err1;
return 0;
err1:
if (priv->prefix != nft_log_null_prefix)
kfree(priv->prefix);
return err;
}
static void nft_log_destroy(const struct nft_ctx *ctx,
......@@ -134,7 +150,7 @@ static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
goto nla_put_failure;
if (li->u.ulog.copy_len) {
if (li->u.ulog.flags & NF_LOG_F_COPY_LEN) {
if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
htonl(li->u.ulog.copy_len)))
goto nla_put_failure;
......
......@@ -35,7 +35,7 @@ static void nft_lookup_eval(const struct nft_expr *expr,
const struct nft_set_ext *ext;
bool found;
found = set->ops->lookup(set, &regs->data[priv->sreg], &ext) ^
found = set->ops->lookup(pkt->net, set, &regs->data[priv->sreg], &ext) ^
priv->invert;
if (!found) {
......
......@@ -41,13 +41,13 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
}
static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
const struct nft_set_ext **ext)
static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
const struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
u8 genmask = nft_genmask_cur(net);
const struct rb_node *parent;
u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
const void *this;
int d;
......@@ -93,13 +93,13 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
return false;
}
static int __nft_rbtree_insert(const struct nft_set *set,
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new)
{
struct nft_rbtree *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
struct nft_rbtree_elem *rbe;
struct rb_node *parent, **p;
u8 genmask = nft_genmask_next(read_pnet(&set->pnet));
int d;
parent = NULL;
......@@ -132,14 +132,14 @@ static int __nft_rbtree_insert(const struct nft_set *set,
return 0;
}
static int nft_rbtree_insert(const struct nft_set *set,
static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rbtree_elem *rbe = elem->priv;
int err;
spin_lock_bh(&nft_rbtree_lock);
err = __nft_rbtree_insert(set, rbe);
err = __nft_rbtree_insert(net, set, rbe);
spin_unlock_bh(&nft_rbtree_lock);
return err;
......@@ -156,21 +156,23 @@ static void nft_rbtree_remove(const struct nft_set *set,
spin_unlock_bh(&nft_rbtree_lock);
}
static void nft_rbtree_activate(const struct nft_set *set,
static void nft_rbtree_activate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rbtree_elem *rbe = elem->priv;
nft_set_elem_change_active(set, &rbe->ext);
nft_set_elem_change_active(net, set, &rbe->ext);
}
static void *nft_rbtree_deactivate(const struct nft_set *set,
static void *nft_rbtree_deactivate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
struct nft_rbtree_elem *rbe, *this = elem->priv;
u8 genmask = nft_genmask_next(read_pnet(&set->pnet));
u8 genmask = nft_genmask_next(net);
int d;
while (parent != NULL) {
......@@ -196,7 +198,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
parent = parent->rb_right;
continue;
}
nft_set_elem_change_active(set, &rbe->ext);
nft_set_elem_change_active(net, set, &rbe->ext);
return rbe;
}
}
......
......@@ -702,6 +702,56 @@ int xt_check_entry_offsets(const void *base,
}
EXPORT_SYMBOL(xt_check_entry_offsets);
/**
* xt_alloc_entry_offsets - allocate array to store rule head offsets
*
* @size: number of entries
*
* Return: NULL or kmalloc'd or vmalloc'd array
*/
unsigned int *xt_alloc_entry_offsets(unsigned int size)
{
unsigned int *off;
off = kcalloc(size, sizeof(unsigned int), GFP_KERNEL | __GFP_NOWARN);
if (off)
return off;
if (size < (SIZE_MAX / sizeof(unsigned int)))
off = vmalloc(size * sizeof(unsigned int));
return off;
}
EXPORT_SYMBOL(xt_alloc_entry_offsets);
/**
* xt_find_jump_offset - check if target is a valid jump offset
*
* @offsets: array containing all valid rule start offsets of a rule blob
* @target: the jump target to search for
* @size: entries in @offset
*/
bool xt_find_jump_offset(const unsigned int *offsets,
unsigned int target, unsigned int size)
{
int m, low = 0, hi = size;
while (hi > low) {
m = (low + hi) / 2u;
if (offsets[m] > target)
hi = m;
else if (offsets[m] < target)
low = m + 1;
else
return true;
}
return false;
}
EXPORT_SYMBOL(xt_find_jump_offset);
int xt_check_target(struct xt_tgchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
......
......@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <linux/netfilter/x_tables.h>
......@@ -18,21 +19,12 @@ MODULE_DESCRIPTION("Xtables: add/match connection trackling labels");
MODULE_ALIAS("ipt_connlabel");
MODULE_ALIAS("ip6t_connlabel");
static bool connlabel_match(const struct nf_conn *ct, u16 bit)
{
struct nf_conn_labels *labels = nf_ct_labels_find(ct);
if (!labels)
return false;
return BIT_WORD(bit) < labels->words && test_bit(bit, labels->bits);
}
static bool
connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_connlabel_mtinfo *info = par->matchinfo;
enum ip_conntrack_info ctinfo;
struct nf_conn_labels *labels;
struct nf_conn *ct;
bool invert = info->options & XT_CONNLABEL_OP_INVERT;
......@@ -40,10 +32,21 @@ connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (ct == NULL || nf_ct_is_untracked(ct))
return invert;
if (info->options & XT_CONNLABEL_OP_SET)
return (nf_connlabel_set(ct, info->bit) == 0) ^ invert;
labels = nf_ct_labels_find(ct);
if (!labels)
return invert;
if (test_bit(info->bit, labels->bits))
return !invert;
if (info->options & XT_CONNLABEL_OP_SET) {
if (!test_and_set_bit(info->bit, labels->bits))
nf_conntrack_event_cache(IPCT_LABEL, ct);
return !invert;
}
return connlabel_match(ct, info->bit) ^ invert;
return invert;
}
static int connlabel_mt_check(const struct xt_mtchk_param *par)
......
......@@ -102,14 +102,14 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
info->bitmask & ~XT_PHYSDEV_OP_MASK)
return -EINVAL;
if (info->bitmask & XT_PHYSDEV_OP_OUT &&
if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) &&
(!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
info->invert & XT_PHYSDEV_OP_BRIDGED) &&
par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
pr_info("using --physdev-out in the OUTPUT, FORWARD and "
"POSTROUTING chains for non-bridged traffic is not "
"supported anymore.\n");
pr_info("using --physdev-out and --physdev-is-out are only"
"supported in the FORWARD and POSTROUTING chains with"
"bridged traffic.\n");
if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
return -EINVAL;
}
......
......@@ -135,7 +135,7 @@ static void ovs_ct_get_labels(const struct nf_conn *ct,
struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
if (cl) {
size_t len = cl->words * sizeof(long);
size_t len = sizeof(cl->bits);
if (len > OVS_CT_LABELS_LEN)
len = OVS_CT_LABELS_LEN;
......@@ -274,7 +274,7 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
nf_ct_labels_ext_add(ct);
cl = nf_ct_labels_find(ct);
}
if (!cl || cl->words * sizeof(long) < OVS_CT_LABELS_LEN)
if (!cl || sizeof(cl->bits) < OVS_CT_LABELS_LEN)
return -ENOSPC;
err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment