Commit 89697f1d authored by Linus Torvalds

Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (25 commits)
  [XFRM]: Fix OOPSes in xfrm_audit_log().
  [TCP]: cleanup of htcp (resend)
  [TCP]: Use read mostly for CUBIC parameters.
  [NETFILTER]: nf_conntrack_tcp: make sysctl variables static
  [NETFILTER]: ip6t_mh: drop piggyback payload packet on MH packets
  [NETFILTER]: Fix whitespace errors
  [NETFILTER]: Kconfig: improve dependency handling
  [NETFILTER]: xt_mac/xt_CLASSIFY: use IPv6 hook names for IPv6 registration
  [NETFILTER]: nf_conntrack: change nf_conntrack_l[34]proto_unregister to void
  [NETFILTER]: nf_conntrack: properly use RCU for nf_conntrack_destroyed callback
  [NETFILTER]: ip_conntrack: properly use RCU for ip_conntrack_destroyed callback
  [NETFILTER]: nf_conntrack: fix invalid conntrack statistics RCU assumption
  [NETFILTER]: ip_conntrack: fix invalid conntrack statistics RCU assumption
  [NETFILTER]: nf_conntrack: properly use RCU API for nf_ct_protos/nf_ct_l3protos arrays
  [NETFILTER]: ip_conntrack: properly use RCU API for ip_ct_protos array
  [NETFILTER]: nf_nat: properly use RCU API for nf_nat_protos array
  [NETFILTER]: ip_nat: properly use RCU API for ip_nat_protos array
  [NETFILTER]: nf_log: minor cleanups
  [NETFILTER]: nf_log: switch logger registration/unregistration to mutex
  [NETFILTER]: nf_log: make nf_log_unregister_pf return void
  ...
parents fd19e44f 13fcfbb0
...@@ -172,8 +172,8 @@ struct nf_logger { ...@@ -172,8 +172,8 @@ struct nf_logger {
/* Function to register/unregister log function. */ /* Function to register/unregister log function. */
int nf_log_register(int pf, struct nf_logger *logger); int nf_log_register(int pf, struct nf_logger *logger);
int nf_log_unregister_pf(int pf); void nf_log_unregister(struct nf_logger *logger);
void nf_log_unregister_logger(struct nf_logger *logger); void nf_log_unregister_pf(int pf);
/* Calls the registered backend logging function */ /* Calls the registered backend logging function */
void nf_log_packet(int pf, void nf_log_packet(int pf,
......
...@@ -301,6 +301,12 @@ extern unsigned int ip_conntrack_htable_size; ...@@ -301,6 +301,12 @@ extern unsigned int ip_conntrack_htable_size;
extern int ip_conntrack_checksum; extern int ip_conntrack_checksum;
#define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
/*
 * Increment the `count` field of the ip_conntrack_stat instance obtained
 * through __get_cpu_var(), with the increment bracketed by
 * local_bh_disable()/local_bh_enable().
 * NOTE(review): presumably meant for call sites that may run with bottom
 * halves enabled, where plain CONNTRACK_STAT_INC is not safe -- confirm
 * against the callers.
 */
#define CONNTRACK_STAT_INC_ATOMIC(count) \
do { \
local_bh_disable(); \
__get_cpu_var(ip_conntrack_stat).count++; \
local_bh_enable(); \
} while (0)
#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
#include <linux/notifier.h> #include <linux/notifier.h>
......
...@@ -257,6 +257,12 @@ extern int nf_conntrack_max; ...@@ -257,6 +257,12 @@ extern int nf_conntrack_max;
DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++) #define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++)
/*
 * Increment the `count` field of the per-CPU nf_conntrack_stat instance
 * obtained through __get_cpu_var(), with the increment bracketed by
 * local_bh_disable()/local_bh_enable().
 * NOTE(review): presumably meant for call sites that may run with bottom
 * halves enabled, where plain NF_CT_STAT_INC is not safe -- confirm
 * against the callers.
 */
#define NF_CT_STAT_INC_ATOMIC(count) \
do { \
local_bh_disable(); \
__get_cpu_var(nf_conntrack_stat).count++; \
local_bh_enable(); \
} while (0)
/* no helper, no nat */ /* no helper, no nat */
#define NF_CT_F_BASIC 0 #define NF_CT_F_BASIC 0
......
...@@ -89,7 +89,7 @@ extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX]; ...@@ -89,7 +89,7 @@ extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
/* Protocol registration. */ /* Protocol registration. */
extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
extern int nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto); extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto);
extern struct nf_conntrack_l3proto * extern struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto); nf_ct_l3proto_find_get(u_int16_t l3proto);
...@@ -106,7 +106,7 @@ __nf_ct_l3proto_find(u_int16_t l3proto) ...@@ -106,7 +106,7 @@ __nf_ct_l3proto_find(u_int16_t l3proto)
{ {
if (unlikely(l3proto >= AF_MAX)) if (unlikely(l3proto >= AF_MAX))
return &nf_conntrack_l3proto_generic; return &nf_conntrack_l3proto_generic;
return nf_ct_l3protos[l3proto]; return rcu_dereference(nf_ct_l3protos[l3proto]);
} }
#endif /*_NF_CONNTRACK_L3PROTO_H*/ #endif /*_NF_CONNTRACK_L3PROTO_H*/
...@@ -109,7 +109,7 @@ extern void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); ...@@ -109,7 +109,7 @@ extern void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p);
/* Protocol registration. */ /* Protocol registration. */
extern int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *proto); extern int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *proto);
extern int nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *proto); extern void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *proto);
/* Generic netlink helpers */ /* Generic netlink helpers */
extern int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb, extern int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
......
...@@ -208,7 +208,7 @@ static int __init ebt_log_init(void) ...@@ -208,7 +208,7 @@ static int __init ebt_log_init(void)
static void __exit ebt_log_fini(void) static void __exit ebt_log_fini(void)
{ {
nf_log_unregister_logger(&ebt_log_logger); nf_log_unregister(&ebt_log_logger);
ebt_unregister_watcher(&log); ebt_unregister_watcher(&log);
} }
......
...@@ -323,7 +323,7 @@ static void __exit ebt_ulog_fini(void) ...@@ -323,7 +323,7 @@ static void __exit ebt_ulog_fini(void)
ebt_ulog_buff_t *ub; ebt_ulog_buff_t *ub;
int i; int i;
nf_log_unregister_logger(&ebt_ulog_logger); nf_log_unregister(&ebt_ulog_logger);
ebt_unregister_watcher(&ulog); ebt_unregister_watcher(&ulog);
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
ub = &ulog_buffers[i]; ub = &ulog_buffers[i];
......
...@@ -226,7 +226,7 @@ config IP_NF_QUEUE ...@@ -226,7 +226,7 @@ config IP_NF_QUEUE
config IP_NF_IPTABLES config IP_NF_IPTABLES
tristate "IP tables support (required for filtering/masq/NAT)" tristate "IP tables support (required for filtering/masq/NAT)"
depends on NETFILTER_XTABLES select NETFILTER_XTABLES
help help
iptables is a general, extensible packet identification framework. iptables is a general, extensible packet identification framework.
The packet filtering and full NAT (masquerading, port forwarding, The packet filtering and full NAT (masquerading, port forwarding,
...@@ -606,7 +606,9 @@ config IP_NF_TARGET_TTL ...@@ -606,7 +606,9 @@ config IP_NF_TARGET_TTL
config IP_NF_TARGET_CLUSTERIP config IP_NF_TARGET_CLUSTERIP
tristate "CLUSTERIP target support (EXPERIMENTAL)" tristate "CLUSTERIP target support (EXPERIMENTAL)"
depends on IP_NF_MANGLE && EXPERIMENTAL depends on IP_NF_MANGLE && EXPERIMENTAL
depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4) depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
select NF_CONNTRACK_MARK if NF_CONNTRACK_IPV4
help help
The CLUSTERIP target allows you to build load-balancing clusters of The CLUSTERIP target allows you to build load-balancing clusters of
network servers without having a dedicated load-balancing network servers without having a dedicated load-balancing
...@@ -629,7 +631,7 @@ config IP_NF_RAW ...@@ -629,7 +631,7 @@ config IP_NF_RAW
# ARP tables # ARP tables
config IP_NF_ARPTABLES config IP_NF_ARPTABLES
tristate "ARP tables support" tristate "ARP tables support"
depends on NETFILTER_XTABLES select NETFILTER_XTABLES
help help
arptables is a general, extensible packet identification framework. arptables is a general, extensible packet identification framework.
The ARP packet filtering and mangling (manipulation)subsystems The ARP packet filtering and mangling (manipulation)subsystems
......
...@@ -303,6 +303,7 @@ destroy_conntrack(struct nf_conntrack *nfct) ...@@ -303,6 +303,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
struct ip_conntrack *ct = (struct ip_conntrack *)nfct; struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
struct ip_conntrack_protocol *proto; struct ip_conntrack_protocol *proto;
struct ip_conntrack_helper *helper; struct ip_conntrack_helper *helper;
typeof(ip_conntrack_destroyed) destroyed;
DEBUGP("destroy_conntrack(%p)\n", ct); DEBUGP("destroy_conntrack(%p)\n", ct);
IP_NF_ASSERT(atomic_read(&nfct->use) == 0); IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
...@@ -318,12 +319,16 @@ destroy_conntrack(struct nf_conntrack *nfct) ...@@ -318,12 +319,16 @@ destroy_conntrack(struct nf_conntrack *nfct)
/* To make sure we don't get any weird locking issues here: /* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock * destroy_conntrack() MUST NOT be called with a write lock
* to ip_conntrack_lock!!! -HW */ * to ip_conntrack_lock!!! -HW */
rcu_read_lock();
proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
if (proto && proto->destroy) if (proto && proto->destroy)
proto->destroy(ct); proto->destroy(ct);
if (ip_conntrack_destroyed) destroyed = rcu_dereference(ip_conntrack_destroyed);
ip_conntrack_destroyed(ct); if (destroyed)
destroyed(ct);
rcu_read_unlock();
write_lock_bh(&ip_conntrack_lock); write_lock_bh(&ip_conntrack_lock);
/* Expectations will have been removed in clean_from_lists, /* Expectations will have been removed in clean_from_lists,
...@@ -536,7 +541,7 @@ static int early_drop(struct list_head *chain) ...@@ -536,7 +541,7 @@ static int early_drop(struct list_head *chain)
if (del_timer(&ct->timeout)) { if (del_timer(&ct->timeout)) {
death_by_timeout((unsigned long)ct); death_by_timeout((unsigned long)ct);
dropped = 1; dropped = 1;
CONNTRACK_STAT_INC(early_drop); CONNTRACK_STAT_INC_ATOMIC(early_drop);
} }
ip_conntrack_put(ct); ip_conntrack_put(ct);
return dropped; return dropped;
...@@ -595,13 +600,13 @@ ip_conntrack_proto_find_get(u_int8_t protocol) ...@@ -595,13 +600,13 @@ ip_conntrack_proto_find_get(u_int8_t protocol)
{ {
struct ip_conntrack_protocol *p; struct ip_conntrack_protocol *p;
preempt_disable(); rcu_read_lock();
p = __ip_conntrack_proto_find(protocol); p = __ip_conntrack_proto_find(protocol);
if (p) { if (p) {
if (!try_module_get(p->me)) if (!try_module_get(p->me))
p = &ip_conntrack_generic_protocol; p = &ip_conntrack_generic_protocol;
} }
preempt_enable(); rcu_read_unlock();
return p; return p;
} }
...@@ -802,7 +807,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, ...@@ -802,7 +807,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
/* Previously seen (loopback or untracked)? Ignore. */ /* Previously seen (loopback or untracked)? Ignore. */
if ((*pskb)->nfct) { if ((*pskb)->nfct) {
CONNTRACK_STAT_INC(ignore); CONNTRACK_STAT_INC_ATOMIC(ignore);
return NF_ACCEPT; return NF_ACCEPT;
} }
...@@ -830,6 +835,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, ...@@ -830,6 +835,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
} }
#endif #endif
/* rcu_read_lock()ed by nf_hook_slow */
proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol); proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
/* It may be an special packet, error, unclean... /* It may be an special packet, error, unclean...
...@@ -837,20 +843,20 @@ unsigned int ip_conntrack_in(unsigned int hooknum, ...@@ -837,20 +843,20 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
* core what to do with the packet. */ * core what to do with the packet. */
if (proto->error != NULL if (proto->error != NULL
&& (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) { && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
CONNTRACK_STAT_INC(error); CONNTRACK_STAT_INC_ATOMIC(error);
CONNTRACK_STAT_INC(invalid); CONNTRACK_STAT_INC_ATOMIC(invalid);
return -ret; return -ret;
} }
if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) { if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
/* Not valid part of a connection */ /* Not valid part of a connection */
CONNTRACK_STAT_INC(invalid); CONNTRACK_STAT_INC_ATOMIC(invalid);
return NF_ACCEPT; return NF_ACCEPT;
} }
if (IS_ERR(ct)) { if (IS_ERR(ct)) {
/* Too stressed to deal. */ /* Too stressed to deal. */
CONNTRACK_STAT_INC(drop); CONNTRACK_STAT_INC_ATOMIC(drop);
return NF_DROP; return NF_DROP;
} }
...@@ -862,7 +868,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, ...@@ -862,7 +868,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
* the netfilter core what to do*/ * the netfilter core what to do*/
nf_conntrack_put((*pskb)->nfct); nf_conntrack_put((*pskb)->nfct);
(*pskb)->nfct = NULL; (*pskb)->nfct = NULL;
CONNTRACK_STAT_INC(invalid); CONNTRACK_STAT_INC_ATOMIC(invalid);
return -ret; return -ret;
} }
...@@ -875,8 +881,15 @@ unsigned int ip_conntrack_in(unsigned int hooknum, ...@@ -875,8 +881,15 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
int invert_tuplepr(struct ip_conntrack_tuple *inverse, int invert_tuplepr(struct ip_conntrack_tuple *inverse,
const struct ip_conntrack_tuple *orig) const struct ip_conntrack_tuple *orig)
{ {
return ip_ct_invert_tuple(inverse, orig, struct ip_conntrack_protocol *proto;
__ip_conntrack_proto_find(orig->dst.protonum)); int ret;
rcu_read_lock();
proto = __ip_conntrack_proto_find(orig->dst.protonum);
ret = ip_ct_invert_tuple(inverse, orig, proto);
rcu_read_unlock();
return ret;
} }
/* Would two expected things clash? */ /* Would two expected things clash? */
...@@ -1354,7 +1367,7 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) ...@@ -1354,7 +1367,7 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
supposed to kill the mall. */ supposed to kill the mall. */
void ip_conntrack_cleanup(void) void ip_conntrack_cleanup(void)
{ {
ip_ct_attach = NULL; rcu_assign_pointer(ip_ct_attach, NULL);
/* This makes sure all current packets have passed through /* This makes sure all current packets have passed through
netfilter framework. Roll on, two-stage module netfilter framework. Roll on, two-stage module
...@@ -1507,15 +1520,15 @@ int __init ip_conntrack_init(void) ...@@ -1507,15 +1520,15 @@ int __init ip_conntrack_init(void)
/* Don't NEED lock here, but good form anyway. */ /* Don't NEED lock here, but good form anyway. */
write_lock_bh(&ip_conntrack_lock); write_lock_bh(&ip_conntrack_lock);
for (i = 0; i < MAX_IP_CT_PROTO; i++) for (i = 0; i < MAX_IP_CT_PROTO; i++)
ip_ct_protos[i] = &ip_conntrack_generic_protocol; rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol);
/* Sew in builtin protocols. */ /* Sew in builtin protocols. */
ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp; rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp);
ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp; rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp);
ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp);
write_unlock_bh(&ip_conntrack_lock); write_unlock_bh(&ip_conntrack_lock);
/* For use by ipt_REJECT */ /* For use by ipt_REJECT */
ip_ct_attach = ip_conntrack_attach; rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach);
/* Set up fake conntrack: /* Set up fake conntrack:
- to never be deleted, not in any hashes */ - to never be deleted, not in any hashes */
......
...@@ -796,7 +796,7 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto) ...@@ -796,7 +796,7 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
ret = -EBUSY; ret = -EBUSY;
goto out; goto out;
} }
ip_ct_protos[proto->proto] = proto; rcu_assign_pointer(ip_ct_protos[proto->proto], proto);
out: out:
write_unlock_bh(&ip_conntrack_lock); write_unlock_bh(&ip_conntrack_lock);
return ret; return ret;
...@@ -805,11 +805,10 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto) ...@@ -805,11 +805,10 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto) void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
{ {
write_lock_bh(&ip_conntrack_lock); write_lock_bh(&ip_conntrack_lock);
ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol; rcu_assign_pointer(ip_ct_protos[proto->proto],
&ip_conntrack_generic_protocol);
write_unlock_bh(&ip_conntrack_lock); write_unlock_bh(&ip_conntrack_lock);
synchronize_rcu();
/* Somebody could be still looking at the proto in bh. */
synchronize_net();
/* Remove all contrack entries for this protocol */ /* Remove all contrack entries for this protocol */
ip_ct_iterate_cleanup(kill_proto, &proto->proto); ip_ct_iterate_cleanup(kill_proto, &proto->proto);
......
...@@ -50,7 +50,7 @@ static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; ...@@ -50,7 +50,7 @@ static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
static inline struct ip_nat_protocol * static inline struct ip_nat_protocol *
__ip_nat_proto_find(u_int8_t protonum) __ip_nat_proto_find(u_int8_t protonum)
{ {
return ip_nat_protos[protonum]; return rcu_dereference(ip_nat_protos[protonum]);
} }
struct ip_nat_protocol * struct ip_nat_protocol *
...@@ -58,13 +58,11 @@ ip_nat_proto_find_get(u_int8_t protonum) ...@@ -58,13 +58,11 @@ ip_nat_proto_find_get(u_int8_t protonum)
{ {
struct ip_nat_protocol *p; struct ip_nat_protocol *p;
/* we need to disable preemption to make sure 'p' doesn't get rcu_read_lock();
* removed until we've grabbed the reference */
preempt_disable();
p = __ip_nat_proto_find(protonum); p = __ip_nat_proto_find(protonum);
if (!try_module_get(p->me)) if (!try_module_get(p->me))
p = &ip_nat_unknown_protocol; p = &ip_nat_unknown_protocol;
preempt_enable(); rcu_read_unlock();
return p; return p;
} }
...@@ -120,8 +118,8 @@ static int ...@@ -120,8 +118,8 @@ static int
in_range(const struct ip_conntrack_tuple *tuple, in_range(const struct ip_conntrack_tuple *tuple,
const struct ip_nat_range *range) const struct ip_nat_range *range)
{ {
struct ip_nat_protocol *proto = struct ip_nat_protocol *proto;
__ip_nat_proto_find(tuple->dst.protonum); int ret = 0;
/* If we are supposed to map IPs, then we must be in the /* If we are supposed to map IPs, then we must be in the
range specified, otherwise let this drag us onto a new src IP. */ range specified, otherwise let this drag us onto a new src IP. */
...@@ -131,12 +129,15 @@ in_range(const struct ip_conntrack_tuple *tuple, ...@@ -131,12 +129,15 @@ in_range(const struct ip_conntrack_tuple *tuple,
return 0; return 0;
} }
rcu_read_lock();
proto = __ip_nat_proto_find(tuple->dst.protonum);
if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
|| proto->in_range(tuple, IP_NAT_MANIP_SRC, || proto->in_range(tuple, IP_NAT_MANIP_SRC,
&range->min, &range->max)) &range->min, &range->max))
return 1; ret = 1;
rcu_read_unlock();
return 0; return ret;
} }
static inline int static inline int
...@@ -260,27 +261,25 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, ...@@ -260,27 +261,25 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
/* 3) The per-protocol part of the manip is made to map into /* 3) The per-protocol part of the manip is made to map into
the range to make a unique tuple. */ the range to make a unique tuple. */
proto = ip_nat_proto_find_get(orig_tuple->dst.protonum); rcu_read_lock();
proto = __ip_nat_proto_find(orig_tuple->dst.protonum);
/* Change protocol info to have some randomization */ /* Change protocol info to have some randomization */
if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
proto->unique_tuple(tuple, range, maniptype, conntrack); proto->unique_tuple(tuple, range, maniptype, conntrack);
ip_nat_proto_put(proto); goto out;
return;
} }
/* Only bother mapping if it's not already in range and unique */ /* Only bother mapping if it's not already in range and unique */
if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
|| proto->in_range(tuple, maniptype, &range->min, &range->max)) || proto->in_range(tuple, maniptype, &range->min, &range->max))
&& !ip_nat_used_tuple(tuple, conntrack)) { && !ip_nat_used_tuple(tuple, conntrack))
ip_nat_proto_put(proto); goto out;
return;
}
/* Last change: get protocol to try to obtain unique tuple. */ /* Last change: get protocol to try to obtain unique tuple. */
proto->unique_tuple(tuple, range, maniptype, conntrack); proto->unique_tuple(tuple, range, maniptype, conntrack);
out:
ip_nat_proto_put(proto); rcu_read_unlock();
} }
unsigned int unsigned int
...@@ -360,12 +359,11 @@ manip_pkt(u_int16_t proto, ...@@ -360,12 +359,11 @@ manip_pkt(u_int16_t proto,
iph = (void *)(*pskb)->data + iphdroff; iph = (void *)(*pskb)->data + iphdroff;
/* Manipulate protcol part. */ /* Manipulate protcol part. */
p = ip_nat_proto_find_get(proto);
if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) { /* rcu_read_lock()ed by nf_hook_slow */
ip_nat_proto_put(p); p = __ip_nat_proto_find(proto);
if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
return 0; return 0;
}
ip_nat_proto_put(p);
iph = (void *)(*pskb)->data + iphdroff; iph = (void *)(*pskb)->data + iphdroff;
...@@ -422,6 +420,7 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, ...@@ -422,6 +420,7 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
struct icmphdr icmp; struct icmphdr icmp;
struct iphdr ip; struct iphdr ip;
} *inside; } *inside;
struct ip_conntrack_protocol *proto;
struct ip_conntrack_tuple inner, target; struct ip_conntrack_tuple inner, target;
int hdrlen = (*pskb)->nh.iph->ihl * 4; int hdrlen = (*pskb)->nh.iph->ihl * 4;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
...@@ -457,10 +456,11 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, ...@@ -457,10 +456,11 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n", DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
*pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
/* rcu_read_lock()ed by nf_hook_slow */
proto = __ip_conntrack_proto_find(inside->ip.protocol);
if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 + if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
sizeof(struct icmphdr) + inside->ip.ihl*4, sizeof(struct icmphdr) + inside->ip.ihl*4,
&inner, &inner, proto))
__ip_conntrack_proto_find(inside->ip.protocol)))
return 0; return 0;
/* Change inner back to look like incoming packet. We do the /* Change inner back to look like incoming packet. We do the
...@@ -515,7 +515,7 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto) ...@@ -515,7 +515,7 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto)
ret = -EBUSY; ret = -EBUSY;
goto out; goto out;
} }
ip_nat_protos[proto->protonum] = proto; rcu_assign_pointer(ip_nat_protos[proto->protonum], proto);
out: out:
write_unlock_bh(&ip_nat_lock); write_unlock_bh(&ip_nat_lock);
return ret; return ret;
...@@ -526,11 +526,10 @@ EXPORT_SYMBOL(ip_nat_protocol_register); ...@@ -526,11 +526,10 @@ EXPORT_SYMBOL(ip_nat_protocol_register);
void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
{ {
write_lock_bh(&ip_nat_lock); write_lock_bh(&ip_nat_lock);
ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol; rcu_assign_pointer(ip_nat_protos[proto->protonum],
&ip_nat_unknown_protocol);
write_unlock_bh(&ip_nat_lock); write_unlock_bh(&ip_nat_lock);
synchronize_rcu();
/* Someone could be still looking at the proto in a bh. */
synchronize_net();
} }
EXPORT_SYMBOL(ip_nat_protocol_unregister); EXPORT_SYMBOL(ip_nat_protocol_unregister);
...@@ -594,10 +593,10 @@ static int __init ip_nat_init(void) ...@@ -594,10 +593,10 @@ static int __init ip_nat_init(void)
/* Sew in builtin protocols. */ /* Sew in builtin protocols. */
write_lock_bh(&ip_nat_lock); write_lock_bh(&ip_nat_lock);
for (i = 0; i < MAX_IP_NAT_PROTO; i++) for (i = 0; i < MAX_IP_NAT_PROTO; i++)
ip_nat_protos[i] = &ip_nat_unknown_protocol; rcu_assign_pointer(ip_nat_protos[i], &ip_nat_unknown_protocol);
ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp; rcu_assign_pointer(ip_nat_protos[IPPROTO_TCP], &ip_nat_protocol_tcp);
ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp; rcu_assign_pointer(ip_nat_protos[IPPROTO_UDP], &ip_nat_protocol_udp);
ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp; rcu_assign_pointer(ip_nat_protos[IPPROTO_ICMP], &ip_nat_protocol_icmp);
write_unlock_bh(&ip_nat_lock); write_unlock_bh(&ip_nat_lock);
for (i = 0; i < ip_nat_htable_size; i++) { for (i = 0; i < ip_nat_htable_size; i++) {
...@@ -605,8 +604,8 @@ static int __init ip_nat_init(void) ...@@ -605,8 +604,8 @@ static int __init ip_nat_init(void)
} }
/* FIXME: Man, this is a hack. <SIGH> */ /* FIXME: Man, this is a hack. <SIGH> */
IP_NF_ASSERT(ip_conntrack_destroyed == NULL); IP_NF_ASSERT(rcu_dereference(ip_conntrack_destroyed) == NULL);
ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; rcu_assign_pointer(ip_conntrack_destroyed, ip_nat_cleanup_conntrack);
/* Initialize fake conntrack so that NAT will skip it */ /* Initialize fake conntrack so that NAT will skip it */
ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK; ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
...@@ -624,7 +623,8 @@ static int clean_nat(struct ip_conntrack *i, void *data) ...@@ -624,7 +623,8 @@ static int clean_nat(struct ip_conntrack *i, void *data)
static void __exit ip_nat_cleanup(void) static void __exit ip_nat_cleanup(void)
{ {
ip_ct_iterate_cleanup(&clean_nat, NULL); ip_ct_iterate_cleanup(&clean_nat, NULL);
ip_conntrack_destroyed = NULL; rcu_assign_pointer(ip_conntrack_destroyed, NULL);
synchronize_rcu();
vfree(bysource); vfree(bysource);
} }
......
...@@ -489,7 +489,7 @@ static int __init ipt_log_init(void) ...@@ -489,7 +489,7 @@ static int __init ipt_log_init(void)
static void __exit ipt_log_fini(void) static void __exit ipt_log_fini(void)
{ {
nf_log_unregister_logger(&ipt_log_logger); nf_log_unregister(&ipt_log_logger);
xt_unregister_target(&ipt_log_reg); xt_unregister_target(&ipt_log_reg);
} }
......
...@@ -419,7 +419,7 @@ static void __exit ipt_ulog_fini(void) ...@@ -419,7 +419,7 @@ static void __exit ipt_ulog_fini(void)
DEBUGP("ipt_ULOG: cleanup_module\n"); DEBUGP("ipt_ULOG: cleanup_module\n");
if (nflog) if (nflog)
nf_log_unregister_logger(&ipt_ulog_logger); nf_log_unregister(&ipt_ulog_logger);
xt_unregister_target(&ipt_ulog_reg); xt_unregister_target(&ipt_ulog_reg);
sock_release(nflognl->sk_socket); sock_release(nflognl->sk_socket);
......
...@@ -170,7 +170,9 @@ icmp_error_message(struct sk_buff *skb, ...@@ -170,7 +170,9 @@ icmp_error_message(struct sk_buff *skb,
return -NF_ACCEPT; return -NF_ACCEPT;
} }
/* rcu_read_lock()ed by nf_hook_slow */
innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp); dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
/* Are they talking about one of our connections? */ /* Are they talking about one of our connections? */
if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
......
...@@ -53,7 +53,7 @@ static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]; ...@@ -53,7 +53,7 @@ static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];
static inline struct nf_nat_protocol * static inline struct nf_nat_protocol *
__nf_nat_proto_find(u_int8_t protonum) __nf_nat_proto_find(u_int8_t protonum)
{ {
return nf_nat_protos[protonum]; return rcu_dereference(nf_nat_protos[protonum]);
} }
struct nf_nat_protocol * struct nf_nat_protocol *
...@@ -61,13 +61,11 @@ nf_nat_proto_find_get(u_int8_t protonum) ...@@ -61,13 +61,11 @@ nf_nat_proto_find_get(u_int8_t protonum)
{ {
struct nf_nat_protocol *p; struct nf_nat_protocol *p;
/* we need to disable preemption to make sure 'p' doesn't get rcu_read_lock();
* removed until we've grabbed the reference */
preempt_disable();
p = __nf_nat_proto_find(protonum); p = __nf_nat_proto_find(protonum);
if (!try_module_get(p->me)) if (!try_module_get(p->me))
p = &nf_nat_unknown_protocol; p = &nf_nat_unknown_protocol;
preempt_enable(); rcu_read_unlock();
return p; return p;
} }
...@@ -126,8 +124,8 @@ in_range(const struct nf_conntrack_tuple *tuple, ...@@ -126,8 +124,8 @@ in_range(const struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range) const struct nf_nat_range *range)
{ {
struct nf_nat_protocol *proto; struct nf_nat_protocol *proto;
int ret = 0;
proto = __nf_nat_proto_find(tuple->dst.protonum);
/* If we are supposed to map IPs, then we must be in the /* If we are supposed to map IPs, then we must be in the
range specified, otherwise let this drag us onto a new src IP. */ range specified, otherwise let this drag us onto a new src IP. */
if (range->flags & IP_NAT_RANGE_MAP_IPS) { if (range->flags & IP_NAT_RANGE_MAP_IPS) {
...@@ -136,12 +134,15 @@ in_range(const struct nf_conntrack_tuple *tuple, ...@@ -136,12 +134,15 @@ in_range(const struct nf_conntrack_tuple *tuple,
return 0; return 0;
} }
rcu_read_lock();
proto = __nf_nat_proto_find(tuple->dst.protonum);
if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
proto->in_range(tuple, IP_NAT_MANIP_SRC, proto->in_range(tuple, IP_NAT_MANIP_SRC,
&range->min, &range->max)) &range->min, &range->max))
return 1; ret = 1;
rcu_read_unlock();
return 0; return ret;
} }
static inline int static inline int
...@@ -268,27 +269,25 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, ...@@ -268,27 +269,25 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
/* 3) The per-protocol part of the manip is made to map into /* 3) The per-protocol part of the manip is made to map into
the range to make a unique tuple. */ the range to make a unique tuple. */
proto = nf_nat_proto_find_get(orig_tuple->dst.protonum); rcu_read_lock();
proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
/* Change protocol info to have some randomization */ /* Change protocol info to have some randomization */
if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
proto->unique_tuple(tuple, range, maniptype, ct); proto->unique_tuple(tuple, range, maniptype, ct);
nf_nat_proto_put(proto); goto out;
return;
} }
/* Only bother mapping if it's not already in range and unique */ /* Only bother mapping if it's not already in range and unique */
if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
proto->in_range(tuple, maniptype, &range->min, &range->max)) && proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
!nf_nat_used_tuple(tuple, ct)) { !nf_nat_used_tuple(tuple, ct))
nf_nat_proto_put(proto); goto out;
return;
}
/* Last change: get protocol to try to obtain unique tuple. */ /* Last change: get protocol to try to obtain unique tuple. */
proto->unique_tuple(tuple, range, maniptype, ct); proto->unique_tuple(tuple, range, maniptype, ct);
out:
nf_nat_proto_put(proto); rcu_read_unlock();
} }
unsigned int unsigned int
...@@ -369,12 +368,11 @@ manip_pkt(u_int16_t proto, ...@@ -369,12 +368,11 @@ manip_pkt(u_int16_t proto,
iph = (void *)(*pskb)->data + iphdroff; iph = (void *)(*pskb)->data + iphdroff;
/* Manipulate protocol part. */ /* Manipulate protocol part. */
p = nf_nat_proto_find_get(proto);
if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) { /* rcu_read_lock()ed by nf_hook_slow */
nf_nat_proto_put(p); p = __nf_nat_proto_find(proto);
if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
return 0; return 0;
}
nf_nat_proto_put(p);
iph = (void *)(*pskb)->data + iphdroff; iph = (void *)(*pskb)->data + iphdroff;
...@@ -431,6 +429,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, ...@@ -431,6 +429,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
struct icmphdr icmp; struct icmphdr icmp;
struct iphdr ip; struct iphdr ip;
} *inside; } *inside;
struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple inner, target; struct nf_conntrack_tuple inner, target;
int hdrlen = (*pskb)->nh.iph->ihl * 4; int hdrlen = (*pskb)->nh.iph->ihl * 4;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
...@@ -466,16 +465,16 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, ...@@ -466,16 +465,16 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n", DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
*pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
/* rcu_read_lock()ed by nf_hook_slow */
l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
if (!nf_ct_get_tuple(*pskb, if (!nf_ct_get_tuple(*pskb,
(*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr), (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr),
(*pskb)->nh.iph->ihl*4 + (*pskb)->nh.iph->ihl*4 +
sizeof(struct icmphdr) + inside->ip.ihl*4, sizeof(struct icmphdr) + inside->ip.ihl*4,
(u_int16_t)AF_INET, (u_int16_t)AF_INET,
inside->ip.protocol, inside->ip.protocol,
&inner, &inner, l3proto, l4proto))
l3proto,
__nf_ct_l4proto_find((u_int16_t)PF_INET,
inside->ip.protocol)))
return 0; return 0;
/* Change inner back to look like incoming packet. We do the /* Change inner back to look like incoming packet. We do the
...@@ -529,7 +528,7 @@ int nf_nat_protocol_register(struct nf_nat_protocol *proto) ...@@ -529,7 +528,7 @@ int nf_nat_protocol_register(struct nf_nat_protocol *proto)
ret = -EBUSY; ret = -EBUSY;
goto out; goto out;
} }
nf_nat_protos[proto->protonum] = proto; rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
out: out:
write_unlock_bh(&nf_nat_lock); write_unlock_bh(&nf_nat_lock);
return ret; return ret;
...@@ -540,11 +539,10 @@ EXPORT_SYMBOL(nf_nat_protocol_register); ...@@ -540,11 +539,10 @@ EXPORT_SYMBOL(nf_nat_protocol_register);
void nf_nat_protocol_unregister(struct nf_nat_protocol *proto) void nf_nat_protocol_unregister(struct nf_nat_protocol *proto)
{ {
write_lock_bh(&nf_nat_lock); write_lock_bh(&nf_nat_lock);
nf_nat_protos[proto->protonum] = &nf_nat_unknown_protocol; rcu_assign_pointer(nf_nat_protos[proto->protonum],
&nf_nat_unknown_protocol);
write_unlock_bh(&nf_nat_lock); write_unlock_bh(&nf_nat_lock);
synchronize_rcu();
/* Someone could be still looking at the proto in a bh. */
synchronize_net();
} }
EXPORT_SYMBOL(nf_nat_protocol_unregister); EXPORT_SYMBOL(nf_nat_protocol_unregister);
...@@ -608,10 +606,10 @@ static int __init nf_nat_init(void) ...@@ -608,10 +606,10 @@ static int __init nf_nat_init(void)
/* Sew in builtin protocols. */ /* Sew in builtin protocols. */
write_lock_bh(&nf_nat_lock); write_lock_bh(&nf_nat_lock);
for (i = 0; i < MAX_IP_NAT_PROTO; i++) for (i = 0; i < MAX_IP_NAT_PROTO; i++)
nf_nat_protos[i] = &nf_nat_unknown_protocol; rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
nf_nat_protos[IPPROTO_TCP] = &nf_nat_protocol_tcp; rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
nf_nat_protos[IPPROTO_UDP] = &nf_nat_protocol_udp; rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
nf_nat_protos[IPPROTO_ICMP] = &nf_nat_protocol_icmp; rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
write_unlock_bh(&nf_nat_lock); write_unlock_bh(&nf_nat_lock);
for (i = 0; i < nf_nat_htable_size; i++) { for (i = 0; i < nf_nat_htable_size; i++) {
...@@ -619,8 +617,8 @@ static int __init nf_nat_init(void) ...@@ -619,8 +617,8 @@ static int __init nf_nat_init(void)
} }
/* FIXME: Man, this is a hack. <SIGH> */ /* FIXME: Man, this is a hack. <SIGH> */
NF_CT_ASSERT(nf_conntrack_destroyed == NULL); NF_CT_ASSERT(rcu_dereference(nf_conntrack_destroyed) == NULL);
nf_conntrack_destroyed = &nf_nat_cleanup_conntrack; rcu_assign_pointer(nf_conntrack_destroyed, nf_nat_cleanup_conntrack);
/* Initialize fake conntrack so that NAT will skip it */ /* Initialize fake conntrack so that NAT will skip it */
nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
...@@ -644,7 +642,8 @@ static int clean_nat(struct nf_conn *i, void *data) ...@@ -644,7 +642,8 @@ static int clean_nat(struct nf_conn *i, void *data)
static void __exit nf_nat_cleanup(void) static void __exit nf_nat_cleanup(void)
{ {
nf_ct_iterate_cleanup(&clean_nat, NULL); nf_ct_iterate_cleanup(&clean_nat, NULL);
nf_conntrack_destroyed = NULL; rcu_assign_pointer(nf_conntrack_destroyed, NULL);
synchronize_rcu();
vfree(bysource); vfree(bysource);
nf_ct_l3proto_put(l3proto); nf_ct_l3proto_put(l3proto);
} }
......
...@@ -26,16 +26,16 @@ ...@@ -26,16 +26,16 @@
*/ */
#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
static int fast_convergence = 1; static int fast_convergence __read_mostly = 1;
static int max_increment = 16; static int max_increment __read_mostly = 16;
static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
static int initial_ssthresh = 100; static int initial_ssthresh __read_mostly = 100;
static int bic_scale = 41; static int bic_scale __read_mostly = 41;
static int tcp_friendliness = 1; static int tcp_friendliness __read_mostly = 1;
static u32 cube_rtt_scale; static u32 cube_rtt_scale __read_mostly;
static u32 beta_scale; static u32 beta_scale __read_mostly;
static u64 cube_factor; static u64 cube_factor __read_mostly;
/* Note parameters that are used for precomputing scale factors are read-only */ /* Note parameters that are used for precomputing scale factors are read-only */
module_param(fast_convergence, int, 0644); module_param(fast_convergence, int, 0644);
......
...@@ -14,18 +14,19 @@ ...@@ -14,18 +14,19 @@
#define BETA_MIN (1<<6) /* 0.5 with shift << 7 */ #define BETA_MIN (1<<6) /* 0.5 with shift << 7 */
#define BETA_MAX 102 /* 0.8 with shift << 7 */ #define BETA_MAX 102 /* 0.8 with shift << 7 */
static int use_rtt_scaling = 1; static int use_rtt_scaling __read_mostly = 1;
module_param(use_rtt_scaling, int, 0644); module_param(use_rtt_scaling, int, 0644);
MODULE_PARM_DESC(use_rtt_scaling, "turn on/off RTT scaling"); MODULE_PARM_DESC(use_rtt_scaling, "turn on/off RTT scaling");
static int use_bandwidth_switch = 1; static int use_bandwidth_switch __read_mostly = 1;
module_param(use_bandwidth_switch, int, 0644); module_param(use_bandwidth_switch, int, 0644);
MODULE_PARM_DESC(use_bandwidth_switch, "turn on/off bandwidth switcher"); MODULE_PARM_DESC(use_bandwidth_switch, "turn on/off bandwidth switcher");
struct htcp { struct htcp {
u32 alpha; /* Fixed point arith, << 7 */ u32 alpha; /* Fixed point arith, << 7 */
u8 beta; /* Fixed point arith, << 7 */ u8 beta; /* Fixed point arith, << 7 */
u8 modeswitch; /* Delay modeswitch until we had at least one congestion event */ u8 modeswitch; /* Delay modeswitch
until we had at least one congestion event */
u16 pkts_acked; u16 pkts_acked;
u32 packetcount; u32 packetcount;
u32 minRTT; u32 minRTT;
...@@ -44,14 +45,14 @@ struct htcp { ...@@ -44,14 +45,14 @@ struct htcp {
u32 lasttime; u32 lasttime;
}; };
static inline u32 htcp_cong_time(struct htcp *ca) static inline u32 htcp_cong_time(const struct htcp *ca)
{ {
return jiffies - ca->last_cong; return jiffies - ca->last_cong;
} }
static inline u32 htcp_ccount(struct htcp *ca) static inline u32 htcp_ccount(const struct htcp *ca)
{ {
return htcp_cong_time(ca)/ca->minRTT; return htcp_cong_time(ca) / ca->minRTT;
} }
static inline void htcp_reset(struct htcp *ca) static inline void htcp_reset(struct htcp *ca)
...@@ -67,10 +68,12 @@ static u32 htcp_cwnd_undo(struct sock *sk) ...@@ -67,10 +68,12 @@ static u32 htcp_cwnd_undo(struct sock *sk)
{ {
const struct tcp_sock *tp = tcp_sk(sk); const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk); struct htcp *ca = inet_csk_ca(sk);
ca->last_cong = ca->undo_last_cong; ca->last_cong = ca->undo_last_cong;
ca->maxRTT = ca->undo_maxRTT; ca->maxRTT = ca->undo_maxRTT;
ca->old_maxB = ca->undo_old_maxB; ca->old_maxB = ca->undo_old_maxB;
return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta);
return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta);
} }
static inline void measure_rtt(struct sock *sk) static inline void measure_rtt(struct sock *sk)
...@@ -78,17 +81,19 @@ static inline void measure_rtt(struct sock *sk) ...@@ -78,17 +81,19 @@ static inline void measure_rtt(struct sock *sk)
const struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk); const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk); struct htcp *ca = inet_csk_ca(sk);
u32 srtt = tp->srtt>>3; u32 srtt = tp->srtt >> 3;
/* keep track of minimum RTT seen so far, minRTT is zero at first */ /* keep track of minimum RTT seen so far, minRTT is zero at first */
if (ca->minRTT > srtt || !ca->minRTT) if (ca->minRTT > srtt || !ca->minRTT)
ca->minRTT = srtt; ca->minRTT = srtt;
/* max RTT */ /* max RTT */
if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) { if (icsk->icsk_ca_state == TCP_CA_Open
&& tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) {
if (ca->maxRTT < ca->minRTT) if (ca->maxRTT < ca->minRTT)
ca->maxRTT = ca->minRTT; ca->maxRTT = ca->minRTT;
if (ca->maxRTT < srtt && srtt <= ca->maxRTT+msecs_to_jiffies(20)) if (ca->maxRTT < srtt
&& srtt <= ca->maxRTT + msecs_to_jiffies(20))
ca->maxRTT = srtt; ca->maxRTT = srtt;
} }
} }
...@@ -116,15 +121,16 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked) ...@@ -116,15 +121,16 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
ca->packetcount += pkts_acked; ca->packetcount += pkts_acked;
if (ca->packetcount >= tp->snd_cwnd - (ca->alpha>>7? : 1) if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1)
&& now - ca->lasttime >= ca->minRTT && now - ca->lasttime >= ca->minRTT
&& ca->minRTT > 0) { && ca->minRTT > 0) {
__u32 cur_Bi = ca->packetcount*HZ/(now - ca->lasttime); __u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime);
if (htcp_ccount(ca) <= 3) { if (htcp_ccount(ca) <= 3) {
/* just after backoff */ /* just after backoff */
ca->minB = ca->maxB = ca->Bi = cur_Bi; ca->minB = ca->maxB = ca->Bi = cur_Bi;
} else { } else {
ca->Bi = (3*ca->Bi + cur_Bi)/4; ca->Bi = (3 * ca->Bi + cur_Bi) / 4;
if (ca->Bi > ca->maxB) if (ca->Bi > ca->maxB)
ca->maxB = ca->Bi; ca->maxB = ca->Bi;
if (ca->minB > ca->maxB) if (ca->minB > ca->maxB)
...@@ -142,7 +148,7 @@ static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT) ...@@ -142,7 +148,7 @@ static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT)
u32 old_maxB = ca->old_maxB; u32 old_maxB = ca->old_maxB;
ca->old_maxB = ca->maxB; ca->old_maxB = ca->maxB;
if (!between(5*maxB, 4*old_maxB, 6*old_maxB)) { if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
ca->beta = BETA_MIN; ca->beta = BETA_MIN;
ca->modeswitch = 0; ca->modeswitch = 0;
return; return;
...@@ -150,7 +156,7 @@ static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT) ...@@ -150,7 +156,7 @@ static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT)
} }
if (ca->modeswitch && minRTT > msecs_to_jiffies(10) && maxRTT) { if (ca->modeswitch && minRTT > msecs_to_jiffies(10) && maxRTT) {
ca->beta = (minRTT<<7)/maxRTT; ca->beta = (minRTT << 7) / maxRTT;
if (ca->beta < BETA_MIN) if (ca->beta < BETA_MIN)
ca->beta = BETA_MIN; ca->beta = BETA_MIN;
else if (ca->beta > BETA_MAX) else if (ca->beta > BETA_MAX)
...@@ -169,23 +175,26 @@ static inline void htcp_alpha_update(struct htcp *ca) ...@@ -169,23 +175,26 @@ static inline void htcp_alpha_update(struct htcp *ca)
if (diff > HZ) { if (diff > HZ) {
diff -= HZ; diff -= HZ;
factor = 1+ ( 10*diff + ((diff/2)*(diff/2)/HZ) )/HZ; factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / HZ)) / HZ;
} }
if (use_rtt_scaling && minRTT) { if (use_rtt_scaling && minRTT) {
u32 scale = (HZ<<3)/(10*minRTT); u32 scale = (HZ << 3) / (10 * minRTT);
scale = min(max(scale, 1U<<2), 10U<<3); /* clamping ratio to interval [0.5,10]<<3 */
factor = (factor<<3)/scale; /* clamping ratio to interval [0.5,10]<<3 */
scale = min(max(scale, 1U << 2), 10U << 3);
factor = (factor << 3) / scale;
if (!factor) if (!factor)
factor = 1; factor = 1;
} }
ca->alpha = 2*factor*((1<<7)-ca->beta); ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
if (!ca->alpha) if (!ca->alpha)
ca->alpha = ALPHA_BASE; ca->alpha = ALPHA_BASE;
} }
/* After we have the rtt data to calculate beta, we'd still prefer to wait one /*
* After we have the rtt data to calculate beta, we'd still prefer to wait one
* rtt before we adjust our beta to ensure we are working from a consistent * rtt before we adjust our beta to ensure we are working from a consistent
* data. * data.
* *
...@@ -202,15 +211,16 @@ static void htcp_param_update(struct sock *sk) ...@@ -202,15 +211,16 @@ static void htcp_param_update(struct sock *sk)
htcp_beta_update(ca, minRTT, maxRTT); htcp_beta_update(ca, minRTT, maxRTT);
htcp_alpha_update(ca); htcp_alpha_update(ca);
/* add slowly fading memory for maxRTT to accommodate routing changes etc */ /* add slowly fading memory for maxRTT to accommodate routing changes */
if (minRTT > 0 && maxRTT > minRTT) if (minRTT > 0 && maxRTT > minRTT)
ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100; ca->maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
} }
static u32 htcp_recalc_ssthresh(struct sock *sk) static u32 htcp_recalc_ssthresh(struct sock *sk)
{ {
const struct tcp_sock *tp = tcp_sk(sk); const struct tcp_sock *tp = tcp_sk(sk);
const struct htcp *ca = inet_csk_ca(sk); const struct htcp *ca = inet_csk_ca(sk);
htcp_param_update(sk); htcp_param_update(sk);
return max((tp->snd_cwnd * ca->beta) >> 7, 2U); return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
} }
...@@ -227,7 +237,6 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, ...@@ -227,7 +237,6 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
if (tp->snd_cwnd <= tp->snd_ssthresh) if (tp->snd_cwnd <= tp->snd_ssthresh)
tcp_slow_start(tp); tcp_slow_start(tp);
else { else {
measure_rtt(sk); measure_rtt(sk);
/* In dangerous area, increase slowly. /* In dangerous area, increase slowly.
......
...@@ -42,7 +42,8 @@ config IP6_NF_QUEUE ...@@ -42,7 +42,8 @@ config IP6_NF_QUEUE
config IP6_NF_IPTABLES config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)" tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6 && EXPERIMENTAL && NETFILTER_XTABLES depends on INET && IPV6 && EXPERIMENTAL
select NETFILTER_XTABLES
help help
ip6tables is a general, extensible packet identification framework. ip6tables is a general, extensible packet identification framework.
Currently only the packet filtering and packet mangling subsystem Currently only the packet filtering and packet mangling subsystem
......
...@@ -501,7 +501,7 @@ static int __init ip6t_log_init(void) ...@@ -501,7 +501,7 @@ static int __init ip6t_log_init(void)
static void __exit ip6t_log_fini(void) static void __exit ip6t_log_fini(void)
{ {
nf_log_unregister_logger(&ip6t_logger); nf_log_unregister(&ip6t_logger);
xt_unregister_target(&ip6t_log_reg); xt_unregister_target(&ip6t_log_reg);
} }
......
...@@ -66,6 +66,13 @@ match(const struct sk_buff *skb, ...@@ -66,6 +66,13 @@ match(const struct sk_buff *skb,
return 0; return 0;
} }
if (mh->ip6mh_proto != IPPROTO_NONE) {
duprintf("Dropping invalid MH Payload Proto: %u\n",
mh->ip6mh_proto);
*hotdrop = 1;
return 0;
}
return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type, return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type,
!!(mhinfo->invflags & IP6T_MH_INV_TYPE)); !!(mhinfo->invflags & IP6T_MH_INV_TYPE));
} }
......
...@@ -154,8 +154,8 @@ ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, ...@@ -154,8 +154,8 @@ ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
*/ */
if ((protoff < 0) || (protoff > (*pskb)->len)) { if ((protoff < 0) || (protoff > (*pskb)->len)) {
DEBUGP("ip6_conntrack_core: can't find proto in pkt\n"); DEBUGP("ip6_conntrack_core: can't find proto in pkt\n");
NF_CT_STAT_INC(error); NF_CT_STAT_INC_ATOMIC(error);
NF_CT_STAT_INC(invalid); NF_CT_STAT_INC_ATOMIC(invalid);
return -NF_ACCEPT; return -NF_ACCEPT;
} }
......
...@@ -182,6 +182,7 @@ icmpv6_error_message(struct sk_buff *skb, ...@@ -182,6 +182,7 @@ icmpv6_error_message(struct sk_buff *skb,
return -NF_ACCEPT; return -NF_ACCEPT;
} }
/* rcu_read_lock()ed by nf_hook_slow */
inproto = __nf_ct_l4proto_find(PF_INET6, inprotonum); inproto = __nf_ct_l4proto_find(PF_INET6, inprotonum);
/* Are they talking about one of our connections? */ /* Are they talking about one of our connections? */
......
...@@ -2297,16 +2297,17 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg ...@@ -2297,16 +2297,17 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
&sel, tmp.security, 1); &sel, tmp.security, 1);
security_xfrm_policy_free(&tmp); security_xfrm_policy_free(&tmp);
xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL);
if (xp == NULL) if (xp == NULL)
return -ENOENT; return -ENOENT;
err = 0; err = security_xfrm_policy_delete(xp);
if ((err = security_xfrm_policy_delete(xp))) xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL);
if (err)
goto out; goto out;
c.seq = hdr->sadb_msg_seq; c.seq = hdr->sadb_msg_seq;
c.pid = hdr->sadb_msg_pid; c.pid = hdr->sadb_msg_pid;
c.event = XFRM_MSG_DELPOLICY; c.event = XFRM_MSG_DELPOLICY;
......
...@@ -302,7 +302,9 @@ config NETFILTER_XT_TARGET_CONNMARK ...@@ -302,7 +302,9 @@ config NETFILTER_XT_TARGET_CONNMARK
tristate '"CONNMARK" target support' tristate '"CONNMARK" target support'
depends on NETFILTER_XTABLES depends on NETFILTER_XTABLES
depends on IP_NF_MANGLE || IP6_NF_MANGLE depends on IP_NF_MANGLE || IP6_NF_MANGLE
depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK) depends on IP_NF_CONNTRACK || NF_CONNTRACK
select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
select NF_CONNTRACK_MARK if NF_CONNTRACK
help help
This option adds a `CONNMARK' target, which allows one to manipulate This option adds a `CONNMARK' target, which allows one to manipulate
the connection mark value. Similar to the MARK target, but the connection mark value. Similar to the MARK target, but
...@@ -434,7 +436,9 @@ config NETFILTER_XT_MATCH_COMMENT ...@@ -434,7 +436,9 @@ config NETFILTER_XT_MATCH_COMMENT
config NETFILTER_XT_MATCH_CONNBYTES config NETFILTER_XT_MATCH_CONNBYTES
tristate '"connbytes" per-connection counter match support' tristate '"connbytes" per-connection counter match support'
depends on NETFILTER_XTABLES depends on NETFILTER_XTABLES
depends on (IP_NF_CONNTRACK && IP_NF_CT_ACCT) || (NF_CT_ACCT && NF_CONNTRACK) depends on IP_NF_CONNTRACK || NF_CONNTRACK
select IP_NF_CT_ACCT if IP_NF_CONNTRACK
select NF_CT_ACCT if NF_CONNTRACK
help help
This option adds a `connbytes' match, which allows you to match the This option adds a `connbytes' match, which allows you to match the
number of bytes and/or packets for each direction within a connection. number of bytes and/or packets for each direction within a connection.
...@@ -445,7 +449,9 @@ config NETFILTER_XT_MATCH_CONNBYTES ...@@ -445,7 +449,9 @@ config NETFILTER_XT_MATCH_CONNBYTES
config NETFILTER_XT_MATCH_CONNMARK config NETFILTER_XT_MATCH_CONNMARK
tristate '"connmark" connection mark match support' tristate '"connmark" connection mark match support'
depends on NETFILTER_XTABLES depends on NETFILTER_XTABLES
depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK) depends on IP_NF_CONNTRACK || NF_CONNTRACK
select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
select NF_CONNTRACK_MARK if NF_CONNTRACK
help help
This option adds a `connmark' match, which allows you to match the This option adds a `connmark' match, which allows you to match the
connection mark value previously set for the session by `CONNMARK'. connection mark value previously set for the session by `CONNMARK'.
......
...@@ -22,29 +22,34 @@ ...@@ -22,29 +22,34 @@
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/inetdevice.h> #include <linux/inetdevice.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <net/sock.h> #include <net/sock.h>
#include "nf_internals.h" #include "nf_internals.h"
static DEFINE_SPINLOCK(afinfo_lock); static DEFINE_MUTEX(afinfo_mutex);
struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly; struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly;
EXPORT_SYMBOL(nf_afinfo); EXPORT_SYMBOL(nf_afinfo);
int nf_register_afinfo(struct nf_afinfo *afinfo) int nf_register_afinfo(struct nf_afinfo *afinfo)
{ {
spin_lock(&afinfo_lock); int err;
err = mutex_lock_interruptible(&afinfo_mutex);
if (err < 0)
return err;
rcu_assign_pointer(nf_afinfo[afinfo->family], afinfo); rcu_assign_pointer(nf_afinfo[afinfo->family], afinfo);
spin_unlock(&afinfo_lock); mutex_unlock(&afinfo_mutex);
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(nf_register_afinfo); EXPORT_SYMBOL_GPL(nf_register_afinfo);
void nf_unregister_afinfo(struct nf_afinfo *afinfo) void nf_unregister_afinfo(struct nf_afinfo *afinfo)
{ {
spin_lock(&afinfo_lock); mutex_lock(&afinfo_mutex);
rcu_assign_pointer(nf_afinfo[afinfo->family], NULL); rcu_assign_pointer(nf_afinfo[afinfo->family], NULL);
spin_unlock(&afinfo_lock); mutex_unlock(&afinfo_mutex);
synchronize_rcu(); synchronize_rcu();
} }
EXPORT_SYMBOL_GPL(nf_unregister_afinfo); EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
...@@ -56,30 +61,31 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo); ...@@ -56,30 +61,31 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
* packets come back: if the hook is gone, the packet is discarded. */ * packets come back: if the hook is gone, the packet is discarded. */
struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly; struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly;
EXPORT_SYMBOL(nf_hooks); EXPORT_SYMBOL(nf_hooks);
static DEFINE_SPINLOCK(nf_hook_lock); static DEFINE_MUTEX(nf_hook_mutex);
int nf_register_hook(struct nf_hook_ops *reg) int nf_register_hook(struct nf_hook_ops *reg)
{ {
struct list_head *i; struct list_head *i;
int err;
spin_lock_bh(&nf_hook_lock); err = mutex_lock_interruptible(&nf_hook_mutex);
if (err < 0)
return err;
list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
if (reg->priority < ((struct nf_hook_ops *)i)->priority) if (reg->priority < ((struct nf_hook_ops *)i)->priority)
break; break;
} }
list_add_rcu(&reg->list, i->prev); list_add_rcu(&reg->list, i->prev);
spin_unlock_bh(&nf_hook_lock); mutex_unlock(&nf_hook_mutex);
synchronize_net();
return 0; return 0;
} }
EXPORT_SYMBOL(nf_register_hook); EXPORT_SYMBOL(nf_register_hook);
void nf_unregister_hook(struct nf_hook_ops *reg) void nf_unregister_hook(struct nf_hook_ops *reg)
{ {
spin_lock_bh(&nf_hook_lock); mutex_lock(&nf_hook_mutex);
list_del_rcu(&reg->list); list_del_rcu(&reg->list);
spin_unlock_bh(&nf_hook_lock); mutex_unlock(&nf_hook_mutex);
synchronize_net(); synchronize_net();
} }
...@@ -248,9 +254,12 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) ...@@ -248,9 +254,12 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
{ {
void (*attach)(struct sk_buff *, struct sk_buff *); void (*attach)(struct sk_buff *, struct sk_buff *);
if (skb->nfct && (attach = ip_ct_attach) != NULL) { if (skb->nfct) {
mb(); /* Just to be sure: must be read before executing this */ rcu_read_lock();
attach = rcu_dereference(ip_ct_attach);
if (attach)
attach(new, skb); attach(new, skb);
rcu_read_unlock();
} }
} }
EXPORT_SYMBOL(nf_ct_attach); EXPORT_SYMBOL(nf_ct_attach);
......
...@@ -318,6 +318,7 @@ destroy_conntrack(struct nf_conntrack *nfct) ...@@ -318,6 +318,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
struct nf_conn_help *help = nfct_help(ct); struct nf_conn_help *help = nfct_help(ct);
struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l3proto *l3proto;
struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_l4proto *l4proto;
typeof(nf_conntrack_destroyed) destroyed;
DEBUGP("destroy_conntrack(%p)\n", ct); DEBUGP("destroy_conntrack(%p)\n", ct);
NF_CT_ASSERT(atomic_read(&nfct->use) == 0); NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
...@@ -332,16 +333,21 @@ destroy_conntrack(struct nf_conntrack *nfct) ...@@ -332,16 +333,21 @@ destroy_conntrack(struct nf_conntrack *nfct)
/* To make sure we don't get any weird locking issues here: /* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock * destroy_conntrack() MUST NOT be called with a write lock
* to nf_conntrack_lock!!! -HW */ * to nf_conntrack_lock!!! -HW */
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num); l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
if (l3proto && l3proto->destroy) if (l3proto && l3proto->destroy)
l3proto->destroy(ct); l3proto->destroy(ct);
l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
if (l4proto && l4proto->destroy) if (l4proto && l4proto->destroy)
l4proto->destroy(ct); l4proto->destroy(ct);
if (nf_conntrack_destroyed) destroyed = rcu_dereference(nf_conntrack_destroyed);
nf_conntrack_destroyed(ct); if (destroyed)
destroyed(ct);
rcu_read_unlock();
write_lock_bh(&nf_conntrack_lock); write_lock_bh(&nf_conntrack_lock);
/* Expectations will have been removed in clean_from_lists, /* Expectations will have been removed in clean_from_lists,
...@@ -560,7 +566,7 @@ static int early_drop(struct list_head *chain) ...@@ -560,7 +566,7 @@ static int early_drop(struct list_head *chain)
if (del_timer(&ct->timeout)) { if (del_timer(&ct->timeout)) {
death_by_timeout((unsigned long)ct); death_by_timeout((unsigned long)ct);
dropped = 1; dropped = 1;
NF_CT_STAT_INC(early_drop); NF_CT_STAT_INC_ATOMIC(early_drop);
} }
nf_ct_put(ct); nf_ct_put(ct);
return dropped; return dropped;
...@@ -647,9 +653,14 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, ...@@ -647,9 +653,14 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl) const struct nf_conntrack_tuple *repl)
{ {
struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l3proto *l3proto;
struct nf_conn *ct;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(orig->src.l3num); l3proto = __nf_ct_l3proto_find(orig->src.l3num);
return __nf_conntrack_alloc(orig, repl, l3proto, 0); ct = __nf_conntrack_alloc(orig, repl, l3proto, 0);
rcu_read_unlock();
return ct;
} }
EXPORT_SYMBOL_GPL(nf_conntrack_alloc); EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
...@@ -813,11 +824,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) ...@@ -813,11 +824,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
/* Previously seen (loopback or untracked)? Ignore. */ /* Previously seen (loopback or untracked)? Ignore. */
if ((*pskb)->nfct) { if ((*pskb)->nfct) {
NF_CT_STAT_INC(ignore); NF_CT_STAT_INC_ATOMIC(ignore);
return NF_ACCEPT; return NF_ACCEPT;
} }
/* rcu_read_lock()ed by nf_hook_slow */
l3proto = __nf_ct_l3proto_find((u_int16_t)pf); l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
DEBUGP("not prepared to track yet or error occured\n"); DEBUGP("not prepared to track yet or error occured\n");
return -ret; return -ret;
...@@ -830,8 +843,8 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) ...@@ -830,8 +843,8 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
* core what to do with the packet. */ * core what to do with the packet. */
if (l4proto->error != NULL && if (l4proto->error != NULL &&
(ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) { (ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
NF_CT_STAT_INC(error); NF_CT_STAT_INC_ATOMIC(error);
NF_CT_STAT_INC(invalid); NF_CT_STAT_INC_ATOMIC(invalid);
return -ret; return -ret;
} }
...@@ -839,13 +852,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) ...@@ -839,13 +852,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
&set_reply, &ctinfo); &set_reply, &ctinfo);
if (!ct) { if (!ct) {
/* Not valid part of a connection */ /* Not valid part of a connection */
NF_CT_STAT_INC(invalid); NF_CT_STAT_INC_ATOMIC(invalid);
return NF_ACCEPT; return NF_ACCEPT;
} }
if (IS_ERR(ct)) { if (IS_ERR(ct)) {
/* Too stressed to deal. */ /* Too stressed to deal. */
NF_CT_STAT_INC(drop); NF_CT_STAT_INC_ATOMIC(drop);
return NF_DROP; return NF_DROP;
} }
...@@ -858,7 +871,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) ...@@ -858,7 +871,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
DEBUGP("nf_conntrack_in: Can't track with proto module\n"); DEBUGP("nf_conntrack_in: Can't track with proto module\n");
nf_conntrack_put((*pskb)->nfct); nf_conntrack_put((*pskb)->nfct);
(*pskb)->nfct = NULL; (*pskb)->nfct = NULL;
NF_CT_STAT_INC(invalid); NF_CT_STAT_INC_ATOMIC(invalid);
return -ret; return -ret;
} }
...@@ -872,10 +885,15 @@ EXPORT_SYMBOL_GPL(nf_conntrack_in); ...@@ -872,10 +885,15 @@ EXPORT_SYMBOL_GPL(nf_conntrack_in);
int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig) const struct nf_conntrack_tuple *orig)
{ {
return nf_ct_invert_tuple(inverse, orig, int ret;
rcu_read_lock();
ret = nf_ct_invert_tuple(inverse, orig,
__nf_ct_l3proto_find(orig->src.l3num), __nf_ct_l3proto_find(orig->src.l3num),
__nf_ct_l4proto_find(orig->src.l3num, __nf_ct_l4proto_find(orig->src.l3num,
orig->dst.protonum)); orig->dst.protonum));
rcu_read_unlock();
return ret;
} }
EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
...@@ -1105,7 +1123,7 @@ void nf_conntrack_cleanup(void) ...@@ -1105,7 +1123,7 @@ void nf_conntrack_cleanup(void)
{ {
int i; int i;
ip_ct_attach = NULL; rcu_assign_pointer(ip_ct_attach, NULL);
/* This makes sure all current packets have passed through /* This makes sure all current packets have passed through
netfilter framework. Roll on, two-stage module netfilter framework. Roll on, two-stage module
...@@ -1273,7 +1291,7 @@ int __init nf_conntrack_init(void) ...@@ -1273,7 +1291,7 @@ int __init nf_conntrack_init(void)
write_unlock_bh(&nf_conntrack_lock); write_unlock_bh(&nf_conntrack_lock);
/* For use by REJECT target */ /* For use by REJECT target */
ip_ct_attach = __nf_conntrack_attach; rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach);
/* Set up fake conntrack: /* Set up fake conntrack:
- to never be deleted, not in any hashes */ - to never be deleted, not in any hashes */
......
...@@ -66,7 +66,7 @@ __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto) ...@@ -66,7 +66,7 @@ __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL)) if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
return &nf_conntrack_l4proto_generic; return &nf_conntrack_l4proto_generic;
return nf_ct_protos[l3proto][l4proto]; return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
} }
EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find); EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
...@@ -77,11 +77,11 @@ nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto) ...@@ -77,11 +77,11 @@ nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto)
{ {
struct nf_conntrack_l4proto *p; struct nf_conntrack_l4proto *p;
preempt_disable(); rcu_read_lock();
p = __nf_ct_l4proto_find(l3proto, l4proto); p = __nf_ct_l4proto_find(l3proto, l4proto);
if (!try_module_get(p->me)) if (!try_module_get(p->me))
p = &nf_conntrack_l4proto_generic; p = &nf_conntrack_l4proto_generic;
preempt_enable(); rcu_read_unlock();
return p; return p;
} }
...@@ -98,11 +98,11 @@ nf_ct_l3proto_find_get(u_int16_t l3proto) ...@@ -98,11 +98,11 @@ nf_ct_l3proto_find_get(u_int16_t l3proto)
{ {
struct nf_conntrack_l3proto *p; struct nf_conntrack_l3proto *p;
preempt_disable(); rcu_read_lock();
p = __nf_ct_l3proto_find(l3proto); p = __nf_ct_l3proto_find(l3proto);
if (!try_module_get(p->me)) if (!try_module_get(p->me))
p = &nf_conntrack_l3proto_generic; p = &nf_conntrack_l3proto_generic;
preempt_enable(); rcu_read_unlock();
return p; return p;
} }
...@@ -137,10 +137,8 @@ void nf_ct_l3proto_module_put(unsigned short l3proto) ...@@ -137,10 +137,8 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
{ {
struct nf_conntrack_l3proto *p; struct nf_conntrack_l3proto *p;
preempt_disable(); /* rcu_read_lock not necessary since the caller holds a reference */
p = __nf_ct_l3proto_find(l3proto); p = __nf_ct_l3proto_find(l3proto);
preempt_enable();
module_put(p->me); module_put(p->me);
} }
EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
...@@ -202,7 +200,7 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) ...@@ -202,7 +200,7 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
ret = -EBUSY; ret = -EBUSY;
goto out_unlock; goto out_unlock;
} }
nf_ct_l3protos[proto->l3proto] = proto; rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
write_unlock_bh(&nf_conntrack_lock); write_unlock_bh(&nf_conntrack_lock);
ret = nf_ct_l3proto_register_sysctl(proto); ret = nf_ct_l3proto_register_sysctl(proto);
...@@ -217,35 +215,21 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) ...@@ -217,35 +215,21 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
} }
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
int nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
{ {
int ret = 0; BUG_ON(proto->l3proto >= AF_MAX);
if (proto->l3proto >= AF_MAX) {
ret = -EBUSY;
goto out;
}
write_lock_bh(&nf_conntrack_lock); write_lock_bh(&nf_conntrack_lock);
if (nf_ct_l3protos[proto->l3proto] != proto) { BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
write_unlock_bh(&nf_conntrack_lock); rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
ret = -EBUSY; &nf_conntrack_l3proto_generic);
goto out;
}
nf_ct_l3protos[proto->l3proto] = &nf_conntrack_l3proto_generic;
write_unlock_bh(&nf_conntrack_lock); write_unlock_bh(&nf_conntrack_lock);
synchronize_rcu();
nf_ct_l3proto_unregister_sysctl(proto); nf_ct_l3proto_unregister_sysctl(proto);
/* Somebody could be still looking at the proto in bh. */
synchronize_net();
/* Remove all contrack entries for this protocol */ /* Remove all contrack entries for this protocol */
nf_ct_iterate_cleanup(kill_l3proto, proto); nf_ct_iterate_cleanup(kill_l3proto, proto);
out:
return ret;
} }
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
...@@ -356,7 +340,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) ...@@ -356,7 +340,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
goto retry; goto retry;
} }
nf_ct_protos[l4proto->l3proto][l4proto->l4proto] = l4proto; rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto);
write_unlock_bh(&nf_conntrack_lock); write_unlock_bh(&nf_conntrack_lock);
ret = nf_ct_l4proto_register_sysctl(l4proto); ret = nf_ct_l4proto_register_sysctl(l4proto);
...@@ -371,40 +355,25 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) ...@@ -371,40 +355,25 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
} }
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
int nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
{ {
int ret = 0; BUG_ON(l4proto->l3proto >= PF_MAX);
if (l4proto->l3proto >= PF_MAX) {
ret = -EBUSY;
goto out;
}
if (l4proto == &nf_conntrack_l4proto_generic) { if (l4proto == &nf_conntrack_l4proto_generic) {
nf_ct_l4proto_unregister_sysctl(l4proto); nf_ct_l4proto_unregister_sysctl(l4proto);
goto out; return;
} }
write_lock_bh(&nf_conntrack_lock); write_lock_bh(&nf_conntrack_lock);
if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
!= l4proto) { rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
write_unlock_bh(&nf_conntrack_lock); &nf_conntrack_l4proto_generic);
ret = -EBUSY;
goto out;
}
nf_ct_protos[l4proto->l3proto][l4proto->l4proto]
= &nf_conntrack_l4proto_generic;
write_unlock_bh(&nf_conntrack_lock); write_unlock_bh(&nf_conntrack_lock);
synchronize_rcu();
nf_ct_l4proto_unregister_sysctl(l4proto); nf_ct_l4proto_unregister_sysctl(l4proto);
/* Somebody could be still looking at the proto in bh. */
synchronize_net();
/* Remove all contrack entries for this protocol */ /* Remove all contrack entries for this protocol */
nf_ct_iterate_cleanup(kill_l4proto, l4proto); nf_ct_iterate_cleanup(kill_l4proto, l4proto);
out:
return ret;
} }
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
...@@ -58,16 +58,16 @@ static DEFINE_RWLOCK(tcp_lock); ...@@ -58,16 +58,16 @@ static DEFINE_RWLOCK(tcp_lock);
/* "Be conservative in what you do, /* "Be conservative in what you do,
be liberal in what you accept from others." be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */ If it's non-zero, we mark only out of window RST segments as INVALID. */
int nf_ct_tcp_be_liberal __read_mostly = 0; static int nf_ct_tcp_be_liberal __read_mostly = 0;
/* If it is set to zero, we disable picking up already established /* If it is set to zero, we disable picking up already established
connections. */ connections. */
int nf_ct_tcp_loose __read_mostly = 1; static int nf_ct_tcp_loose __read_mostly = 1;
/* Max number of the retransmitted packets without receiving an (acceptable) /* Max number of the retransmitted packets without receiving an (acceptable)
ACK from the destination. If this number is reached, a shorter timer ACK from the destination. If this number is reached, a shorter timer
will be started. */ will be started. */
int nf_ct_tcp_max_retrans __read_mostly = 3; static int nf_ct_tcp_max_retrans __read_mostly = 3;
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR */ closely. They're more complex. --RR */
......
...@@ -14,62 +14,63 @@ ...@@ -14,62 +14,63 @@
#define NF_LOG_PREFIXLEN 128 #define NF_LOG_PREFIXLEN 128
static struct nf_logger *nf_logging[NPROTO]; /* = NULL */ static struct nf_logger *nf_loggers[NPROTO];
static DEFINE_SPINLOCK(nf_log_lock); static DEFINE_MUTEX(nf_log_mutex);
/* return EBUSY if somebody else is registered, EEXIST if the same logger /* return EBUSY if somebody else is registered, EEXIST if the same logger
* is registred, 0 on success. */ * is registred, 0 on success. */
int nf_log_register(int pf, struct nf_logger *logger) int nf_log_register(int pf, struct nf_logger *logger)
{ {
int ret = -EBUSY; int ret;
if (pf >= NPROTO) if (pf >= NPROTO)
return -EINVAL; return -EINVAL;
/* Any setup of logging members must be done before /* Any setup of logging members must be done before
* substituting pointer. */ * substituting pointer. */
spin_lock(&nf_log_lock); ret = mutex_lock_interruptible(&nf_log_mutex);
if (!nf_logging[pf]) { if (ret < 0)
rcu_assign_pointer(nf_logging[pf], logger); return ret;
ret = 0;
} else if (nf_logging[pf] == logger) if (!nf_loggers[pf])
rcu_assign_pointer(nf_loggers[pf], logger);
else if (nf_loggers[pf] == logger)
ret = -EEXIST; ret = -EEXIST;
else
ret = -EBUSY;
spin_unlock(&nf_log_lock); mutex_unlock(&nf_log_mutex);
return ret; return ret;
} }
EXPORT_SYMBOL(nf_log_register); EXPORT_SYMBOL(nf_log_register);
int nf_log_unregister_pf(int pf) void nf_log_unregister_pf(int pf)
{ {
if (pf >= NPROTO) if (pf >= NPROTO)
return -EINVAL; return;
mutex_lock(&nf_log_mutex);
spin_lock(&nf_log_lock); rcu_assign_pointer(nf_loggers[pf], NULL);
nf_logging[pf] = NULL; mutex_unlock(&nf_log_mutex);
spin_unlock(&nf_log_lock);
/* Give time to concurrent readers. */ /* Give time to concurrent readers. */
synchronize_net(); synchronize_rcu();
return 0;
} }
EXPORT_SYMBOL(nf_log_unregister_pf); EXPORT_SYMBOL(nf_log_unregister_pf);
void nf_log_unregister_logger(struct nf_logger *logger) void nf_log_unregister(struct nf_logger *logger)
{ {
int i; int i;
spin_lock(&nf_log_lock); mutex_lock(&nf_log_mutex);
for (i = 0; i < NPROTO; i++) { for (i = 0; i < NPROTO; i++) {
if (nf_logging[i] == logger) if (nf_loggers[i] == logger)
nf_logging[i] = NULL; rcu_assign_pointer(nf_loggers[i], NULL);
} }
spin_unlock(&nf_log_lock); mutex_unlock(&nf_log_mutex);
synchronize_net(); synchronize_rcu();
} }
EXPORT_SYMBOL(nf_log_unregister_logger); EXPORT_SYMBOL(nf_log_unregister);
void nf_log_packet(int pf, void nf_log_packet(int pf,
unsigned int hooknum, unsigned int hooknum,
...@@ -84,7 +85,7 @@ void nf_log_packet(int pf, ...@@ -84,7 +85,7 @@ void nf_log_packet(int pf,
struct nf_logger *logger; struct nf_logger *logger;
rcu_read_lock(); rcu_read_lock();
logger = rcu_dereference(nf_logging[pf]); logger = rcu_dereference(nf_loggers[pf]);
if (logger) { if (logger) {
va_start(args, fmt); va_start(args, fmt);
vsnprintf(prefix, sizeof(prefix), fmt, args); vsnprintf(prefix, sizeof(prefix), fmt, args);
...@@ -131,7 +132,7 @@ static int seq_show(struct seq_file *s, void *v) ...@@ -131,7 +132,7 @@ static int seq_show(struct seq_file *s, void *v)
loff_t *pos = v; loff_t *pos = v;
const struct nf_logger *logger; const struct nf_logger *logger;
logger = rcu_dereference(nf_logging[*pos]); logger = rcu_dereference(nf_loggers[*pos]);
if (!logger) if (!logger)
return seq_printf(s, "%2lld NONE\n", *pos); return seq_printf(s, "%2lld NONE\n", *pos);
......
...@@ -1077,7 +1077,7 @@ static int __init nfnetlink_log_init(void) ...@@ -1077,7 +1077,7 @@ static int __init nfnetlink_log_init(void)
static void __exit nfnetlink_log_fini(void) static void __exit nfnetlink_log_fini(void)
{ {
nf_log_unregister_logger(&nfulnl_logger); nf_log_unregister(&nfulnl_logger);
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_log", proc_net_netfilter); remove_proc_entry("nfnetlink_log", proc_net_netfilter);
#endif #endif
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#include <linux/ip.h> #include <linux/ip.h>
#include <net/checksum.h> #include <net/checksum.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/x_tables.h> #include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_CLASSIFY.h> #include <linux/netfilter/xt_CLASSIFY.h>
...@@ -55,9 +57,9 @@ static struct xt_target xt_classify_target[] = { ...@@ -55,9 +57,9 @@ static struct xt_target xt_classify_target[] = {
.target = target, .target = target,
.targetsize = sizeof(struct xt_classify_target_info), .targetsize = sizeof(struct xt_classify_target_info),
.table = "mangle", .table = "mangle",
.hooks = (1 << NF_IP_LOCAL_OUT) | .hooks = (1 << NF_IP6_LOCAL_OUT) |
(1 << NF_IP_FORWARD) | (1 << NF_IP6_FORWARD) |
(1 << NF_IP_POST_ROUTING), (1 << NF_IP6_POST_ROUTING),
.me = THIS_MODULE, .me = THIS_MODULE,
}, },
}; };
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/xt_mac.h> #include <linux/netfilter/xt_mac.h>
#include <linux/netfilter/x_tables.h> #include <linux/netfilter/x_tables.h>
...@@ -59,9 +60,9 @@ static struct xt_match xt_mac_match[] = { ...@@ -59,9 +60,9 @@ static struct xt_match xt_mac_match[] = {
.family = AF_INET6, .family = AF_INET6,
.match = match, .match = match,
.matchsize = sizeof(struct xt_mac_info), .matchsize = sizeof(struct xt_mac_info),
.hooks = (1 << NF_IP_PRE_ROUTING) | .hooks = (1 << NF_IP6_PRE_ROUTING) |
(1 << NF_IP_LOCAL_IN) | (1 << NF_IP6_LOCAL_IN) |
(1 << NF_IP_FORWARD), (1 << NF_IP6_FORWARD),
.me = THIS_MODULE, .me = THIS_MODULE,
}, },
}; };
......
...@@ -1997,6 +1997,11 @@ void xfrm_audit_log(uid_t auid, u32 sid, int type, int result, ...@@ -1997,6 +1997,11 @@ void xfrm_audit_log(uid_t auid, u32 sid, int type, int result,
if (audit_enabled == 0) if (audit_enabled == 0)
return; return;
BUG_ON((type == AUDIT_MAC_IPSEC_ADDSA ||
type == AUDIT_MAC_IPSEC_DELSA) && !x);
BUG_ON((type == AUDIT_MAC_IPSEC_ADDSPD ||
type == AUDIT_MAC_IPSEC_DELSPD) && !xp);
audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type); audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type);
if (audit_buf == NULL) if (audit_buf == NULL)
return; return;
......
...@@ -1273,10 +1273,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -1273,10 +1273,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete); xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete);
security_xfrm_policy_free(&tmp); security_xfrm_policy_free(&tmp);
} }
if (delete)
xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL);
if (xp == NULL) if (xp == NULL)
return -ENOENT; return -ENOENT;
...@@ -1292,8 +1288,14 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -1292,8 +1288,14 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
MSG_DONTWAIT); MSG_DONTWAIT);
} }
} else { } else {
if ((err = security_xfrm_policy_delete(xp)) != 0) err = security_xfrm_policy_delete(xp);
xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL);
if (err != 0)
goto out; goto out;
c.data.byid = p->index; c.data.byid = p->index;
c.event = nlh->nlmsg_type; c.event = nlh->nlmsg_type;
c.seq = nlh->nlmsg_seq; c.seq = nlh->nlmsg_seq;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment