Commit 89697f1d authored by Linus Torvalds's avatar Linus Torvalds

Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (25 commits)
  [XFRM]: Fix OOPSes in xfrm_audit_log().
  [TCP]: cleanup of htcp (resend)
  [TCP]: Use read mostly for CUBIC parameters.
  [NETFILTER]: nf_conntrack_tcp: make sysctl variables static
  [NETFILTER]: ip6t_mh: drop piggyback payload packet on MH packets
  [NETFILTER]: Fix whitespace errors
  [NETFILTER]: Kconfig: improve dependency handling
  [NETFILTER]: xt_mac/xt_CLASSIFY: use IPv6 hook names for IPv6 registration
  [NETFILTER]: nf_conntrack: change nf_conntrack_l[34]proto_unregister to void
  [NETFILTER]: nf_conntrack: properly use RCU for nf_conntrack_destroyed callback
  [NETFILTER]: ip_conntrack: properly use RCU for ip_conntrack_destroyed callback
  [NETFILTER]: nf_conntrack: fix invalid conntrack statistics RCU assumption
  [NETFILTER]: ip_conntrack: fix invalid conntrack statistics RCU assumption
  [NETFILTER]: nf_conntrack: properly use RCU API for nf_ct_protos/nf_ct_l3protos arrays
  [NETFILTER]: ip_conntrack: properly use RCU API for ip_ct_protos array
  [NETFILTER]: nf_nat: properly use RCU API for nf_nat_protos array
  [NETFILTER]: ip_nat: properly use RCU API for ip_nat_protos array
  [NETFILTER]: nf_log: minor cleanups
  [NETFILTER]: nf_log: switch logger registration/unregistration to mutex
  [NETFILTER]: nf_log: make nf_log_unregister_pf return void
  ...
parents fd19e44f 13fcfbb0
......@@ -172,8 +172,8 @@ struct nf_logger {
/* Function to register/unregister log function. */
int nf_log_register(int pf, struct nf_logger *logger);
int nf_log_unregister_pf(int pf);
void nf_log_unregister_logger(struct nf_logger *logger);
void nf_log_unregister(struct nf_logger *logger);
void nf_log_unregister_pf(int pf);
/* Calls the registered backend logging function */
void nf_log_packet(int pf,
......
......@@ -301,6 +301,12 @@ extern unsigned int ip_conntrack_htable_size;
extern int ip_conntrack_checksum;
#define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
#define CONNTRACK_STAT_INC_ATOMIC(count) \
do { \
local_bh_disable(); \
__get_cpu_var(ip_conntrack_stat).count++; \
local_bh_enable(); \
} while (0)
#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
#include <linux/notifier.h>
......
......@@ -257,6 +257,12 @@ extern int nf_conntrack_max;
DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++)
#define NF_CT_STAT_INC_ATOMIC(count) \
do { \
local_bh_disable(); \
__get_cpu_var(nf_conntrack_stat).count++; \
local_bh_enable(); \
} while (0)
/* no helper, no nat */
#define NF_CT_F_BASIC 0
......
......@@ -89,7 +89,7 @@ extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
/* Protocol registration. */
extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
extern int nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto);
extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto);
extern struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto);
......@@ -106,7 +106,7 @@ __nf_ct_l3proto_find(u_int16_t l3proto)
{
if (unlikely(l3proto >= AF_MAX))
return &nf_conntrack_l3proto_generic;
return nf_ct_l3protos[l3proto];
return rcu_dereference(nf_ct_l3protos[l3proto]);
}
#endif /*_NF_CONNTRACK_L3PROTO_H*/
......@@ -109,7 +109,7 @@ extern void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p);
/* Protocol registration. */
extern int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *proto);
extern int nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *proto);
extern void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *proto);
/* Generic netlink helpers */
extern int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
......
......@@ -208,7 +208,7 @@ static int __init ebt_log_init(void)
static void __exit ebt_log_fini(void)
{
nf_log_unregister_logger(&ebt_log_logger);
nf_log_unregister(&ebt_log_logger);
ebt_unregister_watcher(&log);
}
......
......@@ -323,7 +323,7 @@ static void __exit ebt_ulog_fini(void)
ebt_ulog_buff_t *ub;
int i;
nf_log_unregister_logger(&ebt_ulog_logger);
nf_log_unregister(&ebt_ulog_logger);
ebt_unregister_watcher(&ulog);
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
ub = &ulog_buffers[i];
......
......@@ -226,7 +226,7 @@ config IP_NF_QUEUE
config IP_NF_IPTABLES
tristate "IP tables support (required for filtering/masq/NAT)"
depends on NETFILTER_XTABLES
select NETFILTER_XTABLES
help
iptables is a general, extensible packet identification framework.
The packet filtering and full NAT (masquerading, port forwarding,
......@@ -606,7 +606,9 @@ config IP_NF_TARGET_TTL
config IP_NF_TARGET_CLUSTERIP
tristate "CLUSTERIP target support (EXPERIMENTAL)"
depends on IP_NF_MANGLE && EXPERIMENTAL
depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
select NF_CONNTRACK_MARK if NF_CONNTRACK_IPV4
help
The CLUSTERIP target allows you to build load-balancing clusters of
network servers without having a dedicated load-balancing
......@@ -629,7 +631,7 @@ config IP_NF_RAW
# ARP tables
config IP_NF_ARPTABLES
tristate "ARP tables support"
depends on NETFILTER_XTABLES
select NETFILTER_XTABLES
help
arptables is a general, extensible packet identification framework.
The ARP packet filtering and mangling (manipulation)subsystems
......
......@@ -303,6 +303,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
struct ip_conntrack_protocol *proto;
struct ip_conntrack_helper *helper;
typeof(ip_conntrack_destroyed) destroyed;
DEBUGP("destroy_conntrack(%p)\n", ct);
IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
......@@ -318,12 +319,16 @@ destroy_conntrack(struct nf_conntrack *nfct)
/* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock
* to ip_conntrack_lock!!! -HW */
rcu_read_lock();
proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
if (proto && proto->destroy)
proto->destroy(ct);
if (ip_conntrack_destroyed)
ip_conntrack_destroyed(ct);
destroyed = rcu_dereference(ip_conntrack_destroyed);
if (destroyed)
destroyed(ct);
rcu_read_unlock();
write_lock_bh(&ip_conntrack_lock);
/* Expectations will have been removed in clean_from_lists,
......@@ -536,7 +541,7 @@ static int early_drop(struct list_head *chain)
if (del_timer(&ct->timeout)) {
death_by_timeout((unsigned long)ct);
dropped = 1;
CONNTRACK_STAT_INC(early_drop);
CONNTRACK_STAT_INC_ATOMIC(early_drop);
}
ip_conntrack_put(ct);
return dropped;
......@@ -595,13 +600,13 @@ ip_conntrack_proto_find_get(u_int8_t protocol)
{
struct ip_conntrack_protocol *p;
preempt_disable();
rcu_read_lock();
p = __ip_conntrack_proto_find(protocol);
if (p) {
if (!try_module_get(p->me))
p = &ip_conntrack_generic_protocol;
}
preempt_enable();
rcu_read_unlock();
return p;
}
......@@ -802,7 +807,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
/* Previously seen (loopback or untracked)? Ignore. */
if ((*pskb)->nfct) {
CONNTRACK_STAT_INC(ignore);
CONNTRACK_STAT_INC_ATOMIC(ignore);
return NF_ACCEPT;
}
......@@ -830,6 +835,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
}
#endif
/* rcu_read_lock()ed by nf_hook_slow */
proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
/* It may be an special packet, error, unclean...
......@@ -837,20 +843,20 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
* core what to do with the packet. */
if (proto->error != NULL
&& (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
CONNTRACK_STAT_INC(error);
CONNTRACK_STAT_INC(invalid);
CONNTRACK_STAT_INC_ATOMIC(error);
CONNTRACK_STAT_INC_ATOMIC(invalid);
return -ret;
}
if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
/* Not valid part of a connection */
CONNTRACK_STAT_INC(invalid);
CONNTRACK_STAT_INC_ATOMIC(invalid);
return NF_ACCEPT;
}
if (IS_ERR(ct)) {
/* Too stressed to deal. */
CONNTRACK_STAT_INC(drop);
CONNTRACK_STAT_INC_ATOMIC(drop);
return NF_DROP;
}
......@@ -862,7 +868,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
* the netfilter core what to do*/
nf_conntrack_put((*pskb)->nfct);
(*pskb)->nfct = NULL;
CONNTRACK_STAT_INC(invalid);
CONNTRACK_STAT_INC_ATOMIC(invalid);
return -ret;
}
......@@ -875,8 +881,15 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
int invert_tuplepr(struct ip_conntrack_tuple *inverse,
const struct ip_conntrack_tuple *orig)
{
return ip_ct_invert_tuple(inverse, orig,
__ip_conntrack_proto_find(orig->dst.protonum));
struct ip_conntrack_protocol *proto;
int ret;
rcu_read_lock();
proto = __ip_conntrack_proto_find(orig->dst.protonum);
ret = ip_ct_invert_tuple(inverse, orig, proto);
rcu_read_unlock();
return ret;
}
/* Would two expected things clash? */
......@@ -1354,7 +1367,7 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
ip_ct_attach = NULL;
rcu_assign_pointer(ip_ct_attach, NULL);
/* This makes sure all current packets have passed through
netfilter framework. Roll on, two-stage module
......@@ -1507,15 +1520,15 @@ int __init ip_conntrack_init(void)
/* Don't NEED lock here, but good form anyway. */
write_lock_bh(&ip_conntrack_lock);
for (i = 0; i < MAX_IP_CT_PROTO; i++)
ip_ct_protos[i] = &ip_conntrack_generic_protocol;
rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol);
/* Sew in builtin protocols. */
ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp);
rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp);
rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp);
write_unlock_bh(&ip_conntrack_lock);
/* For use by ipt_REJECT */
ip_ct_attach = ip_conntrack_attach;
rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach);
/* Set up fake conntrack:
- to never be deleted, not in any hashes */
......
......@@ -796,7 +796,7 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
ret = -EBUSY;
goto out;
}
ip_ct_protos[proto->proto] = proto;
rcu_assign_pointer(ip_ct_protos[proto->proto], proto);
out:
write_unlock_bh(&ip_conntrack_lock);
return ret;
......@@ -805,11 +805,10 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
{
write_lock_bh(&ip_conntrack_lock);
ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
rcu_assign_pointer(ip_ct_protos[proto->proto],
&ip_conntrack_generic_protocol);
write_unlock_bh(&ip_conntrack_lock);
/* Somebody could be still looking at the proto in bh. */
synchronize_net();
synchronize_rcu();
/* Remove all contrack entries for this protocol */
ip_ct_iterate_cleanup(kill_proto, &proto->proto);
......
......@@ -50,7 +50,7 @@ static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
static inline struct ip_nat_protocol *
__ip_nat_proto_find(u_int8_t protonum)
{
return ip_nat_protos[protonum];
return rcu_dereference(ip_nat_protos[protonum]);
}
struct ip_nat_protocol *
......@@ -58,13 +58,11 @@ ip_nat_proto_find_get(u_int8_t protonum)
{
struct ip_nat_protocol *p;
/* we need to disable preemption to make sure 'p' doesn't get
* removed until we've grabbed the reference */
preempt_disable();
rcu_read_lock();
p = __ip_nat_proto_find(protonum);
if (!try_module_get(p->me))
p = &ip_nat_unknown_protocol;
preempt_enable();
rcu_read_unlock();
return p;
}
......@@ -120,8 +118,8 @@ static int
in_range(const struct ip_conntrack_tuple *tuple,
const struct ip_nat_range *range)
{
struct ip_nat_protocol *proto =
__ip_nat_proto_find(tuple->dst.protonum);
struct ip_nat_protocol *proto;
int ret = 0;
/* If we are supposed to map IPs, then we must be in the
range specified, otherwise let this drag us onto a new src IP. */
......@@ -131,12 +129,15 @@ in_range(const struct ip_conntrack_tuple *tuple,
return 0;
}
rcu_read_lock();
proto = __ip_nat_proto_find(tuple->dst.protonum);
if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
|| proto->in_range(tuple, IP_NAT_MANIP_SRC,
&range->min, &range->max))
return 1;
ret = 1;
rcu_read_unlock();
return 0;
return ret;
}
static inline int
......@@ -260,27 +261,25 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
/* 3) The per-protocol part of the manip is made to map into
the range to make a unique tuple. */
proto = ip_nat_proto_find_get(orig_tuple->dst.protonum);
rcu_read_lock();
proto = __ip_nat_proto_find(orig_tuple->dst.protonum);
/* Change protocol info to have some randomization */
if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
proto->unique_tuple(tuple, range, maniptype, conntrack);
ip_nat_proto_put(proto);
return;
goto out;
}
/* Only bother mapping if it's not already in range and unique */
if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
|| proto->in_range(tuple, maniptype, &range->min, &range->max))
&& !ip_nat_used_tuple(tuple, conntrack)) {
ip_nat_proto_put(proto);
return;
}
&& !ip_nat_used_tuple(tuple, conntrack))
goto out;
/* Last change: get protocol to try to obtain unique tuple. */
proto->unique_tuple(tuple, range, maniptype, conntrack);
ip_nat_proto_put(proto);
out:
rcu_read_unlock();
}
unsigned int
......@@ -360,12 +359,11 @@ manip_pkt(u_int16_t proto,
iph = (void *)(*pskb)->data + iphdroff;
/* Manipulate protcol part. */
p = ip_nat_proto_find_get(proto);
if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) {
ip_nat_proto_put(p);
/* rcu_read_lock()ed by nf_hook_slow */
p = __ip_nat_proto_find(proto);
if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
return 0;
}
ip_nat_proto_put(p);
iph = (void *)(*pskb)->data + iphdroff;
......@@ -422,6 +420,7 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
struct icmphdr icmp;
struct iphdr ip;
} *inside;
struct ip_conntrack_protocol *proto;
struct ip_conntrack_tuple inner, target;
int hdrlen = (*pskb)->nh.iph->ihl * 4;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
......@@ -457,10 +456,11 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
*pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
/* rcu_read_lock()ed by nf_hook_slow */
proto = __ip_conntrack_proto_find(inside->ip.protocol);
if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
sizeof(struct icmphdr) + inside->ip.ihl*4,
&inner,
__ip_conntrack_proto_find(inside->ip.protocol)))
&inner, proto))
return 0;
/* Change inner back to look like incoming packet. We do the
......@@ -515,7 +515,7 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto)
ret = -EBUSY;
goto out;
}
ip_nat_protos[proto->protonum] = proto;
rcu_assign_pointer(ip_nat_protos[proto->protonum], proto);
out:
write_unlock_bh(&ip_nat_lock);
return ret;
......@@ -526,11 +526,10 @@ EXPORT_SYMBOL(ip_nat_protocol_register);
void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
{
write_lock_bh(&ip_nat_lock);
ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
rcu_assign_pointer(ip_nat_protos[proto->protonum],
&ip_nat_unknown_protocol);
write_unlock_bh(&ip_nat_lock);
/* Someone could be still looking at the proto in a bh. */
synchronize_net();
synchronize_rcu();
}
EXPORT_SYMBOL(ip_nat_protocol_unregister);
......@@ -594,10 +593,10 @@ static int __init ip_nat_init(void)
/* Sew in builtin protocols. */
write_lock_bh(&ip_nat_lock);
for (i = 0; i < MAX_IP_NAT_PROTO; i++)
ip_nat_protos[i] = &ip_nat_unknown_protocol;
ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
rcu_assign_pointer(ip_nat_protos[i], &ip_nat_unknown_protocol);
rcu_assign_pointer(ip_nat_protos[IPPROTO_TCP], &ip_nat_protocol_tcp);
rcu_assign_pointer(ip_nat_protos[IPPROTO_UDP], &ip_nat_protocol_udp);
rcu_assign_pointer(ip_nat_protos[IPPROTO_ICMP], &ip_nat_protocol_icmp);
write_unlock_bh(&ip_nat_lock);
for (i = 0; i < ip_nat_htable_size; i++) {
......@@ -605,8 +604,8 @@ static int __init ip_nat_init(void)
}
/* FIXME: Man, this is a hack. <SIGH> */
IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
IP_NF_ASSERT(rcu_dereference(ip_conntrack_destroyed) == NULL);
rcu_assign_pointer(ip_conntrack_destroyed, ip_nat_cleanup_conntrack);
/* Initialize fake conntrack so that NAT will skip it */
ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
......@@ -624,7 +623,8 @@ static int clean_nat(struct ip_conntrack *i, void *data)
static void __exit ip_nat_cleanup(void)
{
ip_ct_iterate_cleanup(&clean_nat, NULL);
ip_conntrack_destroyed = NULL;
rcu_assign_pointer(ip_conntrack_destroyed, NULL);
synchronize_rcu();
vfree(bysource);
}
......
......@@ -489,7 +489,7 @@ static int __init ipt_log_init(void)
static void __exit ipt_log_fini(void)
{
nf_log_unregister_logger(&ipt_log_logger);
nf_log_unregister(&ipt_log_logger);
xt_unregister_target(&ipt_log_reg);
}
......
......@@ -419,7 +419,7 @@ static void __exit ipt_ulog_fini(void)
DEBUGP("ipt_ULOG: cleanup_module\n");
if (nflog)
nf_log_unregister_logger(&ipt_ulog_logger);
nf_log_unregister(&ipt_ulog_logger);
xt_unregister_target(&ipt_ulog_reg);
sock_release(nflognl->sk_socket);
......
......@@ -170,7 +170,9 @@ icmp_error_message(struct sk_buff *skb,
return -NF_ACCEPT;
}
/* rcu_read_lock()ed by nf_hook_slow */
innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
......
......@@ -53,7 +53,7 @@ static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];
static inline struct nf_nat_protocol *
__nf_nat_proto_find(u_int8_t protonum)
{
return nf_nat_protos[protonum];
return rcu_dereference(nf_nat_protos[protonum]);
}
struct nf_nat_protocol *
......@@ -61,13 +61,11 @@ nf_nat_proto_find_get(u_int8_t protonum)
{
struct nf_nat_protocol *p;
/* we need to disable preemption to make sure 'p' doesn't get
* removed until we've grabbed the reference */
preempt_disable();
rcu_read_lock();
p = __nf_nat_proto_find(protonum);
if (!try_module_get(p->me))
p = &nf_nat_unknown_protocol;
preempt_enable();
rcu_read_unlock();
return p;
}
......@@ -126,8 +124,8 @@ in_range(const struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range)
{
struct nf_nat_protocol *proto;
int ret = 0;
proto = __nf_nat_proto_find(tuple->dst.protonum);
/* If we are supposed to map IPs, then we must be in the
range specified, otherwise let this drag us onto a new src IP. */
if (range->flags & IP_NAT_RANGE_MAP_IPS) {
......@@ -136,12 +134,15 @@ in_range(const struct nf_conntrack_tuple *tuple,
return 0;
}
rcu_read_lock();
proto = __nf_nat_proto_find(tuple->dst.protonum);
if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
proto->in_range(tuple, IP_NAT_MANIP_SRC,
&range->min, &range->max))
return 1;
ret = 1;
rcu_read_unlock();
return 0;
return ret;
}
static inline int
......@@ -268,27 +269,25 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
/* 3) The per-protocol part of the manip is made to map into
the range to make a unique tuple. */
proto = nf_nat_proto_find_get(orig_tuple->dst.protonum);
rcu_read_lock();
proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
/* Change protocol info to have some randomization */
if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
proto->unique_tuple(tuple, range, maniptype, ct);
nf_nat_proto_put(proto);
return;
goto out;
}
/* Only bother mapping if it's not already in range and unique */
if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
!nf_nat_used_tuple(tuple, ct)) {
nf_nat_proto_put(proto);
return;
}
!nf_nat_used_tuple(tuple, ct))
goto out;
/* Last change: get protocol to try to obtain unique tuple. */
proto->unique_tuple(tuple, range, maniptype, ct);
nf_nat_proto_put(proto);
out:
rcu_read_unlock();
}
unsigned int
......@@ -369,12 +368,11 @@ manip_pkt(u_int16_t proto,
iph = (void *)(*pskb)->data + iphdroff;
/* Manipulate protcol part. */
p = nf_nat_proto_find_get(proto);
if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) {
nf_nat_proto_put(p);
/* rcu_read_lock()ed by nf_hook_slow */
p = __nf_nat_proto_find(proto);
if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
return 0;
}
nf_nat_proto_put(p);
iph = (void *)(*pskb)->data + iphdroff;
......@@ -431,6 +429,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
struct icmphdr icmp;
struct iphdr ip;
} *inside;
struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple inner, target;
int hdrlen = (*pskb)->nh.iph->ihl * 4;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
......@@ -466,16 +465,16 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
*pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
/* rcu_read_lock()ed by nf_hook_slow */
l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
if (!nf_ct_get_tuple(*pskb,
(*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr),
(*pskb)->nh.iph->ihl*4 +
sizeof(struct icmphdr) + inside->ip.ihl*4,
(u_int16_t)AF_INET,
inside->ip.protocol,
&inner,
l3proto,
__nf_ct_l4proto_find((u_int16_t)PF_INET,
inside->ip.protocol)))
&inner, l3proto, l4proto))
return 0;
/* Change inner back to look like incoming packet. We do the
......@@ -529,7 +528,7 @@ int nf_nat_protocol_register(struct nf_nat_protocol *proto)
ret = -EBUSY;
goto out;
}
nf_nat_protos[proto->protonum] = proto;
rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
out:
write_unlock_bh(&nf_nat_lock);
return ret;
......@@ -540,11 +539,10 @@ EXPORT_SYMBOL(nf_nat_protocol_register);
void nf_nat_protocol_unregister(struct nf_nat_protocol *proto)
{
write_lock_bh(&nf_nat_lock);
nf_nat_protos[proto->protonum] = &nf_nat_unknown_protocol;
rcu_assign_pointer(nf_nat_protos[proto->protonum],
&nf_nat_unknown_protocol);
write_unlock_bh(&nf_nat_lock);
/* Someone could be still looking at the proto in a bh. */
synchronize_net();
synchronize_rcu();
}
EXPORT_SYMBOL(nf_nat_protocol_unregister);
......@@ -608,10 +606,10 @@ static int __init nf_nat_init(void)
/* Sew in builtin protocols. */
write_lock_bh(&nf_nat_lock);
for (i = 0; i < MAX_IP_NAT_PROTO; i++)
nf_nat_protos[i] = &nf_nat_unknown_protocol;
nf_nat_protos[IPPROTO_TCP] = &nf_nat_protocol_tcp;
nf_nat_protos[IPPROTO_UDP] = &nf_nat_protocol_udp;
nf_nat_protos[IPPROTO_ICMP] = &nf_nat_protocol_icmp;
rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
write_unlock_bh(&nf_nat_lock);
for (i = 0; i < nf_nat_htable_size; i++) {
......@@ -619,8 +617,8 @@ static int __init nf_nat_init(void)
}
/* FIXME: Man, this is a hack. <SIGH> */
NF_CT_ASSERT(nf_conntrack_destroyed == NULL);
nf_conntrack_destroyed = &nf_nat_cleanup_conntrack;
NF_CT_ASSERT(rcu_dereference(nf_conntrack_destroyed) == NULL);
rcu_assign_pointer(nf_conntrack_destroyed, nf_nat_cleanup_conntrack);
/* Initialize fake conntrack so that NAT will skip it */
nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
......@@ -644,7 +642,8 @@ static int clean_nat(struct nf_conn *i, void *data)
static void __exit nf_nat_cleanup(void)
{
nf_ct_iterate_cleanup(&clean_nat, NULL);
nf_conntrack_destroyed = NULL;
rcu_assign_pointer(nf_conntrack_destroyed, NULL);
synchronize_rcu();
vfree(bysource);
nf_ct_l3proto_put(l3proto);
}
......
......@@ -26,16 +26,16 @@
*/
#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
static int fast_convergence = 1;
static int max_increment = 16;
static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
static int initial_ssthresh = 100;
static int bic_scale = 41;
static int tcp_friendliness = 1;
static u32 cube_rtt_scale;
static u32 beta_scale;
static u64 cube_factor;
static int fast_convergence __read_mostly = 1;
static int max_increment __read_mostly = 16;
static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
static int initial_ssthresh __read_mostly = 100;
static int bic_scale __read_mostly = 41;
static int tcp_friendliness __read_mostly = 1;
static u32 cube_rtt_scale __read_mostly;
static u32 beta_scale __read_mostly;
static u64 cube_factor __read_mostly;
/* Note parameters that are used for precomputing scale factors are read-only */
module_param(fast_convergence, int, 0644);
......
......@@ -14,18 +14,19 @@
#define BETA_MIN (1<<6) /* 0.5 with shift << 7 */
#define BETA_MAX 102 /* 0.8 with shift << 7 */
static int use_rtt_scaling = 1;
static int use_rtt_scaling __read_mostly = 1;
module_param(use_rtt_scaling, int, 0644);
MODULE_PARM_DESC(use_rtt_scaling, "turn on/off RTT scaling");
static int use_bandwidth_switch = 1;
static int use_bandwidth_switch __read_mostly = 1;
module_param(use_bandwidth_switch, int, 0644);
MODULE_PARM_DESC(use_bandwidth_switch, "turn on/off bandwidth switcher");
struct htcp {
u32 alpha; /* Fixed point arith, << 7 */
u8 beta; /* Fixed point arith, << 7 */
u8 modeswitch; /* Delay modeswitch until we had at least one congestion event */
u8 modeswitch; /* Delay modeswitch
until we had at least one congestion event */
u16 pkts_acked;
u32 packetcount;
u32 minRTT;
......@@ -44,14 +45,14 @@ struct htcp {
u32 lasttime;
};
static inline u32 htcp_cong_time(struct htcp *ca)
static inline u32 htcp_cong_time(const struct htcp *ca)
{
return jiffies - ca->last_cong;
}
static inline u32 htcp_ccount(struct htcp *ca)
static inline u32 htcp_ccount(const struct htcp *ca)
{
return htcp_cong_time(ca)/ca->minRTT;
return htcp_cong_time(ca) / ca->minRTT;
}
static inline void htcp_reset(struct htcp *ca)
......@@ -67,10 +68,12 @@ static u32 htcp_cwnd_undo(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
ca->last_cong = ca->undo_last_cong;
ca->maxRTT = ca->undo_maxRTT;
ca->old_maxB = ca->undo_old_maxB;
return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta);
return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta);
}
static inline void measure_rtt(struct sock *sk)
......@@ -78,17 +81,19 @@ static inline void measure_rtt(struct sock *sk)
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
u32 srtt = tp->srtt>>3;
u32 srtt = tp->srtt >> 3;
/* keep track of minimum RTT seen so far, minRTT is zero at first */
if (ca->minRTT > srtt || !ca->minRTT)
ca->minRTT = srtt;
/* max RTT */
if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) {
if (icsk->icsk_ca_state == TCP_CA_Open
&& tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) {
if (ca->maxRTT < ca->minRTT)
ca->maxRTT = ca->minRTT;
if (ca->maxRTT < srtt && srtt <= ca->maxRTT+msecs_to_jiffies(20))
if (ca->maxRTT < srtt
&& srtt <= ca->maxRTT + msecs_to_jiffies(20))
ca->maxRTT = srtt;
}
}
......@@ -116,15 +121,16 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
ca->packetcount += pkts_acked;
if (ca->packetcount >= tp->snd_cwnd - (ca->alpha>>7? : 1)
if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1)
&& now - ca->lasttime >= ca->minRTT
&& ca->minRTT > 0) {
__u32 cur_Bi = ca->packetcount*HZ/(now - ca->lasttime);
__u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime);
if (htcp_ccount(ca) <= 3) {
/* just after backoff */
ca->minB = ca->maxB = ca->Bi = cur_Bi;
} else {
ca->Bi = (3*ca->Bi + cur_Bi)/4;
ca->Bi = (3 * ca->Bi + cur_Bi) / 4;
if (ca->Bi > ca->maxB)
ca->maxB = ca->Bi;
if (ca->minB > ca->maxB)
......@@ -142,7 +148,7 @@ static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT)
u32 old_maxB = ca->old_maxB;
ca->old_maxB = ca->maxB;
if (!between(5*maxB, 4*old_maxB, 6*old_maxB)) {
if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
ca->beta = BETA_MIN;
ca->modeswitch = 0;
return;
......@@ -150,7 +156,7 @@ static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT)
}
if (ca->modeswitch && minRTT > msecs_to_jiffies(10) && maxRTT) {
ca->beta = (minRTT<<7)/maxRTT;
ca->beta = (minRTT << 7) / maxRTT;
if (ca->beta < BETA_MIN)
ca->beta = BETA_MIN;
else if (ca->beta > BETA_MAX)
......@@ -169,23 +175,26 @@ static inline void htcp_alpha_update(struct htcp *ca)
if (diff > HZ) {
diff -= HZ;
factor = 1+ ( 10*diff + ((diff/2)*(diff/2)/HZ) )/HZ;
factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / HZ)) / HZ;
}
if (use_rtt_scaling && minRTT) {
u32 scale = (HZ<<3)/(10*minRTT);
scale = min(max(scale, 1U<<2), 10U<<3); /* clamping ratio to interval [0.5,10]<<3 */
factor = (factor<<3)/scale;
u32 scale = (HZ << 3) / (10 * minRTT);
/* clamping ratio to interval [0.5,10]<<3 */
scale = min(max(scale, 1U << 2), 10U << 3);
factor = (factor << 3) / scale;
if (!factor)
factor = 1;
}
ca->alpha = 2*factor*((1<<7)-ca->beta);
ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
if (!ca->alpha)
ca->alpha = ALPHA_BASE;
}
/* After we have the rtt data to calculate beta, we'd still prefer to wait one
/*
* After we have the rtt data to calculate beta, we'd still prefer to wait one
* rtt before we adjust our beta to ensure we are working from a consistent
* data.
*
......@@ -202,15 +211,16 @@ static void htcp_param_update(struct sock *sk)
htcp_beta_update(ca, minRTT, maxRTT);
htcp_alpha_update(ca);
/* add slowly fading memory for maxRTT to accommodate routing changes etc */
/* add slowly fading memory for maxRTT to accommodate routing changes */
if (minRTT > 0 && maxRTT > minRTT)
ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100;
ca->maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
}
static u32 htcp_recalc_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct htcp *ca = inet_csk_ca(sk);
htcp_param_update(sk);
return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
}
......@@ -227,7 +237,6 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
if (tp->snd_cwnd <= tp->snd_ssthresh)
tcp_slow_start(tp);
else {
measure_rtt(sk);
/* In dangerous area, increase slowly.
......
......@@ -42,7 +42,8 @@ config IP6_NF_QUEUE
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6 && EXPERIMENTAL && NETFILTER_XTABLES
depends on INET && IPV6 && EXPERIMENTAL
select NETFILTER_XTABLES
help
ip6tables is a general, extensible packet identification framework.
Currently only the packet filtering and packet mangling subsystem
......
......@@ -501,7 +501,7 @@ static int __init ip6t_log_init(void)
static void __exit ip6t_log_fini(void)
{
nf_log_unregister_logger(&ip6t_logger);
nf_log_unregister(&ip6t_logger);
xt_unregister_target(&ip6t_log_reg);
}
......
......@@ -66,6 +66,13 @@ match(const struct sk_buff *skb,
return 0;
}
if (mh->ip6mh_proto != IPPROTO_NONE) {
duprintf("Dropping invalid MH Payload Proto: %u\n",
mh->ip6mh_proto);
*hotdrop = 1;
return 0;
}
return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type,
!!(mhinfo->invflags & IP6T_MH_INV_TYPE));
}
......
......@@ -154,8 +154,8 @@ ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
*/
if ((protoff < 0) || (protoff > (*pskb)->len)) {
DEBUGP("ip6_conntrack_core: can't find proto in pkt\n");
NF_CT_STAT_INC(error);
NF_CT_STAT_INC(invalid);
NF_CT_STAT_INC_ATOMIC(error);
NF_CT_STAT_INC_ATOMIC(invalid);
return -NF_ACCEPT;
}
......
......@@ -182,6 +182,7 @@ icmpv6_error_message(struct sk_buff *skb,
return -NF_ACCEPT;
}
/* rcu_read_lock()ed by nf_hook_slow */
inproto = __nf_ct_l4proto_find(PF_INET6, inprotonum);
/* Are they talking about one of our connections? */
......
......@@ -2297,16 +2297,17 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
&sel, tmp.security, 1);
security_xfrm_policy_free(&tmp);
xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL);
if (xp == NULL)
return -ENOENT;
err = 0;
err = security_xfrm_policy_delete(xp);
if ((err = security_xfrm_policy_delete(xp)))
xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL);
if (err)
goto out;
c.seq = hdr->sadb_msg_seq;
c.pid = hdr->sadb_msg_pid;
c.event = XFRM_MSG_DELPOLICY;
......
......@@ -302,7 +302,9 @@ config NETFILTER_XT_TARGET_CONNMARK
tristate '"CONNMARK" target support'
depends on NETFILTER_XTABLES
depends on IP_NF_MANGLE || IP6_NF_MANGLE
depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK)
depends on IP_NF_CONNTRACK || NF_CONNTRACK
select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
select NF_CONNTRACK_MARK if NF_CONNTRACK
help
This option adds a `CONNMARK' target, which allows one to manipulate
the connection mark value. Similar to the MARK target, but
......@@ -434,7 +436,9 @@ config NETFILTER_XT_MATCH_COMMENT
config NETFILTER_XT_MATCH_CONNBYTES
tristate '"connbytes" per-connection counter match support'
depends on NETFILTER_XTABLES
depends on (IP_NF_CONNTRACK && IP_NF_CT_ACCT) || (NF_CT_ACCT && NF_CONNTRACK)
depends on IP_NF_CONNTRACK || NF_CONNTRACK
select IP_NF_CT_ACCT if IP_NF_CONNTRACK
select NF_CT_ACCT if NF_CONNTRACK
help
This option adds a `connbytes' match, which allows you to match the
number of bytes and/or packets for each direction within a connection.
......@@ -445,7 +449,9 @@ config NETFILTER_XT_MATCH_CONNBYTES
config NETFILTER_XT_MATCH_CONNMARK
tristate '"connmark" connection mark match support'
depends on NETFILTER_XTABLES
depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK)
depends on IP_NF_CONNTRACK || NF_CONNTRACK
select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
select NF_CONNTRACK_MARK if NF_CONNTRACK
help
This option adds a `connmark' match, which allows you to match the
connection mark value previously set for the session by `CONNMARK'.
......
......@@ -22,29 +22,34 @@
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <net/sock.h>
#include "nf_internals.h"
static DEFINE_SPINLOCK(afinfo_lock);
static DEFINE_MUTEX(afinfo_mutex);
struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly;
EXPORT_SYMBOL(nf_afinfo);
int nf_register_afinfo(struct nf_afinfo *afinfo)
{
spin_lock(&afinfo_lock);
int err;
err = mutex_lock_interruptible(&afinfo_mutex);
if (err < 0)
return err;
rcu_assign_pointer(nf_afinfo[afinfo->family], afinfo);
spin_unlock(&afinfo_lock);
mutex_unlock(&afinfo_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(nf_register_afinfo);
void nf_unregister_afinfo(struct nf_afinfo *afinfo)
{
spin_lock(&afinfo_lock);
mutex_lock(&afinfo_mutex);
rcu_assign_pointer(nf_afinfo[afinfo->family], NULL);
spin_unlock(&afinfo_lock);
mutex_unlock(&afinfo_mutex);
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
......@@ -56,30 +61,31 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
* packets come back: if the hook is gone, the packet is discarded. */
struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly;
EXPORT_SYMBOL(nf_hooks);
static DEFINE_SPINLOCK(nf_hook_lock);
static DEFINE_MUTEX(nf_hook_mutex);
int nf_register_hook(struct nf_hook_ops *reg)
{
struct list_head *i;
int err;
spin_lock_bh(&nf_hook_lock);
err = mutex_lock_interruptible(&nf_hook_mutex);
if (err < 0)
return err;
list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
if (reg->priority < ((struct nf_hook_ops *)i)->priority)
break;
}
list_add_rcu(&reg->list, i->prev);
spin_unlock_bh(&nf_hook_lock);
synchronize_net();
mutex_unlock(&nf_hook_mutex);
return 0;
}
EXPORT_SYMBOL(nf_register_hook);
void nf_unregister_hook(struct nf_hook_ops *reg)
{
spin_lock_bh(&nf_hook_lock);
mutex_lock(&nf_hook_mutex);
list_del_rcu(&reg->list);
spin_unlock_bh(&nf_hook_lock);
mutex_unlock(&nf_hook_mutex);
synchronize_net();
}
......@@ -248,9 +254,12 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
{
void (*attach)(struct sk_buff *, struct sk_buff *);
if (skb->nfct && (attach = ip_ct_attach) != NULL) {
mb(); /* Just to be sure: must be read before executing this */
if (skb->nfct) {
rcu_read_lock();
attach = rcu_dereference(ip_ct_attach);
if (attach)
attach(new, skb);
rcu_read_unlock();
}
}
EXPORT_SYMBOL(nf_ct_attach);
......
......@@ -318,6 +318,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
struct nf_conn_help *help = nfct_help(ct);
struct nf_conntrack_l3proto *l3proto;
struct nf_conntrack_l4proto *l4proto;
typeof(nf_conntrack_destroyed) destroyed;
DEBUGP("destroy_conntrack(%p)\n", ct);
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
......@@ -332,16 +333,21 @@ destroy_conntrack(struct nf_conntrack *nfct)
/* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock
* to nf_conntrack_lock!!! -HW */
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
if (l3proto && l3proto->destroy)
l3proto->destroy(ct);
l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
if (l4proto && l4proto->destroy)
l4proto->destroy(ct);
if (nf_conntrack_destroyed)
nf_conntrack_destroyed(ct);
destroyed = rcu_dereference(nf_conntrack_destroyed);
if (destroyed)
destroyed(ct);
rcu_read_unlock();
write_lock_bh(&nf_conntrack_lock);
/* Expectations will have been removed in clean_from_lists,
......@@ -560,7 +566,7 @@ static int early_drop(struct list_head *chain)
if (del_timer(&ct->timeout)) {
death_by_timeout((unsigned long)ct);
dropped = 1;
NF_CT_STAT_INC(early_drop);
NF_CT_STAT_INC_ATOMIC(early_drop);
}
nf_ct_put(ct);
return dropped;
......@@ -647,9 +653,14 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl)
{
struct nf_conntrack_l3proto *l3proto;
struct nf_conn *ct;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(orig->src.l3num);
return __nf_conntrack_alloc(orig, repl, l3proto, 0);
ct = __nf_conntrack_alloc(orig, repl, l3proto, 0);
rcu_read_unlock();
return ct;
}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
......@@ -813,11 +824,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
/* Previously seen (loopback or untracked)? Ignore. */
if ((*pskb)->nfct) {
NF_CT_STAT_INC(ignore);
NF_CT_STAT_INC_ATOMIC(ignore);
return NF_ACCEPT;
}
/* rcu_read_lock()ed by nf_hook_slow */
l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
DEBUGP("not prepared to track yet or error occured\n");
return -ret;
......@@ -830,8 +843,8 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
* core what to do with the packet. */
if (l4proto->error != NULL &&
(ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
NF_CT_STAT_INC(error);
NF_CT_STAT_INC(invalid);
NF_CT_STAT_INC_ATOMIC(error);
NF_CT_STAT_INC_ATOMIC(invalid);
return -ret;
}
......@@ -839,13 +852,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
&set_reply, &ctinfo);
if (!ct) {
/* Not valid part of a connection */
NF_CT_STAT_INC(invalid);
NF_CT_STAT_INC_ATOMIC(invalid);
return NF_ACCEPT;
}
if (IS_ERR(ct)) {
/* Too stressed to deal. */
NF_CT_STAT_INC(drop);
NF_CT_STAT_INC_ATOMIC(drop);
return NF_DROP;
}
......@@ -858,7 +871,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
DEBUGP("nf_conntrack_in: Can't track with proto module\n");
nf_conntrack_put((*pskb)->nfct);
(*pskb)->nfct = NULL;
NF_CT_STAT_INC(invalid);
NF_CT_STAT_INC_ATOMIC(invalid);
return -ret;
}
......@@ -872,10 +885,15 @@ EXPORT_SYMBOL_GPL(nf_conntrack_in);
int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig)
{
return nf_ct_invert_tuple(inverse, orig,
int ret;
rcu_read_lock();
ret = nf_ct_invert_tuple(inverse, orig,
__nf_ct_l3proto_find(orig->src.l3num),
__nf_ct_l4proto_find(orig->src.l3num,
orig->dst.protonum));
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
......@@ -1105,7 +1123,7 @@ void nf_conntrack_cleanup(void)
{
int i;
ip_ct_attach = NULL;
rcu_assign_pointer(ip_ct_attach, NULL);
/* This makes sure all current packets have passed through
netfilter framework. Roll on, two-stage module
......@@ -1273,7 +1291,7 @@ int __init nf_conntrack_init(void)
write_unlock_bh(&nf_conntrack_lock);
/* For use by REJECT target */
ip_ct_attach = __nf_conntrack_attach;
rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach);
/* Set up fake conntrack:
- to never be deleted, not in any hashes */
......
......@@ -66,7 +66,7 @@ __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
return &nf_conntrack_l4proto_generic;
return nf_ct_protos[l3proto][l4proto];
return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
}
EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
......@@ -77,11 +77,11 @@ nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto)
{
struct nf_conntrack_l4proto *p;
preempt_disable();
rcu_read_lock();
p = __nf_ct_l4proto_find(l3proto, l4proto);
if (!try_module_get(p->me))
p = &nf_conntrack_l4proto_generic;
preempt_enable();
rcu_read_unlock();
return p;
}
......@@ -98,11 +98,11 @@ nf_ct_l3proto_find_get(u_int16_t l3proto)
{
struct nf_conntrack_l3proto *p;
preempt_disable();
rcu_read_lock();
p = __nf_ct_l3proto_find(l3proto);
if (!try_module_get(p->me))
p = &nf_conntrack_l3proto_generic;
preempt_enable();
rcu_read_unlock();
return p;
}
......@@ -137,10 +137,8 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
{
struct nf_conntrack_l3proto *p;
preempt_disable();
/* rcu_read_lock not necessary since the caller holds a reference */
p = __nf_ct_l3proto_find(l3proto);
preempt_enable();
module_put(p->me);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
......@@ -202,7 +200,7 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
ret = -EBUSY;
goto out_unlock;
}
nf_ct_l3protos[proto->l3proto] = proto;
rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
write_unlock_bh(&nf_conntrack_lock);
ret = nf_ct_l3proto_register_sysctl(proto);
......@@ -217,35 +215,21 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
}
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
int nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
{
int ret = 0;
if (proto->l3proto >= AF_MAX) {
ret = -EBUSY;
goto out;
}
BUG_ON(proto->l3proto >= AF_MAX);
write_lock_bh(&nf_conntrack_lock);
if (nf_ct_l3protos[proto->l3proto] != proto) {
write_unlock_bh(&nf_conntrack_lock);
ret = -EBUSY;
goto out;
}
nf_ct_l3protos[proto->l3proto] = &nf_conntrack_l3proto_generic;
BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
&nf_conntrack_l3proto_generic);
write_unlock_bh(&nf_conntrack_lock);
synchronize_rcu();
nf_ct_l3proto_unregister_sysctl(proto);
/* Somebody could be still looking at the proto in bh. */
synchronize_net();
/* Remove all contrack entries for this protocol */
nf_ct_iterate_cleanup(kill_l3proto, proto);
out:
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
......@@ -356,7 +340,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
goto retry;
}
nf_ct_protos[l4proto->l3proto][l4proto->l4proto] = l4proto;
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto);
write_unlock_bh(&nf_conntrack_lock);
ret = nf_ct_l4proto_register_sysctl(l4proto);
......@@ -371,40 +355,25 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
int nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
if (l4proto->l3proto >= PF_MAX) {
ret = -EBUSY;
goto out;
}
BUG_ON(l4proto->l3proto >= PF_MAX);
if (l4proto == &nf_conntrack_l4proto_generic) {
nf_ct_l4proto_unregister_sysctl(l4proto);
goto out;
return;
}
write_lock_bh(&nf_conntrack_lock);
if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto]
!= l4proto) {
write_unlock_bh(&nf_conntrack_lock);
ret = -EBUSY;
goto out;
}
nf_ct_protos[l4proto->l3proto][l4proto->l4proto]
= &nf_conntrack_l4proto_generic;
BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
&nf_conntrack_l4proto_generic);
write_unlock_bh(&nf_conntrack_lock);
synchronize_rcu();
nf_ct_l4proto_unregister_sysctl(l4proto);
/* Somebody could be still looking at the proto in bh. */
synchronize_net();
/* Remove all contrack entries for this protocol */
nf_ct_iterate_cleanup(kill_l4proto, l4proto);
out:
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
......@@ -58,16 +58,16 @@ static DEFINE_RWLOCK(tcp_lock);
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
int nf_ct_tcp_be_liberal __read_mostly = 0;
static int nf_ct_tcp_be_liberal __read_mostly = 0;
/* If it is set to zero, we disable picking up already established
connections. */
int nf_ct_tcp_loose __read_mostly = 1;
static int nf_ct_tcp_loose __read_mostly = 1;
/* Max number of the retransmitted packets without receiving an (acceptable)
ACK from the destination. If this number is reached, a shorter timer
will be started. */
int nf_ct_tcp_max_retrans __read_mostly = 3;
static int nf_ct_tcp_max_retrans __read_mostly = 3;
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR */
......
......@@ -14,62 +14,63 @@
#define NF_LOG_PREFIXLEN 128
static struct nf_logger *nf_logging[NPROTO]; /* = NULL */
static DEFINE_SPINLOCK(nf_log_lock);
static struct nf_logger *nf_loggers[NPROTO];
static DEFINE_MUTEX(nf_log_mutex);
/* return EBUSY if somebody else is registered, EEXIST if the same logger
* is registred, 0 on success. */
int nf_log_register(int pf, struct nf_logger *logger)
{
int ret = -EBUSY;
int ret;
if (pf >= NPROTO)
return -EINVAL;
/* Any setup of logging members must be done before
* substituting pointer. */
spin_lock(&nf_log_lock);
if (!nf_logging[pf]) {
rcu_assign_pointer(nf_logging[pf], logger);
ret = 0;
} else if (nf_logging[pf] == logger)
ret = mutex_lock_interruptible(&nf_log_mutex);
if (ret < 0)
return ret;
if (!nf_loggers[pf])
rcu_assign_pointer(nf_loggers[pf], logger);
else if (nf_loggers[pf] == logger)
ret = -EEXIST;
else
ret = -EBUSY;
spin_unlock(&nf_log_lock);
mutex_unlock(&nf_log_mutex);
return ret;
}
EXPORT_SYMBOL(nf_log_register);
int nf_log_unregister_pf(int pf)
void nf_log_unregister_pf(int pf)
{
if (pf >= NPROTO)
return -EINVAL;
spin_lock(&nf_log_lock);
nf_logging[pf] = NULL;
spin_unlock(&nf_log_lock);
return;
mutex_lock(&nf_log_mutex);
rcu_assign_pointer(nf_loggers[pf], NULL);
mutex_unlock(&nf_log_mutex);
/* Give time to concurrent readers. */
synchronize_net();
return 0;
synchronize_rcu();
}
EXPORT_SYMBOL(nf_log_unregister_pf);
void nf_log_unregister_logger(struct nf_logger *logger)
void nf_log_unregister(struct nf_logger *logger)
{
int i;
spin_lock(&nf_log_lock);
mutex_lock(&nf_log_mutex);
for (i = 0; i < NPROTO; i++) {
if (nf_logging[i] == logger)
nf_logging[i] = NULL;
if (nf_loggers[i] == logger)
rcu_assign_pointer(nf_loggers[i], NULL);
}
spin_unlock(&nf_log_lock);
mutex_unlock(&nf_log_mutex);
synchronize_net();
synchronize_rcu();
}
EXPORT_SYMBOL(nf_log_unregister_logger);
EXPORT_SYMBOL(nf_log_unregister);
void nf_log_packet(int pf,
unsigned int hooknum,
......@@ -84,7 +85,7 @@ void nf_log_packet(int pf,
struct nf_logger *logger;
rcu_read_lock();
logger = rcu_dereference(nf_logging[pf]);
logger = rcu_dereference(nf_loggers[pf]);
if (logger) {
va_start(args, fmt);
vsnprintf(prefix, sizeof(prefix), fmt, args);
......@@ -131,7 +132,7 @@ static int seq_show(struct seq_file *s, void *v)
loff_t *pos = v;
const struct nf_logger *logger;
logger = rcu_dereference(nf_logging[*pos]);
logger = rcu_dereference(nf_loggers[*pos]);
if (!logger)
return seq_printf(s, "%2lld NONE\n", *pos);
......
......@@ -1077,7 +1077,7 @@ static int __init nfnetlink_log_init(void)
static void __exit nfnetlink_log_fini(void)
{
nf_log_unregister_logger(&nfulnl_logger);
nf_log_unregister(&nfulnl_logger);
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_log", proc_net_netfilter);
#endif
......
......@@ -15,6 +15,8 @@
#include <linux/ip.h>
#include <net/checksum.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_CLASSIFY.h>
......@@ -55,9 +57,9 @@ static struct xt_target xt_classify_target[] = {
.target = target,
.targetsize = sizeof(struct xt_classify_target_info),
.table = "mangle",
.hooks = (1 << NF_IP_LOCAL_OUT) |
(1 << NF_IP_FORWARD) |
(1 << NF_IP_POST_ROUTING),
.hooks = (1 << NF_IP6_LOCAL_OUT) |
(1 << NF_IP6_FORWARD) |
(1 << NF_IP6_POST_ROUTING),
.me = THIS_MODULE,
},
};
......
......@@ -14,6 +14,7 @@
#include <linux/etherdevice.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/xt_mac.h>
#include <linux/netfilter/x_tables.h>
......@@ -59,9 +60,9 @@ static struct xt_match xt_mac_match[] = {
.family = AF_INET6,
.match = match,
.matchsize = sizeof(struct xt_mac_info),
.hooks = (1 << NF_IP_PRE_ROUTING) |
(1 << NF_IP_LOCAL_IN) |
(1 << NF_IP_FORWARD),
.hooks = (1 << NF_IP6_PRE_ROUTING) |
(1 << NF_IP6_LOCAL_IN) |
(1 << NF_IP6_FORWARD),
.me = THIS_MODULE,
},
};
......
......@@ -1997,6 +1997,11 @@ void xfrm_audit_log(uid_t auid, u32 sid, int type, int result,
if (audit_enabled == 0)
return;
BUG_ON((type == AUDIT_MAC_IPSEC_ADDSA ||
type == AUDIT_MAC_IPSEC_DELSA) && !x);
BUG_ON((type == AUDIT_MAC_IPSEC_ADDSPD ||
type == AUDIT_MAC_IPSEC_DELSPD) && !xp);
audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type);
if (audit_buf == NULL)
return;
......
......@@ -1273,10 +1273,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete);
security_xfrm_policy_free(&tmp);
}
if (delete)
xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL);
if (xp == NULL)
return -ENOENT;
......@@ -1292,8 +1288,14 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
MSG_DONTWAIT);
}
} else {
if ((err = security_xfrm_policy_delete(xp)) != 0)
err = security_xfrm_policy_delete(xp);
xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid,
AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL);
if (err != 0)
goto out;
c.data.byid = p->index;
c.event = nlh->nlmsg_type;
c.seq = nlh->nlmsg_seq;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment