Commit 83bf2e40 authored by David S. Miller's avatar David S. Miller
parents de384830 6661481d
......@@ -3,6 +3,7 @@ header-y += nf_conntrack_tuple_common.h
header-y += nfnetlink_conntrack.h
header-y += nfnetlink_log.h
header-y += nfnetlink_queue.h
header-y += xt_CHECKSUM.h
header-y += xt_CLASSIFY.h
header-y += xt_CONNMARK.h
header-y += xt_CONNSECMARK.h
......@@ -19,17 +20,19 @@ header-y += xt_TCPMSS.h
header-y += xt_TCPOPTSTRIP.h
header-y += xt_TEE.h
header-y += xt_TPROXY.h
header-y += xt_cluster.h
header-y += xt_comment.h
header-y += xt_connbytes.h
header-y += xt_connlimit.h
header-y += xt_connmark.h
header-y += xt_conntrack.h
header-y += xt_cluster.h
header-y += xt_cpu.h
header-y += xt_dccp.h
header-y += xt_dscp.h
header-y += xt_esp.h
header-y += xt_hashlimit.h
header-y += xt_iprange.h
header-y += xt_ipvs.h
header-y += xt_helper.h
header-y += xt_length.h
header-y += xt_limit.h
......
......@@ -89,7 +89,7 @@ enum nfulnl_attr_config {
#define NFULNL_COPY_NONE 0x00
#define NFULNL_COPY_META 0x01
#define NFULNL_COPY_PACKET 0x02
#define NFULNL_COPY_DISABLED 0x03
/* 0xff is reserved, don't use it for new copy modes. */
#define NFULNL_CFG_F_SEQ 0x0001
#define NFULNL_CFG_F_SEQ_GLOBAL 0x0002
......
/* Header file for iptables xt_CHECKSUM target
*
* (C) 2002 by Harald Welte <laforge@gnumonks.org>
* (C) 2010 Red Hat Inc
* Author: Michael S. Tsirkin <mst@redhat.com>
*
* This software is distributed under GNU GPL v2, 1991
*/
#ifndef _XT_CHECKSUM_TARGET_H
#define _XT_CHECKSUM_TARGET_H
#include <linux/types.h>
/* Operation flags that may be set in xt_CHECKSUM_info.operation. */
#define XT_CHECKSUM_OP_FILL 0x01 /* fill in checksum in IP header */
/*
 * Configuration data for the CHECKSUM target.
 * NOTE(review): this header is listed under header-y, so the layout is
 * presumably shared with userspace and must not change - confirm.
 */
struct xt_CHECKSUM_info {
__u8 operation; /* bitset of operations */
};
#endif /* _XT_CHECKSUM_TARGET_H */
#ifndef _XT_CPU_H
#define _XT_CPU_H
#include <linux/types.h>
/*
 * Match data for the "cpu" match, which matches packets based on the CPU
 * currently handling the packet.
 */
struct xt_cpu_info {
/* CPU number to compare against. */
__u32 cpu;
/* NOTE(review): presumably non-zero negates the match result - confirm in xt_cpu.c */
__u32 invert;
};
#endif /*_XT_CPU_H*/
#ifndef _XT_IPVS_H
#define _XT_IPVS_H

/*
 * Make the header self-contained: __be16/__u8 come from <linux/types.h> and
 * union nf_inet_addr from <linux/netfilter.h>.  Without these includes the
 * header only compiles when the includer happens to pull them in first.
 */
#include <linux/types.h>
#include <linux/netfilter.h>

/*
 * Option flags for the "ipvs" match.
 * NOTE(review): presumably stored in xt_ipvs_mtinfo.bitmask (options in use)
 * and xt_ipvs_mtinfo.invert (options negated) - confirm against xt_ipvs.c.
 */
enum {
	XT_IPVS_IPVS_PROPERTY =	1 << 0, /* all other options imply this one */
	XT_IPVS_PROTO =		1 << 1,
	XT_IPVS_VADDR =		1 << 2,
	XT_IPVS_VPORT =		1 << 3,
	XT_IPVS_DIR =		1 << 4,
	XT_IPVS_METHOD =	1 << 5,
	XT_IPVS_VPORTCTL =	1 << 6,
	/* All seven option bits. */
	XT_IPVS_MASK =		(1 << 7) - 1,
	/* Every option bit except XT_IPVS_IPVS_PROPERTY. */
	XT_IPVS_ONCE_MASK =	XT_IPVS_MASK & ~XT_IPVS_IPVS_PROPERTY
};

/*
 * Match data for the "ipvs" match.  Field order and sizes are unchanged so
 * the binary layout stays exactly as before.
 */
struct xt_ipvs_mtinfo {
	union nf_inet_addr	vaddr, vmask;	/* address and mask pair */
	__be16			vport;		/* big-endian port */
	__u8			l4proto;
	__u8			fwd_method;
	__be16			vportctl;	/* big-endian port */

	__u8			invert;
	__u8			bitmask;
};

#endif /* _XT_IPVS_H */
......@@ -11,9 +11,9 @@ struct xt_quota_priv;
struct xt_quota_info {
u_int32_t flags;
u_int32_t pad;
aligned_u64 quota;
/* Used internally by the kernel */
aligned_u64 quota;
struct xt_quota_priv *master;
};
......
......@@ -632,10 +632,22 @@ extern struct ip_vs_conn *ip_vs_ct_in_get
(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
const union nf_inet_addr *d_addr, __be16 d_port);
struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
extern struct ip_vs_conn *ip_vs_conn_out_get
(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
const union nf_inet_addr *d_addr, __be16 d_port);
struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
/* put back the conn without restarting its timer */
static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
{
......@@ -736,8 +748,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
extern int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
char *o_buf, int o_len, char *n_buf, int n_len);
extern int ip_vs_app_init(void);
extern void ip_vs_app_cleanup(void);
......
......@@ -28,9 +28,14 @@ struct nf_ct_ext {
char data[0];
};
static inline int nf_ct_ext_exist(const struct nf_conn *ct, u8 id)
static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id)
{
return (ct->ext && ct->ext->offset[id]);
return !!ext->offset[id];
}
/*
 * Report whether conntrack @ct carries the extension identified by @id.
 * A conntrack without any extension area has no extensions at all.
 */
static inline bool nf_ct_ext_exist(const struct nf_conn *ct, u8 id)
{
	if (!ct->ext)
		return false;

	return __nf_ct_ext_exist(ct->ext, id);
}
static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id)
......
......@@ -27,9 +27,9 @@ struct nf_nat_protocol {
/* Alter the per-proto part of the tuple (depending on
maniptype), to give a unique tuple in the given range if
possible; return false if not. Per-protocol part of tuple
is initialized to the incoming packet. */
bool (*unique_tuple)(struct nf_conntrack_tuple *tuple,
possible. Per-protocol part of tuple is initialized to the
incoming packet. */
void (*unique_tuple)(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct);
......@@ -63,7 +63,7 @@ extern bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
const union nf_conntrack_man_proto *min,
const union nf_conntrack_man_proto *max);
extern bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
extern void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct,
......
......@@ -10,5 +10,7 @@ nfulnl_log_packet(u_int8_t pf,
const struct nf_loginfo *li_user,
const char *prefix);
#define NFULNL_COPY_DISABLED 0xff
#endif /* _KER_NFNETLINK_LOG_H */
......@@ -283,16 +283,13 @@ unsigned int arpt_do_table(struct sk_buff *skb,
arp = arp_hdr(skb);
do {
const struct arpt_entry_target *t;
int hdr_len;
if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
e = arpt_next_entry(e);
continue;
}
hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
(2 * skb->dev->addr_len);
ADD_COUNTER(e->counters, hdr_len, 1);
ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1);
t = arpt_get_target_c(e);
......@@ -713,7 +710,7 @@ static void get_counters(const struct xt_table_info *t,
struct arpt_entry *iter;
unsigned int cpu;
unsigned int i;
unsigned int curcpu;
unsigned int curcpu = get_cpu();
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
......@@ -723,14 +720,16 @@ static void get_counters(const struct xt_table_info *t,
* if new softirq were to run and call ipt_do_table
*/
local_bh_disable();
curcpu = smp_processor_id();
i = 0;
xt_entry_foreach(iter, t->entries[curcpu], t->size) {
SET_COUNTER(counters[i], iter->counters.bcnt,
iter->counters.pcnt);
++i;
}
local_bh_enable();
/* While processing counters from other cpus we can leave bottom halves
* enabled (preemption is still disabled).
*/
for_each_possible_cpu(cpu) {
if (cpu == curcpu)
......@@ -744,7 +743,7 @@ static void get_counters(const struct xt_table_info *t,
}
xt_info_wrunlock(cpu);
}
local_bh_enable();
put_cpu();
}
static struct xt_counters *alloc_counters(const struct xt_table *table)
......
......@@ -364,7 +364,7 @@ ipt_do_table(struct sk_buff *skb,
goto no_match;
}
ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
ADD_COUNTER(e->counters, skb->len, 1);
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
......@@ -884,7 +884,7 @@ get_counters(const struct xt_table_info *t,
struct ipt_entry *iter;
unsigned int cpu;
unsigned int i;
unsigned int curcpu;
unsigned int curcpu = get_cpu();
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
......@@ -894,14 +894,16 @@ get_counters(const struct xt_table_info *t,
* if new softirq were to run and call ipt_do_table
*/
local_bh_disable();
curcpu = smp_processor_id();
i = 0;
xt_entry_foreach(iter, t->entries[curcpu], t->size) {
SET_COUNTER(counters[i], iter->counters.bcnt,
iter->counters.pcnt);
++i;
}
local_bh_enable();
/* While processing counters from other cpus we can leave bottom halves
* enabled (preemption is still disabled).
*/
for_each_possible_cpu(cpu) {
if (cpu == curcpu)
......@@ -915,7 +917,7 @@ get_counters(const struct xt_table_info *t,
}
xt_info_wrunlock(cpu);
}
local_bh_enable();
put_cpu();
}
static struct xt_counters *alloc_counters(const struct xt_table *table)
......
......@@ -95,10 +95,11 @@ static void send_reset(struct sk_buff *oldskb, int hook)
}
tcph->rst = 1;
tcph->check = tcp_v4_check(sizeof(struct tcphdr),
niph->saddr, niph->daddr,
csum_partial(tcph,
sizeof(struct tcphdr), 0));
tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
niph->daddr, 0);
nskb->ip_summed = CHECKSUM_PARTIAL;
nskb->csum_start = (unsigned char *)tcph - nskb->head;
nskb->csum_offset = offsetof(struct tcphdr, check);
addr_type = RTN_UNSPEC;
if (hook != NF_INET_FORWARD
......@@ -115,7 +116,6 @@ static void send_reset(struct sk_buff *oldskb, int hook)
goto free_nskb;
niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT);
nskb->ip_summed = CHECKSUM_NONE;
/* "Never happens" */
if (nskb->len > dst_mtu(skb_dst(nskb)))
......
......@@ -261,14 +261,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
rcu_read_lock();
proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
/* Change protocol info to have some randomization */
if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
proto->unique_tuple(tuple, range, maniptype, ct);
goto out;
}
/* Only bother mapping if it's not already in range and unique */
if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) &&
(!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
!nf_nat_used_tuple(tuple, ct))
goto out;
......@@ -440,7 +435,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
return 0;
inside = (void *)skb->data + ip_hdrlen(skb);
inside = (void *)skb->data + hdrlen;
/* We're actually going to mangle it beyond trivial checksum
adjustment, so make sure the current checksum is correct. */
......@@ -470,12 +465,10 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
/* rcu_read_lock()ed by nf_hook_slow */
l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
if (!nf_ct_get_tuple(skb,
ip_hdrlen(skb) + sizeof(struct icmphdr),
(ip_hdrlen(skb) +
if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr),
(hdrlen +
sizeof(struct icmphdr) + inside->ip.ihl * 4),
(u_int16_t)AF_INET,
inside->ip.protocol,
(u_int16_t)AF_INET, inside->ip.protocol,
&inner, l3proto, l4proto))
return 0;
......@@ -484,15 +477,13 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
pass all hooks (locally-generated ICMP). Consider incoming
packet: PREROUTING (DST manip), routing produces ICMP, goes
through POSTROUTING (which must correct the DST manip). */
if (!manip_pkt(inside->ip.protocol, skb,
ip_hdrlen(skb) + sizeof(inside->icmp),
&ct->tuplehash[!dir].tuple,
!manip))
if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp),
&ct->tuplehash[!dir].tuple, !manip))
return 0;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
/* Reloading "inside" here since manip_pkt inner. */
inside = (void *)skb->data + ip_hdrlen(skb);
inside = (void *)skb->data + hdrlen;
inside->icmp.checksum = 0;
inside->icmp.checksum =
csum_fold(skb_checksum(skb, hdrlen,
......
......@@ -34,7 +34,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
}
EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct,
......@@ -53,7 +53,7 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
/* If it's dst rewrite, can't change port */
if (maniptype == IP_NAT_MANIP_DST)
return false;
return;
if (ntohs(*portptr) < 1024) {
/* Loose convention: >> 512 is credential passing */
......@@ -81,15 +81,15 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
else
off = *rover;
for (i = 0; i < range_size; i++, off++) {
for (i = 0; ; ++off) {
*portptr = htons(min + off % range_size);
if (nf_nat_used_tuple(tuple, ct))
if (++i != range_size && nf_nat_used_tuple(tuple, ct))
continue;
if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
*rover = off;
return true;
return;
}
return false;
return;
}
EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
......
......@@ -22,14 +22,14 @@
static u_int16_t dccp_port_rover;
static bool
static void
dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
&dccp_port_rover);
nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
&dccp_port_rover);
}
static bool
......
......@@ -37,7 +37,7 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
/* generate unique tuple ... */
static bool
static void
gre_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
......@@ -50,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
/* If there is no master conntrack we are not PPTP,
do not change tuples */
if (!ct->master)
return false;
return;
if (maniptype == IP_NAT_MANIP_SRC)
keyptr = &tuple->src.u.gre.key;
......@@ -68,14 +68,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
pr_debug("min = %u, range_size = %u\n", min, range_size);
for (i = 0; i < range_size; i++, key++) {
for (i = 0; ; ++key) {
*keyptr = htons(min + key % range_size);
if (!nf_nat_used_tuple(tuple, ct))
return true;
if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
return;
}
pr_debug("%p: no NAT mapping\n", ct);
return false;
return;
}
/* manipulate a GRE packet according to maniptype */
......
......@@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
}
static bool
static void
icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
......@@ -42,13 +42,13 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
range_size = 0xFFFF;
for (i = 0; i < range_size; i++, id++) {
for (i = 0; ; ++id) {
tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
(id % range_size));
if (!nf_nat_used_tuple(tuple, ct))
return true;
if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
return;
}
return false;
return;
}
static bool
......
......@@ -16,14 +16,14 @@
static u_int16_t nf_sctp_port_rover;
static bool
static void
sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
&nf_sctp_port_rover);
nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
&nf_sctp_port_rover);
}
static bool
......
......@@ -20,14 +20,13 @@
static u_int16_t tcp_port_rover;
static bool
static void
tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
&tcp_port_rover);
nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover);
}
static bool
......
......@@ -19,14 +19,13 @@
static u_int16_t udp_port_rover;
static bool
static void
udp_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
&udp_port_rover);
nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover);
}
static bool
......
......@@ -18,14 +18,14 @@
static u_int16_t udplite_port_rover;
static bool
static void
udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
&udplite_port_rover);
nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
&udplite_port_rover);
}
static bool
......
......@@ -26,14 +26,14 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
return true;
}
static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
/* Sorry: we can't help you; if it's not unique, we can't frob
anything. */
return false;
return;
}
static bool
......
......@@ -387,9 +387,7 @@ ip6t_do_table(struct sk_buff *skb,
goto no_match;
}
ADD_COUNTER(e->counters,
ntohs(ipv6_hdr(skb)->payload_len) +
sizeof(struct ipv6hdr), 1);
ADD_COUNTER(e->counters, skb->len, 1);
t = ip6t_get_target_c(e);
IP_NF_ASSERT(t->u.kernel.target);
......@@ -899,7 +897,7 @@ get_counters(const struct xt_table_info *t,
struct ip6t_entry *iter;
unsigned int cpu;
unsigned int i;
unsigned int curcpu;
unsigned int curcpu = get_cpu();
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
......@@ -909,14 +907,16 @@ get_counters(const struct xt_table_info *t,
* if new softirq were to run and call ipt_do_table
*/
local_bh_disable();
curcpu = smp_processor_id();
i = 0;
xt_entry_foreach(iter, t->entries[curcpu], t->size) {
SET_COUNTER(counters[i], iter->counters.bcnt,
iter->counters.pcnt);
++i;
}
local_bh_enable();
/* While processing counters from other cpus we can leave bottom halves
* enabled (preemption is still disabled).
*/
for_each_possible_cpu(cpu) {
if (cpu == curcpu)
......@@ -930,7 +930,7 @@ get_counters(const struct xt_table_info *t,
}
xt_info_wrunlock(cpu);
}
local_bh_enable();
put_cpu();
}
static struct xt_counters *alloc_counters(const struct xt_table *table)
......
......@@ -269,6 +269,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
* in the chain of fragments so far. We must know where to put
* this fragment, right?
*/
prev = fq->q.fragments_tail;
if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = fq->q.fragments; next != NULL; next = next->next) {
if (NFCT_FRAG6_CB(next)->offset >= offset)
......@@ -276,6 +281,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
prev = next;
}
found:
/* We found where to put this one. Check for overlap with
* preceding fragment, and, if needed, align things so that
* any overlaps are eliminated.
......@@ -341,6 +347,8 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
/* Insert this fragment in the chain of fragments. */
skb->next = next;
if (!next)
fq->q.fragments_tail = skb;
if (prev)
prev->next = skb;
else
......@@ -464,6 +472,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
head->csum);
fq->q.fragments = NULL;
fq->q.fragments_tail = NULL;
/* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
fp = skb_shinfo(head)->frag_list;
......
......@@ -326,6 +326,22 @@ config NETFILTER_XT_CONNMARK
comment "Xtables targets"
config NETFILTER_XT_TARGET_CHECKSUM
tristate "CHECKSUM target support"
depends on IP_NF_MANGLE || IP6_NF_MANGLE
depends on NETFILTER_ADVANCED
---help---
This option adds a `CHECKSUM' target, which can be used in the iptables mangle
table.
You can use this target to compute and fill in the checksum in
a packet that lacks a checksum. This is particularly useful
if you need to work around old applications, such as DHCP clients,
that do not work well with checksum offloads, but you don't want to
disable checksum offload in your device.
To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XT_TARGET_CLASSIFY
tristate '"CLASSIFY" target support'
depends on NETFILTER_ADVANCED
......@@ -647,6 +663,15 @@ config NETFILTER_XT_MATCH_CONNTRACK
To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XT_MATCH_CPU
tristate '"cpu" match support'
depends on NETFILTER_ADVANCED
help
CPU matching allows you to match packets based on the CPU
currently handling the packet.
To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XT_MATCH_DCCP
tristate '"dccp" protocol match support'
depends on NETFILTER_ADVANCED
......@@ -726,6 +751,16 @@ config NETFILTER_XT_MATCH_IPRANGE
If unsure, say M.
config NETFILTER_XT_MATCH_IPVS
tristate '"ipvs" match support'
depends on IP_VS
depends on NETFILTER_ADVANCED
depends on NF_CONNTRACK
help
This option allows you to match against IPVS properties of a packet.
If unsure, say N.
config NETFILTER_XT_MATCH_LENGTH
tristate '"length" match support'
depends on NETFILTER_ADVANCED
......
......@@ -45,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
# targets
obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
......@@ -69,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
......@@ -76,6 +78,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o
obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
......
......@@ -3,7 +3,7 @@
#
menuconfig IP_VS
tristate "IP virtual server support"
depends on NET && INET && NETFILTER
depends on NET && INET && NETFILTER && NF_CONNTRACK
---help---
IP Virtual Server support will let you build a high-performance
virtual server based on cluster of two or more real servers. This
......@@ -26,7 +26,7 @@ if IP_VS
config IP_VS_IPV6
bool "IPv6 support for IPVS"
depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
depends on IPV6 = y || IP_VS = IPV6
---help---
Add IPv6 support to IPVS. This is incomplete and might be dangerous.
......@@ -87,19 +87,16 @@ config IP_VS_PROTO_UDP
protocol. Say Y if unsure.
config IP_VS_PROTO_AH_ESP
bool
depends on UNDEFINED
def_bool IP_VS_PROTO_ESP || IP_VS_PROTO_AH
config IP_VS_PROTO_ESP
bool "ESP load balancing support"
select IP_VS_PROTO_AH_ESP
---help---
This option enables support for load balancing ESP (Encapsulation
Security Payload) transport protocol. Say Y if unsure.
config IP_VS_PROTO_AH
bool "AH load balancing support"
select IP_VS_PROTO_AH_ESP
---help---
This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure.
......@@ -238,7 +235,7 @@ comment 'IPVS application helper'
config IP_VS_FTP
tristate "FTP protocol helper"
depends on IP_VS_PROTO_TCP
depends on IP_VS_PROTO_TCP && NF_NAT
---help---
FTP is a protocol that transfers IP address and/or port number in
the payload. In the virtual server via Network Address Translation,
......
......@@ -569,49 +569,6 @@ static const struct file_operations ip_vs_app_fops = {
};
#endif
/*
* Replace a segment of data with a new segment
*/
/*
 * Substitute the @o_len bytes at @o_buf (which points into skb->data) with
 * the @n_len bytes at @n_buf, shifting the data that follows and resizing
 * the skb accordingly.  @pri is the allocation mode used if the skb head
 * has to be expanded.
 *
 * Returns 0 on success or -ENOMEM if the head could not be expanded.
 */
int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
		      char *o_buf, int o_len, char *n_buf, int n_len)
{
	int delta, seg_off, tail_len;

	EnterFunction(9);

	delta = n_len - o_len;
	seg_off = o_buf - (char *)skb->data;
	/* Number of bytes that follow the old segment in the skb data */
	tail_len = skb->len - (seg_off + o_len);

	if (delta > 0 && delta > skb_tailroom(skb)) {
		/*
		 * Not enough tailroom: expand the head first.  Afterwards
		 * address the data through skb->data and the saved offset
		 * instead of the caller's o_buf pointer.
		 */
		if (pskb_expand_head(skb, skb_headroom(skb), delta, pri))
			return -ENOMEM;
		skb_put(skb, delta);
		memmove(skb->data + seg_off + n_len,
			skb->data + seg_off + o_len, tail_len);
		skb_copy_to_linear_data_offset(skb, seg_off, n_buf, n_len);
	} else {
		/* In-place edit: grow before moving, shrink after moving. */
		if (delta > 0)
			skb_put(skb, delta);
		memmove(o_buf + n_len, o_buf + o_len, tail_len);
		memcpy(o_buf, n_buf, n_len);
		if (delta <= 0)
			skb_trim(skb, skb->len + delta);
	}

	/* The IP header total length has to follow the new skb length */
	ip_hdr(skb)->tot_len = htons(skb->len);

	LeaveFunction(9);
	return 0;
}
int __init ip_vs_app_init(void)
{
/* we will replace it with proc_net_ipvs_create() soon */
......
......@@ -271,6 +271,29 @@ struct ip_vs_conn *ip_vs_conn_in_get
return cp;
}
/*
 * Look up a connection via ip_vs_conn_in_get() using the addresses in @iph
 * and the two 16-bit port values found at @proto_off in @skb.
 *
 * When @inverse is set, the source and destination address/port pairs are
 * swapped before the lookup.  @pp is not used here.
 *
 * Returns whatever ip_vs_conn_in_get() returns, or NULL if the port values
 * cannot be read from the skb.
 */
struct ip_vs_conn *
ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
			struct ip_vs_protocol *pp,
			const struct ip_vs_iphdr *iph,
			unsigned int proto_off, int inverse)
{
	__be16 port_buf[2];
	const __be16 *ports;

	ports = skb_header_pointer(skb, proto_off, sizeof(port_buf), port_buf);
	if (!ports)
		return NULL;

	if (likely(!inverse))
		return ip_vs_conn_in_get(af, iph->protocol,
					 &iph->saddr, ports[0],
					 &iph->daddr, ports[1]);

	/* Inverse lookup: swap the two endpoints. */
	return ip_vs_conn_in_get(af, iph->protocol,
				 &iph->daddr, ports[1],
				 &iph->saddr, ports[0]);
}
EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto);
/* Get reference to connection template */
struct ip_vs_conn *ip_vs_ct_in_get
(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
......@@ -356,6 +379,28 @@ struct ip_vs_conn *ip_vs_conn_out_get
return ret;
}
/*
 * Look up a connection via ip_vs_conn_out_get() using the addresses in @iph
 * and the two 16-bit port values found at @proto_off in @skb.
 *
 * When @inverse is set, the source and destination address/port pairs are
 * swapped before the lookup.  @pp is not used here.
 *
 * Returns whatever ip_vs_conn_out_get() returns, or NULL if the port values
 * cannot be read from the skb.
 */
struct ip_vs_conn *
ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
			 struct ip_vs_protocol *pp,
			 const struct ip_vs_iphdr *iph,
			 unsigned int proto_off, int inverse)
{
	__be16 port_buf[2];
	const __be16 *ports;

	ports = skb_header_pointer(skb, proto_off, sizeof(port_buf), port_buf);
	if (!ports)
		return NULL;

	if (likely(!inverse))
		return ip_vs_conn_out_get(af, iph->protocol,
					  &iph->saddr, ports[0],
					  &iph->daddr, ports[1]);

	/* Inverse lookup: swap the two endpoints. */
	return ip_vs_conn_out_get(af, iph->protocol,
				  &iph->daddr, ports[1],
				  &iph->saddr, ports[0]);
}
EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
/*
* Put back the conn and restart its timer with its timeout
......
......@@ -54,7 +54,6 @@
EXPORT_SYMBOL(register_ip_vs_scheduler);
EXPORT_SYMBOL(unregister_ip_vs_scheduler);
EXPORT_SYMBOL(ip_vs_skb_replace);
EXPORT_SYMBOL(ip_vs_proto_name);
EXPORT_SYMBOL(ip_vs_conn_new);
EXPORT_SYMBOL(ip_vs_conn_in_get);
......@@ -536,26 +535,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
return NF_DROP;
}
/*
* It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
* chain, and is used for VS/NAT.
* It detects packets for VS/NAT connections and sends the packets
* immediately. This can avoid that iptable_nat mangles the packets
* for VS/NAT.
*/
static unsigned int ip_vs_post_routing(unsigned int hooknum,
				       struct sk_buff *skb,
				       const struct net_device *in,
				       const struct net_device *out,
				       int (*okfn)(struct sk_buff *))
{
	/*
	 * Packets flagged with ipvs_property were sent from IPVS and leave
	 * this chain right away; all other packets are accepted and carry on.
	 */
	return skb->ipvs_property ? NF_STOP : NF_ACCEPT;
}
__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
{
return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
......@@ -1499,14 +1478,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hooknum = NF_INET_FORWARD,
.priority = 99,
},
/* Before the netfilter connection tracking, exit from POST_ROUTING */
{
.hook = ip_vs_post_routing,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC-1,
},
#ifdef CONFIG_IP_VS_IPV6
/* After packet filtering, forward packet through VS/DR, VS/TUN,
* or VS/NAT(change destination), so that filtering rules can be
......@@ -1535,14 +1506,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hooknum = NF_INET_FORWARD,
.priority = 99,
},
/* Before the netfilter connection tracking, exit from POST_ROUTING */
{
.hook = ip_vs_post_routing,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_NAT_SRC-1,
},
#endif
};
......
......@@ -20,6 +20,17 @@
*
* Author: Wouter Gadeyne
*
*
* Code for ip_vs_expect_related and ip_vs_expect_callback is taken from
* http://www.ssi.bg/~ja/nfct/:
*
* ip_vs_nfct.c: Netfilter connection tracking support for IPVS
*
* Portions Copyright (C) 2001-2002
* Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
*
* Portions Copyright (C) 2003-2008
* Julian Anastasov
*/
#define KMSG_COMPONENT "IPVS"
......@@ -32,6 +43,9 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_nat_helper.h>
#include <linux/gfp.h>
#include <net/protocol.h>
#include <net/tcp.h>
......@@ -43,6 +57,16 @@
#define SERVER_STRING "227 Entering Passive Mode ("
#define CLIENT_STRING "PORT "
/*
 * Debug-print helpers.  Each FMT_* string is a printk-style format fragment
 * and the matching ARG_* macro expands to the comma-separated argument list
 * that fills it, so the pair must always be used together.
 *
 * FMT_TUPLE/ARG_TUPLE(T): IPv4 conntrack tuple pointed to by T, printed as
 * source address:port -> destination address:port / protocol number.
 * T is evaluated several times, so it must have no side effects.
 */
#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
&(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
(T)->dst.protonum
/*
 * FMT_CONN/ARG_CONN(C): IPVS connection pointed to by C, printed as
 * caddr:cport -> vaddr:vport -> daddr:dport / protocol : state.
 * C is evaluated several times, so it must have no side effects.
 */
#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
&((C)->vaddr.ip), ntohs((C)->vport), \
&((C)->daddr.ip), ntohs((C)->dport), \
(C)->protocol, (C)->state
/*
* List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
......@@ -123,6 +147,119 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
return 1;
}
/*
* Called from init_conntrack() as expectfn handler.
*/
/*
 * Adjust the reply tuple of a newly created conntrack @ct so that it matches
 * the IPVS connection the traffic belongs to.
 *
 * The original-direction tuple of @ct is looked up first with
 * ip_vs_conn_out_get() and then with ip_vs_conn_in_get().  On a hit a copy of
 * the reply tuple is rewritten (destination -> vaddr:vport for the first
 * lookup, source -> daddr:dport for the second) and, for masquerading
 * connections only, installed with nf_conntrack_alter_reply().
 * Expectations that are not PF_INET are ignored.
 */
static void
ip_vs_expect_callback(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
struct nf_conntrack_tuple *orig, new_reply;
struct ip_vs_conn *cp;
/* Only IPv4 expectations are handled here. */
if (exp->tuple.src.l3num != PF_INET)
return;
/*
* We assume that no NF locks are held before this callback.
* ip_vs_conn_out_get and ip_vs_conn_in_get should match their
* expectations even if they use wildcard values, now we provide the
* actual values from the newly created original conntrack direction.
* The conntrack is confirmed when packet reaches IPVS hooks.
*/
/* RS->CLIENT */
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
&orig->src.u3, orig->src.u.tcp.port,
&orig->dst.u3, orig->dst.u.tcp.port);
if (cp) {
/* Change reply CLIENT->RS to CLIENT->VS */
/* Work on a copy; ct itself is only changed at the alter label. */
new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
__func__, ct, ct->status,
ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
ARG_CONN(cp));
new_reply.dst.u3 = cp->vaddr;
new_reply.dst.u.tcp.port = cp->vport;
IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
", inout cp=" FMT_CONN "\n",
__func__, ct,
ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
ARG_CONN(cp));
goto alter;
}
/* CLIENT->VS */
cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
&orig->src.u3, orig->src.u.tcp.port,
&orig->dst.u3, orig->dst.u.tcp.port);
if (cp) {
/* Change reply VS->CLIENT to RS->CLIENT */
/* Work on a copy; ct itself is only changed at the alter label. */
new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
__func__, ct, ct->status,
ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
ARG_CONN(cp));
new_reply.src.u3 = cp->daddr;
new_reply.src.u.tcp.port = cp->dport;
IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", "
FMT_TUPLE ", outin cp=" FMT_CONN "\n",
__func__, ct,
ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
ARG_CONN(cp));
goto alter;
}
/* Neither lookup matched: leave the conntrack untouched. */
IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE
" - unknown expect\n",
__func__, ct, ct->status, ARG_TUPLE(orig));
return;
alter:
/* Never alter conntrack for non-NAT conns */
if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
nf_conntrack_alter_reply(ct, &new_reply);
/* Put back the connection returned by the lookup above. */
ip_vs_conn_put(cp);
return;
}
/*
* Create NF conntrack expectation with wildcard (optional) source port.
* Then the default callback function will alter the reply and will confirm
* the conntrack entry when the first packet comes.
*/
/*
 * Register a conntrack expectation, related to @ct, for a connection that
 * belongs to IPVS connection @cp.
 *
 * @proto:   layer 4 protocol of the expected connection
 * @port:    expected source port, passed through to nf_ct_expect_init()
 * @from_rs: non-zero to expect daddr -> caddr:cport, zero to expect
 *           caddr -> vaddr:vport
 *
 * The expectation's expectfn is set to ip_vs_expect_callback.  If no
 * expectation can be allocated the function returns without doing anything.
 * @skb is not used here.
 */
static void
ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct,
		     struct ip_vs_conn *cp, u_int8_t proto,
		     const __be16 *port, int from_rs)
{
	const union nf_inet_addr *src_addr, *dst_addr;
	const __be16 *dst_port;
	struct nf_conntrack_expect *expect;

	BUG_ON(!ct || ct == &nf_conntrack_untracked);

	expect = nf_ct_expect_alloc(ct);
	if (!expect)
		return;

	/* Select the endpoints of the expected connection. */
	if (from_rs) {
		src_addr = &cp->daddr;
		dst_addr = &cp->caddr;
		dst_port = &cp->cport;
	} else {
		src_addr = &cp->caddr;
		dst_addr = &cp->vaddr;
		dst_port = &cp->vport;
	}

	nf_ct_expect_init(expect, NF_CT_EXPECT_CLASS_DEFAULT,
			  nf_ct_l3num(ct), src_addr, dst_addr,
			  proto, port, dst_port);

	expect->expectfn = ip_vs_expect_callback;

	IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n",
		  __func__, ct, ARG_TUPLE(&expect->tuple));

	/* Register the expectation, then drop our reference to it. */
	nf_ct_expect_related(expect);
	nf_ct_expect_put(expect);
}
/*
* Look at outgoing ftp packets to catch the response to a PASV command
......@@ -149,7 +286,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct ip_vs_conn *n_cp;
char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */
unsigned buf_len;
int ret;
int ret = 0;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
......@@ -219,19 +358,26 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
buf_len = strlen(buf);
ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
/* If mangling fails this function will return 0
* which will cause the packet to be dropped.
* Mangling can only fail under memory pressure,
* hopefully it will succeed on the retransmitted
* packet.
*/
ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
start-data, end-start,
buf, buf_len);
if (ret)
ip_vs_expect_related(skb, ct, n_cp,
IPPROTO_TCP, NULL, 0);
}
/*
* Calculate required delta-offset to keep TCP happy
* Not setting 'diff' is intentional, otherwise the sequence
* would be adjusted twice.
*/
*diff = buf_len - (end-start);
if (*diff == 0) {
/* simply replace it with new passive address */
memcpy(start, buf, buf_len);
ret = 1;
} else {
ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
end-start, buf, buf_len);
}
cp->app_data = NULL;
ip_vs_tcp_conn_listen(n_cp);
......@@ -263,6 +409,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
union nf_inet_addr to;
__be16 port;
struct ip_vs_conn *n_cp;
struct nf_conn *ct;
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
......@@ -349,6 +496,11 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
ip_vs_control_add(n_cp, cp);
}
ct = (struct nf_conn *)skb->nfct;
if (ct && ct != &nf_conntrack_untracked)
ip_vs_expect_related(skb, ct, n_cp,
IPPROTO_TCP, &n_cp->dport, 1);
/*
* Move tunnel to listen state
*/
......
......@@ -98,6 +98,7 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
return NULL;
}
EXPORT_SYMBOL(ip_vs_proto_get);
/*
......
......@@ -8,55 +8,6 @@
#include <net/sctp/checksum.h>
#include <net/ip_vs.h>
/*
 * Look up the IPVS connection for an SCTP packet via ip_vs_conn_in_get().
 *
 * The two 16-bit words at @proto_off are read with skb_header_pointer()
 * and used as source and destination port.  Returns NULL when they cannot
 * be read.  With @inverse set, the (address, port) pairs are passed in
 * swapped order.  @pp is not used.
 */
static struct ip_vs_conn *
sctp_conn_in_get(int af,
		 const struct sk_buff *skb,
		 struct ip_vs_protocol *pp,
		 const struct ip_vs_iphdr *iph,
		 unsigned int proto_off,
		 int inverse)
{
	__be16 port_buf[2];
	__be16 *ports = skb_header_pointer(skb, proto_off,
					   sizeof(port_buf), port_buf);

	if (!ports)
		return NULL;

	if (likely(!inverse)) {
		/* Normal direction: source first, then destination. */
		return ip_vs_conn_in_get(af, iph->protocol,
					 &iph->saddr, ports[0],
					 &iph->daddr, ports[1]);
	}

	/* Inverse lookup: destination first, then source. */
	return ip_vs_conn_in_get(af, iph->protocol,
				 &iph->daddr, ports[1],
				 &iph->saddr, ports[0]);
}
/*
 * Look up the IPVS connection for an SCTP packet via ip_vs_conn_out_get().
 *
 * The two 16-bit words at @proto_off are read with skb_header_pointer()
 * and used as source and destination port.  Returns NULL when they cannot
 * be read.  With @inverse set, the (address, port) pairs are passed in
 * swapped order.  @pp is not used.
 */
static struct ip_vs_conn *
sctp_conn_out_get(int af,
		  const struct sk_buff *skb,
		  struct ip_vs_protocol *pp,
		  const struct ip_vs_iphdr *iph,
		  unsigned int proto_off,
		  int inverse)
{
	__be16 port_buf[2];
	__be16 *ports = skb_header_pointer(skb, proto_off,
					   sizeof(port_buf), port_buf);

	if (!ports)
		return NULL;

	if (likely(!inverse)) {
		/* Normal direction: source first, then destination. */
		return ip_vs_conn_out_get(af, iph->protocol,
					  &iph->saddr, ports[0],
					  &iph->daddr, ports[1]);
	}

	/* Inverse lookup: destination first, then source. */
	return ip_vs_conn_out_get(af, iph->protocol,
				  &iph->daddr, ports[1],
				  &iph->saddr, ports[0]);
}
static int
sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
......@@ -173,7 +124,7 @@ sctp_dnat_handler(struct sk_buff *skb,
return 0;
/* Call application helper if needed */
if (!ip_vs_app_pkt_out(cp, skb))
if (!ip_vs_app_pkt_in(cp, skb))
return 0;
}
......@@ -1169,8 +1120,8 @@ struct ip_vs_protocol ip_vs_protocol_sctp = {
.register_app = sctp_register_app,
.unregister_app = sctp_unregister_app,
.conn_schedule = sctp_conn_schedule,
.conn_in_get = sctp_conn_in_get,
.conn_out_get = sctp_conn_out_get,
.conn_in_get = ip_vs_conn_in_get_proto,
.conn_out_get = ip_vs_conn_out_get_proto,
.snat_handler = sctp_snat_handler,
.dnat_handler = sctp_dnat_handler,
.csum_check = sctp_csum_check,
......
......@@ -27,52 +27,6 @@
#include <net/ip_vs.h>
/*
 * Look up the IPVS connection for a TCP packet via ip_vs_conn_in_get().
 *
 * The source and destination ports are the two 16-bit words read at
 * @proto_off.  Returns NULL if they cannot be read.  When @inverse is set
 * the address/port pairs are swapped before the lookup.  @pp is not used.
 */
static struct ip_vs_conn *
tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
		const struct ip_vs_iphdr *iph, unsigned int proto_off,
		int inverse)
{
	__be16 hdr_ports[2], *ports;
	struct ip_vs_conn *conn;

	ports = skb_header_pointer(skb, proto_off, sizeof(hdr_ports),
				   hdr_ports);
	if (ports == NULL)
		return NULL;

	if (likely(!inverse))
		conn = ip_vs_conn_in_get(af, iph->protocol,
					 &iph->saddr, ports[0],
					 &iph->daddr, ports[1]);
	else
		conn = ip_vs_conn_in_get(af, iph->protocol,
					 &iph->daddr, ports[1],
					 &iph->saddr, ports[0]);

	return conn;
}
/*
 * Look up the IPVS connection for a TCP packet via ip_vs_conn_out_get().
 *
 * The source and destination ports are the two 16-bit words read at
 * @proto_off.  Returns NULL if they cannot be read.  When @inverse is set
 * the address/port pairs are swapped before the lookup.  @pp is not used.
 */
static struct ip_vs_conn *
tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
		 int inverse)
{
	__be16 hdr_ports[2], *ports;
	struct ip_vs_conn *conn;

	ports = skb_header_pointer(skb, proto_off, sizeof(hdr_ports),
				   hdr_ports);
	if (ports == NULL)
		return NULL;

	if (likely(!inverse))
		conn = ip_vs_conn_out_get(af, iph->protocol,
					  &iph->saddr, ports[0],
					  &iph->daddr, ports[1]);
	else
		conn = ip_vs_conn_out_get(af, iph->protocol,
					  &iph->daddr, ports[1],
					  &iph->saddr, ports[0]);

	return conn;
}
static int
tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
......@@ -721,8 +675,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
.register_app = tcp_register_app,
.unregister_app = tcp_unregister_app,
.conn_schedule = tcp_conn_schedule,
.conn_in_get = tcp_conn_in_get,
.conn_out_get = tcp_conn_out_get,
.conn_in_get = ip_vs_conn_in_get_proto,
.conn_out_get = ip_vs_conn_out_get_proto,
.snat_handler = tcp_snat_handler,
.dnat_handler = tcp_dnat_handler,
.csum_check = tcp_csum_check,
......
......@@ -27,58 +27,6 @@
#include <net/ip.h>
#include <net/ip6_checksum.h>
/*
 * Look up the IPVS connection for a UDP packet via ip_vs_conn_in_get().
 *
 * Reads the two 16-bit port words at @proto_off; returns NULL when that
 * read fails.  @inverse selects whether the source or the destination
 * (address, port) pair is passed first.  @pp is not used.
 */
static struct ip_vs_conn *
udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
		const struct ip_vs_iphdr *iph, unsigned int proto_off,
		int inverse)
{
	__be16 scratch[2], *ports;

	ports = skb_header_pointer(skb, proto_off, sizeof(scratch), scratch);
	if (!ports)
		return NULL;

	if (likely(!inverse))
		return ip_vs_conn_in_get(af, iph->protocol,
					 &iph->saddr, ports[0],
					 &iph->daddr, ports[1]);

	return ip_vs_conn_in_get(af, iph->protocol,
				 &iph->daddr, ports[1],
				 &iph->saddr, ports[0]);
}
/*
 * Look up the IPVS connection for a UDP packet via ip_vs_conn_out_get().
 *
 * Reads the two 16-bit port words at @proto_off; returns NULL when that
 * read fails.  @inverse selects whether the source or the destination
 * (address, port) pair is passed first.  @pp is not used.
 */
static struct ip_vs_conn *
udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
		 int inverse)
{
	__be16 scratch[2], *ports;

	ports = skb_header_pointer(skb, proto_off, sizeof(scratch), scratch);
	if (!ports)
		return NULL;

	if (likely(!inverse))
		return ip_vs_conn_out_get(af, iph->protocol,
					  &iph->saddr, ports[0],
					  &iph->daddr, ports[1]);

	return ip_vs_conn_out_get(af, iph->protocol,
				  &iph->daddr, ports[1],
				  &iph->saddr, ports[0]);
}
static int
udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
......@@ -520,8 +468,8 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
.init = udp_init,
.exit = udp_exit,
.conn_schedule = udp_conn_schedule,
.conn_in_get = udp_conn_in_get,
.conn_out_get = udp_conn_out_get,
.conn_in_get = ip_vs_conn_in_get_proto,
.conn_out_get = ip_vs_conn_out_get_proto,
.snat_handler = udp_snat_handler,
.dnat_handler = udp_dnat_handler,
.csum_check = udp_csum_check,
......
......@@ -28,6 +28,7 @@
#include <net/ip6_route.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack.h>
#include <linux/netfilter_ipv4.h>
#include <net/ip_vs.h>
......@@ -348,6 +349,30 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
#endif
static void
ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
{
struct nf_conn *ct = (struct nf_conn *)skb->nfct;
struct nf_conntrack_tuple new_tuple;
if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct))
return;
/*
* The connection is not yet in the hashtable, so we update it.
* CIP->VIP will remain the same, so leave the tuple in
* IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
* real-server we will see RIP->DIP.
*/
new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
new_tuple.src.u3 = cp->daddr;
/*
* This will also take care of UDP and other protocols.
*/
new_tuple.src.u.tcp.port = cp->dport;
nf_conntrack_alter_reply(ct, &new_tuple);
}
/*
* NAT transmitter (only for outside-to-inside nat forwarding)
* Not used for related ICMP
......@@ -403,6 +428,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
ip_vs_update_conntrack(skb, cp);
/* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still
MTU problem. */
......@@ -479,6 +506,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
ip_vs_update_conntrack(skb, cp);
/* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still
MTU problem. */
......
......@@ -966,8 +966,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
if (acct) {
spin_lock_bh(&ct->lock);
acct[CTINFO2DIR(ctinfo)].packets++;
acct[CTINFO2DIR(ctinfo)].bytes +=
skb->len - skb_network_offset(skb);
acct[CTINFO2DIR(ctinfo)].bytes += skb->len;
spin_unlock_bh(&ct->lock);
}
}
......
......@@ -23,9 +23,10 @@ void __nf_ct_ext_destroy(struct nf_conn *ct)
{
unsigned int i;
struct nf_ct_ext_type *t;
struct nf_ct_ext *ext = ct->ext;
for (i = 0; i < NF_CT_EXT_NUM; i++) {
if (!nf_ct_ext_exist(ct, i))
if (!__nf_ct_ext_exist(ext, i))
continue;
rcu_read_lock();
......@@ -73,44 +74,45 @@ static void __nf_ct_ext_free_rcu(struct rcu_head *head)
void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
{
struct nf_ct_ext *new;
struct nf_ct_ext *old, *new;
int i, newlen, newoff;
struct nf_ct_ext_type *t;
/* Conntrack must not be confirmed to avoid races on reallocation. */
NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
if (!ct->ext)
old = ct->ext;
if (!old)
return nf_ct_ext_create(&ct->ext, id, gfp);
if (nf_ct_ext_exist(ct, id))
if (__nf_ct_ext_exist(old, id))
return NULL;
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[id]);
BUG_ON(t == NULL);
newoff = ALIGN(ct->ext->len, t->align);
newoff = ALIGN(old->len, t->align);
newlen = newoff + t->len;
rcu_read_unlock();
new = __krealloc(ct->ext, newlen, gfp);
new = __krealloc(old, newlen, gfp);
if (!new)
return NULL;
if (new != ct->ext) {
if (new != old) {
for (i = 0; i < NF_CT_EXT_NUM; i++) {
if (!nf_ct_ext_exist(ct, i))
if (!__nf_ct_ext_exist(old, i))
continue;
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[i]);
if (t && t->move)
t->move((void *)new + new->offset[i],
(void *)ct->ext + ct->ext->offset[i]);
(void *)old + old->offset[i]);
rcu_read_unlock();
}
call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu);
call_rcu(&old->rcu, __nf_ct_ext_free_rcu);
ct->ext = new;
}
......
......@@ -585,8 +585,16 @@ static bool tcp_in_window(const struct nf_conn *ct,
* Let's try to use the data from the packet.
*/
sender->td_end = end;
win <<= sender->td_scale;
sender->td_maxwin = (win == 0 ? 1 : win);
sender->td_maxend = end + sender->td_maxwin;
/*
* We haven't seen traffic in the other direction yet
* but we have to tweak window tracking to pass III
* and IV until that happens.
*/
if (receiver->td_maxwin == 0)
receiver->td_end = receiver->td_maxend = sack;
}
} else if (((state->state == TCP_CONNTRACK_SYN_SENT
&& dir == IP_CT_DIR_ORIGINAL)
......@@ -680,7 +688,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
/*
* Update receiver data.
*/
if (after(end, sender->td_maxend))
if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
receiver->td_maxwin += end - sender->td_maxend;
if (after(sack + win, receiver->td_maxend - 1)) {
receiver->td_maxend = sack + win;
......
/* iptables module for the packet checksum mangling
*
* (C) 2002 by Harald Welte <laforge@netfilter.org>
* (C) 2010 Red Hat, Inc.
*
* Author: Michael S. Tsirkin <mst@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_CHECKSUM.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Michael S. Tsirkin <mst@redhat.com>");
MODULE_DESCRIPTION("Xtables: checksum modification");
MODULE_ALIAS("ipt_CHECKSUM");
MODULE_ALIAS("ip6t_CHECKSUM");
/*
 * CHECKSUM target: when the skb's checksum is still pending
 * (CHECKSUM_PARTIAL), have skb_checksum_help() process it now.
 * Always returns XT_CONTINUE; @par is not consulted.
 */
static unsigned int
checksum_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return XT_CONTINUE;

	skb_checksum_help(skb);
	return XT_CONTINUE;
}
/*
 * Validate the rule's target info.
 *
 * Returns 0 when the operation bitset is non-empty and contains nothing
 * but XT_CHECKSUM_OP_FILL; otherwise logs the reason and returns -EINVAL.
 */
static int checksum_tg_check(const struct xt_tgchk_param *par)
{
	const struct xt_CHECKSUM_info *info = par->targinfo;

	/* Reject any bit outside the supported set. */
	if ((info->operation & ~XT_CHECKSUM_OP_FILL) != 0) {
		pr_info("unsupported CHECKSUM operation %x\n", info->operation);
		return -EINVAL;
	}

	/* An empty bitset would make the target a no-op. */
	if (info->operation == 0) {
		pr_info("no CHECKSUM operation enabled\n");
		return -EINVAL;
	}

	return 0;
}
/* xtables registration record for the CHECKSUM target ("mangle" table). */
static struct xt_target checksum_tg_reg __read_mostly = {
	/* identity */
	.name		= "CHECKSUM",
	.family		= NFPROTO_UNSPEC,
	.table		= "mangle",
	/* callbacks */
	.checkentry	= checksum_tg_check,
	.target		= checksum_tg,
	/* per-rule data and owner */
	.targetsize	= sizeof(struct xt_CHECKSUM_info),
	.me		= THIS_MODULE,
};
/*
 * Module load hook: registers checksum_tg_reg with xtables and returns
 * the result of xt_register_target() unchanged.
 */
static int __init checksum_tg_init(void)
{
return xt_register_target(&checksum_tg_reg);
}
/* Module unload hook: unregisters the target registered at load time. */
static void __exit checksum_tg_exit(void)
{
xt_unregister_target(&checksum_tg_reg);
}
/* Wire the load/unload hooks into the module machinery. */
module_init(checksum_tg_init);
module_exit(checksum_tg_exit);
......@@ -37,8 +37,10 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
return NF_DROP;
sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr,
hp->source, tgi->lport ? tgi->lport : hp->dest,
iph->saddr,
tgi->laddr ? tgi->laddr : iph->daddr,
hp->source,
tgi->lport ? tgi->lport : hp->dest,
par->in, true);
/* NOTE: assign_sock consumes our sk reference */
......
/* Kernel module to match running CPU */
/*
* Might be used to distribute connections on several daemons, if
* RPS (Receive Packet Steering) is enabled or NIC is multiqueue capable,
* each RX queue IRQ affined to one CPU (1:1 mapping)
*
*/
/* (C) 2010 Eric Dumazet
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter/xt_cpu.h>
#include <linux/netfilter/x_tables.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
MODULE_DESCRIPTION("Xtables: CPU match");
/*
 * Validate the rule's match info: the invert field may only be 0 or 1.
 * Returns 0 when valid, -EINVAL when any other bit is set.
 */
static int cpu_mt_check(const struct xt_mtchk_param *par)
{
	const struct xt_cpu_info *cfg = par->matchinfo;

	return (cfg->invert & ~1) ? -EINVAL : 0;
}
/*
 * Match when the packet is being processed on the configured CPU; the
 * result is XOR-ed with the rule's invert field.  @skb is not inspected.
 */
static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_cpu_info *cfg = par->matchinfo;
	const bool on_cpu = (cfg->cpu == smp_processor_id());

	return on_cpu ^ cfg->invert;
}
/* xtables registration record for the "cpu" match, revision 0. */
static struct xt_match cpu_mt_reg __read_mostly = {
	/* identity */
	.name		= "cpu",
	.revision	= 0,
	.family		= NFPROTO_UNSPEC,
	/* callbacks */
	.match		= cpu_mt,
	.checkentry	= cpu_mt_check,
	/* per-rule data and owner */
	.matchsize	= sizeof(struct xt_cpu_info),
	.me		= THIS_MODULE,
};
/*
 * Module load hook: registers cpu_mt_reg with xtables and returns the
 * result of xt_register_match() unchanged.
 */
static int __init cpu_mt_init(void)
{
return xt_register_match(&cpu_mt_reg);
}
/* Module unload hook: unregisters the match registered at load time. */
static void __exit cpu_mt_exit(void)
{
xt_unregister_match(&cpu_mt_reg);
}
/* Wire the load/unload hooks into the module machinery. */
module_init(cpu_mt_init);
module_exit(cpu_mt_exit);
/*
* xt_ipvs - kernel module to match IPVS connection properties
*
* Author: Hannes Eder <heder@google.com>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/spinlock.h>
#include <linux/skbuff.h>
#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
#endif
#include <linux/ip_vs.h>
#include <linux/types.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_ipvs.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/ip_vs.h>
MODULE_AUTHOR("Hannes Eder <heder@google.com>");
MODULE_DESCRIPTION("Xtables: match IPVS connection properties");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_ipvs");
MODULE_ALIAS("ip6t_ipvs");
/*
 * Masked address comparison (borrowed from xt_conntrack).
 *
 * Returns true when @kaddr and @uaddr agree on every bit selected by
 * @umask.  IPv4 is always handled; IPv6 only when CONFIG_IP_VS_IPV6 is
 * set.  Any other @l3proto yields false.
 */
static bool ipvs_mt_addrcmp(const union nf_inet_addr *kaddr,
			    const union nf_inet_addr *uaddr,
			    const union nf_inet_addr *umask,
			    unsigned int l3proto)
{
	switch (l3proto) {
	case NFPROTO_IPV4:
		return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0;
#ifdef CONFIG_IP_VS_IPV6
	case NFPROTO_IPV6:
		return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
					    &uaddr->in6) == 0;
#endif
	default:
		return false;
	}
}
/*
 * ipvs_mt - xtables match callback for the "ipvs" match.
 *
 * Returns true when the packet satisfies every criterion selected in
 * data->bitmask, false otherwise.  Criteria other than the bare
 * ipvs_property test require skb->ipvs_property to be set and an IPVS
 * connection to be found for the packet.
 *
 * Most criteria are written as "(cond) ^ !(data->invert & FLAG)": that
 * expression is true, i.e. the criterion FAILS, when cond is false and
 * the invert bit is clear, or when cond is true and the invert bit is set.
 */
static bool
ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_ipvs_mtinfo *data = par->matchinfo;
/* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
const u_int8_t family = par->family;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
bool match = true;
/*
 * Only the ipvs_property criterion was requested: the result is the
 * skb flag, flipped when the corresponding invert bit is set.  No
 * connection lookup is needed.
 */
if (data->bitmask == XT_IPVS_IPVS_PROPERTY) {
match = skb->ipvs_property ^
!!(data->invert & XT_IPVS_IPVS_PROPERTY);
goto out;
}
/* other flags than XT_IPVS_IPVS_PROPERTY are set */
if (!skb->ipvs_property) {
match = false;
goto out;
}
ip_vs_fill_iphdr(family, skb_network_header(skb), &iph);
/* Layer-4 protocol criterion, checked before any connection lookup. */
if (data->bitmask & XT_IPVS_PROTO)
if ((iph.protocol == data->l4proto) ^
!(data->invert & XT_IPVS_PROTO)) {
match = false;
goto out;
}
/* No IPVS protocol handler for this protocol: cannot match. */
pp = ip_vs_proto_get(iph.protocol);
if (unlikely(!pp)) {
match = false;
goto out;
}
/*
* Check if the packet belongs to an existing entry.  The lookup goes
* through the protocol's conn_out_get hook with the inverse argument
* set to 1.
*/
cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */);
if (unlikely(cp == NULL)) {
match = false;
goto out;
}
/*
* We found a connection, i.e. cp != NULL, so make sure
* __ip_vs_conn_put() is called before returning: every exit from here
* on goes through the out_put_cp label.
*/
if (data->bitmask & XT_IPVS_VPORT)
if ((cp->vport == data->vport) ^
!(data->invert & XT_IPVS_VPORT)) {
match = false;
goto out_put_cp;
}
/* vport of the controlling connection; a NULL cp->control counts as "not equal". */
if (data->bitmask & XT_IPVS_VPORTCTL)
if ((cp->control != NULL &&
cp->control->vport == data->vportctl) ^
!(data->invert & XT_IPVS_VPORTCTL)) {
match = false;
goto out_put_cp;
}
/*
 * Direction criterion: needs a tracked conntrack entry on the skb.
 * Note the "!!" (not "!") here: without the invert bit the criterion
 * fails for ctinfo >= IP_CT_IS_REPLY; with the invert bit it fails for
 * ctinfo < IP_CT_IS_REPLY.
 */
if (data->bitmask & XT_IPVS_DIR) {
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct == NULL || nf_ct_is_untracked(ct)) {
match = false;
goto out_put_cp;
}
if ((ctinfo >= IP_CT_IS_REPLY) ^
!!(data->invert & XT_IPVS_DIR)) {
match = false;
goto out_put_cp;
}
}
/* Forwarding method, taken from the IP_VS_CONN_F_FWD_MASK bits of cp->flags. */
if (data->bitmask & XT_IPVS_METHOD)
if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^
!(data->invert & XT_IPVS_METHOD)) {
match = false;
goto out_put_cp;
}
/* Virtual address, compared under data->vmask. */
if (data->bitmask & XT_IPVS_VADDR) {
if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr,
&data->vmask, family) ^
!(data->invert & XT_IPVS_VADDR)) {
match = false;
goto out_put_cp;
}
}
/* All requested criteria passed: fall through with match still true. */
out_put_cp:
__ip_vs_conn_put(cp);
out:
pr_debug("match=%d\n", match);
return match;
}
static int ipvs_mt_check(const struct xt_mtchk_param *par)
{
if (par->family != NFPROTO_IPV4
#ifdef CONFIG_IP_VS_IPV6
&& par->family != NFPROTO_IPV6
#endif
) {
pr_info("protocol family %u not supported\n", par->family);
return -EINVAL;
}
return 0;
}
/* xtables registration record for the "ipvs" match, revision 0. */
static struct xt_match xt_ipvs_mt_reg __read_mostly = {
	/* identity */
	.name		= "ipvs",
	.revision	= 0,
	.family		= NFPROTO_UNSPEC,
	/* callbacks */
	.checkentry	= ipvs_mt_check,
	.match		= ipvs_mt,
	/* per-rule data and owner */
	.matchsize	= XT_ALIGN(sizeof(struct xt_ipvs_mtinfo)),
	.me		= THIS_MODULE,
};
/*
 * Module load hook: registers xt_ipvs_mt_reg with xtables and returns the
 * result of xt_register_match() unchanged.
 */
static int __init ipvs_mt_init(void)
{
return xt_register_match(&xt_ipvs_mt_reg);
}
/* Module unload hook: unregisters the match registered at load time. */
static void __exit ipvs_mt_exit(void)
{
xt_unregister_match(&xt_ipvs_mt_reg);
}
/* Wire the load/unload hooks into the module machinery. */
module_init(ipvs_mt_init);
module_exit(ipvs_mt_exit);
......@@ -11,7 +11,8 @@
#include <linux/netfilter/xt_quota.h>
struct xt_quota_priv {
uint64_t quota;
spinlock_t lock;
uint64_t quota;
};
MODULE_LICENSE("GPL");
......@@ -20,8 +21,6 @@ MODULE_DESCRIPTION("Xtables: countdown quota match");
MODULE_ALIAS("ipt_quota");
MODULE_ALIAS("ip6t_quota");
static DEFINE_SPINLOCK(quota_lock);
static bool
quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
......@@ -29,7 +28,7 @@ quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
struct xt_quota_priv *priv = q->master;
bool ret = q->flags & XT_QUOTA_INVERT;
spin_lock_bh(&quota_lock);
spin_lock_bh(&priv->lock);
if (priv->quota >= skb->len) {
priv->quota -= skb->len;
ret = !ret;
......@@ -37,9 +36,7 @@ quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
/* we do not allow even small packets from now on */
priv->quota = 0;
}
/* Copy quota back to matchinfo so that iptables can display it */
q->quota = priv->quota;
spin_unlock_bh(&quota_lock);
spin_unlock_bh(&priv->lock);
return ret;
}
......@@ -55,6 +52,7 @@ static int quota_mt_check(const struct xt_mtchk_param *par)
if (q->master == NULL)
return -ENOMEM;
spin_lock_init(&q->master->lock);
q->master->quota = q->quota;
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment