Commit fc604767 authored by Julian Anastasov's avatar Julian Anastasov Committed by Simon Horman

ipvs: changes for local real server

 	This patch deals with local real servers:

- Add support for DNAT to local address (different real server port).
It needs ip_vs_out hook in LOCAL_OUT for both families because
skb->protocol is not set for locally generated packets and can not
be used to set 'af'.

- Skip packets in ip_vs_in marked with skb->ipvs_property because
ip_vs_out processing can be executed in LOCAL_OUT but we still
have the conn_out_get check in ip_vs_in.

- Ignore packets with inet->nodefrag from local stack

- Require skb_dst(skb) != NULL because we use it to get struct net

- Add support for changing the route to local IPv4 stack after DNAT
depending on the source address type. Local client sets output
route and the remote client sets input route. It looks like
IPv6 does not need such rerouting because the replies use
addresses from initial incoming header, not from skb route.

- All transmitters now have strict checks for the destination
address type: redirect from non-local address to local real
server requires NAT method, local address can not be used as
source address when talking to remote real server.

- Now LOCALNODE is not set explicitly as forwarding
method in real server to allow the connections to provide
correct forwarding method to the backup server. Not sure if
this breaks tools that expect to see 'Local' real server type.
If needed, this can be supported with new flag IP_VS_DEST_F_LOCAL.
Now it should be possible connections in backup that lost
their fwmark information during sync to be forwarded properly
to their daddr, even if it is local address in the backup server.
By this way backup could be used as real server for DR or TUN,
for NAT there are some restrictions because tuple collisions
in conntracks can create problems for the traffic.

- Call ip_vs_dst_reset when destination is updated in case
some real server IP type is changed between local and remote.

[ horms@verge.net.au: removed trailing whitespace ]
Signed-off-by: default avatarJulian Anastasov <ja@ssi.bg>
Signed-off-by: default avatarSimon Horman <horms@verge.net.au>
parent f5a41847
......@@ -409,6 +409,7 @@ struct ip_vs_conn {
/* packet transmitter for different forwarding methods. If it
mangles the packet, it must return NF_DROP or better NF_STOLEN,
otherwise this must be changed to a sk_buff **.
NF_ACCEPT can be returned when destination is local.
*/
int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp);
......
......@@ -984,26 +984,34 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
}
/*
* It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
* Check if outgoing packet belongs to the established ip_vs_conn.
*/
static unsigned int
ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
{
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
int af;
EnterFunction(11);
af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
return NF_ACCEPT;
/* Bad... Do not break raw sockets */
if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(skb->sk);
if (inet && sk->sk_family == PF_INET && inet->nodefrag)
return NF_ACCEPT;
}
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
......@@ -1106,6 +1114,69 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
return handle_response(af, skb, pp, cp, iph.len);
}
/*
* It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
return ip_vs_out(hooknum, skb, AF_INET);
}
/*
* It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned int verdict;
/* Disable BH in LOCAL_OUT until all places are fixed */
local_bh_disable();
verdict = ip_vs_out(hooknum, skb, AF_INET);
local_bh_enable();
return verdict;
}
#ifdef CONFIG_IP_VS_IPV6
/*
* It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
return ip_vs_out(hooknum, skb, AF_INET6);
}
/*
* It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned int verdict;
/* Disable BH in LOCAL_OUT until all places are fixed */
local_bh_disable();
verdict = ip_vs_out(hooknum, skb, AF_INET6);
local_bh_enable();
return verdict;
}
#endif
/*
* Handle ICMP messages in the outside-to-inside direction (incoming).
......@@ -1342,6 +1413,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
struct ip_vs_conn *cp;
int ret, restart, af, pkts;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
return NF_ACCEPT;
af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
......@@ -1525,13 +1600,13 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hooknum = NF_INET_LOCAL_IN,
.priority = 100,
},
/* After packet filtering, change source only for VS/NAT */
/* Before ip_vs_in, change source only for VS/NAT */
{
.hook = ip_vs_out,
.hook = ip_vs_local_reply4,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_FORWARD,
.priority = 100,
.hooknum = NF_INET_LOCAL_OUT,
.priority = -99,
},
/* After packet filtering (but before ip_vs_out_icmp), catch icmp
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
......@@ -1542,6 +1617,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hooknum = NF_INET_FORWARD,
.priority = 99,
},
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_FORWARD,
.priority = 100,
},
#ifdef CONFIG_IP_VS_IPV6
/* After packet filtering, forward packet through VS/DR, VS/TUN,
* or VS/NAT(change destination), so that filtering rules can be
......@@ -1553,13 +1636,13 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hooknum = NF_INET_LOCAL_IN,
.priority = 100,
},
/* After packet filtering, change source only for VS/NAT */
/* Before ip_vs_in, change source only for VS/NAT */
{
.hook = ip_vs_out,
.hook = ip_vs_local_reply6,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_FORWARD,
.priority = 100,
.pf = PF_INET,
.hooknum = NF_INET_LOCAL_OUT,
.priority = -99,
},
/* After packet filtering (but before ip_vs_out_icmp), catch icmp
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
......@@ -1570,6 +1653,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hooknum = NF_INET_FORWARD,
.priority = 99,
},
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_FORWARD,
.priority = 100,
},
#endif
};
......
......@@ -777,20 +777,6 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
conn_flags |= IP_VS_CONN_F_INACTIVE;
/* check if local node and update the flags */
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6) {
if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
| IP_VS_CONN_F_LOCALNODE;
}
} else
#endif
if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
| IP_VS_CONN_F_LOCALNODE;
}
/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
conn_flags |= IP_VS_CONN_F_NOOUTPUT;
......@@ -824,6 +810,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
dest->u_threshold = udest->u_threshold;
dest->l_threshold = udest->l_threshold;
spin_lock(&dest->dst_lock);
ip_vs_dst_reset(dest);
spin_unlock(&dest->dst_lock);
if (add)
ip_vs_new_estimator(&dest->stats);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment