Commit 36aea585 authored by Pablo Neira Ayuso

Merge tag 'ipvs-for-v4.4' of https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next

Simon Horman says:

====================
IPVS Updates for v4.4

please consider these IPVS Updates for v4.4.

The updates include the following from Alex Gartrell:
* Scheduling of ICMP
* Sysctl to ignore tunneled packets, and hence avoid some packet-looping scenarios
====================
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
parents 47bbbb30 4e478098
...@@ -157,6 +157,16 @@ expire_quiescent_template - BOOLEAN ...@@ -157,6 +157,16 @@ expire_quiescent_template - BOOLEAN
persistence template if it is to be used to schedule a new persistence template if it is to be used to schedule a new
connection and the destination server is quiescent. connection and the destination server is quiescent.
ignore_tunneled - BOOLEAN
0 - disabled (default)
not 0 - enabled
If set, ipvs will set the ipvs_property on all packets which are of
unrecognized protocols. This prevents us from routing tunneled
protocols like ipip, which is useful to prevent rescheduling
packets that have been tunneled to the ipvs host (i.e. to prevent
ipvs routing loops when ipvs is also acting as a real server).
nat_icmp_send - BOOLEAN nat_icmp_send - BOOLEAN
0 - disabled (default) 0 - disabled (default)
not 0 - enabled not 0 - enabled
......
...@@ -29,6 +29,9 @@ ...@@ -29,6 +29,9 @@
#endif #endif
#include <net/net_namespace.h> /* Netw namespace */ #include <net/net_namespace.h> /* Netw namespace */
#define IP_VS_HDR_INVERSE 1
#define IP_VS_HDR_ICMP 2
/* Generic access of ipvs struct */ /* Generic access of ipvs struct */
static inline struct netns_ipvs *net_ipvs(struct net* net) static inline struct netns_ipvs *net_ipvs(struct net* net)
{ {
...@@ -104,6 +107,8 @@ static inline struct net *seq_file_single_net(struct seq_file *seq) ...@@ -104,6 +107,8 @@ static inline struct net *seq_file_single_net(struct seq_file *seq)
extern int ip_vs_conn_tab_size; extern int ip_vs_conn_tab_size;
struct ip_vs_iphdr { struct ip_vs_iphdr {
int hdr_flags; /* ipvs flags */
__u32 off; /* Where IP or IPv4 header starts */
__u32 len; /* IPv4 simply where L4 starts __u32 len; /* IPv4 simply where L4 starts
* IPv6 where L4 Transport Header starts */ * IPv6 where L4 Transport Header starts */
__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/ __u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
...@@ -120,48 +125,89 @@ static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, ...@@ -120,48 +125,89 @@ static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
return skb_header_pointer(skb, offset, len, buffer); return skb_header_pointer(skb, offset, len, buffer);
} }
/* Populate *iphdr from the raw IPv4 header found at nh.
 *
 * Copies source/destination address and protocol, clears fragoffs and
 * sets len to the IPv4 ihl field multiplied by four.
 */
static inline void
ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
{
	const struct iphdr *ip4 = nh;

	iphdr->saddr.ip = ip4->saddr;
	iphdr->daddr.ip = ip4->daddr;
	iphdr->protocol = ip4->protocol;
	iphdr->fragoffs = 0;
	iphdr->len = ip4->ihl << 2;
}
/* This function handles filling *ip_vs_iphdr, both for IPv4 and IPv6. /* This function handles filling *ip_vs_iphdr, both for IPv4 and IPv6.
* IPv6 requires some extra work, as finding proper header position, * IPv6 requires some extra work, as finding proper header position,
* depend on the IPv6 extension headers. * depend on the IPv6 extension headers.
*/ */
static inline void static inline int
ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr) ip_vs_fill_iph_skb_off(int af, const struct sk_buff *skb, int offset,
int hdr_flags, struct ip_vs_iphdr *iphdr)
{ {
iphdr->hdr_flags = hdr_flags;
iphdr->off = offset;
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) { if (af == AF_INET6) {
const struct ipv6hdr *iph = struct ipv6hdr _iph;
(struct ipv6hdr *)skb_network_header(skb); const struct ipv6hdr *iph = skb_header_pointer(
skb, offset, sizeof(_iph), &_iph);
if (!iph)
return 0;
iphdr->saddr.in6 = iph->saddr; iphdr->saddr.in6 = iph->saddr;
iphdr->daddr.in6 = iph->daddr; iphdr->daddr.in6 = iph->daddr;
/* ipv6_find_hdr() updates len, flags */ /* ipv6_find_hdr() updates len, flags */
iphdr->len = 0; iphdr->len = offset;
iphdr->flags = 0; iphdr->flags = 0;
iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1, iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1,
&iphdr->fragoffs, &iphdr->fragoffs,
&iphdr->flags); &iphdr->flags);
if (iphdr->protocol < 0)
return 0;
} else } else
#endif #endif
{ {
const struct iphdr *iph = struct iphdr _iph;
(struct iphdr *)skb_network_header(skb); const struct iphdr *iph = skb_header_pointer(
iphdr->len = iph->ihl * 4; skb, offset, sizeof(_iph), &_iph);
if (!iph)
return 0;
iphdr->len = offset + iph->ihl * 4;
iphdr->fragoffs = 0; iphdr->fragoffs = 0;
iphdr->protocol = iph->protocol; iphdr->protocol = iph->protocol;
iphdr->saddr.ip = iph->saddr; iphdr->saddr.ip = iph->saddr;
iphdr->daddr.ip = iph->daddr; iphdr->daddr.ip = iph->daddr;
} }
return 1;
}
/* Fill *iphdr from the IP header located at offset inside skb and tag
 * it with IP_VS_HDR_ICMP; IP_VS_HDR_INVERSE is added as well when
 * inverse is true.
 *
 * Returns whatever ip_vs_fill_iph_skb_off() returns.
 */
static inline int
ip_vs_fill_iph_skb_icmp(int af, const struct sk_buff *skb, int offset,
			bool inverse, struct ip_vs_iphdr *iphdr)
{
	const int inverse_flag = inverse ? IP_VS_HDR_INVERSE : 0;

	return ip_vs_fill_iph_skb_off(af, skb, offset,
				      IP_VS_HDR_ICMP | inverse_flag, iphdr);
}
/* Fill *iphdr from the header at the skb's network offset.
 *
 * The only header flag that can be set here is IP_VS_HDR_INVERSE, and
 * only when inverse is true.
 *
 * Returns whatever ip_vs_fill_iph_skb_off() returns.
 */
static inline int
ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, bool inverse,
		   struct ip_vs_iphdr *iphdr)
{
	const int inverse_flag = inverse ? IP_VS_HDR_INVERSE : 0;

	return ip_vs_fill_iph_skb_off(af, skb, skb_network_offset(skb),
				      inverse_flag, iphdr);
}
/* Report whether IP_VS_HDR_INVERSE is set in iph->hdr_flags. */
static inline bool
ip_vs_iph_inverse(const struct ip_vs_iphdr *iph)
{
	return (iph->hdr_flags & IP_VS_HDR_INVERSE) != 0;
}
static inline bool
ip_vs_iph_icmp(const struct ip_vs_iphdr *iph)
{
return !!(iph->hdr_flags & IP_VS_HDR_ICMP);
} }
static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst, static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
...@@ -449,14 +495,12 @@ struct ip_vs_protocol { ...@@ -449,14 +495,12 @@ struct ip_vs_protocol {
struct ip_vs_conn * struct ip_vs_conn *
(*conn_in_get)(int af, (*conn_in_get)(int af,
const struct sk_buff *skb, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, const struct ip_vs_iphdr *iph);
int inverse);
struct ip_vs_conn * struct ip_vs_conn *
(*conn_out_get)(int af, (*conn_out_get)(int af,
const struct sk_buff *skb, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, const struct ip_vs_iphdr *iph);
int inverse);
int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp, int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, struct ip_vs_iphdr *iph); struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);
...@@ -953,6 +997,8 @@ struct netns_ipvs { ...@@ -953,6 +997,8 @@ struct netns_ipvs {
int sysctl_pmtu_disc; int sysctl_pmtu_disc;
int sysctl_backup_only; int sysctl_backup_only;
int sysctl_conn_reuse_mode; int sysctl_conn_reuse_mode;
int sysctl_schedule_icmp;
int sysctl_ignore_tunneled;
/* ip_vs_lblc */ /* ip_vs_lblc */
int sysctl_lblc_expiration; int sysctl_lblc_expiration;
...@@ -1071,6 +1117,16 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs) ...@@ -1071,6 +1117,16 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
return ipvs->sysctl_conn_reuse_mode; return ipvs->sysctl_conn_reuse_mode;
} }
/* Accessor for the per-netns schedule_icmp setting. */
static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs)
{
	int setting = ipvs->sysctl_schedule_icmp;

	return setting;
}
/* Accessor for the per-netns ignore_tunneled setting. */
static inline int sysctl_ignore_tunneled(struct netns_ipvs *ipvs)
{
	int setting = ipvs->sysctl_ignore_tunneled;

	return setting;
}
#else #else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
...@@ -1143,6 +1199,16 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs) ...@@ -1143,6 +1199,16 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
return 1; return 1;
} }
/* Fallback variant in the #else branch (the guarding condition is not
 * visible in this hunk): schedule_icmp always reads as 0.
 */
static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs)
{
	const int fixed_value = 0;

	return fixed_value;
}
/* Fallback variant in the #else branch (the guarding condition is not
 * visible in this hunk): ignore_tunneled always reads as 0.
 */
static inline int sysctl_ignore_tunneled(struct netns_ipvs *ipvs)
{
	const int fixed_value = 0;

	return fixed_value;
}
#endif #endif
/* IPVS core functions /* IPVS core functions
...@@ -1186,14 +1252,12 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); ...@@ -1186,14 +1252,12 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, const struct ip_vs_iphdr *iph);
int inverse);
struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, const struct ip_vs_iphdr *iph);
int inverse);
/* Get reference to gain full access to conn. /* Get reference to gain full access to conn.
* By default, RCU read-side critical sections have access only to * By default, RCU read-side critical sections have access only to
......
...@@ -316,7 +316,7 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p) ...@@ -316,7 +316,7 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
static int static int
ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, const struct ip_vs_iphdr *iph,
int inverse, struct ip_vs_conn_param *p) struct ip_vs_conn_param *p)
{ {
__be16 _ports[2], *pptr; __be16 _ports[2], *pptr;
struct net *net = skb_net(skb); struct net *net = skb_net(skb);
...@@ -325,7 +325,7 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, ...@@ -325,7 +325,7 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
if (pptr == NULL) if (pptr == NULL)
return 1; return 1;
if (likely(!inverse)) if (likely(!ip_vs_iph_inverse(iph)))
ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
pptr[0], &iph->daddr, pptr[1], p); pptr[0], &iph->daddr, pptr[1], p);
else else
...@@ -336,11 +336,11 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, ...@@ -336,11 +336,11 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
struct ip_vs_conn * struct ip_vs_conn *
ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, int inverse) const struct ip_vs_iphdr *iph)
{ {
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) if (ip_vs_conn_fill_param_proto(af, skb, iph, &p))
return NULL; return NULL;
return ip_vs_conn_in_get(&p); return ip_vs_conn_in_get(&p);
...@@ -440,11 +440,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) ...@@ -440,11 +440,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
struct ip_vs_conn * struct ip_vs_conn *
ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, int inverse) const struct ip_vs_iphdr *iph)
{ {
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) if (ip_vs_conn_fill_param_proto(af, skb, iph, &p))
return NULL; return NULL;
return ip_vs_conn_out_get(&p); return ip_vs_conn_out_get(&p);
......
...@@ -245,20 +245,30 @@ ip_vs_sched_persist(struct ip_vs_service *svc, ...@@ -245,20 +245,30 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
union nf_inet_addr snet; /* source network of the client, union nf_inet_addr snet; /* source network of the client,
after masking */ after masking */
const union nf_inet_addr *src_addr, *dst_addr;
if (likely(!ip_vs_iph_inverse(iph))) {
src_addr = &iph->saddr;
dst_addr = &iph->daddr;
} else {
src_addr = &iph->daddr;
dst_addr = &iph->saddr;
}
/* Mask saddr with the netmask to adjust template granularity */ /* Mask saddr with the netmask to adjust template granularity */
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6) if (svc->af == AF_INET6)
ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, ipv6_addr_prefix(&snet.in6, &src_addr->in6,
(__force __u32) svc->netmask); (__force __u32) svc->netmask);
else else
#endif #endif
snet.ip = iph->saddr.ip & svc->netmask; snet.ip = src_addr->ip & svc->netmask;
IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
"mnet %s\n", "mnet %s\n",
IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port), IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port),
IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port), IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port),
IP_VS_DBG_ADDR(svc->af, &snet)); IP_VS_DBG_ADDR(svc->af, &snet));
/* /*
...@@ -276,7 +286,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, ...@@ -276,7 +286,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
*/ */
{ {
int protocol = iph->protocol; int protocol = iph->protocol;
const union nf_inet_addr *vaddr = &iph->daddr; const union nf_inet_addr *vaddr = dst_addr;
__be16 vport = 0; __be16 vport = 0;
if (dst_port == svc->port) { if (dst_port == svc->port) {
...@@ -366,8 +376,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, ...@@ -366,8 +376,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/* /*
* Create a new connection according to the template * Create a new connection according to the template
*/ */
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr, ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, src_addr,
src_port, &iph->daddr, dst_port, &param); src_port, dst_addr, dst_port, &param);
cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest, cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest,
skb->mark); skb->mark);
...@@ -418,7 +428,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, ...@@ -418,7 +428,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_conn *cp = NULL; struct ip_vs_conn *cp = NULL;
struct ip_vs_scheduler *sched; struct ip_vs_scheduler *sched;
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
__be16 _ports[2], *pptr; __be16 _ports[2], *pptr, cport, vport;
const void *caddr, *vaddr;
unsigned int flags; unsigned int flags;
*ignored = 1; *ignored = 1;
...@@ -429,14 +440,26 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, ...@@ -429,14 +440,26 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
if (pptr == NULL) if (pptr == NULL)
return NULL; return NULL;
if (likely(!ip_vs_iph_inverse(iph))) {
cport = pptr[0];
caddr = &iph->saddr;
vport = pptr[1];
vaddr = &iph->daddr;
} else {
cport = pptr[1];
caddr = &iph->daddr;
vport = pptr[0];
vaddr = &iph->saddr;
}
/* /*
* FTPDATA needs this check when using local real server. * FTPDATA needs this check when using local real server.
* Never schedule Active FTPDATA connections from real server. * Never schedule Active FTPDATA connections from real server.
* For LVS-NAT they must be already created. For other methods * For LVS-NAT they must be already created. For other methods
* with persistence the connection is created on SYN+ACK. * with persistence the connection is created on SYN+ACK.
*/ */
if (pptr[0] == FTPDATA) { if (cport == FTPDATA) {
IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
"Not scheduling FTPDATA"); "Not scheduling FTPDATA");
return NULL; return NULL;
} }
...@@ -444,19 +467,25 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, ...@@ -444,19 +467,25 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/* /*
* Do not schedule replies from local real server. * Do not schedule replies from local real server.
*/ */
if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) {
(cp = pp->conn_in_get(svc->af, skb, iph, 1))) { iph->hdr_flags ^= IP_VS_HDR_INVERSE;
IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, cp = pp->conn_in_get(svc->af, skb, iph);
"Not scheduling reply for existing connection"); iph->hdr_flags ^= IP_VS_HDR_INVERSE;
if (cp) {
IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
"Not scheduling reply for existing"
" connection");
__ip_vs_conn_put(cp); __ip_vs_conn_put(cp);
return NULL; return NULL;
} }
}
/* /*
* Persistent service * Persistent service
*/ */
if (svc->flags & IP_VS_SVC_F_PERSISTENT) if (svc->flags & IP_VS_SVC_F_PERSISTENT)
return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored, return ip_vs_sched_persist(svc, skb, cport, vport, ignored,
iph); iph);
*ignored = 0; *ignored = 0;
...@@ -464,7 +493,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, ...@@ -464,7 +493,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/* /*
* Non-persistent service * Non-persistent service
*/ */
if (!svc->fwmark && pptr[1] != svc->port) { if (!svc->fwmark && vport != svc->port) {
if (!svc->port) if (!svc->port)
pr_err("Schedule: port zero only supported " pr_err("Schedule: port zero only supported "
"in persistent services, " "in persistent services, "
...@@ -496,10 +525,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, ...@@ -496,10 +525,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
&iph->saddr, pptr[0], &iph->daddr, caddr, cport, vaddr, vport, &p);
pptr[1], &p);
cp = ip_vs_conn_new(&p, dest->af, &dest->addr, cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
dest->port ? dest->port : pptr[1], dest->port ? dest->port : vport,
flags, dest, skb->mark); flags, dest, skb->mark);
if (!cp) { if (!cp) {
*ignored = -1; *ignored = -1;
...@@ -519,6 +547,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, ...@@ -519,6 +547,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
return cp; return cp;
} }
#ifdef CONFIG_SYSCTL
/* Non-zero when addr is classified as unicast for address family af.
 *
 * With CONFIG_IP_VS_IPV6 and af == AF_INET6 the result is the
 * IPV6_ADDR_UNICAST bit of ipv6_addr_type(); in every other case it is
 * 1 if inet_addr_type() reports RTN_UNICAST for the IPv4 address in
 * net, else 0.
 */
static inline int ip_vs_addr_is_unicast(struct net *net, int af,
					union nf_inet_addr *addr)
{
	int is_unicast;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6) {
		is_unicast = ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST;
		return is_unicast;
	}
#endif
	is_unicast = (inet_addr_type(net, addr->ip) == RTN_UNICAST);
	return is_unicast;
}
#endif
/* /*
* Pass or drop the packet. * Pass or drop the packet.
...@@ -528,33 +567,28 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, ...@@ -528,33 +567,28 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph) struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
{ {
__be16 _ports[2], *pptr; __be16 _ports[2], *pptr, dport;
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
struct net *net; struct net *net;
struct netns_ipvs *ipvs; struct netns_ipvs *ipvs;
int unicast;
#endif #endif
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
if (pptr == NULL) { if (!pptr)
return NF_DROP; return NF_DROP;
} dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0];
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
net = skb_net(skb); net = skb_net(skb);
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST;
else
#endif
unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST);
/* if it is fwmark-based service, the cache_bypass sysctl is up /* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is a non-local unicast, then create and the destination is a non-local unicast, then create
a cache_bypass connection entry */ a cache_bypass connection entry */
ipvs = net_ipvs(net); ipvs = net_ipvs(net);
if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { if (ipvs->sysctl_cache_bypass && svc->fwmark &&
!(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) &&
ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) {
int ret; int ret;
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
...@@ -598,9 +632,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ...@@ -598,9 +632,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* listed in the ipvs table), pass the packets, because it is * listed in the ipvs table), pass the packets, because it is
* not ipvs job to decide to drop the packets. * not ipvs job to decide to drop the packets.
*/ */
if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) if (svc->port == FTPPORT && dport != FTPPORT)
return NF_ACCEPT; return NF_ACCEPT;
if (unlikely(ip_vs_iph_icmp(iph)))
return NF_DROP;
/* /*
* Notify the client that the destination is unreachable, and * Notify the client that the destination is unreachable, and
* release the socket buffer. * release the socket buffer.
...@@ -934,10 +971,10 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, ...@@ -934,10 +971,10 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
"Checking outgoing ICMP for"); "Checking outgoing ICMP for");
ip_vs_fill_ip4hdr(cih, &ciph); ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph);
ciph.len += offset;
/* The embedded headers contain source and dest in reverse order */ /* The embedded headers contain source and dest in reverse order */
cp = pp->conn_out_get(AF_INET, skb, &ciph, 1); cp = pp->conn_out_get(AF_INET, skb, &ciph);
if (!cp) if (!cp)
return NF_ACCEPT; return NF_ACCEPT;
...@@ -951,12 +988,11 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, ...@@ -951,12 +988,11 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
unsigned int hooknum, struct ip_vs_iphdr *ipvsh) unsigned int hooknum, struct ip_vs_iphdr *ipvsh)
{ {
struct icmp6hdr _icmph, *ic; struct icmp6hdr _icmph, *ic;
struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */
struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
struct ip_vs_protocol *pp; struct ip_vs_protocol *pp;
union nf_inet_addr snet; union nf_inet_addr snet;
unsigned int writable; unsigned int offset;
*related = 1; *related = 1;
ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh); ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
...@@ -984,31 +1020,23 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, ...@@ -984,31 +1020,23 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
ic->icmp6_type, ntohs(icmpv6_id(ic)), ic->icmp6_type, ntohs(icmpv6_id(ic)),
&ipvsh->saddr, &ipvsh->daddr); &ipvsh->saddr, &ipvsh->daddr);
/* Now find the contained IP header */ if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph),
ciph.len = ipvsh->len + sizeof(_icmph); true, &ciph))
ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
if (ip6h == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */ return NF_ACCEPT; /* The packet looks wrong, ignore */
ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */
ciph.daddr.in6 = ip6h->daddr;
/* skip possible IPv6 exthdrs of contained IPv6 packet */
ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
if (ciph.protocol < 0)
return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
pp = ip_vs_proto_get(ciph.protocol); pp = ip_vs_proto_get(ciph.protocol);
if (!pp) if (!pp)
return NF_ACCEPT; return NF_ACCEPT;
/* The embedded headers contain source and dest in reverse order */ /* The embedded headers contain source and dest in reverse order */
cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1); cp = pp->conn_out_get(AF_INET6, skb, &ciph);
if (!cp) if (!cp)
return NF_ACCEPT; return NF_ACCEPT;
snet.in6 = ciph.saddr.in6; snet.in6 = ciph.saddr.in6;
writable = ciph.len; offset = ciph.len;
return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
pp, writable, sizeof(struct ipv6hdr), pp, offset, sizeof(struct ipv6hdr),
hooknum); hooknum);
} }
#endif #endif
...@@ -1093,7 +1121,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -1093,7 +1121,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
{ {
struct ip_vs_protocol *pp = pd->pp; struct ip_vs_protocol *pp = pd->pp;
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");
if (!skb_make_writable(skb, iph->len)) if (!skb_make_writable(skb, iph->len))
goto drop; goto drop;
...@@ -1130,7 +1158,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -1130,7 +1158,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (ip_vs_route_me_harder(af, skb, hooknum)) if (ip_vs_route_me_harder(af, skb, hooknum))
goto drop; goto drop;
IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT");
ip_vs_out_stats(cp, skb); ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
...@@ -1186,7 +1214,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ...@@ -1186,7 +1214,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (!net_ipvs(net)->enable) if (!net_ipvs(net)->enable)
return NF_ACCEPT; return NF_ACCEPT;
ip_vs_fill_iph_skb(af, skb, &iph); ip_vs_fill_iph_skb(af, skb, false, &iph);
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) { if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
...@@ -1221,13 +1249,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ...@@ -1221,13 +1249,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
ip_vs_defrag_user(hooknum))) ip_vs_defrag_user(hooknum)))
return NF_STOLEN; return NF_STOLEN;
ip_vs_fill_ip4hdr(skb_network_header(skb), &iph); ip_vs_fill_iph_skb(AF_INET, skb, false, &iph);
} }
/* /*
* Check if the packet belongs to an existing entry * Check if the packet belongs to an existing entry
*/ */
cp = pp->conn_out_get(af, skb, &iph, 0); cp = pp->conn_out_get(af, skb, &iph);
if (likely(cp)) if (likely(cp))
return handle_response(af, skb, pd, cp, &iph, hooknum); return handle_response(af, skb, pd, cp, &iph, hooknum);
...@@ -1272,7 +1300,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ...@@ -1272,7 +1300,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
} }
} }
} }
IP_VS_DBG_PKT(12, af, pp, skb, 0, IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
"ip_vs_out: packet continues traversal as normal"); "ip_vs_out: packet continues traversal as normal");
return NF_ACCEPT; return NF_ACCEPT;
} }
...@@ -1327,6 +1355,42 @@ ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, ...@@ -1327,6 +1355,42 @@ ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
#endif #endif
/* Try to obtain a connection entry for skb through the protocol's
 * conn_schedule() hook.
 *
 * Returns 1 when *cpp holds a connection afterwards.  Returns 0 when
 * the caller should stop and use *verdict: either conn_schedule()
 * itself returned 0 (presumably having filled *verdict - confirm
 * against the protocol handlers), or no connection was produced, in
 * which case *verdict is set to NF_ACCEPT here.
 */
static unsigned int
ip_vs_try_to_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
		      int *verdict, struct ip_vs_conn **cpp,
		      struct ip_vs_iphdr *iph)
{
	struct ip_vs_protocol *proto = pd->pp;

	/* Only packets with fragoffs == 0 are scheduled; per the original
	 * note, later fragments rely on nf_defrag_ipv6 having replayed
	 * fragment zero, which already created the connection.
	 */
	if (!iph->fragoffs &&
	    !proto->conn_schedule(af, skb, pd, verdict, cpp, iph))
		return 0;

	if (likely(*cpp))
		return 1;

	/* No connection entry: let the packet continue untouched. */
	IP_VS_DBG_PKT(12, af, proto, skb, iph->off,
		      "ip_vs_in: packet continues traversal as normal");
	if (iph->fragoffs) {
		/* A fragment that could not be mapped to a connection
		 * entry means module nf_defrag_ipv6 is missing.
		 */
		IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
		IP_VS_DBG_PKT(7, af, proto, skb, iph->off,
			      "unhandled fragment");
	}
	*verdict = NF_ACCEPT;
	return 0;
}
/* /*
* Handle ICMP messages in the outside-to-inside direction (incoming). * Handle ICMP messages in the outside-to-inside direction (incoming).
* Find any that might be relevant, check against existing connections, * Find any that might be relevant, check against existing connections,
...@@ -1345,7 +1409,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ...@@ -1345,7 +1409,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_protocol *pp; struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd; struct ip_vs_proto_data *pd;
unsigned int offset, offset2, ihl, verdict; unsigned int offset, offset2, ihl, verdict;
bool ipip; bool ipip, new_cp = false;
*related = 1; *related = 1;
...@@ -1416,16 +1480,25 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ...@@ -1416,16 +1480,25 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
"Checking incoming ICMP for"); "Checking incoming ICMP for");
offset2 = offset; offset2 = offset;
ip_vs_fill_ip4hdr(cih, &ciph); ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !ipip, &ciph);
ciph.len += offset;
offset = ciph.len; offset = ciph.len;
/* The embedded headers contain source and dest in reverse order. /* The embedded headers contain source and dest in reverse order.
* For IPIP this is error for request, not for reply. * For IPIP this is error for request, not for reply.
*/ */
cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1); cp = pp->conn_in_get(AF_INET, skb, &ciph);
if (!cp)
if (!cp) {
int v;
if (!sysctl_schedule_icmp(net_ipvs(net)))
return NF_ACCEPT; return NF_ACCEPT;
if (!ip_vs_try_to_schedule(AF_INET, skb, pd, &v, &cp, &ciph))
return v;
new_cp = true;
}
verdict = NF_DROP; verdict = NF_DROP;
/* Ensure the checksum is correct */ /* Ensure the checksum is correct */
...@@ -1501,7 +1574,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ...@@ -1501,7 +1574,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph); verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
out: out:
if (likely(!new_cp))
__ip_vs_conn_put(cp); __ip_vs_conn_put(cp);
else
ip_vs_conn_put(cp);
return verdict; return verdict;
} }
...@@ -1511,13 +1587,13 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, ...@@ -1511,13 +1587,13 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
unsigned int hooknum, struct ip_vs_iphdr *iph) unsigned int hooknum, struct ip_vs_iphdr *iph)
{ {
struct net *net = NULL; struct net *net = NULL;
struct ipv6hdr _ip6h, *ip6h;
struct icmp6hdr _icmph, *ic; struct icmp6hdr _icmph, *ic;
struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
struct ip_vs_protocol *pp; struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd; struct ip_vs_proto_data *pd;
unsigned int offs_ciph, writable, verdict; unsigned int offset, verdict;
bool new_cp = false;
*related = 1; *related = 1;
...@@ -1546,18 +1622,9 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, ...@@ -1546,18 +1622,9 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
ic->icmp6_type, ntohs(icmpv6_id(ic)), ic->icmp6_type, ntohs(icmpv6_id(ic)),
&iph->saddr, &iph->daddr); &iph->saddr, &iph->daddr);
/* Now find the contained IP header */ offset = iph->len + sizeof(_icmph);
ciph.len = iph->len + sizeof(_icmph); if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph))
offs_ciph = ciph.len; /* Save ip header offset */ return NF_ACCEPT;
ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
if (ip6h == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */
ciph.daddr.in6 = ip6h->daddr;
/* skip possible IPv6 exthdrs of contained IPv6 packet */
ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
if (ciph.protocol < 0)
return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
net = skb_net(skb); net = skb_net(skb);
pd = ip_vs_proto_data_get(net, ciph.protocol); pd = ip_vs_proto_data_get(net, ciph.protocol);
...@@ -1569,36 +1636,49 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, ...@@ -1569,36 +1636,49 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
if (ciph.fragoffs) if (ciph.fragoffs)
return NF_ACCEPT; return NF_ACCEPT;
IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph, IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
"Checking incoming ICMPv6 for"); "Checking incoming ICMPv6 for");
/* The embedded headers contain source and dest in reverse order /* The embedded headers contain source and dest in reverse order
* if not from localhost * if not from localhost
*/ */
cp = pp->conn_in_get(AF_INET6, skb, &ciph, cp = pp->conn_in_get(AF_INET6, skb, &ciph);
(hooknum == NF_INET_LOCAL_OUT) ? 0 : 1);
if (!cp) if (!cp) {
int v;
if (!sysctl_schedule_icmp(net_ipvs(net)))
return NF_ACCEPT; return NF_ACCEPT;
if (!ip_vs_try_to_schedule(AF_INET6, skb, pd, &v, &cp, &ciph))
return v;
new_cp = true;
}
/* VS/TUN, VS/DR and LOCALNODE just let it go */ /* VS/TUN, VS/DR and LOCALNODE just let it go */
if ((hooknum == NF_INET_LOCAL_OUT) && if ((hooknum == NF_INET_LOCAL_OUT) &&
(IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) { (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
__ip_vs_conn_put(cp); verdict = NF_ACCEPT;
return NF_ACCEPT; goto out;
} }
/* do the statistics and put it back */ /* do the statistics and put it back */
ip_vs_in_stats(cp, skb); ip_vs_in_stats(cp, skb);
/* Need to mangle contained IPv6 header in ICMPv6 packet */ /* Need to mangle contained IPv6 header in ICMPv6 packet */
writable = ciph.len; offset = ciph.len;
if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol || if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
IPPROTO_SCTP == ciph.protocol) IPPROTO_SCTP == ciph.protocol)
writable += 2 * sizeof(__u16); /* Also mangle ports */ offset += 2 * sizeof(__u16); /* Also mangle ports */
verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph); verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph);
out:
if (likely(!new_cp))
__ip_vs_conn_put(cp); __ip_vs_conn_put(cp);
else
ip_vs_conn_put(cp);
return verdict; return verdict;
} }
...@@ -1633,7 +1713,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ...@@ -1633,7 +1713,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (unlikely((skb->pkt_type != PACKET_HOST && if (unlikely((skb->pkt_type != PACKET_HOST &&
hooknum != NF_INET_LOCAL_OUT) || hooknum != NF_INET_LOCAL_OUT) ||
!skb_dst(skb))) { !skb_dst(skb))) {
ip_vs_fill_iph_skb(af, skb, &iph); ip_vs_fill_iph_skb(af, skb, false, &iph);
IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
" ignored in hook %u\n", " ignored in hook %u\n",
skb->pkt_type, iph.protocol, skb->pkt_type, iph.protocol,
...@@ -1646,7 +1726,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ...@@ -1646,7 +1726,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT; return NF_ACCEPT;
ip_vs_fill_iph_skb(af, skb, &iph); ip_vs_fill_iph_skb(af, skb, false, &iph);
/* Bad... Do not break raw sockets */ /* Bad... Do not break raw sockets */
if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
...@@ -1680,13 +1760,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ...@@ -1680,13 +1760,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
/* Protocol supported? */ /* Protocol supported? */
pd = ip_vs_proto_data_get(net, iph.protocol); pd = ip_vs_proto_data_get(net, iph.protocol);
if (unlikely(!pd)) if (unlikely(!pd)) {
/* The only way we'll see this packet again is if it's
* encapsulated, so mark it with ipvs_property=1 so we
* skip it if we're ignoring tunneled packets
*/
if (sysctl_ignore_tunneled(ipvs))
skb->ipvs_property = 1;
return NF_ACCEPT; return NF_ACCEPT;
}
pp = pd->pp; pp = pd->pp;
/* /*
* Check if the packet belongs to an existing connection entry * Check if the packet belongs to an existing connection entry
*/ */
cp = pp->conn_in_get(af, skb, &iph, 0); cp = pp->conn_in_get(af, skb, &iph);
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
if (conn_reuse_mode && !iph.fragoffs && if (conn_reuse_mode && !iph.fragoffs &&
...@@ -1700,32 +1788,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ...@@ -1700,32 +1788,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
cp = NULL; cp = NULL;
} }
if (unlikely(!cp) && !iph.fragoffs) { if (unlikely(!cp)) {
/* No (second) fragments need to enter here, as nf_defrag_ipv6
* replayed fragment zero will already have created the cp
*/
int v; int v;
/* Schedule and create new connection entry into &cp */ if (!ip_vs_try_to_schedule(af, skb, pd, &v, &cp, &iph))
if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph))
return v; return v;
} }
if (unlikely(!cp)) { IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");
/* sorry, all this trouble for a no-hit :) */
IP_VS_DBG_PKT(12, af, pp, skb, 0,
"ip_vs_in: packet continues traversal as normal");
if (iph.fragoffs) {
/* Fragment that couldn't be mapped to a conn entry
* is missing module nf_defrag_ipv6
*/
IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment");
}
return NF_ACCEPT;
}
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
/* Check the server status */ /* Check the server status */
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */ /* the destination server is not available */
...@@ -1859,7 +1930,7 @@ ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, ...@@ -1859,7 +1930,7 @@ ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
struct netns_ipvs *ipvs; struct netns_ipvs *ipvs;
struct ip_vs_iphdr iphdr; struct ip_vs_iphdr iphdr;
ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
if (iphdr.protocol != IPPROTO_ICMPV6) if (iphdr.protocol != IPPROTO_ICMPV6)
return NF_ACCEPT; return NF_ACCEPT;
......
...@@ -1844,6 +1844,18 @@ static struct ctl_table vs_vars[] = { ...@@ -1844,6 +1844,18 @@ static struct ctl_table vs_vars[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec, .proc_handler = proc_dointvec,
}, },
{
.procname = "schedule_icmp",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "ignore_tunneled",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#ifdef CONFIG_IP_VS_DEBUG #ifdef CONFIG_IP_VS_DEBUG
{ {
.procname = "debug_level", .procname = "debug_level",
...@@ -3895,7 +3907,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) ...@@ -3895,7 +3907,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
tbl[idx++].data = &ipvs->sysctl_backup_only; tbl[idx++].data = &ipvs->sysctl_backup_only;
ipvs->sysctl_conn_reuse_mode = 1; ipvs->sysctl_conn_reuse_mode = 1;
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
if (ipvs->sysctl_hdr == NULL) { if (ipvs->sysctl_hdr == NULL) {
......
...@@ -70,7 +70,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) ...@@ -70,7 +70,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
const char *dptr; const char *dptr;
int retc; int retc;
ip_vs_fill_iph_skb(p->af, skb, &iph); ip_vs_fill_iph_skb(p->af, skb, false, &iph);
/* Only useful with UDP */ /* Only useful with UDP */
if (iph.protocol != IPPROTO_UDP) if (iph.protocol != IPPROTO_UDP)
......
...@@ -42,10 +42,10 @@ struct isakmp_hdr { ...@@ -42,10 +42,10 @@ struct isakmp_hdr {
static void static void
ah_esp_conn_fill_param_proto(struct net *net, int af, ah_esp_conn_fill_param_proto(struct net *net, int af,
const struct ip_vs_iphdr *iph, int inverse, const struct ip_vs_iphdr *iph,
struct ip_vs_conn_param *p) struct ip_vs_conn_param *p)
{ {
if (likely(!inverse)) if (likely(!ip_vs_iph_inverse(iph)))
ip_vs_conn_fill_param(net, af, IPPROTO_UDP, ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->saddr, htons(PORT_ISAKMP), &iph->saddr, htons(PORT_ISAKMP),
&iph->daddr, htons(PORT_ISAKMP), p); &iph->daddr, htons(PORT_ISAKMP), p);
...@@ -57,14 +57,13 @@ ah_esp_conn_fill_param_proto(struct net *net, int af, ...@@ -57,14 +57,13 @@ ah_esp_conn_fill_param_proto(struct net *net, int af,
static struct ip_vs_conn * static struct ip_vs_conn *
ah_esp_conn_in_get(int af, const struct sk_buff *skb, ah_esp_conn_in_get(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, const struct ip_vs_iphdr *iph)
int inverse)
{ {
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
struct net *net = skb_net(skb); struct net *net = skb_net(skb);
ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); ah_esp_conn_fill_param_proto(net, af, iph, &p);
cp = ip_vs_conn_in_get(&p); cp = ip_vs_conn_in_get(&p);
if (!cp) { if (!cp) {
/* /*
...@@ -73,7 +72,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, ...@@ -73,7 +72,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
*/ */
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
"%s%s %s->%s\n", "%s%s %s->%s\n",
inverse ? "ICMP+" : "", ip_vs_iph_icmp(iph) ? "ICMP+" : "",
ip_vs_proto_get(iph->protocol)->name, ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr)); IP_VS_DBG_ADDR(af, &iph->daddr));
...@@ -85,18 +84,18 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, ...@@ -85,18 +84,18 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
static struct ip_vs_conn * static struct ip_vs_conn *
ah_esp_conn_out_get(int af, const struct sk_buff *skb, ah_esp_conn_out_get(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, int inverse) const struct ip_vs_iphdr *iph)
{ {
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
struct net *net = skb_net(skb); struct net *net = skb_net(skb);
ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); ah_esp_conn_fill_param_proto(net, af, iph, &p);
cp = ip_vs_conn_out_get(&p); cp = ip_vs_conn_out_get(&p);
if (!cp) { if (!cp) {
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
"%s%s %s->%s\n", "%s%s %s->%s\n",
inverse ? "ICMP+" : "", ip_vs_iph_icmp(iph) ? "ICMP+" : "",
ip_vs_proto_get(iph->protocol)->name, ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr)); IP_VS_DBG_ADDR(af, &iph->daddr));
......
...@@ -18,16 +18,24 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -18,16 +18,24 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct netns_ipvs *ipvs; struct netns_ipvs *ipvs;
sctp_chunkhdr_t _schunkh, *sch; sctp_chunkhdr_t _schunkh, *sch;
sctp_sctphdr_t *sh, _sctph; sctp_sctphdr_t *sh, _sctph;
__be16 _ports[2], *ports = NULL;
if (likely(!ip_vs_iph_icmp(iph))) {
sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
if (sh == NULL) { if (sh) {
*verdict = NF_DROP; sch = skb_header_pointer(
return 0; skb, iph->len + sizeof(sctp_sctphdr_t),
sizeof(_schunkh), &_schunkh);
if (sch && (sch->type == SCTP_CID_INIT ||
sysctl_sloppy_sctp(ipvs)))
ports = &sh->source;
}
} else {
ports = skb_header_pointer(
skb, iph->len, sizeof(_ports), &_ports);
} }
sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), if (!ports) {
sizeof(_schunkh), &_schunkh);
if (sch == NULL) {
*verdict = NF_DROP; *verdict = NF_DROP;
return 0; return 0;
} }
...@@ -35,9 +43,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -35,9 +43,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
net = skb_net(skb); net = skb_net(skb);
ipvs = net_ipvs(net); ipvs = net_ipvs(net);
rcu_read_lock(); rcu_read_lock();
if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) && if (likely(!ip_vs_iph_inverse(iph)))
(svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, sh->dest))) { &iph->daddr, ports[1]);
else
svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->saddr, ports[0]);
if (svc) {
int ignored; int ignored;
if (ip_vs_todrop(ipvs)) { if (ip_vs_todrop(ipvs)) {
......
...@@ -40,19 +40,43 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -40,19 +40,43 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct ip_vs_service *svc; struct ip_vs_service *svc;
struct tcphdr _tcph, *th; struct tcphdr _tcph, *th;
struct netns_ipvs *ipvs; struct netns_ipvs *ipvs;
__be16 _ports[2], *ports = NULL;
net = skb_net(skb);
ipvs = net_ipvs(net);
/* In the event of icmp, we're only guaranteed to have the first 8
* bytes of the transport header, so we only check the rest of the
* TCP packet for non-ICMP packets
*/
if (likely(!ip_vs_iph_icmp(iph))) {
th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
if (th == NULL) { if (th) {
if (th->rst || !(sysctl_sloppy_tcp(ipvs) || th->syn))
return 1;
ports = &th->source;
}
} else {
ports = skb_header_pointer(
skb, iph->len, sizeof(_ports), &_ports);
}
if (!ports) {
*verdict = NF_DROP; *verdict = NF_DROP;
return 0; return 0;
} }
net = skb_net(skb);
ipvs = net_ipvs(net);
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
rcu_read_lock(); rcu_read_lock();
if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst &&
(svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, if (likely(!ip_vs_iph_inverse(iph)))
&iph->daddr, th->dest))) { svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
else
svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->saddr, ports[0]);
if (svc) {
int ignored; int ignored;
if (ip_vs_todrop(ipvs)) { if (ip_vs_todrop(ipvs)) {
......
...@@ -36,17 +36,32 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -36,17 +36,32 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct net *net; struct net *net;
struct ip_vs_service *svc; struct ip_vs_service *svc;
struct udphdr _udph, *uh; struct udphdr _udph, *uh;
__be16 _ports[2], *ports = NULL;
if (likely(!ip_vs_iph_icmp(iph))) {
/* IPv6 fragments, only first fragment will hit this */ /* IPv6 fragments, only first fragment will hit this */
uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
if (uh == NULL) { if (uh)
ports = &uh->source;
} else {
ports = skb_header_pointer(
skb, iph->len, sizeof(_ports), &_ports);
}
if (!ports) {
*verdict = NF_DROP; *verdict = NF_DROP;
return 0; return 0;
} }
net = skb_net(skb); net = skb_net(skb);
rcu_read_lock(); rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, uh->dest); &iph->daddr, ports[1]);
else
svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->saddr, ports[0]);
if (svc) { if (svc) {
int ignored; int ignored;
......
...@@ -280,35 +280,29 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, ...@@ -280,35 +280,29 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
static inline __be16 static inline __be16
ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
{ {
__be16 port; __be16 _ports[2], *ports;
struct tcphdr _tcph, *th;
struct udphdr _udph, *uh;
sctp_sctphdr_t _sctph, *sh;
/* At this point we know that we have a valid packet of some kind.
* Because ICMP packets are only guaranteed to have the first 8
* bytes, let's just grab the ports. Fortunately they're in the
* same position for all three of the protocols we care about.
*/
switch (iph->protocol) { switch (iph->protocol) {
case IPPROTO_TCP: case IPPROTO_TCP:
th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
if (unlikely(th == NULL))
return 0;
port = th->source;
break;
case IPPROTO_UDP: case IPPROTO_UDP:
uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
if (unlikely(uh == NULL))
return 0;
port = uh->source;
break;
case IPPROTO_SCTP: case IPPROTO_SCTP:
sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); ports = skb_header_pointer(skb, iph->len, sizeof(_ports),
if (unlikely(sh == NULL)) &_ports);
if (unlikely(!ports))
return 0; return 0;
port = sh->source;
break; if (likely(!ip_vs_iph_inverse(iph)))
return ports[0];
else
return ports[1];
default: default:
port = 0; return 0;
} }
return port;
} }
...@@ -322,6 +316,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, ...@@ -322,6 +316,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
struct ip_vs_sh_state *s; struct ip_vs_sh_state *s;
__be16 port = 0; __be16 port = 0;
const union nf_inet_addr *hash_addr;
hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr;
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
...@@ -331,9 +328,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, ...@@ -331,9 +328,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
s = (struct ip_vs_sh_state *) svc->sched_data; s = (struct ip_vs_sh_state *) svc->sched_data;
if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port); dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port);
else else
dest = ip_vs_sh_get(svc, s, &iph->saddr, port); dest = ip_vs_sh_get(svc, s, hash_addr, port);
if (!dest) { if (!dest) {
ip_vs_scheduler_err(svc, "no destination available"); ip_vs_scheduler_err(svc, "no destination available");
...@@ -341,7 +338,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, ...@@ -341,7 +338,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
} }
IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->saddr), IP_VS_DBG_ADDR(svc->af, hash_addr),
IP_VS_DBG_ADDR(dest->af, &dest->addr), IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port)); ntohs(dest->port));
......
...@@ -224,7 +224,7 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, ...@@ -224,7 +224,7 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
if (!skb->dev) if (!skb->dev)
skb->dev = net->loopback_dev; skb->dev = net->loopback_dev;
/* only send ICMP too big on first fragment */ /* only send ICMP too big on first fragment */
if (!ipvsh->fragoffs) if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh))
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP_VS_DBG(1, "frag needed for %pI6c\n", IP_VS_DBG(1, "frag needed for %pI6c\n",
&ipv6_hdr(skb)->saddr); &ipv6_hdr(skb)->saddr);
...@@ -242,7 +242,8 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, ...@@ -242,7 +242,8 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
return true; return true;
if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) &&
skb->len > mtu && !skb_is_gso(skb))) { skb->len > mtu && !skb_is_gso(skb) &&
!ip_vs_iph_icmp(ipvsh))) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu)); htonl(mtu));
IP_VS_DBG(1, "frag needed for %pI4\n", IP_VS_DBG(1, "frag needed for %pI4\n",
...@@ -656,10 +657,12 @@ int ...@@ -656,10 +657,12 @@ int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{ {
struct ipv6hdr *iph = ipv6_hdr(skb);
EnterFunction(10); EnterFunction(10);
rcu_read_lock(); rcu_read_lock();
if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL, if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &iph->daddr, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
goto tx_error; goto tx_error;
...@@ -723,7 +726,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -723,7 +726,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct nf_conn *ct = nf_ct_get(skb, &ctinfo); struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) { if (ct && !nf_ct_is_untracked(ct)) {
IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off,
"ip_vs_nat_xmit(): " "ip_vs_nat_xmit(): "
"stopping DNAT to local address"); "stopping DNAT to local address");
goto tx_error; goto tx_error;
...@@ -733,8 +736,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -733,8 +736,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */ /* From world but DNAT to loopback address? */
if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off,
"stopping DNAT to loopback address"); "ip_vs_nat_xmit(): stopping DNAT to loopback "
"address");
goto tx_error; goto tx_error;
} }
...@@ -751,7 +755,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -751,7 +755,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_hdr(skb)->daddr = cp->daddr.ip; ip_hdr(skb)->daddr = cp->daddr.ip;
ip_send_check(ip_hdr(skb)); ip_send_check(ip_hdr(skb));
IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length /* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still is larger than the MTU of outgoing device, there will be still
...@@ -812,7 +816,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -812,7 +816,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct nf_conn *ct = nf_ct_get(skb, &ctinfo); struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) { if (ct && !nf_ct_is_untracked(ct)) {
IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off,
"ip_vs_nat_xmit_v6(): " "ip_vs_nat_xmit_v6(): "
"stopping DNAT to local address"); "stopping DNAT to local address");
goto tx_error; goto tx_error;
...@@ -823,7 +827,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -823,7 +827,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */ /* From world but DNAT to loopback address? */
if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off,
"ip_vs_nat_xmit_v6(): " "ip_vs_nat_xmit_v6(): "
"stopping DNAT to loopback address"); "stopping DNAT to loopback address");
goto tx_error; goto tx_error;
...@@ -841,7 +845,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -841,7 +845,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error; goto tx_error;
ipv6_hdr(skb)->daddr = cp->daddr.in6; ipv6_hdr(skb)->daddr = cp->daddr.in6;
IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length /* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still is larger than the MTU of outgoing device, there will be still
......
...@@ -67,7 +67,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) ...@@ -67,7 +67,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
goto out; goto out;
} }
ip_vs_fill_iph_skb(family, skb, &iph); ip_vs_fill_iph_skb(family, skb, true, &iph);
if (data->bitmask & XT_IPVS_PROTO) if (data->bitmask & XT_IPVS_PROTO)
if ((iph.protocol == data->l4proto) ^ if ((iph.protocol == data->l4proto) ^
...@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) ...@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
/* /*
* Check if the packet belongs to an existing entry * Check if the packet belongs to an existing entry
*/ */
cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */); cp = pp->conn_out_get(family, skb, &iph);
if (unlikely(cp == NULL)) { if (unlikely(cp == NULL)) {
match = false; match = false;
goto out; goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment