Commit c6c96c18 authored by Alexander Frolkin's avatar Alexander Frolkin Committed by Simon Horman

ipvs: sloppy TCP and SCTP

This adds support for sloppy TCP and SCTP modes to IPVS.

When enabled (sysctls net.ipv4.vs.sloppy_tcp and
net.ipv4.vs.sloppy_sctp), allows IPVS to create connection state on any
packet, not just a TCP SYN (or SCTP INIT).

This allows connections to fail over from one IPVS director to another
mid-flight.
Signed-off-by: default avatarAlexander Frolkin <avf@eldamar.org.uk>
Signed-off-by: default avatarJulian Anastasov <ja@ssi.bg>
Signed-off-by: default avatarSimon Horman <horms@verge.net.au>
parent bba54de5
...@@ -978,6 +978,8 @@ struct netns_ipvs { ...@@ -978,6 +978,8 @@ struct netns_ipvs {
int sysctl_sync_sock_size; int sysctl_sync_sock_size;
int sysctl_cache_bypass; int sysctl_cache_bypass;
int sysctl_expire_nodest_conn; int sysctl_expire_nodest_conn;
int sysctl_sloppy_tcp;
int sysctl_sloppy_sctp;
int sysctl_expire_quiescent_template; int sysctl_expire_quiescent_template;
int sysctl_sync_threshold[2]; int sysctl_sync_threshold[2];
unsigned int sysctl_sync_refresh_period; unsigned int sysctl_sync_refresh_period;
...@@ -1020,6 +1022,8 @@ struct netns_ipvs { ...@@ -1020,6 +1022,8 @@ struct netns_ipvs {
#define DEFAULT_SYNC_THRESHOLD 3 #define DEFAULT_SYNC_THRESHOLD 3
#define DEFAULT_SYNC_PERIOD 50 #define DEFAULT_SYNC_PERIOD 50
#define DEFAULT_SYNC_VER 1 #define DEFAULT_SYNC_VER 1
#define DEFAULT_SLOPPY_TCP 0
#define DEFAULT_SLOPPY_SCTP 0
#define DEFAULT_SYNC_REFRESH_PERIOD (0U * HZ) #define DEFAULT_SYNC_REFRESH_PERIOD (0U * HZ)
#define DEFAULT_SYNC_RETRIES 0 #define DEFAULT_SYNC_RETRIES 0
#define IPVS_SYNC_WAKEUP_RATE 8 #define IPVS_SYNC_WAKEUP_RATE 8
...@@ -1056,6 +1060,16 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) ...@@ -1056,6 +1060,16 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
return ipvs->sysctl_sync_ver; return ipvs->sysctl_sync_ver;
} }
static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sloppy_tcp;
}
static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sloppy_sctp;
}
static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
{ {
return ACCESS_ONCE(ipvs->sysctl_sync_ports); return ACCESS_ONCE(ipvs->sysctl_sync_ports);
...@@ -1109,6 +1123,16 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) ...@@ -1109,6 +1123,16 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
return DEFAULT_SYNC_VER; return DEFAULT_SYNC_VER;
} }
static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs)
{
return DEFAULT_SLOPPY_TCP;
}
static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
{
return DEFAULT_SLOPPY_SCTP;
}
static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
{ {
return 1; return 1;
......
...@@ -1738,6 +1738,18 @@ static struct ctl_table vs_vars[] = { ...@@ -1738,6 +1738,18 @@ static struct ctl_table vs_vars[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec, .proc_handler = proc_dointvec,
}, },
{
.procname = "sloppy_tcp",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sloppy_sctp",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ {
.procname = "expire_quiescent_template", .procname = "expire_quiescent_template",
.maxlen = sizeof(int), .maxlen = sizeof(int),
...@@ -3723,6 +3735,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) ...@@ -3723,6 +3735,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
tbl[idx++].data = &ipvs->sysctl_sync_sock_size; tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
tbl[idx++].data = &ipvs->sysctl_cache_bypass; tbl[idx++].data = &ipvs->sysctl_cache_bypass;
tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
......
...@@ -15,6 +15,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -15,6 +15,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
{ {
struct net *net; struct net *net;
struct ip_vs_service *svc; struct ip_vs_service *svc;
struct netns_ipvs *ipvs;
sctp_chunkhdr_t _schunkh, *sch; sctp_chunkhdr_t _schunkh, *sch;
sctp_sctphdr_t *sh, _sctph; sctp_sctphdr_t *sh, _sctph;
...@@ -27,13 +28,14 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -27,13 +28,14 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (sch == NULL) if (sch == NULL)
return 0; return 0;
net = skb_net(skb); net = skb_net(skb);
ipvs = net_ipvs(net);
rcu_read_lock(); rcu_read_lock();
if ((sch->type == SCTP_CID_INIT) && if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) &&
(svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, sh->dest))) { &iph->daddr, sh->dest))) {
int ignored; int ignored;
if (ip_vs_todrop(net_ipvs(net))) { if (ip_vs_todrop(ipvs)) {
/* /*
* It seems that we are very loaded. * It seems that we are very loaded.
* We have to drop this packet :( * We have to drop this packet :(
...@@ -232,21 +234,21 @@ static struct ipvs_sctp_nextstate ...@@ -232,21 +234,21 @@ static struct ipvs_sctp_nextstate
* STATE : IP_VS_SCTP_S_NONE * STATE : IP_VS_SCTP_S_NONE
*/ */
/*next state *//*event */ /*next state *//*event */
{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
{IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
{IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ },
......
...@@ -39,6 +39,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -39,6 +39,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct net *net; struct net *net;
struct ip_vs_service *svc; struct ip_vs_service *svc;
struct tcphdr _tcph, *th; struct tcphdr _tcph, *th;
struct netns_ipvs *ipvs;
th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
if (th == NULL) { if (th == NULL) {
...@@ -46,14 +47,15 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -46,14 +47,15 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
return 0; return 0;
} }
net = skb_net(skb); net = skb_net(skb);
ipvs = net_ipvs(net);
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
rcu_read_lock(); rcu_read_lock();
if (th->syn && if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst &&
(svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, th->dest))) { &iph->daddr, th->dest))) {
int ignored; int ignored;
if (ip_vs_todrop(net_ipvs(net))) { if (ip_vs_todrop(ipvs)) {
/* /*
* It seems that we are very loaded. * It seems that we are very loaded.
* We have to drop this packet :( * We have to drop this packet :(
...@@ -401,7 +403,7 @@ static struct tcp_states_t tcp_states [] = { ...@@ -401,7 +403,7 @@ static struct tcp_states_t tcp_states [] = {
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }}, /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
/* OUTPUT */ /* OUTPUT */
...@@ -415,7 +417,7 @@ static struct tcp_states_t tcp_states [] = { ...@@ -415,7 +417,7 @@ static struct tcp_states_t tcp_states [] = {
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
}; };
...@@ -424,7 +426,7 @@ static struct tcp_states_t tcp_states_dos [] = { ...@@ -424,7 +426,7 @@ static struct tcp_states_t tcp_states_dos [] = {
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }}, /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }}, /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }}, /*ack*/ {{sES, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
/* OUTPUT */ /* OUTPUT */
...@@ -438,7 +440,7 @@ static struct tcp_states_t tcp_states_dos [] = { ...@@ -438,7 +440,7 @@ static struct tcp_states_t tcp_states_dos [] = {
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }}, /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment