Commit 0f4389e9 authored by Arnaldo Carvalho de Melo's avatar Arnaldo Carvalho de Melo Committed by Linus Torvalds

[TCP]: Fix excessive stack usage resulting in OOPS with 4KSTACKS.

Various routines were putting a full struct tcp_sock on
the local stack.  What they really wanted was a subset
of this information when doing TCP options processing
when we only have a mini-socket (for example in SYN-RECVD
and TIME_WAIT states).

Therefore pull out the needed information into a sub-struct
and use that in the TCP options processing routines.
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@conectiva.com.br>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent f7b4ca43
......@@ -210,6 +210,27 @@ enum tcp_congestion_algo {
TCP_BIC,
};
struct tcp_options_received {
/* PAWS/RTTM data */
long ts_recent_stamp;/* Time we stored ts_recent (for aging) */
__u32 ts_recent; /* Time stamp to echo next */
__u32 rcv_tsval; /* Time stamp value */
__u32 rcv_tsecr; /* Time stamp echo reply */
char saw_tstamp; /* Saw TIMESTAMP on last packet */
char tstamp_ok; /* TIMESTAMP seen on SYN packet */
char sack_ok; /* SACK seen on SYN packet */
char wscale_ok; /* Wscale seen on SYN packet */
__u8 snd_wscale; /* Window scaling received from sender */
__u8 rcv_wscale; /* Window scaling to send to receiver */
/* SACKs data */
__u8 dsack; /* D-SACK is scheduled */
__u8 eff_sacks; /* Size of SACK array to send with next packet */
__u8 num_sacks; /* Number of SACK blocks */
__u8 __pad;
__u16 user_mss; /* mss requested by user in ioctl */
__u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
};
struct tcp_sock {
/* inet_sock has to be the first member of tcp_sock */
struct inet_sock inet;
......@@ -262,22 +283,19 @@ struct tcp_sock {
__u32 pmtu_cookie; /* Last pmtu seen by socket */
__u32 mss_cache; /* Cached effective mss, not including SACKS */
__u16 mss_cache_std; /* Like mss_cache, but without TSO */
__u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
__u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
__u16 ext2_header_len;/* Options depending on route */
__u8 ca_state; /* State of fast-retransmit machine */
__u8 retransmits; /* Number of unrecovered RTO timeouts. */
__u32 frto_highmark; /* snd_nxt when RTO occurred */
__u8 reordering; /* Packet reordering metric. */
__u8 frto_counter; /* Number of new acks after RTO */
__u32 frto_highmark; /* snd_nxt when RTO occurred */
__u8 adv_cong; /* Using Vegas, Westwood, or BIC */
__u8 defer_accept; /* User waits for some data after accept() */
/* one byte hole, try to pack */
/* RTT measurement */
__u8 backoff; /* backoff */
__u32 srtt; /* smoothed round trip time << 3 */
__u32 mdev; /* medium deviation */
__u32 mdev_max; /* maximal mdev for the last rtt period */
......@@ -288,7 +306,15 @@ struct tcp_sock {
__u32 packets_out; /* Packets which are "in flight" */
__u32 left_out; /* Packets which leaved network */
__u32 retrans_out; /* Retransmitted packets out */
__u8 backoff; /* backoff */
/*
* Options received (usually on last packet, some only on SYN packets).
*/
__u8 nonagle; /* Disable Nagle algorithm? */
__u8 keepalive_probes; /* num of allowed keep alive probes */
__u8 probes_out; /* unanswered 0 window probes */
struct tcp_options_received rx_opt;
/*
* Slow start and congestion control (see also Nagle, and Karn & Partridge)
......@@ -314,40 +340,19 @@ struct tcp_sock {
__u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
__u32 pushed_seq; /* Last pushed seq, required to talk to windows */
__u32 copied_seq; /* Head of yet unread data */
/*
* Options received (usually on last packet, some only on SYN packets).
*/
char tstamp_ok, /* TIMESTAMP seen on SYN packet */
wscale_ok, /* Wscale seen on SYN packet */
sack_ok; /* SACK seen on SYN packet */
char saw_tstamp; /* Saw TIMESTAMP on last packet */
__u8 snd_wscale; /* Window scaling received from sender */
__u8 rcv_wscale; /* Window scaling to send to receiver */
__u8 nonagle; /* Disable Nagle algorithm? */
__u8 keepalive_probes; /* num of allowed keep alive probes */
/* PAWS/RTTM data */
__u32 rcv_tsval; /* Time stamp value */
__u32 rcv_tsecr; /* Time stamp echo reply */
__u32 ts_recent; /* Time stamp to echo next */
long ts_recent_stamp;/* Time we stored ts_recent (for aging) */
/* SACKs data */
__u16 user_mss; /* mss requested by user in ioctl */
__u8 dsack; /* D-SACK is scheduled */
__u8 eff_sacks; /* Size of SACK array to send with next packet */
struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
__u32 window_clamp; /* Maximal window to advertise */
__u32 rcv_ssthresh; /* Current window clamp */
__u8 probes_out; /* unanswered 0 window probes */
__u8 num_sacks; /* Number of SACK blocks */
__u16 advmss; /* Advertised MSS */
__u8 syn_retries; /* num of allowed syn retries */
__u8 ecn_flags; /* ECN status bits. */
__u16 prior_ssthresh; /* ssthresh saved at recovery start */
__u16 __pad1;
__u32 lost_out; /* Lost packets */
__u32 sacked_out; /* SACK'd packets */
__u32 fackets_out; /* FACK'd packets */
......
......@@ -832,9 +832,9 @@ static __inline__ void tcp_delack_init(struct tcp_sock *tp)
memset(&tp->ack, 0, sizeof(tp->ack));
}
static inline void tcp_clear_options(struct tcp_sock *tp)
static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
{
tp->tstamp_ok = tp->sack_ok = tp->wscale_ok = tp->snd_wscale = 0;
rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
}
enum tcp_tw_status
......@@ -883,7 +883,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
extern int tcp_listen_start(struct sock *sk);
extern void tcp_parse_options(struct sk_buff *skb,
struct tcp_sock *tp,
struct tcp_options_received *opt_rx,
int estab);
/*
......@@ -1071,7 +1071,7 @@ static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
static __inline__ void tcp_fast_path_on(struct tcp_sock *tp)
{
__tcp_fast_path_on(tp, tp->snd_wnd>>tp->snd_wscale);
__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
}
static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp)
......@@ -1323,7 +1323,7 @@ static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp)
static inline void tcp_sync_left_out(struct tcp_sock *tp)
{
if (tp->sack_ok &&
if (tp->rx_opt.sack_ok &&
(tp->sacked_out >= tp->packets_out - tp->lost_out))
tp->sacked_out = tp->packets_out - tp->lost_out;
tp->left_out = tp->sacked_out + tp->lost_out;
......@@ -1649,39 +1649,39 @@ static __inline__ void tcp_done(struct sock *sk)
tcp_destroy_sock(sk);
}
static __inline__ void tcp_sack_reset(struct tcp_sock *tp)
static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
{
tp->dsack = 0;
tp->eff_sacks = 0;
tp->num_sacks = 0;
rx_opt->dsack = 0;
rx_opt->eff_sacks = 0;
rx_opt->num_sacks = 0;
}
static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, __u32 tstamp)
{
if (tp->tstamp_ok) {
if (tp->rx_opt.tstamp_ok) {
*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) |
TCPOLEN_TIMESTAMP);
*ptr++ = htonl(tstamp);
*ptr++ = htonl(tp->ts_recent);
*ptr++ = htonl(tp->rx_opt.ts_recent);
}
if (tp->eff_sacks) {
struct tcp_sack_block *sp = tp->dsack ? tp->duplicate_sack : tp->selective_acks;
if (tp->rx_opt.eff_sacks) {
struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
int this_sack;
*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_SACK << 8) |
(TCPOLEN_SACK_BASE +
(tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)));
for(this_sack = 0; this_sack < tp->eff_sacks; this_sack++) {
(tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)));
for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
*ptr++ = htonl(sp[this_sack].start_seq);
*ptr++ = htonl(sp[this_sack].end_seq);
}
if (tp->dsack) {
tp->dsack = 0;
tp->eff_sacks--;
if (tp->rx_opt.dsack) {
tp->rx_opt.dsack = 0;
tp->rx_opt.eff_sacks--;
}
}
}
......@@ -1827,17 +1827,17 @@ static inline void tcp_synq_drop(struct sock *sk, struct open_request *req,
}
static __inline__ void tcp_openreq_init(struct open_request *req,
struct tcp_sock *tp,
struct tcp_options_received *rx_opt,
struct sk_buff *skb)
{
req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
req->rcv_isn = TCP_SKB_CB(skb)->seq;
req->mss = tp->mss_clamp;
req->ts_recent = tp->saw_tstamp ? tp->rcv_tsval : 0;
req->tstamp_ok = tp->tstamp_ok;
req->sack_ok = tp->sack_ok;
req->snd_wscale = tp->snd_wscale;
req->wscale_ok = tp->wscale_ok;
req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
req->tstamp_ok = rx_opt->tstamp_ok;
req->sack_ok = rx_opt->sack_ok;
req->snd_wscale = rx_opt->snd_wscale;
req->wscale_ok = rx_opt->wscale_ok;
req->acked = 0;
req->ecn_ok = 0;
req->rmt_port = skb->h.th->source;
......@@ -1886,11 +1886,11 @@ static inline int tcp_fin_time(const struct tcp_sock *tp)
return fin_timeout;
}
static inline int tcp_paws_check(const struct tcp_sock *tp, int rst)
static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int rst)
{
if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0)
if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
return 0;
if (xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
if (xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
return 0;
/* RST segments are not recommended to carry timestamp,
......@@ -1905,7 +1905,7 @@ static inline int tcp_paws_check(const struct tcp_sock *tp, int rst)
However, we can relax time bounds for RST segments to MSL.
*/
if (rst && xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_MSL)
if (rst && xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
return 0;
return 1;
}
......
......@@ -1829,8 +1829,8 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_clear_retrans(tp);
tcp_delack_init(tp);
sk->sk_send_head = NULL;
tp->saw_tstamp = 0;
tcp_sack_reset(tp);
tp->rx_opt.saw_tstamp = 0;
tcp_sack_reset(&tp->rx_opt);
__sk_dst_reset(sk);
BUG_TRAP(!inet->num || tp->bind_hash);
......@@ -1969,7 +1969,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
err = -EINVAL;
break;
}
tp->user_mss = val;
tp->rx_opt.user_mss = val;
break;
case TCP_NODELAY:
......@@ -2119,14 +2119,14 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_probes = tp->probes_out;
info->tcpi_backoff = tp->backoff;
if (tp->tstamp_ok)
if (tp->rx_opt.tstamp_ok)
info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
if (tp->sack_ok)
if (tp->rx_opt.sack_ok)
info->tcpi_options |= TCPI_OPT_SACK;
if (tp->wscale_ok) {
if (tp->rx_opt.wscale_ok) {
info->tcpi_options |= TCPI_OPT_WSCALE;
info->tcpi_snd_wscale = tp->snd_wscale;
info->tcpi_rcv_wscale = tp->rcv_wscale;
info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
}
if (tp->ecn_flags&TCP_ECN_OK)
......@@ -2186,7 +2186,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
case TCP_MAXSEG:
val = tp->mss_cache_std;
if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
val = tp->user_mss;
val = tp->rx_opt.user_mss;
break;
case TCP_NODELAY:
val = !!(tp->nonagle&TCP_NAGLE_OFF);
......
This diff is collapsed.
......@@ -591,8 +591,8 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
if ((tp->write_seq =
tw->tw_snd_nxt + 65535 + 2) == 0)
tp->write_seq = 1;
tp->ts_recent = tw->tw_ts_recent;
tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
tp->rx_opt.ts_recent = tw->tw_ts_recent;
tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
sock_hold(sk2);
goto unique;
} else
......@@ -783,25 +783,25 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->saddr = rt->rt_src;
inet->rcv_saddr = inet->saddr;
if (tp->ts_recent_stamp && inet->daddr != daddr) {
if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
/* Reset inherited state */
tp->ts_recent = 0;
tp->ts_recent_stamp = 0;
tp->write_seq = 0;
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
tp->write_seq = 0;
}
if (sysctl_tcp_tw_recycle &&
!tp->ts_recent_stamp && rt->rt_dst == daddr) {
!tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
struct inet_peer *peer = rt_get_peer(rt);
/* VJ's idea. We save last timestamp seen from
* the destination in peer table, when entering state TIME-WAIT
* and initialize ts_recent from it, when trying new connection.
* and initialize rx_opt.ts_recent from it, when trying new connection.
*/
if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
tp->ts_recent_stamp = peer->tcp_ts_stamp;
tp->ts_recent = peer->tcp_ts;
tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
tp->rx_opt.ts_recent = peer->tcp_ts;
}
}
......@@ -812,7 +812,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (inet->opt)
tp->ext_header_len = inet->opt->optlen;
tp->mss_clamp = 536;
tp->rx_opt.mss_clamp = 536;
/* Socket identity is still unknown (sport may be zero).
* However we set state to SYN-SENT and not releasing socket
......@@ -1393,7 +1393,7 @@ struct or_calltable or_ipv4 = {
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock tp;
struct tcp_options_received tmp_opt;
struct open_request *req;
__u32 saddr = skb->nh.iph->saddr;
__u32 daddr = skb->nh.iph->daddr;
......@@ -1435,29 +1435,29 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (!req)
goto drop;
tcp_clear_options(&tp);
tp.mss_clamp = 536;
tp.user_mss = tcp_sk(sk)->user_mss;
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = 536;
tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
tcp_parse_options(skb, &tp, 0);
tcp_parse_options(skb, &tmp_opt, 0);
if (want_cookie) {
tcp_clear_options(&tp);
tp.saw_tstamp = 0;
tcp_clear_options(&tmp_opt);
tmp_opt.saw_tstamp = 0;
}
if (tp.saw_tstamp && !tp.rcv_tsval) {
if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
/* Some OSes (unknown ones, but I see them on web server, which
* contains information interesting only for windows'
* users) do not send their stamp in SYN. It is easy case.
* We simply do not advertise TS support.
*/
tp.saw_tstamp = 0;
tp.tstamp_ok = 0;
tmp_opt.saw_tstamp = 0;
tmp_opt.tstamp_ok = 0;
}
tp.tstamp_ok = tp.saw_tstamp;
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tp, skb);
tcp_openreq_init(req, &tmp_opt, skb);
req->af.v4_req.loc_addr = daddr;
req->af.v4_req.rmt_addr = saddr;
......@@ -1483,7 +1483,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* timewait bucket, so that all the necessary checks
* are made in the function processing timewait state.
*/
if (tp.saw_tstamp &&
if (tmp_opt.saw_tstamp &&
sysctl_tcp_tw_recycle &&
(dst = tcp_v4_route_req(sk, req)) != NULL &&
(peer = rt_get_peer((struct rtable *)dst)) != NULL &&
......@@ -1987,11 +1987,11 @@ int tcp_v4_remember_stamp(struct sock *sk)
}
if (peer) {
if ((s32)(peer->tcp_ts - tp->ts_recent) <= 0 ||
if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
(peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
peer->tcp_ts_stamp <= tp->ts_recent_stamp)) {
peer->tcp_ts_stamp = tp->ts_recent_stamp;
peer->tcp_ts = tp->ts_recent;
peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
peer->tcp_ts = tp->rx_opt.ts_recent;
}
if (release_it)
inet_putpeer(peer);
......
......@@ -125,17 +125,17 @@ enum tcp_tw_status
tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
struct tcp_sock tp;
struct tcp_options_received tmp_opt;
int paws_reject = 0;
tp.saw_tstamp = 0;
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr) >> 2) && tw->tw_ts_recent_stamp) {
tcp_parse_options(skb, &tp, 0);
tcp_parse_options(skb, &tmp_opt, 0);
if (tp.saw_tstamp) {
tp.ts_recent = tw->tw_ts_recent;
tp.ts_recent_stamp = tw->tw_ts_recent_stamp;
paws_reject = tcp_paws_check(&tp, th->rst);
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = tw->tw_ts_recent;
tmp_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
paws_reject = tcp_paws_check(&tmp_opt, th->rst);
}
}
......@@ -176,9 +176,9 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
/* FIN arrived, enter true time-wait state. */
tw->tw_substate = TCP_TIME_WAIT;
tw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if (tp.saw_tstamp) {
if (tmp_opt.saw_tstamp) {
tw->tw_ts_recent_stamp = xtime.tv_sec;
tw->tw_ts_recent = tp.rcv_tsval;
tw->tw_ts_recent = tmp_opt.rcv_tsval;
}
/* I am shamed, but failed to make it more elegant.
......@@ -231,8 +231,8 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
}
tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
if (tp.saw_tstamp) {
tw->tw_ts_recent = tp.rcv_tsval;
if (tmp_opt.saw_tstamp) {
tw->tw_ts_recent = tmp_opt.rcv_tsval;
tw->tw_ts_recent_stamp = xtime.tv_sec;
}
......@@ -259,7 +259,7 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
if (th->syn && !th->rst && !th->ack && !paws_reject &&
(after(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt) ||
(tp.saw_tstamp && (s32)(tw->tw_ts_recent - tp.rcv_tsval) < 0))) {
(tmp_opt.saw_tstamp && (s32)(tw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
u32 isn = tw->tw_snd_nxt + 65535 + 2;
if (isn == 0)
isn++;
......@@ -332,7 +332,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
struct tcp_sock *tp = tcp_sk(sk);
int recycle_ok = 0;
if (sysctl_tcp_tw_recycle && tp->ts_recent_stamp)
if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp)
recycle_ok = tp->af_specific->remember_stamp(sk);
if (tcp_tw_count < sysctl_tcp_max_tw_buckets)
......@@ -353,15 +353,15 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_dport = inet->dport;
tw->tw_family = sk->sk_family;
tw->tw_reuse = sk->sk_reuse;
tw->tw_rcv_wscale = tp->rcv_wscale;
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
atomic_set(&tw->tw_refcnt, 1);
tw->tw_hashent = sk->sk_hashent;
tw->tw_rcv_nxt = tp->rcv_nxt;
tw->tw_snd_nxt = tp->snd_nxt;
tw->tw_rcv_wnd = tcp_receive_window(tp);
tw->tw_ts_recent = tp->ts_recent;
tw->tw_ts_recent_stamp = tp->ts_recent_stamp;
tw->tw_ts_recent = tp->rx_opt.ts_recent;
tw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
tw_dead_node_init(tw);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
......@@ -780,13 +780,13 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newtp->pushed_seq = newtp->write_seq;
newtp->copied_seq = req->rcv_isn + 1;
newtp->saw_tstamp = 0;
newtp->rx_opt.saw_tstamp = 0;
newtp->dsack = 0;
newtp->eff_sacks = 0;
newtp->rx_opt.dsack = 0;
newtp->rx_opt.eff_sacks = 0;
newtp->probes_out = 0;
newtp->num_sacks = 0;
newtp->rx_opt.num_sacks = 0;
newtp->urg_data = 0;
newtp->listen_opt = NULL;
newtp->accept_queue = newtp->accept_queue_tail = NULL;
......@@ -809,36 +809,36 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newsk->sk_sleep = NULL;
newsk->sk_owner = NULL;
newtp->tstamp_ok = req->tstamp_ok;
if((newtp->sack_ok = req->sack_ok) != 0) {
newtp->rx_opt.tstamp_ok = req->tstamp_ok;
if((newtp->rx_opt.sack_ok = req->sack_ok) != 0) {
if (sysctl_tcp_fack)
newtp->sack_ok |= 2;
newtp->rx_opt.sack_ok |= 2;
}
newtp->window_clamp = req->window_clamp;
newtp->rcv_ssthresh = req->rcv_wnd;
newtp->rcv_wnd = req->rcv_wnd;
newtp->wscale_ok = req->wscale_ok;
if (newtp->wscale_ok) {
newtp->snd_wscale = req->snd_wscale;
newtp->rcv_wscale = req->rcv_wscale;
newtp->rx_opt.wscale_ok = req->wscale_ok;
if (newtp->rx_opt.wscale_ok) {
newtp->rx_opt.snd_wscale = req->snd_wscale;
newtp->rx_opt.rcv_wscale = req->rcv_wscale;
} else {
newtp->snd_wscale = newtp->rcv_wscale = 0;
newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
newtp->window_clamp = min(newtp->window_clamp, 65535U);
}
newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->snd_wscale;
newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale;
newtp->max_window = newtp->snd_wnd;
if (newtp->tstamp_ok) {
newtp->ts_recent = req->ts_recent;
newtp->ts_recent_stamp = xtime.tv_sec;
if (newtp->rx_opt.tstamp_ok) {
newtp->rx_opt.ts_recent = req->ts_recent;
newtp->rx_opt.ts_recent_stamp = xtime.tv_sec;
newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
} else {
newtp->ts_recent_stamp = 0;
newtp->rx_opt.ts_recent_stamp = 0;
newtp->tcp_header_len = sizeof(struct tcphdr);
}
if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len;
newtp->mss_clamp = req->mss;
newtp->rx_opt.mss_clamp = req->mss;
TCP_ECN_openreq_child(newtp, req);
if (newtp->ecn_flags&TCP_ECN_OK)
newsk->sk_no_largesend = 1;
......@@ -863,21 +863,21 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
int paws_reject = 0;
struct tcp_sock ttp;
struct tcp_options_received tmp_opt;
struct sock *child;
ttp.saw_tstamp = 0;
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
tcp_parse_options(skb, &ttp, 0);
tcp_parse_options(skb, &tmp_opt, 0);
if (ttp.saw_tstamp) {
ttp.ts_recent = req->ts_recent;
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = req->ts_recent;
/* We do not store true stamp, but it is not required,
* it can be estimated (approximately)
* from another data.
*/
ttp.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
paws_reject = tcp_paws_check(&ttp, th->rst);
tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
paws_reject = tcp_paws_check(&tmp_opt, th->rst);
}
}
......@@ -982,63 +982,63 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
/* In sequence, PAWS is OK. */
if (ttp.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
req->ts_recent = ttp.rcv_tsval;
if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
req->ts_recent = tmp_opt.rcv_tsval;
if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
/* Truncate SYN, it is out of window starting
at req->rcv_isn+1. */
flg &= ~TCP_FLAG_SYN;
}
if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
/* Truncate SYN, it is out of window starting
at req->rcv_isn+1. */
flg &= ~TCP_FLAG_SYN;
}
/* RFC793: "second check the RST bit" and
* "fourth, check the SYN bit"
*/
if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
goto embryonic_reset;
/* RFC793: "second check the RST bit" and
* "fourth, check the SYN bit"
*/
if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
goto embryonic_reset;
/* ACK sequence verified above, just make sure ACK is
* set. If ACK not set, just silently drop the packet.
*/
if (!(flg & TCP_FLAG_ACK))
return NULL;
/* ACK sequence verified above, just make sure ACK is
* set. If ACK not set, just silently drop the packet.
*/
if (!(flg & TCP_FLAG_ACK))
return NULL;
/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
req->acked = 1;
return NULL;
}
/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
req->acked = 1;
return NULL;
}
/* OK, ACK is valid, create big socket and
* feed this segment to it. It will repeat all
* the tests. THIS SEGMENT MUST MOVE SOCKET TO
* ESTABLISHED STATE. If it will be dropped after
* socket is created, wait for troubles.
*/
child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
if (child == NULL)
goto listen_overflow;
/* OK, ACK is valid, create big socket and
* feed this segment to it. It will repeat all
* the tests. THIS SEGMENT MUST MOVE SOCKET TO
* ESTABLISHED STATE. If it will be dropped after
* socket is created, wait for troubles.
*/
child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
if (child == NULL)
goto listen_overflow;
sk_set_owner(child, sk->sk_owner);
tcp_synq_unlink(tp, req, prev);
tcp_synq_removed(sk, req);
sk_set_owner(child, sk->sk_owner);
tcp_synq_unlink(tp, req, prev);
tcp_synq_removed(sk, req);
tcp_acceptq_queue(sk, req, child);
return child;
tcp_acceptq_queue(sk, req, child);
return child;
listen_overflow:
if (!sysctl_tcp_abort_on_overflow) {
req->acked = 1;
return NULL;
}
listen_overflow:
if (!sysctl_tcp_abort_on_overflow) {
req->acked = 1;
return NULL;
}
embryonic_reset:
NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
if (!(flg & TCP_FLAG_RST))
req->class->send_reset(skb);
embryonic_reset:
NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
if (!(flg & TCP_FLAG_RST))
req->class->send_reset(skb);
tcp_synq_drop(sk, req, prev);
return NULL;
tcp_synq_drop(sk, req, prev);
return NULL;
}
/*
......
......@@ -236,13 +236,13 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
/* Make sure we do not exceed the maximum possible
* scaled window.
*/
if (!tp->rcv_wscale)
if (!tp->rx_opt.rcv_wscale)
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rcv_wscale));
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
/* RFC1323 scaling applied */
new_win >>= tp->rcv_wscale;
new_win >>= tp->rx_opt.rcv_wscale;
/* If we advertise zero window, disable fast path. */
if (new_win == 0)
......@@ -296,12 +296,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
}
} else if (tp->eff_sacks) {
} else if (tp->rx_opt.eff_sacks) {
/* A SACK is 2 pad bytes, a 2 byte header, plus
* 2 32-bit sequence numbers for each SACK block.
*/
tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
(tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
(tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
}
/*
......@@ -349,9 +349,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
(sysctl_flags & SYSCTL_FLAG_TSTAMPS),
(sysctl_flags & SYSCTL_FLAG_SACK),
(sysctl_flags & SYSCTL_FLAG_WSCALE),
tp->rcv_wscale,
tp->rx_opt.rcv_wscale,
tcb->when,
tp->ts_recent);
tp->rx_opt.ts_recent);
} else {
tcp_build_and_update_options((__u32 *)(th + 1),
tp, tcb->when);
......@@ -607,10 +607,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
/* This function synchronize snd mss to current pmtu/exthdr set.
tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
for TCP options, but includes only bare TCP header.
tp->mss_clamp is mss negotiated at connection setup.
tp->rx_opt.mss_clamp is mss negotiated at connection setup.
It is minumum of user_mss and mss received with SYN.
It also does not include TCP options.
......@@ -619,7 +619,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
tp->mss_cache is current effective sending mss, including
all tcp options except for SACKs. It is evaluated,
taking into account current pmtu, but never exceeds
tp->mss_clamp.
tp->rx_opt.mss_clamp.
NOTE1. rfc1122 clearly states that advertised MSS
DOES NOT include either tcp or ip options.
......@@ -643,8 +643,8 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);
/* Clamp it (mss_clamp does not include tcp options) */
if (mss_now > tp->mss_clamp)
mss_now = tp->mss_clamp;
if (mss_now > tp->rx_opt.mss_clamp)
mss_now = tp->rx_opt.mss_clamp;
/* Now subtract optional transport overhead */
mss_now -= tp->ext_header_len + tp->ext2_header_len;
......@@ -723,9 +723,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large)
mss_now = tp->mss_cache;
}
if (tp->eff_sacks)
if (tp->rx_opt.eff_sacks)
mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
(tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
(tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
return mss_now;
}
......@@ -875,16 +875,16 @@ u32 __tcp_select_window(struct sock *sk)
* scaled window will not line up with the MSS boundary anyway.
*/
window = tp->rcv_wnd;
if (tp->rcv_wscale) {
if (tp->rx_opt.rcv_wscale) {
window = free_space;
/* Advertise enough space so that it won't get scaled away.
* Import case: prevent zero window announcement if
* 1<<rcv_wscale > mss.
*/
if (((window >> tp->rcv_wscale) << tp->rcv_wscale) != window)
window = (((window >> tp->rcv_wscale) + 1)
<< tp->rcv_wscale);
if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
window = (((window >> tp->rx_opt.rcv_wscale) + 1)
<< tp->rx_opt.rcv_wscale);
} else {
/* Get the largest window that is a nice multiple of mss.
* Window clamp already applied above.
......@@ -962,7 +962,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
tp->left_out -= tcp_skb_pcount(next_skb);
}
/* Reno case is special. Sigh... */
if (!tp->sack_ok && tp->sacked_out) {
if (!tp->rx_opt.sack_ok && tp->sacked_out) {
tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
tp->left_out -= tcp_skb_pcount(next_skb);
}
......@@ -1200,7 +1200,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
return;
/* No forward retransmissions in Reno are possible. */
if (!tp->sack_ok)
if (!tp->rx_opt.sack_ok)
return;
/* Yeah, we have to make difficult choice between forward transmission
......@@ -1439,8 +1439,8 @@ static inline void tcp_connect_init(struct sock *sk)
(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
/* If user gave his TCP_MAXSEG, record it to clamp */
if (tp->user_mss)
tp->mss_clamp = tp->user_mss;
if (tp->rx_opt.user_mss)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
tp->max_window = 0;
tcp_sync_mss(sk, dst_pmtu(dst));
......@@ -1451,11 +1451,11 @@ static inline void tcp_connect_init(struct sock *sk)
tcp_ca_init(tp);
tcp_select_initial_window(tcp_full_space(sk),
tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
sysctl_tcp_window_scaling,
&tp->rcv_wscale);
&tp->rx_opt.rcv_wscale);
tp->rcv_ssthresh = tp->rcv_wnd;
......
......@@ -353,7 +353,7 @@ static void tcp_retransmit_timer(struct sock *sk)
if (tp->retransmits == 0) {
if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
if (tp->sack_ok) {
if (tp->rx_opt.sack_ok) {
if (tp->ca_state == TCP_CA_Recovery)
NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
else
......
......@@ -473,8 +473,8 @@ static int tcp_v6_check_established(struct sock *sk)
tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
if (!tp->write_seq)
tp->write_seq = 1;
tp->ts_recent = tw->tw_ts_recent;
tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
tp->rx_opt.ts_recent = tw->tw_ts_recent;
tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
sock_hold(sk2);
goto unique;
} else
......@@ -609,10 +609,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
return -EINVAL;
}
if (tp->ts_recent_stamp &&
if (tp->rx_opt.ts_recent_stamp &&
!ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
tp->ts_recent = 0;
tp->ts_recent_stamp = 0;
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
tp->write_seq = 0;
}
......@@ -703,7 +703,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
tp->ext2_header_len = dst->header_len;
tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
inet->dport = usin->sin6_port;
......@@ -1202,7 +1202,8 @@ static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock tmptp, *tp = tcp_sk(sk);
struct tcp_options_received tmp_opt;
struct tcp_sock *tp = tcp_sk(sk);
struct open_request *req = NULL;
__u32 isn = TCP_SKB_CB(skb)->when;
......@@ -1228,14 +1229,14 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (req == NULL)
goto drop;
tcp_clear_options(&tmptp);
tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmptp.user_mss = tp->user_mss;
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
tcp_parse_options(skb, &tmptp, 0);
tcp_parse_options(skb, &tmp_opt, 0);
tmptp.tstamp_ok = tmptp.saw_tstamp;
tcp_openreq_init(req, &tmptp, skb);
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb);
req->class = &or_ipv6;
ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment