Commit d9707786 authored by Alexey Kuznetsov, committed by David S. Miller

[IPSEC]: fragmentation & tcp mss calculation.

1. Add local_df field to struct sk_buff to mark packets which
   are to be fragmented locally despite their IPv6ness or IP DF flag
2. Add ext2_header_len to tcp_opt to remember the part of the header length
   that depends on the route
3. Add trailer_len to struct dst_entry and xfrm_state to know how
   much space should be reserved at the tail of the frame for subsequent
   transformations.
4. [BUG] icv_trunc_len must be used in the mss calculation, not
   icv_full_len.
parent cd931326
......@@ -226,7 +226,7 @@ struct sk_buff {
unsigned int len,
data_len,
csum;
unsigned char __unused,
unsigned char local_df,
cloned,
pkt_type,
ip_summed;
......
......@@ -245,6 +245,7 @@ struct tcp_opt {
__u16 mss_cache_std; /* Like mss_cache, but without TSO */
__u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
__u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
__u16 ext2_header_len;/* Options depending on route */
__u8 ca_state; /* State of fast-retransmit machine */
__u8 retransmits; /* Number of unrecovered RTO timeouts. */
......
......@@ -50,7 +50,8 @@ struct dst_entry
unsigned long lastuse;
unsigned long expires;
unsigned header_len; /* more space at head required */
unsigned short header_len; /* more space at head required */
unsigned short trailer_len; /* space to reserve at tail */
u32 metrics[RTAX_MAX];
struct dst_entry *path;
......
......@@ -927,7 +927,8 @@ static __inline__ unsigned int tcp_current_mss(struct sock *sk, int large)
if (dst) {
u32 mtu = dst_pmtu(dst);
if (mtu != tp->pmtu_cookie)
if (mtu != tp->pmtu_cookie ||
tp->ext2_header_len != dst->header_len)
mss_now = tcp_sync_mss(sk, mtu);
}
if (tp->eff_sacks)
......
......@@ -107,6 +107,7 @@ struct xfrm_state
u16 family;
xfrm_address_t saddr;
int header_len;
int trailer_len;
} props;
struct xfrm_lifetime_cfg lft;
......@@ -255,6 +256,11 @@ static inline void xfrm_state_put(struct xfrm_state *x)
__xfrm_state_destroy(x);
}
/*
 * Take an additional reference on the xfrm state @x by atomically
 * incrementing x->refcnt.
 *
 * NOTE(review): presumably the counterpart of xfrm_state_put(), which
 * drops a reference — confirm against the full header.
 */
static inline void xfrm_state_hold(struct xfrm_state *x)
{
atomic_inc(&x->refcnt);
}
static inline int
xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
......
......@@ -208,6 +208,7 @@ struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
skb->len = 0;
skb->data_len = 0;
skb->csum = 0;
skb->local_df = 0;
skb->cloned = 0;
skb->pkt_type = PACKET_HOST; /* Default type */
skb->ip_summed = 0;
......@@ -375,6 +376,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
C(len);
C(data_len);
C(csum);
C(local_df);
n->cloned = 1;
C(pkt_type);
C(ip_summed);
......@@ -438,6 +440,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->mac.raw = old->mac.raw + offset;
memcpy(new->cb, old->cb, sizeof(old->cb));
atomic_set(&new->users, 1);
new->local_df = old->local_df;
new->pkt_type = old->pkt_type;
new->stamp = old->stamp;
new->destructor = NULL;
......
......@@ -259,7 +259,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
if (esp->conf.padlen)
mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1);
return mtu + x->props.header_len + esp->auth.icv_full_len;
return mtu + x->props.header_len + esp->auth.icv_trunc_len;
}
void esp4_err(struct sk_buff *skb, u32 info)
......@@ -365,6 +365,7 @@ int esp_init_state(struct xfrm_state *x, void *args)
if (x->props.mode)
x->props.header_len += 20;
x->data = esp;
x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
return 0;
error:
......
......@@ -440,7 +440,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
iph = skb->nh.iph;
if (unlikely(iph->frag_off & htons(IP_DF))) {
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(dst_pmtu(&rt->u.dst)));
kfree_skb(skb);
......@@ -793,6 +793,19 @@ int ip_append_data(struct sock *sk,
inet->cork.length += length;
/* So, what's going on in the loop below?
*
* We use calculated fragment length to generate chained skb,
* each of segments is IP fragment ready for sending to network after
* adding appropriate IP header.
*
* Mistake is:
*
* If mtu-fragheaderlen is not 0 modulo 8, we generate additional
* small fragment of length (mtu-fragheaderlen)%8, even though
* it is not necessary. Not a big bug, but needs a fix.
*/
if ((skb = skb_peek_tail(&sk->write_queue)) == NULL)
goto alloc_new_skb;
......@@ -815,6 +828,15 @@ int ip_append_data(struct sock *sk,
alloclen = maxfraglen;
else
alloclen = datalen + fragheaderlen;
/* The last fragment gets additional space at tail.
* Note, with MSG_MORE we overallocate on fragments,
* because we have no idea what fragment will be
* the last.
*/
if (datalen == length)
alloclen += rt->u.dst.trailer_len;
if (transhdrlen) {
skb = sock_alloc_send_skb(sk,
alloclen + hh_len + 15,
......@@ -1088,6 +1110,16 @@ int ip_push_pending_frames(struct sock *sk)
#endif
}
/* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow
 * to fragment the frame generated here. No matter how transforms
 * change the size of the packet, it will come out.
 */
if (inet->pmtudisc != IP_PMTUDISC_DO)
skb->local_df = 1;
/* DF bit is set when we want to see DF on outgoing frames.
* If local_df is set too, we still allow to fragment this frame
* locally. */
if (inet->pmtudisc == IP_PMTUDISC_DO ||
(!skb_shinfo(skb)->frag_list && ip_dont_fragment(sk, &rt->u.dst)))
df = htons(IP_DF);
......
......@@ -852,11 +852,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* OK, now commit destination to socket. */
__sk_dst_set(sk, &rt->u.dst);
tcp_v4_setup_caps(sk, &rt->u.dst);
/* DAVEM REDPEN: This used to sit above forced ext_header_len = 0
* above, it was real bug. Is this one correct?
*/
tp->ext_header_len += rt->u.dst.header_len;
tp->ext2_header_len = rt->u.dst.header_len;
if (!tp->write_seq)
tp->write_seq = secure_tcp_sequence_number(inet->saddr,
......@@ -1611,7 +1607,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->ext_header_len = 0;
if (newinet->opt)
newtp->ext_header_len = newinet->opt->optlen;
newtp->ext_header_len += dst->header_len;
newtp->ext2_header_len = dst->header_len;
newinet->id = newtp->write_seq ^ jiffies;
tcp_sync_mss(newsk, dst_pmtu(dst));
......
......@@ -570,7 +570,7 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu)
mss_now = tp->mss_clamp;
/* Now subtract optional transport overhead */
mss_now -= tp->ext_header_len;
mss_now -= tp->ext_header_len + tp->ext2_header_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
if (mss_now < 48)
......@@ -591,7 +591,7 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu)
int large_mss;
large_mss = 65535 - tp->af_specific->net_header_len -
tp->ext_header_len - tp->tcp_header_len;
tp->ext_header_len - tp->ext2_header_len - tp->tcp_header_len;
if (tp->max_window && large_mss > (tp->max_window>>1))
large_mss = max((tp->max_window>>1), 68U - tp->tcp_header_len);
......
......@@ -896,6 +896,7 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
int i;
int err;
int header_len = 0;
int trailer_len = 0;
dst = dst_prev = NULL;
......@@ -921,6 +922,7 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
local = xfrm[i]->props.saddr.xfrm4_addr;
}
header_len += xfrm[i]->props.header_len;
trailer_len += xfrm[i]->props.trailer_len;
}
if (remote != fl->fl4_dst) {
......@@ -947,6 +949,7 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
dst_prev->flags |= DST_HOST;
dst_prev->lastuse = jiffies;
dst_prev->header_len = header_len;
dst_prev->trailer_len = trailer_len;
memcpy(&dst_prev->metrics, &rt->u.dst.metrics, sizeof(dst_prev->metrics));
dst_prev->path = &rt->u.dst;
......@@ -966,6 +969,7 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
x->u.rt.rt_gateway = rt->rt_gateway;
x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
header_len -= x->u.dst.xfrm->props.header_len;
trailer_len -= x->u.dst.xfrm->props.trailer_len;
}
*dst_p = dst;
return 0;
......@@ -989,6 +993,7 @@ xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx
int i;
int err = 0;
int header_len = 0;
int trailer_len = 0;
dst = dst_prev = NULL;
......@@ -1014,6 +1019,7 @@ xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx
local = (struct in6_addr*)&xfrm[i]->props.saddr;
}
header_len += xfrm[i]->props.header_len;
trailer_len += xfrm[i]->props.trailer_len;
}
if (ipv6_addr_cmp(remote, fl->fl6_dst)) {
......@@ -1040,6 +1046,7 @@ xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx
dst_prev->flags |= DST_HOST;
dst_prev->lastuse = jiffies;
dst_prev->header_len = header_len;
dst_prev->trailer_len = trailer_len;
memcpy(&dst_prev->metrics, &rt->u.dst.metrics, sizeof(dst_prev->metrics));
dst_prev->path = &rt->u.dst;
......@@ -1056,6 +1063,7 @@ xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx
x->u.rt6.rt6i_gateway = rt0->rt6i_gateway;
memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway));
header_len -= x->u.dst.xfrm->props.header_len;
trailer_len -= x->u.dst.xfrm->props.trailer_len;
}
*dst_p = dst;
return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment