Commit 193cdc4a authored by David S. Miller

Merge branch 'udpv6_lockless_send'

Vladislav Yasevich says:

====================
ipv6: Add lockless UDP send path

This series introduces a lockless UDPv6 send path similar to
what Herbert Xu did for IPv4 a while ago.

There are some differences from IPv4.  IPv6 caching for the flow
label is a bit different, and it requires another cork
structure that holds the IPv6 ancillary data.

Please take a look.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ba0c39cb 32dce968
...@@ -125,6 +125,12 @@ struct ipv6_mc_socklist; ...@@ -125,6 +125,12 @@ struct ipv6_mc_socklist;
struct ipv6_ac_socklist; struct ipv6_ac_socklist;
struct ipv6_fl_socklist; struct ipv6_fl_socklist;
/*
 * IPv6-specific part of the cork state used while a datagram is being
 * assembled.  It is a named type so that a send path can keep an instance
 * outside of struct ipv6_pinfo (for example on the stack) and pass it
 * around by pointer.
 */
struct inet6_cork {
/* Private copy of the tx options for the datagram being built;
 * allocated in ip6_setup_cork() and freed by ip6_cork_release().
 */
struct ipv6_txoptions *opt;
/* Hop limit copied into the IPv6 header when the skb is finalised. */
u8 hop_limit;
/* Traffic class handed to ip6_flow_hdr() when the skb is finalised. */
u8 tclass;
};
/** /**
* struct ipv6_pinfo - ipv6 private area * struct ipv6_pinfo - ipv6 private area
* *
...@@ -217,11 +223,7 @@ struct ipv6_pinfo { ...@@ -217,11 +223,7 @@ struct ipv6_pinfo {
struct ipv6_txoptions *opt; struct ipv6_txoptions *opt;
struct sk_buff *pktoptions; struct sk_buff *pktoptions;
struct sk_buff *rxpmtu; struct sk_buff *rxpmtu;
struct { struct inet6_cork cork;
struct ipv6_txoptions *opt;
u8 hop_limit;
u8 tclass;
} cork;
}; };
/* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */
......
...@@ -788,6 +788,25 @@ int ip6_push_pending_frames(struct sock *sk); ...@@ -788,6 +788,25 @@ int ip6_push_pending_frames(struct sock *sk);
void ip6_flush_pending_frames(struct sock *sk); void ip6_flush_pending_frames(struct sock *sk);
/*
 * Hand a fully built IPv6 skb to ip6_local_out().  Returns 0 or a negative
 * errno; positive transmit codes are translated with net_xmit_errno().
 */
int ip6_send_skb(struct sk_buff *skb);
/*
 * Collapse every skb on @queue into a single datagram, fill in the IPv6
 * header from @cork / @v6_cork and release the cork state.  Returns the
 * resulting skb, or NULL when @queue is empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue,
struct inet_cork_full *cork,
struct inet6_cork *v6_cork);
/*
 * Build one complete datagram using a private queue and private cork state
 * instead of the ones embedded in @sk.  Returns the skb, NULL when
 * MSG_PROBE is set in @flags, or an ERR_PTR() value on failure.
 */
struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
int hlimit, int tclass, struct ipv6_txoptions *opt,
struct flowi6 *fl6, struct rt6_info *rt,
unsigned int flags, int dontfrag);
/*
 * Finalise whatever is pending on the socket's own write queue, using the
 * cork state stored in the socket itself.  Thin convenience wrapper around
 * __ip6_make_skb(); returns whatever that function returns.
 */
static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
{
	struct inet_cork_full *sock_cork = &inet_sk(sk)->cork;
	struct inet6_cork *sock_v6_cork = &inet6_sk(sk)->cork;

	return __ip6_make_skb(sk, &sk->sk_write_queue, sock_cork,
			      sock_v6_cork);
}
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6);
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst); const struct in6_addr *final_dst);
......
...@@ -1041,6 +1041,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, ...@@ -1041,6 +1041,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
static inline int ip6_ufo_append_data(struct sock *sk, static inline int ip6_ufo_append_data(struct sock *sk,
struct sk_buff_head *queue,
int getfrag(void *from, char *to, int offset, int len, int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb), int odd, struct sk_buff *skb),
void *from, int length, int hh_len, int fragheaderlen, void *from, int length, int hh_len, int fragheaderlen,
...@@ -1056,7 +1057,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, ...@@ -1056,7 +1057,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
* device, so create one single skb packet containing complete * device, so create one single skb packet containing complete
* udp datagram * udp datagram
*/ */
skb = skb_peek_tail(&sk->sk_write_queue); skb = skb_peek_tail(queue);
if (skb == NULL) { if (skb == NULL) {
skb = sock_alloc_send_skb(sk, skb = sock_alloc_send_skb(sk,
hh_len + fragheaderlen + transhdrlen + 20, hh_len + fragheaderlen + transhdrlen + 20,
...@@ -1079,7 +1080,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, ...@@ -1079,7 +1080,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb->protocol = htons(ETH_P_IPV6); skb->protocol = htons(ETH_P_IPV6);
skb->csum = 0; skb->csum = 0;
__skb_queue_tail(&sk->sk_write_queue, skb); __skb_queue_tail(queue, skb);
} else if (skb_is_gso(skb)) { } else if (skb_is_gso(skb)) {
goto append; goto append;
} }
...@@ -1135,99 +1136,106 @@ static void ip6_append_data_mtu(unsigned int *mtu, ...@@ -1135,99 +1136,106 @@ static void ip6_append_data_mtu(unsigned int *mtu,
} }
} }
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
int offset, int len, int odd, struct sk_buff *skb), struct inet6_cork *v6_cork,
void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt,
int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, struct rt6_info *rt, struct flowi6 *fl6)
struct rt6_info *rt, unsigned int flags, int dontfrag)
{ {
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_cork *cork; unsigned int mtu;
/*
* setup for corking
*/
if (opt) {
if (WARN_ON(v6_cork->opt))
return -EINVAL;
v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
if (unlikely(v6_cork->opt == NULL))
return -ENOBUFS;
v6_cork->opt->tot_len = opt->tot_len;
v6_cork->opt->opt_flen = opt->opt_flen;
v6_cork->opt->opt_nflen = opt->opt_nflen;
v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
sk->sk_allocation);
if (opt->dst0opt && !v6_cork->opt->dst0opt)
return -ENOBUFS;
v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
sk->sk_allocation);
if (opt->dst1opt && !v6_cork->opt->dst1opt)
return -ENOBUFS;
v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
sk->sk_allocation);
if (opt->hopopt && !v6_cork->opt->hopopt)
return -ENOBUFS;
v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
sk->sk_allocation);
if (opt->srcrt && !v6_cork->opt->srcrt)
return -ENOBUFS;
/* need source address above miyazawa*/
}
dst_hold(&rt->dst);
cork->base.dst = &rt->dst;
cork->fl.u.ip6 = *fl6;
v6_cork->hop_limit = hlimit;
v6_cork->tclass = tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
rt->dst.dev->mtu : dst_mtu(&rt->dst);
else
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
rt->dst.dev->mtu : dst_mtu(rt->dst.path);
if (np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
}
cork->base.fragsize = mtu;
if (dst_allfrag(rt->dst.path))
cork->base.flags |= IPCORK_ALLFRAG;
cork->base.length = 0;
return 0;
}
static int __ip6_append_data(struct sock *sk,
struct flowi6 *fl6,
struct sk_buff_head *queue,
struct inet_cork *cork,
struct inet6_cork *v6_cork,
struct page_frag *pfrag,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
unsigned int flags, int dontfrag)
{
struct sk_buff *skb, *skb_prev = NULL; struct sk_buff *skb, *skb_prev = NULL;
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
int exthdrlen; int exthdrlen = 0;
int dst_exthdrlen; int dst_exthdrlen = 0;
int hh_len; int hh_len;
int copy; int copy;
int err; int err;
int offset = 0; int offset = 0;
__u8 tx_flags = 0; __u8 tx_flags = 0;
u32 tskey = 0; u32 tskey = 0;
struct rt6_info *rt = (struct rt6_info *)cork->dst;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
if (flags&MSG_PROBE) skb = skb_peek_tail(queue);
return 0; if (!skb) {
cork = &inet->cork.base; exthdrlen = opt ? opt->opt_flen : 0;
if (skb_queue_empty(&sk->sk_write_queue)) {
/*
* setup for corking
*/
if (opt) {
if (WARN_ON(np->cork.opt))
return -EINVAL;
np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
if (unlikely(np->cork.opt == NULL))
return -ENOBUFS;
np->cork.opt->tot_len = opt->tot_len;
np->cork.opt->opt_flen = opt->opt_flen;
np->cork.opt->opt_nflen = opt->opt_nflen;
np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
sk->sk_allocation);
if (opt->dst0opt && !np->cork.opt->dst0opt)
return -ENOBUFS;
np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
sk->sk_allocation);
if (opt->dst1opt && !np->cork.opt->dst1opt)
return -ENOBUFS;
np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
sk->sk_allocation);
if (opt->hopopt && !np->cork.opt->hopopt)
return -ENOBUFS;
np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
sk->sk_allocation);
if (opt->srcrt && !np->cork.opt->srcrt)
return -ENOBUFS;
/* need source address above miyazawa*/
}
dst_hold(&rt->dst);
cork->dst = &rt->dst;
inet->cork.fl.u.ip6 = *fl6;
np->cork.hop_limit = hlimit;
np->cork.tclass = tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
rt->dst.dev->mtu : dst_mtu(&rt->dst);
else
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
rt->dst.dev->mtu : dst_mtu(rt->dst.path);
if (np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
}
cork->fragsize = mtu;
if (dst_allfrag(rt->dst.path))
cork->flags |= IPCORK_ALLFRAG;
cork->length = 0;
exthdrlen = (opt ? opt->opt_flen : 0);
length += exthdrlen;
transhdrlen += exthdrlen;
dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
} else {
rt = (struct rt6_info *)cork->dst;
fl6 = &inet->cork.fl.u.ip6;
opt = np->cork.opt;
transhdrlen = 0;
exthdrlen = 0;
dst_exthdrlen = 0;
mtu = cork->fragsize;
} }
mtu = cork->fragsize;
orig_mtu = mtu; orig_mtu = mtu;
hh_len = LL_RESERVED_SPACE(rt->dst.dev); hh_len = LL_RESERVED_SPACE(rt->dst.dev);
...@@ -1276,6 +1284,14 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, ...@@ -1276,6 +1284,14 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
tskey = sk->sk_tskey++; tskey = sk->sk_tskey++;
} }
/* If this is the first and only packet and device
* supports checksum offloading, let's use it.
*/
if (!skb &&
length + fragheaderlen < mtu &&
rt->dst.dev->features & NETIF_F_V6_CSUM &&
!exthdrlen)
csummode = CHECKSUM_PARTIAL;
/* /*
* Let's try using as much space as possible. * Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU. * Use MTU if total length of the message fits into the MTU.
...@@ -1292,13 +1308,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, ...@@ -1292,13 +1308,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
* --yoshfuji * --yoshfuji
*/ */
skb = skb_peek_tail(&sk->sk_write_queue);
cork->length += length; cork->length += length;
if (((length > mtu) || if (((length > mtu) ||
(skb && skb_is_gso(skb))) && (skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) && (sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO)) { (rt->dst.dev->features & NETIF_F_UFO)) {
err = ip6_ufo_append_data(sk, getfrag, from, length, err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, hh_len, fragheaderlen,
transhdrlen, mtu, flags, rt); transhdrlen, mtu, flags, rt);
if (err) if (err)
...@@ -1389,7 +1404,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, ...@@ -1389,7 +1404,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
* Fill in the control structures * Fill in the control structures
*/ */
skb->protocol = htons(ETH_P_IPV6); skb->protocol = htons(ETH_P_IPV6);
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = csummode;
skb->csum = 0; skb->csum = 0;
/* reserve for fragmentation and ipsec header */ /* reserve for fragmentation and ipsec header */
skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
...@@ -1439,7 +1454,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, ...@@ -1439,7 +1454,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
/* /*
* Put the packet on the pending queue * Put the packet on the pending queue
*/ */
__skb_queue_tail(&sk->sk_write_queue, skb); __skb_queue_tail(queue, skb);
continue; continue;
} }
...@@ -1458,7 +1473,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, ...@@ -1458,7 +1473,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
} }
} else { } else {
int i = skb_shinfo(skb)->nr_frags; int i = skb_shinfo(skb)->nr_frags;
struct page_frag *pfrag = sk_page_frag(sk);
err = -ENOMEM; err = -ENOMEM;
if (!sk_page_frag_refill(sk, pfrag)) if (!sk_page_frag_refill(sk, pfrag))
...@@ -1501,43 +1515,81 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, ...@@ -1501,43 +1515,81 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
return err; return err;
} }
/*
 * ip6_append_data - append data to the datagram pending on @sk
 *
 * Socket-based entry point: works on sk->sk_write_queue and on the cork
 * state embedded in the socket.  On the first call for a datagram (write
 * queue empty) the cork is set up from @hlimit, @tclass, @opt, @rt and
 * @fl6, and the length of the fragmentable extension headers is added to
 * both @length and @transhdrlen.  On later calls the flow stored in the
 * cork replaces @fl6 and no transport header space is requested.
 *
 * Returns 0 immediately when MSG_PROBE is set in @flags, the error from
 * ip6_setup_cork() if corking fails, and otherwise the result of
 * __ip6_append_data().
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen, int hlimit,
		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);

	if (flags & MSG_PROBE)
		return 0;

	if (!skb_queue_empty(&sk->sk_write_queue)) {
		/* Datagram already in progress: reuse the corked flow. */
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	} else {
		int opt_flen;
		int rc;

		/* First chunk of a new datagram: set up corking. */
		rc = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
				    tclass, opt, rt, fl6);
		if (rc)
			return rc;

		opt_flen = opt ? opt->opt_flen : 0;
		length += opt_flen;
		transhdrlen += opt_flen;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue,
				 &inet->cork.base, &np->cork,
				 sk_page_frag(sk), getfrag, from, length,
				 transhdrlen, flags, dontfrag);
}
EXPORT_SYMBOL_GPL(ip6_append_data); EXPORT_SYMBOL_GPL(ip6_append_data);
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) static void ip6_cork_release(struct inet_cork_full *cork,
struct inet6_cork *v6_cork)
{ {
if (np->cork.opt) { if (v6_cork->opt) {
kfree(np->cork.opt->dst0opt); kfree(v6_cork->opt->dst0opt);
kfree(np->cork.opt->dst1opt); kfree(v6_cork->opt->dst1opt);
kfree(np->cork.opt->hopopt); kfree(v6_cork->opt->hopopt);
kfree(np->cork.opt->srcrt); kfree(v6_cork->opt->srcrt);
kfree(np->cork.opt); kfree(v6_cork->opt);
np->cork.opt = NULL; v6_cork->opt = NULL;
} }
if (inet->cork.base.dst) { if (cork->base.dst) {
dst_release(inet->cork.base.dst); dst_release(cork->base.dst);
inet->cork.base.dst = NULL; cork->base.dst = NULL;
inet->cork.base.flags &= ~IPCORK_ALLFRAG; cork->base.flags &= ~IPCORK_ALLFRAG;
} }
memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); memset(&cork->fl, 0, sizeof(cork->fl));
} }
int ip6_push_pending_frames(struct sock *sk) struct sk_buff *__ip6_make_skb(struct sock *sk,
struct sk_buff_head *queue,
struct inet_cork_full *cork,
struct inet6_cork *v6_cork)
{ {
struct sk_buff *skb, *tmp_skb; struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb; struct sk_buff **tail_skb;
struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
struct ipv6hdr *hdr; struct ipv6hdr *hdr;
struct ipv6_txoptions *opt = np->cork.opt; struct ipv6_txoptions *opt = v6_cork->opt;
struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
struct flowi6 *fl6 = &inet->cork.fl.u.ip6; struct flowi6 *fl6 = &cork->fl.u.ip6;
unsigned char proto = fl6->flowi6_proto; unsigned char proto = fl6->flowi6_proto;
int err = 0;
skb = __skb_dequeue(&sk->sk_write_queue); skb = __skb_dequeue(queue);
if (skb == NULL) if (skb == NULL)
goto out; goto out;
tail_skb = &(skb_shinfo(skb)->frag_list); tail_skb = &(skb_shinfo(skb)->frag_list);
...@@ -1545,7 +1597,7 @@ int ip6_push_pending_frames(struct sock *sk) ...@@ -1545,7 +1597,7 @@ int ip6_push_pending_frames(struct sock *sk)
/* move skb->data to ip header from ext header */ /* move skb->data to ip header from ext header */
if (skb->data < skb_network_header(skb)) if (skb->data < skb_network_header(skb))
__skb_pull(skb, skb_network_offset(skb)); __skb_pull(skb, skb_network_offset(skb));
while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
__skb_pull(tmp_skb, skb_network_header_len(skb)); __skb_pull(tmp_skb, skb_network_header_len(skb));
*tail_skb = tmp_skb; *tail_skb = tmp_skb;
tail_skb = &(tmp_skb->next); tail_skb = &(tmp_skb->next);
...@@ -1570,10 +1622,10 @@ int ip6_push_pending_frames(struct sock *sk) ...@@ -1570,10 +1622,10 @@ int ip6_push_pending_frames(struct sock *sk)
skb_reset_network_header(skb); skb_reset_network_header(skb);
hdr = ipv6_hdr(skb); hdr = ipv6_hdr(skb);
ip6_flow_hdr(hdr, np->cork.tclass, ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel, ip6_make_flowlabel(net, skb, fl6->flowlabel,
np->autoflowlabel)); np->autoflowlabel));
hdr->hop_limit = np->cork.hop_limit; hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto; hdr->nexthdr = proto;
hdr->saddr = fl6->saddr; hdr->saddr = fl6->saddr;
hdr->daddr = *final_dst; hdr->daddr = *final_dst;
...@@ -1590,34 +1642,104 @@ int ip6_push_pending_frames(struct sock *sk) ...@@ -1590,34 +1642,104 @@ int ip6_push_pending_frames(struct sock *sk)
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
} }
ip6_cork_release(cork, v6_cork);
out:
return skb;
}
int ip6_send_skb(struct sk_buff *skb)
{
struct net *net = sock_net(skb->sk);
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
int err;
err = ip6_local_out(skb); err = ip6_local_out(skb);
if (err) { if (err) {
if (err > 0) if (err > 0)
err = net_xmit_errno(err); err = net_xmit_errno(err);
if (err) if (err)
goto error; IP6_INC_STATS(net, rt->rt6i_idev,
IPSTATS_MIB_OUTDISCARDS);
} }
out:
ip6_cork_release(inet, np);
return err; return err;
error: }
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
goto out; int ip6_push_pending_frames(struct sock *sk)
{
struct sk_buff *skb;
skb = ip6_finish_skb(sk);
if (!skb)
return 0;
return ip6_send_skb(skb);
} }
EXPORT_SYMBOL_GPL(ip6_push_pending_frames); EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
void ip6_flush_pending_frames(struct sock *sk) static void __ip6_flush_pending_frames(struct sock *sk,
struct sk_buff_head *queue,
struct inet_cork_full *cork,
struct inet6_cork *v6_cork)
{ {
struct sk_buff *skb; struct sk_buff *skb;
while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { while ((skb = __skb_dequeue_tail(queue)) != NULL) {
if (skb_dst(skb)) if (skb_dst(skb))
IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTDISCARDS); IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb); kfree_skb(skb);
} }
ip6_cork_release(inet_sk(sk), inet6_sk(sk)); ip6_cork_release(cork, v6_cork);
}
void ip6_flush_pending_frames(struct sock *sk)
{
__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
&inet_sk(sk)->cork, &inet6_sk(sk)->cork);
} }
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
/*
 * ip6_make_skb - build a complete IPv6 datagram without using socket state
 * @sk:          socket the datagram is sent on
 * @getfrag:     callback that copies payload into the skb
 * @from:        opaque cookie handed to @getfrag
 * @length:      payload length, transport header included
 * @transhdrlen: transport header length
 * @hlimit:      hop limit stored in the cork
 * @tclass:      traffic class stored in the cork
 * @opt:         tx options to duplicate into the cork, may be NULL
 * @fl6:         flow describing the datagram
 * @rt:          route to use; a reference is taken by ip6_setup_cork()
 * @flags:       MSG_* flags
 * @dontfrag:    dontfrag setting; a negative value selects the socket default
 *
 * Everything is assembled on a private queue with on-stack cork state, so
 * the socket's write queue and embedded cork are not touched.
 *
 * Returns the finished skb, NULL when MSG_PROBE is set (or nothing was
 * queued), or an ERR_PTR() value on failure.
 */
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     int hlimit, int tclass,
			     struct ipv6_txoptions *opt, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     int dontfrag)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (opt ? opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	/* ip6_cork_release() tests base.dst, so it must not be stack garbage
	 * if we have to unwind before ip6_setup_cork() stores the route.
	 */
	cork.base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
	if (err) {
		/* ip6_setup_cork() can fail after it has already allocated
		 * v6_cork.opt and some of the duplicated extension headers;
		 * release them here, nobody else can see this on-stack cork.
		 */
		ip6_cork_release(&cork, &v6_cork);
		return ERR_PTR(err);
	}

	if (dontfrag < 0)
		dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, dontfrag);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}
...@@ -990,9 +990,10 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, ...@@ -990,9 +990,10 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
{ {
unsigned int offset; unsigned int offset;
struct udphdr *uh = udp_hdr(skb); struct udphdr *uh = udp_hdr(skb);
struct sk_buff *frags = skb_shinfo(skb)->frag_list;
__wsum csum = 0; __wsum csum = 0;
if (skb_queue_len(&sk->sk_write_queue) == 1) { if (!frags) {
/* Only one fragment on the socket. */ /* Only one fragment on the socket. */
skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check); skb->csum_offset = offsetof(struct udphdr, check);
...@@ -1008,9 +1009,9 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, ...@@ -1008,9 +1009,9 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = CHECKSUM_NONE;
skb_queue_walk(&sk->sk_write_queue, skb) { do {
csum = csum_add(csum, skb->csum); csum = csum_add(csum, frags->csum);
} } while ((frags = frags->next));
uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP,
csum); csum);
...@@ -1023,26 +1024,15 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, ...@@ -1023,26 +1024,15 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
* Sending * Sending
*/ */
static int udp_v6_push_pending_frames(struct sock *sk) static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
{ {
struct sk_buff *skb; struct sock *sk = skb->sk;
struct udphdr *uh; struct udphdr *uh;
struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct flowi6 *fl6;
int err = 0; int err = 0;
int is_udplite = IS_UDPLITE(sk); int is_udplite = IS_UDPLITE(sk);
__wsum csum = 0; __wsum csum = 0;
int offset = skb_transport_offset(skb);
if (up->pending == AF_INET) int len = skb->len - offset;
return udp_push_pending_frames(sk);
fl6 = &inet->cork.fl.u.ip6;
/* Grab the skbuff where UDP header space exists. */
skb = skb_peek(&sk->sk_write_queue);
if (skb == NULL)
goto out;
/* /*
* Create a UDP header * Create a UDP header
...@@ -1050,29 +1040,28 @@ static int udp_v6_push_pending_frames(struct sock *sk) ...@@ -1050,29 +1040,28 @@ static int udp_v6_push_pending_frames(struct sock *sk)
uh = udp_hdr(skb); uh = udp_hdr(skb);
uh->source = fl6->fl6_sport; uh->source = fl6->fl6_sport;
uh->dest = fl6->fl6_dport; uh->dest = fl6->fl6_dport;
uh->len = htons(up->len); uh->len = htons(len);
uh->check = 0; uh->check = 0;
if (is_udplite) if (is_udplite)
csum = udplite_csum_outgoing(sk, skb); csum = udplite_csum(skb);
else if (up->no_check6_tx) { /* UDP csum disabled */ else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = CHECKSUM_NONE;
goto send; goto send;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len);
up->len);
goto send; goto send;
} else } else
csum = udp_csum_outgoing(sk, skb); csum = udp_csum(skb);
/* add protocol-dependent pseudo-header */ /* add protocol-dependent pseudo-header */
uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
up->len, fl6->flowi6_proto, csum); len, fl6->flowi6_proto, csum);
if (uh->check == 0) if (uh->check == 0)
uh->check = CSUM_MANGLED_0; uh->check = CSUM_MANGLED_0;
send: send:
err = ip6_push_pending_frames(sk); err = ip6_send_skb(skb);
if (err) { if (err) {
if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
UDP6_INC_STATS_USER(sock_net(sk), UDP6_INC_STATS_USER(sock_net(sk),
...@@ -1082,6 +1071,30 @@ static int udp_v6_push_pending_frames(struct sock *sk) ...@@ -1082,6 +1071,30 @@ static int udp_v6_push_pending_frames(struct sock *sk)
} else } else
UDP6_INC_STATS_USER(sock_net(sk), UDP6_INC_STATS_USER(sock_net(sk),
UDP_MIB_OUTDATAGRAMS, is_udplite); UDP_MIB_OUTDATAGRAMS, is_udplite);
return err;
}
static int udp_v6_push_pending_frames(struct sock *sk)
{
struct sk_buff *skb;
struct udp_sock *up = udp_sk(sk);
struct flowi6 fl6;
int err = 0;
if (up->pending == AF_INET)
return udp_push_pending_frames(sk);
/* ip6_finish_skb will release the cork, so make a copy of
* fl6 here.
*/
fl6 = inet_sk(sk)->cork.fl.u.ip6;
skb = ip6_finish_skb(sk);
if (!skb)
goto out;
err = udp_v6_send_skb(skb, &fl6);
out: out:
up->len = 0; up->len = 0;
up->pending = 0; up->pending = 0;
...@@ -1164,6 +1177,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -1164,6 +1177,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
if (len > INT_MAX - sizeof(struct udphdr)) if (len > INT_MAX - sizeof(struct udphdr))
return -EMSGSIZE; return -EMSGSIZE;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
if (up->pending) { if (up->pending) {
/* /*
* There are pending frames. * There are pending frames.
...@@ -1294,6 +1308,20 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -1294,6 +1308,20 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
goto do_confirm; goto do_confirm;
back_from_confirm: back_from_confirm:
/* Lockless fast path for the non-corking case */
if (!corkreq) {
struct sk_buff *skb;
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), hlimit, tclass, opt,
&fl6, (struct rt6_info *)dst,
msg->msg_flags, dontfrag);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
err = udp_v6_send_skb(skb, &fl6);
goto release_dst;
}
lock_sock(sk); lock_sock(sk);
if (unlikely(up->pending)) { if (unlikely(up->pending)) {
/* The socket is already corked while preparing it. */ /* The socket is already corked while preparing it. */
...@@ -1311,7 +1339,6 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -1311,7 +1339,6 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
if (dontfrag < 0) if (dontfrag < 0)
dontfrag = np->dontfrag; dontfrag = np->dontfrag;
up->len += ulen; up->len += ulen;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
err = ip6_append_data(sk, getfrag, msg, ulen, err = ip6_append_data(sk, getfrag, msg, ulen,
sizeof(struct udphdr), hlimit, tclass, opt, &fl6, sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
(struct rt6_info *)dst, (struct rt6_info *)dst,
...@@ -1323,6 +1350,11 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -1323,6 +1350,11 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
up->pending = 0; up->pending = 0;
if (err > 0)
err = np->recverr ? net_xmit_errno(err) : 0;
release_sock(sk);
release_dst:
if (dst) { if (dst) {
if (connected) { if (connected) {
ip6_dst_store(sk, dst, ip6_dst_store(sk, dst,
...@@ -1339,9 +1371,6 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -1339,9 +1371,6 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
dst = NULL; dst = NULL;
} }
if (err > 0)
err = np->recverr ? net_xmit_errno(err) : 0;
release_sock(sk);
out: out:
dst_release(dst); dst_release(dst);
fl6_sock_release(flowlabel); fl6_sock_release(flowlabel);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment