Commit c5a65680 authored by David S. Miller

Merge branch 'csums-next'

Tom Herbert says:

====================
net: Checksum offload changes - Part VI

I am working on overhauling RX checksum offload. Goals of this effort
are:

- Specify what exactly it means when driver returns CHECKSUM_UNNECESSARY
- Preserve CHECKSUM_COMPLETE through encapsulation layers
- Don't do skb_checksum more than once per packet
- Unify GRO and non-GRO csum verification as much as possible
- Unify the checksum functions (checksum_init)
- Simplify code

What is in this sixth patch set:

- Add skb->csum_bad. This allows a device or GRO to indicate that an
  invalid checksum was detected.
- Checksum unnecessary to checksum complete conversions.

With these changes, I believe that the third goal of the overhaul is
now mostly achieved. In the case of no encapsulation or one layer of
encapsulation, there should only be at most one skb_checksum over
each packet (between GRO and normal path). In the case of two layers
of encapsulation, it is still possible with the right combination of
non-zero and zero UDP checksums to have >1 skb_checksum. For instance:
IP>GRE(with csum)>IP>UDP(zero csum)>VXLAN>IP>UDP(non-zero csum),
would likely necessitate an skb_checksum in GRO and normal path.
This doesn't seem like a common scenario at all, so I'm inclined to
not address this now; if multiple layers of encapsulation become
popular we can reassess.

Note that checksum conversion shows a nice improvement for RX VXLAN when
outer UDP checksum is enabled (12.65% CPU compared to 20.94%). This
is not only from the fact that we don't need checksum calculation on
the host, but also allows GRO for VXLAN in this case. Checksum
conversion does not help send side (which still needs to perform
a checksum on host). For that we will implement remote checksum offload
in a later patch
(http://tools.ietf.org/html/draft-herbert-remotecsumoffload-00).

Please review carefully and test if possible, mucking with basic
checksum functions is always a little precarious :-)
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 52aec126 72297c59
......@@ -2370,6 +2370,8 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
/* Disable multicast loopback */
inet_sk(sock->sk)->mc_loop = 0;
udp_set_convert_csum(sock->sk, true);
return sock;
}
......
......@@ -2216,7 +2216,9 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \
__ret = __skb_gro_checksum_validate_complete(skb, \
compute_pseudo(skb, proto)); \
if (!__ret) \
if (__ret) \
__skb_mark_checksum_bad(skb); \
else \
skb_gro_incr_csum_unnecessary(skb); \
__ret; \
})
......@@ -2231,6 +2233,26 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
/* GRO checksum validation for a checksum with no pseudo-header callback
 * of its own: forwards to __skb_gro_checksum_validate with
 * null_compute_pseudo and constant 0/false for the remaining arguments.
 * The GRE GRO receive path treats a non-zero result as a validation
 * failure.
 * NOTE(review): the meaning of each constant argument is defined by
 * __skb_gro_checksum_validate's parameter list, which is not fully
 * visible here - confirm before changing them.
 */
#define skb_gro_checksum_simple_validate(skb) \
__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)
static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
{
return (NAPI_GRO_CB(skb)->csum_cnt == 0 &&
!NAPI_GRO_CB(skb)->csum_valid);
}
/* Record a converted checksum in the GRO control block of @skb.
 *
 * @skb:    packet whose GRO control block is updated
 * @check:  checksum field from the protocol header (not used by this helper)
 * @pseudo: pseudo-header checksum; its bitwise complement is stored as the
 *          GRO csum
 *
 * Also sets csum_valid in the GRO control block.
 */
static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
					      __sum16 check, __wsum pseudo)
{
	NAPI_GRO_CB(skb)->csum_valid = 1;
	NAPI_GRO_CB(skb)->csum = ~pseudo;
}
/* Convert the GRO checksum state of @skb when
 * __skb_gro_checksum_convert_check() allows it; otherwise do nothing.
 * @compute_pseudo is invoked as compute_pseudo(skb, proto) only when the
 * conversion actually happens.
 */
#define skb_gro_checksum_try_convert(skb, proto, check, compute_pseudo)	\
do {									\
	if (!__skb_gro_checksum_convert_check(skb))			\
		break;							\
	__skb_gro_checksum_convert(skb, check,				\
				   compute_pseudo(skb, proto));		\
} while (0)
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
const void *daddr, const void *saddr,
......
......@@ -617,7 +617,8 @@ struct sk_buff {
kmemcheck_bitfield_begin(flags3);
__u8 csum_level:2;
/* 14 bit hole */
__u8 csum_bad:1;
/* 13 bit hole */
kmemcheck_bitfield_end(flags3);
__be16 inner_protocol;
......@@ -2825,6 +2826,21 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
}
}
/* Flag the checksum currently being examined in @skb as bad (typically
 * called from the GRO path).
 *
 * The flag is only set when ip_summed is CHECKSUM_NONE or
 * CHECKSUM_UNNECESSARY:
 *  - CHECKSUM_NONE: this must be the first checksum encountered in the
 *    packet.
 *  - CHECKSUM_UNNECESSARY: this is the first checksum after the last one
 *    validated.
 * For any other ip_summed value the skb is left untouched.
 *
 * For UDP, a zero checksum can not be marked as bad.
 */
static inline void __skb_mark_checksum_bad(struct sk_buff *skb)
{
	if (skb->ip_summed != CHECKSUM_NONE &&
	    skb->ip_summed != CHECKSUM_UNNECESSARY)
		return;

	skb->csum_bad = 1;
}
/* Check if we need to perform checksum complete validation.
*
* Returns true if checksum complete is needed, false otherwise
......@@ -2866,6 +2882,9 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
skb->csum_valid = 1;
return 0;
}
} else if (skb->csum_bad) {
/* ip_summed == CHECKSUM_NONE in this case */
return 1;
}
skb->csum = psum;
......@@ -2923,6 +2942,26 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
/* Checksum validation wrapper that forwards to __skb_checksum_validate
 * with null_compute_pseudo as the pseudo-header callback and constant
 * values for the remaining arguments.
 * NOTE(review): the meaning of the constants (0, true, false, 0) is fixed
 * by __skb_checksum_validate's parameter list, which is not visible here -
 * confirm before changing them.
 */
#define skb_checksum_simple_validate(skb) \
__skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)
static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
{
return (skb->ip_summed == CHECKSUM_NONE &&
skb->csum_valid && !skb->csum_bad);
}
/* Switch @skb to CHECKSUM_COMPLETE.
 *
 * @skb:    packet to update
 * @check:  checksum field from the protocol header (not used by this helper)
 * @pseudo: pseudo-header checksum; its bitwise complement becomes skb->csum
 */
static inline void __skb_checksum_convert(struct sk_buff *skb,
					  __sum16 check, __wsum pseudo)
{
	skb->ip_summed = CHECKSUM_COMPLETE;
	skb->csum = ~pseudo;
}
/* Convert @skb to CHECKSUM_COMPLETE when __skb_checksum_convert_check()
 * allows it; otherwise do nothing. @compute_pseudo is invoked as
 * compute_pseudo(skb, proto) only when the conversion actually happens.
 */
#define skb_checksum_try_convert(skb, proto, check, compute_pseudo)	\
do {									\
	if (!__skb_checksum_convert_check(skb))				\
		break;							\
	__skb_checksum_convert(skb, check,				\
			       compute_pseudo(skb, proto));		\
} while (0)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
void nf_conntrack_destroy(struct nf_conntrack *nfct);
static inline void nf_conntrack_put(struct nf_conntrack *nfct)
......
......@@ -49,7 +49,11 @@ struct udp_sock {
unsigned int corkflag; /* Cork is required */
__u8 encap_type; /* Is this an Encapsulation socket? */
unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */
no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
convert_csum:1;/* On receive, convert checksum
* unnecessary to checksum complete
* if possible.
*/
/*
* Following member retains the information to create a UDP header
* when the socket is uncorked.
......@@ -98,6 +102,16 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
return udp_sk(sk)->no_check6_rx;
}
/* Set the convert_csum flag of UDP socket @sk to @val. The flag asks the
 * receive path to convert checksum unnecessary to checksum complete if
 * possible.
 */
static inline void udp_set_convert_csum(struct sock *sk, bool val)
{
	struct udp_sock *up = udp_sk(sk);

	up->convert_csum = val;
}
static inline bool udp_get_convert_csum(struct sock *sk)
{
return udp_sk(sk)->convert_csum;
}
/* Iterate over @list with hlist_nulls_for_each_entry, using
 * __sk_common.skc_portaddr_node as the linkage member; @__sk is the entry
 * cursor and @node the list-node cursor.
 */
#define udp_portaddr_for_each_entry(__sk, node, list) \
hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
......
......@@ -3918,7 +3918,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
if (skb_is_gso(skb) || skb_has_frag_list(skb))
if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
goto normal;
gro_list_prepare(napi, skb);
......
......@@ -125,6 +125,10 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
*csum_err = true;
return -EINVAL;
}
skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
null_compute_pseudo);
options++;
}
......
......@@ -172,10 +172,14 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
}
/* Don't bother verifying checksum if we're going to flush anyway. */
if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_simple_validate(skb))
if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush) {
if (skb_gro_checksum_simple_validate(skb))
goto out_unlock;
skb_gro_checksum_try_convert(skb, IPPROTO_GRE, 0,
null_compute_pseudo);
}
flush = 0;
for (p = *head; p; p = p->next) {
......
......@@ -1788,6 +1788,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (sk != NULL) {
int ret;
if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk))
skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
inet_compute_pseudo);
ret = udp_queue_rcv_skb(sk, skb);
sock_put(sk);
......
......@@ -290,16 +290,25 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
{
	struct udphdr *uh = udp_gro_udphdr(skb);

	/* No UDP header could be obtained: mark the skb for flushing. */
	if (unlikely(!uh))
		goto flush;

	/* Don't bother verifying checksum if we're going to flush anyway.
	 * The test has to be on flush being SET: the negated form skipped
	 * validation for every packet that is still a GRO candidate and
	 * validated only packets that were already going to be flushed.
	 */
	if (NAPI_GRO_CB(skb)->flush)
		goto skip;

	/* A non-zero result means validation failed. When it passes and the
	 * UDP checksum field is non-zero, try the checksum conversion.
	 */
	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
						 inet_gro_compute_pseudo))
		goto flush;
	else if (uh->check)
		skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
					     inet_gro_compute_pseudo);
skip:
	return udp_gro_receive(head, skb, uh);

flush:
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
}
int udp_gro_complete(struct sk_buff *skb, int nhoff)
......
......@@ -891,6 +891,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
goto csum_error;
}
if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk))
skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
ip6_compute_pseudo);
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
......
......@@ -134,16 +134,26 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
{
	struct udphdr *uh = udp_gro_udphdr(skb);

	/* No UDP header could be obtained: mark the skb for flushing. */
	if (unlikely(!uh))
		goto flush;

	/* Don't bother verifying checksum if we're going to flush anyway.
	 * The test has to be on flush being SET: the negated form skipped
	 * validation for every packet that is still a GRO candidate and
	 * validated only packets that were already going to be flushed.
	 */
	if (NAPI_GRO_CB(skb)->flush)
		goto skip;

	/* A non-zero result means validation failed. When it passes and the
	 * UDP checksum field is non-zero, try the checksum conversion.
	 */
	if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
						 ip6_gro_compute_pseudo))
		goto flush;
	else if (uh->check)
		skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
					     ip6_gro_compute_pseudo);
skip:
	return udp_gro_receive(head, skb, uh);

flush:
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
}
int udp6_gro_complete(struct sk_buff *skb, int nhoff)
......
......@@ -1392,6 +1392,8 @@ static int l2tp_tunnel_sock_create(struct net *net,
if (err < 0)
goto out;
udp_set_convert_csum(sock->sk, true);
break;
case L2TP_ENCAPTYPE_IP:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment