Commit 53e20678 authored by David S. Miller

Merge branch 'tcp-remove-non-GSO-code'

Eric Dumazet says:

====================
tcp: remove non GSO code

Switching TCP to GSO mode, relying on the core networking layers to
adapt packets for dumb devices when needed, was overdue (a minimal
sketch of the resulting capability flow is included after the commit
metadata below).

1) Most TCP development is done with TSO in mind.
2) Fewer high-resolution timers need to be armed for TCP pacing.
3) GSO can benefit from the xmit_more hint.
4) Receive-side GRO is more effective (as if TSO were really used on the sender)
   -> fewer ACK packets and less overhead.
5) Write queues have less overhead (one skb holds about 64KB of payload).
6) SACK coalescing just works (no payload in skb->head).
7) The retransmit rb-tree contains fewer packets, so SACK processing is cheaper.
8) Legacy code is removed, meaning fewer maintenance hassles.

Note that I have left the sendpage/zerocopy paths alone, but they can
probably benefit from the same strategy.

Thanks to Oleksandr Natalenko for reporting a performance issue with
BBR/fq_codel, which was the main reason I worked on this patch series.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 960103ff 98be9b12
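
The core mechanism of the series is small: tcp_init_sock() forces NETIF_F_GSO in the
new sk->sk_route_forced_caps field, and sk_setup_caps() ORs those forced caps into
sk->sk_route_caps, so every TCP socket reports GSO support and devices without TSO
fall back to software segmentation (NETIF_F_GSO_SOFTWARE) at transmit time. The
standalone C sketch below mirrors that capability computation; the flag values are
placeholders and setup_caps() is a toy stand-in for sk_setup_caps(), not kernel code.

#include <stdio.h>

/* Placeholder feature bits -- the real NETIF_F_* values live in
 * include/linux/netdev_features.h and differ from these.
 */
#define NETIF_F_SG           (1u << 0)
#define NETIF_F_GSO          (1u << 1)
#define NETIF_F_GSO_SOFTWARE (1u << 2)

static unsigned int setup_caps(unsigned int dev_features,
			       unsigned int forced_caps,
			       unsigned int nocaps)
{
	/* Mirrors the new sk_setup_caps() flow: device features are OR'ed
	 * with the socket's forced caps, software GSO is added whenever GSO
	 * is present, and explicitly disabled caps are masked out last.
	 */
	unsigned int caps = dev_features | forced_caps;

	if (caps & NETIF_F_GSO)
		caps |= NETIF_F_GSO_SOFTWARE;
	return caps & ~nocaps;
}

int main(void)
{
	/* A "dumb" NIC: no scatter-gather, no TSO. TCP still forces
	 * NETIF_F_GSO (as tcp_init_sock() now does), so GSO ends up enabled
	 * and segmentation happens in software at transmit time.
	 */
	unsigned int caps = setup_caps(0 /* dev_features */,
				       NETIF_F_GSO /* forced */,
				       0 /* nocaps */);

	printf("GSO: %s, software GSO: %s\n",
	       (caps & NETIF_F_GSO) ? "yes" : "no",
	       (caps & NETIF_F_GSO_SOFTWARE) ? "yes" : "no");
	return 0;
}
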
@@ -417,6 +417,7 @@ struct sock {
 	struct page_frag	sk_frag;
 	netdev_features_t	sk_route_caps;
 	netdev_features_t	sk_route_nocaps;
+	netdev_features_t	sk_route_forced_caps;
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	gfp_t			sk_allocation;
@@ -1862,15 +1863,6 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
 	sk->sk_route_caps &= ~flags;
 }
 
-static inline bool sk_check_csum_caps(struct sock *sk)
-{
-	return (sk->sk_route_caps & NETIF_F_HW_CSUM) ||
-	       (sk->sk_family == PF_INET &&
-		(sk->sk_route_caps & NETIF_F_IP_CSUM)) ||
-	       (sk->sk_family == PF_INET6 &&
-		(sk->sk_route_caps & NETIF_F_IPV6_CSUM));
-}
-
 static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
 					   struct iov_iter *from, char *to,
 					   int copy, int offset)
...
@@ -1777,7 +1777,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 	u32 max_segs = 1;
 
 	sk_dst_set(sk, dst);
-	sk->sk_route_caps = dst->dev->features;
+	sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
 	sk->sk_route_caps &= ~sk->sk_route_nocaps;
...
@@ -453,6 +453,7 @@ void tcp_init_sock(struct sock *sk)
 	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
 
 	sk_sockets_allocated_inc(sk);
+	sk->sk_route_forced_caps = NETIF_F_GSO;
 }
 EXPORT_SYMBOL(tcp_init_sock);
@@ -897,7 +898,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 new_size_goal, size_goal;
 
-	if (!large_allowed || !sk_can_gso(sk))
+	if (!large_allowed)
 		return mss_now;
 
 	/* Note : tcp_tso_autosize() will eventually split this later */
@@ -1062,8 +1063,7 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages);
 int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
 			size_t size, int flags)
 {
-	if (!(sk->sk_route_caps & NETIF_F_SG) ||
-	    !sk_check_csum_caps(sk))
+	if (!(sk->sk_route_caps & NETIF_F_SG))
 		return sock_no_sendpage_locked(sk, page, offset, size, flags);
 
 	tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
@@ -1102,27 +1102,11 @@ static int linear_payload_sz(bool first_skb)
 	return 0;
 }
 
-static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
+static int select_size(bool first_skb, bool zc)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
-	int tmp = tp->mss_cache;
-
-	if (sg) {
-		if (zc)
-			return 0;
-
-		if (sk_can_gso(sk)) {
-			tmp = linear_payload_sz(first_skb);
-		} else {
-			int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
-
-			if (tmp >= pgbreak &&
-			    tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
-				tmp = pgbreak;
-		}
-	}
-	return tmp;
+	if (zc)
+		return 0;
+	return linear_payload_sz(first_skb);
 }
 
 void tcp_free_fastopen_req(struct tcp_sock *tp)
@@ -1187,7 +1171,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 	int flags, err, copied = 0;
 	int mss_now = 0, size_goal, copied_syn = 0;
 	bool process_backlog = false;
-	bool sg, zc = false;
+	bool zc = false;
 	long timeo;
 
 	flags = msg->msg_flags;
@@ -1205,7 +1189,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			goto out_err;
 		}
 
-		zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+		zc = sk->sk_route_caps & NETIF_F_SG;
 		if (!zc)
 			uarg->zerocopy = 0;
 	}
@@ -1268,18 +1252,12 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
 		goto do_error;
 
-	sg = !!(sk->sk_route_caps & NETIF_F_SG);
-
 	while (msg_data_left(msg)) {
 		int copy = 0;
-		int max = size_goal;
 
 		skb = tcp_write_queue_tail(sk);
-		if (skb) {
-			if (skb->ip_summed == CHECKSUM_NONE)
-				max = mss_now;
-			copy = max - skb->len;
-		}
+		if (skb)
+			copy = size_goal - skb->len;
 
 		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
 			bool first_skb;
@@ -1297,22 +1275,17 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 				goto restart;
 			}
 			first_skb = tcp_rtx_and_write_queues_empty(sk);
-			linear = select_size(sk, sg, first_skb, zc);
+			linear = select_size(first_skb, zc);
 			skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
 						  first_skb);
 			if (!skb)
 				goto wait_for_memory;
 
 			process_backlog = true;
-			/*
-			 * Check whether we can use HW checksum.
-			 */
-			if (sk_check_csum_caps(sk))
-				skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->ip_summed = CHECKSUM_PARTIAL;
 
 			skb_entail(sk, skb);
 			copy = size_goal;
-			max = size_goal;
 
 			/* All packets are restored as if they have
 			 * already been sent. skb_mstamp isn't set to
@@ -1343,7 +1316,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			if (!skb_can_coalesce(skb, i, pfrag->page,
 					      pfrag->offset)) {
-				if (i >= sysctl_max_skb_frags || !sg) {
+				if (i >= sysctl_max_skb_frags) {
 					tcp_mark_push(tp, skb);
 					goto new_segment;
 				}
@@ -1396,7 +1369,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			goto out;
 		}
 
-		if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
+		if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
 			continue;
 
 		if (forced_push(tp)) {
...
@@ -1358,9 +1358,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	int len;
 	int in_sack;
 
-	if (!sk_can_gso(sk))
-		goto fallback;
-
 	/* Normally R but no L won't result in plain S */
 	if (!dup_sack &&
 	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
...
@@ -561,16 +561,9 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 {
 	struct tcphdr *th = tcp_hdr(skb);
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct tcphdr, check);
-	} else {
-		th->check = tcp_v4_check(skb->len, saddr, daddr,
-					 csum_partial(th,
-						      th->doff << 2,
-						      skb->csum));
-	}
+	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
+	skb->csum_start = skb_transport_header(skb) - skb->head;
+	skb->csum_offset = offsetof(struct tcphdr, check);
 }
 
 /* This routine computes an IPv4 TCP checksum. */
...
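
With the legacy path gone, every TCP skb is CHECKSUM_PARTIAL: __tcp_v4_send_check()
now only seeds th->check with the pseudo-header sum and records csum_start /
csum_offset, and whoever finishes the packet (hardware, or the software fallback for
devices without checksum offload) completes the checksum at that offset. The toy
userspace program below illustrates just the csum_start/csum_offset bookkeeping; the
struct, the byte handling, and the omitted pseudo-header seed are deliberate
simplifications, not the kernel's implementation.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct tcphdr_toy {			/* simplified stand-in for struct tcphdr */
	uint16_t source, dest;
	uint32_t seq, ack_seq;
	uint16_t flags, window;
	uint16_t check;			/* the field csum_offset points at */
	uint16_t urg_ptr;
};

/* Fold a 32-bit accumulator into a 16-bit ones'-complement checksum. */
static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* Sum 16-bit big-endian words, padding an odd trailing byte with zero. */
static uint32_t csum_accumulate(const uint8_t *data, size_t len, uint32_t sum)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += ((uint32_t)data[i] << 8) | data[i + 1];
	if (len & 1)
		sum += (uint32_t)data[len - 1] << 8;
	return sum;
}

int main(void)
{
	uint8_t pkt[sizeof(struct tcphdr_toy) + 12] = { 0 };	/* header + payload */
	size_t csum_start = 0;					/* transport header offset */
	size_t csum_offset = offsetof(struct tcphdr_toy, check);

	/* Software fallback: checksum everything from csum_start to the end of
	 * the packet, then store the folded result at csum_start + csum_offset
	 * (pseudo-header seed and byte order are ignored in this toy).
	 */
	uint16_t check = csum_fold(csum_accumulate(pkt + csum_start, sizeof(pkt), 0));

	memcpy(pkt + csum_start + csum_offset, &check, sizeof(check));
	printf("csum_offset=%zu check=0x%04x\n", csum_offset, check);
	return 0;
}
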
@@ -1206,7 +1206,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 /* Initialize TSO segments for a packet. */
 static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
 {
-	if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
+	if (skb->len <= mss_now) {
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
 		 */
@@ -1335,21 +1335,9 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 
 	tcp_skb_fragment_eor(skb, buff);
 
-	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
-		/* Copy and checksum data tail into the new buffer. */
-		buff->csum = csum_partial_copy_nocheck(skb->data + len,
-						       skb_put(buff, nsize),
-						       nsize, 0);
-		skb_trim(skb, len);
-
-		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb_split(skb, buff, len);
-	}
+	skb_split(skb, buff, len);
 
-	buff->ip_summed = skb->ip_summed;
+	buff->ip_summed = CHECKSUM_PARTIAL;
 
 	buff->tstamp = skb->tstamp;
 	tcp_fragment_tstamp(skb, buff);
@@ -1901,7 +1889,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	tcp_skb_fragment_eor(skb, buff);
 
-	buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
+	buff->ip_summed = CHECKSUM_PARTIAL;
 	skb_split(skb, buff, len);
 
 	tcp_fragment_tstamp(skb, buff);
@@ -2134,7 +2122,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
 	TCP_SKB_CB(nskb)->sacked = 0;
 	nskb->csum = 0;
-	nskb->ip_summed = skb->ip_summed;
+	nskb->ip_summed = CHECKSUM_PARTIAL;
 
 	tcp_insert_write_queue_before(nskb, skb, sk);
 	tcp_highest_sack_replace(sk, skb, nskb);
@@ -2142,14 +2130,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	len = 0;
 	tcp_for_write_queue_from_safe(skb, next, sk) {
 		copy = min_t(int, skb->len, probe_size - len);
-		if (nskb->ip_summed) {
-			skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
-		} else {
-			__wsum csum = skb_copy_and_csum_bits(skb, 0,
-							     skb_put(nskb, copy),
-							     copy, 0);
-			nskb->csum = csum_block_add(nskb->csum, csum, len);
-		}
+		skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
 
 		if (skb->len <= copy) {
 			/* We've eaten all the data from this skb.
@@ -2166,9 +2147,6 @@ static int tcp_mtu_probe(struct sock *sk)
 					   ~(TCPHDR_FIN|TCPHDR_PSH);
 			if (!skb_shinfo(skb)->nr_frags) {
 				skb_pull(skb, copy);
-				if (skb->ip_summed != CHECKSUM_PARTIAL)
-					skb->csum = csum_partial(skb->data,
-								 skb->len, 0);
 			} else {
 				__pskb_trim_head(skb, copy);
 				tcp_set_skb_tso_segs(skb, mss_now);
@@ -2746,12 +2724,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 	}
 
 	tcp_highest_sack_replace(sk, next_skb, skb);
 
-	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
-		skb->ip_summed = CHECKSUM_PARTIAL;
-
-	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
-
 	/* Update sequence range on original skb. */
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
...