Commit c7bb4b89 authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

ipv6: tcp: drop silly ICMPv6 packet too big messages

While TCP stack scales reasonably well, there is still one part that
can be used to DDOS it.

IPv6 Packet too big messages have to lookup/insert a new route,
and if abused by attackers, can easily put hosts under high stress,
with many cpus contending on a spinlock while one is stuck in fib6_run_gc()

ip6_protocol_deliver_rcu()
 icmpv6_rcv()
  icmpv6_notify()
   tcp_v6_err()
    tcp_v6_mtu_reduced()
     inet6_csk_update_pmtu()
      ip6_rt_update_pmtu()
       __ip6_rt_update_pmtu()
        ip6_rt_cache_alloc()
         ip6_dst_alloc()
          dst_alloc()
           ip6_dst_gc()
            fib6_run_gc()
             spin_lock_bh() ...

Some of our servers have been hit by malicious ICMPv6 packets
trying to _increase_ the MTU/MSS of TCP flows.

We believe these ICMPv6 packets are a result of a bug in one ISP stack,
since they were blindly sent back for _every_ (small) packet sent to them.

These packets are for one TCP flow:
09:24:36.266491 IP6 Addr1 > Victim ICMP6, packet too big, mtu 1460, length 1240
09:24:36.266509 IP6 Addr1 > Victim ICMP6, packet too big, mtu 1460, length 1240
09:24:36.316688 IP6 Addr1 > Victim ICMP6, packet too big, mtu 1460, length 1240
09:24:36.316704 IP6 Addr1 > Victim ICMP6, packet too big, mtu 1460, length 1240
09:24:36.608151 IP6 Addr1 > Victim ICMP6, packet too big, mtu 1460, length 1240

TCP stack can filter some silly requests :

1) MTU below IPV6_MIN_MTU can be filtered early in tcp_v6_err()
2) tcp_v6_mtu_reduced() can drop requests trying to increase current MSS.

This tests happen before the IPv6 routing stack is entered, thus
removing the potential contention and route exhaustion.

Note that IPv6 stack was performing these checks, but too late
(ie : after the route has been added, and after the potential
garbage collect war)

v2: fix typo caught by Martin, thanks !
v3: exports tcp_mtu_to_mss(), caught by David, thanks !

Fixes: 1da177e4 ("Linux-2.6.12-rc2")
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Reviewed-by: default avatarMaciej Żenczykowski <maze@google.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Acked-by: default avatarMartin KaFai Lau <kafai@fb.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 9615fe36
...@@ -1732,6 +1732,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu) ...@@ -1732,6 +1732,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
return __tcp_mtu_to_mss(sk, pmtu) - return __tcp_mtu_to_mss(sk, pmtu) -
(tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
} }
EXPORT_SYMBOL(tcp_mtu_to_mss);
/* Inverse of above */ /* Inverse of above */
int tcp_mss_to_mtu(struct sock *sk, int mss) int tcp_mss_to_mtu(struct sock *sk, int mss)
......
...@@ -348,11 +348,20 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ...@@ -348,11 +348,20 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
static void tcp_v6_mtu_reduced(struct sock *sk) static void tcp_v6_mtu_reduced(struct sock *sk)
{ {
struct dst_entry *dst; struct dst_entry *dst;
u32 mtu;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return; return;
dst = inet6_csk_update_pmtu(sk, READ_ONCE(tcp_sk(sk)->mtu_info)); mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
/* Drop requests trying to increase our current mss.
* Check done in __ip6_rt_update_pmtu() is too late.
*/
if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
return;
dst = inet6_csk_update_pmtu(sk, mtu);
if (!dst) if (!dst)
return; return;
...@@ -433,6 +442,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ...@@ -433,6 +442,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
} }
if (type == ICMPV6_PKT_TOOBIG) { if (type == ICMPV6_PKT_TOOBIG) {
u32 mtu = ntohl(info);
/* We are not interested in TCP_LISTEN and open_requests /* We are not interested in TCP_LISTEN and open_requests
* (SYN-ACKs send out by Linux are always <576bytes so * (SYN-ACKs send out by Linux are always <576bytes so
* they should go through unfragmented). * they should go through unfragmented).
...@@ -443,7 +454,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ...@@ -443,7 +454,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!ip6_sk_accept_pmtu(sk)) if (!ip6_sk_accept_pmtu(sk))
goto out; goto out;
WRITE_ONCE(tp->mtu_info, ntohl(info)); if (mtu < IPV6_MIN_MTU)
goto out;
WRITE_ONCE(tp->mtu_info, mtu);
if (!sock_owned_by_user(sk)) if (!sock_owned_by_user(sk))
tcp_v6_mtu_reduced(sk); tcp_v6_mtu_reduced(sk);
else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment