Commit 36393395 authored by David S. Miller

ipv4: Handle PMTU in all ICMP error handlers.

With ip_rt_frag_needed() removed, we have to explicitly update PMTU
information in every ICMP error handler.

Create two helper functions to facilitate this.

1) ipv4_sk_update_pmtu()

   This updates the PMTU when we have a socket context to
   work with.

2) ipv4_update_pmtu()

   Raw version, used when no socket context is available.  For this
   interface, we essentially just pass in explicit arguments for
   the flow identity information we would have extracted from the
   socket.

   And you'll notice that ipv4_sk_update_pmtu() is simply implemented
   in terms of ipv4_update_pmtu().

Note that __ip_route_output_key() is used, rather than something like
ip_route_output_flow() or ip_route_output_key().  This is because we
absolutely do not want to end up with a route that does IPSEC
encapsulation and the like.  Instead, we only want the route that
would get us to the node described by the outermost IP header.
Reported-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent b3908e22
...@@ -215,6 +215,9 @@ static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 s ...@@ -215,6 +215,9 @@ static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 s
return ip_route_input_common(skb, dst, src, tos, devin, true); return ip_route_input_common(skb, dst, src, tos, devin, true);
} }
/*
 * PMTU update helpers for ICMP error handlers.
 *
 * ipv4_update_pmtu() is the raw variant for callers without a socket
 * context: the flow identity (net, oif, mark, protocol, flow_flags) is
 * passed in explicitly.
 */
extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
int oif, u32 mark, u8 protocol, int flow_flags);
/*
 * ipv4_sk_update_pmtu() is the variant for callers that have a socket;
 * it is implemented in terms of ipv4_update_pmtu().
 */
extern void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu);
extern void ip_rt_send_redirect(struct sk_buff *skb); extern void ip_rt_send_redirect(struct sk_buff *skb);
extern unsigned int inet_addr_type(struct net *net, __be32 addr); extern unsigned int inet_addr_type(struct net *net, __be32 addr);
......
...@@ -408,6 +408,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) ...@@ -408,6 +408,7 @@ static void ah4_err(struct sk_buff *skb, u32 info)
return; return;
pr_debug("pmtu discovery on SA AH/%08x/%08x\n", pr_debug("pmtu discovery on SA AH/%08x/%08x\n",
ntohl(ah->spi), ntohl(iph->daddr)); ntohl(ah->spi), ntohl(iph->daddr));
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
xfrm_state_put(x); xfrm_state_put(x);
} }
......
...@@ -494,6 +494,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) ...@@ -494,6 +494,7 @@ static void esp4_err(struct sk_buff *skb, u32 info)
return; return;
NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
ntohl(esph->spi), ntohl(iph->daddr)); ntohl(esph->spi), ntohl(iph->daddr));
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
xfrm_state_put(x); xfrm_state_put(x);
} }
......
...@@ -516,9 +516,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) ...@@ -516,9 +516,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
case ICMP_PORT_UNREACH: case ICMP_PORT_UNREACH:
/* Impossible event. */ /* Impossible event. */
return; return;
case ICMP_FRAG_NEEDED:
/* Soft state for pmtu is maintained by IP core. */
return;
default: default:
/* All others are translated to HOST_UNREACH. /* All others are translated to HOST_UNREACH.
rfc2003 contains "deep thoughts" about NET_UNREACH, rfc2003 contains "deep thoughts" about NET_UNREACH,
...@@ -538,7 +535,16 @@ static void ipgre_err(struct sk_buff *skb, u32 info) ...@@ -538,7 +535,16 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
flags & GRE_KEY ? flags & GRE_KEY ?
*(((__be32 *)p) + (grehlen / 4) - 1) : 0, *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
p[1]); p[1]);
if (t == NULL || t->parms.iph.daddr == 0 || if (t == NULL)
goto out;
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
t->parms.link, 0, IPPROTO_GRE, 0);
goto out;
}
if (t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr)) ipv4_is_multicast(t->parms.iph.daddr))
goto out; goto out;
......
...@@ -42,6 +42,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) ...@@ -42,6 +42,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
return; return;
NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n", NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n",
spi, &iph->daddr); spi, &iph->daddr);
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
xfrm_state_put(x); xfrm_state_put(x);
} }
......
...@@ -348,9 +348,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) ...@@ -348,9 +348,6 @@ static int ipip_err(struct sk_buff *skb, u32 info)
case ICMP_PORT_UNREACH: case ICMP_PORT_UNREACH:
/* Impossible event. */ /* Impossible event. */
return 0; return 0;
case ICMP_FRAG_NEEDED:
/* Soft state for pmtu is maintained by IP core. */
return 0;
default: default:
/* All others are translated to HOST_UNREACH. /* All others are translated to HOST_UNREACH.
rfc2003 contains "deep thoughts" about NET_UNREACH, rfc2003 contains "deep thoughts" about NET_UNREACH,
...@@ -369,7 +366,17 @@ static int ipip_err(struct sk_buff *skb, u32 info) ...@@ -369,7 +366,17 @@ static int ipip_err(struct sk_buff *skb, u32 info)
rcu_read_lock(); rcu_read_lock();
t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
if (t == NULL || t->parms.iph.daddr == 0) if (t == NULL)
goto out;
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
t->dev->ifindex, 0, IPPROTO_IPIP, 0);
err = 0;
goto out;
}
if (t->parms.iph.daddr == 0)
goto out; goto out;
err = 0; err = 0;
......
...@@ -371,6 +371,7 @@ void ping_err(struct sk_buff *skb, u32 info) ...@@ -371,6 +371,7 @@ void ping_err(struct sk_buff *skb, u32 info)
break; break;
case ICMP_DEST_UNREACH: case ICMP_DEST_UNREACH:
if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
ipv4_sk_update_pmtu(skb, sk, info);
if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
err = EMSGSIZE; err = EMSGSIZE;
harderr = 1; harderr = 1;
......
...@@ -216,6 +216,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) ...@@ -216,6 +216,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
int err = 0; int err = 0;
int harderr = 0; int harderr = 0;
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
ipv4_sk_update_pmtu(skb, sk, info);
/* Report error on raw socket, if: /* Report error on raw socket, if:
1. User requested ip_recverr. 1. User requested ip_recverr.
2. Socket is connected (otherwise the error indication 2. Socket is connected (otherwise the error indication
......
...@@ -1711,6 +1711,34 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) ...@@ -1711,6 +1711,34 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
} }
} }
/*
 * ipv4_update_pmtu - record a new path MTU without a socket context.
 * @skb:        buffer whose data pointer is read as an IPv4 header; the
 *              header's tos, daddr and saddr select the route
 * @net:        network namespace in which the route lookup is performed
 * @mtu:        MTU value handed to ip_rt_update_pmtu()
 * @oif:        output interface index for the flow key
 * @mark:       mark for the flow key
 * @protocol:   IP protocol number for the flow key
 * @flow_flags: extra flow flags; FLOWI_FLAG_PRECOW_METRICS is always OR-ed in
 *
 * The lookup deliberately goes through __ip_route_output_key() so that the
 * result is the plain route towards the node named by the outermost IP
 * header, not a route that performs IPSEC encapsulation or similar.
 * If the lookup fails, nothing is updated.
 */
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
		      int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *hdr = (const struct iphdr *)skb->data;
	struct rtable *route;
	struct flowi4 key;

	flowi4_init_output(&key, oif, mark, RT_TOS(hdr->tos),
			   RT_SCOPE_UNIVERSE, protocol,
			   flow_flags | FLOWI_FLAG_PRECOW_METRICS,
			   hdr->daddr, hdr->saddr, 0, 0);

	route = __ip_route_output_key(net, &key);
	if (IS_ERR(route))
		return;

	ip_rt_update_pmtu(&route->dst, mtu);
	/* Drop the route obtained from the lookup above. */
	ip_rt_put(route);
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
/*
 * ipv4_sk_update_pmtu - record a new path MTU using a socket's flow identity.
 * @skb: buffer passed through unchanged to ipv4_update_pmtu()
 * @sk:  socket supplying the namespace, bound device, mark, protocol and
 *       flow flags for the route lookup
 * @mtu: MTU value to record
 *
 * Thin wrapper around ipv4_update_pmtu(). When the socket has hdrincl set,
 * IPPROTO_RAW is used as the protocol instead of sk->sk_protocol.
 */
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct inet_sock *inet = inet_sk(sk);

	/*
	 * Plain call, not "return <void expr>": ISO C forbids a return
	 * statement with an expression in a function returning void.
	 */
	ipv4_update_pmtu(skb, sock_net(sk), mtu,
			 sk->sk_bound_dev_if, sk->sk_mark,
			 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			 inet_sk_flowi_flags(sk));
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
static void ipv4_validate_peer(struct rtable *rt) static void ipv4_validate_peer(struct rtable *rt)
{ {
......
...@@ -615,6 +615,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) ...@@ -615,6 +615,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
break; break;
case ICMP_DEST_UNREACH: case ICMP_DEST_UNREACH:
if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
ipv4_sk_update_pmtu(skb, sk, info);
if (inet->pmtudisc != IP_PMTUDISC_DONT) { if (inet->pmtudisc != IP_PMTUDISC_DONT) {
err = EMSGSIZE; err = EMSGSIZE;
harderr = 1; harderr = 1;
......
...@@ -527,9 +527,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) ...@@ -527,9 +527,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
case ICMP_PORT_UNREACH: case ICMP_PORT_UNREACH:
/* Impossible event. */ /* Impossible event. */
return 0; return 0;
case ICMP_FRAG_NEEDED:
/* Soft state for pmtu is maintained by IP core. */
return 0;
default: default:
/* All others are translated to HOST_UNREACH. /* All others are translated to HOST_UNREACH.
rfc2003 contains "deep thoughts" about NET_UNREACH, rfc2003 contains "deep thoughts" about NET_UNREACH,
...@@ -551,7 +548,17 @@ static int ipip6_err(struct sk_buff *skb, u32 info) ...@@ -551,7 +548,17 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
skb->dev, skb->dev,
iph->daddr, iph->daddr,
iph->saddr); iph->saddr);
if (t == NULL || t->parms.iph.daddr == 0) if (t == NULL)
goto out;
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
t->dev->ifindex, 0, IPPROTO_IPV6, 0);
err = 0;
goto out;
}
if (t->parms.iph.daddr == 0)
goto out; goto out;
err = 0; err = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment