Commit 1b346576 authored by Hannes Frederic Sowa, committed by David S. Miller

ipv4: yet another new IP_MTU_DISCOVER option IP_PMTUDISC_OMIT

IP_PMTUDISC_INTERFACE has a design error: because it does not allow the
generation of fragments if the interface mtu is exceeded, it is very
hard to make use of this option in already deployed name server software
for which I introduced this option.

This patch adds yet another new IP_MTU_DISCOVER option that does not honor
any path mtu information and does not accept new icmp notifications destined
for the socket this option is enabled on. But we allow outgoing fragmentation
in case the packet size exceeds the outgoing interface mtu.

As such this new option can be used as a drop-in replacement for
IP_PMTUDISC_DONT, which is currently in use by most name server software
making the adoption of this option very smooth and easy.

The original advantage of IP_PMTUDISC_INTERFACE is still maintained:
ignoring incoming path MTU updates and not honoring discovered path MTUs
in the output path.

Fixes: 482fc609 ("ipv4: introduce new IP_MTU_DISCOVER mode IP_PMTUDISC_INTERFACE")
Cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 69647ce4
...@@ -266,7 +266,8 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) ...@@ -266,7 +266,8 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
static inline bool ip_sk_accept_pmtu(const struct sock *sk) static inline bool ip_sk_accept_pmtu(const struct sock *sk)
{ {
return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE; return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE &&
inet_sk(sk)->pmtudisc != IP_PMTUDISC_OMIT;
} }
static inline bool ip_sk_use_pmtu(const struct sock *sk) static inline bool ip_sk_use_pmtu(const struct sock *sk)
...@@ -274,6 +275,12 @@ static inline bool ip_sk_use_pmtu(const struct sock *sk) ...@@ -274,6 +275,12 @@ static inline bool ip_sk_use_pmtu(const struct sock *sk)
return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
} }
/* Tell whether the socket's IP_MTU_DISCOVER mode selects the "local_df"
 * behaviour: true when the mode is IP_PMTUDISC_OMIT or is any mode ordered
 * below IP_PMTUDISC_DO, false otherwise.
 */
static inline bool ip_sk_local_df(const struct sock *sk)
{
	/* OMIT sorts above DO numerically, so it needs its own check. */
	if (inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT)
		return true;

	return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO;
}
static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
bool forwarding) bool forwarding)
{ {
......
...@@ -120,6 +120,10 @@ struct in_addr { ...@@ -120,6 +120,10 @@ struct in_addr {
* this socket to prevent accepting spoofed ones. * this socket to prevent accepting spoofed ones.
*/ */
#define IP_PMTUDISC_INTERFACE 4 #define IP_PMTUDISC_INTERFACE 4
/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
* fragmented if they exceed the interface mtu
*/
#define IP_PMTUDISC_OMIT 5
#define IP_MULTICAST_IF 32 #define IP_MULTICAST_IF 32
#define IP_MULTICAST_TTL 33 #define IP_MULTICAST_TTL 33
......
...@@ -824,8 +824,7 @@ static int __ip_append_data(struct sock *sk, ...@@ -824,8 +824,7 @@ static int __ip_append_data(struct sock *sk,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
mtu : 0xFFFF;
if (cork->length + length > maxnonfragsize - fragheaderlen) { if (cork->length + length > maxnonfragsize - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
...@@ -1148,8 +1147,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, ...@@ -1148,8 +1147,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
mtu : 0xFFFF;
if (cork->length + size > maxnonfragsize - fragheaderlen) { if (cork->length + size > maxnonfragsize - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
...@@ -1310,8 +1308,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, ...@@ -1310,8 +1308,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
* to fragment the frame generated here. No matter, what transforms * to fragment the frame generated here. No matter, what transforms
* how transforms change size of the packet, it will come out. * how transforms change size of the packet, it will come out.
*/ */
if (inet->pmtudisc < IP_PMTUDISC_DO) skb->local_df = ip_sk_local_df(sk);
skb->local_df = 1;
/* DF bit is set when we want to see DF on outgoing frames. /* DF bit is set when we want to see DF on outgoing frames.
* If local_df is set too, we still allow to fragment this frame * If local_df is set too, we still allow to fragment this frame
......
...@@ -643,7 +643,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, ...@@ -643,7 +643,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet->nodefrag = val ? 1 : 0; inet->nodefrag = val ? 1 : 0;
break; break;
case IP_MTU_DISCOVER: case IP_MTU_DISCOVER:
if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_INTERFACE) if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
goto e_inval; goto e_inval;
inet->pmtudisc = val; inet->pmtudisc = val;
break; break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment