Commit 76d7c457 authored by David S. Miller

Merge branch 'icmp_frag'

Andy Zhou says:

====================
fragmentation ICMP

Currently, we send ICMP packets when errors occur during fragmentation or
de-fragmentation.  However, it is a bug to send those ICMP packets
when netfilter is being used for bridging.

Those ICMP packets are only expected in the context of routing, not in
bridging mode.

The local stack is not involved in bridging forward decisions, and thus
should not be used for deciding the reverse path for those ICMP messages.

This bug only affects IPv4, not IPv6.

v1->v2:  restructure the patches into two patches that fix defragmentation and
         fragmentation respectively.

	 A bit is added in IPCB to control whether an ICMP packet should be
	 generated for defragmentation.

	 Fragmentation ICMP is now removed by restructuring the
	 ip_fragment() API.

v2->v3:  Add dropping of ICMP for bridging conntrack users;
         drop exporting the ip_fragment() API.

v3->v4:  Remove unnecessary parentheses in 'return' statements

v4->v5:  Drop the patch that sets and checks a bit in IPCB
         that prevents ip_defrag() from sending ICMP.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents a2ad5d2a 49d16b23
...@@ -108,8 +108,8 @@ int ip_local_deliver(struct sk_buff *skb); ...@@ -108,8 +108,8 @@ int ip_local_deliver(struct sk_buff *skb);
int ip_mr_input(struct sk_buff *skb); int ip_mr_input(struct sk_buff *skb);
int ip_output(struct sock *sk, struct sk_buff *skb); int ip_output(struct sock *sk, struct sk_buff *skb);
int ip_mc_output(struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct sock *sk, struct sk_buff *skb);
int ip_fragment(struct sock *sk, struct sk_buff *skb, int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *)); int (*output)(struct sock *, struct sk_buff *));
int ip_do_nat(struct sk_buff *skb); int ip_do_nat(struct sk_buff *skb);
void ip_send_check(struct iphdr *ip); void ip_send_check(struct iphdr *ip);
int __ip_local_out(struct sk_buff *skb); int __ip_local_out(struct sk_buff *skb);
...@@ -478,6 +478,16 @@ enum ip_defrag_users { ...@@ -478,6 +478,16 @@ enum ip_defrag_users {
IP_DEFRAG_MACVLAN, IP_DEFRAG_MACVLAN,
}; };
/* Range test for defrag users: evaluates to true only when
 * 'lower_bond' <= 'user' <= 'upper_bond' (both ends included).
 */
static inline bool ip_defrag_user_in_between(u32 user,
					     enum ip_defrag_users lower_bond,
					     enum ip_defrag_users upper_bond)
{
	/* Anything below the lower end is out of range. */
	if (user < lower_bond)
		return false;

	return user <= upper_bond;
}
int ip_defrag(struct sk_buff *skb, u32 user); int ip_defrag(struct sk_buff *skb, u32 user);
#ifdef CONFIG_INET #ifdef CONFIG_INET
struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user); struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user);
......
...@@ -853,6 +853,25 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) ...@@ -853,6 +853,25 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
return br_dev_queue_push_xmit(sk, skb); return br_dev_queue_push_xmit(sk, skb);
} }
/*
 * Pre-fragmentation check used on the bridge netfilter transmit path.
 *
 * The skb is refused when DF is set in the IP header and skb->ignore_df
 * is clear, or when a non-zero IPCB frag_max_size exceeds the MTU.  In
 * that case IPSTATS_MIB_FRAGFAILS is incremented, the skb is freed and
 * -EMSGSIZE is returned; no ICMP error is generated here.  Otherwise
 * the skb is passed to ip_do_fragment() along with 'output', and that
 * call's result is returned.
 */
static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb,
			     int (*output)(struct sock *, struct sk_buff *))
{
	unsigned int path_mtu = ip_skb_dst_mtu(skb);
	struct iphdr *hdr = ip_hdr(skb);
	struct net_device *out_dev = skb_rtable(skb)->dst.dev;
	/* DF only counts when the skb has not been told to ignore it. */
	int df_enforced = (hdr->frag_off & htons(IP_DF)) && !skb->ignore_df;
	/* A zero frag_max_size never triggers the size check. */
	int frag_too_big = IPCB(skb)->frag_max_size &&
			   IPCB(skb)->frag_max_size > path_mtu;

	if (unlikely(df_enforced || frag_too_big)) {
		IP_INC_STATS(dev_net(out_dev), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	return ip_do_fragment(sk, skb, output);
}
static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
{ {
int ret; int ret;
...@@ -886,7 +905,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) ...@@ -886,7 +905,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
skb_copy_from_linear_data_offset(skb, -data->size, data->mac, skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size); data->size);
ret = ip_fragment(sk, skb, br_nf_push_frag_xmit); ret = br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit);
} else { } else {
nf_bridge_info_free(skb); nf_bridge_info_free(skb);
ret = br_dev_queue_push_xmit(sk, skb); ret = br_dev_queue_push_xmit(sk, skb);
......
...@@ -173,6 +173,15 @@ static void ipq_kill(struct ipq *ipq) ...@@ -173,6 +173,15 @@ static void ipq_kill(struct ipq *ipq)
inet_frag_kill(&ipq->q, &ip4_frags); inet_frag_kill(&ipq->q, &ip4_frags);
} }
/*
 * Returns true when 'user' is IP_DEFRAG_AF_PACKET, or falls inside
 * [IP_DEFRAG_CONNTRACK_IN, __IP_DEFRAG_CONNTRACK_IN_END], or inside
 * [IP_DEFRAG_CONNTRACK_BRIDGE_IN, __IP_DEFRAG_CONNTRACK_BRIDGE_IN];
 * returns false for every other value.
 */
static bool frag_expire_skip_icmp(u32 user)
{
	if (user == IP_DEFRAG_AF_PACKET)
		return true;

	if (ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN,
				      __IP_DEFRAG_CONNTRACK_IN_END))
		return true;

	return ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN,
					 __IP_DEFRAG_CONNTRACK_BRIDGE_IN);
}
/* /*
* Oops, a fragment queue timed out. Kill it and send an ICMP reply. * Oops, a fragment queue timed out. Kill it and send an ICMP reply.
*/ */
...@@ -217,10 +226,8 @@ static void ip_expire(unsigned long arg) ...@@ -217,10 +226,8 @@ static void ip_expire(unsigned long arg)
/* Only an end host needs to send an ICMP /* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792. * "Fragment Reassembly Timeout" message, per RFC792.
*/ */
if (qp->user == IP_DEFRAG_AF_PACKET || if (frag_expire_skip_icmp(qp->user) &&
((qp->user >= IP_DEFRAG_CONNTRACK_IN) && (skb_rtable(head)->rt_type != RTN_LOCAL))
(qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) &&
(skb_rtable(head)->rt_type != RTN_LOCAL)))
goto out_rcu_unlock; goto out_rcu_unlock;
/* Send an ICMP "Fragment Reassembly Timeout" message. */ /* Send an ICMP "Fragment Reassembly Timeout" message. */
......
...@@ -83,6 +83,9 @@ ...@@ -83,6 +83,9 @@
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl); EXPORT_SYMBOL(sysctl_ip_default_ttl);
static int ip_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *));
/* Generate a checksum for an outgoing IP datagram. */ /* Generate a checksum for an outgoing IP datagram. */
void ip_send_check(struct iphdr *iph) void ip_send_check(struct iphdr *iph)
{ {
...@@ -478,6 +481,28 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) ...@@ -478,6 +481,28 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from); skb_copy_secmark(to, from);
} }
/*
 * Gatekeeper in front of ip_do_fragment().
 *
 * The skb is refused when DF is set in the IP header and skb->ignore_df
 * is clear, or when a non-zero IPCB frag_max_size exceeds the MTU.  On
 * refusal IPSTATS_MIB_FRAGFAILS is incremented, an ICMP_DEST_UNREACH /
 * ICMP_FRAG_NEEDED message carrying the MTU is sent via icmp_send(),
 * the skb is freed and -EMSGSIZE is returned.  Otherwise the result of
 * ip_do_fragment(sk, skb, output) is returned.
 */
static int ip_fragment(struct sock *sk, struct sk_buff *skb,
		       int (*output)(struct sock *, struct sk_buff *))
{
	struct iphdr *hdr = ip_hdr(skb);
	unsigned int path_mtu = ip_skb_dst_mtu(skb);
	/* DF only counts when the skb has not been told to ignore it. */
	int df_enforced = (hdr->frag_off & htons(IP_DF)) && !skb->ignore_df;
	/* A zero frag_max_size never triggers the size check. */
	int frag_too_big = IPCB(skb)->frag_max_size &&
			   IPCB(skb)->frag_max_size > path_mtu;

	if (unlikely(df_enforced || frag_too_big)) {
		/* The route is only looked up on the failure path. */
		IP_INC_STATS(dev_net(skb_rtable(skb)->dst.dev),
			     IPSTATS_MIB_FRAGFAILS);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(path_mtu));
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	return ip_do_fragment(sk, skb, output);
}
/* /*
* This IP datagram is too large to be sent in one piece. Break it up into * This IP datagram is too large to be sent in one piece. Break it up into
* smaller pieces (each of size equal to IP header plus * smaller pieces (each of size equal to IP header plus
...@@ -485,8 +510,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) ...@@ -485,8 +510,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
* single device frame, and queue such a frame for sending. * single device frame, and queue such a frame for sending.
*/ */
int ip_fragment(struct sock *sk, struct sk_buff *skb, int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *)) int (*output)(struct sock *, struct sk_buff *))
{ {
struct iphdr *iph; struct iphdr *iph;
int ptr; int ptr;
...@@ -507,15 +532,6 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb, ...@@ -507,15 +532,6 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb,
iph = ip_hdr(skb); iph = ip_hdr(skb);
mtu = ip_skb_dst_mtu(skb); mtu = ip_skb_dst_mtu(skb);
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
kfree_skb(skb);
return -EMSGSIZE;
}
/* /*
* Setup starting values. * Setup starting values.
...@@ -751,7 +767,7 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb, ...@@ -751,7 +767,7 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb,
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
return err; return err;
} }
EXPORT_SYMBOL(ip_fragment); EXPORT_SYMBOL(ip_do_fragment);
int int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment