Commit 6a17b961 authored by Eric Dumazet, committed by David S. Miller

ipv6: do not use per netns icmp sockets

Back in linux-2.6.25 (commit 98c6d1b2, "[NETNS]: Make icmpv6_sk per namespace."),
we added private per-cpu/per-netns ipv6 icmp sockets.

This adds memory and cpu costs, which do not seem needed.
Now that typical servers have 256 or more cores, this is a considerable
tax on netns users.

icmp sockets are used from BH context, do not receive packets,
and store no persistent state other than the 'struct net' pointer.

icmpv6_xmit_lock() already makes sure to lock the chosen per-cpu
socket.
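
A condensed sketch of the resulting pattern, assembled here from the diff
below for readability (illustrative only, not the verbatim patch):

#include <linux/percpu.h>
#include <net/sock.h>
#include <net/net_namespace.h>

/* One ICMPv6 control socket per cpu, shared by every netns. */
static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);

/* Called with BH disabled, so this_cpu_read() is stable. */
static struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk = this_cpu_read(ipv6_icmp_sk);

	/* The output path can re-enter icmp on this cpu (e.g. a tunnel
	 * signalling dst_link_failure()); give up rather than deadlock
	 * on our own socket lock.
	 */
	if (unlikely(!spin_trylock(&sk->sk_lock.slock)))
		return NULL;

	/* The netns pointer is the only per-netns state the socket needs. */
	sock_net_set(sk, net);
	return sk;
}

static void icmpv6_xmit_unlock(struct sock *sk)
{
	sock_net_set(sk, &init_net);	/* park the socket back in init_net */
	spin_unlock(&sk->sk_lock.slock);
}

With this layout the sockets are created once against init_net in
icmpv6_init(), and the only per-netns work left on the transmit path is
the sock_net_set() pair above.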

This patch has a considerable impact on the number of netns
that the worker thread in cleanup_net() can dismantle per second,
because ip6mr_sk_done() is no longer called: we no longer acquire
the rtnl mutex, and no longer compete with other threads adding new netns.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent a15c89c7
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -88,7 +88,6 @@ struct netns_ipv6 {
 	struct fib6_table	*fib6_local_tbl;
 	struct fib_rules_ops	*fib6_rules_ops;
 #endif
-	struct sock		* __percpu *icmp_sk;
 	struct sock		*ndisc_sk;
 	struct sock		*tcp_sk;
 	struct sock		*igmp_sk;
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -69,17 +69,7 @@
 
 #include <linux/uaccess.h>
 
-/*
- *	The ICMP socket(s). This is the most convenient way to flow control
- *	our ICMP output as well as maintain a clean interface throughout
- *	all layers. All Socketless IP sends will soon be gone.
- *
- *	On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmpv6_sk(struct net *net)
-{
-	return this_cpu_read(*net->ipv6.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
 
 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		      u8 type, u8 code, int offset, __be32 info)
@@ -110,11 +100,11 @@ static const struct inet6_protocol icmpv6_protocol = {
 };
 
 /* Called with BH disabled */
-static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
+static struct sock *icmpv6_xmit_lock(struct net *net)
 {
 	struct sock *sk;
 
-	sk = icmpv6_sk(net);
+	sk = this_cpu_read(ipv6_icmp_sk);
 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 		/* This can happen if the output path (f.e. SIT or
 		 * ip6ip6 tunnel) signals dst_link_failure() for an
@@ -122,11 +112,13 @@ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
 		 */
 		return NULL;
 	}
+	sock_net_set(sk, net);
 	return sk;
 }
 
-static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
+static void icmpv6_xmit_unlock(struct sock *sk)
 {
+	sock_net_set(sk, &init_net);
 	spin_unlock(&sk->sk_lock.slock);
 }
 
@@ -1034,59 +1026,27 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
 }
 
-static void __net_exit icmpv6_sk_exit(struct net *net)
-{
-	int i;
-
-	for_each_possible_cpu(i)
-		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
-	free_percpu(net->ipv6.icmp_sk);
-}
-
-static int __net_init icmpv6_sk_init(struct net *net)
+int __init icmpv6_init(void)
 {
 	struct sock *sk;
 	int err, i;
 
-	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
-	if (!net->ipv6.icmp_sk)
-		return -ENOMEM;
-
 	for_each_possible_cpu(i) {
 		err = inet_ctl_sock_create(&sk, PF_INET6,
-					   SOCK_RAW, IPPROTO_ICMPV6, net);
+					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
 		if (err < 0) {
 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
 			       err);
-			goto fail;
+			return err;
 		}
 
-		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
+		per_cpu(ipv6_icmp_sk, i) = sk;
 
 		/* Enough space for 2 64K ICMP packets, including
 		 * sk_buff struct overhead.
 		 */
 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
 	}
-	return 0;
-
- fail:
-	icmpv6_sk_exit(net);
-	return err;
-}
-
-static struct pernet_operations icmpv6_sk_ops = {
-	.init = icmpv6_sk_init,
-	.exit = icmpv6_sk_exit,
-};
-
-int __init icmpv6_init(void)
-{
-	int err;
-
-	err = register_pernet_subsys(&icmpv6_sk_ops);
-	if (err < 0)
-		return err;
-
 	err = -EAGAIN;
 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
@@ -1101,14 +1061,12 @@ int __init icmpv6_init(void)
 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 fail:
 	pr_err("Failed to register ICMP6 protocol\n");
-	unregister_pernet_subsys(&icmpv6_sk_ops);
 	return err;
 }
 
 void icmpv6_cleanup(void)
 {
 	inet6_unregister_icmp_sender(icmp6_send);
-	unregister_pernet_subsys(&icmpv6_sk_ops);
 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 }