Commit eb4dea58, authored by Herbert Xu, committed by David S. Miller

net: Fix percpu counters deadlock

When we converted the protocol atomic counters, such as the orphan
count and the total socket count, to percpu counters, deadlocks were
introduced due to the mismatch in BH status of the spots that used
the percpu counter operations.

Based on the diagnosis and patch by Peter Zijlstra, this patch
fixes these issues by disabling BH where we may be in process
context.
Reported-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Tested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0f23174a
...@@ -964,7 +964,6 @@ void dccp_close(struct sock *sk, long timeout) ...@@ -964,7 +964,6 @@ void dccp_close(struct sock *sk, long timeout)
state = sk->sk_state; state = sk->sk_state;
sock_hold(sk); sock_hold(sk);
sock_orphan(sk); sock_orphan(sk);
percpu_counter_inc(sk->sk_prot->orphan_count);
/* /*
* It is the last release_sock in its life. It will remove backlog. * It is the last release_sock in its life. It will remove backlog.
...@@ -978,6 +977,8 @@ void dccp_close(struct sock *sk, long timeout) ...@@ -978,6 +977,8 @@ void dccp_close(struct sock *sk, long timeout)
bh_lock_sock(sk); bh_lock_sock(sk);
WARN_ON(sock_owned_by_user(sk)); WARN_ON(sock_owned_by_user(sk));
percpu_counter_inc(sk->sk_prot->orphan_count);
/* Have we already been destroyed by a softirq or backlog? */ /* Have we already been destroyed by a softirq or backlog? */
if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
goto out; goto out;
......
...@@ -633,8 +633,6 @@ void inet_csk_listen_stop(struct sock *sk) ...@@ -633,8 +633,6 @@ void inet_csk_listen_stop(struct sock *sk)
acc_req = req->dl_next; acc_req = req->dl_next;
percpu_counter_inc(sk->sk_prot->orphan_count);
local_bh_disable(); local_bh_disable();
bh_lock_sock(child); bh_lock_sock(child);
WARN_ON(sock_owned_by_user(child)); WARN_ON(sock_owned_by_user(child));
...@@ -644,6 +642,8 @@ void inet_csk_listen_stop(struct sock *sk) ...@@ -644,6 +642,8 @@ void inet_csk_listen_stop(struct sock *sk)
sock_orphan(child); sock_orphan(child);
percpu_counter_inc(sk->sk_prot->orphan_count);
inet_csk_destroy_sock(child); inet_csk_destroy_sock(child);
bh_unlock_sock(child); bh_unlock_sock(child);
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include <net/tcp.h> #include <net/tcp.h>
#include <net/udp.h> #include <net/udp.h>
#include <net/udplite.h> #include <net/udplite.h>
#include <linux/bottom_half.h>
#include <linux/inetdevice.h> #include <linux/inetdevice.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
...@@ -50,13 +51,17 @@ ...@@ -50,13 +51,17 @@
static int sockstat_seq_show(struct seq_file *seq, void *v) static int sockstat_seq_show(struct seq_file *seq, void *v)
{ {
struct net *net = seq->private; struct net *net = seq->private;
int orphans, sockets;
local_bh_disable();
orphans = percpu_counter_sum_positive(&tcp_orphan_count),
sockets = percpu_counter_sum_positive(&tcp_sockets_allocated),
local_bh_enable();
socket_seq_show(seq); socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
sock_prot_inuse_get(net, &tcp_prot), sock_prot_inuse_get(net, &tcp_prot), orphans,
(int)percpu_counter_sum_positive(&tcp_orphan_count), tcp_death_row.tw_count, sockets,
tcp_death_row.tw_count,
(int)percpu_counter_sum_positive(&tcp_sockets_allocated),
atomic_read(&tcp_memory_allocated)); atomic_read(&tcp_memory_allocated));
seq_printf(seq, "UDP: inuse %d mem %d\n", seq_printf(seq, "UDP: inuse %d mem %d\n",
sock_prot_inuse_get(net, &udp_prot), sock_prot_inuse_get(net, &udp_prot),
......
...@@ -1836,7 +1836,6 @@ void tcp_close(struct sock *sk, long timeout) ...@@ -1836,7 +1836,6 @@ void tcp_close(struct sock *sk, long timeout)
state = sk->sk_state; state = sk->sk_state;
sock_hold(sk); sock_hold(sk);
sock_orphan(sk); sock_orphan(sk);
percpu_counter_inc(sk->sk_prot->orphan_count);
/* It is the last release_sock in its life. It will remove backlog. */ /* It is the last release_sock in its life. It will remove backlog. */
release_sock(sk); release_sock(sk);
...@@ -1849,6 +1848,8 @@ void tcp_close(struct sock *sk, long timeout) ...@@ -1849,6 +1848,8 @@ void tcp_close(struct sock *sk, long timeout)
bh_lock_sock(sk); bh_lock_sock(sk);
WARN_ON(sock_owned_by_user(sk)); WARN_ON(sock_owned_by_user(sk));
percpu_counter_inc(sk->sk_prot->orphan_count);
/* Have we already been destroyed by a softirq or backlog? */ /* Have we already been destroyed by a softirq or backlog? */
if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
goto out; goto out;
......
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
*/ */
#include <linux/bottom_half.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/fcntl.h> #include <linux/fcntl.h>
#include <linux/module.h> #include <linux/module.h>
...@@ -1797,7 +1798,9 @@ static int tcp_v4_init_sock(struct sock *sk) ...@@ -1797,7 +1798,9 @@ static int tcp_v4_init_sock(struct sock *sk)
sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
percpu_counter_inc(&tcp_sockets_allocated); percpu_counter_inc(&tcp_sockets_allocated);
local_bh_enable();
return 0; return 0;
} }
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
* 2 of the License, or (at your option) any later version. * 2 of the License, or (at your option) any later version.
*/ */
#include <linux/bottom_half.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/types.h> #include <linux/types.h>
...@@ -1830,7 +1831,9 @@ static int tcp_v6_init_sock(struct sock *sk) ...@@ -1830,7 +1831,9 @@ static int tcp_v6_init_sock(struct sock *sk)
sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
percpu_counter_inc(&tcp_sockets_allocated); percpu_counter_inc(&tcp_sockets_allocated);
local_bh_enable();
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment