Commit d40ce48c authored by Jakub Kicinski

Merge branch 'af_unix-replace-unix_table_lock-with-per-hash-locks'

Kuniyuki Iwashima says:

====================
af_unix: Replace unix_table_lock with per-hash locks.

The hash table of AF_UNIX sockets is protected by a single big lock,
unix_table_lock.  This series replaces it with small per-hash locks.

1st -  2nd : Misc refactoring
3rd -  8th : Separate BSD/abstract address logics
9th - 11th : Prep to save a hash in each socket
12th       : Replace the big lock
13th       : Speed up autobind()

Note to maintainers:
The 12th patch adds two kinds of Sparse warnings on patchwork:

  about unix_table_double_lock/unlock()
    We can avoid this by adding two apparent acquires/releases annotations,
    but there are the same kinds of warnings about unix_state_double_lock().

  about unix_next_socket() and unix_seq_stop() (/proc/net/unix)
    This is because Sparse does not understand logic in unix_next_socket(),
    which leaves a spin lock held until it returns NULL.
    Also, tcp_seq_stop() causes a warning for the same reason.

These warnings seem reasonable, but let me know if there is any better way.
Please see [0] for details.

[0]: https://lore.kernel.org/netdev/20211117001611.74123-1-kuniyu@amazon.co.jp/
====================

Link: https://lore.kernel.org/r/20211124021431.48956-1-kuniyu@amazon.co.jp
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 442b03c3 9acbc584
...@@ -20,13 +20,12 @@ struct sock *unix_peer_get(struct sock *sk); ...@@ -20,13 +20,12 @@ struct sock *unix_peer_get(struct sock *sk);
#define UNIX_HASH_BITS 8 #define UNIX_HASH_BITS 8
extern unsigned int unix_tot_inflight; extern unsigned int unix_tot_inflight;
extern spinlock_t unix_table_lock; extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
struct unix_address { struct unix_address {
refcount_t refcnt; refcount_t refcnt;
int len; int len;
unsigned int hash;
struct sockaddr_un name[]; struct sockaddr_un name[];
}; };
......
This diff is collapsed.
...@@ -13,13 +13,14 @@ ...@@ -13,13 +13,14 @@
static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
{ {
/* might or might not have unix_table_lock */ /* might or might not have unix_table_locks */
struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
if (!addr) if (!addr)
return 0; return 0;
return nla_put(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short), return nla_put(nlskb, UNIX_DIAG_NAME,
addr->len - offsetof(struct sockaddr_un, sun_path),
addr->name->sun_path); addr->name->sun_path);
} }
...@@ -203,13 +204,13 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -203,13 +204,13 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
s_slot = cb->args[0]; s_slot = cb->args[0];
num = s_num = cb->args[1]; num = s_num = cb->args[1];
spin_lock(&unix_table_lock);
for (slot = s_slot; for (slot = s_slot;
slot < ARRAY_SIZE(unix_socket_table); slot < ARRAY_SIZE(unix_socket_table);
s_num = 0, slot++) { s_num = 0, slot++) {
struct sock *sk; struct sock *sk;
num = 0; num = 0;
spin_lock(&unix_table_locks[slot]);
sk_for_each(sk, &unix_socket_table[slot]) { sk_for_each(sk, &unix_socket_table[slot]) {
if (!net_eq(sock_net(sk), net)) if (!net_eq(sock_net(sk), net))
continue; continue;
...@@ -220,14 +221,16 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -220,14 +221,16 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (sk_diag_dump(sk, skb, req, if (sk_diag_dump(sk, skb, req,
NETLINK_CB(cb->skb).portid, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, cb->nlh->nlmsg_seq,
NLM_F_MULTI) < 0) NLM_F_MULTI) < 0) {
spin_unlock(&unix_table_locks[slot]);
goto done; goto done;
}
next: next:
num++; num++;
} }
spin_unlock(&unix_table_locks[slot]);
} }
done: done:
spin_unlock(&unix_table_lock);
cb->args[0] = slot; cb->args[0] = slot;
cb->args[1] = num; cb->args[1] = num;
...@@ -236,21 +239,19 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -236,21 +239,19 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
static struct sock *unix_lookup_by_ino(unsigned int ino) static struct sock *unix_lookup_by_ino(unsigned int ino)
{ {
int i;
struct sock *sk; struct sock *sk;
int i;
spin_lock(&unix_table_lock);
for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) {
spin_lock(&unix_table_locks[i]);
sk_for_each(sk, &unix_socket_table[i]) sk_for_each(sk, &unix_socket_table[i])
if (ino == sock_i_ino(sk)) { if (ino == sock_i_ino(sk)) {
sock_hold(sk); sock_hold(sk);
spin_unlock(&unix_table_lock); spin_unlock(&unix_table_locks[i]);
return sk; return sk;
} }
spin_unlock(&unix_table_locks[i]);
} }
spin_unlock(&unix_table_lock);
return NULL; return NULL;
} }
......
...@@ -49,7 +49,7 @@ int dump_unix(struct bpf_iter__unix *ctx) ...@@ -49,7 +49,7 @@ int dump_unix(struct bpf_iter__unix *ctx)
sock_i_ino(sk)); sock_i_ino(sk));
if (unix_sk->addr) { if (unix_sk->addr) {
if (!UNIX_ABSTRACT(unix_sk)) { if (unix_sk->addr->name->sun_path[0]) {
BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path); BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
} else { } else {
/* The name of the abstract UNIX domain socket starts /* The name of the abstract UNIX domain socket starts
......
...@@ -6,8 +6,6 @@ ...@@ -6,8 +6,6 @@
#define AF_INET6 10 #define AF_INET6 10
#define __SO_ACCEPTCON (1 << 16) #define __SO_ACCEPTCON (1 << 16)
#define UNIX_HASH_SIZE 256
#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
#define SOL_TCP 6 #define SOL_TCP 6
#define TCP_CONGESTION 13 #define TCP_CONGESTION 13
......
...@@ -23,7 +23,7 @@ int BPF_PROG(unix_listen, struct socket *sock, int backlog) ...@@ -23,7 +23,7 @@ int BPF_PROG(unix_listen, struct socket *sock, int backlog)
if (!unix_sk) if (!unix_sk)
return 0; return 0;
if (!UNIX_ABSTRACT(unix_sk)) if (unix_sk->addr->name->sun_path[0])
return 0; return 0;
len = unix_sk->addr->len - sizeof(short); len = unix_sk->addr->len - sizeof(short);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment