Commit 6dd4142f authored by David S. Miller

Merge branch 'af_unix-per-netns-socket-hash'

Kuniyuki Iwashima says:

====================
af_unix: Introduce per-netns socket hash table.

This series replaces unix_socket_table with a per-netns hash table, which
reduces lock contention and the time spent iterating over the list.

Note the 3rd-6th patches can be a single patch, but for ease of review,
they are split into small changes without breakage.

Changes:
  v3:
    6th:
      * Remove unix_table_locks from comments.
      * Remove missed spin_unlock(&unix_table_locks) in
        unix_lookup_by_ino() (kernel test robot)

  v2: https://lore.kernel.org/netdev/20220620185151.65294-1-kuniyu@amazon.com/
    3rd:
      * Update changelog
      * Remove holes from per-netns hash table structure
      * Use kvmalloc_array() instead of kmalloc() (Eric Dumazet)
      * Remove unnecessary parts in af_unix_init() (Eric Dumazet)
      * Move `err_sysctl` label into ifdef block (kernel test robot)
      * Remove struct netns_unix from struct net if CONFIG_UNIX is disabled
    4th:
      * Use spin_lock_nested() (kernel test robot)

  v1: https://lore.kernel.org/netdev/20220616234714.4291-1-kuniyu@amazon.com/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ffd3018b 2f7ca90a
......@@ -16,12 +16,11 @@ void wait_for_unix_gc(void);
struct sock *unix_get_socket(struct file *filp);
struct sock *unix_peer_get(struct sock *sk);
#define UNIX_HASH_SIZE 256
#define UNIX_HASH_MOD (256 - 1)
#define UNIX_HASH_SIZE (256 * 2)
#define UNIX_HASH_BITS 8
extern unsigned int unix_tot_inflight;
extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
struct unix_address {
refcount_t refcnt;
......
......@@ -120,7 +120,9 @@ struct net {
struct netns_core core;
struct netns_mib mib;
struct netns_packet packet;
#if IS_ENABLED(CONFIG_UNIX)
struct netns_unix unx;
#endif
struct netns_nexthop nexthop;
struct netns_ipv4 ipv4;
#if IS_ENABLED(CONFIG_IPV6)
......
......@@ -5,8 +5,14 @@
#ifndef __NETNS_UNIX_H__
#define __NETNS_UNIX_H__
/*
 * AF_UNIX socket hash table, one instance per network namespace
 * (reached as net->unx.table).
 *
 * The two arrays are indexed by the same slot number: users take
 * locks[slot] around any walk of the chain at buckets[slot].  The visible
 * users iterate slots 0 .. UNIX_HASH_SIZE - 1.
 *
 * NOTE(review): the allocation/free site for both arrays is not visible
 * here - confirm their length and lifetime against af_unix.c.
 */
struct unix_table {
/* per-slot spinlocks; locks[i] is held while traversing buckets[i] */
spinlock_t *locks;
/* per-slot list heads of the sockets hashed to that slot */
struct hlist_head *buckets;
};
struct ctl_table_header;
/*
 * Per-network-namespace AF_UNIX state.  Embedded in struct net as the
 * `unx` member, which is only present when CONFIG_UNIX is enabled.
 */
struct netns_unix {
/* socket hash table for this namespace (locks + buckets) */
struct unix_table table;
/* NOTE(review): presumably the value behind the max_dgram_qlen sysctl - confirm */
int sysctl_max_dgram_qlen;
/* NOTE(review): presumably the registered sysctl table header for this netns - confirm */
struct ctl_table_header *ctl;
};
......
This diff is collapsed.
......@@ -13,7 +13,7 @@
static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
{
/* might or might not have unix_table_locks */
/* might or might not have a hash table lock */
struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
if (!addr)
......@@ -195,25 +195,21 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct unix_diag_req *req;
int num, s_num, slot, s_slot;
struct net *net = sock_net(skb->sk);
int num, s_num, slot, s_slot;
struct unix_diag_req *req;
req = nlmsg_data(cb->nlh);
s_slot = cb->args[0];
num = s_num = cb->args[1];
for (slot = s_slot;
slot < ARRAY_SIZE(unix_socket_table);
s_num = 0, slot++) {
for (slot = s_slot; slot < UNIX_HASH_SIZE; s_num = 0, slot++) {
struct sock *sk;
num = 0;
spin_lock(&unix_table_locks[slot]);
sk_for_each(sk, &unix_socket_table[slot]) {
if (!net_eq(sock_net(sk), net))
continue;
spin_lock(&net->unx.table.locks[slot]);
sk_for_each(sk, &net->unx.table.buckets[slot]) {
if (num < s_num)
goto next;
if (!(req->udiag_states & (1 << sk->sk_state)))
......@@ -222,13 +218,13 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI) < 0) {
spin_unlock(&unix_table_locks[slot]);
spin_unlock(&net->unx.table.locks[slot]);
goto done;
}
next:
num++;
}
spin_unlock(&unix_table_locks[slot]);
spin_unlock(&net->unx.table.locks[slot]);
}
done:
cb->args[0] = slot;
......@@ -237,20 +233,21 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
static struct sock *unix_lookup_by_ino(unsigned int ino)
static struct sock *unix_lookup_by_ino(struct net *net, unsigned int ino)
{
struct sock *sk;
int i;
for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) {
spin_lock(&unix_table_locks[i]);
sk_for_each(sk, &unix_socket_table[i])
for (i = 0; i < UNIX_HASH_SIZE; i++) {
spin_lock(&net->unx.table.locks[i]);
sk_for_each(sk, &net->unx.table.buckets[i]) {
if (ino == sock_i_ino(sk)) {
sock_hold(sk);
spin_unlock(&unix_table_locks[i]);
spin_unlock(&net->unx.table.locks[i]);
return sk;
}
spin_unlock(&unix_table_locks[i]);
}
spin_unlock(&net->unx.table.locks[i]);
}
return NULL;
}
......@@ -259,21 +256,20 @@ static int unix_diag_get_exact(struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
struct unix_diag_req *req)
{
int err = -EINVAL;
struct sock *sk;
struct sk_buff *rep;
unsigned int extra_len;
struct net *net = sock_net(in_skb->sk);
unsigned int extra_len;
struct sk_buff *rep;
struct sock *sk;
int err;
err = -EINVAL;
if (req->udiag_ino == 0)
goto out_nosk;
sk = unix_lookup_by_ino(req->udiag_ino);
sk = unix_lookup_by_ino(net, req->udiag_ino);
err = -ENOENT;
if (sk == NULL)
goto out_nosk;
if (!net_eq(sock_net(sk), net))
goto out;
err = sock_diag_check_cookie(sk, req->udiag_cookie);
if (err)
......@@ -308,7 +304,6 @@ static int unix_diag_get_exact(struct sk_buff *in_skb,
static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
{
int hdrlen = sizeof(struct unix_diag_req);
struct net *net = sock_net(skb->sk);
if (nlmsg_len(h) < hdrlen)
return -EINVAL;
......@@ -317,7 +312,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
struct netlink_dump_control c = {
.dump = unix_diag_dump,
};
return netlink_dump_start(net->diag_nlsk, skb, h, &c);
return netlink_dump_start(sock_net(skb->sk)->diag_nlsk, skb, h, &c);
} else
return unix_diag_get_exact(skb, h, nlmsg_data(h));
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment