Commit f86dcc5a authored by Eric Dumazet, committed by David S. Miller

udp: dynamically size hash tables at boot time

UDP_HTABLE_SIZE was initially defined to 128, which is a bit small for
several setups.

4000 active UDP sockets -> 32 sockets per chain on average. An
incoming frame has to look up all sockets in its chain to find the best
match, so long chains hurt latency.

Instead of a fixed-size hash table that can't be perfect for every
need, let the UDP stack choose its table size at boot time, as the TCP
and IP route hash tables do, using the alloc_large_system_hash() helper.

Add an optional boot parameter, uhash_entries=x, so that an admin can
force a size between 256 and 65536 if needed, like thash_entries and
rhash_entries.

dmesg logs two new lines:
[    0.647039] UDP hash table entries: 512 (order: 0, 4096 bytes)
[    0.647099] UDP Lite hash table entries: 512 (order: 0, 4096 bytes)

Maximal size on 64-bit arches would be 65536 slots, i.e. 1 MByte with
non-debugging spinlocks.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 8a6dfd43
...@@ -2589,6 +2589,9 @@ and is between 256 and 4096 characters. It is defined in the file ...@@ -2589,6 +2589,9 @@ and is between 256 and 4096 characters. It is defined in the file
uart6850= [HW,OSS] uart6850= [HW,OSS]
Format: <io>,<irq> Format: <io>,<irq>
uhash_entries= [KNL,NET]
Set number of hash buckets for UDP/UDP-Lite connections
uhci-hcd.ignore_oc= uhci-hcd.ignore_oc=
[USB] Ignore overcurrent events (default N). [USB] Ignore overcurrent events (default N).
Some badly-designed motherboards generate lots of Some badly-designed motherboards generate lots of
......
...@@ -45,11 +45,11 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) ...@@ -45,11 +45,11 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
return (struct udphdr *)skb_transport_header(skb); return (struct udphdr *)skb_transport_header(skb);
} }
#define UDP_HTABLE_SIZE 128 #define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256)
static inline int udp_hashfn(struct net *net, const unsigned num) static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask)
{ {
return (num + net_hash_mix(net)) & (UDP_HTABLE_SIZE - 1); return (num + net_hash_mix(net)) & mask;
} }
struct udp_sock { struct udp_sock {
......
...@@ -54,12 +54,19 @@ struct udp_hslot { ...@@ -54,12 +54,19 @@ struct udp_hslot {
struct hlist_nulls_head head; struct hlist_nulls_head head;
spinlock_t lock; spinlock_t lock;
} __attribute__((aligned(2 * sizeof(long)))); } __attribute__((aligned(2 * sizeof(long))));
struct udp_table { struct udp_table {
struct udp_hslot hash[UDP_HTABLE_SIZE]; struct udp_hslot *hash;
unsigned int mask;
unsigned int log;
}; };
extern struct udp_table udp_table; extern struct udp_table udp_table;
extern void udp_table_init(struct udp_table *); extern void udp_table_init(struct udp_table *, const char *);
static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
struct net *net, unsigned num)
{
return &table->hash[udp_hashfn(net, num, table->mask)];
}
/* Note: this must match 'valbool' in sock_setsockopt */ /* Note: this must match 'valbool' in sock_setsockopt */
#define UDP_CSUM_NOXMIT 1 #define UDP_CSUM_NOXMIT 1
......
...@@ -106,7 +106,7 @@ ...@@ -106,7 +106,7 @@
#include <net/xfrm.h> #include <net/xfrm.h>
#include "udp_impl.h" #include "udp_impl.h"
struct udp_table udp_table; struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table); EXPORT_SYMBOL(udp_table);
int sysctl_udp_mem[3] __read_mostly; int sysctl_udp_mem[3] __read_mostly;
...@@ -121,14 +121,16 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); ...@@ -121,14 +121,16 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
atomic_t udp_memory_allocated; atomic_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated); EXPORT_SYMBOL(udp_memory_allocated);
#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) #define MAX_UDP_PORTS 65536
#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
static int udp_lib_lport_inuse(struct net *net, __u16 num, static int udp_lib_lport_inuse(struct net *net, __u16 num,
const struct udp_hslot *hslot, const struct udp_hslot *hslot,
unsigned long *bitmap, unsigned long *bitmap,
struct sock *sk, struct sock *sk,
int (*saddr_comp)(const struct sock *sk1, int (*saddr_comp)(const struct sock *sk1,
const struct sock *sk2)) const struct sock *sk2),
unsigned int log)
{ {
struct sock *sk2; struct sock *sk2;
struct hlist_nulls_node *node; struct hlist_nulls_node *node;
...@@ -142,8 +144,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, ...@@ -142,8 +144,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
(*saddr_comp)(sk, sk2)) { (*saddr_comp)(sk, sk2)) {
if (bitmap) if (bitmap)
__set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, __set_bit(sk2->sk_hash >> log, bitmap);
bitmap);
else else
return 1; return 1;
} }
...@@ -180,13 +181,15 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, ...@@ -180,13 +181,15 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
/* /*
* force rand to be an odd multiple of UDP_HTABLE_SIZE * force rand to be an odd multiple of UDP_HTABLE_SIZE
*/ */
rand = (rand | 1) * UDP_HTABLE_SIZE; rand = (rand | 1) * (udptable->mask + 1);
for (last = first + UDP_HTABLE_SIZE; first != last; first++) { for (last = first + udptable->mask + 1;
hslot = &udptable->hash[udp_hashfn(net, first)]; first != last;
first++) {
hslot = udp_hashslot(udptable, net, first);
bitmap_zero(bitmap, PORTS_PER_CHAIN); bitmap_zero(bitmap, PORTS_PER_CHAIN);
spin_lock_bh(&hslot->lock); spin_lock_bh(&hslot->lock);
udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
saddr_comp); saddr_comp, udptable->log);
snum = first; snum = first;
/* /*
...@@ -196,7 +199,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, ...@@ -196,7 +199,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
*/ */
do { do {
if (low <= snum && snum <= high && if (low <= snum && snum <= high &&
!test_bit(snum / UDP_HTABLE_SIZE, bitmap)) !test_bit(snum >> udptable->log, bitmap))
goto found; goto found;
snum += rand; snum += rand;
} while (snum != first); } while (snum != first);
...@@ -204,9 +207,10 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, ...@@ -204,9 +207,10 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
} }
goto fail; goto fail;
} else { } else {
hslot = &udptable->hash[udp_hashfn(net, snum)]; hslot = udp_hashslot(udptable, net, snum);
spin_lock_bh(&hslot->lock); spin_lock_bh(&hslot->lock);
if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
saddr_comp, 0))
goto fail_unlock; goto fail_unlock;
} }
found: found:
...@@ -283,7 +287,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, ...@@ -283,7 +287,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
struct sock *sk, *result; struct sock *sk, *result;
struct hlist_nulls_node *node; struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport); unsigned short hnum = ntohs(dport);
unsigned int hash = udp_hashfn(net, hnum); unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot = &udptable->hash[hash]; struct udp_hslot *hslot = &udptable->hash[hash];
int score, badness; int score, badness;
...@@ -1013,8 +1017,8 @@ void udp_lib_unhash(struct sock *sk) ...@@ -1013,8 +1017,8 @@ void udp_lib_unhash(struct sock *sk)
{ {
if (sk_hashed(sk)) { if (sk_hashed(sk)) {
struct udp_table *udptable = sk->sk_prot->h.udp_table; struct udp_table *udptable = sk->sk_prot->h.udp_table;
unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk),
struct udp_hslot *hslot = &udptable->hash[hash]; sk->sk_hash);
spin_lock_bh(&hslot->lock); spin_lock_bh(&hslot->lock);
if (sk_nulls_del_node_init_rcu(sk)) { if (sk_nulls_del_node_init_rcu(sk)) {
...@@ -1169,7 +1173,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, ...@@ -1169,7 +1173,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct udp_table *udptable) struct udp_table *udptable)
{ {
struct sock *sk; struct sock *sk;
struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
int dif; int dif;
spin_lock(&hslot->lock); spin_lock(&hslot->lock);
...@@ -1609,9 +1613,14 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) ...@@ -1609,9 +1613,14 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
struct udp_iter_state *state = seq->private; struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { for (state->bucket = start; state->bucket <= state->udp_table->mask;
++state->bucket) {
struct hlist_nulls_node *node; struct hlist_nulls_node *node;
struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
if (hlist_nulls_empty(&hslot->head))
continue;
spin_lock_bh(&hslot->lock); spin_lock_bh(&hslot->lock);
sk_nulls_for_each(sk, node, &hslot->head) { sk_nulls_for_each(sk, node, &hslot->head) {
if (!net_eq(sock_net(sk), net)) if (!net_eq(sock_net(sk), net))
...@@ -1636,7 +1645,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) ...@@ -1636,7 +1645,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
if (!sk) { if (!sk) {
if (state->bucket < UDP_HTABLE_SIZE) if (state->bucket <= state->udp_table->mask)
spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
return udp_get_first(seq, state->bucket + 1); return udp_get_first(seq, state->bucket + 1);
} }
...@@ -1656,7 +1665,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) ...@@ -1656,7 +1665,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
static void *udp_seq_start(struct seq_file *seq, loff_t *pos) static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
{ {
struct udp_iter_state *state = seq->private; struct udp_iter_state *state = seq->private;
state->bucket = UDP_HTABLE_SIZE; state->bucket = MAX_UDP_PORTS;
return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
} }
...@@ -1678,7 +1687,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v) ...@@ -1678,7 +1687,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
{ {
struct udp_iter_state *state = seq->private; struct udp_iter_state *state = seq->private;
if (state->bucket < UDP_HTABLE_SIZE) if (state->bucket <= state->udp_table->mask)
spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
} }
...@@ -1738,7 +1747,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, ...@@ -1738,7 +1747,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
__u16 destp = ntohs(inet->dport); __u16 destp = ntohs(inet->dport);
__u16 srcp = ntohs(inet->sport); __u16 srcp = ntohs(inet->sport);
seq_printf(f, "%4d: %08X:%04X %08X:%04X" seq_printf(f, "%5d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
bucket, src, srcp, dest, destp, sp->sk_state, bucket, src, srcp, dest, destp, sp->sk_state,
sk_wmem_alloc_get(sp), sk_wmem_alloc_get(sp),
...@@ -1804,11 +1813,43 @@ void udp4_proc_exit(void) ...@@ -1804,11 +1813,43 @@ void udp4_proc_exit(void)
} }
#endif /* CONFIG_PROC_FS */ #endif /* CONFIG_PROC_FS */
void __init udp_table_init(struct udp_table *table) static __initdata unsigned long uhash_entries;
static int __init set_uhash_entries(char *str)
{ {
int i; if (!str)
return 0;
uhash_entries = simple_strtoul(str, &str, 0);
if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN)
uhash_entries = UDP_HTABLE_SIZE_MIN;
return 1;
}
__setup("uhash_entries=", set_uhash_entries);
for (i = 0; i < UDP_HTABLE_SIZE; i++) { void __init udp_table_init(struct udp_table *table, const char *name)
{
unsigned int i;
if (!CONFIG_BASE_SMALL)
table->hash = alloc_large_system_hash(name,
sizeof(struct udp_hslot),
uhash_entries,
21, /* one slot per 2 MB */
0,
&table->log,
&table->mask,
64 * 1024);
/*
* Make sure hash table has the minimum size
*/
if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) {
table->hash = kmalloc(UDP_HTABLE_SIZE_MIN *
sizeof(struct udp_hslot), GFP_KERNEL);
if (!table->hash)
panic(name);
table->log = ilog2(UDP_HTABLE_SIZE_MIN);
table->mask = UDP_HTABLE_SIZE_MIN - 1;
}
for (i = 0; i <= table->mask; i++) {
INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
spin_lock_init(&table->hash[i].lock); spin_lock_init(&table->hash[i].lock);
} }
...@@ -1818,7 +1859,7 @@ void __init udp_init(void) ...@@ -1818,7 +1859,7 @@ void __init udp_init(void)
{ {
unsigned long nr_pages, limit; unsigned long nr_pages, limit;
udp_table_init(&udp_table); udp_table_init(&udp_table, "UDP");
/* Set the pressure threshold up by the same strategy of TCP. It is a /* Set the pressure threshold up by the same strategy of TCP. It is a
* fraction of global memory that is up to 1/2 at 256 MB, decreasing * fraction of global memory that is up to 1/2 at 256 MB, decreasing
* toward zero with the amount of memory, with a floor of 128 pages. * toward zero with the amount of memory, with a floor of 128 pages.
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
*/ */
#include "udp_impl.h" #include "udp_impl.h"
struct udp_table udplite_table; struct udp_table udplite_table __read_mostly;
EXPORT_SYMBOL(udplite_table); EXPORT_SYMBOL(udplite_table);
static int udplite_rcv(struct sk_buff *skb) static int udplite_rcv(struct sk_buff *skb)
...@@ -110,7 +110,7 @@ static inline int udplite4_proc_init(void) ...@@ -110,7 +110,7 @@ static inline int udplite4_proc_init(void)
void __init udplite4_register(void) void __init udplite4_register(void)
{ {
udp_table_init(&udplite_table); udp_table_init(&udplite_table, "UDP-Lite");
if (proto_register(&udplite_prot, 1)) if (proto_register(&udplite_prot, 1))
goto out_register_err; goto out_register_err;
......
...@@ -132,7 +132,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, ...@@ -132,7 +132,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
struct sock *sk, *result; struct sock *sk, *result;
struct hlist_nulls_node *node; struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport); unsigned short hnum = ntohs(dport);
unsigned int hash = udp_hashfn(net, hnum); unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot = &udptable->hash[hash]; struct udp_hslot *hslot = &udptable->hash[hash];
int score, badness; int score, badness;
...@@ -452,7 +452,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, ...@@ -452,7 +452,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
{ {
struct sock *sk, *sk2; struct sock *sk, *sk2;
const struct udphdr *uh = udp_hdr(skb); const struct udphdr *uh = udp_hdr(skb);
struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
int dif; int dif;
spin_lock(&hslot->lock); spin_lock(&hslot->lock);
...@@ -1197,7 +1197,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket ...@@ -1197,7 +1197,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
destp = ntohs(inet->dport); destp = ntohs(inet->dport);
srcp = ntohs(inet->sport); srcp = ntohs(inet->sport);
seq_printf(seq, seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
"%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
bucket, bucket,
src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[0], src->s6_addr32[1],
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment