Commit 98322f22, authored by Eric Dumazet, committed by David S. Miller

udp: optimize bind(0) if many ports are in use

Commit 9088c560
(udp: Improve port randomization) introduced a regression for the UDP bind() syscall
on port 0 (requesting a random port) when a lot of ports are already in use.

This is because we do about 28000 scans of very long chains (220 sockets per chain),
with many spin_lock_bh()/spin_unlock_bh() calls.

Fix this using a bitmap (64 bytes for current value of UDP_HTABLE_SIZE)
so that we scan chains at most once.

Instead of 250 ms per bind() call, we get a time of 2.9 ms after the patch.

Based on a report from Vitaly Mayatskikh
Reported-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Tested-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 8527bec5
...@@ -120,8 +120,11 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); ...@@ -120,8 +120,11 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
atomic_t udp_memory_allocated; atomic_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated); EXPORT_SYMBOL(udp_memory_allocated);
#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE)
static int udp_lib_lport_inuse(struct net *net, __u16 num, static int udp_lib_lport_inuse(struct net *net, __u16 num,
const struct udp_hslot *hslot, const struct udp_hslot *hslot,
unsigned long *bitmap,
struct sock *sk, struct sock *sk,
int (*saddr_comp)(const struct sock *sk1, int (*saddr_comp)(const struct sock *sk1,
const struct sock *sk2)) const struct sock *sk2))
...@@ -132,12 +135,17 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, ...@@ -132,12 +135,17 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
sk_nulls_for_each(sk2, node, &hslot->head) sk_nulls_for_each(sk2, node, &hslot->head)
if (net_eq(sock_net(sk2), net) && if (net_eq(sock_net(sk2), net) &&
sk2 != sk && sk2 != sk &&
sk2->sk_hash == num && (bitmap || sk2->sk_hash == num) &&
(!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_reuse || !sk->sk_reuse) &&
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
(*saddr_comp)(sk, sk2)) (*saddr_comp)(sk, sk2)) {
if (bitmap)
__set_bit(sk2->sk_hash / UDP_HTABLE_SIZE,
bitmap);
else
return 1; return 1;
}
return 0; return 0;
} }
...@@ -160,32 +168,47 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, ...@@ -160,32 +168,47 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
if (!snum) { if (!snum) {
int low, high, remaining; int low, high, remaining;
unsigned rand; unsigned rand;
unsigned short first; unsigned short first, last;
DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
inet_get_local_port_range(&low, &high); inet_get_local_port_range(&low, &high);
remaining = (high - low) + 1; remaining = (high - low) + 1;
rand = net_random(); rand = net_random();
snum = first = rand % remaining + low; first = (((u64)rand * remaining) >> 32) + low;
rand |= 1; /*
for (;;) { * force rand to be an odd multiple of UDP_HTABLE_SIZE
hslot = &udptable->hash[udp_hashfn(net, snum)]; */
rand = (rand | 1) * UDP_HTABLE_SIZE;
for (last = first + UDP_HTABLE_SIZE; first != last; first++) {
hslot = &udptable->hash[udp_hashfn(net, first)];
bitmap_zero(bitmap, PORTS_PER_CHAIN);
spin_lock_bh(&hslot->lock); spin_lock_bh(&hslot->lock);
if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
break; saddr_comp);
spin_unlock_bh(&hslot->lock);
snum = first;
/*
* Iterate on all possible values of snum for this hash.
* Using steps of an odd multiple of UDP_HTABLE_SIZE
* give us randomization and full range coverage.
*/
do { do {
snum = snum + rand; if (low <= snum && snum <= high &&
} while (snum < low || snum > high); !test_bit(snum / UDP_HTABLE_SIZE, bitmap))
if (snum == first) goto found;
goto fail; snum += rand;
} while (snum != first);
spin_unlock_bh(&hslot->lock);
} }
goto fail;
} else { } else {
hslot = &udptable->hash[udp_hashfn(net, snum)]; hslot = &udptable->hash[udp_hashfn(net, snum)];
spin_lock_bh(&hslot->lock); spin_lock_bh(&hslot->lock);
if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp))
goto fail_unlock; goto fail_unlock;
} }
found:
inet_sk(sk)->num = snum; inet_sk(sk)->num = snum;
sk->sk_hash = snum; sk->sk_hash = snum;
if (sk_unhashed(sk)) { if (sk_unhashed(sk)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment