Commit 614919c3 authored by David S. Miller

Merge branch 'tcp_src_port_selection'

Eric Dumazet says:

====================
tcp: improve source port selection

With increase of TCP sockets in hosts, we often hit limitations
caused by port selection, due to randomization and poor strategy.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ce5ec440 946f9eb2
...@@ -148,8 +148,6 @@ struct inet_hashinfo { ...@@ -148,8 +148,6 @@ struct inet_hashinfo {
*/ */
struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE]
____cacheline_aligned_in_smp; ____cacheline_aligned_in_smp;
atomic_t bsockets;
}; };
static inline struct inet_ehash_bucket *inet_ehash_bucket( static inline struct inet_ehash_bucket *inet_ehash_bucket(
......
...@@ -99,6 +99,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) ...@@ -99,6 +99,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
int smallest_size = -1, smallest_rover; int smallest_size = -1, smallest_rover;
kuid_t uid = sock_i_uid(sk); kuid_t uid = sock_i_uid(sk);
int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
local_bh_disable(); local_bh_disable();
if (!snum) { if (!snum) {
...@@ -106,6 +107,14 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) ...@@ -106,6 +107,14 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
again: again:
inet_get_local_port_range(net, &low, &high); inet_get_local_port_range(net, &low, &high);
if (attempt_half) {
int half = low + ((high - low) >> 1);
if (attempt_half == 1)
high = half;
else
low = half;
}
remaining = (high - low) + 1; remaining = (high - low) + 1;
smallest_rover = rover = prandom_u32() % remaining + low; smallest_rover = rover = prandom_u32() % remaining + low;
...@@ -127,11 +136,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) ...@@ -127,11 +136,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
(tb->num_owners < smallest_size || smallest_size == -1)) { (tb->num_owners < smallest_size || smallest_size == -1)) {
smallest_size = tb->num_owners; smallest_size = tb->num_owners;
smallest_rover = rover; smallest_rover = rover;
if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
snum = smallest_rover;
goto tb_found;
}
} }
if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
snum = rover; snum = rover;
...@@ -159,6 +163,11 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) ...@@ -159,6 +163,11 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
snum = smallest_rover; snum = smallest_rover;
goto have_snum; goto have_snum;
} }
if (attempt_half == 1) {
/* OK we now try the upper half of the range */
attempt_half = 2;
goto again;
}
goto fail; goto fail;
} }
/* OK, here is the one we will use. HEAD is /* OK, here is the one we will use. HEAD is
......
...@@ -90,10 +90,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket ...@@ -90,10 +90,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
const unsigned short snum) const unsigned short snum)
{ {
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
atomic_inc(&hashinfo->bsockets);
inet_sk(sk)->inet_num = snum; inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners); sk_add_bind_node(sk, &tb->owners);
tb->num_owners++; tb->num_owners++;
...@@ -111,8 +107,6 @@ static void __inet_put_port(struct sock *sk) ...@@ -111,8 +107,6 @@ static void __inet_put_port(struct sock *sk)
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
struct inet_bind_bucket *tb; struct inet_bind_bucket *tb;
atomic_dec(&hashinfo->bsockets);
spin_lock(&head->lock); spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash; tb = inet_csk(sk)->icsk_bind_hash;
__sk_del_bind_node(sk); __sk_del_bind_node(sk);
...@@ -608,7 +602,6 @@ void inet_hashinfo_init(struct inet_hashinfo *h) ...@@ -608,7 +602,6 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
{ {
int i; int i;
atomic_set(&h->bsockets, 0);
for (i = 0; i < INET_LHTABLE_SIZE; i++) { for (i = 0; i < INET_LHTABLE_SIZE; i++) {
spin_lock_init(&h->listening_hash[i].lock); spin_lock_init(&h->listening_hash[i].lock);
INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment