Commit c6d3c96f authored by David S. Miller

Merge branch 'tcp-2nd-listener-hash'

Martin KaFai Lau says:

====================
tcp: Add a 2nd listener hashtable (port+addr)

This patch set adds a 2nd listener hashtable.  It resolves a
performance issue seen when a process listens on many IP
addresses with the same port (e.g. [IP1]:443, [IP2]:443... [IPN]:443).

v2:
- Move the new lhash2 and lhash2_mask before the existing
  listening_hash to avoid adding another cacheline
  to inet_hashinfo (Suggested by Eric Dumazet, Thanks!)
- I took this chance to plug an existing 4-byte hole while
  adding 'unsigned int lhash2_mask'.
- Add some comments about lhash2 in inet_hashtables.h
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents fb7516d4 27da6d37
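
For context only (not part of the commit): a minimal user-space sketch of the workload the cover letter describes, i.e. one TCP listener per local address, all on the same port. The 192.0.2.x addresses and the tiny count are illustrative; a real setup would have those addresses configured locally, many more of them, and the privilege to bind port 443.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        /* illustrative address list; the cover letter's example has N such addresses */
        const char *addrs[] = { "192.0.2.1", "192.0.2.2", "192.0.2.3" };
        int fds[3], i;

        for (i = 0; i < 3; i++) {
                struct sockaddr_in sin;

                fds[i] = socket(AF_INET, SOCK_STREAM, 0);
                if (fds[i] < 0) {
                        perror("socket");
                        return 1;
                }
                memset(&sin, 0, sizeof(sin));
                sin.sin_family = AF_INET;
                sin.sin_port = htons(443);
                inet_pton(AF_INET, addrs[i], &sin.sin_addr);
                if (bind(fds[i], (struct sockaddr *)&sin, sizeof(sin)) ||
                    listen(fds[i], 128)) {
                        perror("bind/listen");
                        return 1;
                }
        }
        /* Every listener above shares the listening_hash bucket for port 443;
         * with this series each one typically also lands in its own lhash2
         * (port+addr) bucket, so lookups no longer walk all of them.
         */
        pause();
        return 0;
}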
@@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops {
  * @icsk_af_ops                Operations which are AF_INET{4,6} specific
  * @icsk_ulp_ops               Pluggable ULP control hook
  * @icsk_ulp_data              ULP private data
+ * @icsk_listen_portaddr_node  hash to the portaddr listener hashtable
  * @icsk_ca_state:             Congestion control state
  * @icsk_retransmits:          Number of unrecovered [RTO] timeouts
  * @icsk_pending:              Scheduled timer event
@@ -101,6 +102,7 @@ struct inet_connection_sock {
         const struct inet_connection_sock_af_ops *icsk_af_ops;
         const struct tcp_ulp_ops  *icsk_ulp_ops;
         void                      *icsk_ulp_data;
+        struct hlist_node         icsk_listen_portaddr_node;
         unsigned int              (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
         __u8                      icsk_ca_state:6,
                                   icsk_ca_setsockopt:1,
......
@@ -111,6 +111,7 @@ struct inet_bind_hashbucket {
  */
 struct inet_listen_hashbucket {
         spinlock_t              lock;
+        unsigned int            count;
         struct hlist_head       head;
 };
 
@@ -132,12 +133,13 @@ struct inet_hashinfo {
         /* Ok, let's try this, I give up, we do need a local binding
          * TCP hash as well as the others for fast bind/connect.
          */
+        struct kmem_cache               *bind_bucket_cachep;
         struct inet_bind_hashbucket     *bhash;
-
         unsigned int                    bhash_size;
-        /* 4 bytes hole on 64 bit */
 
-        struct kmem_cache               *bind_bucket_cachep;
+        /* The 2nd listener table hashed by local port and address */
+        unsigned int                    lhash2_mask;
+        struct inet_listen_hashbucket   *lhash2;
 
         /* All the above members are written once at bootup and
          * never written again _or_ are predominantly read-access.
@@ -145,14 +147,25 @@ struct inet_hashinfo {
          * Now align to a new cache line as all the following members
          * might be often dirty.
          */
-        /* All sockets in TCP_LISTEN state will be in here. This is the only
-         * table where wildcard'd TCP sockets can exist. Hash function here
-         * is just local port number.
+        /* All sockets in TCP_LISTEN state will be in listening_hash.
+         * This is the only table where wildcard'd TCP sockets can
+         * exist. listening_hash is only hashed by local port number.
+         * If lhash2 is initialized, the same socket will also be hashed
+         * to lhash2 by port and address.
          */
         struct inet_listen_hashbucket   listening_hash[INET_LHTABLE_SIZE]
                                         ____cacheline_aligned_in_smp;
 };
 
+#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
+        hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)
+
+static inline struct inet_listen_hashbucket *
+inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash)
+{
+        return &h->lhash2[hash & h->lhash2_mask];
+}
+
 static inline struct inet_ehash_bucket *inet_ehash_bucket(
         struct inet_hashinfo *hashinfo,
         unsigned int hash)
@@ -208,6 +221,10 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child);
 void inet_put_port(struct sock *sk);
 
 void inet_hashinfo_init(struct inet_hashinfo *h);
+void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
+                         unsigned long numentries, int scale,
+                         unsigned long low_limit,
+                         unsigned long high_limit);
 
 bool inet_ehash_insert(struct sock *sk, struct sock *osk);
 bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
......
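
For illustration only (not part of the diff): a rough sketch of how the lhash2 helpers above are meant to compose with the portaddr hash added later in this series. The function name and the simplified port check are invented for this example; the real lookups below score candidates with compute_score() instead.

/* Hypothetical example, kernel context, caller holds rcu_read_lock(). */
static struct sock *lhash2_walk_example(struct net *net,
                                        struct inet_hashinfo *h,
                                        __be32 daddr, unsigned short hnum)
{
        u32 hash = ipv4_portaddr_hash(net, daddr, hnum);  /* port+addr hash */
        struct inet_listen_hashbucket *ilb2 = inet_lhash2_bucket(h, hash);
        struct inet_connection_sock *icsk;

        inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
                struct sock *sk = (struct sock *)icsk;

                /* a real lookup would use compute_score() here */
                if (inet_sk(sk)->inet_num == hnum)
                        return sk;
        }
        return NULL;
}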
@@ -26,12 +26,14 @@
 #include <linux/ip.h>
 #include <linux/in.h>
 #include <linux/skbuff.h>
+#include <linux/jhash.h>
 
 #include <net/inet_sock.h>
 #include <net/route.h>
 #include <net/snmp.h>
 #include <net/flow.h>
 #include <net/flow_dissector.h>
+#include <net/netns/hash.h>
 
 #define IPV4_MAX_PMTU           65535U          /* RFC 2675, Section 5.1 */
@@ -521,6 +523,13 @@ static inline unsigned int ipv4_addr_hash(__be32 ip)
         return (__force unsigned int) ip;
 }
 
+static inline u32 ipv4_portaddr_hash(const struct net *net,
+                                     __be32 saddr,
+                                     unsigned int port)
+{
+        return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
+}
+
 bool ip_call_ra_chain(struct sk_buff *skb);
 
 /*
......
@@ -22,6 +22,7 @@
 #include <net/flow.h>
 #include <net/flow_dissector.h>
 #include <net/snmp.h>
+#include <net/netns/hash.h>
 
 #define SIN6_LEN_RFC2133        24
@@ -673,6 +674,22 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
                                         cpu_to_be32(0x0000ffff))) == 0UL;
 }
 
+static inline u32 ipv6_portaddr_hash(const struct net *net,
+                                     const struct in6_addr *addr6,
+                                     unsigned int port)
+{
+        unsigned int hash, mix = net_hash_mix(net);
+
+        if (ipv6_addr_any(addr6))
+                hash = jhash_1word(0, mix);
+        else if (ipv6_addr_v4mapped(addr6))
+                hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
+        else
+                hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
+
+        return hash ^ port;
+}
+
 /*
  * Check for a RFC 4843 ORCHID address
  * (Overlay Routable Cryptographic Hash Identifiers)
......
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/vmalloc.h>
+#include <linux/bootmem.h>
 
 #include <net/addrconf.h>
 #include <net/inet_connection_sock.h>
@@ -168,6 +169,60 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
 }
 EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
+static struct inet_listen_hashbucket *
+inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
+{
+        u32 hash;
+
+#if IS_ENABLED(CONFIG_IPV6)
+        if (sk->sk_family == AF_INET6)
+                hash = ipv6_portaddr_hash(sock_net(sk),
+                                          &sk->sk_v6_rcv_saddr,
+                                          inet_sk(sk)->inet_num);
+        else
+#endif
+                hash = ipv4_portaddr_hash(sock_net(sk),
+                                          inet_sk(sk)->inet_rcv_saddr,
+                                          inet_sk(sk)->inet_num);
+        return inet_lhash2_bucket(h, hash);
+}
+
+static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
+{
+        struct inet_listen_hashbucket *ilb2;
+
+        if (!h->lhash2)
+                return;
+
+        ilb2 = inet_lhash2_bucket_sk(h, sk);
+
+        spin_lock(&ilb2->lock);
+        if (sk->sk_reuseport && sk->sk_family == AF_INET6)
+                hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
+                                   &ilb2->head);
+        else
+                hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
+                                   &ilb2->head);
+        ilb2->count++;
+        spin_unlock(&ilb2->lock);
+}
+
+static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
+{
+        struct inet_listen_hashbucket *ilb2;
+
+        if (!h->lhash2 ||
+            WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node)))
+                return;
+
+        ilb2 = inet_lhash2_bucket_sk(h, sk);
+
+        spin_lock(&ilb2->lock);
+        hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node);
+        ilb2->count--;
+        spin_unlock(&ilb2->lock);
+}
+
 static inline int compute_score(struct sock *sk, struct net *net,
                                 const unsigned short hnum, const __be32 daddr,
                                 const int dif, const int sdif, bool exact_dif)
@@ -207,6 +262,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
  */
 
 /* called with rcu_read_lock() : No refcount taken on the socket */
+static struct sock *inet_lhash2_lookup(struct net *net,
+                                struct inet_listen_hashbucket *ilb2,
+                                struct sk_buff *skb, int doff,
+                                const __be32 saddr, __be16 sport,
+                                const __be32 daddr, const unsigned short hnum,
+                                const int dif, const int sdif)
+{
+        bool exact_dif = inet_exact_dif_match(net, skb);
+        struct inet_connection_sock *icsk;
+        struct sock *sk, *result = NULL;
+        int score, hiscore = 0;
+        u32 phash = 0;
+
+        inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
+                sk = (struct sock *)icsk;
+                score = compute_score(sk, net, hnum, daddr,
+                                      dif, sdif, exact_dif);
+                if (score > hiscore) {
+                        if (sk->sk_reuseport) {
+                                phash = inet_ehashfn(net, daddr, hnum,
+                                                     saddr, sport);
+                                result = reuseport_select_sock(sk, phash,
+                                                               skb, doff);
+                                if (result)
+                                        return result;
+                        }
+                        result = sk;
+                        hiscore = score;
+                }
+        }
+
+        return result;
+}
+
 struct sock *__inet_lookup_listener(struct net *net,
                                     struct inet_hashinfo *hashinfo,
                                     struct sk_buff *skb, int doff,
@@ -217,10 +306,42 @@ struct sock *__inet_lookup_listener(struct net *net,
         unsigned int hash = inet_lhashfn(net, hnum);
         struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
         bool exact_dif = inet_exact_dif_match(net, skb);
+        struct inet_listen_hashbucket *ilb2;
         struct sock *sk, *result = NULL;
         int score, hiscore = 0;
+        unsigned int hash2;
         u32 phash = 0;
 
+        if (ilb->count <= 10 || !hashinfo->lhash2)
+                goto port_lookup;
+
+        /* Too many sk in the ilb bucket (which is hashed by port alone).
+         * Try lhash2 (which is hashed by port and addr) instead.
+         */
+
+        hash2 = ipv4_portaddr_hash(net, daddr, hnum);
+        ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+        if (ilb2->count > ilb->count)
+                goto port_lookup;
+
+        result = inet_lhash2_lookup(net, ilb2, skb, doff,
+                                    saddr, sport, daddr, hnum,
+                                    dif, sdif);
+        if (result)
+                return result;
+
+        /* Lookup lhash2 with INADDR_ANY */
+        hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
+        ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+        if (ilb2->count > ilb->count)
+                goto port_lookup;
+
+        return inet_lhash2_lookup(net, ilb2, skb, doff,
+                                  saddr, sport, daddr, hnum,
+                                  dif, sdif);
+
+port_lookup:
         sk_for_each_rcu(sk, &ilb->head) {
                 score = compute_score(sk, net, hnum, daddr,
                                       dif, sdif, exact_dif);
@@ -476,6 +597,8 @@ int __inet_hash(struct sock *sk, struct sock *osk)
                 hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
         else
                 hlist_add_head_rcu(&sk->sk_node, &ilb->head);
+        inet_hash2(hashinfo, sk);
+        ilb->count++;
         sock_set_flag(sk, SOCK_RCU_FREE);
         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 unlock:
@@ -502,28 +625,35 @@ EXPORT_SYMBOL_GPL(inet_hash);
 void inet_unhash(struct sock *sk)
 {
         struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+        struct inet_listen_hashbucket *ilb;
         spinlock_t *lock;
         bool listener = false;
-        int done;
 
         if (sk_unhashed(sk))
                 return;
 
         if (sk->sk_state == TCP_LISTEN) {
-                lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
+                ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+                lock = &ilb->lock;
                 listener = true;
         } else {
                 lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
         }
         spin_lock_bh(lock);
+        if (sk_unhashed(sk))
+                goto unlock;
+
         if (rcu_access_pointer(sk->sk_reuseport_cb))
                 reuseport_detach_sock(sk);
-        if (listener)
-                done = __sk_del_node_init(sk);
-        else
-                done = __sk_nulls_del_node_init_rcu(sk);
-        if (done)
-                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+        if (listener) {
+                inet_unhash2(hashinfo, sk);
+                __sk_del_node_init(sk);
+                ilb->count--;
+        } else {
+                __sk_nulls_del_node_init_rcu(sk);
+        }
+        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+unlock:
         spin_unlock_bh(lock);
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
@@ -658,10 +788,37 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
         for (i = 0; i < INET_LHTABLE_SIZE; i++) {
                 spin_lock_init(&h->listening_hash[i].lock);
                 INIT_HLIST_HEAD(&h->listening_hash[i].head);
+                h->listening_hash[i].count = 0;
         }
+
+        h->lhash2 = NULL;
 }
 EXPORT_SYMBOL_GPL(inet_hashinfo_init);
 
+void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
+                                unsigned long numentries, int scale,
+                                unsigned long low_limit,
+                                unsigned long high_limit)
+{
+        unsigned int i;
+
+        h->lhash2 = alloc_large_system_hash(name,
+                                            sizeof(*h->lhash2),
+                                            numentries,
+                                            scale,
+                                            0,
+                                            NULL,
+                                            &h->lhash2_mask,
+                                            low_limit,
+                                            high_limit);
+
+        for (i = 0; i <= h->lhash2_mask; i++) {
+                spin_lock_init(&h->lhash2[i].lock);
+                INIT_HLIST_HEAD(&h->lhash2[i].head);
+                h->lhash2[i].count = 0;
+        }
+}
+
 int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
 {
         unsigned int locksz = sizeof(spinlock_t);
......
@@ -3577,6 +3577,9 @@ void __init tcp_init(void)
         percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
         percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
         inet_hashinfo_init(&tcp_hashinfo);
+        inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
+                            thash_entries, 21,  /* one slot per 2 MB*/
+                            0, 64 * 1024);
         tcp_hashinfo.bind_bucket_cachep =
                 kmem_cache_create("tcp_bind_bucket",
                                   sizeof(struct inet_bind_bucket), 0,
......
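
A rough sense of the sizing above (an illustrative calculation, not part of the commit): when thash_entries is not given on the kernel command line, scale = 21 asks alloc_large_system_hash() for roughly one bucket per 2^21 bytes (2 MB) of memory, so a 16 GB machine would get on the order of 16 GiB / 2 MiB = 8192 buckets, while the high_limit of 64 * 1024 caps the table at 65536 buckets.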
@@ -357,18 +357,12 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 }
 EXPORT_SYMBOL(udp_lib_get_port);
 
-static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
-                              unsigned int port)
-{
-        return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
-}
-
 int udp_v4_get_port(struct sock *sk, unsigned short snum)
 {
         unsigned int hash2_nulladdr =
-                udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
+                ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
         unsigned int hash2_partial =
-                udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
+                ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
 
         /* precompute partial secondary hash */
         udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -485,7 +479,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
         u32 hash = 0;
 
         if (hslot->count > 10) {
-                hash2 = udp4_portaddr_hash(net, daddr, hnum);
+                hash2 = ipv4_portaddr_hash(net, daddr, hnum);
                 slot2 = hash2 & udptable->mask;
                 hslot2 = &udptable->hash2[slot2];
                 if (hslot->count < hslot2->count)
@@ -496,7 +490,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
                                           exact_dif, hslot2, skb);
                 if (!result) {
                         unsigned int old_slot2 = slot2;
-                        hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
+                        hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
                         slot2 = hash2 & udptable->mask;
                         /* avoid searching the same slot again. */
                         if (unlikely(slot2 == old_slot2))
@@ -1761,7 +1755,7 @@ EXPORT_SYMBOL(udp_lib_rehash);
 
 static void udp_v4_rehash(struct sock *sk)
 {
-        u16 new_hash = udp4_portaddr_hash(sock_net(sk),
+        u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
                                           inet_sk(sk)->inet_rcv_saddr,
                                           inet_sk(sk)->inet_num);
         udp_lib_rehash(sk, new_hash);
@@ -1952,9 +1946,9 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
         struct sk_buff *nskb;
 
         if (use_hash2) {
-                hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
+                hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
                             udptable->mask;
-                hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask;
+                hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
 start_lookup:
                 hslot = &udptable->hash2[hash2];
                 offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
@@ -2186,7 +2180,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
                                             int dif, int sdif)
 {
         unsigned short hnum = ntohs(loc_port);
-        unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
+        unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
         unsigned int slot2 = hash2 & udp_table.mask;
         struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
         INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
......
@@ -125,6 +125,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
 }
 
 /* called with rcu_read_lock() */
+static struct sock *inet6_lhash2_lookup(struct net *net,
+                struct inet_listen_hashbucket *ilb2,
+                struct sk_buff *skb, int doff,
+                const struct in6_addr *saddr,
+                const __be16 sport, const struct in6_addr *daddr,
+                const unsigned short hnum, const int dif, const int sdif)
+{
+        bool exact_dif = inet6_exact_dif_match(net, skb);
+        struct inet_connection_sock *icsk;
+        struct sock *sk, *result = NULL;
+        int score, hiscore = 0;
+        u32 phash = 0;
+
+        inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
+                sk = (struct sock *)icsk;
+                score = compute_score(sk, net, hnum, daddr, dif, sdif,
+                                      exact_dif);
+                if (score > hiscore) {
+                        if (sk->sk_reuseport) {
+                                phash = inet6_ehashfn(net, daddr, hnum,
+                                                      saddr, sport);
+                                result = reuseport_select_sock(sk, phash,
+                                                               skb, doff);
+                                if (result)
+                                        return result;
+                        }
+                        result = sk;
+                        hiscore = score;
+                }
+        }
+
+        return result;
+}
+
 struct sock *inet6_lookup_listener(struct net *net,
                 struct inet_hashinfo *hashinfo,
                 struct sk_buff *skb, int doff,
@@ -135,10 +169,42 @@ struct sock *inet6_lookup_listener(struct net *net,
         unsigned int hash = inet_lhashfn(net, hnum);
         struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
         bool exact_dif = inet6_exact_dif_match(net, skb);
+        struct inet_listen_hashbucket *ilb2;
         struct sock *sk, *result = NULL;
         int score, hiscore = 0;
+        unsigned int hash2;
         u32 phash = 0;
 
+        if (ilb->count <= 10 || !hashinfo->lhash2)
+                goto port_lookup;
+
+        /* Too many sk in the ilb bucket (which is hashed by port alone).
+         * Try lhash2 (which is hashed by port and addr) instead.
+         */
+
+        hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+        ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+        if (ilb2->count > ilb->count)
+                goto port_lookup;
+
+        result = inet6_lhash2_lookup(net, ilb2, skb, doff,
+                                     saddr, sport, daddr, hnum,
+                                     dif, sdif);
+        if (result)
+                return result;
+
+        /* Lookup lhash2 with in6addr_any */
+        hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
+        ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+        if (ilb2->count > ilb->count)
+                goto port_lookup;
+
+        return inet6_lhash2_lookup(net, ilb2, skb, doff,
+                                   saddr, sport, daddr, hnum,
+                                   dif, sdif);
+
+port_lookup:
         sk_for_each(sk, &ilb->head) {
                 score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
                 if (score > hiscore) {
......
@@ -89,28 +89,12 @@ static u32 udp6_ehashfn(const struct net *net,
                                udp_ipv6_hash_secret + net_hash_mix(net));
 }
 
-static u32 udp6_portaddr_hash(const struct net *net,
-                              const struct in6_addr *addr6,
-                              unsigned int port)
-{
-        unsigned int hash, mix = net_hash_mix(net);
-
-        if (ipv6_addr_any(addr6))
-                hash = jhash_1word(0, mix);
-        else if (ipv6_addr_v4mapped(addr6))
-                hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
-        else
-                hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
-
-        return hash ^ port;
-}
-
 int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
         unsigned int hash2_nulladdr =
-                udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
+                ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
         unsigned int hash2_partial =
-                udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
+                ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
 
         /* precompute partial secondary hash */
         udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -119,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
 
 static void udp_v6_rehash(struct sock *sk)
 {
-        u16 new_hash = udp6_portaddr_hash(sock_net(sk),
+        u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
                                           &sk->sk_v6_rcv_saddr,
                                           inet_sk(sk)->inet_num);
@@ -225,7 +209,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
         u32 hash = 0;
 
         if (hslot->count > 10) {
-                hash2 = udp6_portaddr_hash(net, daddr, hnum);
+                hash2 = ipv6_portaddr_hash(net, daddr, hnum);
                 slot2 = hash2 & udptable->mask;
                 hslot2 = &udptable->hash2[slot2];
                 if (hslot->count < hslot2->count)
@@ -236,7 +220,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
                                           hslot2, skb);
                 if (!result) {
                         unsigned int old_slot2 = slot2;
-                        hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
+                        hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
                         slot2 = hash2 & udptable->mask;
                         /* avoid searching the same slot again. */
                         if (unlikely(slot2 == old_slot2))
@@ -705,9 +689,9 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
         struct sk_buff *nskb;
 
         if (use_hash2) {
-                hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
+                hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) &
                             udptable->mask;
-                hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask;
+                hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
 start_lookup:
                 hslot = &udptable->hash2[hash2];
                 offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
@@ -895,7 +879,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
                                             int dif, int sdif)
 {
         unsigned short hnum = ntohs(loc_port);
-        unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
+        unsigned int hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
         unsigned int slot2 = hash2 & udp_table.mask;
         struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
         const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
......