Commit 33c15297 authored by David S. Miller's avatar David S. Miller

Merge branch 'sctp-transport-rhashtable'

Xin Long says:

====================
sctp: use transport hashtable to replace association's with rhashtable

for telecom center, the usual case is that a server is connected by thousands
of clients. but if the server with only one enpoint(udp style) use the same
sport and dport to communicate with every clients, and every assoc in server
will be hashed in the same chain of global assoc hashtable due to currently we
choose dport and sport as the hash key.

when a packet is received, sctp_rcv try to find the assoc with sport and dport,
since that chain is too long to find it fast, it make the performance turn to
very low, some test data is as follow:

in server:
$./ss [start a udp style server there]
in client:
$./cc [start 2500 sockets to connect server with same port and different ip,
       and use one of them to send data to server]

===== test on net-next
-- perf top
server:
  55.73%  [kernel]             [k] sctp_assoc_is_match
   6.80%  [kernel]             [k] sctp_assoc_lookup_paddr
   4.81%  [kernel]             [k] sctp_v4_cmp_addr
   3.12%  [kernel]             [k] _raw_spin_unlock_irqrestore
   1.94%  [kernel]             [k] sctp_cmp_addr_exact

client:
  46.01%  [kernel]                    [k] sctp_endpoint_lookup_assoc
   5.55%  libc-2.17.so                [.] __libc_calloc
   5.39%  libc-2.17.so                [.] _int_free
   3.92%  libc-2.17.so                [.] _int_malloc
   3.23%  [kernel]                    [k] __memset

-- spent time
time is 487s, send pkt is 10000000

we need to change the way to calculate the hash key, to use lport +
rport + paddr as the hash key can avoid this issue.

besides, this patchset will use transport hashtable to replace
association hashtable to lookup with rhashtable api. get transport
first then get association by t->asoc. and also it will make tcp
style work better.

===== test with this patchset:
-- perf top
server:
  15.98%  [kernel]                 [k] _raw_spin_unlock_irqrestore
   9.92%  [kernel]                 [k] __pv_queued_spin_lock_slowpath
   7.22%  [kernel]                 [k] copy_user_generic_string
   2.38%  libpthread-2.17.so       [.] __recvmsg_nocancel
   1.88%  [kernel]                 [k] sctp_recvmsg

client:
  11.90%  [kernel]                   [k] sctp_hash_cmp
   8.52%  [kernel]                   [k] rht_deferred_worker
   4.94%  [kernel]                   [k] __pv_queued_spin_lock_slowpath
   3.95%  [kernel]                   [k] sctp_bind_addr_match
   2.49%  [kernel]                   [k] __memset

-- spent time
time is 22s, send pkt is 10000000
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 6a5ef90c c79c0666
......@@ -126,8 +126,6 @@ int sctp_primitive_ASCONF(struct net *, struct sctp_association *, void *arg);
*/
int sctp_rcv(struct sk_buff *skb);
void sctp_v4_err(struct sk_buff *skb, u32 info);
void sctp_hash_established(struct sctp_association *);
void sctp_unhash_established(struct sctp_association *);
void sctp_hash_endpoint(struct sctp_endpoint *);
void sctp_unhash_endpoint(struct sctp_endpoint *);
struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *,
......@@ -143,6 +141,17 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
struct sctp_transport *t);
void sctp_backlog_migrate(struct sctp_association *assoc,
struct sock *oldsk, struct sock *newsk);
int sctp_transport_hashtable_init(void);
void sctp_transport_hashtable_destroy(void);
void sctp_hash_transport(struct sctp_transport *t);
void sctp_unhash_transport(struct sctp_transport *t);
struct sctp_transport *sctp_addrs_lookup_transport(
struct net *net,
const union sctp_addr *laddr,
const union sctp_addr *paddr);
struct sctp_transport *sctp_epaddr_lookup_transport(
const struct sctp_endpoint *ep,
const union sctp_addr *paddr);
/*
* sctp/proc.c
......@@ -519,25 +528,6 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport)
return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1);
}
/* This is the hash function for the association hash table. */
static inline int sctp_assoc_hashfn(struct net *net, __u16 lport, __u16 rport)
{
int h = (lport << 16) + rport + net_hash_mix(net);
h ^= h>>8;
return h & (sctp_assoc_hashsize - 1);
}
/* This is the hash function for the association hash table. This is
* not used yet, but could be used as a better hash function when
* we have a vtag.
*/
static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag)
{
int h = (lport << 16) + rport;
h ^= vtag;
return h & (sctp_assoc_hashsize - 1);
}
#define sctp_for_each_hentry(epb, head) \
hlist_for_each_entry(epb, head, node)
......
......@@ -48,6 +48,7 @@
#define __sctp_structs_h__
#include <linux/ktime.h>
#include <linux/rhashtable.h>
#include <linux/socket.h> /* linux/in.h needs this!! */
#include <linux/in.h> /* We get struct sockaddr_in. */
#include <linux/in6.h> /* We get struct in6_addr */
......@@ -119,14 +120,13 @@ extern struct sctp_globals {
/* This is the hash of all endpoints. */
struct sctp_hashbucket *ep_hashtable;
/* This is the hash of all associations. */
struct sctp_hashbucket *assoc_hashtable;
/* This is the sctp port control hash. */
struct sctp_bind_hashbucket *port_hashtable;
/* This is the hash of all transports. */
struct rhashtable transport_hashtable;
/* Sizes of above hashtables. */
int ep_hashsize;
int assoc_hashsize;
int port_hashsize;
/* Default initialization values to be applied to new associations. */
......@@ -143,10 +143,9 @@ extern struct sctp_globals {
#define sctp_address_families (sctp_globals.address_families)
#define sctp_ep_hashsize (sctp_globals.ep_hashsize)
#define sctp_ep_hashtable (sctp_globals.ep_hashtable)
#define sctp_assoc_hashsize (sctp_globals.assoc_hashsize)
#define sctp_assoc_hashtable (sctp_globals.assoc_hashtable)
#define sctp_port_hashsize (sctp_globals.port_hashsize)
#define sctp_port_hashtable (sctp_globals.port_hashtable)
#define sctp_transport_hashtable (sctp_globals.transport_hashtable)
#define sctp_checksum_disable (sctp_globals.checksum_disable)
/* SCTP Socket type: UDP or TCP style. */
......@@ -753,6 +752,7 @@ static inline int sctp_packet_empty(struct sctp_packet *packet)
struct sctp_transport {
/* A list of transports. */
struct list_head transports;
struct rhash_head node;
/* Reference counting. */
atomic_t refcnt;
......
......@@ -383,6 +383,7 @@ void sctp_association_free(struct sctp_association *asoc)
list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
transport = list_entry(pos, struct sctp_transport, transports);
list_del_rcu(pos);
sctp_unhash_transport(transport);
sctp_transport_free(transport);
}
......@@ -500,6 +501,8 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
/* Remove this peer from the list. */
list_del_rcu(&peer->transports);
/* Remove this peer from the transport hashtable */
sctp_unhash_transport(peer);
/* Get the first transport of asoc. */
pos = asoc->peer.transport_addr_list.next;
......@@ -699,6 +702,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
/* Attach the remote transport to our asoc. */
list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
asoc->peer.transport_count++;
/* Add this peer into the transport hashtable */
sctp_hash_transport(peer);
/* If we do not yet have a primary path, set one. */
if (!asoc->peer.primary_path) {
......
......@@ -314,21 +314,16 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
}
/* Find the association that goes with this chunk.
* We do a linear search of the associations for this endpoint.
* We return the matching transport address too.
* We lookup the transport from hashtable at first, then get association
* through t->assoc.
*/
static struct sctp_association *__sctp_endpoint_lookup_assoc(
struct sctp_association *sctp_endpoint_lookup_assoc(
const struct sctp_endpoint *ep,
const union sctp_addr *paddr,
struct sctp_transport **transport)
{
struct sctp_association *asoc = NULL;
struct sctp_association *tmp;
struct sctp_transport *t = NULL;
struct sctp_hashbucket *head;
struct sctp_ep_common *epb;
int hash;
int rport;
struct sctp_transport *t;
*transport = NULL;
......@@ -337,45 +332,16 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
*/
if (!ep->base.bind_addr.port)
goto out;
t = sctp_epaddr_lookup_transport(ep, paddr);
if (!t || t->asoc->temp)
goto out;
rport = ntohs(paddr->v4.sin_port);
hash = sctp_assoc_hashfn(sock_net(ep->base.sk), ep->base.bind_addr.port,
rport);
head = &sctp_assoc_hashtable[hash];
read_lock(&head->lock);
sctp_for_each_hentry(epb, &head->chain) {
tmp = sctp_assoc(epb);
if (tmp->ep != ep || rport != tmp->peer.port)
continue;
t = sctp_assoc_lookup_paddr(tmp, paddr);
if (t) {
asoc = tmp;
*transport = t;
break;
}
}
read_unlock(&head->lock);
*transport = t;
asoc = t->asoc;
out:
return asoc;
}
/* Lookup association on an endpoint based on a peer address. BH-safe. */
struct sctp_association *sctp_endpoint_lookup_assoc(
const struct sctp_endpoint *ep,
const union sctp_addr *paddr,
struct sctp_transport **transport)
{
struct sctp_association *asoc;
local_bh_disable();
asoc = __sctp_endpoint_lookup_assoc(ep, paddr, transport);
local_bh_enable();
return asoc;
}
/* Look for any peeled off association from the endpoint that matches the
* given peer address.
*/
......
......@@ -782,65 +782,135 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
return ep;
}
/* Insert association into the hash table. */
static void __sctp_hash_established(struct sctp_association *asoc)
/* rhashtable for transport */
struct sctp_hash_cmp_arg {
const union sctp_addr *laddr;
const union sctp_addr *paddr;
const struct net *net;
};
static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
struct net *net = sock_net(asoc->base.sk);
struct sctp_ep_common *epb;
struct sctp_hashbucket *head;
const struct sctp_hash_cmp_arg *x = arg->key;
const struct sctp_transport *t = ptr;
struct sctp_association *asoc = t->asoc;
const struct net *net = x->net;
epb = &asoc->base;
if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
return 1;
if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
return 1;
if (!net_eq(sock_net(asoc->base.sk), net))
return 1;
if (!sctp_bind_addr_match(&asoc->base.bind_addr,
x->laddr, sctp_sk(asoc->base.sk)))
return 1;
/* Calculate which chain this entry will belong to. */
epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port,
asoc->peer.port);
return 0;
}
head = &sctp_assoc_hashtable[epb->hashent];
static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
{
const struct sctp_transport *t = data;
const union sctp_addr *paddr = &t->ipaddr;
const struct net *net = sock_net(t->asoc->base.sk);
u16 lport = htons(t->asoc->base.bind_addr.port);
u32 addr;
if (paddr->sa.sa_family == AF_INET6)
addr = jhash(&paddr->v6.sin6_addr, 16, seed);
else
addr = paddr->v4.sin_addr.s_addr;
write_lock(&head->lock);
hlist_add_head(&epb->node, &head->chain);
write_unlock(&head->lock);
return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 |
(__force __u32)lport, net_hash_mix(net), seed);
}
/* Add an association to the hash. Local BH-safe. */
void sctp_hash_established(struct sctp_association *asoc)
static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
{
if (asoc->temp)
return;
const struct sctp_hash_cmp_arg *x = data;
const union sctp_addr *paddr = x->paddr;
const struct net *net = x->net;
u16 lport = x->laddr->v4.sin_port;
u32 addr;
if (paddr->sa.sa_family == AF_INET6)
addr = jhash(&paddr->v6.sin6_addr, 16, seed);
else
addr = paddr->v4.sin_addr.s_addr;
local_bh_disable();
__sctp_hash_established(asoc);
local_bh_enable();
return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 |
(__force __u32)lport, net_hash_mix(net), seed);
}
/* Remove association from the hash table. */
static void __sctp_unhash_established(struct sctp_association *asoc)
static const struct rhashtable_params sctp_hash_params = {
.head_offset = offsetof(struct sctp_transport, node),
.hashfn = sctp_hash_key,
.obj_hashfn = sctp_hash_obj,
.obj_cmpfn = sctp_hash_cmp,
.automatic_shrinking = true,
};
int sctp_transport_hashtable_init(void)
{
struct net *net = sock_net(asoc->base.sk);
struct sctp_hashbucket *head;
struct sctp_ep_common *epb;
return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params);
}
epb = &asoc->base;
void sctp_transport_hashtable_destroy(void)
{
rhashtable_destroy(&sctp_transport_hashtable);
}
epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port,
asoc->peer.port);
void sctp_hash_transport(struct sctp_transport *t)
{
struct sctp_sockaddr_entry *addr;
struct sctp_hash_cmp_arg arg;
addr = list_entry(t->asoc->base.bind_addr.address_list.next,
struct sctp_sockaddr_entry, list);
arg.laddr = &addr->a;
arg.paddr = &t->ipaddr;
arg.net = sock_net(t->asoc->base.sk);
reinsert:
if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg,
&t->node, sctp_hash_params) == -EBUSY)
goto reinsert;
}
head = &sctp_assoc_hashtable[epb->hashent];
void sctp_unhash_transport(struct sctp_transport *t)
{
rhashtable_remove_fast(&sctp_transport_hashtable, &t->node,
sctp_hash_params);
}
write_lock(&head->lock);
hlist_del_init(&epb->node);
write_unlock(&head->lock);
struct sctp_transport *sctp_addrs_lookup_transport(
struct net *net,
const union sctp_addr *laddr,
const union sctp_addr *paddr)
{
struct sctp_hash_cmp_arg arg = {
.laddr = laddr,
.paddr = paddr,
.net = net,
};
return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
sctp_hash_params);
}
/* Remove association from the hash table. Local BH-safe. */
void sctp_unhash_established(struct sctp_association *asoc)
struct sctp_transport *sctp_epaddr_lookup_transport(
const struct sctp_endpoint *ep,
const union sctp_addr *paddr)
{
if (asoc->temp)
return;
struct sctp_sockaddr_entry *addr;
struct net *net = sock_net(ep->base.sk);
local_bh_disable();
__sctp_unhash_established(asoc);
local_bh_enable();
addr = list_entry(ep->base.bind_addr.address_list.next,
struct sctp_sockaddr_entry, list);
return sctp_addrs_lookup_transport(net, &addr->a, paddr);
}
/* Look up an association. */
......@@ -850,38 +920,19 @@ static struct sctp_association *__sctp_lookup_association(
const union sctp_addr *peer,
struct sctp_transport **pt)
{
struct sctp_hashbucket *head;
struct sctp_ep_common *epb;
struct sctp_association *asoc;
struct sctp_transport *transport;
int hash;
struct sctp_transport *t;
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
hash = sctp_assoc_hashfn(net, ntohs(local->v4.sin_port),
ntohs(peer->v4.sin_port));
head = &sctp_assoc_hashtable[hash];
read_lock(&head->lock);
sctp_for_each_hentry(epb, &head->chain) {
asoc = sctp_assoc(epb);
transport = sctp_assoc_is_match(asoc, net, local, peer);
if (transport)
goto hit;
}
read_unlock(&head->lock);
t = sctp_addrs_lookup_transport(net, local, peer);
if (!t || t->dead || t->asoc->temp)
return NULL;
return NULL;
sctp_association_hold(t->asoc);
*pt = t;
hit:
*pt = transport;
sctp_association_hold(asoc);
read_unlock(&head->lock);
return asoc;
return t->asoc;
}
/* Look up an association. BH-safe. */
/* Look up an association. protected by RCU read lock */
static
struct sctp_association *sctp_lookup_association(struct net *net,
const union sctp_addr *laddr,
......@@ -890,9 +941,9 @@ struct sctp_association *sctp_lookup_association(struct net *net,
{
struct sctp_association *asoc;
local_bh_disable();
rcu_read_lock();
asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
local_bh_enable();
rcu_read_unlock();
return asoc;
}
......
This diff is collapsed.
......@@ -1416,24 +1416,6 @@ static __init int sctp_init(void)
for (order = 0; (1UL << order) < goal; order++)
;
do {
sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE /
sizeof(struct sctp_hashbucket);
if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0)
continue;
sctp_assoc_hashtable = (struct sctp_hashbucket *)
__get_free_pages(GFP_KERNEL | __GFP_NOWARN, order);
} while (!sctp_assoc_hashtable && --order > 0);
if (!sctp_assoc_hashtable) {
pr_err("Failed association hash alloc\n");
status = -ENOMEM;
goto err_ahash_alloc;
}
for (i = 0; i < sctp_assoc_hashsize; i++) {
rwlock_init(&sctp_assoc_hashtable[i].lock);
INIT_HLIST_HEAD(&sctp_assoc_hashtable[i].chain);
}
/* Allocate and initialize the endpoint hash table. */
sctp_ep_hashsize = 64;
sctp_ep_hashtable =
......@@ -1467,8 +1449,10 @@ static __init int sctp_init(void)
INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
}
pr_info("Hash tables configured (established %d bind %d)\n",
sctp_assoc_hashsize, sctp_port_hashsize);
if (sctp_transport_hashtable_init())
goto err_thash_alloc;
pr_info("Hash tables configured (bind %d)\n", sctp_port_hashsize);
sctp_sysctl_register();
......@@ -1521,12 +1505,10 @@ static __init int sctp_init(void)
get_order(sctp_port_hashsize *
sizeof(struct sctp_bind_hashbucket)));
err_bhash_alloc:
sctp_transport_hashtable_destroy();
err_thash_alloc:
kfree(sctp_ep_hashtable);
err_ehash_alloc:
free_pages((unsigned long)sctp_assoc_hashtable,
get_order(sctp_assoc_hashsize *
sizeof(struct sctp_hashbucket)));
err_ahash_alloc:
percpu_counter_destroy(&sctp_sockets_allocated);
err_percpu_counter_init:
kmem_cache_destroy(sctp_chunk_cachep);
......@@ -1560,13 +1542,11 @@ static __exit void sctp_exit(void)
sctp_sysctl_unregister();
free_pages((unsigned long)sctp_assoc_hashtable,
get_order(sctp_assoc_hashsize *
sizeof(struct sctp_hashbucket)));
kfree(sctp_ep_hashtable);
free_pages((unsigned long)sctp_port_hashtable,
get_order(sctp_port_hashsize *
sizeof(struct sctp_bind_hashbucket)));
kfree(sctp_ep_hashtable);
sctp_transport_hashtable_destroy();
percpu_counter_destroy(&sctp_sockets_allocated);
......
......@@ -866,7 +866,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
(!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK))
return;
sctp_unhash_established(asoc);
sctp_association_free(asoc);
}
......@@ -1269,7 +1268,6 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
asoc = cmd->obj.asoc;
BUG_ON(asoc->peer.primary_path == NULL);
sctp_endpoint_add_asoc(ep, asoc);
sctp_hash_established(asoc);
break;
case SCTP_CMD_UPDATE_ASSOC:
......
......@@ -1228,7 +1228,6 @@ static int __sctp_connect(struct sock *sk,
* To the hash table, try to unhash it, just in case, its a noop
* if it wasn't hashed so we're safe
*/
sctp_unhash_established(asoc);
sctp_association_free(asoc);
}
return err;
......@@ -1504,7 +1503,6 @@ static void sctp_close(struct sock *sk, long timeout)
* ABORT or SHUTDOWN based on the linger options.
*/
if (sctp_state(asoc, CLOSED)) {
sctp_unhash_established(asoc);
sctp_association_free(asoc);
continue;
}
......@@ -1986,10 +1984,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
goto out_unlock;
out_free:
if (new_asoc) {
sctp_unhash_established(asoc);
if (new_asoc)
sctp_association_free(asoc);
}
out_unlock:
release_sock(sk);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment