Commit 4d99b258, authored by Liang Zhen, committed by Greg Kroah-Hartman

staging: lustre: avoid intensive reconnecting for ko2iblnd

When there is a connection race between two nodes and one side
of the connection is rejected by the other side, o2iblnd will
reconnect immediately. This is going to generate a lot of
thrashing if:

 - the race winner is slow and can't send out its connection
   request in a short time.
 - remote side leaves a cmid in TIMEWAIT state, which will reject
   future connection requests

To resolve this problem, this patch changes the reconnection
behavior: a reconnection is submitted by connd only if a zombie
connection is being destroyed and there is a pending
reconnection request for the corresponding peer.

Also, after a few rejections, reconnection will have a time
interval between each attempt.

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7569
Reviewed-on: http://review.whamcloud.com/17892
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Tested-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 82fffff4
......@@ -364,9 +364,7 @@ void kiblnd_destroy_peer(kib_peer_t *peer)
LASSERT(net);
LASSERT(!atomic_read(&peer->ibp_refcount));
LASSERT(!kiblnd_peer_active(peer));
LASSERT(!peer->ibp_connecting);
LASSERT(!peer->ibp_accepting);
LASSERT(list_empty(&peer->ibp_conns));
LASSERT(kiblnd_peer_idle(peer));
LASSERT(list_empty(&peer->ibp_tx_queue));
LIBCFS_FREE(peer, sizeof(*peer));
......@@ -392,10 +390,7 @@ kib_peer_t *kiblnd_find_peer_locked(lnet_nid_t nid)
list_for_each(tmp, peer_list) {
peer = list_entry(tmp, kib_peer_t, ibp_list);
LASSERT(peer->ibp_connecting > 0 || /* creating conns */
peer->ibp_accepting > 0 ||
!list_empty(&peer->ibp_conns)); /* active conn */
LASSERT(!kiblnd_peer_idle(peer));
if (peer->ibp_nid != nid)
continue;
......@@ -432,9 +427,7 @@ static int kiblnd_get_peer_info(lnet_ni_t *ni, int index,
for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
peer = list_entry(ptmp, kib_peer_t, ibp_list);
LASSERT(peer->ibp_connecting > 0 ||
peer->ibp_accepting > 0 ||
!list_empty(&peer->ibp_conns));
LASSERT(!kiblnd_peer_idle(peer));
if (peer->ibp_ni != ni)
continue;
......@@ -502,9 +495,7 @@ static int kiblnd_del_peer(lnet_ni_t *ni, lnet_nid_t nid)
for (i = lo; i <= hi; i++) {
list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
peer = list_entry(ptmp, kib_peer_t, ibp_list);
LASSERT(peer->ibp_connecting > 0 ||
peer->ibp_accepting > 0 ||
!list_empty(&peer->ibp_conns));
LASSERT(!kiblnd_peer_idle(peer));
if (peer->ibp_ni != ni)
continue;
......@@ -545,9 +536,7 @@ static kib_conn_t *kiblnd_get_conn_by_idx(lnet_ni_t *ni, int index)
for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
peer = list_entry(ptmp, kib_peer_t, ibp_list);
LASSERT(peer->ibp_connecting > 0 ||
peer->ibp_accepting > 0 ||
!list_empty(&peer->ibp_conns));
LASSERT(!kiblnd_peer_idle(peer));
if (peer->ibp_ni != ni)
continue;
......@@ -837,14 +826,14 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
return conn;
failed_2:
kiblnd_destroy_conn(conn);
kiblnd_destroy_conn(conn, true);
failed_1:
LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
failed_0:
return NULL;
}
void kiblnd_destroy_conn(kib_conn_t *conn)
void kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn)
{
struct rdma_cm_id *cmid = conn->ibc_cmid;
kib_peer_t *peer = conn->ibc_peer;
......@@ -984,9 +973,7 @@ static int kiblnd_close_matching_conns(lnet_ni_t *ni, lnet_nid_t nid)
for (i = lo; i <= hi; i++) {
list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
peer = list_entry(ptmp, kib_peer_t, ibp_list);
LASSERT(peer->ibp_connecting > 0 ||
peer->ibp_accepting > 0 ||
!list_empty(&peer->ibp_conns));
LASSERT(!kiblnd_peer_idle(peer));
if (peer->ibp_ni != ni)
continue;
......@@ -1071,12 +1058,8 @@ static void kiblnd_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when)
read_lock_irqsave(glock, flags);
peer = kiblnd_find_peer_locked(nid);
if (peer) {
LASSERT(peer->ibp_connecting > 0 || /* creating conns */
peer->ibp_accepting > 0 ||
!list_empty(&peer->ibp_conns)); /* active conn */
if (peer)
last_alive = peer->ibp_last_alive;
}
read_unlock_irqrestore(glock, flags);
......@@ -2368,6 +2351,8 @@ static void kiblnd_base_shutdown(void)
LASSERT(list_empty(&kiblnd_data.kib_peers[i]));
LASSERT(list_empty(&kiblnd_data.kib_connd_zombies));
LASSERT(list_empty(&kiblnd_data.kib_connd_conns));
LASSERT(list_empty(&kiblnd_data.kib_reconn_list));
LASSERT(list_empty(&kiblnd_data.kib_reconn_wait));
/* flag threads to terminate; wake and wait for them to die */
kiblnd_data.kib_shutdown = 1;
......@@ -2506,6 +2491,9 @@ static int kiblnd_base_startup(void)
spin_lock_init(&kiblnd_data.kib_connd_lock);
INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns);
INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies);
INIT_LIST_HEAD(&kiblnd_data.kib_reconn_list);
INIT_LIST_HEAD(&kiblnd_data.kib_reconn_wait);
init_waitqueue_head(&kiblnd_data.kib_connd_waitq);
init_waitqueue_head(&kiblnd_data.kib_failover_waitq);
......
......@@ -348,6 +348,16 @@ typedef struct {
void *kib_connd; /* the connd task (serialisation assertions) */
struct list_head kib_connd_conns; /* connections to setup/teardown */
struct list_head kib_connd_zombies; /* connections with zero refcount */
/* connections to reconnect */
struct list_head kib_reconn_list;
/* peers wait for reconnection */
struct list_head kib_reconn_wait;
/**
* The second that peers are pulled out from \a kib_reconn_wait
* for reconnection.
*/
time64_t kib_reconn_sec;
wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */
spinlock_t kib_connd_lock; /* serialise */
struct ib_qp_attr kib_error_qpa; /* QP->ERROR */
......@@ -525,6 +535,8 @@ typedef struct kib_conn {
struct list_head ibc_list; /* stash on peer's conn list */
struct list_head ibc_sched_list; /* schedule for attention */
__u16 ibc_version; /* version of connection */
/* reconnect later */
__u16 ibc_reconnect:1;
__u64 ibc_incarnation; /* which instance of the peer */
atomic_t ibc_refcount; /* # users */
int ibc_state; /* what's happening */
......@@ -574,18 +586,25 @@ typedef struct kib_peer {
struct list_head ibp_list; /* stash on global peer list */
lnet_nid_t ibp_nid; /* who's on the other end(s) */
lnet_ni_t *ibp_ni; /* LNet interface */
atomic_t ibp_refcount; /* # users */
struct list_head ibp_conns; /* all active connections */
struct list_head ibp_tx_queue; /* msgs waiting for a conn */
__u16 ibp_version; /* version of peer */
__u64 ibp_incarnation; /* incarnation of peer */
int ibp_connecting; /* current active connection attempts
*/
int ibp_accepting; /* current passive connection attempts
*/
int ibp_error; /* errno on closing this peer */
unsigned long ibp_last_alive; /* when (in jiffies) I was last alive
*/
/* when (in jiffies) I was last alive */
unsigned long ibp_last_alive;
/* # users */
atomic_t ibp_refcount;
/* version of peer */
__u16 ibp_version;
/* current passive connection attempts */
unsigned short ibp_accepting;
/* current active connection attempts */
unsigned short ibp_connecting;
/* reconnect this peer later */
unsigned short ibp_reconnecting:1;
/* # consecutive reconnection attempts to this peer */
unsigned int ibp_reconnected;
/* errno on closing this peer */
int ibp_error;
/* max map_on_demand */
__u16 ibp_max_frags;
/* max_peer_credits */
......@@ -667,6 +686,20 @@ do { \
kiblnd_destroy_peer(peer); \
} while (0)
/*
 * Report whether any connection establishment is pending for @peer:
 * an active attempt (ibp_connecting), a deferred reconnect
 * (ibp_reconnecting) or a passive attempt (ibp_accepting).
 */
static inline bool
kiblnd_peer_connecting(kib_peer_t *peer)
{
	if (peer->ibp_connecting)
		return true;

	if (peer->ibp_reconnecting)
		return true;

	return peer->ibp_accepting != 0;
}
/*
 * A peer is idle when kiblnd_peer_connecting() is false for it and
 * its ibp_conns list is empty.
 */
static inline bool
kiblnd_peer_idle(kib_peer_t *peer)
{
	if (kiblnd_peer_connecting(peer))
		return false;

	return list_empty(&peer->ibp_conns);
}
static inline struct list_head *
kiblnd_nid2peerlist(lnet_nid_t nid)
{
......@@ -943,6 +976,7 @@ int kiblnd_translate_mtu(int value);
int kiblnd_dev_failover(kib_dev_t *dev);
int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
void kiblnd_destroy_peer(kib_peer_t *peer);
bool kiblnd_reconnect_peer(kib_peer_t *peer);
void kiblnd_destroy_dev(kib_dev_t *dev);
void kiblnd_unlink_peer_locked(kib_peer_t *peer);
kib_peer_t *kiblnd_find_peer_locked(lnet_nid_t nid);
......@@ -952,7 +986,7 @@ int kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why);
kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
int state, int version);
void kiblnd_destroy_conn(kib_conn_t *conn);
void kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn);
void kiblnd_close_conn(kib_conn_t *conn, int error);
void kiblnd_close_conn_locked(kib_conn_t *conn, int error);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment