Commit 60bf8bf8 authored by Sage Weil

libceph: fix msgr backoff

With commit f363e45f we replaced a bunch of hacky workqueue mutual
exclusion logic with the WQ_NON_REENTRANT flag.  One piece of fallout is
that the exponential backoff breaks in certain cases:

 * con_work attempts to connect.
 * we get an immediate failure, and the socket state change handler queues
   immediate work.
 * con_work calls ceph_fault, we decide to back off, but can't queue delayed
   work because (non-delayed) work is already pending.

In this case, we add a BACKOFF bit to make con_work reschedule delayed work
next time it runs (which should be immediately).
Signed-off-by: Sage Weil <sage@newdream.net>
parent 692d20f5
...@@ -123,6 +123,7 @@ struct ceph_msg_pos { ...@@ -123,6 +123,7 @@ struct ceph_msg_pos {
#define SOCK_CLOSED 11 /* socket state changed to closed */ #define SOCK_CLOSED 11 /* socket state changed to closed */
#define OPENING 13 /* open connection w/ (possibly new) peer */ #define OPENING 13 /* open connection w/ (possibly new) peer */
#define DEAD 14 /* dead, about to kfree */ #define DEAD 14 /* dead, about to kfree */
#define BACKOFF 15
/* /*
* A single connection with another host. * A single connection with another host.
......
...@@ -1949,6 +1949,19 @@ static void con_work(struct work_struct *work) ...@@ -1949,6 +1949,19 @@ static void con_work(struct work_struct *work)
work.work); work.work);
mutex_lock(&con->mutex); mutex_lock(&con->mutex);
if (test_and_clear_bit(BACKOFF, &con->state)) {
dout("con_work %p backing off\n", con);
if (queue_delayed_work(ceph_msgr_wq, &con->work,
round_jiffies_relative(con->delay))) {
dout("con_work %p backoff %lu\n", con, con->delay);
mutex_unlock(&con->mutex);
return;
} else {
con->ops->put(con);
dout("con_work %p FAILED to back off %lu\n", con,
con->delay);
}
}
if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
dout("con_work CLOSED\n"); dout("con_work CLOSED\n");
...@@ -2017,11 +2030,24 @@ static void ceph_fault(struct ceph_connection *con) ...@@ -2017,11 +2030,24 @@ static void ceph_fault(struct ceph_connection *con)
con->delay = BASE_DELAY_INTERVAL; con->delay = BASE_DELAY_INTERVAL;
else if (con->delay < MAX_DELAY_INTERVAL) else if (con->delay < MAX_DELAY_INTERVAL)
con->delay *= 2; con->delay *= 2;
dout("fault queueing %p delay %lu\n", con, con->delay);
con->ops->get(con); con->ops->get(con);
if (queue_delayed_work(ceph_msgr_wq, &con->work, if (queue_delayed_work(ceph_msgr_wq, &con->work,
round_jiffies_relative(con->delay)) == 0) round_jiffies_relative(con->delay))) {
dout("fault queued %p delay %lu\n", con, con->delay);
} else {
con->ops->put(con); con->ops->put(con);
dout("fault failed to queue %p delay %lu, backoff\n",
con, con->delay);
/*
* In many cases we see a socket state change
* while con_work is running and end up
* queuing (non-delayed) work, such that we
* can't backoff with a delay. Set a flag so
* that when con_work restarts we schedule the
* delay then.
*/
set_bit(BACKOFF, &con->state);
}
} }
out_unlock: out_unlock:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment