Commit 4cf9d544 authored by Sage Weil

libceph: don't time out osd requests that haven't been received

Keep track of when an outgoing message is ACKed (i.e., the server fully
received it and, presumably, queued it for processing).  Time out OSD
requests only if it's been too long since they've been received.

This prevents timeouts and connection thrashing when the OSDs are simply
busy and are throttling the requests they read off the network.
Reviewed-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
parent 8f04d422
...@@ -94,6 +94,7 @@ struct ceph_msg { ...@@ -94,6 +94,7 @@ struct ceph_msg {
bool more_to_follow; bool more_to_follow;
bool needs_out_seq; bool needs_out_seq;
int front_max; int front_max;
unsigned long ack_stamp; /* tx: when we were acked */
struct ceph_msgpool *pool; struct ceph_msgpool *pool;
}; };
......
...@@ -486,13 +486,10 @@ static void prepare_write_message(struct ceph_connection *con) ...@@ -486,13 +486,10 @@ static void prepare_write_message(struct ceph_connection *con)
m = list_first_entry(&con->out_queue, m = list_first_entry(&con->out_queue,
struct ceph_msg, list_head); struct ceph_msg, list_head);
con->out_msg = m; con->out_msg = m;
if (test_bit(LOSSYTX, &con->state)) {
list_del_init(&m->list_head); /* put message on sent list */
} else { ceph_msg_get(m);
/* put message on sent list */ list_move_tail(&m->list_head, &con->out_sent);
ceph_msg_get(m);
list_move_tail(&m->list_head, &con->out_sent);
}
/* /*
* only assign outgoing seq # if we haven't sent this message * only assign outgoing seq # if we haven't sent this message
...@@ -1399,6 +1396,7 @@ static void process_ack(struct ceph_connection *con) ...@@ -1399,6 +1396,7 @@ static void process_ack(struct ceph_connection *con)
break; break;
dout("got ack for seq %llu type %d at %p\n", seq, dout("got ack for seq %llu type %d at %p\n", seq,
le16_to_cpu(m->hdr.type), m); le16_to_cpu(m->hdr.type), m);
m->ack_stamp = jiffies;
ceph_msg_remove(m); ceph_msg_remove(m);
} }
prepare_read_tag(con); prepare_read_tag(con);
......
...@@ -1085,9 +1085,15 @@ static void handle_timeout(struct work_struct *work) ...@@ -1085,9 +1085,15 @@ static void handle_timeout(struct work_struct *work)
req = list_entry(osdc->req_lru.next, struct ceph_osd_request, req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
r_req_lru_item); r_req_lru_item);
/* hasn't been long enough since we sent it? */
if (time_before(jiffies, req->r_stamp + timeout)) if (time_before(jiffies, req->r_stamp + timeout))
break; break;
/* hasn't been long enough since it was acked? */
if (req->r_request->ack_stamp == 0 ||
time_before(jiffies, req->r_request->ack_stamp + timeout))
break;
BUG_ON(req == last_req && req->r_stamp == last_stamp); BUG_ON(req == last_req && req->r_stamp == last_stamp);
last_req = req; last_req = req;
last_stamp = req->r_stamp; last_stamp = req->r_stamp;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment