Commit 514349dc authored by Trond Myklebust's avatar Trond Myklebust

[PATCH] RPC over UDP congestion control updates [5/8]

Clean up the Van Jacobson network congestion control code.
parent c6b43f23
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* The transport code maintains an estimate on the maximum number of out- * The transport code maintains an estimate on the maximum number of out-
* standing RPC requests, using a smoothed version of the congestion * standing RPC requests, using a smoothed version of the congestion
* avoidance implemented in 44BSD. This is basically the Van Jacobson * avoidance implemented in 44BSD. This is basically the Van Jacobson
* slow start algorithm: If a retransmit occurs, the congestion window is * congestion algorithm: If a retransmit occurs, the congestion window is
* halved; otherwise, it is incremented by 1/cwnd when * halved; otherwise, it is incremented by 1/cwnd when
* *
* - a reply is received and * - a reply is received and
...@@ -32,15 +32,13 @@ ...@@ -32,15 +32,13 @@
* Note: on machines with low memory we should probably use a smaller * Note: on machines with low memory we should probably use a smaller
* MAXREQS value: At 32 outstanding reqs with 8 megs of RAM, fragment * MAXREQS value: At 32 outstanding reqs with 8 megs of RAM, fragment
* reassembly will frequently run out of memory. * reassembly will frequently run out of memory.
* Come Linux 2.3, we'll handle fragments directly.
*/ */
#define RPC_MAXCONG 16 #define RPC_MAXCONG 16
#define RPC_MAXREQS (RPC_MAXCONG + 1) #define RPC_MAXREQS (RPC_MAXCONG + 1)
#define RPC_CWNDSCALE 256 #define RPC_CWNDSCALE 256
#define RPC_MAXCWND (RPC_MAXCONG * RPC_CWNDSCALE) #define RPC_MAXCWND (RPC_MAXCONG * RPC_CWNDSCALE)
#define RPC_INITCWND RPC_CWNDSCALE #define RPC_INITCWND RPC_CWNDSCALE
#define RPCXPRT_CONGESTED(xprt) \ #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
((xprt)->cong >= (xprt)->cwnd)
/* Default timeout values */ /* Default timeout values */
#define RPC_MAX_UDP_TIMEOUT (60*HZ) #define RPC_MAX_UDP_TIMEOUT (60*HZ)
...@@ -83,6 +81,7 @@ struct rpc_rqst { ...@@ -83,6 +81,7 @@ struct rpc_rqst {
struct rpc_task * rq_task; /* RPC task data */ struct rpc_task * rq_task; /* RPC task data */
__u32 rq_xid; /* request XID */ __u32 rq_xid; /* request XID */
struct rpc_rqst * rq_next; /* free list */ struct rpc_rqst * rq_next; /* free list */
int rq_cong; /* has incremented xprt->cong */
int rq_received; /* receive completed */ int rq_received; /* receive completed */
struct list_head rq_list; struct list_head rq_list;
......
...@@ -89,6 +89,7 @@ static void xprt_disconnect(struct rpc_xprt *); ...@@ -89,6 +89,7 @@ static void xprt_disconnect(struct rpc_xprt *);
static void xprt_reconn_status(struct rpc_task *task); static void xprt_reconn_status(struct rpc_task *task);
static struct socket *xprt_create_socket(int, struct rpc_timeout *); static struct socket *xprt_create_socket(int, struct rpc_timeout *);
static int xprt_bind_socket(struct rpc_xprt *, struct socket *); static int xprt_bind_socket(struct rpc_xprt *, struct socket *);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
#ifdef RPC_DEBUG_DATA #ifdef RPC_DEBUG_DATA
/* /*
...@@ -253,6 +254,40 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) ...@@ -253,6 +254,40 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
return result; return result;
} }
/*
 * Van Jacobson congestion avoidance. Check if the congestion window
 * overflowed. Put the task to sleep if this is the case.
 */
static int
__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
{
	struct rpc_rqst	*rqst = task->tk_rqstp;

	/* A request that already holds a congestion slot keeps it. */
	if (!rqst->rq_cong) {
		dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n",
				task->tk_pid, xprt->cong, xprt->cwnd);
		/* Window full: caller must back off (returns 0). */
		if (RPCXPRT_CONGESTED(xprt))
			return 0;
		/* Claim one slot worth of congestion credit. */
		rqst->rq_cong = 1;
		xprt->cong += RPC_CWNDSCALE;
	}
	return 1;
}
/*
 * Adjust the congestion window, and wake up the next task
 * that has been sleeping due to congestion
 */
static void
__xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
{
	/* Nothing to release unless this request holds a slot. */
	if (rqst->rq_cong) {
		rqst->rq_cong = 0;
		xprt->cong -= RPC_CWNDSCALE;
		/* Room just opened up: let the next writer proceed. */
		__xprt_lock_write_next(xprt);
	}
}
/* /*
* Adjust RPC congestion window * Adjust RPC congestion window
* We use a time-smoothed congestion estimator to avoid heavy oscillation. * We use a time-smoothed congestion estimator to avoid heavy oscillation.
...@@ -1146,8 +1181,6 @@ xprt_reserve(struct rpc_task *task) ...@@ -1146,8 +1181,6 @@ xprt_reserve(struct rpc_task *task)
if (task->tk_rqstp) if (task->tk_rqstp)
return 0; return 0;
dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n",
task->tk_pid, xprt->cong, xprt->cwnd);
spin_lock_bh(&xprt->xprt_lock); spin_lock_bh(&xprt->xprt_lock);
xprt_reserve_status(task); xprt_reserve_status(task);
if (task->tk_rqstp) { if (task->tk_rqstp) {
...@@ -1181,13 +1214,14 @@ xprt_reserve_status(struct rpc_task *task) ...@@ -1181,13 +1214,14 @@ xprt_reserve_status(struct rpc_task *task)
} else if (task->tk_rqstp) { } else if (task->tk_rqstp) {
/* We've already been given a request slot: NOP */ /* We've already been given a request slot: NOP */
} else { } else {
if (RPCXPRT_CONGESTED(xprt) || !(req = xprt->free)) if (!(req = xprt->free))
goto out_nofree;
if (!(xprt->nocong || __xprt_get_cong(xprt, req)))
goto out_nofree; goto out_nofree;
/* OK: There's room for us. Grab a free slot and bump /* OK: There's room for us. Grab a free slot and bump
* congestion value */ * congestion value */
xprt->free = req->rq_next; xprt->free = req->rq_next;
req->rq_next = NULL; req->rq_next = NULL;
xprt->cong += RPC_CWNDSCALE;
task->tk_rqstp = req; task->tk_rqstp = req;
xprt_request_init(task, xprt); xprt_request_init(task, xprt);
...@@ -1252,9 +1286,7 @@ xprt_release(struct rpc_task *task) ...@@ -1252,9 +1286,7 @@ xprt_release(struct rpc_task *task)
req->rq_next = xprt->free; req->rq_next = xprt->free;
xprt->free = req; xprt->free = req;
/* Decrease congestion value. */ __xprt_put_cong(xprt, req);
xprt->cong -= RPC_CWNDSCALE;
xprt_clear_backlog(xprt); xprt_clear_backlog(xprt);
spin_unlock_bh(&xprt->xprt_lock); spin_unlock_bh(&xprt->xprt_lock);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment