Commit be83269e authored by Chuck Lever, committed by Linus Torvalds

[PATCH] (1/2) clean up RPC over TCP transport socket connect

This provides cleanup and bug fixes for the RPC layer's TCP socket
connection management logic.  Trond, Alexey, and DaveM have seen this
patch.  I've been running it for several weeks here and feel it is ready
for wider testing.  These two patches are prerequisites for further
clean-ups and fixes for RPC over TCP.

Bugs fixed:
+  TCP connection establishment now times out after 60 seconds instead of
   hanging for ten minutes.  60 seconds is more in line with how long a
   server takes to reboot.
+  on a soft-mounted file system, TCP reconnections now time out and fail
   the RPC request, like most other NFS clients, instead of hanging the
   NFS client until the server comes back.
+  on hard-mounted file systems, the RPC layer now delays 15 seconds
   before retrying after a failed connection attempt instead of retrying
   as soon as it can.
+  TCP connection error recovery is now more verbose so users can see why
   their NFS sessions are hung.  This can be tuned with future patches if
   it is unreasonably noisy.
+  the TCP connect logic is cleaned up so adding checks for new errnos
   is easier.
+  the same code now handles both initial connection and reconnection.
   The original initial connection code did not have comprehensive error
   handling.
+  some obscure design elements are now documented in comments.
+  xprt_destroy mistakenly used kfree to release the transport; it now
   uses rpc_free.
parent c13e8f03
...@@ -44,6 +44,19 @@ ...@@ -44,6 +44,19 @@
#define RPC_MAX_UDP_TIMEOUT (60*HZ) #define RPC_MAX_UDP_TIMEOUT (60*HZ)
#define RPC_MAX_TCP_TIMEOUT (600*HZ) #define RPC_MAX_TCP_TIMEOUT (600*HZ)
/*
* Wait duration for an RPC TCP connection to be established. Solaris
* NFS over TCP uses 60 seconds, for example, which is in line with how
* long a server takes to reboot.
*/
#define RPC_CONNECT_TIMEOUT (60*HZ)
/*
* Delay an arbitrary number of seconds before attempting to reconnect
* after an error.
*/
#define RPC_REESTABLISH_TIMEOUT (15*HZ)
/* RPC call and reply header size as number of 32bit words (verifier /* RPC call and reply header size as number of 32bit words (verifier
* size computed separately) * size computed separately)
*/ */
......
...@@ -565,7 +565,7 @@ call_bind(struct rpc_task *task) ...@@ -565,7 +565,7 @@ call_bind(struct rpc_task *task)
if (!clnt->cl_port) { if (!clnt->cl_port) {
task->tk_action = call_reconnect; task->tk_action = call_reconnect;
task->tk_timeout = clnt->cl_timeout.to_maxval; task->tk_timeout = RPC_CONNECT_TIMEOUT;
rpc_getport(task, clnt); rpc_getport(task, clnt);
} }
} }
...@@ -638,7 +638,6 @@ static void ...@@ -638,7 +638,6 @@ static void
call_status(struct rpc_task *task) call_status(struct rpc_task *task)
{ {
struct rpc_clnt *clnt = task->tk_client; struct rpc_clnt *clnt = task->tk_client;
struct rpc_xprt *xprt = clnt->cl_xprt;
struct rpc_rqst *req = task->tk_rqstp; struct rpc_rqst *req = task->tk_rqstp;
int status; int status;
...@@ -661,30 +660,23 @@ call_status(struct rpc_task *task) ...@@ -661,30 +660,23 @@ call_status(struct rpc_task *task)
break; break;
case -ECONNREFUSED: case -ECONNREFUSED:
case -ENOTCONN: case -ENOTCONN:
req->rq_bytes_sent = 0; if (clnt->cl_autobind)
if (clnt->cl_autobind || !clnt->cl_port) {
clnt->cl_port = 0; clnt->cl_port = 0;
task->tk_action = call_bind; task->tk_action = call_bind;
break; break;
}
if (xprt->stream) {
task->tk_action = call_reconnect;
break;
}
/*
* Sleep and dream of an open connection
*/
task->tk_timeout = 5 * HZ;
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
case -ENOMEM:
case -EAGAIN: case -EAGAIN:
task->tk_action = call_transmit; task->tk_action = call_transmit;
break; break;
case -EIO:
/* shutdown or soft timeout */
rpc_exit(task, status);
break;
default: default:
if (clnt->cl_chatty) if (clnt->cl_chatty)
printk("%s: RPC call returned error %d\n", printk("%s: RPC call returned error %d\n",
clnt->cl_protname, -status); clnt->cl_protname, -status);
rpc_exit(task, status); rpc_exit(task, status);
break;
} }
} }
......
...@@ -87,8 +87,10 @@ static void do_xprt_transmit(struct rpc_task *); ...@@ -87,8 +87,10 @@ static void do_xprt_transmit(struct rpc_task *);
static inline void do_xprt_reserve(struct rpc_task *); static inline void do_xprt_reserve(struct rpc_task *);
static void xprt_disconnect(struct rpc_xprt *); static void xprt_disconnect(struct rpc_xprt *);
static void xprt_reconn_status(struct rpc_task *task); static void xprt_reconn_status(struct rpc_task *task);
static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap,
struct rpc_timeout *to);
static struct socket *xprt_create_socket(int, struct rpc_timeout *); static struct socket *xprt_create_socket(int, struct rpc_timeout *);
static int xprt_bind_socket(struct rpc_xprt *, struct socket *); static void xprt_bind_socket(struct rpc_xprt *, struct socket *);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
#ifdef RPC_DEBUG_DATA #ifdef RPC_DEBUG_DATA
...@@ -419,78 +421,103 @@ xprt_reconnect(struct rpc_task *task) ...@@ -419,78 +421,103 @@ xprt_reconnect(struct rpc_task *task)
struct sock *inet; struct sock *inet;
int status; int status;
dprintk("RPC: %4d xprt_reconnect %p connected %d\n", dprintk("RPC: %4d xprt_reconnect xprt %p %s connected\n", task->tk_pid,
task->tk_pid, xprt, xprt_connected(xprt)); xprt, (xprt_connected(xprt) ? "is" : "is not"));
if (xprt->shutdown)
return;
if (!xprt->stream) if (xprt->shutdown) {
task->tk_status = -EIO;
return; return;
}
if (!xprt->addr.sin_port) { if (!xprt->addr.sin_port) {
task->tk_status = -EIO; task->tk_status = -EIO;
return; return;
} }
if (!xprt_lock_write(xprt, task)) if (!xprt_lock_write(xprt, task))
return; return;
if (xprt_connected(xprt)) if (xprt_connected(xprt))
goto out_write; goto out_write;
if (sock && sock->state != SS_UNCONNECTED) /*
* We're here because the xprt was marked disconnected.
* Start by resetting any existing state.
*/
xprt_close(xprt); xprt_close(xprt);
status = -ENOTCONN; if (!(sock = xprt_create_socket(xprt->prot, &xprt->timeout))) {
if (!(inet = xprt->inet)) { /* couldn't create socket or bind to reserved port;
/* Create an unconnected socket */ * this is likely a permanent error, so cause an abort */
if (!(sock = xprt_create_socket(xprt->prot, &xprt->timeout))) task->tk_status = -EIO;
goto defer; goto out_write;
}
xprt_bind_socket(xprt, sock); xprt_bind_socket(xprt, sock);
inet = sock->sk; inet = sock->sk;
}
/* Now connect it asynchronously. */ /*
dprintk("RPC: %4d connecting new socket\n", task->tk_pid); * Tell the socket layer to start connecting...
*/
status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
sizeof(xprt->addr), O_NONBLOCK); sizeof(xprt->addr), O_NONBLOCK);
dprintk("RPC: %4d connect status %d connected %d sock state %d\n",
task->tk_pid, -status, xprt_connected(xprt), inet->state);
if (status < 0) {
switch (status) { switch (status) {
case -EALREADY:
case -EINPROGRESS: case -EINPROGRESS:
status = 0; case -EALREADY:
break;
case -EISCONN:
case -EPIPE:
status = 0;
xprt_close(xprt);
goto defer;
default:
printk("RPC: TCP connect error %d!\n", -status);
xprt_close(xprt);
goto defer;
}
/* Protect against TCP socket state changes */ /* Protect against TCP socket state changes */
lock_sock(inet); lock_sock(inet);
dprintk("RPC: %4d connect status %d connected %d\n",
task->tk_pid, status, xprt_connected(xprt));
if (inet->state != TCP_ESTABLISHED) { if (inet->state != TCP_ESTABLISHED) {
task->tk_timeout = xprt->timeout.to_maxval; dprintk("RPC: %4d waiting for connection\n",
/* if the socket is already closing, delay 5 secs */ task->tk_pid);
if ((1<<inet->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV)) task->tk_timeout = RPC_CONNECT_TIMEOUT;
task->tk_timeout = 5*HZ; /* if the socket is already closing, delay briefly */
rpc_sleep_on(&xprt->pending, task, xprt_reconn_status, NULL); if ((1 << inet->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV))
task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
rpc_sleep_on(&xprt->pending, task, xprt_reconn_status,
NULL);
release_sock(inet); release_sock(inet);
/* task status set when task wakes up again */
return; return;
} }
release_sock(inet); release_sock(inet);
task->tk_status = 0;
break;
case 0:
case -EISCONN: /* not likely, but just in case */
/* Half closed state. No race -- this socket is dead. */
if (inet->state != TCP_ESTABLISHED) {
xprt_close(xprt);
task->tk_status = -EAGAIN;
goto out_write;
} }
defer:
if (status < 0) { /* Otherwise, the connection is already established. */
rpc_delay(task, 5*HZ); task->tk_status = 0;
break;
case -EPIPE:
xprt_close(xprt);
task->tk_status = -ENOTCONN; task->tk_status = -ENOTCONN;
goto out_write;
default:
/* Report myriad other possible returns. If this file
* system is soft mounted, just error out, like Solaris. */
xprt_close(xprt);
if (task->tk_client->cl_softrtry) {
printk(KERN_WARNING
"RPC: error %d connecting to server %s, exiting\n",
-status, task->tk_client->cl_server);
task->tk_status = -EIO;
} else {
printk(KERN_WARNING
"RPC: error %d connecting to server %s\n",
-status, task->tk_client->cl_server);
rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
task->tk_status = status;
} }
break;
}
out_write: out_write:
xprt_release_write(xprt, task); xprt_release_write(xprt, task);
} }
...@@ -504,9 +531,29 @@ xprt_reconn_status(struct rpc_task *task) ...@@ -504,9 +531,29 @@ xprt_reconn_status(struct rpc_task *task)
{ {
struct rpc_xprt *xprt = task->tk_xprt; struct rpc_xprt *xprt = task->tk_xprt;
dprintk("RPC: %4d xprt_reconn_timeout %d\n", switch (task->tk_status) {
task->tk_pid, task->tk_status); case 0:
dprintk("RPC: %4d xprt_reconn_status: connection established\n",
task->tk_pid);
goto out;
case -ETIMEDOUT:
dprintk("RPC: %4d xprt_reconn_status: timed out\n",
task->tk_pid);
/* prevent TCP from continuing to retry SYNs */
xprt_close(xprt);
break;
default:
printk(KERN_ERR "RPC: error %d connecting to server %s\n",
-task->tk_status, task->tk_client->cl_server);
xprt_close(xprt);
rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
break;
}
/* if soft mounted, cause this RPC to fail */
if (task->tk_client->cl_softrtry)
task->tk_status = -EIO;
out:
xprt_release_write(xprt, task); xprt_release_write(xprt, task);
} }
...@@ -1154,8 +1201,12 @@ do_xprt_transmit(struct rpc_task *task) ...@@ -1154,8 +1201,12 @@ do_xprt_transmit(struct rpc_task *task)
return; return;
case -ECONNREFUSED: case -ECONNREFUSED:
case -ENOTCONN: case -ENOTCONN:
if (!xprt->stream) if (!xprt->stream) {
task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
return; return;
}
/* fall through */
default: default:
if (xprt->stream) if (xprt->stream)
xprt_disconnect(xprt); xprt_disconnect(xprt);
...@@ -1305,8 +1356,7 @@ xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) ...@@ -1305,8 +1356,7 @@ xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr)
* Initialize an RPC client * Initialize an RPC client
*/ */
static struct rpc_xprt * static struct rpc_xprt *
xprt_setup(struct socket *sock, int proto, xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
struct sockaddr_in *ap, struct rpc_timeout *to)
{ {
struct rpc_xprt *xprt; struct rpc_xprt *xprt;
struct rpc_rqst *req; struct rpc_rqst *req;
...@@ -1353,7 +1403,6 @@ xprt_setup(struct socket *sock, int proto, ...@@ -1353,7 +1403,6 @@ xprt_setup(struct socket *sock, int proto,
dprintk("RPC: created transport %p\n", xprt); dprintk("RPC: created transport %p\n", xprt);
xprt_bind_socket(xprt, sock);
return xprt; return xprt;
} }
...@@ -1381,13 +1430,13 @@ xprt_bindresvport(struct socket *sock) ...@@ -1381,13 +1430,13 @@ xprt_bindresvport(struct socket *sock)
return err; return err;
} }
static int static void
xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock)
{ {
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
if (xprt->inet) if (xprt->inet)
return -EBUSY; return;
sk->user_data = xprt; sk->user_data = xprt;
xprt->old_data_ready = sk->data_ready; xprt->old_data_ready = sk->data_ready;
...@@ -1413,7 +1462,7 @@ xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) ...@@ -1413,7 +1462,7 @@ xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock)
if(xprt->stream) if(xprt->stream)
rpciod_up(); rpciod_up();
return 0; return;
} }
/* /*
...@@ -1457,8 +1506,10 @@ xprt_create_socket(int proto, struct rpc_timeout *to) ...@@ -1457,8 +1506,10 @@ xprt_create_socket(int proto, struct rpc_timeout *to)
} }
/* If the caller has the capability, bind to a reserved port */ /* If the caller has the capability, bind to a reserved port */
if (capable(CAP_NET_BIND_SERVICE) && xprt_bindresvport(sock) < 0) if (capable(CAP_NET_BIND_SERVICE) && xprt_bindresvport(sock) < 0) {
printk("RPC: can't bind to reserved port.\n");
goto failed; goto failed;
}
return sock; return sock;
...@@ -1473,17 +1524,32 @@ xprt_create_socket(int proto, struct rpc_timeout *to) ...@@ -1473,17 +1524,32 @@ xprt_create_socket(int proto, struct rpc_timeout *to)
struct rpc_xprt * struct rpc_xprt *
xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
{ {
struct socket *sock;
struct rpc_xprt *xprt; struct rpc_xprt *xprt;
dprintk("RPC: xprt_create_proto called\n"); xprt = xprt_setup(proto, sap, to);
if (!xprt)
if (!(sock = xprt_create_socket(proto, to))) goto out;
return NULL;
if (!(xprt = xprt_setup(sock, proto, sap, to))) if (!xprt->stream) {
sock_release(sock); struct socket *sock = xprt_create_socket(proto, to);
if (sock)
xprt_bind_socket(xprt, sock);
else {
rpc_free(xprt);
xprt = NULL;
}
} else
/*
* Don't allow a TCP service user unless they have
* enough capability to bind a reserved port.
*/
if (!capable(CAP_NET_BIND_SERVICE)) {
rpc_free(xprt);
xprt = NULL;
}
out:
dprintk("RPC: xprt_create_proto created xprt %p\n", xprt);
return xprt; return xprt;
} }
...@@ -1522,7 +1588,7 @@ xprt_destroy(struct rpc_xprt *xprt) ...@@ -1522,7 +1588,7 @@ xprt_destroy(struct rpc_xprt *xprt)
dprintk("RPC: destroying transport %p\n", xprt); dprintk("RPC: destroying transport %p\n", xprt);
xprt_shutdown(xprt); xprt_shutdown(xprt);
xprt_close(xprt); xprt_close(xprt);
kfree(xprt); rpc_free(xprt);
return 0; return 0;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment