Commit b365d955 authored by David S. Miller

Merge branch 'rds-fixes'

Sowmini Varadhan says:

====================
RDS: TCP: synchronization during connection startup

This patch series ensures that the passive (accept) side of the
TCP connection used for RDS-TCP is correctly synchronized with
any concurrent active (connect) attempts for a given pair of peers.

Patch 1 in the series makes sure that the t_sock in struct
rds_tcp_connection is only reset after any threads in rds_tcp_xmit
have completed (otherwise a null-ptr deref may be encountered).
Patch 2 synchronizes rds_tcp_accept_one() with the rds_tcp*connect()
path.

v2: review comments from Santosh Shilimkar, other spelling corrections
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 42c8819b bd7c5f98
...@@ -127,7 +127,7 @@ void rds_tcp_restore_callbacks(struct socket *sock, ...@@ -127,7 +127,7 @@ void rds_tcp_restore_callbacks(struct socket *sock,
/* /*
* This is the only path that sets tc->t_sock. Send and receive trust that * This is the only path that sets tc->t_sock. Send and receive trust that
* it is set. The RDS_CONN_CONNECTED bit protects those paths from being * it is set. The RDS_CONN_UP bit protects those paths from being
* called while it isn't set. * called while it isn't set.
*/ */
void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
...@@ -216,6 +216,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) ...@@ -216,6 +216,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
if (!tc) if (!tc)
return -ENOMEM; return -ENOMEM;
mutex_init(&tc->t_conn_lock);
tc->t_sock = NULL; tc->t_sock = NULL;
tc->t_tinc = NULL; tc->t_tinc = NULL;
tc->t_tinc_hdr_rem = sizeof(struct rds_header); tc->t_tinc_hdr_rem = sizeof(struct rds_header);
......
...@@ -12,6 +12,10 @@ struct rds_tcp_connection { ...@@ -12,6 +12,10 @@ struct rds_tcp_connection {
struct list_head t_tcp_node; struct list_head t_tcp_node;
struct rds_connection *conn; struct rds_connection *conn;
/* t_conn_lock synchronizes the connection establishment between
* rds_tcp_accept_one and rds_tcp_conn_connect
*/
struct mutex t_conn_lock;
struct socket *t_sock; struct socket *t_sock;
void *t_orig_write_space; void *t_orig_write_space;
void *t_orig_data_ready; void *t_orig_data_ready;
......
...@@ -78,7 +78,14 @@ int rds_tcp_conn_connect(struct rds_connection *conn) ...@@ -78,7 +78,14 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
struct socket *sock = NULL; struct socket *sock = NULL;
struct sockaddr_in src, dest; struct sockaddr_in src, dest;
int ret; int ret;
struct rds_tcp_connection *tc = conn->c_transport_data;
mutex_lock(&tc->t_conn_lock);
if (rds_conn_up(conn)) {
mutex_unlock(&tc->t_conn_lock);
return 0;
}
ret = sock_create_kern(rds_conn_net(conn), PF_INET, ret = sock_create_kern(rds_conn_net(conn), PF_INET,
SOCK_STREAM, IPPROTO_TCP, &sock); SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret < 0) if (ret < 0)
...@@ -120,6 +127,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn) ...@@ -120,6 +127,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
} }
out: out:
mutex_unlock(&tc->t_conn_lock);
if (sock) if (sock)
sock_release(sock); sock_release(sock);
return ret; return ret;
......
...@@ -76,7 +76,9 @@ int rds_tcp_accept_one(struct socket *sock) ...@@ -76,7 +76,9 @@ int rds_tcp_accept_one(struct socket *sock)
struct rds_connection *conn; struct rds_connection *conn;
int ret; int ret;
struct inet_sock *inet; struct inet_sock *inet;
struct rds_tcp_connection *rs_tcp; struct rds_tcp_connection *rs_tcp = NULL;
int conn_state;
struct sock *nsk;
ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family, ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family,
sock->sk->sk_type, sock->sk->sk_protocol, sock->sk->sk_type, sock->sk->sk_protocol,
...@@ -115,28 +117,44 @@ int rds_tcp_accept_one(struct socket *sock) ...@@ -115,28 +117,44 @@ int rds_tcp_accept_one(struct socket *sock)
* rds_tcp_state_change() will do that cleanup * rds_tcp_state_change() will do that cleanup
*/ */
rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
if (rs_tcp->t_sock &&
ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
struct sock *nsk = new_sock->sk;
nsk->sk_user_data = NULL;
nsk->sk_prot->disconnect(nsk, 0);
tcp_done(nsk);
new_sock = NULL;
ret = 0;
goto out;
} else if (rs_tcp->t_sock) {
rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
conn->c_outgoing = 0;
}
rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
mutex_lock(&rs_tcp->t_conn_lock);
conn_state = rds_conn_state(conn);
if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP)
goto rst_nsk;
if (rs_tcp->t_sock) {
/* Need to resolve a duelling SYN between peers.
* We have an outstanding SYN to this peer, which may
* potentially have transitioned to the RDS_CONN_UP state,
* so we must quiesce any send threads before resetting
* c_transport_data.
*/
wait_event(conn->c_waitq,
!test_bit(RDS_IN_XMIT, &conn->c_flags));
if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
goto rst_nsk;
} else if (rs_tcp->t_sock) {
rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
conn->c_outgoing = 0;
}
}
rds_tcp_set_callbacks(new_sock, conn); rds_tcp_set_callbacks(new_sock, conn);
rds_connect_complete(conn); rds_connect_complete(conn); /* marks RDS_CONN_UP */
new_sock = NULL;
ret = 0;
goto out;
rst_nsk:
/* reset the newly returned accept sock and bail */
nsk = new_sock->sk;
rds_tcp_stats_inc(s_tcp_listen_closed_stale);
nsk->sk_user_data = NULL;
nsk->sk_prot->disconnect(nsk, 0);
tcp_done(nsk);
new_sock = NULL; new_sock = NULL;
ret = 0; ret = 0;
out: out:
if (rs_tcp)
mutex_unlock(&rs_tcp->t_conn_lock);
if (new_sock) if (new_sock)
sock_release(new_sock); sock_release(new_sock);
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment