Commit 34a95132 authored by David S. Miller

Merge branch 'rds-tcp-misc-bug-fixes'

Sowmini Varadhan says:

====================
rds: tcp: misc bug fixes

This series contains 2 bug fixes (patch2, patch3) and one bit of
code cleanup (patch1) identified during database testing
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 5f886eef 10beea7d
...@@ -124,11 +124,6 @@ static void __rds_conn_path_init(struct rds_connection *conn, ...@@ -124,11 +124,6 @@ static void __rds_conn_path_init(struct rds_connection *conn,
cp->cp_conn = conn; cp->cp_conn = conn;
atomic_set(&cp->cp_state, RDS_CONN_DOWN); atomic_set(&cp->cp_state, RDS_CONN_DOWN);
cp->cp_send_gen = 0; cp->cp_send_gen = 0;
/* cp_outgoing is per-path. So we can only set it here
* for the single-path transports.
*/
if (!conn->c_trans->t_mp_capable)
cp->cp_outgoing = (is_outgoing ? 1 : 0);
cp->cp_reconnect_jiffies = 0; cp->cp_reconnect_jiffies = 0;
INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker); INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker);
INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker); INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker);
......
...@@ -92,6 +92,8 @@ enum { ...@@ -92,6 +92,8 @@ enum {
#define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \ #define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \
(rs)->rs_hash_initval) & ((n) - 1)) (rs)->rs_hash_initval) & ((n) - 1))
/* IS_CANONICAL(laddr, faddr): true when htonl(laddr) is numerically smaller
 * than htonl(faddr).
 *
 * The callers in this change use it to pick which end of a connection acts
 * as the initiator: e.g. rds_start_mprds() only brings up the extra paths
 * when IS_CANONICAL(c_laddr, c_faddr) holds, and rds_queue_reconnect()
 * returns early for TCP when it does not.
 *
 * NOTE(review): the arguments passed are conn->c_laddr / conn->c_faddr; if
 * those are stored in network byte order, ntohl() would be the matching
 * name (the byte swap performed is the same either way) -- confirm against
 * the struct rds_connection declaration.
 */
#define IS_CANONICAL(laddr, faddr) (htonl(laddr) < htonl(faddr))
/* Per mpath connection state */ /* Per mpath connection state */
struct rds_conn_path { struct rds_conn_path {
struct rds_connection *cp_conn; struct rds_connection *cp_conn;
...@@ -125,8 +127,6 @@ struct rds_conn_path { ...@@ -125,8 +127,6 @@ struct rds_conn_path {
unsigned int cp_unacked_packets; unsigned int cp_unacked_packets;
unsigned int cp_unacked_bytes; unsigned int cp_unacked_bytes;
unsigned int cp_outgoing:1,
cp_pad_to_32:31;
unsigned int cp_index; unsigned int cp_index;
}; };
......
...@@ -215,10 +215,10 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, ...@@ -215,10 +215,10 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
switch (type) { switch (type) {
case RDS_EXTHDR_NPATHS: case RDS_EXTHDR_NPATHS:
conn->c_npaths = min_t(int, RDS_MPATH_WORKERS, conn->c_npaths = min_t(int, RDS_MPATH_WORKERS,
buffer.rds_npaths); be16_to_cpu(buffer.rds_npaths));
break; break;
case RDS_EXTHDR_GEN_NUM: case RDS_EXTHDR_GEN_NUM:
new_peer_gen_num = buffer.rds_gen_num; new_peer_gen_num = be32_to_cpu(buffer.rds_gen_num);
break; break;
default: default:
pr_warn_ratelimited("ignoring unknown exthdr type " pr_warn_ratelimited("ignoring unknown exthdr type "
...@@ -254,7 +254,8 @@ static void rds_start_mprds(struct rds_connection *conn) ...@@ -254,7 +254,8 @@ static void rds_start_mprds(struct rds_connection *conn)
int i; int i;
struct rds_conn_path *cp; struct rds_conn_path *cp;
if (conn->c_npaths > 1 && conn->c_laddr < conn->c_faddr) { if (conn->c_npaths > 1 &&
IS_CANONICAL(conn->c_laddr, conn->c_faddr)) {
for (i = 1; i < conn->c_npaths; i++) { for (i = 1; i < conn->c_npaths; i++) {
cp = &conn->c_path[i]; cp = &conn->c_path[i];
rds_conn_path_connect_if_down(cp); rds_conn_path_connect_if_down(cp);
...@@ -339,14 +340,15 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, ...@@ -339,14 +340,15 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
rds_stats_inc(s_recv_ping); rds_stats_inc(s_recv_ping);
rds_send_pong(cp, inc->i_hdr.h_sport); rds_send_pong(cp, inc->i_hdr.h_sport);
/* if this is a handshake ping, start multipath if necessary */ /* if this is a handshake ping, start multipath if necessary */
if (RDS_HS_PROBE(inc->i_hdr.h_sport, inc->i_hdr.h_dport)) { if (RDS_HS_PROBE(be16_to_cpu(inc->i_hdr.h_sport),
be16_to_cpu(inc->i_hdr.h_dport))) {
rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn); rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn);
rds_start_mprds(cp->cp_conn); rds_start_mprds(cp->cp_conn);
} }
goto out; goto out;
} }
if (inc->i_hdr.h_dport == RDS_FLAG_PROBE_PORT && if (be16_to_cpu(inc->i_hdr.h_dport) == RDS_FLAG_PROBE_PORT &&
inc->i_hdr.h_sport == 0) { inc->i_hdr.h_sport == 0) {
rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn); rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn);
/* if this is a handshake pong, start multipath if necessary */ /* if this is a handshake pong, start multipath if necessary */
......
...@@ -1246,15 +1246,17 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport, ...@@ -1246,15 +1246,17 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
rm->m_inc.i_hdr.h_flags |= h_flags; rm->m_inc.i_hdr.h_flags |= h_flags;
cp->cp_next_tx_seq++; cp->cp_next_tx_seq++;
if (RDS_HS_PROBE(sport, dport) && cp->cp_conn->c_trans->t_mp_capable) { if (RDS_HS_PROBE(be16_to_cpu(sport), be16_to_cpu(dport)) &&
u16 npaths = RDS_MPATH_WORKERS; cp->cp_conn->c_trans->t_mp_capable) {
u16 npaths = cpu_to_be16(RDS_MPATH_WORKERS);
u32 my_gen_num = cpu_to_be32(cp->cp_conn->c_my_gen_num);
rds_message_add_extension(&rm->m_inc.i_hdr, rds_message_add_extension(&rm->m_inc.i_hdr,
RDS_EXTHDR_NPATHS, &npaths, RDS_EXTHDR_NPATHS, &npaths,
sizeof(npaths)); sizeof(npaths));
rds_message_add_extension(&rm->m_inc.i_hdr, rds_message_add_extension(&rm->m_inc.i_hdr,
RDS_EXTHDR_GEN_NUM, RDS_EXTHDR_GEN_NUM,
&cp->cp_conn->c_my_gen_num, &my_gen_num,
sizeof(u32)); sizeof(u32));
} }
spin_unlock_irqrestore(&cp->cp_lock, flags); spin_unlock_irqrestore(&cp->cp_lock, flags);
...@@ -1293,5 +1295,6 @@ rds_send_ping(struct rds_connection *conn) ...@@ -1293,5 +1295,6 @@ rds_send_ping(struct rds_connection *conn)
} }
conn->c_ping_triggered = 1; conn->c_ping_triggered = 1;
spin_unlock_irqrestore(&cp->cp_lock, flags); spin_unlock_irqrestore(&cp->cp_lock, flags);
rds_send_probe(&conn->c_path[0], RDS_FLAG_PROBE_PORT, 0, 0); rds_send_probe(&conn->c_path[0], cpu_to_be16(RDS_FLAG_PROBE_PORT),
0, 0);
} }
...@@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk) ...@@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk)
* RDS connection as RDS_CONN_UP until the reconnect, * RDS connection as RDS_CONN_UP until the reconnect,
* to avoid RDS datagram loss. * to avoid RDS datagram loss.
*/ */
if (cp->cp_conn->c_laddr > cp->cp_conn->c_faddr && if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
rds_conn_path_transition(cp, RDS_CONN_CONNECTING, rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
RDS_CONN_ERROR)) { RDS_CONN_ERROR)) {
rds_conn_path_drop(cp); rds_conn_path_drop(cp);
...@@ -135,7 +135,6 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) ...@@ -135,7 +135,6 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest), ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest),
O_NONBLOCK); O_NONBLOCK);
cp->cp_outgoing = 1;
rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret); rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret);
if (ret == -EINPROGRESS) if (ret == -EINPROGRESS)
ret = 0; ret = 0;
......
...@@ -83,7 +83,7 @@ static ...@@ -83,7 +83,7 @@ static
struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
{ {
int i; int i;
bool peer_is_smaller = (conn->c_faddr < conn->c_laddr); bool peer_is_smaller = IS_CANONICAL(conn->c_faddr, conn->c_laddr);
int npaths = max_t(int, 1, conn->c_npaths); int npaths = max_t(int, 1, conn->c_npaths);
/* for mprds, all paths MUST be initiated by the peer /* for mprds, all paths MUST be initiated by the peer
...@@ -112,6 +112,17 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) ...@@ -112,6 +112,17 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
return NULL; return NULL;
} }
/* Enable SO_LINGER on @sock with a linger time of zero
 * (l_onoff = 1, l_linger = 0).
 *
 * Called on the rst_nsk path of rds_tcp_accept_one() immediately before
 * kernel_sock_shutdown(); the comment at that call site states the intent:
 * avoid leaving the rejected accept socket in TIME_WAIT.
 *
 * The return value of kernel_setsockopt() is not checked, so a failure to
 * set the option is silently ignored.
 */
static void rds_tcp_set_linger(struct socket *sock)
{
struct linger no_linger = {
.l_onoff = 1,
.l_linger = 0,
};
kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
(char *)&no_linger, sizeof(no_linger));
}
int rds_tcp_accept_one(struct socket *sock) int rds_tcp_accept_one(struct socket *sock)
{ {
struct socket *new_sock = NULL; struct socket *new_sock = NULL;
...@@ -171,21 +182,10 @@ int rds_tcp_accept_one(struct socket *sock) ...@@ -171,21 +182,10 @@ int rds_tcp_accept_one(struct socket *sock)
if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_ERROR) if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_ERROR)
goto rst_nsk; goto rst_nsk;
if (rs_tcp->t_sock) { if (rs_tcp->t_sock) {
/* Need to resolve a duelling SYN between peers. /* Duelling SYN has been handled in rds_tcp_accept_one() */
* We have an outstanding SYN to this peer, which may rds_tcp_reset_callbacks(new_sock, cp);
* potentially have transitioned to the RDS_CONN_UP state, /* rds_connect_path_complete() marks RDS_CONN_UP */
* so we must quiesce any send threads before resetting rds_connect_path_complete(cp, RDS_CONN_RESETTING);
* c_transport_data.
*/
if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr) ||
!cp->cp_outgoing) {
goto rst_nsk;
} else {
rds_tcp_reset_callbacks(new_sock, cp);
cp->cp_outgoing = 0;
/* rds_connect_path_complete() marks RDS_CONN_UP */
rds_connect_path_complete(cp, RDS_CONN_RESETTING);
}
} else { } else {
rds_tcp_set_callbacks(new_sock, cp); rds_tcp_set_callbacks(new_sock, cp);
rds_connect_path_complete(cp, RDS_CONN_CONNECTING); rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
...@@ -194,7 +194,13 @@ int rds_tcp_accept_one(struct socket *sock) ...@@ -194,7 +194,13 @@ int rds_tcp_accept_one(struct socket *sock)
ret = 0; ret = 0;
goto out; goto out;
rst_nsk: rst_nsk:
/* reset the newly returned accept sock and bail */ /* reset the newly returned accept sock and bail.
* It is safe to set linger on new_sock because the RDS connection
* has not been brought up on new_sock, so no RDS-level data could
* be pending on it. By setting linger, we achieve the side-effect
* of avoiding TIME_WAIT state on new_sock.
*/
rds_tcp_set_linger(new_sock);
kernel_sock_shutdown(new_sock, SHUT_RDWR); kernel_sock_shutdown(new_sock, SHUT_RDWR);
ret = 0; ret = 0;
out: out:
......
...@@ -127,7 +127,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp) ...@@ -127,7 +127,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
/* let peer with smaller addr initiate reconnect, to avoid duels */ /* let peer with smaller addr initiate reconnect, to avoid duels */
if (conn->c_trans->t_type == RDS_TRANS_TCP && if (conn->c_trans->t_type == RDS_TRANS_TCP &&
conn->c_laddr > conn->c_faddr) !IS_CANONICAL(conn->c_laddr, conn->c_faddr))
return; return;
set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
...@@ -156,7 +156,8 @@ void rds_connect_worker(struct work_struct *work) ...@@ -156,7 +156,8 @@ void rds_connect_worker(struct work_struct *work)
struct rds_connection *conn = cp->cp_conn; struct rds_connection *conn = cp->cp_conn;
int ret; int ret;
if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr) if (cp->cp_index > 0 &&
!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr))
return; return;
clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING); ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment