Commit dae58728 authored by Faisal Latif, committed by Roland Dreier

RDMA/nes: Fix double CLOSE event indication crash

During stress testing in a large cluster, multiple CLOSE events are
detected and BUG() is hit in the iWARP core.  The cause is that the
active node gave up while waiting for an MPA response from the peer
and tried to close the connection by sending a RST.  The passive node
driver receives the RST but is still waiting for the MPA response from
the user.  When the MPA accept is received, the driver offloads the
connection and sends a CLOSE event.  The driver then gets an AE
indicating that a RESET was received and sends a second CLOSE event,
hitting the BUG().

Fix this by correcting the RESET handling and the sending of CLOSE events.
Signed-off-by: Faisal Latif <faisal.latif@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 70c9db0f
......@@ -502,7 +502,9 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
static void nes_retrans_expired(struct nes_cm_node *cm_node)
{
struct iw_cm_id *cm_id = cm_node->cm_id;
switch (cm_node->state) {
enum nes_cm_node_state state = cm_node->state;
cm_node->state = NES_CM_STATE_CLOSED;
switch (state) {
case NES_CM_STATE_SYN_RCVD:
case NES_CM_STATE_CLOSING:
rem_ref_cm_node(cm_node->cm_core, cm_node);
......@@ -511,7 +513,6 @@ static void nes_retrans_expired(struct nes_cm_node *cm_node)
case NES_CM_STATE_FIN_WAIT1:
if (cm_node->cm_id)
cm_id->rem_ref(cm_id);
cm_node->state = NES_CM_STATE_CLOSED;
send_reset(cm_node, NULL);
break;
default:
......@@ -1439,9 +1440,6 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
break;
case NES_CM_STATE_MPAREQ_RCVD:
passive_state = atomic_add_return(1, &cm_node->passive_state);
if (passive_state == NES_SEND_RESET_EVENT)
create_event(cm_node, NES_CM_EVENT_RESET);
cm_node->state = NES_CM_STATE_CLOSED;
dev_kfree_skb_any(skb);
break;
case NES_CM_STATE_ESTABLISHED:
......@@ -1456,6 +1454,7 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
case NES_CM_STATE_CLOSED:
drop_packet(skb);
break;
case NES_CM_STATE_FIN_WAIT2:
case NES_CM_STATE_FIN_WAIT1:
case NES_CM_STATE_LAST_ACK:
cm_node->cm_id->rem_ref(cm_node->cm_id);
......@@ -2777,6 +2776,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
return -EINVAL;
}
passive_state = atomic_add_return(1, &cm_node->passive_state);
if (passive_state == NES_SEND_RESET_EVENT) {
rem_ref_cm_node(cm_node->cm_core, cm_node);
return -ECONNRESET;
}
/* associate the node with the QP */
nesqp->cm_node = (void *)cm_node;
cm_node->nesqp = nesqp;
......@@ -2979,9 +2984,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
"ret=%d\n", __func__, __LINE__, ret);
passive_state = atomic_add_return(1, &cm_node->passive_state);
if (passive_state == NES_SEND_RESET_EVENT)
create_event(cm_node, NES_CM_EVENT_RESET);
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment