Commit 9db0ff53, authored by Mark Bloch, committed by Doug Ledford

IB/cm: Mark stale CM id's whenever the mad agent was unregistered

When a CM id object has a port assigned to it, it means that the cm-id
has asked to send through that specific port. However, if that port was
removed (hot-unplug event), the cm-id was not updated.
To fix that, each port now keeps a list of all the cm-ids that are
planning to send through it; whenever the port is removed, it marks all
of them as invalid.

This commit fixes a kernel panic that is triggered when traffic is running
between guests and one of the guests is force-rebooted mid-traffic:

 Call Trace:
  [<ffffffff815271fa>] ? panic+0xa7/0x16f
  [<ffffffff8152b534>] ? oops_end+0xe4/0x100
  [<ffffffff8104a00b>] ? no_context+0xfb/0x260
  [<ffffffff81084db2>] ? del_timer_sync+0x22/0x30
  [<ffffffff8104a295>] ? __bad_area_nosemaphore+0x125/0x1e0
  [<ffffffff81084240>] ? process_timeout+0x0/0x10
  [<ffffffff8104a363>] ? bad_area_nosemaphore+0x13/0x20
  [<ffffffff8104aabf>] ? __do_page_fault+0x31f/0x480
  [<ffffffff81065df0>] ? default_wake_function+0x0/0x20
  [<ffffffffa0752675>] ? free_msg+0x55/0x70 [mlx5_core]
  [<ffffffffa0753434>] ? cmd_exec+0x124/0x840 [mlx5_core]
  [<ffffffff8105a924>] ? find_busiest_group+0x244/0x9f0
  [<ffffffff8152d45e>] ? do_page_fault+0x3e/0xa0
  [<ffffffff8152a815>] ? page_fault+0x25/0x30
  [<ffffffffa024da25>] ? cm_alloc_msg+0x35/0xc0 [ib_cm]
  [<ffffffffa024e821>] ? ib_send_cm_dreq+0xb1/0x1e0 [ib_cm]
  [<ffffffffa024f836>] ? cm_destroy_id+0x176/0x320 [ib_cm]
  [<ffffffffa024fb00>] ? ib_destroy_cm_id+0x10/0x20 [ib_cm]
  [<ffffffffa034f527>] ? ipoib_cm_free_rx_reap_list+0xa7/0x110 [ib_ipoib]
  [<ffffffffa034f590>] ? ipoib_cm_rx_reap+0x0/0x20 [ib_ipoib]
  [<ffffffffa034f5a5>] ? ipoib_cm_rx_reap+0x15/0x20 [ib_ipoib]
  [<ffffffff81094d20>] ? worker_thread+0x170/0x2a0
  [<ffffffff8109b2a0>] ? autoremove_wake_function+0x0/0x40
  [<ffffffff81094bb0>] ? worker_thread+0x0/0x2a0
  [<ffffffff8109aef6>] ? kthread+0x96/0xa0
  [<ffffffff8100c20a>] ? child_rip+0xa/0x20
  [<ffffffff8109ae60>] ? kthread+0x0/0xa0
  [<ffffffff8100c200>] ? child_rip+0x0/0x20

Fixes: a977049d ("[PATCH] IB: Add the kernel CM implementation")
Signed-off-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 5b810a24
...@@ -80,6 +80,8 @@ static struct ib_cm { ...@@ -80,6 +80,8 @@ static struct ib_cm {
__be32 random_id_operand; __be32 random_id_operand;
struct list_head timewait_list; struct list_head timewait_list;
struct workqueue_struct *wq; struct workqueue_struct *wq;
/* Sync on cm change port state */
spinlock_t state_lock;
} cm; } cm;
/* Counter indexes ordered by attribute ID */ /* Counter indexes ordered by attribute ID */
...@@ -161,6 +163,8 @@ struct cm_port { ...@@ -161,6 +163,8 @@ struct cm_port {
struct ib_mad_agent *mad_agent; struct ib_mad_agent *mad_agent;
struct kobject port_obj; struct kobject port_obj;
u8 port_num; u8 port_num;
struct list_head cm_priv_prim_list;
struct list_head cm_priv_altr_list;
struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
}; };
...@@ -241,6 +245,12 @@ struct cm_id_private { ...@@ -241,6 +245,12 @@ struct cm_id_private {
u8 service_timeout; u8 service_timeout;
u8 target_ack_delay; u8 target_ack_delay;
struct list_head prim_list;
struct list_head altr_list;
/* Indicates that the send port mad is registered and av is set */
int prim_send_port_not_ready;
int altr_send_port_not_ready;
struct list_head work_list; struct list_head work_list;
atomic_t work_count; atomic_t work_count;
}; };
...@@ -259,20 +269,47 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, ...@@ -259,20 +269,47 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
struct ib_mad_agent *mad_agent; struct ib_mad_agent *mad_agent;
struct ib_mad_send_buf *m; struct ib_mad_send_buf *m;
struct ib_ah *ah; struct ib_ah *ah;
struct cm_av *av;
unsigned long flags, flags2;
int ret = 0;
/* don't let the port to be released till the agent is down */
spin_lock_irqsave(&cm.state_lock, flags2);
spin_lock_irqsave(&cm.lock, flags);
if (!cm_id_priv->prim_send_port_not_ready)
av = &cm_id_priv->av;
else if (!cm_id_priv->altr_send_port_not_ready &&
(cm_id_priv->alt_av.port))
av = &cm_id_priv->alt_av;
else {
pr_info("%s: not valid CM id\n", __func__);
ret = -ENODEV;
spin_unlock_irqrestore(&cm.lock, flags);
goto out;
}
spin_unlock_irqrestore(&cm.lock, flags);
/* Make sure the port haven't released the mad yet */
mad_agent = cm_id_priv->av.port->mad_agent; mad_agent = cm_id_priv->av.port->mad_agent;
ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr); if (!mad_agent) {
if (IS_ERR(ah)) pr_info("%s: not a valid MAD agent\n", __func__);
return PTR_ERR(ah); ret = -ENODEV;
goto out;
}
ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto out;
}
m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
cm_id_priv->av.pkey_index, av->pkey_index,
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC, GFP_ATOMIC,
IB_MGMT_BASE_VERSION); IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) { if (IS_ERR(m)) {
ib_destroy_ah(ah); ib_destroy_ah(ah);
return PTR_ERR(m); ret = PTR_ERR(m);
goto out;
} }
/* Timeout set by caller if response is expected. */ /* Timeout set by caller if response is expected. */
...@@ -282,7 +319,10 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, ...@@ -282,7 +319,10 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
atomic_inc(&cm_id_priv->refcount); atomic_inc(&cm_id_priv->refcount);
m->context[0] = cm_id_priv; m->context[0] = cm_id_priv;
*msg = m; *msg = m;
return 0;
out:
spin_unlock_irqrestore(&cm.state_lock, flags2);
return ret;
} }
static int cm_alloc_response_msg(struct cm_port *port, static int cm_alloc_response_msg(struct cm_port *port,
...@@ -352,7 +392,8 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, ...@@ -352,7 +392,8 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr); grh, &av->ah_attr);
} }
static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
struct cm_id_private *cm_id_priv)
{ {
struct cm_device *cm_dev; struct cm_device *cm_dev;
struct cm_port *port = NULL; struct cm_port *port = NULL;
...@@ -387,7 +428,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) ...@@ -387,7 +428,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
&av->ah_attr); &av->ah_attr);
av->timeout = path->packet_life_time + 1; av->timeout = path->packet_life_time + 1;
return 0; spin_lock_irqsave(&cm.lock, flags);
if (&cm_id_priv->av == av)
list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
else if (&cm_id_priv->alt_av == av)
list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
else
ret = -EINVAL;
spin_unlock_irqrestore(&cm.lock, flags);
return ret;
} }
static int cm_alloc_id(struct cm_id_private *cm_id_priv) static int cm_alloc_id(struct cm_id_private *cm_id_priv)
...@@ -677,6 +728,8 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, ...@@ -677,6 +728,8 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
spin_lock_init(&cm_id_priv->lock); spin_lock_init(&cm_id_priv->lock);
init_completion(&cm_id_priv->comp); init_completion(&cm_id_priv->comp);
INIT_LIST_HEAD(&cm_id_priv->work_list); INIT_LIST_HEAD(&cm_id_priv->work_list);
INIT_LIST_HEAD(&cm_id_priv->prim_list);
INIT_LIST_HEAD(&cm_id_priv->altr_list);
atomic_set(&cm_id_priv->work_count, -1); atomic_set(&cm_id_priv->work_count, -1);
atomic_set(&cm_id_priv->refcount, 1); atomic_set(&cm_id_priv->refcount, 1);
return &cm_id_priv->id; return &cm_id_priv->id;
...@@ -892,6 +945,15 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) ...@@ -892,6 +945,15 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
break; break;
} }
spin_lock_irq(&cm.lock);
if (!list_empty(&cm_id_priv->altr_list) &&
(!cm_id_priv->altr_send_port_not_ready))
list_del(&cm_id_priv->altr_list);
if (!list_empty(&cm_id_priv->prim_list) &&
(!cm_id_priv->prim_send_port_not_ready))
list_del(&cm_id_priv->prim_list);
spin_unlock_irq(&cm.lock);
cm_free_id(cm_id->local_id); cm_free_id(cm_id->local_id);
cm_deref_id(cm_id_priv); cm_deref_id(cm_id_priv);
wait_for_completion(&cm_id_priv->comp); wait_for_completion(&cm_id_priv->comp);
...@@ -1192,12 +1254,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, ...@@ -1192,12 +1254,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
goto out; goto out;
} }
ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av); ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
cm_id_priv);
if (ret) if (ret)
goto error1; goto error1;
if (param->alternate_path) { if (param->alternate_path) {
ret = cm_init_av_by_path(param->alternate_path, ret = cm_init_av_by_path(param->alternate_path,
&cm_id_priv->alt_av); &cm_id_priv->alt_av, cm_id_priv);
if (ret) if (ret)
goto error1; goto error1;
} }
...@@ -1653,7 +1716,8 @@ static int cm_req_handler(struct cm_work *work) ...@@ -1653,7 +1716,8 @@ static int cm_req_handler(struct cm_work *work)
dev_put(gid_attr.ndev); dev_put(gid_attr.ndev);
} }
work->path[0].gid_type = gid_attr.gid_type; work->path[0].gid_type = gid_attr.gid_type;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
cm_id_priv);
} }
if (ret) { if (ret) {
int err = ib_get_cached_gid(work->port->cm_dev->ib_device, int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
...@@ -1672,7 +1736,8 @@ static int cm_req_handler(struct cm_work *work) ...@@ -1672,7 +1736,8 @@ static int cm_req_handler(struct cm_work *work)
goto rejected; goto rejected;
} }
if (req_msg->alt_local_lid) { if (req_msg->alt_local_lid) {
ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av); ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
cm_id_priv);
if (ret) { if (ret) {
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid, &work->path[0].sgid,
...@@ -2727,7 +2792,8 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, ...@@ -2727,7 +2792,8 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
goto out; goto out;
} }
ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av); ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
cm_id_priv);
if (ret) if (ret)
goto out; goto out;
cm_id_priv->alt_av.timeout = cm_id_priv->alt_av.timeout =
...@@ -2839,7 +2905,8 @@ static int cm_lap_handler(struct cm_work *work) ...@@ -2839,7 +2905,8 @@ static int cm_lap_handler(struct cm_work *work)
cm_init_av_for_response(work->port, work->mad_recv_wc->wc, cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh, work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av); &cm_id_priv->av);
cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av); cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
cm_id_priv);
ret = atomic_inc_and_test(&cm_id_priv->work_count); ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret) if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list); list_add_tail(&work->list, &cm_id_priv->work_list);
...@@ -3031,7 +3098,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, ...@@ -3031,7 +3098,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
return -EINVAL; return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id); cm_id_priv = container_of(cm_id, struct cm_id_private, id);
ret = cm_init_av_by_path(param->path, &cm_id_priv->av); ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
if (ret) if (ret)
goto out; goto out;
...@@ -3468,7 +3535,9 @@ static int cm_establish(struct ib_cm_id *cm_id) ...@@ -3468,7 +3535,9 @@ static int cm_establish(struct ib_cm_id *cm_id)
static int cm_migrate(struct ib_cm_id *cm_id) static int cm_migrate(struct ib_cm_id *cm_id)
{ {
struct cm_id_private *cm_id_priv; struct cm_id_private *cm_id_priv;
struct cm_av tmp_av;
unsigned long flags; unsigned long flags;
int tmp_send_port_not_ready;
int ret = 0; int ret = 0;
cm_id_priv = container_of(cm_id, struct cm_id_private, id); cm_id_priv = container_of(cm_id, struct cm_id_private, id);
...@@ -3477,7 +3546,14 @@ static int cm_migrate(struct ib_cm_id *cm_id) ...@@ -3477,7 +3546,14 @@ static int cm_migrate(struct ib_cm_id *cm_id)
(cm_id->lap_state == IB_CM_LAP_UNINIT || (cm_id->lap_state == IB_CM_LAP_UNINIT ||
cm_id->lap_state == IB_CM_LAP_IDLE)) { cm_id->lap_state == IB_CM_LAP_IDLE)) {
cm_id->lap_state = IB_CM_LAP_IDLE; cm_id->lap_state = IB_CM_LAP_IDLE;
/* Swap address vector */
tmp_av = cm_id_priv->av;
cm_id_priv->av = cm_id_priv->alt_av; cm_id_priv->av = cm_id_priv->alt_av;
cm_id_priv->alt_av = tmp_av;
/* Swap port send ready state */
tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
} else } else
ret = -EINVAL; ret = -EINVAL;
spin_unlock_irqrestore(&cm_id_priv->lock, flags); spin_unlock_irqrestore(&cm_id_priv->lock, flags);
...@@ -3888,6 +3964,9 @@ static void cm_add_one(struct ib_device *ib_device) ...@@ -3888,6 +3964,9 @@ static void cm_add_one(struct ib_device *ib_device)
port->cm_dev = cm_dev; port->cm_dev = cm_dev;
port->port_num = i; port->port_num = i;
INIT_LIST_HEAD(&port->cm_priv_prim_list);
INIT_LIST_HEAD(&port->cm_priv_altr_list);
ret = cm_create_port_fs(port); ret = cm_create_port_fs(port);
if (ret) if (ret)
goto error1; goto error1;
...@@ -3945,6 +4024,8 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) ...@@ -3945,6 +4024,8 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
{ {
struct cm_device *cm_dev = client_data; struct cm_device *cm_dev = client_data;
struct cm_port *port; struct cm_port *port;
struct cm_id_private *cm_id_priv;
struct ib_mad_agent *cur_mad_agent;
struct ib_port_modify port_modify = { struct ib_port_modify port_modify = {
.clr_port_cap_mask = IB_PORT_CM_SUP .clr_port_cap_mask = IB_PORT_CM_SUP
}; };
...@@ -3968,15 +4049,27 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) ...@@ -3968,15 +4049,27 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
port = cm_dev->port[i-1]; port = cm_dev->port[i-1];
ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_modify_port(ib_device, port->port_num, 0, &port_modify);
/* Mark all the cm_id's as not valid */
spin_lock_irq(&cm.lock);
list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
cm_id_priv->altr_send_port_not_ready = 1;
list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
cm_id_priv->prim_send_port_not_ready = 1;
spin_unlock_irq(&cm.lock);
/* /*
* We flush the queue here after the going_down set, this * We flush the queue here after the going_down set, this
* verify that no new works will be queued in the recv handler, * verify that no new works will be queued in the recv handler,
* after that we can call the unregister_mad_agent * after that we can call the unregister_mad_agent
*/ */
flush_workqueue(cm.wq); flush_workqueue(cm.wq);
ib_unregister_mad_agent(port->mad_agent); spin_lock_irq(&cm.state_lock);
cur_mad_agent = port->mad_agent;
port->mad_agent = NULL;
spin_unlock_irq(&cm.state_lock);
ib_unregister_mad_agent(cur_mad_agent);
cm_remove_port_fs(port); cm_remove_port_fs(port);
} }
device_unregister(cm_dev->device); device_unregister(cm_dev->device);
kfree(cm_dev); kfree(cm_dev);
} }
...@@ -3989,6 +4082,7 @@ static int __init ib_cm_init(void) ...@@ -3989,6 +4082,7 @@ static int __init ib_cm_init(void)
INIT_LIST_HEAD(&cm.device_list); INIT_LIST_HEAD(&cm.device_list);
rwlock_init(&cm.device_lock); rwlock_init(&cm.device_lock);
spin_lock_init(&cm.lock); spin_lock_init(&cm.lock);
spin_lock_init(&cm.state_lock);
cm.listen_service_table = RB_ROOT; cm.listen_service_table = RB_ROOT;
cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
cm.remote_id_table = RB_ROOT; cm.remote_id_table = RB_ROOT;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment