Commit 26bbf13c authored by Yosef Etigin, committed by Roland Dreier

IPoIB: Handle P_Key table reordering

SM reconfiguration or failover possibly causes a shuffling of the values
in the P_Key table. Right now, IPoIB only queries for the P_Key index
once when it creates the device QP, and hence there are problems if the
index of a P_Key value changes.  Fix this by using the PKEY_CHANGE event
to trigger a recheck of the P_Key index.

Signed-off-by: Yosef Etigin <yosefe@voltaire.com>
Acked-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 1af4c435
...@@ -201,15 +201,17 @@ struct ipoib_dev_priv { ...@@ -201,15 +201,17 @@ struct ipoib_dev_priv {
struct list_head multicast_list; struct list_head multicast_list;
struct rb_root multicast_tree; struct rb_root multicast_tree;
struct delayed_work pkey_task; struct delayed_work pkey_poll_task;
struct delayed_work mcast_task; struct delayed_work mcast_task;
struct work_struct flush_task; struct work_struct flush_task;
struct work_struct restart_task; struct work_struct restart_task;
struct delayed_work ah_reap_task; struct delayed_work ah_reap_task;
struct work_struct pkey_event_task;
struct ib_device *ca; struct ib_device *ca;
u8 port; u8 port;
u16 pkey; u16 pkey;
u16 pkey_index;
struct ib_pd *pd; struct ib_pd *pd;
struct ib_mr *mr; struct ib_mr *mr;
struct ib_cq *cq; struct ib_cq *cq;
...@@ -333,12 +335,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); ...@@ -333,12 +335,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_ib_dev_flush(struct work_struct *work); void ipoib_ib_dev_flush(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev); void ipoib_ib_dev_cleanup(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev); int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev); int ipoib_ib_dev_up(struct net_device *dev);
int ipoib_ib_dev_down(struct net_device *dev, int flush); int ipoib_ib_dev_down(struct net_device *dev, int flush);
int ipoib_ib_dev_stop(struct net_device *dev); int ipoib_ib_dev_stop(struct net_device *dev, int flush);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev); void ipoib_dev_cleanup(struct net_device *dev);
......
...@@ -448,6 +448,13 @@ int ipoib_ib_dev_open(struct net_device *dev) ...@@ -448,6 +448,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret; int ret;
if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
return -1;
}
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = ipoib_init_qp(dev); ret = ipoib_init_qp(dev);
if (ret) { if (ret) {
ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
...@@ -457,14 +464,14 @@ int ipoib_ib_dev_open(struct net_device *dev) ...@@ -457,14 +464,14 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev); ret = ipoib_ib_post_receives(dev);
if (ret) { if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
ipoib_ib_dev_stop(dev); ipoib_ib_dev_stop(dev, 1);
return -1; return -1;
} }
ret = ipoib_cm_dev_open(dev); ret = ipoib_cm_dev_open(dev);
if (ret) { if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
ipoib_ib_dev_stop(dev); ipoib_ib_dev_stop(dev, 1);
return -1; return -1;
} }
...@@ -516,7 +523,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush) ...@@ -516,7 +523,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
mutex_lock(&pkey_mutex); mutex_lock(&pkey_mutex);
set_bit(IPOIB_PKEY_STOP, &priv->flags); set_bit(IPOIB_PKEY_STOP, &priv->flags);
cancel_delayed_work(&priv->pkey_task); cancel_delayed_work(&priv->pkey_poll_task);
mutex_unlock(&pkey_mutex); mutex_unlock(&pkey_mutex);
if (flush) if (flush)
flush_workqueue(ipoib_workqueue); flush_workqueue(ipoib_workqueue);
...@@ -543,7 +550,7 @@ static int recvs_pending(struct net_device *dev) ...@@ -543,7 +550,7 @@ static int recvs_pending(struct net_device *dev)
return pending; return pending;
} }
int ipoib_ib_dev_stop(struct net_device *dev) int ipoib_ib_dev_stop(struct net_device *dev, int flush)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr; struct ib_qp_attr qp_attr;
...@@ -629,7 +636,8 @@ int ipoib_ib_dev_stop(struct net_device *dev) ...@@ -629,7 +636,8 @@ int ipoib_ib_dev_stop(struct net_device *dev)
/* Wait for all AHs to be reaped */ /* Wait for all AHs to be reaped */
set_bit(IPOIB_STOP_REAPER, &priv->flags); set_bit(IPOIB_STOP_REAPER, &priv->flags);
cancel_delayed_work(&priv->ah_reap_task); cancel_delayed_work(&priv->ah_reap_task);
flush_workqueue(ipoib_workqueue); if (flush)
flush_workqueue(ipoib_workqueue);
begin = jiffies; begin = jiffies;
...@@ -673,13 +681,24 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port) ...@@ -673,13 +681,24 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
return 0; return 0;
} }
void ipoib_ib_dev_flush(struct work_struct *work) static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
{ {
struct ipoib_dev_priv *cpriv, *priv = struct ipoib_dev_priv *cpriv;
container_of(work, struct ipoib_dev_priv, flush_task);
struct net_device *dev = priv->dev; struct net_device *dev = priv->dev;
u16 new_index;
mutex_lock(&priv->vlan_mutex);
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) { /*
* Flush any child interfaces too -- they might be up even if
* the parent is down.
*/
list_for_each_entry(cpriv, &priv->child_intfs, list)
__ipoib_ib_dev_flush(cpriv, pkey_event);
mutex_unlock(&priv->vlan_mutex);
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return; return;
} }
...@@ -689,10 +708,32 @@ void ipoib_ib_dev_flush(struct work_struct *work) ...@@ -689,10 +708,32 @@ void ipoib_ib_dev_flush(struct work_struct *work)
return; return;
} }
if (pkey_event) {
if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ipoib_ib_dev_down(dev, 0);
ipoib_pkey_dev_delay_open(dev);
return;
}
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
/* restart QP only if P_Key index is changed */
if (new_index == priv->pkey_index) {
ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
return;
}
priv->pkey_index = new_index;
}
ipoib_dbg(priv, "flushing\n"); ipoib_dbg(priv, "flushing\n");
ipoib_ib_dev_down(dev, 0); ipoib_ib_dev_down(dev, 0);
if (pkey_event) {
ipoib_ib_dev_stop(dev, 0);
ipoib_ib_dev_open(dev);
}
/* /*
* The device could have been brought down between the start and when * The device could have been brought down between the start and when
* we get here, don't bring it back up if it's not configured up * we get here, don't bring it back up if it's not configured up
...@@ -701,14 +742,24 @@ void ipoib_ib_dev_flush(struct work_struct *work) ...@@ -701,14 +742,24 @@ void ipoib_ib_dev_flush(struct work_struct *work)
ipoib_ib_dev_up(dev); ipoib_ib_dev_up(dev);
ipoib_mcast_restart_task(&priv->restart_task); ipoib_mcast_restart_task(&priv->restart_task);
} }
}
mutex_lock(&priv->vlan_mutex); void ipoib_ib_dev_flush(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, flush_task);
/* Flush any child interfaces too */ ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
list_for_each_entry(cpriv, &priv->child_intfs, list) __ipoib_ib_dev_flush(priv, 0);
ipoib_ib_dev_flush(&cpriv->flush_task); }
mutex_unlock(&priv->vlan_mutex); void ipoib_pkey_event(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, pkey_event_task);
ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
__ipoib_ib_dev_flush(priv, 1);
} }
void ipoib_ib_dev_cleanup(struct net_device *dev) void ipoib_ib_dev_cleanup(struct net_device *dev)
...@@ -736,7 +787,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) ...@@ -736,7 +787,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
void ipoib_pkey_poll(struct work_struct *work) void ipoib_pkey_poll(struct work_struct *work)
{ {
struct ipoib_dev_priv *priv = struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, pkey_task.work); container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
struct net_device *dev = priv->dev; struct net_device *dev = priv->dev;
ipoib_pkey_dev_check_presence(dev); ipoib_pkey_dev_check_presence(dev);
...@@ -747,7 +798,7 @@ void ipoib_pkey_poll(struct work_struct *work) ...@@ -747,7 +798,7 @@ void ipoib_pkey_poll(struct work_struct *work)
mutex_lock(&pkey_mutex); mutex_lock(&pkey_mutex);
if (!test_bit(IPOIB_PKEY_STOP, &priv->flags)) if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
queue_delayed_work(ipoib_workqueue, queue_delayed_work(ipoib_workqueue,
&priv->pkey_task, &priv->pkey_poll_task,
HZ); HZ);
mutex_unlock(&pkey_mutex); mutex_unlock(&pkey_mutex);
} }
...@@ -766,7 +817,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev) ...@@ -766,7 +817,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev)
mutex_lock(&pkey_mutex); mutex_lock(&pkey_mutex);
clear_bit(IPOIB_PKEY_STOP, &priv->flags); clear_bit(IPOIB_PKEY_STOP, &priv->flags);
queue_delayed_work(ipoib_workqueue, queue_delayed_work(ipoib_workqueue,
&priv->pkey_task, &priv->pkey_poll_task,
HZ); HZ);
mutex_unlock(&pkey_mutex); mutex_unlock(&pkey_mutex);
return 1; return 1;
......
...@@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev) ...@@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
return -EINVAL; return -EINVAL;
if (ipoib_ib_dev_up(dev)) { if (ipoib_ib_dev_up(dev)) {
ipoib_ib_dev_stop(dev); ipoib_ib_dev_stop(dev, 1);
return -EINVAL; return -EINVAL;
} }
...@@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device *dev) ...@@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device *dev)
flush_workqueue(ipoib_workqueue); flush_workqueue(ipoib_workqueue);
ipoib_ib_dev_down(dev, 1); ipoib_ib_dev_down(dev, 1);
ipoib_ib_dev_stop(dev); ipoib_ib_dev_stop(dev, 1);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv; struct ipoib_dev_priv *cpriv;
...@@ -988,7 +988,8 @@ static void ipoib_setup(struct net_device *dev) ...@@ -988,7 +988,8 @@ static void ipoib_setup(struct net_device *dev)
INIT_LIST_HEAD(&priv->dead_ahs); INIT_LIST_HEAD(&priv->dead_ahs);
INIT_LIST_HEAD(&priv->multicast_list); INIT_LIST_HEAD(&priv->multicast_list);
INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll); INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush); INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
......
...@@ -33,8 +33,6 @@ ...@@ -33,8 +33,6 @@
* $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $ * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
*/ */
#include <rdma/ib_cache.h>
#include "ipoib.h" #include "ipoib.h"
int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
...@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) ...@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
if (!qp_attr) if (!qp_attr)
goto out; goto out;
if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = -ENXIO; ret = -ENXIO;
goto out; goto out;
...@@ -94,26 +92,16 @@ int ipoib_init_qp(struct net_device *dev) ...@@ -94,26 +92,16 @@ int ipoib_init_qp(struct net_device *dev)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret; int ret;
u16 pkey_index;
struct ib_qp_attr qp_attr; struct ib_qp_attr qp_attr;
int attr_mask; int attr_mask;
/* if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
* Search through the port P_Key table for the requested pkey value. return -1;
* The port has to be assigned to the respective IB partition in
* advance.
*/
ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
if (ret) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
return ret;
}
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
qp_attr.qp_state = IB_QPS_INIT; qp_attr.qp_state = IB_QPS_INIT;
qp_attr.qkey = 0; qp_attr.qkey = 0;
qp_attr.port_num = priv->port; qp_attr.port_num = priv->port;
qp_attr.pkey_index = pkey_index; qp_attr.pkey_index = priv->pkey_index;
attr_mask = attr_mask =
IB_QP_QKEY | IB_QP_QKEY |
IB_QP_PORT | IB_QP_PORT |
...@@ -259,14 +247,18 @@ void ipoib_event(struct ib_event_handler *handler, ...@@ -259,14 +247,18 @@ void ipoib_event(struct ib_event_handler *handler,
struct ipoib_dev_priv *priv = struct ipoib_dev_priv *priv =
container_of(handler, struct ipoib_dev_priv, event_handler); container_of(handler, struct ipoib_dev_priv, event_handler);
if ((record->event == IB_EVENT_PORT_ERR || if (record->element.port_num != priv->port)
record->event == IB_EVENT_PKEY_CHANGE || return;
record->event == IB_EVENT_PORT_ACTIVE ||
record->event == IB_EVENT_LID_CHANGE || if (record->event == IB_EVENT_PORT_ERR ||
record->event == IB_EVENT_SM_CHANGE || record->event == IB_EVENT_PORT_ACTIVE ||
record->event == IB_EVENT_CLIENT_REREGISTER) && record->event == IB_EVENT_LID_CHANGE ||
record->element.port_num == priv->port) { record->event == IB_EVENT_SM_CHANGE ||
record->event == IB_EVENT_CLIENT_REREGISTER) {
ipoib_dbg(priv, "Port state change event\n"); ipoib_dbg(priv, "Port state change event\n");
queue_work(ipoib_workqueue, &priv->flush_task); queue_work(ipoib_workqueue, &priv->flush_task);
} else if (record->event == IB_EVENT_PKEY_CHANGE) {
ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
queue_work(ipoib_workqueue, &priv->pkey_event_task);
} }
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment