Commit d04be142 authored by Logan Gunthorpe's avatar Logan Gunthorpe Committed by Jon Mason

ntb_hw_switchtec: Force down the link before initializing

If one host crashes and soft reboots, the other host may not see a
link down event. Then when the crashed host comes back up, the
surviving host may not know the link was reset and the NTB clients
may not work without being reset.

To solve this, we send a LINK_FORCE_DOWN message to each peer every
time we come up, before we register the NTB device. If a surviving
host still thinks the link is up it will take it down immediately.
In this way, once the crashed host comes up fully, it will send a
regular link up event as per usual and the link will be properly
restarted.

While we are in the area, this also fixes the MSG_LINK_UP message that
was in the link down function that was reported by Doug Meyers.
Signed-off-by: default avatarLogan Gunthorpe <logang@deltatee.com>
Reported-by: default avatarThanhTuThai <cruisethai@gmail.com>
Signed-off-by: default avatarJon Mason <jdmason@kudzu.us>
parent 270d32e6
......@@ -122,6 +122,7 @@ struct switchtec_ntb {
bool link_is_up;
enum ntb_speed link_speed;
enum ntb_width link_width;
struct work_struct link_reinit_work;
};
static struct switchtec_ntb *ntb_sndev(struct ntb_dev *ntb)
......@@ -494,18 +495,43 @@ static void crosslink_init_dbmsgs(struct switchtec_ntb *sndev)
&sndev->mmio_peer_dbmsg->odb_mask);
}
enum {
enum switchtec_msg {
LINK_MESSAGE = 0,
MSG_LINK_UP = 1,
MSG_LINK_DOWN = 2,
MSG_CHECK_LINK = 3,
MSG_LINK_FORCE_DOWN = 4,
};
static void switchtec_ntb_check_link(struct switchtec_ntb *sndev)
static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev);
static void link_reinit_work(struct work_struct *work)
{
struct switchtec_ntb *sndev;
sndev = container_of(work, struct switchtec_ntb, link_reinit_work);
switchtec_ntb_reinit_peer(sndev);
}
static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
enum switchtec_msg msg)
{
int link_sta;
int old = sndev->link_is_up;
if (msg == MSG_LINK_FORCE_DOWN) {
schedule_work(&sndev->link_reinit_work);
if (sndev->link_is_up) {
sndev->link_is_up = 0;
ntb_link_event(&sndev->ntb);
dev_info(&sndev->stdev->dev, "ntb link forced down\n");
}
return;
}
link_sta = sndev->self_shared->link_sta;
if (link_sta) {
u64 peer = ioread64(&sndev->peer_shared->magic);
......@@ -534,7 +560,7 @@ static void switchtec_ntb_link_notification(struct switchtec_dev *stdev)
{
struct switchtec_ntb *sndev = stdev->sndev;
switchtec_ntb_check_link(sndev);
switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
}
static u64 switchtec_ntb_link_is_up(struct ntb_dev *ntb,
......@@ -562,7 +588,7 @@ static int switchtec_ntb_link_enable(struct ntb_dev *ntb,
sndev->self_shared->link_sta = 1;
switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP);
switchtec_ntb_check_link(sndev);
switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
return 0;
}
......@@ -574,9 +600,9 @@ static int switchtec_ntb_link_disable(struct ntb_dev *ntb)
dev_dbg(&sndev->stdev->dev, "disabling link\n");
sndev->self_shared->link_sta = 0;
switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP);
switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_DOWN);
switchtec_ntb_check_link(sndev);
switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
return 0;
}
......@@ -822,6 +848,8 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
sndev->ntb.topo = NTB_TOPO_SWITCH;
sndev->ntb.ops = &switchtec_ntb_ops;
INIT_WORK(&sndev->link_reinit_work, link_reinit_work);
sndev->self_partition = sndev->stdev->partition;
sndev->mmio_ntb = sndev->stdev->mmio_ntb;
......@@ -1368,7 +1396,7 @@ static irqreturn_t switchtec_ntb_message_isr(int irq, void *dev)
iowrite8(1, &sndev->mmio_self_dbmsg->imsg[i].status);
if (i == LINK_MESSAGE)
switchtec_ntb_check_link(sndev);
switchtec_ntb_check_link(sndev, msg);
}
}
......@@ -1429,6 +1457,14 @@ static void switchtec_ntb_deinit_db_msg_irq(struct switchtec_ntb *sndev)
free_irq(sndev->message_irq, sndev);
}
static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev)
{
dev_info(&sndev->stdev->dev, "peer reinitialized\n");
switchtec_ntb_deinit_shared_mw(sndev);
switchtec_ntb_init_mw(sndev);
return switchtec_ntb_init_shared_mw(sndev);
}
static int switchtec_ntb_add(struct device *dev,
struct class_interface *class_intf)
{
......@@ -1471,6 +1507,13 @@ static int switchtec_ntb_add(struct device *dev,
if (rc)
goto deinit_shared_and_exit;
/*
* If this host crashed, the other host may think the link is
* still up. Tell them to force it down (it will go back up
* once we register the ntb device).
*/
switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_FORCE_DOWN);
rc = ntb_register_device(&sndev->ntb);
if (rc)
goto deinit_and_exit;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment