Commit 9996b049 authored by Sebastian Sanchez's avatar Sebastian Sanchez Committed by Doug Ledford

IB/hfi1: Fix infinite loop in 8051 command error path

When an 8051 command times out, the entire DC block is restarted. During
the restart, the host interface version bit is set, which calls
do_8051_command() recursively. The host version bit needs to be set
before the link moves into polling, so the host version bit can be set
in set_local_link_attributes() instead. Thus, the 8051 command functions
can be simplied as a non-locking version (dd->dc8051_lock) of those
functions are no longer needed.

Fixes: 9be6a5d7 ("IB/hfi1: Prevent LNI out of sync by resetting host interface version")
Reviewed-by: default avatarMichael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: default avatarSebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent f5b53b04
...@@ -6518,11 +6518,12 @@ static void _dc_start(struct hfi1_devdata *dd) ...@@ -6518,11 +6518,12 @@ static void _dc_start(struct hfi1_devdata *dd)
if (!dd->dc_shutdown) if (!dd->dc_shutdown)
return; return;
/* /* Take the 8051 out of reset */
* Take the 8051 out of reset, wait until 8051 is ready, and set host write_csr(dd, DC_DC8051_CFG_RST, 0ull);
* version bit. /* Wait until 8051 is ready */
*/ if (wait_fm_ready(dd, TIMEOUT_8051_START))
release_and_wait_ready_8051_firmware(dd); dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
__func__);
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */ /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
write_csr(dd, DCC_CFG_RESET, 0x10); write_csr(dd, DCC_CFG_RESET, 0x10);
...@@ -8564,23 +8565,27 @@ int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data) ...@@ -8564,23 +8565,27 @@ int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
} }
/* /*
* If the 8051 is in reset mode (dd->dc_shutdown == 1), this function
* will still continue executing.
*
* Returns: * Returns:
* < 0 = Linux error, not able to get access * < 0 = Linux error, not able to get access
* > 0 = 8051 command RETURN_CODE * > 0 = 8051 command RETURN_CODE
*/ */
static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
u64 *out_data) u64 *out_data)
{ {
u64 reg, completed; u64 reg, completed;
int return_code; int return_code;
unsigned long timeout; unsigned long timeout;
lockdep_assert_held(&dd->dc8051_lock);
hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data); hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
mutex_lock(&dd->dc8051_lock);
/* We can't send any commands to the 8051 if it's in reset */
if (dd->dc_shutdown) {
return_code = -ENODEV;
goto fail;
}
/* /*
* If an 8051 host command timed out previously, then the 8051 is * If an 8051 host command timed out previously, then the 8051 is
* stuck. * stuck.
...@@ -8680,29 +8685,6 @@ static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, ...@@ -8680,29 +8685,6 @@ static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
*/ */
write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0); write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
fail:
return return_code;
}
/*
* Returns:
* < 0 = Linux error, not able to get access
* > 0 = 8051 command RETURN_CODE
*/
static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
u64 *out_data)
{
int return_code;
mutex_lock(&dd->dc8051_lock);
/* We can't send any commands to the 8051 if it's in reset */
if (dd->dc_shutdown) {
return_code = -ENODEV;
goto fail;
}
return_code = _do_8051_command(dd, type, in_data, out_data);
fail: fail:
mutex_unlock(&dd->dc8051_lock); mutex_unlock(&dd->dc8051_lock);
return return_code; return return_code;
...@@ -8713,17 +8695,16 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state) ...@@ -8713,17 +8695,16 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL); return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
} }
static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id, int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
u8 lane_id, u32 config_data) u8 lane_id, u32 config_data)
{ {
u64 data; u64 data;
int ret; int ret;
lockdep_assert_held(&dd->dc8051_lock);
data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
| (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
| (u64)config_data << LOAD_DATA_DATA_SHIFT; | (u64)config_data << LOAD_DATA_DATA_SHIFT;
ret = _do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL); ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
if (ret != HCMD_SUCCESS) { if (ret != HCMD_SUCCESS) {
dd_dev_err(dd, dd_dev_err(dd,
"load 8051 config: field id %d, lane %d, err %d\n", "load 8051 config: field id %d, lane %d, err %d\n",
...@@ -8732,18 +8713,6 @@ static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id, ...@@ -8732,18 +8713,6 @@ static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id,
return ret; return ret;
} }
int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
u8 lane_id, u32 config_data)
{
int return_code;
mutex_lock(&dd->dc8051_lock);
return_code = _load_8051_config(dd, field_id, lane_id, config_data);
mutex_unlock(&dd->dc8051_lock);
return return_code;
}
/* /*
* Read the 8051 firmware "registers". Use the RAM directly. Always * Read the 8051 firmware "registers". Use the RAM directly. Always
* set the result, even on error. * set the result, even on error.
...@@ -8859,14 +8828,13 @@ int write_host_interface_version(struct hfi1_devdata *dd, u8 version) ...@@ -8859,14 +8828,13 @@ int write_host_interface_version(struct hfi1_devdata *dd, u8 version)
u32 frame; u32 frame;
u32 mask; u32 mask;
lockdep_assert_held(&dd->dc8051_lock);
mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT); mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT);
read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame); read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame);
/* Clear, then set field */ /* Clear, then set field */
frame &= ~mask; frame &= ~mask;
frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT); frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT);
return _load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG,
frame); frame);
} }
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
...@@ -9270,6 +9238,14 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) ...@@ -9270,6 +9238,14 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret != HCMD_SUCCESS) if (ret != HCMD_SUCCESS)
goto set_local_link_attributes_fail; goto set_local_link_attributes_fail;
ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd,
"Failed to set host interface version, return 0x%x\n",
ret);
goto set_local_link_attributes_fail;
}
/* /*
* DC supports continuous updates. * DC supports continuous updates.
*/ */
......
...@@ -508,6 +508,7 @@ ...@@ -508,6 +508,7 @@
#define DOWN_REMOTE_REASON_SHIFT 16 #define DOWN_REMOTE_REASON_SHIFT 16
#define DOWN_REMOTE_REASON_MASK 0xff #define DOWN_REMOTE_REASON_MASK 0xff
#define HOST_INTERFACE_VERSION 1
#define HOST_INTERFACE_VERSION_SHIFT 16 #define HOST_INTERFACE_VERSION_SHIFT 16
#define HOST_INTERFACE_VERSION_MASK 0xff #define HOST_INTERFACE_VERSION_MASK 0xff
...@@ -713,7 +714,6 @@ void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, ...@@ -713,7 +714,6 @@ void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
u8 *ver_patch); u8 *ver_patch);
int write_host_interface_version(struct hfi1_devdata *dd, u8 version); int write_host_interface_version(struct hfi1_devdata *dd, u8 version);
void read_guid(struct hfi1_devdata *dd); void read_guid(struct hfi1_devdata *dd);
int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout); int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
u8 neigh_reason, u8 rem_reason); u8 neigh_reason, u8 rem_reason);
......
...@@ -68,7 +68,6 @@ ...@@ -68,7 +68,6 @@
#define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw" #define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw"
#define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw" #define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw"
#define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw" #define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw"
#define HOST_INTERFACE_VERSION 1
MODULE_FIRMWARE(DEFAULT_FW_8051_NAME_ASIC); MODULE_FIRMWARE(DEFAULT_FW_8051_NAME_ASIC);
MODULE_FIRMWARE(DEFAULT_FW_FABRIC_NAME); MODULE_FIRMWARE(DEFAULT_FW_FABRIC_NAME);
...@@ -975,46 +974,6 @@ int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout) ...@@ -975,46 +974,6 @@ int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout)
} }
} }
/*
* Clear all reset bits, releasing the 8051.
* Wait for firmware to be ready to accept host requests.
* Then, set host version bit.
*
* This function executes even if the 8051 is in reset mode when
* dd->dc_shutdown == 1.
*
* Expects dd->dc8051_lock to be held.
*/
int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd)
{
int ret;
lockdep_assert_held(&dd->dc8051_lock);
/* clear all reset bits, releasing the 8051 */
write_csr(dd, DC_DC8051_CFG_RST, 0ull);
/*
* Wait for firmware to be ready to accept host
* requests.
*/
ret = wait_fm_ready(dd, TIMEOUT_8051_START);
if (ret) {
dd_dev_err(dd, "8051 start timeout, current FW state 0x%x\n",
get_firmware_state(dd));
return ret;
}
ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd,
"Failed to set host interface version, return 0x%x\n",
ret);
return -EIO;
}
return 0;
}
/* /*
* Load the 8051 firmware. * Load the 8051 firmware.
*/ */
...@@ -1080,22 +1039,31 @@ static int load_8051_firmware(struct hfi1_devdata *dd, ...@@ -1080,22 +1039,31 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
if (ret) if (ret)
return ret; return ret;
/* clear all reset bits, releasing the 8051 */
write_csr(dd, DC_DC8051_CFG_RST, 0ull);
/* /*
* Clear all reset bits, releasing the 8051.
* DC reset step 5. Wait for firmware to be ready to accept host * DC reset step 5. Wait for firmware to be ready to accept host
* requests. * requests.
* Then, set host version bit.
*/ */
mutex_lock(&dd->dc8051_lock); ret = wait_fm_ready(dd, TIMEOUT_8051_START);
ret = release_and_wait_ready_8051_firmware(dd); if (ret) { /* timed out */
mutex_unlock(&dd->dc8051_lock); dd_dev_err(dd, "8051 start timeout, current state 0x%x\n",
if (ret) get_firmware_state(dd));
return ret; return -ETIMEDOUT;
}
read_misc_status(dd, &ver_major, &ver_minor, &ver_patch); read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
dd_dev_info(dd, "8051 firmware version %d.%d.%d\n", dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
(int)ver_major, (int)ver_minor, (int)ver_patch); (int)ver_major, (int)ver_minor, (int)ver_patch);
dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch); dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd,
"Failed to set host interface version, return 0x%x\n",
ret);
return -EIO;
}
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment