Commit 86b6ba17 authored by David S. Miller

Merge branch 'net-qed-qede-critical-hw-error-handling'

Igor Russkikh says:

====================
net: qed/qede: critical hw error handling

FastLinQ devices, being complex systems, may observe various
hardware-level error conditions, both severe and recoverable.

The driver is able to detect and report these, but so far it has only
done trace/dmesg-based reporting.

Here we implement extended HW error detection; the service task
handler captures a dump for later analysis.

I am also resubmitting a patch from Denis Bolotin for the tx timeout
handler, addressing David's comment regarding the recovery procedure as
an extra reaction to this event.

v2:

Removed the patch with the ethtool dump and udev magic. It's quite isolated;
I'm working on devlink-based logic for this separately.

v1:

https://patchwork.ozlabs.org/project/netdev/cover/cover.1588758463.git.irusskikh@marvell.com/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents c8a867a3 8f76812e
...@@ -740,12 +740,6 @@ struct qed_dbg_feature { ...@@ -740,12 +740,6 @@ struct qed_dbg_feature {
u32 dumped_dwords; u32 dumped_dwords;
}; };
struct qed_dbg_params {
struct qed_dbg_feature features[DBG_FEATURE_NUM];
u8 engine_for_debug;
bool print_data;
};
struct qed_dev { struct qed_dev {
u32 dp_module; u32 dp_module;
u8 dp_level; u8 dp_level;
...@@ -844,6 +838,9 @@ struct qed_dev { ...@@ -844,6 +838,9 @@ struct qed_dev {
/* Recovery */ /* Recovery */
bool recov_in_prog; bool recov_in_prog;
/* Indicates whether should prevent attentions from being reasserted */
bool attn_clr_en;
/* LLH info */ /* LLH info */
u8 ppfid_bitmap; u8 ppfid_bitmap;
struct qed_llh_info *p_llh_info; struct qed_llh_info *p_llh_info;
...@@ -872,17 +869,18 @@ struct qed_dev { ...@@ -872,17 +869,18 @@ struct qed_dev {
} protocol_ops; } protocol_ops;
void *ops_cookie; void *ops_cookie;
struct qed_dbg_params dbg_params;
#ifdef CONFIG_QED_LL2 #ifdef CONFIG_QED_LL2
struct qed_cb_ll2_info *ll2; struct qed_cb_ll2_info *ll2;
u8 ll2_mac_address[ETH_ALEN]; u8 ll2_mac_address[ETH_ALEN];
#endif #endif
struct qed_dbg_feature dbg_features[DBG_FEATURE_NUM]; struct qed_dbg_feature dbg_features[DBG_FEATURE_NUM];
u8 engine_for_debug;
bool disable_ilt_dump; bool disable_ilt_dump;
DECLARE_HASHTABLE(connections, 10); DECLARE_HASHTABLE(connections, 10);
const struct firmware *firmware; const struct firmware *firmware;
bool print_dbg_data;
u32 rdma_max_sge; u32 rdma_max_sge;
u32 rdma_max_inline; u32 rdma_max_inline;
u32 rdma_max_srq_sge; u32 rdma_max_srq_sge;
...@@ -1020,6 +1018,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn, ...@@ -1020,6 +1018,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
u32 input_len, u8 *input_buf, u32 input_len, u8 *input_buf,
u32 max_size, u8 *unzip_buf); u32 max_size, u8 *unzip_buf);
void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn); void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
enum qed_hw_err_type err_type);
void qed_get_protocol_stats(struct qed_dev *cdev, void qed_get_protocol_stats(struct qed_dev *cdev,
enum qed_mcp_protocol_type type, enum qed_mcp_protocol_type type,
union qed_mcp_protocol_stats *stats); union qed_mcp_protocol_stats *stats);
......
...@@ -7453,7 +7453,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn, ...@@ -7453,7 +7453,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
enum qed_dbg_features feature_idx) enum qed_dbg_features feature_idx)
{ {
struct qed_dbg_feature *feature = struct qed_dbg_feature *feature =
&p_hwfn->cdev->dbg_params.features[feature_idx]; &p_hwfn->cdev->dbg_features[feature_idx];
u32 text_size_bytes, null_char_pos, i; u32 text_size_bytes, null_char_pos, i;
enum dbg_status rc; enum dbg_status rc;
char *text_buf; char *text_buf;
...@@ -7502,7 +7502,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn, ...@@ -7502,7 +7502,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
text_buf[i] = '\n'; text_buf[i] = '\n';
/* Dump printable feature to log */ /* Dump printable feature to log */
if (p_hwfn->cdev->dbg_params.print_data) if (p_hwfn->cdev->print_dbg_data)
qed_dbg_print_feature(text_buf, text_size_bytes); qed_dbg_print_feature(text_buf, text_size_bytes);
/* Free the old dump_buf and point the dump_buf to the newly allocagted /* Free the old dump_buf and point the dump_buf to the newly allocagted
...@@ -7523,7 +7523,7 @@ static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, ...@@ -7523,7 +7523,7 @@ static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn,
enum qed_dbg_features feature_idx) enum qed_dbg_features feature_idx)
{ {
struct qed_dbg_feature *feature = struct qed_dbg_feature *feature =
&p_hwfn->cdev->dbg_params.features[feature_idx]; &p_hwfn->cdev->dbg_features[feature_idx];
u32 buf_size_dwords; u32 buf_size_dwords;
enum dbg_status rc; enum dbg_status rc;
...@@ -7648,7 +7648,7 @@ static int qed_dbg_nvm_image(struct qed_dev *cdev, void *buffer, ...@@ -7648,7 +7648,7 @@ static int qed_dbg_nvm_image(struct qed_dev *cdev, void *buffer,
enum qed_nvm_images image_id) enum qed_nvm_images image_id)
{ {
struct qed_hwfn *p_hwfn = struct qed_hwfn *p_hwfn =
&cdev->hwfns[cdev->dbg_params.engine_for_debug]; &cdev->hwfns[cdev->engine_for_debug];
u32 len_rounded, i; u32 len_rounded, i;
__be32 val; __be32 val;
int rc; int rc;
...@@ -7780,7 +7780,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) ...@@ -7780,7 +7780,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
{ {
u8 cur_engine, omit_engine = 0, org_engine; u8 cur_engine, omit_engine = 0, org_engine;
struct qed_hwfn *p_hwfn = struct qed_hwfn *p_hwfn =
&cdev->hwfns[cdev->dbg_params.engine_for_debug]; &cdev->hwfns[cdev->engine_for_debug];
struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
int grc_params[MAX_DBG_GRC_PARAMS], i; int grc_params[MAX_DBG_GRC_PARAMS], i;
u32 offset = 0, feature_size; u32 offset = 0, feature_size;
...@@ -8000,7 +8000,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) ...@@ -8000,7 +8000,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
int qed_dbg_all_data_size(struct qed_dev *cdev) int qed_dbg_all_data_size(struct qed_dev *cdev)
{ {
struct qed_hwfn *p_hwfn = struct qed_hwfn *p_hwfn =
&cdev->hwfns[cdev->dbg_params.engine_for_debug]; &cdev->hwfns[cdev->engine_for_debug];
u32 regs_len = 0, image_len = 0, ilt_len = 0, total_ilt_len = 0; u32 regs_len = 0, image_len = 0, ilt_len = 0, total_ilt_len = 0;
u8 cur_engine, org_engine; u8 cur_engine, org_engine;
...@@ -8059,9 +8059,9 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer, ...@@ -8059,9 +8059,9 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
enum qed_dbg_features feature, u32 *num_dumped_bytes) enum qed_dbg_features feature, u32 *num_dumped_bytes)
{ {
struct qed_hwfn *p_hwfn = struct qed_hwfn *p_hwfn =
&cdev->hwfns[cdev->dbg_params.engine_for_debug]; &cdev->hwfns[cdev->engine_for_debug];
struct qed_dbg_feature *qed_feature = struct qed_dbg_feature *qed_feature =
&cdev->dbg_params.features[feature]; &cdev->dbg_features[feature];
enum dbg_status dbg_rc; enum dbg_status dbg_rc;
struct qed_ptt *p_ptt; struct qed_ptt *p_ptt;
int rc = 0; int rc = 0;
...@@ -8084,7 +8084,7 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer, ...@@ -8084,7 +8084,7 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
DP_VERBOSE(cdev, QED_MSG_DEBUG, DP_VERBOSE(cdev, QED_MSG_DEBUG,
"copying debugfs feature to external buffer\n"); "copying debugfs feature to external buffer\n");
memcpy(buffer, qed_feature->dump_buf, qed_feature->buf_size); memcpy(buffer, qed_feature->dump_buf, qed_feature->buf_size);
*num_dumped_bytes = cdev->dbg_params.features[feature].dumped_dwords * *num_dumped_bytes = cdev->dbg_features[feature].dumped_dwords *
4; 4;
out: out:
...@@ -8095,7 +8095,7 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer, ...@@ -8095,7 +8095,7 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature) int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature)
{ {
struct qed_hwfn *p_hwfn = struct qed_hwfn *p_hwfn =
&cdev->hwfns[cdev->dbg_params.engine_for_debug]; &cdev->hwfns[cdev->engine_for_debug];
struct qed_dbg_feature *qed_feature = &cdev->dbg_features[feature]; struct qed_dbg_feature *qed_feature = &cdev->dbg_features[feature];
struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn); struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
u32 buf_size_dwords; u32 buf_size_dwords;
...@@ -8120,14 +8120,14 @@ int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature) ...@@ -8120,14 +8120,14 @@ int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature)
u8 qed_get_debug_engine(struct qed_dev *cdev) u8 qed_get_debug_engine(struct qed_dev *cdev)
{ {
return cdev->dbg_params.engine_for_debug; return cdev->engine_for_debug;
} }
void qed_set_debug_engine(struct qed_dev *cdev, int engine_number) void qed_set_debug_engine(struct qed_dev *cdev, int engine_number)
{ {
DP_VERBOSE(cdev, QED_MSG_DEBUG, "set debug engine to %d\n", DP_VERBOSE(cdev, QED_MSG_DEBUG, "set debug engine to %d\n",
engine_number); engine_number);
cdev->dbg_params.engine_for_debug = engine_number; cdev->engine_for_debug = engine_number;
} }
void qed_dbg_pf_init(struct qed_dev *cdev) void qed_dbg_pf_init(struct qed_dev *cdev)
...@@ -8146,7 +8146,7 @@ void qed_dbg_pf_init(struct qed_dev *cdev) ...@@ -8146,7 +8146,7 @@ void qed_dbg_pf_init(struct qed_dev *cdev)
} }
/* Set the hwfn to be 0 as default */ /* Set the hwfn to be 0 as default */
cdev->dbg_params.engine_for_debug = 0; cdev->engine_for_debug = 0;
} }
void qed_dbg_pf_exit(struct qed_dev *cdev) void qed_dbg_pf_exit(struct qed_dev *cdev)
......
...@@ -3085,7 +3085,9 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) ...@@ -3085,7 +3085,9 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
rc = qed_final_cleanup(p_hwfn, p_hwfn->p_main_ptt, rc = qed_final_cleanup(p_hwfn, p_hwfn->p_main_ptt,
p_hwfn->rel_pf_id, false); p_hwfn->rel_pf_id, false);
if (rc) { if (rc) {
DP_NOTICE(p_hwfn, "Final cleanup failed\n"); qed_hw_err_notify(p_hwfn, p_hwfn->p_main_ptt,
QED_HW_ERR_RAMROD_FAIL,
"Final cleanup failed\n");
goto load_err; goto load_err;
} }
} }
......
...@@ -12400,6 +12400,13 @@ struct load_rsp_stc { ...@@ -12400,6 +12400,13 @@ struct load_rsp_stc {
#define LOAD_RSP_FLAGS0_DRV_EXISTS (0x1 << 0) #define LOAD_RSP_FLAGS0_DRV_EXISTS (0x1 << 0)
}; };
/* Retained mdump data as returned by the MFW. The driver passes a buffer of
 * this type as the destination of the DRV_MSG_CODE_MDUMP_GET_RETAIN
 * sub-command.
 */
struct mdump_retain_data_stc {
/* Checked by the driver before it reports the fields below */
u32 valid;
/* Logged verbatim by the driver; exact encoding is defined by the MFW */
u32 epoch;
u32 pf;
u32 status;
};
union drv_union_data { union drv_union_data {
u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD]; u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD];
struct mcp_mac wol_mac; struct mcp_mac wol_mac;
...@@ -12488,10 +12495,14 @@ struct public_drv_mb { ...@@ -12488,10 +12495,14 @@ struct public_drv_mb {
#define DRV_MSG_CODE_BIST_TEST 0x001e0000 #define DRV_MSG_CODE_BIST_TEST 0x001e0000
#define DRV_MSG_CODE_SET_LED_MODE 0x00200000 #define DRV_MSG_CODE_SET_LED_MODE 0x00200000
#define DRV_MSG_CODE_RESOURCE_CMD 0x00230000 #define DRV_MSG_CODE_RESOURCE_CMD 0x00230000
/* Send crash dump commands with param[3:0] - opcode */
#define DRV_MSG_CODE_MDUMP_CMD 0x00250000
#define DRV_MSG_CODE_GET_TLV_DONE 0x002f0000 #define DRV_MSG_CODE_GET_TLV_DONE 0x002f0000
#define DRV_MSG_CODE_GET_ENGINE_CONFIG 0x00370000 #define DRV_MSG_CODE_GET_ENGINE_CONFIG 0x00370000
#define DRV_MSG_CODE_GET_PPFID_BITMAP 0x43000000 #define DRV_MSG_CODE_GET_PPFID_BITMAP 0x43000000
#define DRV_MSG_CODE_DEBUG_DATA_SEND 0xc0040000
#define RESOURCE_CMD_REQ_RESC_MASK 0x0000001F #define RESOURCE_CMD_REQ_RESC_MASK 0x0000001F
#define RESOURCE_CMD_REQ_RESC_SHIFT 0 #define RESOURCE_CMD_REQ_RESC_SHIFT 0
#define RESOURCE_CMD_REQ_OPCODE_MASK 0x000000E0 #define RESOURCE_CMD_REQ_OPCODE_MASK 0x000000E0
...@@ -12517,6 +12528,21 @@ struct public_drv_mb { ...@@ -12517,6 +12528,21 @@ struct public_drv_mb {
#define RESOURCE_DUMP 0 #define RESOURCE_DUMP 0
/* DRV_MSG_CODE_MDUMP_CMD parameters */
#define MDUMP_DRV_PARAM_OPCODE_MASK 0x0000000f
#define DRV_MSG_CODE_MDUMP_ACK 0x01
#define DRV_MSG_CODE_MDUMP_SET_VALUES 0x02
#define DRV_MSG_CODE_MDUMP_TRIGGER 0x03
#define DRV_MSG_CODE_MDUMP_GET_CONFIG 0x04
#define DRV_MSG_CODE_MDUMP_SET_ENABLE 0x05
#define DRV_MSG_CODE_MDUMP_CLEAR_LOGS 0x06
#define DRV_MSG_CODE_MDUMP_GET_RETAIN 0x07
#define DRV_MSG_CODE_MDUMP_CLR_RETAIN 0x08
#define DRV_MSG_CODE_HW_DUMP_TRIGGER 0x0a
#define DRV_MSG_CODE_MDUMP_GEN_MDUMP2 0x0b
#define DRV_MSG_CODE_MDUMP_FREE_MDUMP2 0x0c
#define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL 0x002b0000 #define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL 0x002b0000
#define DRV_MSG_CODE_OS_WOL 0x002e0000 #define DRV_MSG_CODE_OS_WOL 0x002e0000
...@@ -12626,6 +12652,17 @@ struct public_drv_mb { ...@@ -12626,6 +12652,17 @@ struct public_drv_mb {
#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE 0x00000002 #define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE 0x00000002
#define DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_VLINK 0x00010000 #define DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_VLINK 0x00010000
/* DRV_MSG_CODE_DEBUG_DATA_SEND parameters */
#define DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE_OFFSET 0
#define DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE_MASK 0xFF
/* Driver attributes params */
#define DRV_MB_PARAM_ATTRIBUTE_KEY_OFFSET 0
#define DRV_MB_PARAM_ATTRIBUTE_KEY_MASK 0x00FFFFFF
#define DRV_MB_PARAM_ATTRIBUTE_CMD_OFFSET 24
#define DRV_MB_PARAM_ATTRIBUTE_CMD_MASK 0xFF000000
#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_OFFSET 0
#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_SHIFT 0 #define DRV_MB_PARAM_NVM_CFG_OPTION_ID_SHIFT 0
#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_MASK 0x0000FFFF #define DRV_MB_PARAM_NVM_CFG_OPTION_ID_MASK 0x0000FFFF
#define DRV_MB_PARAM_NVM_CFG_OPTION_ALL_SHIFT 16 #define DRV_MB_PARAM_NVM_CFG_OPTION_ALL_SHIFT 16
...@@ -12678,6 +12715,14 @@ struct public_drv_mb { ...@@ -12678,6 +12715,14 @@ struct public_drv_mb {
#define FW_MSG_CODE_DRV_CFG_PF_VFS_MSIX_DONE 0x00870000 #define FW_MSG_CODE_DRV_CFG_PF_VFS_MSIX_DONE 0x00870000
#define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff #define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff
#define FW_MSG_CODE_DEBUG_DATA_SEND_INV_ARG 0xb0070000
#define FW_MSG_CODE_DEBUG_DATA_SEND_BUF_FULL 0xb0080000
#define FW_MSG_CODE_DEBUG_DATA_SEND_NO_BUF 0xb0090000
#define FW_MSG_CODE_DEBUG_NOT_ENABLED 0xb00a0000
#define FW_MSG_CODE_DEBUG_DATA_SEND_OK 0xb00b0000
#define FW_MSG_CODE_MDUMP_INVALID_CMD 0x00030000
u32 fw_mb_param; u32 fw_mb_param;
#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000 #define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000
#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT 16 #define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT 16
...@@ -12742,9 +12787,9 @@ enum MFW_DRV_MSG_TYPE { ...@@ -12742,9 +12787,9 @@ enum MFW_DRV_MSG_TYPE {
MFW_DRV_MSG_GET_FCOE_STATS, MFW_DRV_MSG_GET_FCOE_STATS,
MFW_DRV_MSG_GET_ISCSI_STATS, MFW_DRV_MSG_GET_ISCSI_STATS,
MFW_DRV_MSG_GET_RDMA_STATS, MFW_DRV_MSG_GET_RDMA_STATS,
MFW_DRV_MSG_BW_UPDATE10, MFW_DRV_MSG_FAILURE_DETECTED,
MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE, MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
MFW_DRV_MSG_BW_UPDATE11, MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED,
MFW_DRV_MSG_RESERVED, MFW_DRV_MSG_RESERVED,
MFW_DRV_MSG_GET_TLV_REQ, MFW_DRV_MSG_GET_TLV_REQ,
MFW_DRV_MSG_OEM_CFG_UPDATE, MFW_DRV_MSG_OEM_CFG_UPDATE,
......
...@@ -762,9 +762,10 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn, ...@@ -762,9 +762,10 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
dst_type, dst_type,
length_cur); length_cur);
if (qed_status) { if (qed_status) {
DP_NOTICE(p_hwfn, qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_DMAE_FAIL,
"qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n", "qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n",
qed_status, src_addr, dst_addr, length_cur); qed_status, src_addr,
dst_addr, length_cur);
break; break;
} }
} }
...@@ -837,6 +838,41 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn, ...@@ -837,6 +838,41 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
return rc; return rc;
} }
/* Report a HW error to the protocol driver and, when a message is supplied,
 * to the management FW as raw debug data.
 *
 * When @fmt is non-NULL the formatted message is logged first. The
 * notification itself is suppressed while a recovery is in progress, except
 * for fan failures, which must never be masked.
 */
void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
		       struct qed_ptt *p_ptt,
		       enum qed_hw_err_type err_type, char *fmt, ...)
{
	char msg[QED_HW_ERR_MAX_STR_SIZE];
	va_list args;
	int msg_len;

	if (fmt) {
		va_start(args, fmt);
		msg_len = vsnprintf(msg, QED_HW_ERR_MAX_STR_SIZE, fmt, args);
		va_end(args);

		/* vsnprintf() reports the untruncated length; limit it to
		 * what was actually stored in the buffer.
		 */
		if (msg_len >= QED_HW_ERR_MAX_STR_SIZE)
			msg_len = QED_HW_ERR_MAX_STR_SIZE - 1;

		DP_NOTICE(p_hwfn, "%s", msg);
	}

	/* Fan failure cannot be masked by handling of another HW error */
	if (err_type != QED_HW_ERR_FAN_FAIL &&
	    p_hwfn->cdev->recov_in_prog) {
		DP_VERBOSE(p_hwfn,
			   NETIF_MSG_DRV,
			   "Recovery is in progress. Avoid notifying about HW error %d.\n",
			   err_type);
		return;
	}

	qed_hw_error_occurred(p_hwfn, err_type);

	/* Without a message there is nothing to forward to the MFW */
	if (!fmt)
		return;

	qed_mcp_send_raw_debug_data(p_hwfn, p_ptt, msg, msg_len);
}
int qed_dmae_sanity(struct qed_hwfn *p_hwfn, int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, const char *phase) struct qed_ptt *p_ptt, const char *phase)
{ {
......
...@@ -315,4 +315,19 @@ int qed_init_fw_data(struct qed_dev *cdev, ...@@ -315,4 +315,19 @@ int qed_init_fw_data(struct qed_dev *cdev,
int qed_dmae_sanity(struct qed_hwfn *p_hwfn, int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, const char *phase); struct qed_ptt *p_ptt, const char *phase);
#define QED_HW_ERR_MAX_STR_SIZE 256
/**
 * @brief qed_hw_err_notify - Notify upper layer driver and management FW
 * about a HW error.
 *
 * The formatted message (if any) is logged and later sent to the MFW as raw
 * debug data. While a recovery is in progress the notification is skipped,
 * unless the error is a fan failure.
 *
 * @param p_hwfn
 * @param p_ptt
 * @param err_type - type of the HW error being reported
 * @param fmt - printf-style format of the debug message to log and send to
 *              the MFW; may be NULL, in which case nothing is logged or sent
 *              to the MFW. The formatted text is limited to
 *              QED_HW_ERR_MAX_STR_SIZE bytes including the terminator.
 * @param ... - buffer format args
 */
void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
enum qed_hw_err_type err_type, char *fmt, ...);
#endif #endif
...@@ -96,6 +96,7 @@ struct aeu_invert_reg_bit { ...@@ -96,6 +96,7 @@ struct aeu_invert_reg_bit {
#define ATTENTION_BB(value) (value << ATTENTION_BB_SHIFT) #define ATTENTION_BB(value) (value << ATTENTION_BB_SHIFT)
#define ATTENTION_BB_DIFFERENT BIT(23) #define ATTENTION_BB_DIFFERENT BIT(23)
#define ATTENTION_CLEAR_ENABLE BIT(28)
unsigned int flags; unsigned int flags;
/* Callback to call if attention will be triggered */ /* Callback to call if attention will be triggered */
...@@ -363,6 +364,21 @@ static int qed_pglueb_rbc_attn_cb(struct qed_hwfn *p_hwfn) ...@@ -363,6 +364,21 @@ static int qed_pglueb_rbc_attn_cb(struct qed_hwfn *p_hwfn)
return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt); return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt);
} }
/* Attention callback: reports a FW assertion as a HW error using the DPC
 * PTT window and always returns -EINVAL.
 */
static int qed_fw_assertion(struct qed_hwfn *p_hwfn)
{
	struct qed_ptt *p_dpc_ptt = p_hwfn->p_dpc_ptt;

	qed_hw_err_notify(p_hwfn, p_dpc_ptt, QED_HW_ERR_FW_ASSERT,
			  "FW assertion!\n");

	return -EINVAL;
}
/* Attention callback: only logs that general attention 35 was raised and
 * reports success.
 */
static int qed_general_attention_35(struct qed_hwfn *p_hwfn)
{
	DP_INFO(p_hwfn, "General attention 35!\n");

	return 0;
}
#define QED_DORQ_ATTENTION_REASON_MASK (0xfffff) #define QED_DORQ_ATTENTION_REASON_MASK (0xfffff)
#define QED_DORQ_ATTENTION_OPAQUE_MASK (0xffff) #define QED_DORQ_ATTENTION_OPAQUE_MASK (0xffff)
#define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0) #define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0)
...@@ -605,13 +621,15 @@ static struct aeu_invert_reg aeu_descs[NUM_ATTN_REGS] = { ...@@ -605,13 +621,15 @@ static struct aeu_invert_reg aeu_descs[NUM_ATTN_REGS] = {
{ {
{ /* After Invert 4 */ { /* After Invert 4 */
{"General Attention 32", ATTENTION_SINGLE, {"General Attention 32", ATTENTION_SINGLE |
NULL, MAX_BLOCK_ID}, ATTENTION_CLEAR_ENABLE, qed_fw_assertion,
MAX_BLOCK_ID},
{"General Attention %d", {"General Attention %d",
(2 << ATTENTION_LENGTH_SHIFT) | (2 << ATTENTION_LENGTH_SHIFT) |
(33 << ATTENTION_OFFSET_SHIFT), NULL, MAX_BLOCK_ID}, (33 << ATTENTION_OFFSET_SHIFT), NULL, MAX_BLOCK_ID},
{"General Attention 35", ATTENTION_SINGLE, {"General Attention 35", ATTENTION_SINGLE |
NULL, MAX_BLOCK_ID}, ATTENTION_CLEAR_ENABLE, qed_general_attention_35,
MAX_BLOCK_ID},
{"NWS Parity", {"NWS Parity",
ATTENTION_PAR | ATTENTION_BB_DIFFERENT | ATTENTION_PAR | ATTENTION_BB_DIFFERENT |
ATTENTION_BB(AEU_INVERT_REG_SPECIAL_CNIG_0), ATTENTION_BB(AEU_INVERT_REG_SPECIAL_CNIG_0),
...@@ -927,9 +945,12 @@ qed_int_deassertion_aeu_bit(struct qed_hwfn *p_hwfn, ...@@ -927,9 +945,12 @@ qed_int_deassertion_aeu_bit(struct qed_hwfn *p_hwfn,
qed_int_attn_print(p_hwfn, p_aeu->block_index, qed_int_attn_print(p_hwfn, p_aeu->block_index,
ATTN_TYPE_INTERRUPT, !b_fatal); ATTN_TYPE_INTERRUPT, !b_fatal);
/* Reach assertion if attention is fatal */
/* If the attention is benign, no need to prevent it */ if (b_fatal)
if (!rc) qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_HW_ATTN,
"`%s': Fatal attention\n",
p_bit_name);
else /* If the attention is benign, no need to prevent it */
goto out; goto out;
/* Prevent this Attention from being asserted in the future */ /* Prevent this Attention from being asserted in the future */
...@@ -2349,6 +2370,11 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev) ...@@ -2349,6 +2370,11 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev)
cdev->hwfns[i].b_int_requested = false; cdev->hwfns[i].b_int_requested = false;
} }
/* Store in the device whether attentions should be prevented from being
 * reasserted.
 */
void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable)
{
	cdev->attn_clr_en = clr_enable;
}
int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
u8 timer_res, u16 sb_id, bool tx) u8 timer_res, u16 sb_id, bool tx)
{ {
......
...@@ -190,6 +190,17 @@ void qed_int_get_num_sbs(struct qed_hwfn *p_hwfn, ...@@ -190,6 +190,17 @@ void qed_int_get_num_sbs(struct qed_hwfn *p_hwfn,
*/ */
void qed_int_disable_post_isr_release(struct qed_dev *cdev); void qed_int_disable_post_isr_release(struct qed_dev *cdev);
/**
* @brief qed_int_attn_clr_enable - sets whether the general behavior is
* preventing attentions from being reasserted, or following the
* attributes of the specific attention.
*
* @param cdev
* @param clr_enable
*
*/
void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable);
/** /**
* @brief - Doorbell Recovery handler. * @brief - Doorbell Recovery handler.
* Run doorbell recovery in case of PF overflow (and flush DORQ if * Run doorbell recovery in case of PF overflow (and flush DORQ if
......
...@@ -2468,6 +2468,39 @@ void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn) ...@@ -2468,6 +2468,39 @@ void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
ops->schedule_recovery_handler(cookie); ops->schedule_recovery_handler(cookie);
} }
/* Human-readable descriptions of the HW error types, indexed by
 * enum qed_hw_err_type. The QED_HW_ERR_LAST slot doubles as the description
 * for out-of-range values. The table is read-only and only used here, so it
 * is kept static and const.
 */
static const char * const qed_hw_err_type_descr[] = {
	[QED_HW_ERR_FAN_FAIL] = "Fan Failure",
	[QED_HW_ERR_MFW_RESP_FAIL] = "MFW Response Failure",
	[QED_HW_ERR_HW_ATTN] = "HW Attention",
	[QED_HW_ERR_DMAE_FAIL] = "DMAE Failure",
	[QED_HW_ERR_RAMROD_FAIL] = "Ramrod Failure",
	[QED_HW_ERR_FW_ASSERT] = "FW Assertion",
	[QED_HW_ERR_LAST] = "Unknown",
};

/* Log a HW error and hand it over to the protocol driver.
 *
 * @p_hwfn:   HW function on which the error was detected
 * @err_type: type of the error; values above QED_HW_ERR_LAST are reported
 *            as QED_HW_ERR_LAST ("Unknown")
 */
void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
			   enum qed_hw_err_type err_type)
{
	struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common;
	void *cookie = p_hwfn->cdev->ops_cookie;
	const char *err_str;

	/* Clamp so that the table lookup below stays in bounds */
	if (err_type > QED_HW_ERR_LAST)
		err_type = QED_HW_ERR_LAST;
	err_str = qed_hw_err_type_descr[err_type];

	DP_NOTICE(p_hwfn, "HW error occurred [%s]\n", err_str);

	/* Call the HW error handler of the protocol driver.
	 * If it is not available - perform a minimal handling of preventing
	 * HW attentions from being reasserted.
	 */
	if (ops && ops->schedule_hw_err_handler)
		ops->schedule_hw_err_handler(cookie, err_type);
	else
		qed_int_attn_clr_enable(p_hwfn->cdev, true);
}
static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
void *handle) void *handle)
{ {
...@@ -2689,6 +2722,7 @@ const struct qed_common_ops qed_common_ops_pass = { ...@@ -2689,6 +2722,7 @@ const struct qed_common_ops qed_common_ops_pass = {
.set_led = &qed_set_led, .set_led = &qed_set_led,
.recovery_process = &qed_recovery_process, .recovery_process = &qed_recovery_process,
.recovery_prolog = &qed_recovery_prolog, .recovery_prolog = &qed_recovery_prolog,
.attn_clr_enable = &qed_int_attn_clr_enable,
.update_drv_state = &qed_update_drv_state, .update_drv_state = &qed_update_drv_state,
.update_mac = &qed_update_mac, .update_mac = &qed_update_mac,
.update_mtu = &qed_update_mtu, .update_mtu = &qed_update_mtu,
......
...@@ -575,6 +575,8 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, ...@@ -575,6 +575,8 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK)) if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK))
qed_mcp_cmd_set_blocking(p_hwfn, true); qed_mcp_cmd_set_blocking(p_hwfn, true);
qed_hw_err_notify(p_hwfn, p_ptt,
QED_HW_ERR_MFW_RESP_FAIL, NULL);
return -EAGAIN; return -EAGAIN;
} }
...@@ -1704,6 +1706,127 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) ...@@ -1704,6 +1706,127 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
&resp, &param); &resp, &param);
} }
/* Handle a fan failure message from the MFW by reporting it as a
 * QED_HW_ERR_FAN_FAIL HW error.
 */
static void qed_mcp_handle_fan_failure(struct qed_hwfn *p_hwfn,
				       struct qed_ptt *p_ptt)
{
	/* A single notification should be sent to upper driver in CMT mode,
	 * so only the leading hwfn reports it.
	 */
	if (p_hwfn == QED_LEADING_HWFN(p_hwfn->cdev))
		qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_FAN_FAIL,
				  "Fan failure was detected on the network interface card and it's going to be shut down.\n");
}
/* Arguments and result of a single DRV_MSG_CODE_MDUMP_CMD mailbox request,
 * as consumed by qed_mcp_mdump_cmd().
 */
struct qed_mdump_cmd_params {
/* mdump sub-command; passed as the mailbox parameter */
u32 cmd;
/* Optional source buffer and its size, forwarded to the mailbox request */
void *p_data_src;
u8 data_src_size;
/* Optional destination buffer and its size, forwarded to the mailbox request */
void *p_data_dst;
u8 data_dst_size;
/* Output: MFW response code, set when the mailbox command itself succeeded */
u32 mcp_resp;
};
/* Send one mdump sub-command to the MFW.
 *
 * The request described by @p_mdump_cmd_params is copied into a mailbox
 * request and sent. If the mailbox command succeeds, the MFW response code
 * is stored back into @p_mdump_cmd_params->mcp_resp.
 *
 * Returns the mailbox error if sending failed, -EOPNOTSUPP when the MFW
 * reports the command or sub-command as unsupported, 0 otherwise.
 */
static int
qed_mcp_mdump_cmd(struct qed_hwfn *p_hwfn,
		  struct qed_ptt *p_ptt,
		  struct qed_mdump_cmd_params *p_mdump_cmd_params)
{
	struct qed_mcp_mb_params mb_params;
	int rc;

	memset(&mb_params, 0, sizeof(mb_params));
	mb_params.cmd = DRV_MSG_CODE_MDUMP_CMD;
	mb_params.param = p_mdump_cmd_params->cmd;
	mb_params.p_data_src = p_mdump_cmd_params->p_data_src;
	mb_params.data_src_size = p_mdump_cmd_params->data_src_size;
	mb_params.p_data_dst = p_mdump_cmd_params->p_data_dst;
	mb_params.data_dst_size = p_mdump_cmd_params->data_dst_size;

	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
	if (rc)
		return rc;

	p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp;

	if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) {
		DP_INFO(p_hwfn,
			"The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n",
			p_mdump_cmd_params->cmd);
		return -EOPNOTSUPP;
	}

	if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
		DP_INFO(p_hwfn,
			"The mdump command is not supported by the MFW\n");
		return -EOPNOTSUPP;
	}

	return 0;
}
static int qed_mcp_mdump_ack(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_mdump_cmd_params mdump_cmd_params;
memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params));
mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_ACK;
return qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
}
int
qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
struct mdump_retain_data_stc *p_mdump_retain)
{
struct qed_mdump_cmd_params mdump_cmd_params;
int rc;
memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params));
mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN;
mdump_cmd_params.p_data_dst = p_mdump_retain;
mdump_cmd_params.data_dst_size = sizeof(*p_mdump_retain);
rc = qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
if (rc)
return rc;
if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
DP_INFO(p_hwfn,
"Failed to get the mdump retained data [mcp_resp 0x%x]\n",
mdump_cmd_params.mcp_resp);
return -EINVAL;
}
return 0;
}
static void qed_mcp_handle_critical_error(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt)
{
struct mdump_retain_data_stc mdump_retain;
int rc;
/* In CMT mode - no need for more than a single acknowledgment to the
* MFW, and no more than a single notification to the upper driver.
*/
if (p_hwfn != QED_LEADING_HWFN(p_hwfn->cdev))
return;
rc = qed_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain);
if (rc == 0 && mdump_retain.valid)
DP_NOTICE(p_hwfn,
"The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n",
mdump_retain.epoch,
mdump_retain.pf, mdump_retain.status);
else
DP_NOTICE(p_hwfn,
"The MFW notified that a critical error occurred in the device\n");
DP_NOTICE(p_hwfn,
"Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n");
qed_mcp_mdump_ack(p_hwfn, p_ptt);
qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_HW_ATTN, NULL);
}
void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{ {
struct public_func shmem_info; struct public_func shmem_info;
...@@ -1850,6 +1973,12 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn, ...@@ -1850,6 +1973,12 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
case MFW_DRV_MSG_S_TAG_UPDATE: case MFW_DRV_MSG_S_TAG_UPDATE:
qed_mcp_update_stag(p_hwfn, p_ptt); qed_mcp_update_stag(p_hwfn, p_ptt);
break; break;
case MFW_DRV_MSG_FAILURE_DETECTED:
qed_mcp_handle_fan_failure(p_hwfn, p_ptt);
break;
case MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED:
qed_mcp_handle_critical_error(p_hwfn, p_ptt);
break;
case MFW_DRV_MSG_GET_TLV_REQ: case MFW_DRV_MSG_GET_TLV_REQ:
qed_mfw_tlv_req(p_hwfn); qed_mfw_tlv_req(p_hwfn);
break; break;
...@@ -3819,3 +3948,127 @@ int qed_mcp_nvm_set_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, ...@@ -3819,3 +3948,127 @@ int qed_mcp_nvm_set_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
DRV_MSG_CODE_SET_NVM_CFG_OPTION, DRV_MSG_CODE_SET_NVM_CFG_OPTION,
mb_param, &resp, &param, len, (u32 *)p_buf); mb_param, &resp, &param, len, (u32 *)p_buf);
} }
#define QED_MCP_DBG_DATA_MAX_SIZE MCP_DRV_NVM_BUF_LEN
#define QED_MCP_DBG_DATA_MAX_HEADER_SIZE sizeof(u32)
#define QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE \
(QED_MCP_DBG_DATA_MAX_SIZE - QED_MCP_DBG_DATA_MAX_HEADER_SIZE)
/* Send a single debug-data chunk of @size bytes from @p_buf to the MFW.
 *
 * Returns 0 when the MFW accepts the data, -EINVAL for an oversized chunk or
 * an unexpected MFW response, -EOPNOTSUPP when the command is unsupported,
 * -EBUSY when debug data sending is not enabled, or the mailbox error.
 */
static int
__qed_mcp_send_debug_data(struct qed_hwfn *p_hwfn,
			  struct qed_ptt *p_ptt, u8 *p_buf, u8 size)
{
	struct qed_mcp_mb_params mb_params;
	int rc;

	if (size > QED_MCP_DBG_DATA_MAX_SIZE) {
		DP_ERR(p_hwfn,
		       "Debug data size is %d while it should not exceed %d\n",
		       size, QED_MCP_DBG_DATA_MAX_SIZE);
		return -EINVAL;
	}

	memset(&mb_params, 0, sizeof(mb_params));
	mb_params.cmd = DRV_MSG_CODE_DEBUG_DATA_SEND;
	SET_MFW_FIELD(mb_params.param, DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE, size);
	mb_params.p_data_src = p_buf;
	mb_params.data_src_size = size;

	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
	if (rc)
		return rc;

	if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
		DP_INFO(p_hwfn,
			"The DEBUG_DATA_SEND command is unsupported by the MFW\n");
		return -EOPNOTSUPP;
	}

	if (mb_params.mcp_resp == (u32)FW_MSG_CODE_DEBUG_NOT_ENABLED) {
		DP_INFO(p_hwfn, "The DEBUG_DATA_SEND command is not enabled\n");
		return -EBUSY;
	}

	if (mb_params.mcp_resp == (u32)FW_MSG_CODE_DEBUG_DATA_SEND_OK)
		return 0;

	DP_NOTICE(p_hwfn,
		  "Failed to send debug data to the MFW [resp 0x%08x]\n",
		  mb_params.mcp_resp);

	return -EINVAL;
}
/* Type of debug data sent to the MFW; written into the 'type' field of the
 * debug data header.
 */
enum qed_mcp_dbg_data_type {
QED_MCP_DBG_DATA_TYPE_RAW,
};
/* Header format: [31:28] PFID, [27:20] flags, [19:12] type, [11:0] S/N */
#define QED_MCP_DBG_DATA_HDR_SN_OFFSET 0
#define QED_MCP_DBG_DATA_HDR_SN_MASK 0x00000fff
#define QED_MCP_DBG_DATA_HDR_TYPE_OFFSET 12
#define QED_MCP_DBG_DATA_HDR_TYPE_MASK 0x000ff000
#define QED_MCP_DBG_DATA_HDR_FLAGS_OFFSET 20
#define QED_MCP_DBG_DATA_HDR_FLAGS_MASK 0x0ff00000
#define QED_MCP_DBG_DATA_HDR_PF_OFFSET 28
#define QED_MCP_DBG_DATA_HDR_PF_MASK 0xf0000000
#define QED_MCP_DBG_DATA_HDR_FLAGS_FIRST 0x1
#define QED_MCP_DBG_DATA_HDR_FLAGS_LAST 0x2
/* Send @size bytes of debug data from @p_buf to the MFW, split into chunks.
 *
 * Every chunk consists of a 32-bit header (PF id, flags, type, sequence
 * number) followed by up to QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE payload bytes.
 * The same sequence number is used for all chunks of one call; the first
 * chunk carries the 'first' flag and the final chunk the 'last' flag (a
 * message that fits in one chunk carries both).
 *
 * Returns 0 on success or the error of the first chunk that failed to send.
 */
static int
qed_mcp_send_debug_data(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
enum qed_mcp_dbg_data_type type, u8 *p_buf, u32 size)
{
u8 raw_data[QED_MCP_DBG_DATA_MAX_SIZE], *p_tmp_buf = p_buf;
u32 tmp_size = size, *p_header, *p_payload;
u8 flags = 0;
u16 seq;
int rc;
/* The chunk buffer is laid out as [header][payload] */
p_header = (u32 *)raw_data;
p_payload = (u32 *)(raw_data + QED_MCP_DBG_DATA_MAX_HEADER_SIZE);
/* One sequence number per message, taken from the per-hwfn counter */
seq = (u16)atomic_inc_return(&p_hwfn->mcp_info->dbg_data_seq);
/* First chunk is marked as 'first' */
flags |= QED_MCP_DBG_DATA_HDR_FLAGS_FIRST;
*p_header = 0;
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_SN, seq);
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_TYPE, type);
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, flags);
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_PF, p_hwfn->abs_pf_id);
/* Send full-sized chunks while more than one payload's worth remains */
while (tmp_size > QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE) {
memcpy(p_payload, p_tmp_buf, QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE);
rc = __qed_mcp_send_debug_data(p_hwfn, p_ptt, raw_data,
QED_MCP_DBG_DATA_MAX_SIZE);
if (rc)
return rc;
/* Clear the 'first' marking after sending the first chunk */
if (p_tmp_buf == p_buf) {
flags &= ~QED_MCP_DBG_DATA_HDR_FLAGS_FIRST;
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS,
flags);
}
p_tmp_buf += QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE;
tmp_size -= QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE;
}
/* Last chunk is marked as 'last' */
flags |= QED_MCP_DBG_DATA_HDR_FLAGS_LAST;
SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, flags);
memcpy(p_payload, p_tmp_buf, tmp_size);
/* Casting the remaining size to u8 is ok: after the loop the payload is at
 * most QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE, so header + payload does not
 * exceed QED_MCP_DBG_DATA_MAX_SIZE.
 */
return __qed_mcp_send_debug_data(p_hwfn, p_ptt, raw_data,
(u8)(QED_MCP_DBG_DATA_MAX_HEADER_SIZE +
tmp_size));
}
/* Send a buffer to the MFW as raw (QED_MCP_DBG_DATA_TYPE_RAW) debug data.
 *
 * @param p_hwfn
 * @param p_ptt
 * @param p_buf - raw debug data buffer
 * @param size - buffer size
 *
 * @return the result of qed_mcp_send_debug_data()
 */
int
qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn,
			    struct qed_ptt *p_ptt, u8 *p_buf, u32 size)
{
	int rc;

	rc = qed_mcp_send_debug_data(p_hwfn, p_ptt, QED_MCP_DBG_DATA_TYPE_RAW,
				     p_buf, size);

	return rc;
}
...@@ -685,6 +685,18 @@ int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn, ...@@ -685,6 +685,18 @@ int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn,
*/ */
int qed_mfw_process_tlv_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); int qed_mfw_process_tlv_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
/**
 * @brief Send raw debug data to the MFW
 *
 * @param p_hwfn
 * @param p_ptt
 * @param p_buf - raw debug data buffer
 * @param size - buffer size
 *
 * @return 0 upon success, an error code otherwise (e.g. -EOPNOTSUPP when
 *	   the MFW does not support the command).
 */
int
qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, u8 *p_buf, u32 size);
/* Using hwfn number (and not pf_num) is required since in CMT mode, /* Using hwfn number (and not pf_num) is required since in CMT mode,
* same pf_num may be used by two different hwfn * same pf_num may be used by two different hwfn
* TODO - this shouldn't really be in .h file, but until all fields * TODO - this shouldn't really be in .h file, but until all fields
...@@ -731,6 +743,9 @@ struct qed_mcp_info { ...@@ -731,6 +743,9 @@ struct qed_mcp_info {
/* Capabilties negotiated with the MFW */ /* Capabilties negotiated with the MFW */
u32 capabilities; u32 capabilities;
/* S/N for debug data mailbox commands */
atomic_t dbg_data_seq;
}; };
struct qed_mcp_mb_params { struct qed_mcp_mb_params {
...@@ -1001,6 +1016,19 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn, ...@@ -1001,6 +1016,19 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn, int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, u32 mask_parities); struct qed_ptt *p_ptt, u32 mask_parities);
/**
 * @brief - Gets the mdump retained data from the MFW.
 *
 * @param p_hwfn
 * @param p_ptt
 * @param p_mdump_retain - NOTE(review): presumably filled with the retained
 *			   data by this call; confirm against the
 *			   implementation.
 *
 * @return 0 upon success.
 */
int
qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
struct mdump_retain_data_stc *p_mdump_retain);
/** /**
* @brief - Sets the MFW's max value for the given resource * @brief - Sets the MFW's max value for the given resource
* *
......
...@@ -160,12 +160,16 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn, ...@@ -160,12 +160,16 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn,
return 0; return 0;
} }
err: err:
DP_NOTICE(p_hwfn, p_ptt = qed_ptt_acquire(p_hwfn);
if (!p_ptt)
return -EBUSY;
qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_RAMROD_FAIL,
"Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n", "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n",
le32_to_cpu(p_ent->elem.hdr.cid), le32_to_cpu(p_ent->elem.hdr.cid),
p_ent->elem.hdr.cmd_id, p_ent->elem.hdr.cmd_id,
p_ent->elem.hdr.protocol_id, p_ent->elem.hdr.protocol_id,
le16_to_cpu(p_ent->elem.hdr.echo)); le16_to_cpu(p_ent->elem.hdr.echo));
qed_ptt_release(p_hwfn, p_ptt);
return -EBUSY; return -EBUSY;
} }
......
...@@ -278,6 +278,14 @@ struct qede_dev { ...@@ -278,6 +278,14 @@ struct qede_dev {
struct qede_rdma_dev rdma_info; struct qede_rdma_dev rdma_info;
struct bpf_prog *xdp_prog; struct bpf_prog *xdp_prog;
unsigned long err_flags;
#define QEDE_ERR_IS_HANDLED 31
#define QEDE_ERR_ATTN_CLR_EN 0
#define QEDE_ERR_GET_DBG_INFO 1
#define QEDE_ERR_IS_RECOVERABLE 2
#define QEDE_ERR_WARN 3
struct qede_dump_info dump_info; struct qede_dump_info dump_info;
}; };
...@@ -485,12 +493,15 @@ struct qede_fastpath { ...@@ -485,12 +493,15 @@ struct qede_fastpath {
#define QEDE_SP_RECOVERY 0 #define QEDE_SP_RECOVERY 0
#define QEDE_SP_RX_MODE 1 #define QEDE_SP_RX_MODE 1
#define QEDE_SP_RSVD1 2
#define QEDE_SP_RSVD2 3
#define QEDE_SP_HW_ERR 4
#define QEDE_SP_ARFS_CONFIG 5
#define QEDE_SP_AER 7 #define QEDE_SP_AER 7
#ifdef CONFIG_RFS_ACCEL #ifdef CONFIG_RFS_ACCEL
int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id); u16 rxq_index, u32 flow_id);
#define QEDE_SP_ARFS_CONFIG 4
#define QEDE_SP_TASK_POLL_DELAY (5 * HZ) #define QEDE_SP_TASK_POLL_DELAY (5 * HZ)
#endif #endif
...@@ -522,7 +533,6 @@ u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, ...@@ -522,7 +533,6 @@ u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
netdev_features_t qede_features_check(struct sk_buff *skb, netdev_features_t qede_features_check(struct sk_buff *skb,
struct net_device *dev, struct net_device *dev,
netdev_features_t features); netdev_features_t features);
void qede_tx_log_print(struct qede_dev *edev, struct qede_fastpath *fp);
int qede_alloc_rx_buffer(struct qede_rx_queue *rxq, bool allow_lazy); int qede_alloc_rx_buffer(struct qede_rx_queue *rxq, bool allow_lazy);
int qede_free_tx_pkt(struct qede_dev *edev, int qede_free_tx_pkt(struct qede_dev *edev,
struct qede_tx_queue *txq, int *len); struct qede_tx_queue *txq, int *len);
......
...@@ -190,12 +190,14 @@ static const struct { ...@@ -190,12 +190,14 @@ static const struct {
enum { enum {
QEDE_PRI_FLAG_CMT, QEDE_PRI_FLAG_CMT,
QEDE_PRI_FLAG_SMART_AN_SUPPORT, /* MFW supports SmartAN */ QEDE_PRI_FLAG_SMART_AN_SUPPORT, /* MFW supports SmartAN */
QEDE_PRI_FLAG_RECOVER_ON_ERROR,
QEDE_PRI_FLAG_LEN, QEDE_PRI_FLAG_LEN,
}; };
static const char qede_private_arr[QEDE_PRI_FLAG_LEN][ETH_GSTRING_LEN] = { static const char qede_private_arr[QEDE_PRI_FLAG_LEN][ETH_GSTRING_LEN] = {
"Coupled-Function", "Coupled-Function",
"SmartAN capable", "SmartAN capable",
"Recover on error",
}; };
enum qede_ethtool_tests { enum qede_ethtool_tests {
...@@ -417,9 +419,30 @@ static u32 qede_get_priv_flags(struct net_device *dev) ...@@ -417,9 +419,30 @@ static u32 qede_get_priv_flags(struct net_device *dev)
if (edev->dev_info.common.smart_an) if (edev->dev_info.common.smart_an)
flags |= BIT(QEDE_PRI_FLAG_SMART_AN_SUPPORT); flags |= BIT(QEDE_PRI_FLAG_SMART_AN_SUPPORT);
if (edev->err_flags & BIT(QEDE_ERR_IS_RECOVERABLE))
flags |= BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR);
return flags; return flags;
} }
/* ethtool set_priv_flags callback.
 *
 * Only the "Recover on error" private flag may be changed; a request that
 * toggles any other flag is rejected with -EINVAL. The flag is stored as
 * QEDE_ERR_IS_RECOVERABLE in edev->err_flags.
 *
 * Returns 0 upon success.
 */
static int qede_set_priv_flags(struct net_device *dev, u32 flags)
{
	struct qede_dev *edev = netdev_priv(dev);
	u32 changed = flags ^ qede_get_priv_flags(dev);

	/* Anything but RECOVER_ON_ERROR is read-only */
	if (changed & ~BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR))
		return -EINVAL;

	if (!(flags & BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR))) {
		clear_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags);
		return 0;
	}

	set_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags);

	return 0;
}
struct qede_link_mode_mapping { struct qede_link_mode_mapping {
u32 qed_link_mode; u32 qed_link_mode;
u32 ethtool_link_mode; u32 ethtool_link_mode;
...@@ -2098,6 +2121,7 @@ static const struct ethtool_ops qede_ethtool_ops = { ...@@ -2098,6 +2121,7 @@ static const struct ethtool_ops qede_ethtool_ops = {
.set_phys_id = qede_set_phys_id, .set_phys_id = qede_set_phys_id,
.get_ethtool_stats = qede_get_ethtool_stats, .get_ethtool_stats = qede_get_ethtool_stats,
.get_priv_flags = qede_get_priv_flags, .get_priv_flags = qede_get_priv_flags,
.set_priv_flags = qede_set_priv_flags,
.get_sset_count = qede_get_sset_count, .get_sset_count = qede_get_sset_count,
.get_rxnfc = qede_get_rxnfc, .get_rxnfc = qede_get_rxnfc,
.set_rxnfc = qede_set_rxnfc, .set_rxnfc = qede_set_rxnfc,
......
...@@ -139,10 +139,12 @@ static void qede_shutdown(struct pci_dev *pdev); ...@@ -139,10 +139,12 @@ static void qede_shutdown(struct pci_dev *pdev);
static void qede_link_update(void *dev, struct qed_link_output *link); static void qede_link_update(void *dev, struct qed_link_output *link);
static void qede_schedule_recovery_handler(void *dev); static void qede_schedule_recovery_handler(void *dev);
static void qede_recovery_handler(struct qede_dev *edev); static void qede_recovery_handler(struct qede_dev *edev);
static void qede_schedule_hw_err_handler(void *dev,
enum qed_hw_err_type err_type);
static void qede_get_eth_tlv_data(void *edev, void *data); static void qede_get_eth_tlv_data(void *edev, void *data);
static void qede_get_generic_tlv_data(void *edev, static void qede_get_generic_tlv_data(void *edev,
struct qed_generic_tlvs *data); struct qed_generic_tlvs *data);
static void qede_generic_hw_err_handler(struct qede_dev *edev);
#ifdef CONFIG_QED_SRIOV #ifdef CONFIG_QED_SRIOV
static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos, static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
__be16 vlan_proto) __be16 vlan_proto)
...@@ -230,6 +232,7 @@ static struct qed_eth_cb_ops qede_ll_ops = { ...@@ -230,6 +232,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
#endif #endif
.link_update = qede_link_update, .link_update = qede_link_update,
.schedule_recovery_handler = qede_schedule_recovery_handler, .schedule_recovery_handler = qede_schedule_recovery_handler,
.schedule_hw_err_handler = qede_schedule_hw_err_handler,
.get_generic_tlv_data = qede_get_generic_tlv_data, .get_generic_tlv_data = qede_get_generic_tlv_data,
.get_protocol_tlv_data = qede_get_eth_tlv_data, .get_protocol_tlv_data = qede_get_eth_tlv_data,
}, },
...@@ -536,6 +539,51 @@ static int qede_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) ...@@ -536,6 +539,51 @@ static int qede_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return 0; return 0;
} }
static void qede_tx_log_print(struct qede_dev *edev, struct qede_tx_queue *txq)
{
DP_NOTICE(edev,
"Txq[%d]: FW cons [host] %04x, SW cons %04x, SW prod %04x [Jiffies %lu]\n",
txq->index, le16_to_cpu(*txq->hw_cons_ptr),
qed_chain_get_cons_idx(&txq->tx_pbl),
qed_chain_get_prod_idx(&txq->tx_pbl),
jiffies);
}
static void qede_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct qede_dev *edev = netdev_priv(dev);
struct qede_tx_queue *txq;
int cos;
netif_carrier_off(dev);
DP_NOTICE(edev, "TX timeout on queue %u!\n", txqueue);
if (!(edev->fp_array[txqueue].type & QEDE_FASTPATH_TX))
return;
for_each_cos_in_txq(edev, cos) {
txq = &edev->fp_array[txqueue].txq[cos];
if (qed_chain_get_cons_idx(&txq->tx_pbl) !=
qed_chain_get_prod_idx(&txq->tx_pbl))
qede_tx_log_print(edev, txq);
}
if (IS_VF(edev))
return;
if (test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
edev->state == QEDE_STATE_RECOVERY) {
DP_INFO(edev,
"Avoid handling a Tx timeout while another HW error is being handled\n");
return;
}
set_bit(QEDE_ERR_GET_DBG_INFO, &edev->err_flags);
set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
schedule_delayed_work(&edev->sp_task, 0);
}
static int qede_setup_tc(struct net_device *ndev, u8 num_tc) static int qede_setup_tc(struct net_device *ndev, u8 num_tc)
{ {
struct qede_dev *edev = netdev_priv(ndev); struct qede_dev *edev = netdev_priv(ndev);
...@@ -623,6 +671,7 @@ static const struct net_device_ops qede_netdev_ops = { ...@@ -623,6 +671,7 @@ static const struct net_device_ops qede_netdev_ops = {
.ndo_validate_addr = eth_validate_addr, .ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = qede_change_mtu, .ndo_change_mtu = qede_change_mtu,
.ndo_do_ioctl = qede_ioctl, .ndo_do_ioctl = qede_ioctl,
.ndo_tx_timeout = qede_tx_timeout,
#ifdef CONFIG_QED_SRIOV #ifdef CONFIG_QED_SRIOV
.ndo_set_vf_mac = qede_set_vf_mac, .ndo_set_vf_mac = qede_set_vf_mac,
.ndo_set_vf_vlan = qede_set_vf_vlan, .ndo_set_vf_vlan = qede_set_vf_vlan,
...@@ -1009,6 +1058,8 @@ static void qede_sp_task(struct work_struct *work) ...@@ -1009,6 +1058,8 @@ static void qede_sp_task(struct work_struct *work)
qede_process_arfs_filters(edev, false); qede_process_arfs_filters(edev, false);
} }
#endif #endif
if (test_and_clear_bit(QEDE_SP_HW_ERR, &edev->sp_flags))
qede_generic_hw_err_handler(edev);
__qede_unlock(edev); __qede_unlock(edev);
if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) { if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) {
...@@ -2509,6 +2560,100 @@ static void qede_recovery_handler(struct qede_dev *edev) ...@@ -2509,6 +2560,100 @@ static void qede_recovery_handler(struct qede_dev *edev)
qede_recovery_failed(edev); qede_recovery_failed(edev);
} }
static void qede_atomic_hw_err_handler(struct qede_dev *edev)
{
struct qed_dev *cdev = edev->cdev;
DP_NOTICE(edev,
"Generic non-sleepable HW error handling started - err_flags 0x%lx\n",
edev->err_flags);
/* Get a call trace of the flow that led to the error */
WARN_ON(test_bit(QEDE_ERR_WARN, &edev->err_flags));
/* Prevent HW attentions from being reasserted */
if (test_bit(QEDE_ERR_ATTN_CLR_EN, &edev->err_flags))
edev->ops->common->attn_clr_enable(cdev, true);
DP_NOTICE(edev, "Generic non-sleepable HW error handling is done\n");
}
/* Part of the HW error handling that runs from the slowpath task (when
 * QEDE_SP_HW_ERR is set).
 *
 * Starts a recovery process if the error is marked as recoverable and then
 * releases QEDE_ERR_IS_HANDLED so that new errors can be handled.
 */
static void qede_generic_hw_err_handler(struct qede_dev *edev)
{
	DP_NOTICE(edev,
		  "Generic sleepable HW error handling started - err_flags 0x%lx\n",
		  edev->err_flags);

	/* Triggering the recovery is done here, in the section that is
	 * allowed to sleep, only to guarantee that it comes last, after all
	 * the other operations were completed.
	 */
	if (test_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags))
		edev->ops->common->recovery_process(edev->cdev);

	clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);

	DP_NOTICE(edev, "Generic sleepable HW error handling is done\n");
}
/* Translate a qed HW error type into the QEDE_ERR_* handling flags.
 *
 * DMAE failures additionally request a WARN; all the listed error types
 * request attention-clear and debug-info collection. Any other type is only
 * logged and sets no flag.
 *
 * The bits are set directly on edev->err_flags with atomic set_bit(): the
 * same word is modified concurrently through set_bit()/clear_bit()/
 * test_and_set_bit() elsewhere in the driver, and a plain "|=" read-modify-
 * write here could silently undo one of those updates.
 */
static void qede_set_hw_err_flags(struct qede_dev *edev,
				  enum qed_hw_err_type err_type)
{
	switch (err_type) {
	case QED_HW_ERR_DMAE_FAIL:
		set_bit(QEDE_ERR_WARN, &edev->err_flags);
		fallthrough;
	case QED_HW_ERR_MFW_RESP_FAIL:
	case QED_HW_ERR_HW_ATTN:
	case QED_HW_ERR_RAMROD_FAIL:
	case QED_HW_ERR_FW_ASSERT:
		set_bit(QEDE_ERR_ATTN_CLR_EN, &edev->err_flags);
		set_bit(QEDE_ERR_GET_DBG_INFO, &edev->err_flags);
		break;

	default:
		DP_NOTICE(edev, "Unexpected HW error [%d]\n", err_type);
		break;
	}
}
static void qede_schedule_hw_err_handler(void *dev,
enum qed_hw_err_type err_type)
{
struct qede_dev *edev = dev;
/* Fan failure cannot be masked by handling of another HW error or by a
* concurrent recovery process.
*/
if ((test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
edev->state == QEDE_STATE_RECOVERY) &&
err_type != QED_HW_ERR_FAN_FAIL) {
DP_INFO(edev,
"Avoid scheduling an error handling while another HW error is being handled\n");
return;
}
if (err_type >= QED_HW_ERR_LAST) {
DP_NOTICE(edev, "Unknown HW error [%d]\n", err_type);
clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
return;
}
qede_set_hw_err_flags(edev, err_type);
qede_atomic_hw_err_handler(edev);
set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
schedule_delayed_work(&edev->sp_task, 0);
DP_INFO(edev, "Scheduled a error handler [err_type %d]\n", err_type);
}
static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq) static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
{ {
struct netdev_queue *netdev_txq; struct netdev_queue *netdev_txq;
......
...@@ -607,6 +607,16 @@ struct qed_sb_info { ...@@ -607,6 +607,16 @@ struct qed_sb_info {
struct qed_dev *cdev; struct qed_dev *cdev;
}; };
/* Hardware error categories passed to the schedule_hw_err_handler callback.
 * QED_HW_ERR_LAST is not an error type; it marks the end of the valid range.
 */
enum qed_hw_err_type {
QED_HW_ERR_FAN_FAIL,
QED_HW_ERR_MFW_RESP_FAIL,
QED_HW_ERR_HW_ATTN,
QED_HW_ERR_DMAE_FAIL,
QED_HW_ERR_RAMROD_FAIL,
QED_HW_ERR_FW_ASSERT,
QED_HW_ERR_LAST,
};
enum qed_dev_type { enum qed_dev_type {
QED_DEV_TYPE_BB, QED_DEV_TYPE_BB,
QED_DEV_TYPE_AH, QED_DEV_TYPE_AH,
...@@ -811,9 +821,10 @@ enum qed_nvm_flash_cmd { ...@@ -811,9 +821,10 @@ enum qed_nvm_flash_cmd {
struct qed_common_cb_ops { struct qed_common_cb_ops {
void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc); void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
void (*link_update)(void *dev, void (*link_update)(void *dev, struct qed_link_output *link);
struct qed_link_output *link);
void (*schedule_recovery_handler)(void *dev); void (*schedule_recovery_handler)(void *dev);
void (*schedule_hw_err_handler)(void *dev,
enum qed_hw_err_type err_type);
void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type); void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data); void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
void (*get_protocol_tlv_data)(void *dev, void *data); void (*get_protocol_tlv_data)(void *dev, void *data);
...@@ -1034,6 +1045,15 @@ struct qed_common_ops { ...@@ -1034,6 +1045,15 @@ struct qed_common_ops {
*/ */
int (*set_led)(struct qed_dev *cdev, int (*set_led)(struct qed_dev *cdev,
enum qed_led_mode mode); enum qed_led_mode mode);
/**
* @brief attn_clr_enable - Prevent attentions from being reasserted
*
* @param cdev
* @param clr_enable
*/
void (*attn_clr_enable)(struct qed_dev *cdev, bool clr_enable);
/** /**
* @brief db_recovery_add - add doorbell information to the doorbell * @brief db_recovery_add - add doorbell information to the doorbell
* recovery mechanism. * recovery mechanism.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment