Commit 8b09be5f authored by Yuval Mintz, committed by David S. Miller

bnx2x: Revising locking scheme for MAC configuration

On very rare occasions, repeated load/unload stress test in the presence of
our storage driver (bnx2i/bnx2fc) causes a kernel panic in bnx2x code
(NULL pointer dereference). Stack traces indicate the issue happens during MAC
configuration; thorough code review showed that indeed several races exist
in which one thread can iterate over the list of configured MACs while another
deletes entries from the same list.

This patch adds a variant of the single-writer/multiple-reader lock mechanism -
it utilizes an already existing bottom-half lock, using it so that whenever
a writer is unable to continue due to the existence of another writer/reader,
it pends its request for later execution.
The writer / last readers will check for the existence of such requests and
perform them instead of the original initiator.
This prevents the writer from having to sleep while waiting for the lock
to be accessible, which might cause deadlocks given the locks already
held by the writer.

Another result of this patch is that setting of Rx Mode is now made in
sleepable context - Setting of Rx Mode is made under a bottom-half lock, which
was always nontrivial for the bnx2x driver, as the HW/FW configuration requires
waiting for completions.
Since sleep was impossible (due to the sleepless-context), various mechanisms
were utilized to prevent the calling thread from sleeping, but the truth was
that when the caller thread (i.e., the one calling ndo_set_rx_mode()) returned,
the Rx mode was still not set in HW/FW.

bnx2x_set_rx_mode() will now overtly schedule for the Rx changes to be
configured by the sp_rtnl_task, which holds the RTNL lock and runs in a
sleepable context.
Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 4beac029
......@@ -1331,7 +1331,7 @@ enum {
BNX2X_SP_RTNL_ENABLE_SRIOV,
BNX2X_SP_RTNL_VFPF_MCAST,
BNX2X_SP_RTNL_VFPF_CHANNEL_DOWN,
BNX2X_SP_RTNL_VFPF_STORM_RX_MODE,
BNX2X_SP_RTNL_RX_MODE,
BNX2X_SP_RTNL_HYPERVISOR_VLAN,
};
......
......@@ -2060,7 +2060,11 @@ void bnx2x_squeeze_objects(struct bnx2x *bp)
rparam.mcast_obj = &bp->mcast_obj;
__set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
/* Add a DEL command... */
/* Add a DEL command... - Since we're doing a driver cleanup only,
* we take a lock surrounding both the initial send and the CONTs,
* as we don't want a true completion to disrupt us in the middle.
*/
netif_addr_lock_bh(bp->dev);
rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
if (rc < 0)
BNX2X_ERR("Failed to add a new DEL command to a multi-cast object: %d\n",
......@@ -2072,11 +2076,13 @@ void bnx2x_squeeze_objects(struct bnx2x *bp)
if (rc < 0) {
BNX2X_ERR("Failed to clean multi-cast object: %d\n",
rc);
netif_addr_unlock_bh(bp->dev);
return;
}
rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
}
netif_addr_unlock_bh(bp->dev);
}
#ifndef BNX2X_STOP_ON_ERROR
......@@ -2432,9 +2438,7 @@ int bnx2x_load_cnic(struct bnx2x *bp)
}
/* Initialize Rx filter. */
netif_addr_lock_bh(bp->dev);
bnx2x_set_rx_mode(bp->dev);
netif_addr_unlock_bh(bp->dev);
bnx2x_set_rx_mode_inner(bp);
/* re-read iscsi info */
bnx2x_get_iscsi_info(bp);
......@@ -2704,9 +2708,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
/* Start fast path */
/* Initialize Rx filter. */
netif_addr_lock_bh(bp->dev);
bnx2x_set_rx_mode(bp->dev);
netif_addr_unlock_bh(bp->dev);
bnx2x_set_rx_mode_inner(bp);
/* Start the Tx */
switch (load_mode) {
......
......@@ -418,6 +418,7 @@ int bnx2x_set_eth_mac(struct bnx2x *bp, bool set);
* netif_addr_lock_bh()
*/
void bnx2x_set_rx_mode(struct net_device *dev);
void bnx2x_set_rx_mode_inner(struct bnx2x *bp);
/**
* bnx2x_set_storm_rx_mode - configure MAC filtering rules in a FW.
......
......@@ -9628,11 +9628,9 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work)
}
}
if (test_and_clear_bit(BNX2X_SP_RTNL_VFPF_STORM_RX_MODE,
&bp->sp_rtnl_state)) {
DP(BNX2X_MSG_SP,
"sending set storm rx mode vf pf channel message from rtnl sp-task\n");
bnx2x_vfpf_storm_rx_mode(bp);
if (test_and_clear_bit(BNX2X_SP_RTNL_RX_MODE, &bp->sp_rtnl_state)) {
DP(BNX2X_MSG_SP, "Handling Rx Mode setting\n");
bnx2x_set_rx_mode_inner(bp);
}
if (test_and_clear_bit(BNX2X_SP_RTNL_HYPERVISOR_VLAN,
......@@ -11849,34 +11847,48 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
void bnx2x_set_rx_mode(struct net_device *dev)
{
struct bnx2x *bp = netdev_priv(dev);
u32 rx_mode = BNX2X_RX_MODE_NORMAL;
if (bp->state != BNX2X_STATE_OPEN) {
DP(NETIF_MSG_IFUP, "state is %x, returning\n", bp->state);
return;
} else {
/* Schedule an SP task to handle rest of change */
DP(NETIF_MSG_IFUP, "Scheduling an Rx mode change\n");
smp_mb__before_clear_bit();
set_bit(BNX2X_SP_RTNL_RX_MODE, &bp->sp_rtnl_state);
smp_mb__after_clear_bit();
schedule_delayed_work(&bp->sp_rtnl_task, 0);
}
}
void bnx2x_set_rx_mode_inner(struct bnx2x *bp)
{
u32 rx_mode = BNX2X_RX_MODE_NORMAL;
DP(NETIF_MSG_IFUP, "dev->flags = %x\n", bp->dev->flags);
if (dev->flags & IFF_PROMISC)
netif_addr_lock_bh(bp->dev);
if (bp->dev->flags & IFF_PROMISC) {
rx_mode = BNX2X_RX_MODE_PROMISC;
else if ((dev->flags & IFF_ALLMULTI) ||
((netdev_mc_count(dev) > BNX2X_MAX_MULTICAST) &&
CHIP_IS_E1(bp)))
} else if ((bp->dev->flags & IFF_ALLMULTI) ||
((netdev_mc_count(bp->dev) > BNX2X_MAX_MULTICAST) &&
CHIP_IS_E1(bp))) {
rx_mode = BNX2X_RX_MODE_ALLMULTI;
else {
} else {
if (IS_PF(bp)) {
/* some multicasts */
if (bnx2x_set_mc_list(bp) < 0)
rx_mode = BNX2X_RX_MODE_ALLMULTI;
/* release bh lock, as bnx2x_set_uc_list might sleep */
netif_addr_unlock_bh(bp->dev);
if (bnx2x_set_uc_list(bp) < 0)
rx_mode = BNX2X_RX_MODE_PROMISC;
netif_addr_lock_bh(bp->dev);
} else {
/* configuring mcast to a vf involves sleeping (when we
* wait for the pf's response). Since this function is
* called from non sleepable context we must schedule
* a work item for this purpose
* wait for the pf's response).
*/
smp_mb__before_clear_bit();
set_bit(BNX2X_SP_RTNL_VFPF_MCAST,
......@@ -11894,22 +11906,20 @@ void bnx2x_set_rx_mode(struct net_device *dev)
/* Schedule the rx_mode command */
if (test_bit(BNX2X_FILTER_RX_MODE_PENDING, &bp->sp_state)) {
set_bit(BNX2X_FILTER_RX_MODE_SCHED, &bp->sp_state);
netif_addr_unlock_bh(bp->dev);
return;
}
if (IS_PF(bp)) {
bnx2x_set_storm_rx_mode(bp);
netif_addr_unlock_bh(bp->dev);
} else {
/* configuring rx mode to storms in a vf involves sleeping (when
* we wait for the pf's response). Since this function is
* called from non sleepable context we must schedule
* a work item for this purpose
/* VF will need to request the PF to make this change, and so
* the VF needs to release the bottom-half lock prior to the
* request (as it will likely require sleep on the VF side)
*/
smp_mb__before_clear_bit();
set_bit(BNX2X_SP_RTNL_VFPF_STORM_RX_MODE,
&bp->sp_rtnl_state);
smp_mb__after_clear_bit();
schedule_delayed_work(&bp->sp_rtnl_task, 0);
netif_addr_unlock_bh(bp->dev);
bnx2x_vfpf_storm_rx_mode(bp);
}
}
......
......@@ -285,6 +285,12 @@ struct bnx2x_vlan_mac_obj {
* entries.
*/
struct list_head head;
/* Implement a simple reader/writer lock on the head list.
* all these fields should only be accessed under the exe_queue lock
*/
u8 head_reader; /* Num. of readers accessing head list */
bool head_exe_request; /* Pending execution request. */
unsigned long saved_ramrod_flags; /* Ramrods of pending execution */
/* TODO: Add it's initialization in the init functions */
struct bnx2x_exe_queue_obj exe_queue;
......@@ -1302,8 +1308,16 @@ void bnx2x_init_vlan_mac_obj(struct bnx2x *bp,
struct bnx2x_credit_pool_obj *macs_pool,
struct bnx2x_credit_pool_obj *vlans_pool);
int bnx2x_vlan_mac_h_read_lock(struct bnx2x *bp,
struct bnx2x_vlan_mac_obj *o);
void bnx2x_vlan_mac_h_read_unlock(struct bnx2x *bp,
struct bnx2x_vlan_mac_obj *o);
int bnx2x_vlan_mac_h_write_lock(struct bnx2x *bp,
struct bnx2x_vlan_mac_obj *o);
void bnx2x_vlan_mac_h_write_unlock(struct bnx2x *bp,
struct bnx2x_vlan_mac_obj *o);
int bnx2x_config_vlan_mac(struct bnx2x *bp,
struct bnx2x_vlan_mac_ramrod_params *p);
struct bnx2x_vlan_mac_ramrod_params *p);
int bnx2x_vlan_mac_move(struct bnx2x *bp,
struct bnx2x_vlan_mac_ramrod_params *p,
......
......@@ -491,12 +491,20 @@ static inline void bnx2x_vfop_credit(struct bnx2x *bp,
* and a valid credit counter
*/
if (!vfop->rc && args->credit) {
int cnt = 0;
struct list_head *pos;
int read_lock;
int cnt = 0;
read_lock = bnx2x_vlan_mac_h_read_lock(bp, obj);
if (read_lock)
DP(BNX2X_MSG_SP, "Failed to take vlan mac read head; continuing anyway\n");
list_for_each(pos, &obj->head)
cnt++;
if (!read_lock)
bnx2x_vlan_mac_h_read_unlock(bp, obj);
atomic_set(args->credit, cnt);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment