Commit df012169 authored by David S. Miller's avatar David S. Miller

Merge branch 'bonding_rcu'

Ding Tianhong says:

====================
bonding: rebuild the lock use for bond monitor

Now the bond slave list is not protected by bond lock, only by RTNL,
but the monitor still use the bond lock to protect the slave list,
it is useless, according to the Veaceslav's opinion, there were
three way to fix the protect problem:

1. add bond_master_upper_dev_link() and bond_upper_dev_unlink()
   in bond->lock, but it is unsafe to call call_netdevice_notifiers()
   in write lock.
2. remove unused bond->lock for monitor function, only use the exist
   rtnl lock(), it will take performance loss in fast path.
3. use RCU to protect the slave list, of course, performance is better,
   but in slow path, it is ignored.

obviously the solution 1 is not fit here, I will consider the 2 and 3
solution. My principle is simple, if in fast path, RCU is better,
otherwise in slow path, both is well, but according to the Jay Vosburgh's
opinion, the monitor will loss performace if use RTNL to protect the all
slave list, so remove the bond lock and replace with RCU.

The second problem is the curr_slave_lock for bond, it is too old and
unwanted in many place, because the curr_active_slave would only be
changed in 3 place:

1. enslave slave.
2. release slave.
3. change active slave.

all above were already holding bond lock, RTNL and curr_slave_lock
together, it is tedious and no need to add so mach lock, when change
the curr_active_slave, you have to hold the RTNL and curr_slave_lock
together, and when you read the curr_active_slave, RTNL or curr_slave_lock,
any one of them is no problem.

for the stability, I did not change the logic for the monitor,
all change is clear and simple, I have test the patch set for lockdep,
it work well and stability.

v2. accept the Jay Vosburgh's opinion, remove the RTNL and replace with RCU,
    also add some rcu function for bond use, so the patch set reach 10.

v3. accept the Nikolay Aleksandrov's opinion, remove no needed bond_has_slave_rcu(),
    add protection for several 3ad mode handler functions and current_arp_slave.
    rebuild the bond_first_slave_rcu(), make it more clear.

v4. because the struct netdev_adjacent should not be exist in netdevice.h, so I have
    to make a new function to support micro bond_first_slave_rcu().
    also add a new patch to simplify the bond_resend_igmp_join_requests_delayed().

v5. according the Jay Vosburgh's opinion, in patch 2 and 6, the calling of notify
    peer is hardly to happen with the bond_xxx_commit() when the monitoring is running,
    so the performance impact about make two round trips to one trip on RTNL is minimal,
    no need to do that,the reason is very clear, so modify the patch 2 and 6, recover
    the notify peer in RTNL alone.
====================
Signed-off-by: default avatarJay Vosburgh <fubar@us.ibm.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents e57a784d f2369109
......@@ -147,11 +147,12 @@ static inline struct aggregator *__get_first_agg(struct port *port)
struct bonding *bond = __get_bond_by_port(port);
struct slave *first_slave;
// If there's no bond for this port, or bond has no slaves
/* If there's no bond for this port, or bond has no slaves */
if (bond == NULL)
return NULL;
first_slave = bond_first_slave(bond);
rcu_read_lock();
first_slave = bond_first_slave_rcu(bond);
rcu_read_unlock();
return first_slave ? &(SLAVE_AD_INFO(first_slave).aggregator) : NULL;
}
......@@ -702,9 +703,13 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator)
struct list_head *iter;
struct slave *slave;
bond_for_each_slave(bond, slave, iter)
if (SLAVE_AD_INFO(slave).aggregator.is_active)
rcu_read_lock();
bond_for_each_slave_rcu(bond, slave, iter)
if (SLAVE_AD_INFO(slave).aggregator.is_active) {
rcu_read_unlock();
return &(SLAVE_AD_INFO(slave).aggregator);
}
rcu_read_unlock();
return NULL;
}
......@@ -1471,7 +1476,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)
active = __get_active_agg(agg);
best = (active && agg_device_up(active)) ? active : NULL;
bond_for_each_slave(bond, slave, iter) {
rcu_read_lock();
bond_for_each_slave_rcu(bond, slave, iter) {
agg = &(SLAVE_AD_INFO(slave).aggregator);
agg->is_active = 0;
......@@ -1505,7 +1511,7 @@ static void ad_agg_selection_logic(struct aggregator *agg)
active->is_active = 1;
}
// if there is new best aggregator, activate it
/* if there is new best aggregator, activate it */
if (best) {
pr_debug("best Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n",
best->aggregator_identifier, best->num_of_ports,
......@@ -1516,7 +1522,7 @@ static void ad_agg_selection_logic(struct aggregator *agg)
best->lag_ports, best->slave,
best->slave ? best->slave->dev->name : "NULL");
bond_for_each_slave(bond, slave, iter) {
bond_for_each_slave_rcu(bond, slave, iter) {
agg = &(SLAVE_AD_INFO(slave).aggregator);
pr_debug("Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n",
......@@ -1526,10 +1532,11 @@ static void ad_agg_selection_logic(struct aggregator *agg)
agg->is_individual, agg->is_active);
}
// check if any partner replys
/* check if any partner replys */
if (best->is_individual) {
pr_warning("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n",
best->slave ? best->slave->bond->dev->name : "NULL");
best->slave ?
best->slave->bond->dev->name : "NULL");
}
best->is_active = 1;
......@@ -1541,7 +1548,7 @@ static void ad_agg_selection_logic(struct aggregator *agg)
best->partner_oper_aggregator_key,
best->is_individual, best->is_active);
// disable the ports that were related to the former active_aggregator
/* disable the ports that were related to the former active_aggregator */
if (active) {
for (port = active->lag_ports; port;
port = port->next_port_in_aggregator) {
......@@ -1565,6 +1572,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)
}
}
rcu_read_unlock();
bond_3ad_set_carrier(bond);
}
......@@ -2069,17 +2078,18 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
struct port *port;
read_lock(&bond->lock);
rcu_read_lock();
//check if there are any slaves
/* check if there are any slaves */
if (!bond_has_slaves(bond))
goto re_arm;
// check if agg_select_timer timer after initialize is timed out
/* check if agg_select_timer timer after initialize is timed out */
if (BOND_AD_INFO(bond).agg_select_timer && !(--BOND_AD_INFO(bond).agg_select_timer)) {
slave = bond_first_slave(bond);
slave = bond_first_slave_rcu(bond);
port = slave ? &(SLAVE_AD_INFO(slave).port) : NULL;
// select the active aggregator for the bond
/* select the active aggregator for the bond */
if (port) {
if (!port->slave) {
pr_warning("%s: Warning: bond's first port is uninitialized\n",
......@@ -2093,8 +2103,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
bond_3ad_set_carrier(bond);
}
// for each port run the state machines
bond_for_each_slave(bond, slave, iter) {
/* for each port run the state machines */
bond_for_each_slave_rcu(bond, slave, iter) {
port = &(SLAVE_AD_INFO(slave).port);
if (!port->slave) {
pr_warning("%s: Warning: Found an uninitialized port\n",
......@@ -2114,7 +2124,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
ad_mux_machine(port);
ad_tx_machine(port);
// turn off the BEGIN bit, since we already handled it
/* turn off the BEGIN bit, since we already handled it */
if (port->sm_vars & AD_PORT_BEGIN)
port->sm_vars &= ~AD_PORT_BEGIN;
......@@ -2122,9 +2132,9 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
}
re_arm:
queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);
rcu_read_unlock();
read_unlock(&bond->lock);
queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);
}
/**
......@@ -2303,7 +2313,9 @@ int bond_3ad_set_carrier(struct bonding *bond)
struct aggregator *active;
struct slave *first_slave;
first_slave = bond_first_slave(bond);
rcu_read_lock();
first_slave = bond_first_slave_rcu(bond);
rcu_read_unlock();
if (!first_slave)
return 0;
active = __get_active_agg(&(SLAVE_AD_INFO(first_slave).aggregator));
......
......@@ -469,7 +469,7 @@ static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])
/* slave being removed should not be active at this point
*
* Caller must hold bond lock for read
* Caller must hold rtnl.
*/
static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
{
......@@ -815,7 +815,7 @@ static void rlb_rebalance(struct bonding *bond)
for (; hash_index != RLB_NULL_INDEX;
hash_index = client_info->used_next) {
client_info = &(bond_info->rx_hashtbl[hash_index]);
assigned_slave = rlb_next_rx_slave(bond);
assigned_slave = __rlb_next_rx_slave(bond);
if (assigned_slave && (client_info->slave != assigned_slave)) {
client_info->slave = assigned_slave;
client_info->ntt = 1;
......@@ -1494,14 +1494,14 @@ void bond_alb_monitor(struct work_struct *work)
struct list_head *iter;
struct slave *slave;
read_lock(&bond->lock);
if (!bond_has_slaves(bond)) {
bond_info->tx_rebalance_counter = 0;
bond_info->lp_counter = 0;
goto re_arm;
}
rcu_read_lock();
bond_info->tx_rebalance_counter++;
bond_info->lp_counter++;
......@@ -1514,7 +1514,7 @@ void bond_alb_monitor(struct work_struct *work)
*/
read_lock(&bond->curr_slave_lock);
bond_for_each_slave(bond, slave, iter)
bond_for_each_slave_rcu(bond, slave, iter)
alb_send_learning_packets(slave, slave->dev->dev_addr);
read_unlock(&bond->curr_slave_lock);
......@@ -1527,7 +1527,7 @@ void bond_alb_monitor(struct work_struct *work)
read_lock(&bond->curr_slave_lock);
bond_for_each_slave(bond, slave, iter) {
bond_for_each_slave_rcu(bond, slave, iter) {
tlb_clear_slave(bond, slave, 1);
if (slave == bond->curr_active_slave) {
SLAVE_TLB_INFO(slave).load =
......@@ -1551,11 +1551,9 @@ void bond_alb_monitor(struct work_struct *work)
* dev_set_promiscuity requires rtnl and
* nothing else. Avoid race with bond_close.
*/
read_unlock(&bond->lock);
if (!rtnl_trylock()) {
read_lock(&bond->lock);
rcu_read_unlock();
if (!rtnl_trylock())
goto re_arm;
}
bond_info->rlb_promisc_timeout_counter = 0;
......@@ -1567,7 +1565,7 @@ void bond_alb_monitor(struct work_struct *work)
bond_info->primary_is_promisc = 0;
rtnl_unlock();
read_lock(&bond->lock);
rcu_read_lock();
}
if (bond_info->rlb_rebalance) {
......@@ -1589,11 +1587,9 @@ void bond_alb_monitor(struct work_struct *work)
}
}
}
rcu_read_unlock();
re_arm:
queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks);
read_unlock(&bond->lock);
}
/* assumption: called before the slave is attached to the bond
......@@ -1679,14 +1675,11 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
* If new_slave is NULL, caller must hold curr_slave_lock or
* bond->lock for write.
*
* If new_slave is not NULL, caller must hold RTNL, bond->lock for
* read and curr_slave_lock for write. Processing here may sleep, so
* no other locks may be held.
* If new_slave is not NULL, caller must hold RTNL, curr_slave_lock
* for write. Processing here may sleep, so no other locks may be held.
*/
void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave)
__releases(&bond->curr_slave_lock)
__releases(&bond->lock)
__acquires(&bond->lock)
__acquires(&bond->curr_slave_lock)
{
struct slave *swap_slave;
......@@ -1722,7 +1715,6 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
tlb_clear_slave(bond, new_slave, 1);
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
ASSERT_RTNL();
......@@ -1748,11 +1740,9 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
/* swap mac address */
alb_swap_mac_addr(swap_slave, new_slave);
alb_fasten_mac_swap(bond, swap_slave, new_slave);
read_lock(&bond->lock);
} else {
/* set the new_slave to the bond mac address */
alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr);
read_lock(&bond->lock);
alb_send_learning_packets(new_slave, bond->dev->dev_addr);
}
......
This diff is collapsed.
......@@ -107,7 +107,6 @@ int bond_option_active_slave_set(struct bonding *bond,
}
block_netpoll_tx();
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
/* check to see if we are clearing active */
......@@ -142,7 +141,6 @@ int bond_option_active_slave_set(struct bonding *bond,
}
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
unblock_netpoll_tx();
return ret;
}
......
......@@ -878,7 +878,6 @@ static ssize_t bonding_store_primary(struct device *d,
if (!rtnl_trylock())
return restart_syscall();
block_netpoll_tx();
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
if (!USES_PRIMARY(bond->params.mode)) {
......@@ -918,7 +917,6 @@ static ssize_t bonding_store_primary(struct device *d,
bond->dev->name, ifname, bond->dev->name);
out:
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
unblock_netpoll_tx();
rtnl_unlock();
......@@ -966,11 +964,9 @@ static ssize_t bonding_store_primary_reselect(struct device *d,
new_value);
block_netpoll_tx();
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
unblock_netpoll_tx();
out:
rtnl_unlock();
......
......@@ -101,6 +101,10 @@
netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \
NULL)
/* Caller must have rcu_read_lock */
#define bond_first_slave_rcu(bond) \
netdev_lower_get_first_private_rcu(bond->dev)
#define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond))
#define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond))
......
......@@ -2907,6 +2907,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
priv = netdev_lower_get_next_private_rcu(dev, &(iter)))
void *netdev_adjacent_get_private(struct list_head *adj_list);
void *netdev_lower_get_first_private_rcu(struct net_device *dev);
struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev);
......
......@@ -4543,6 +4543,27 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
/**
* netdev_lower_get_first_private_rcu - Get the first ->private from the
* lower neighbour list, RCU
* variant
* @dev: device
*
* Gets the first netdev_adjacent->private from the dev's lower neighbour
* list. The caller must hold RCU read lock.
*/
void *netdev_lower_get_first_private_rcu(struct net_device *dev)
{
struct netdev_adjacent *lower;
lower = list_first_or_null_rcu(&dev->adj_list.lower,
struct netdev_adjacent, list);
if (lower)
return lower->private;
return NULL;
}
EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
/**
* netdev_master_upper_dev_get_rcu - Get master upper device
* @dev: device
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment