Commit dc6d6844 authored by Linus Torvalds

Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull infiniband reverts from Roland Dreier:
 "Last minute InfiniBand/RDMA changes for 3.19:

   - Revert IPoIB driver back to 3.18 state.  We had a number of fixes
     go into 3.19, but they introduced regressions.  We tried to get
     everything fixed up but ran out of time, so we'll try again for
     3.20.

   - Similarly, turn off the new "extended query port" verb.  Late in
     the cycle we realized the ABI is not quite right, and rather than
     freeze something in a rush and make a mistake, we'll take a bit
     more time and get it right in 3.20"

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  IB/core: Temporarily disable ex_query_device uverb
  Revert "IPoIB: Consolidate rtnl_lock tasks in workqueue"
  Revert "IPoIB: Make the carrier_on_task race aware"
  Revert "IPoIB: fix MCAST_FLAG_BUSY usage"
  Revert "IPoIB: fix mcast_dev_flush/mcast_restart_task race"
  Revert "IPoIB: change init sequence ordering"
  Revert "IPoIB: Use dedicated workqueues per interface"
  Revert "IPoIB: Make ipoib_mcast_stop_thread flush the workqueue"
  Revert "IPoIB: No longer use flush as a parameter"
parents 59acf657 ecb7b123
@@ -123,7 +123,6 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
                                    struct ib_udata *uhw) = {
        [IB_USER_VERBS_EX_CMD_CREATE_FLOW]      = ib_uverbs_ex_create_flow,
        [IB_USER_VERBS_EX_CMD_DESTROY_FLOW]     = ib_uverbs_ex_destroy_flow,
-       [IB_USER_VERBS_EX_CMD_QUERY_DEVICE]     = ib_uverbs_ex_query_device
 };

 static void ib_uverbs_add_one(struct ib_device *device);
...
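The hunk above is the whole mechanism for turning the verb off: extended uverbs commands are dispatched by indexing this table, so a command with no initializer is a NULL slot and simply stops being supported. A simplified sketch of that dispatch path (illustrative only; the real checks live in the uverbs write handler and the parameter list here is assumed):

        /* Sketch, not the exact kernel code: a NULL table slot means the
         * extended command is reported as unsupported, which is how
         * removing IB_USER_VERBS_EX_CMD_QUERY_DEVICE hides the
         * not-yet-frozen ABI from userspace. */
        static int dispatch_ex_command(struct ib_uverbs_file *file, u32 command,
                                       struct ib_udata *ucore, struct ib_udata *uhw)
        {
                if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
                    !uverbs_ex_cmd_table[command])
                        return -ENOSYS;         /* verb not supported */

                return uverbs_ex_cmd_table[command](file, ucore, uhw);
        }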
@@ -98,15 +98,9 @@ enum {
        IPOIB_MCAST_FLAG_FOUND    = 0,  /* used in set_multicast_list */
        IPOIB_MCAST_FLAG_SENDONLY = 1,
-       /*
-        * For IPOIB_MCAST_FLAG_BUSY
-        * When set, in flight join and mcast->mc is unreliable
-        * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
-        *   haven't started yet
-        * When clear and mcast->mc is valid pointer, join was successful
-        */
-       IPOIB_MCAST_FLAG_BUSY     = 2,
+       IPOIB_MCAST_FLAG_BUSY     = 2,  /* joining or already joined */
        IPOIB_MCAST_FLAG_ATTACHED = 3,
+       IPOIB_MCAST_JOIN_STARTED  = 4,

        MAX_SEND_CQE              = 16,
        IPOIB_CM_COPYBREAK        = 256,
@@ -323,7 +317,6 @@ struct ipoib_dev_priv {
        struct list_head multicast_list;
        struct rb_root multicast_tree;

-       struct workqueue_struct *wq;
        struct delayed_work mcast_task;
        struct work_struct carrier_on_task;
        struct work_struct flush_light;
@@ -484,10 +477,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
 void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);

-int ipoib_ib_dev_open(struct net_device *dev);
+int ipoib_ib_dev_open(struct net_device *dev, int flush);
 int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev);
-int ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_down(struct net_device *dev, int flush);
+int ipoib_ib_dev_stop(struct net_device *dev, int flush);
 void ipoib_pkey_dev_check_presence(struct net_device *dev);

 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -499,7 +492,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
 void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev);
+int ipoib_mcast_stop_thread(struct net_device *dev, int flush);

 void ipoib_mcast_dev_down(struct net_device *dev);
 void ipoib_mcast_dev_flush(struct net_device *dev);
...
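Most of what follows is one mechanical substitution: 3.19 gave every interface its own single-threaded workqueue (the priv->wq field deleted above), and the revert funnels all deferred work back through the module-global ipoib_workqueue. A minimal sketch of the two models, with the 3.19 variant shown in the comment (illustrative, not the exact driver code):

        /* 3.18 model restored by these reverts: one global queue for all
         * IPoIB deferred work. */
        static struct workqueue_struct *ipoib_workqueue;

        static void arm_mcast_task(struct ipoib_dev_priv *priv)
        {
                /* 3.19 (reverted): queue_delayed_work(priv->wq, &priv->mcast_task, 0); */
                queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
        }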
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
        }

        spin_lock_irq(&priv->lock);
-       queue_delayed_work(priv->wq,
+       queue_delayed_work(ipoib_workqueue,
                           &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
        /* Add this entry to passive ids list head, but do not re-add it
         * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                spin_lock_irqsave(&priv->lock, flags);
                list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
                ipoib_cm_start_rx_drain(priv);
-               queue_work(priv->wq, &priv->cm.rx_reap_task);
+               queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
                spin_unlock_irqrestore(&priv->lock, flags);
        } else
                ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                        spin_lock_irqsave(&priv->lock, flags);
                        list_move(&p->list, &priv->cm.rx_reap_list);
                        spin_unlock_irqrestore(&priv->lock, flags);
-                       queue_work(priv->wq, &priv->cm.rx_reap_task);
+                       queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
                }
                return;
        }
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
                if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                        list_move(&tx->list, &priv->cm.reap_list);
-                       queue_work(priv->wq, &priv->cm.reap_task);
+                       queue_work(ipoib_workqueue, &priv->cm.reap_task);
                }
                clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
                if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                        list_move(&tx->list, &priv->cm.reap_list);
-                       queue_work(priv->wq, &priv->cm.reap_task);
+                       queue_work(ipoib_workqueue, &priv->cm.reap_task);
                }
                spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
        tx->dev = dev;
        list_add(&tx->list, &priv->cm.start_list);
        set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
-       queue_work(priv->wq, &priv->cm.start_task);
+       queue_work(ipoib_workqueue, &priv->cm.start_task);
        return tx;
 }
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
        if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                spin_lock_irqsave(&priv->lock, flags);
                list_move(&tx->list, &priv->cm.reap_list);
-               queue_work(priv->wq, &priv->cm.reap_task);
+               queue_work(ipoib_workqueue, &priv->cm.reap_task);
                ipoib_dbg(priv, "Reap connection for gid %pI6\n",
                          tx->neigh->daddr + 4);
                tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
        skb_queue_tail(&priv->cm.skb_queue, skb);
        if (e)
-               queue_work(priv->wq, &priv->cm.skb_task);
+               queue_work(ipoib_workqueue, &priv->cm.skb_task);
 }

 static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
        }

        if (!list_empty(&priv->cm.passive_ids))
-               queue_delayed_work(priv->wq,
+               queue_delayed_work(ipoib_workqueue,
                                   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
        spin_unlock_irq(&priv->lock);
 }
...
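Every hunk in this file is the same swap of the queue argument; the surrounding logic is untouched. For instance, the stale task is a self-rearming delayed work item, roughly as follows (a sketch condensed from the context lines above):

        static void cm_stale_task_sketch(struct work_struct *work)
        {
                struct ipoib_dev_priv *priv =
                        container_of(work, struct ipoib_dev_priv, cm.stale_task.work);

                spin_lock_irq(&priv->lock);
                /* ... expire passive connections that have been idle too long ... */
                if (!list_empty(&priv->cm.passive_ids))
                        queue_delayed_work(ipoib_workqueue,     /* was priv->wq */
                                           &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
                spin_unlock_irq(&priv->lock);
        }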
@@ -655,7 +655,7 @@ void ipoib_reap_ah(struct work_struct *work)
        __ipoib_reap_ah(dev);

        if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
-               queue_delayed_work(priv->wq, &priv->ah_reap_task,
+               queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
                                   round_jiffies_relative(HZ));
 }
@@ -664,7 +664,7 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx)
        drain_tx_cq((struct net_device *)ctx);
 }

-int ipoib_ib_dev_open(struct net_device *dev)
+int ipoib_ib_dev_open(struct net_device *dev, int flush)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int ret;
@@ -696,7 +696,7 @@ int ipoib_ib_dev_open(struct net_device *dev)
        }

        clear_bit(IPOIB_STOP_REAPER, &priv->flags);
-       queue_delayed_work(priv->wq, &priv->ah_reap_task,
+       queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
                           round_jiffies_relative(HZ));

        if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +706,7 @@ int ipoib_ib_dev_open(struct net_device *dev)
 dev_stop:
        if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
                napi_enable(&priv->napi);
-       ipoib_ib_dev_stop(dev);
+       ipoib_ib_dev_stop(dev, flush);
        return -1;
 }
@@ -738,7 +738,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
        return ipoib_mcast_start_thread(dev);
 }

-int ipoib_ib_dev_down(struct net_device *dev)
+int ipoib_ib_dev_down(struct net_device *dev, int flush)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -747,7 +747,7 @@ int ipoib_ib_dev_down(struct net_device *dev)
        clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
        netif_carrier_off(dev);

-       ipoib_mcast_stop_thread(dev);
+       ipoib_mcast_stop_thread(dev, flush);
        ipoib_mcast_dev_flush(dev);

        ipoib_flush_paths(dev);
@@ -807,7 +807,7 @@ void ipoib_drain_cq(struct net_device *dev)
        local_bh_enable();
 }

-int ipoib_ib_dev_stop(struct net_device *dev)
+int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_qp_attr qp_attr;
@@ -880,7 +880,8 @@ int ipoib_ib_dev_stop(struct net_device *dev)
        /* Wait for all AHs to be reaped */
        set_bit(IPOIB_STOP_REAPER, &priv->flags);
        cancel_delayed_work(&priv->ah_reap_task);
-       flush_workqueue(priv->wq);
+       if (flush)
+               flush_workqueue(ipoib_workqueue);

        begin = jiffies;
@@ -917,7 +918,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
                    (unsigned long) dev);

        if (dev->flags & IFF_UP) {
-               if (ipoib_ib_dev_open(dev)) {
+               if (ipoib_ib_dev_open(dev, 1)) {
                        ipoib_transport_dev_cleanup(dev);
                        return -ENODEV;
                }
@@ -1039,12 +1040,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
        }

        if (level >= IPOIB_FLUSH_NORMAL)
-               ipoib_ib_dev_down(dev);
+               ipoib_ib_dev_down(dev, 0);

        if (level == IPOIB_FLUSH_HEAVY) {
                if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
-                       ipoib_ib_dev_stop(dev);
-               if (ipoib_ib_dev_open(dev) != 0)
+                       ipoib_ib_dev_stop(dev, 0);
+               if (ipoib_ib_dev_open(dev, 0) != 0)
                        return;
                if (netif_queue_stopped(dev))
                        netif_start_queue(dev);
@@ -1096,7 +1097,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
         */
        ipoib_flush_paths(dev);

-       ipoib_mcast_stop_thread(dev);
+       ipoib_mcast_stop_thread(dev, 1);
        ipoib_mcast_dev_flush(dev);

        ipoib_transport_dev_cleanup(dev);
...
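The restored int flush argument exists because several of these functions can be called from a work item that is itself running on ipoib_workqueue, and flushing the workqueue you are executing on never completes. Callers therefore pass 0 on the workqueue path and 1 from ordinary process context, roughly as below (a hedged sketch; the example_* wrappers are ours, the calls mirror the hunks above):

        /* Invoked from the heavy-flush work item, i.e. ON ipoib_workqueue:
         * flushing here would wait for ourselves, so pass flush = 0. */
        static void example_heavy_flush_path(struct net_device *dev)
        {
                ipoib_ib_dev_stop(dev, 0);
                ipoib_ib_dev_open(dev, 0);
        }

        /* Ordinary process context (e.g. the ipoib_open() error path):
         * waiting for queued work to drain is safe, so pass flush = 1. */
        static int example_open_error_path(struct net_device *dev)
        {
                return ipoib_ib_dev_stop(dev, 1);
        }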
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)

        set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

-       if (ipoib_ib_dev_open(dev)) {
+       if (ipoib_ib_dev_open(dev, 1)) {
                if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
                        return 0;
                goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
        return 0;

 err_stop:
-       ipoib_ib_dev_stop(dev);
+       ipoib_ib_dev_stop(dev, 1);

 err_disable:
        clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)

        netif_stop_queue(dev);

-       ipoib_ib_dev_down(dev);
-       ipoib_ib_dev_stop(dev);
+       ipoib_ib_dev_down(dev, 1);
+       ipoib_ib_dev_stop(dev, 0);

        if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
                struct ipoib_dev_priv *cpriv;
@@ -839,7 +839,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
                return;
        }

-       queue_work(priv->wq, &priv->restart_task);
+       queue_work(ipoib_workqueue, &priv->restart_task);
 }

 static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
@@ -954,7 +954,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
        __ipoib_reap_neigh(priv);

        if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-               queue_delayed_work(priv->wq, &priv->neigh_reap_task,
+               queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
                                   arp_tbl.gc_interval);
 }
@@ -1133,7 +1133,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)

        /* start garbage collection */
        clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-       queue_delayed_work(priv->wq, &priv->neigh_reap_task,
+       queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
                           arp_tbl.gc_interval);

        return 0;
@@ -1262,13 +1262,15 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);

+       if (ipoib_neigh_hash_init(priv) < 0)
+               goto out;
        /* Allocate RX/TX "rings" to hold queued skbs */
        priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
                                GFP_KERNEL);
        if (!priv->rx_ring) {
                printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
                       ca->name, ipoib_recvq_size);
-               goto out;
+               goto out_neigh_hash_cleanup;
        }

        priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1283,24 +1285,16 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
        if (ipoib_ib_dev_init(dev, ca, port))
                goto out_tx_ring_cleanup;

-       /*
-        * Must be after ipoib_ib_dev_init so we can allocate a per
-        * device wq there and use it here
-        */
-       if (ipoib_neigh_hash_init(priv) < 0)
-               goto out_dev_uninit;
-
        return 0;

-out_dev_uninit:
-       ipoib_ib_dev_cleanup(dev);
-
 out_tx_ring_cleanup:
        vfree(priv->tx_ring);

 out_rx_ring_cleanup:
        kfree(priv->rx_ring);

+out_neigh_hash_cleanup:
+       ipoib_neigh_hash_uninit(dev);
 out:
        return -ENOMEM;
 }
@@ -1323,12 +1317,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
        }
        unregister_netdevice_many(&head);

-       /*
-        * Must be before ipoib_ib_dev_cleanup or we delete an in use
-        * work queue
-        */
-       ipoib_neigh_hash_uninit(dev);
-
        ipoib_ib_dev_cleanup(dev);

        kfree(priv->rx_ring);
@@ -1336,6 +1324,8 @@ void ipoib_dev_cleanup(struct net_device *dev)
        priv->rx_ring = NULL;
        priv->tx_ring = NULL;
+
+       ipoib_neigh_hash_uninit(dev);
 }

 static const struct header_ops ipoib_header_ops = {
@@ -1646,7 +1636,7 @@ static struct net_device *ipoib_add_port(const char *format,
        /* Stop GC if started before flush */
        set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
        cancel_delayed_work(&priv->neigh_reap_task);
-       flush_workqueue(priv->wq);
+       flush_workqueue(ipoib_workqueue);

 event_failed:
        ipoib_dev_cleanup(priv->dev);
@@ -1717,7 +1707,7 @@ static void ipoib_remove_one(struct ib_device *device)
                /* Stop GC */
                set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
                cancel_delayed_work(&priv->neigh_reap_task);
-               flush_workqueue(priv->wq);
+               flush_workqueue(ipoib_workqueue);

                unregister_netdev(priv->dev);
                free_netdev(priv->dev);
@@ -1758,13 +1748,8 @@ static int __init ipoib_init_module(void)
         * unregister_netdev() and linkwatch_event take the rtnl lock,
         * so flush_scheduled_work() can deadlock during device
         * removal.
-        *
-        * In addition, bringing one device up and another down at the
-        * same time can deadlock a single workqueue, so we have this
-        * global fallback workqueue, but we also attempt to open a
-        * per device workqueue each time we bring an interface up
         */
-       ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
+       ipoib_workqueue = create_singlethread_workqueue("ipoib");
        if (!ipoib_workqueue) {
                ret = -ENOMEM;
                goto err_fs;
...
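The comment kept in ipoib_init_module() describes a real AB-BA deadlock: teardown paths run with the rtnl lock held, and flushing a workqueue whose items themselves take rtnl waits forever. An illustration of the cycle, not driver code:

        static void work_that_takes_rtnl(struct work_struct *work)
        {
                rtnl_lock();            /* blocks while teardown holds rtnl ... */
                /* ... adjust MTU, turn the carrier on, etc. ... */
                rtnl_unlock();
        }

        static void teardown_with_rtnl_held(void)
        {
                /* ... while this waits for the work item above: deadlock.
                 * Hence a dedicated queue instead of flush_scheduled_work(). */
                flush_scheduled_work();
        }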
@@ -190,6 +190,12 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
                spin_unlock_irq(&priv->lock);
                priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
                set_qkey = 1;
+
+               if (!ipoib_cm_admin_enabled(dev)) {
+                       rtnl_lock();
+                       dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
+                       rtnl_unlock();
+               }
        }

        if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -271,27 +277,16 @@ ipoib_mcast_sendonly_join_complete(int status,
        struct ipoib_mcast *mcast = multicast->context;
        struct net_device *dev = mcast->dev;

-       /*
-        * We have to take the mutex to force mcast_sendonly_join to
-        * return from ib_sa_multicast_join and set mcast->mc to a
-        * valid value. Otherwise we were racing with ourselves in
-        * that we might fail here, but get a valid return from
-        * ib_sa_multicast_join after we had cleared mcast->mc here,
-        * resulting in mis-matched joins and leaves and a deadlock
-        */
-       mutex_lock(&mcast_mutex);
-
        /* We trap for port events ourselves. */
        if (status == -ENETRESET)
-               goto out;
+               return 0;

        if (!status)
                status = ipoib_mcast_join_finish(mcast, &multicast->rec);

        if (status) {
                if (mcast->logcount++ < 20)
-                       ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
-                                       "join failed for %pI6, status %d\n",
+                       ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
                                        mcast->mcmember.mgid.raw, status);

                /* Flush out any queued packets */
@@ -301,15 +296,11 @@ ipoib_mcast_sendonly_join_complete(int status,
                        dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
                }
                netif_tx_unlock_bh(dev);
+
+               /* Clear the busy flag so we try again */
+               status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
+                                           &mcast->flags);
        }
-out:
-       clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-       if (status)
-               mcast->mc = NULL;
-       complete(&mcast->done);
-       if (status == -ENETRESET)
-               status = 0;
-       mutex_unlock(&mcast_mutex);
        return status;
 }
@@ -327,14 +318,12 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
        int ret = 0;

        if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
-               ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
-                               "multicast joins\n");
+               ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
                return -ENODEV;
        }

-       if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
-               ipoib_dbg_mcast(priv, "multicast entry busy, skipping "
-                               "sendonly join\n");
+       if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
+               ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
                return -EBUSY;
        }
@@ -342,9 +331,6 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
        rec.port_gid = priv->local_gid;
        rec.pkey     = cpu_to_be16(priv->pkey);

-       mutex_lock(&mcast_mutex);
-       init_completion(&mcast->done);
-       set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
        mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
                                         priv->port, &rec,
                                         IB_SA_MCMEMBER_REC_MGID        |
@@ -357,14 +343,12 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
        if (IS_ERR(mcast->mc)) {
                ret = PTR_ERR(mcast->mc);
                clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-               complete(&mcast->done);
-               ipoib_warn(priv, "ib_sa_join_multicast for sendonly join "
-                          "failed (ret = %d)\n", ret);
+               ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
+                          ret);
        } else {
-               ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting "
-                               "sendonly join\n", mcast->mcmember.mgid.raw);
+               ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
+                               mcast->mcmember.mgid.raw);
        }
-       mutex_unlock(&mcast_mutex);

        return ret;
 }
@@ -375,29 +359,18 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
                                                   carrier_on_task);
        struct ib_port_attr attr;

+       /*
+        * Take rtnl_lock to avoid racing with ipoib_stop() and
+        * turning the carrier back on while a device is being
+        * removed.
+        */
        if (ib_query_port(priv->ca, priv->port, &attr) ||
            attr.state != IB_PORT_ACTIVE) {
                ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
                return;
        }

-       /*
-        * Take rtnl_lock to avoid racing with ipoib_stop() and
-        * turning the carrier back on while a device is being
-        * removed. However, ipoib_stop() will attempt to flush
-        * the workqueue while holding the rtnl lock, so loop
-        * on trylock until either we get the lock or we see
-        * FLAG_ADMIN_UP go away as that signals that we are bailing
-        * and can safely ignore the carrier on work.
-        */
-       while (!rtnl_trylock()) {
-               if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-                       return;
-               else
-                       msleep(20);
-       }
-       if (!ipoib_cm_admin_enabled(priv->dev))
-               dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
+       rtnl_lock();
        netif_carrier_on(priv->dev);
        rtnl_unlock();
 }
@@ -412,63 +385,60 @@ static int ipoib_mcast_join_complete(int status,
        ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
                        mcast->mcmember.mgid.raw, status);

-       /*
-        * We have to take the mutex to force mcast_join to
-        * return from ib_sa_multicast_join and set mcast->mc to a
-        * valid value. Otherwise we were racing with ourselves in
-        * that we might fail here, but get a valid return from
-        * ib_sa_multicast_join after we had cleared mcast->mc here,
-        * resulting in mis-matched joins and leaves and a deadlock
-        */
-       mutex_lock(&mcast_mutex);
-
        /* We trap for port events ourselves. */
-       if (status == -ENETRESET)
+       if (status == -ENETRESET) {
+               status = 0;
                goto out;
+       }

        if (!status)
                status = ipoib_mcast_join_finish(mcast, &multicast->rec);

        if (!status) {
                mcast->backoff = 1;
+               mutex_lock(&mcast_mutex);
                if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                       queue_delayed_work(priv->wq, &priv->mcast_task, 0);
+                       queue_delayed_work(ipoib_workqueue,
+                                          &priv->mcast_task, 0);
+               mutex_unlock(&mcast_mutex);

                /*
-                * Defer carrier on work to priv->wq to avoid a
+                * Defer carrier on work to ipoib_workqueue to avoid a
                 * deadlock on rtnl_lock here.
                 */
                if (mcast == priv->broadcast)
-                       queue_work(priv->wq, &priv->carrier_on_task);
-       } else {
-               if (mcast->logcount++ < 20) {
-                       if (status == -ETIMEDOUT || status == -EAGAIN) {
-                               ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
-                                               mcast->mcmember.mgid.raw, status);
-                       } else {
-                               ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
-                                          mcast->mcmember.mgid.raw, status);
-                       }
-               }
-
-               mcast->backoff *= 2;
-               if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
-                       mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+                       queue_work(ipoib_workqueue, &priv->carrier_on_task);
+
+               status = 0;
+               goto out;
        }
-out:
+
+       if (mcast->logcount++ < 20) {
+               if (status == -ETIMEDOUT || status == -EAGAIN) {
+                       ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
+                                       mcast->mcmember.mgid.raw, status);
+               } else {
+                       ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
+                                  mcast->mcmember.mgid.raw, status);
+               }
+       }
+
+       mcast->backoff *= 2;
+       if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+               mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+
+       /* Clear the busy flag so we try again */
+       status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+
+       mutex_lock(&mcast_mutex);
        spin_lock_irq(&priv->lock);
-       clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-       if (status)
-               mcast->mc = NULL;
-       complete(&mcast->done);
-       if (status == -ENETRESET)
-               status = 0;
-       if (status && test_bit(IPOIB_MCAST_RUN, &priv->flags))
-               queue_delayed_work(priv->wq, &priv->mcast_task,
+       if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
+               queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
                                   mcast->backoff * HZ);
        spin_unlock_irq(&priv->lock);
        mutex_unlock(&mcast_mutex);
+out:
+       complete(&mcast->done);
        return status;
 }
@@ -517,9 +487,10 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                rec.hop_limit     = priv->broadcast->mcmember.hop_limit;
        }

-       mutex_lock(&mcast_mutex);
-       init_completion(&mcast->done);
        set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+       init_completion(&mcast->done);
+       set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
+
        mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
                                         &rec, comp_mask, GFP_KERNEL,
                                         ipoib_mcast_join_complete, mcast);
@@ -533,11 +504,13 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
                        mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;

+               mutex_lock(&mcast_mutex);
                if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                       queue_delayed_work(priv->wq, &priv->mcast_task,
+                       queue_delayed_work(ipoib_workqueue,
+                                          &priv->mcast_task,
                                           mcast->backoff * HZ);
+               mutex_unlock(&mcast_mutex);
        }
-       mutex_unlock(&mcast_mutex);
 }

 void ipoib_mcast_join_task(struct work_struct *work)
@@ -574,8 +547,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
                        ipoib_warn(priv, "failed to allocate broadcast group\n");
                        mutex_lock(&mcast_mutex);
                        if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                               queue_delayed_work(priv->wq, &priv->mcast_task,
-                                                  HZ);
+                               queue_delayed_work(ipoib_workqueue,
+                                                  &priv->mcast_task, HZ);
                        mutex_unlock(&mcast_mutex);
                        return;
                }
@@ -590,8 +563,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
        }

        if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
-               if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
-                   !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+               if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
                        ipoib_mcast_join(dev, priv->broadcast, 0);
                return;
        }
@@ -599,33 +571,23 @@ void ipoib_mcast_join_task(struct work_struct *work)
        while (1) {
                struct ipoib_mcast *mcast = NULL;

-               /*
-                * Need the mutex so our flags are consistent, need the
-                * priv->lock so we don't race with list removals in either
-                * mcast_dev_flush or mcast_restart_task
-                */
-               mutex_lock(&mcast_mutex);
                spin_lock_irq(&priv->lock);
                list_for_each_entry(mcast, &priv->multicast_list, list) {
-                       if (IS_ERR_OR_NULL(mcast->mc) &&
-                           !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
-                           !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+                       if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
+                           && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
+                           && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
                                /* Found the next unjoined group */
                                break;
                        }
                }
                spin_unlock_irq(&priv->lock);
-               mutex_unlock(&mcast_mutex);

                if (&mcast->list == &priv->multicast_list) {
                        /* All done */
                        break;
                }

-               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-                       ipoib_mcast_sendonly_join(mcast);
-               else
-                       ipoib_mcast_join(dev, mcast, 1);
+               ipoib_mcast_join(dev, mcast, 1);
                return;
        }
@@ -642,13 +604,13 @@ int ipoib_mcast_start_thread(struct net_device *dev)

        mutex_lock(&mcast_mutex);
        if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-               queue_delayed_work(priv->wq, &priv->mcast_task, 0);
+               queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
        mutex_unlock(&mcast_mutex);

        return 0;
 }

-int ipoib_mcast_stop_thread(struct net_device *dev)
+int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -659,7 +621,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
        cancel_delayed_work(&priv->mcast_task);
        mutex_unlock(&mcast_mutex);

-       flush_workqueue(priv->wq);
+       if (flush)
+               flush_workqueue(ipoib_workqueue);

        return 0;
 }
@@ -670,9 +633,6 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
        int ret = 0;

        if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-               ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
-
-       if (!IS_ERR_OR_NULL(mcast->mc))
                ib_sa_free_multicast(mcast->mc);

        if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -725,8 +685,6 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
                memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
                __ipoib_mcast_add(dev, mcast);
                list_add_tail(&mcast->list, &priv->multicast_list);
-               if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-                       queue_delayed_work(priv->wq, &priv->mcast_task, 0);
        }

        if (!mcast->ah) {
@@ -740,6 +698,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
                if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                        ipoib_dbg_mcast(priv, "no address vector, "
                                        "but multicast join already started\n");
+               else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+                       ipoib_mcast_sendonly_join(mcast);

                /*
                 * If lookup completes between here and out:, don't
@@ -799,12 +759,9 @@ void ipoib_mcast_dev_flush(struct net_device *dev)

        spin_unlock_irqrestore(&priv->lock, flags);

-       /*
-        * make sure the in-flight joins have finished before we attempt
-        * to leave
-        */
+       /* seperate between the wait to the leave*/
        list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+               if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
                        wait_for_completion(&mcast->done);

        list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@@ -837,6 +794,8 @@ void ipoib_mcast_restart_task(struct work_struct *work)

        ipoib_dbg_mcast(priv, "restarting multicast task\n");

+       ipoib_mcast_stop_thread(dev, 0);
+
        local_irq_save(flags);
        netif_addr_lock(dev);
        spin_lock(&priv->lock);
@@ -921,38 +880,14 @@ void ipoib_mcast_restart_task(struct work_struct *work)
        netif_addr_unlock(dev);
        local_irq_restore(flags);

-       /*
-        * make sure the in-flight joins have finished before we attempt
-        * to leave
-        */
-       list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-                       wait_for_completion(&mcast->done);
-
-       /*
-        * We have to cancel outside of the spinlock, but we have to
-        * take the rtnl lock or else we race with the removal of
-        * entries from the remove list in mcast_dev_flush as part
-        * of ipoib_stop(). We detect the drop of the ADMIN_UP flag
-        * to signal that we have hit this particular race, and we
-        * return since we know we don't need to do anything else
-        * anyway.
-        */
-       while (!rtnl_trylock()) {
-               if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-                       return;
-               else
-                       msleep(20);
-       }
+       /* We have to cancel outside of the spinlock */
        list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
                ipoib_mcast_leave(mcast->dev, mcast);
                ipoib_mcast_free(mcast);
        }
-       /*
-        * Restart our join task if needed
-        */
-       ipoib_mcast_start_thread(dev);
-       rtnl_unlock();
+
+       if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+               ipoib_mcast_start_thread(dev);
 }

 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
...
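The multicast hunks restore a completion-based handshake: a join marks the group before calling into the SA, the completion fires at the end of the join callback, and the flush path waits only on groups whose join actually started. Condensed from the hunks above (glue omitted, function names ours):

        static void join_sketch(struct ipoib_mcast *mcast)
        {
                set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
                init_completion(&mcast->done);
                set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
                /* ib_sa_join_multicast(...) eventually runs
                 * ipoib_mcast_join_complete(), which ends with
                 * complete(&mcast->done). */
        }

        static void flush_sketch(struct ipoib_mcast *mcast)
        {
                /* Wait only if a join was started; otherwise
                 * wait_for_completion() would block on a completion that
                 * will never fire. */
                if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
                        wait_for_completion(&mcast->done);
        }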
@@ -145,20 +145,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
        int ret, size;
        int i;

-       /*
-        * the various IPoIB tasks assume they will never race against
-        * themselves, so always use a single thread workqueue
-        */
-       priv->wq = create_singlethread_workqueue("ipoib_wq");
-       if (!priv->wq) {
-               printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
-               return -ENODEV;
-       }
-
        priv->pd = ib_alloc_pd(priv->ca);
        if (IS_ERR(priv->pd)) {
                printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
-               goto out_free_wq;
+               return -ENODEV;
        }

        priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
@@ -252,10 +242,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)

 out_free_pd:
        ib_dealloc_pd(priv->pd);
-
-out_free_wq:
-       destroy_workqueue(priv->wq);
-       priv->wq = NULL;
        return -ENODEV;
 }

@@ -284,12 +270,6 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)

        if (ib_dealloc_pd(priv->pd))
                ipoib_warn(priv, "ib_dealloc_pd failed\n");
-
-       if (priv->wq) {
-               flush_workqueue(priv->wq);
-               destroy_workqueue(priv->wq);
-               priv->wq = NULL;
-       }
 }

 void ipoib_event(struct ib_event_handler *handler,
...