Commit ea819867 authored by Zach Brown, committed by Andy Grover

RDS/IB: protect the list of IB devices

The RDS IB device list wasn't protected by any locking.  Traversal in
both the get_mr and FMR flushing paths could race with addition and
removal.

List manipulation is done with RCU primitives and is protected by the
write side of a rwsem.  The list traversal in the get_mr fast path is
protected by an RCU read-side critical section.  The FMR list traversal
is more problematic because it can block while traversing the list.  We
protect this with the read side of the rwsem.
Signed-off-by: Zach Brown <zach.brown@oracle.com>
parent 1bde04a6
...@@ -53,6 +53,12 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer"); ...@@ -53,6 +53,12 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
module_param(rds_ib_retry_count, int, 0444); module_param(rds_ib_retry_count, int, 0444);
MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
/*
* we have a clumsy combination of RCU and a rwsem protecting this list
* because it is used both in the get_mr fast path and while blocking in
* the FMR flushing path.
*/
DECLARE_RWSEM(rds_ib_devices_lock);
struct list_head rds_ib_devices; struct list_head rds_ib_devices;
/* NOTE: if also grabbing ibdev lock, grab this first */ /* NOTE: if also grabbing ibdev lock, grab this first */
...@@ -171,7 +177,10 @@ void rds_ib_add_one(struct ib_device *device) ...@@ -171,7 +177,10 @@ void rds_ib_add_one(struct ib_device *device)
INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
INIT_LIST_HEAD(&rds_ibdev->conn_list); INIT_LIST_HEAD(&rds_ibdev->conn_list);
list_add_tail(&rds_ibdev->list, &rds_ib_devices);
down_write(&rds_ib_devices_lock);
list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
up_write(&rds_ib_devices_lock);
atomic_inc(&rds_ibdev->refcount); atomic_inc(&rds_ibdev->refcount);
ib_set_client_data(device, &rds_ib_client, rds_ibdev); ib_set_client_data(device, &rds_ib_client, rds_ibdev);
...@@ -230,16 +239,20 @@ void rds_ib_remove_one(struct ib_device *device) ...@@ -230,16 +239,20 @@ void rds_ib_remove_one(struct ib_device *device)
rds_ib_dev_shutdown(rds_ibdev); rds_ib_dev_shutdown(rds_ibdev);
/* stop connection attempts from getting a reference to this device. */
ib_set_client_data(device, &rds_ib_client, NULL);
down_write(&rds_ib_devices_lock);
list_del_rcu(&rds_ibdev->list);
up_write(&rds_ib_devices_lock);
/* /*
* prevent future connection attempts from getting a reference to this * This synchronize rcu is waiting for readers of both the ib
* device and wait for currently racing connection attempts to finish * client data and the devices list to finish before we drop
* getting their reference * both of those references.
*/ */
ib_set_client_data(device, &rds_ib_client, NULL);
synchronize_rcu(); synchronize_rcu();
rds_ib_dev_put(rds_ibdev); rds_ib_dev_put(rds_ibdev);
list_del(&rds_ibdev->list);
rds_ib_dev_put(rds_ibdev); rds_ib_dev_put(rds_ibdev);
} }
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#define RDS_IB_RECYCLE_BATCH_COUNT 32 #define RDS_IB_RECYCLE_BATCH_COUNT 32
extern struct rw_semaphore rds_ib_devices_lock;
extern struct list_head rds_ib_devices; extern struct list_head rds_ib_devices;
/* /*
......
...@@ -94,8 +94,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) ...@@ -94,8 +94,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
struct rds_ib_device *rds_ibdev; struct rds_ib_device *rds_ibdev;
struct rds_ib_ipaddr *i_ipaddr; struct rds_ib_ipaddr *i_ipaddr;
list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
if (i_ipaddr->ipaddr == ipaddr) { if (i_ipaddr->ipaddr == ipaddr) {
atomic_inc(&rds_ibdev->refcount); atomic_inc(&rds_ibdev->refcount);
...@@ -103,8 +103,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) ...@@ -103,8 +103,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
return rds_ibdev; return rds_ibdev;
} }
} }
rcu_read_unlock();
} }
rcu_read_unlock();
return NULL; return NULL;
} }
...@@ -761,12 +761,14 @@ void rds_ib_flush_mrs(void) ...@@ -761,12 +761,14 @@ void rds_ib_flush_mrs(void)
{ {
struct rds_ib_device *rds_ibdev; struct rds_ib_device *rds_ibdev;
down_read(&rds_ib_devices_lock);
list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
if (pool) if (pool)
rds_ib_flush_mr_pool(pool, 0, NULL); rds_ib_flush_mr_pool(pool, 0, NULL);
} }
up_read(&rds_ib_devices_lock);
} }
void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment