Commit 46c8217c authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma fixes from Doug Ledford:
 - Fixes for mlx5 related issues
 - Fixes for ipoib multicast handling

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  IB/ipoib: increase the max mcast backlog queue
  IB/ipoib: Make sendonly multicast joins create the mcast group
  IB/ipoib: Expire sendonly multicast joins
  IB/mlx5: Remove pa_lkey usages
  IB/mlx5: Remove support for IB_DEVICE_LOCAL_DMA_LKEY
  IB/iser: Add module parameter for always register memory
  xprtrdma: Replace global lkey with lkey local to PD
parents f97b870e 2866196f
......@@ -245,7 +245,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
if (MLX5_CAP_GEN(mdev, apm))
props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
if (MLX5_CAP_GEN(mdev, xrc))
props->device_cap_flags |= IB_DEVICE_XRC;
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
......@@ -795,53 +794,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
return 0;
}
static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
{
struct mlx5_create_mkey_mbox_in *in;
struct mlx5_mkey_seg *seg;
struct mlx5_core_mr mr;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return -ENOMEM;
seg = &in->seg;
seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
seg->start_addr = 0;
err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
NULL, NULL, NULL);
if (err) {
mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
goto err_in;
}
kfree(in);
*key = mr.key;
return 0;
err_in:
kfree(in);
return err;
}
static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
{
struct mlx5_core_mr mr;
int err;
memset(&mr, 0, sizeof(mr));
mr.key = key;
err = mlx5_core_destroy_mkey(dev->mdev, &mr);
if (err)
mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
}
static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
......@@ -867,13 +819,6 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
kfree(pd);
return ERR_PTR(-EFAULT);
}
} else {
err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
if (err) {
mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
kfree(pd);
return ERR_PTR(err);
}
}
return &pd->ibpd;
......@@ -884,9 +829,6 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
struct mlx5_ib_dev *mdev = to_mdev(pd->device);
struct mlx5_ib_pd *mpd = to_mpd(pd);
if (!pd->uobject)
free_pa_mkey(mdev, mpd->pa_lkey);
mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
kfree(mpd);
......@@ -1245,18 +1187,10 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
struct ib_srq_init_attr attr;
struct mlx5_ib_dev *dev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
u32 rsvd_lkey;
int ret = 0;
dev = container_of(devr, struct mlx5_ib_dev, devr);
ret = mlx5_core_query_special_context(dev->mdev, &rsvd_lkey);
if (ret) {
pr_err("Failed to query special context %d\n", ret);
return ret;
}
dev->ib_dev.local_dma_lkey = rsvd_lkey;
devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
if (IS_ERR(devr->p0)) {
ret = PTR_ERR(devr->p0);
......@@ -1418,6 +1352,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors =
......
......@@ -103,7 +103,6 @@ static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibuconte
struct mlx5_ib_pd {
struct ib_pd ibpd;
u32 pdn;
u32 pa_lkey;
};
/* Use macros here so that don't have to duplicate
......@@ -213,7 +212,6 @@ struct mlx5_ib_qp {
int uuarn;
int create_type;
u32 pa_lkey;
/* Store signature errors */
bool signature_en;
......
......@@ -925,8 +925,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
if (err)
mlx5_ib_dbg(dev, "err %d\n", err);
else
qp->pa_lkey = to_mpd(pd)->pa_lkey;
}
if (err)
......@@ -2045,7 +2043,7 @@ static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
dseg->addr = cpu_to_be64(mfrpl->map);
dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
dseg->lkey = cpu_to_be32(pd->pa_lkey);
dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
}
static __be32 send_ieth(struct ib_send_wr *wr)
......
......@@ -80,7 +80,7 @@ enum {
IPOIB_NUM_WC = 4,
IPOIB_MAX_PATH_REC_QUEUE = 3,
IPOIB_MAX_MCAST_QUEUE = 3,
IPOIB_MAX_MCAST_QUEUE = 64,
IPOIB_FLAG_OPER_UP = 0,
IPOIB_FLAG_INITIALIZED = 1,
......@@ -548,6 +548,8 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
union ib_gid *mgid, int set_qkey);
int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast);
struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid);
int ipoib_init_qp(struct net_device *dev);
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
......
......@@ -1149,6 +1149,9 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
unsigned long dt;
unsigned long flags;
int i;
LIST_HEAD(remove_list);
struct ipoib_mcast *mcast, *tmcast;
struct net_device *dev = priv->dev;
if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
return;
......@@ -1176,6 +1179,19 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
lockdep_is_held(&priv->lock))) != NULL) {
/* was the neigh idle for two GC periods */
if (time_after(neigh_obsolete, neigh->alive)) {
u8 *mgid = neigh->daddr + 4;
/* Is this multicast ? */
if (*mgid == 0xff) {
mcast = __ipoib_mcast_find(dev, mgid);
if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
list_del(&mcast->list);
rb_erase(&mcast->rb_node, &priv->multicast_tree);
list_add_tail(&mcast->list, &remove_list);
}
}
rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
......@@ -1191,6 +1207,8 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
out_unlock:
spin_unlock_irqrestore(&priv->lock, flags);
list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
ipoib_mcast_leave(dev, mcast);
}
static void ipoib_reap_neigh(struct work_struct *work)
......
......@@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
return mcast;
}
static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
......@@ -508,17 +508,19 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
rec.hop_limit = priv->broadcast->mcmember.hop_limit;
/*
* Historically Linux IPoIB has never properly supported SEND
* ONLY join. It emulated it by not providing all the required
* attributes, which is enough to prevent group creation and
* detect if there are full members or not. A major problem
* with supporting SEND ONLY is detecting when the group is
* auto-destroyed as IPoIB will cache the MLID..
* Send-only IB Multicast joins do not work at the core
* IB layer yet, so we can't use them here. However,
* we are emulating an Ethernet multicast send, which
* does not require a multicast subscription and will
* still send properly. The most appropriate thing to
* do is to create the group if it doesn't exist as that
* most closely emulates the behavior, from a user space
* application perspecitive, of Ethernet multicast
* operation. For now, we do a full join, maybe later
* when the core IB layers support send only joins we
* will use them.
*/
#if 1
if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
comp_mask &= ~IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
#else
#if 0
if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
rec.join_state = 4;
#endif
......@@ -675,7 +677,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
return 0;
}
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret = 0;
......
......@@ -97,6 +97,11 @@ unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS;
module_param_named(max_sectors, iser_max_sectors, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command (default:1024");
bool iser_always_reg = true;
module_param_named(always_register, iser_always_reg, bool, S_IRUGO);
MODULE_PARM_DESC(always_register,
"Always register memory, even for continuous memory regions (default:true)");
bool iser_pi_enable = false;
module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO);
MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
......
......@@ -611,6 +611,7 @@ extern int iser_debug_level;
extern bool iser_pi_enable;
extern int iser_pi_guard;
extern unsigned int iser_max_sectors;
extern bool iser_always_reg;
int iser_assign_reg_ops(struct iser_device *device);
......
......@@ -803,11 +803,12 @@ static int
iser_reg_prot_sg(struct iscsi_iser_task *task,
struct iser_data_buf *mem,
struct iser_fr_desc *desc,
bool use_dma_key,
struct iser_mem_reg *reg)
{
struct iser_device *device = task->iser_conn->ib_conn.device;
if (mem->dma_nents == 1)
if (use_dma_key)
return iser_reg_dma(device, mem, reg);
return device->reg_ops->reg_mem(task, mem, &desc->pi_ctx->rsc, reg);
......@@ -817,11 +818,12 @@ static int
iser_reg_data_sg(struct iscsi_iser_task *task,
struct iser_data_buf *mem,
struct iser_fr_desc *desc,
bool use_dma_key,
struct iser_mem_reg *reg)
{
struct iser_device *device = task->iser_conn->ib_conn.device;
if (mem->dma_nents == 1)
if (use_dma_key)
return iser_reg_dma(device, mem, reg);
return device->reg_ops->reg_mem(task, mem, &desc->rsc, reg);
......@@ -836,14 +838,17 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
struct iser_mem_reg *reg = &task->rdma_reg[dir];
struct iser_mem_reg *data_reg;
struct iser_fr_desc *desc = NULL;
bool use_dma_key;
int err;
err = iser_handle_unaligned_buf(task, mem, dir);
if (unlikely(err))
return err;
if (mem->dma_nents != 1 ||
scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) {
use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
if (!use_dma_key) {
desc = device->reg_ops->reg_desc_get(ib_conn);
reg->mem_h = desc;
}
......@@ -853,7 +858,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
else
data_reg = &task->desc.data_reg;
err = iser_reg_data_sg(task, mem, desc, data_reg);
err = iser_reg_data_sg(task, mem, desc, use_dma_key, data_reg);
if (unlikely(err))
goto err_reg;
......@@ -866,7 +871,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
if (unlikely(err))
goto err_reg;
err = iser_reg_prot_sg(task, mem, desc, prot_reg);
err = iser_reg_prot_sg(task, mem, desc,
use_dma_key, prot_reg);
if (unlikely(err))
goto err_reg;
}
......
......@@ -133,11 +133,15 @@ static int iser_create_device_ib_res(struct iser_device *device)
(unsigned long)comp);
}
device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ);
if (IS_ERR(device->mr))
goto dma_mr_err;
if (!iser_always_reg) {
int access = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ;
device->mr = ib_get_dma_mr(device->pd, access);
if (IS_ERR(device->mr))
goto dma_mr_err;
}
INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
iser_event_handler);
......@@ -147,7 +151,8 @@ static int iser_create_device_ib_res(struct iser_device *device)
return 0;
handler_err:
ib_dereg_mr(device->mr);
if (device->mr)
ib_dereg_mr(device->mr);
dma_mr_err:
for (i = 0; i < device->comps_used; i++)
tasklet_kill(&device->comps[i].tasklet);
......@@ -173,7 +178,6 @@ static int iser_create_device_ib_res(struct iser_device *device)
static void iser_free_device_ib_res(struct iser_device *device)
{
int i;
BUG_ON(device->mr == NULL);
for (i = 0; i < device->comps_used; i++) {
struct iser_comp *comp = &device->comps[i];
......@@ -184,7 +188,8 @@ static void iser_free_device_ib_res(struct iser_device *device)
}
(void)ib_unregister_event_handler(&device->event_handler);
(void)ib_dereg_mr(device->mr);
if (device->mr)
(void)ib_dereg_mr(device->mr);
ib_dealloc_pd(device->pd);
kfree(device->comps);
......
......@@ -200,25 +200,3 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
return err;
}
int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey)
{
struct mlx5_cmd_query_special_contexts_mbox_in in;
struct mlx5_cmd_query_special_contexts_mbox_out out;
int err;
memset(&in, 0, sizeof(in));
memset(&out, 0, sizeof(out));
in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
if (err)
return err;
if (out.hdr.status)
err = mlx5_cmd_status_to_err(&out.hdr);
*rsvd_lkey = be32_to_cpu(out.resd_lkey);
return err;
}
EXPORT_SYMBOL(mlx5_core_query_special_context);
......@@ -402,17 +402,6 @@ struct mlx5_cmd_teardown_hca_mbox_out {
u8 rsvd[8];
};
struct mlx5_cmd_query_special_contexts_mbox_in {
struct mlx5_inbox_hdr hdr;
u8 rsvd[8];
};
struct mlx5_cmd_query_special_contexts_mbox_out {
struct mlx5_outbox_hdr hdr;
__be32 dump_fill_mkey;
__be32 resd_lkey;
};
struct mlx5_cmd_layout {
u8 type;
u8 rsvd0[3];
......
......@@ -845,7 +845,6 @@ void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol);
int mlx5_register_interface(struct mlx5_interface *intf);
void mlx5_unregister_interface(struct mlx5_interface *intf);
int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey);
struct mlx5_profile {
u64 mask;
......
......@@ -39,25 +39,6 @@ static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
struct ib_device_attr *devattr = &ia->ri_devattr;
struct ib_mr *mr;
/* Obtain an lkey to use for the regbufs, which are
* protected from remote access.
*/
if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
} else {
mr = ib_get_dma_mr(ia->ri_pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(mr)) {
pr_err("%s: ib_get_dma_mr for failed with %lX\n",
__func__, PTR_ERR(mr));
return -ENOMEM;
}
ia->ri_dma_lkey = ia->ri_dma_mr->lkey;
ia->ri_dma_mr = mr;
}
return 0;
}
......
......@@ -189,11 +189,6 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct ib_device_attr *devattr = &ia->ri_devattr;
int depth, delta;
/* Obtain an lkey to use for the regbufs, which are
* protected from remote access.
*/
ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
ia->ri_max_frmr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
devattr->max_fast_reg_page_list_len);
......
......@@ -23,7 +23,6 @@ static int
physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
struct ib_device_attr *devattr = &ia->ri_devattr;
struct ib_mr *mr;
/* Obtain an rkey to use for RPC data payloads.
......@@ -37,15 +36,8 @@ physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
__func__, PTR_ERR(mr));
return -ENOMEM;
}
ia->ri_dma_mr = mr;
/* Obtain an lkey to use for regbufs.
*/
if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
else
ia->ri_dma_lkey = ia->ri_dma_mr->lkey;
ia->ri_dma_mr = mr;
return 0;
}
......
......@@ -1252,7 +1252,7 @@ rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
goto out_free;
iov->length = size;
iov->lkey = ia->ri_dma_lkey;
iov->lkey = ia->ri_pd->local_dma_lkey;
rb->rg_size = size;
rb->rg_owner = NULL;
return rb;
......
......@@ -65,7 +65,6 @@ struct rpcrdma_ia {
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct ib_mr *ri_dma_mr;
u32 ri_dma_lkey;
struct completion ri_done;
int ri_async_rc;
unsigned int ri_max_frmr_depth;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment