Commit 6fa8f719 authored by Vladimir Sokolovsky's avatar Vladimir Sokolovsky Committed by Roland Dreier

IB/mlx4: Add support for masked atomic operations

Add support for masked atomic operations (masked compare and swap,
masked fetch and add).
Signed-off-by: default avatarVladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: default avatarRoland Dreier <rolandd@cisco.com>
parent 5e80ba8f
...@@ -661,6 +661,14 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, ...@@ -661,6 +661,14 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
wc->opcode = IB_WC_FETCH_ADD; wc->opcode = IB_WC_FETCH_ADD;
wc->byte_len = 8; wc->byte_len = 8;
break; break;
case MLX4_OPCODE_MASKED_ATOMIC_CS:
wc->opcode = IB_WC_MASKED_COMP_SWAP;
wc->byte_len = 8;
break;
case MLX4_OPCODE_MASKED_ATOMIC_FA:
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
case MLX4_OPCODE_BIND_MW: case MLX4_OPCODE_BIND_MW:
wc->opcode = IB_WC_BIND_MW; wc->opcode = IB_WC_BIND_MW;
break; break;
......
...@@ -139,6 +139,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, ...@@ -139,6 +139,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE; IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->masked_atomic_cap = IB_ATOMIC_HCA;
props->max_pkeys = dev->dev->caps.pkey_table_len[1]; props->max_pkeys = dev->dev->caps.pkey_table_len[1];
props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
......
...@@ -74,17 +74,19 @@ enum { ...@@ -74,17 +74,19 @@ enum {
}; };
static const __be32 mlx4_ib_opcode[] = { static const __be32 mlx4_ib_opcode[] = {
[IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
[IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
[IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM), [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
[IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
[IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
[IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ), [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
[IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
[IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
[IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
[IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
[IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
}; };
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
...@@ -1407,6 +1409,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * ...@@ -1407,6 +1409,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
} else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
} else { } else {
aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
aseg->compare = 0; aseg->compare = 0;
...@@ -1414,6 +1419,15 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * ...@@ -1414,6 +1419,15 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
} }
static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
struct ib_send_wr *wr)
{
aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
}
static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
struct ib_send_wr *wr) struct ib_send_wr *wr)
{ {
...@@ -1567,6 +1581,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ...@@ -1567,6 +1581,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) { switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_ATOMIC_FETCH_AND_ADD:
case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
set_raddr_seg(wqe, wr->wr.atomic.remote_addr, set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
wr->wr.atomic.rkey); wr->wr.atomic.rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg); wqe += sizeof (struct mlx4_wqe_raddr_seg);
...@@ -1579,6 +1594,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ...@@ -1579,6 +1594,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break; break;
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
wr->wr.atomic.rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
set_masked_atomic_seg(wqe, wr);
wqe += sizeof (struct mlx4_wqe_masked_atomic_seg);
size += (sizeof (struct mlx4_wqe_raddr_seg) +
sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
break;
case IB_WR_RDMA_READ: case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM:
......
...@@ -123,8 +123,8 @@ enum { ...@@ -123,8 +123,8 @@ enum {
MLX4_OPCODE_RDMA_READ = 0x10, MLX4_OPCODE_RDMA_READ = 0x10,
MLX4_OPCODE_ATOMIC_CS = 0x11, MLX4_OPCODE_ATOMIC_CS = 0x11,
MLX4_OPCODE_ATOMIC_FA = 0x12, MLX4_OPCODE_ATOMIC_FA = 0x12,
MLX4_OPCODE_ATOMIC_MASK_CS = 0x14, MLX4_OPCODE_MASKED_ATOMIC_CS = 0x14,
MLX4_OPCODE_ATOMIC_MASK_FA = 0x15, MLX4_OPCODE_MASKED_ATOMIC_FA = 0x15,
MLX4_OPCODE_BIND_MW = 0x18, MLX4_OPCODE_BIND_MW = 0x18,
MLX4_OPCODE_FMR = 0x19, MLX4_OPCODE_FMR = 0x19,
MLX4_OPCODE_LOCAL_INVAL = 0x1b, MLX4_OPCODE_LOCAL_INVAL = 0x1b,
......
...@@ -285,6 +285,13 @@ struct mlx4_wqe_atomic_seg { ...@@ -285,6 +285,13 @@ struct mlx4_wqe_atomic_seg {
__be64 compare; __be64 compare;
}; };
struct mlx4_wqe_masked_atomic_seg {
__be64 swap_add;
__be64 compare;
__be64 swap_add_mask;
__be64 compare_mask;
};
struct mlx4_wqe_data_seg { struct mlx4_wqe_data_seg {
__be32 byte_count; __be32 byte_count;
__be32 lkey; __be32 lkey;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment