Commit 78c56e53 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "Not much too exciting here, although two syzkaller bugs that seem to
  have 9 lives may have finally been squashed.

  Several core bugs and a batch of driver bug fixes:

   - Fix compilation problems in qib and hfi1

   - Do not corrupt the joined multicast group state when using
     SEND_ONLY

   - Several CMA bugs, a reference leak for listening and two syzkaller
     crashers

   - Various bug fixes for irdma

   - Fix a sleeping-while-atomic bug in usnic

   - Properly sanitize kernel pointers in dmesg

   - Two bugs in the 64-byte CQE support for hns"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/hns: Add the check of the CQE size of the user space
  RDMA/hns: Fix the size setting error when copying CQE in clean_cq()
  RDMA/hfi1: Fix kernel pointer leak
  RDMA/usnic: Lock VF with mutex instead of spinlock
  RDMA/hns: Work around broken constant propagation in gcc 8
  RDMA/cma: Ensure rdma_addr_cancel() happens before issuing more requests
  RDMA/cma: Do not change route.addr.src_addr.ss_family
  RDMA/irdma: Report correct WC error when there are MW bind errors
  RDMA/irdma: Report correct WC error when transport retry counter is exceeded
  RDMA/irdma: Validate number of CQ entries on create CQ
  RDMA/irdma: Skip CQP ring during a reset
  MAINTAINERS: Update Broadcom RDMA maintainers
  RDMA/cma: Fix listener leak in rdma_cma_listen_on_all() failure
  IB/cma: Do not send IGMP leaves for sendonly Multicast groups
  IB/qib: Fix clang confusion of NULL pointer comparison
parents 02d5e016 e671f0ec
......@@ -3820,7 +3820,6 @@ F: drivers/scsi/mpi3mr/
BROADCOM NETXTREME-E ROCE DRIVER
M: Selvin Xavier <selvin.xavier@broadcom.com>
M: Naresh Kumar PBS <nareshkumar.pbs@broadcom.com>
L: linux-rdma@vger.kernel.org
S: Supported
W: http://www.broadcom.com
......
......@@ -1746,15 +1746,16 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
}
}
static void cma_cancel_listens(struct rdma_id_private *id_priv)
static void _cma_cancel_listens(struct rdma_id_private *id_priv)
{
struct rdma_id_private *dev_id_priv;
lockdep_assert_held(&lock);
/*
* Remove from listen_any_list to prevent added devices from spawning
* additional listen requests.
*/
mutex_lock(&lock);
list_del(&id_priv->list);
while (!list_empty(&id_priv->listen_list)) {
......@@ -1768,6 +1769,12 @@ static void cma_cancel_listens(struct rdma_id_private *id_priv)
rdma_destroy_id(&dev_id_priv->id);
mutex_lock(&lock);
}
}
static void cma_cancel_listens(struct rdma_id_private *id_priv)
{
mutex_lock(&lock);
_cma_cancel_listens(id_priv);
mutex_unlock(&lock);
}
......@@ -1776,6 +1783,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
/*
* We can avoid doing the rdma_addr_cancel() based on state,
* only RDMA_CM_ADDR_QUERY has a work that could still execute.
* Notice that the addr_handler work could still be exiting
* outside this state, however due to the interaction with the
* handler_mutex the work is guaranteed not to touch id_priv
* during exit.
*/
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
......@@ -1810,6 +1825,8 @@ static void cma_release_port(struct rdma_id_private *id_priv)
static void destroy_mc(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
bool send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
ib_sa_free_multicast(mc->sa_mc);
......@@ -1826,7 +1843,10 @@ static void destroy_mc(struct rdma_id_private *id_priv,
cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr,
&mgid);
cma_igmp_send(ndev, &mgid, false);
if (!send_only)
cma_igmp_send(ndev, &mgid, false);
dev_put(ndev);
}
......@@ -2574,7 +2594,7 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv)
return 0;
err_listen:
list_del(&id_priv->list);
_cma_cancel_listens(id_priv);
mutex_unlock(&lock);
if (to_destroy)
rdma_destroy_id(&to_destroy->id);
......@@ -3413,6 +3433,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
/*
* The FSM can return back to RDMA_CM_ADDR_BOUND after
* rdma_resolve_ip() is called, eg through the error
* path in addr_handler(). If this happens the existing
* request must be canceled before issuing a new one.
* Since canceling a request is a bit slow and this
* oddball path is rare, keep track once a request has
* been issued. The track turns out to be a permanent
* state since this is the only cancel as it is
* immediately before rdma_resolve_ip().
*/
if (id_priv->used_resolve_ip)
rdma_addr_cancel(&id->route.addr.dev_addr);
else
id_priv->used_resolve_ip = 1;
ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
&id->route.addr.dev_addr,
timeout_ms, addr_handler,
......@@ -3771,9 +3806,13 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
int ret;
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
struct sockaddr_in any_in = {
.sin_family = AF_INET,
.sin_addr.s_addr = htonl(INADDR_ANY),
};
/* For a well behaved ULP state will be RDMA_CM_IDLE */
id->route.addr.src_addr.ss_family = AF_INET;
ret = rdma_bind_addr(id, cma_src_addr(id_priv));
ret = rdma_bind_addr(id, (struct sockaddr *)&any_in);
if (ret)
return ret;
if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
......
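
The err_listen change above replaces a bare list_del() with _cma_cancel_listens(), so a partial failure in rdma_cma_listen_on_all() tears down the per-device listeners that were already created instead of leaking them. A minimal user-space sketch of that partial-failure cleanup pattern, with hypothetical names (not the kernel implementation):

```c
/* Hypothetical sketch: attach a listener per device and, on failure,
 * destroy every listener created so far instead of only unlinking the
 * parent from the list (which is what leaked before the fix). */
#include <stdio.h>
#include <stdlib.h>

#define NDEV 4

static int create_listener(int dev)
{
	if (dev == 2)
		return -1;		/* simulate a mid-loop failure */
	printf("listener created on dev %d\n", dev);
	return 0;
}

static void destroy_listener(int dev)
{
	printf("listener destroyed on dev %d\n", dev);
}

static int listen_on_all(void)
{
	int dev;

	for (dev = 0; dev < NDEV; dev++) {
		if (create_listener(dev))
			goto err_listen;
	}
	return 0;

err_listen:
	/* unwind only what was actually created */
	while (--dev >= 0)
		destroy_listener(dev);
	return -1;
}

int main(void)
{
	return listen_on_all() ? EXIT_FAILURE : EXIT_SUCCESS;
}
```
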
......@@ -91,6 +91,7 @@ struct rdma_id_private {
u8 afonly;
u8 timeout;
u8 min_rnr_timer;
u8 used_resolve_ip;
enum ib_gid_type gid_type;
/*
......
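
The new used_resolve_ip flag above implements the cancel-before-reissue rule described in the rdma_resolve_addr() comment: once an address-resolution request has been issued, any later request must first cancel the outstanding one. A small stand-alone sketch of the same idea (hypothetical names, not the cma code):

```c
/* Sticky "a request was issued" flag: the first issue skips the (slow)
 * cancel, every later issue cancels the previous request before
 * starting a new one. */
#include <stdbool.h>
#include <stdio.h>

struct resolver {
	bool used_resolve;	/* set once, never cleared */
};

static void cancel_pending(struct resolver *r)
{
	printf("cancel outstanding resolve\n");
}

static void resolve_addr(struct resolver *r)
{
	if (r->used_resolve)
		cancel_pending(r);	/* prior work may still be queued */
	else
		r->used_resolve = true;	/* first request: nothing to cancel */
	printf("issue resolve\n");
}

int main(void)
{
	struct resolver r = { .used_resolve = false };

	resolve_addr(&r);	/* first call, no cancel */
	resolve_addr(&r);	/* re-entry (e.g. error path), cancel first */
	return 0;
}
```
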
......@@ -876,14 +876,14 @@ void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q)
struct hfi1_ipoib_txq *txq = &priv->txqs[q];
u64 completed = atomic64_read(&txq->complete_txreqs);
dd_dev_info(priv->dd, "timeout txq %llx q %u stopped %u stops %d no_desc %d ring_full %d\n",
(unsigned long long)txq, q,
dd_dev_info(priv->dd, "timeout txq %p q %u stopped %u stops %d no_desc %d ring_full %d\n",
txq, q,
__netif_subqueue_stopped(dev, txq->q_idx),
atomic_read(&txq->stops),
atomic_read(&txq->no_desc),
atomic_read(&txq->ring_full));
dd_dev_info(priv->dd, "sde %llx engine %u\n",
(unsigned long long)txq->sde,
dd_dev_info(priv->dd, "sde %p engine %u\n",
txq->sde,
txq->sde ? txq->sde->this_idx : 0);
dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int);
dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n",
......
......@@ -326,19 +326,30 @@ static void set_cq_param(struct hns_roce_cq *hr_cq, u32 cq_entries, int vector,
INIT_LIST_HEAD(&hr_cq->rq_list);
}
static void set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
struct hns_roce_ib_create_cq *ucmd)
static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
struct hns_roce_ib_create_cq *ucmd)
{
struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
if (udata) {
if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size))
hr_cq->cqe_size = ucmd->cqe_size;
else
hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE;
} else {
if (!udata) {
hr_cq->cqe_size = hr_dev->caps.cqe_sz;
return 0;
}
if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size)) {
if (ucmd->cqe_size != HNS_ROCE_V2_CQE_SIZE &&
ucmd->cqe_size != HNS_ROCE_V3_CQE_SIZE) {
ibdev_err(&hr_dev->ib_dev,
"invalid cqe size %u.\n", ucmd->cqe_size);
return -EINVAL;
}
hr_cq->cqe_size = ucmd->cqe_size;
} else {
hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE;
}
return 0;
}
int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
......@@ -366,7 +377,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
set_cqe_size(hr_cq, udata, &ucmd);
ret = set_cqe_size(hr_cq, udata, &ucmd);
if (ret)
return ret;
ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
if (ret) {
......
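
set_cqe_size() now rejects user-supplied CQE sizes other than the two formats the hardware supports, before the value can be used later (for example as a copy length). A stand-alone sketch of that validation, assuming the usual 32-byte (V2) and 64-byte (V3) CQE sizes:

```c
/* Sketch of validating a user-provided CQE size against the two
 * supported formats (assumed 32 and 64 bytes); anything else is
 * rejected with -EINVAL. */
#include <errno.h>
#include <stdio.h>

#define CQE_SIZE_V2 32
#define CQE_SIZE_V3 64

static int check_cqe_size(unsigned int cqe_size)
{
	if (cqe_size != CQE_SIZE_V2 && cqe_size != CQE_SIZE_V3) {
		fprintf(stderr, "invalid cqe size %u\n", cqe_size);
		return -EINVAL;
	}
	return 0;
}

int main(void)
{
	printf("32 -> %d, 64 -> %d, 128 -> %d\n",
	       check_cqe_size(32), check_cqe_size(64), check_cqe_size(128));
	return 0;
}
```
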
......@@ -3299,7 +3299,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
dest = get_cqe_v2(hr_cq, (prod_index + nfreed) &
hr_cq->ib_cq.cqe);
owner_bit = hr_reg_read(dest, CQE_OWNER);
memcpy(dest, cqe, sizeof(*cqe));
memcpy(dest, cqe, hr_cq->cqe_size);
hr_reg_write(dest, CQE_OWNER, owner_bit);
}
}
......@@ -4397,7 +4397,12 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
hr_qp->path_mtu = ib_mtu;
mtu = ib_mtu_enum_to_int(ib_mtu);
if (WARN_ON(mtu < 0))
if (WARN_ON(mtu <= 0))
return -EINVAL;
#define MAX_LP_MSG_LEN 65536
/* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */
lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu);
if (WARN_ON(lp_pktn_ini >= 0xF))
return -EINVAL;
if (attr_mask & IB_QP_PATH_MTU) {
......@@ -4405,10 +4410,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
hr_reg_clear(qpc_mask, QPC_MTU);
}
#define MAX_LP_MSG_LEN 65536
/* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */
lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu);
hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini);
hr_reg_clear(qpc_mask, QPC_LP_PKTN_INI);
......
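
The LP_PKTN_INI hunks above compute lp_pktn_ini = ilog2(65536 / mtu) so that MTU * 2^LP_PKTN_INI never exceeds 64KB, and now reject values >= 0xF that would not fit the QPC field. A small worked example of that arithmetic (plain C, not driver code):

```c
/* Worked example: lp_pktn_ini is chosen so that mtu * 2^lp_pktn_ini
 * stays within the 64KB limit (MAX_LP_MSG_LEN); values >= 0xF are
 * rejected, mirroring the WARN_ON in the patch. */
#include <stdio.h>

#define MAX_LP_MSG_LEN 65536

/* integer log2 for positive input, matching the kernel's ilog2() */
static unsigned int ilog2_u32(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	int mtus[] = { 256, 512, 1024, 2048, 4096 };	/* IB MTU values */

	for (int i = 0; i < 5; i++) {
		unsigned int ini = ilog2_u32(MAX_LP_MSG_LEN / mtus[i]);

		printf("mtu=%4d  lp_pktn_ini=%2u  product=%d%s\n",
		       mtus[i], ini, mtus[i] << ini,
		       ini >= 0xF ? "  (rejected)" : "");
	}
	return 0;
}
```
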
......@@ -3496,7 +3496,7 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
last_ae == IRDMA_AE_BAD_CLOSE ||
last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->reset)) {
last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) {
issue_close = 1;
iwqp->cm_id = NULL;
qp->term_flags = 0;
......@@ -4250,7 +4250,7 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
teardown_entry);
attr.qp_state = IB_QPS_ERR;
irdma_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
if (iwdev->reset)
if (iwdev->rf->reset)
irdma_cm_disconn(cm_node->iwqp);
irdma_rem_ref_cm_node(cm_node);
}
......
......@@ -176,6 +176,14 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
qp->flush_code = FLUSH_GENERAL_ERR;
break;
case IRDMA_AE_LLP_TOO_MANY_RETRIES:
qp->flush_code = FLUSH_RETRY_EXC_ERR;
break;
case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
qp->flush_code = FLUSH_MW_BIND_ERR;
break;
default:
qp->flush_code = FLUSH_FATAL_ERR;
break;
......@@ -1489,7 +1497,7 @@ void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi)
irdma_puda_dele_rsrc(vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, false);
if (irdma_initialize_ieq(iwdev)) {
iwdev->reset = true;
iwdev->rf->reset = true;
rf->gen_ops.request_reset(rf);
}
}
......@@ -1632,13 +1640,13 @@ void irdma_rt_deinit_hw(struct irdma_device *iwdev)
case IEQ_CREATED:
if (!iwdev->roce_mode)
irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_IEQ,
iwdev->reset);
iwdev->rf->reset);
fallthrough;
case ILQ_CREATED:
if (!iwdev->roce_mode)
irdma_puda_dele_rsrc(&iwdev->vsi,
IRDMA_PUDA_RSRC_TYPE_ILQ,
iwdev->reset);
iwdev->rf->reset);
break;
default:
ibdev_warn(&iwdev->ibdev, "bad init_state = %d\n", iwdev->init_state);
......
......@@ -55,7 +55,7 @@ static void i40iw_close(struct i40e_info *cdev_info, struct i40e_client *client,
iwdev = to_iwdev(ibdev);
if (reset)
iwdev->reset = true;
iwdev->rf->reset = true;
iwdev->iw_status = 0;
irdma_port_ibevent(iwdev);
......
......@@ -346,7 +346,6 @@ struct irdma_device {
bool roce_mode:1;
bool roce_dcqcn_en:1;
bool dcb:1;
bool reset:1;
bool iw_ooo:1;
enum init_completion_state init_state;
......
......@@ -102,6 +102,8 @@ enum irdma_flush_opcode {
FLUSH_REM_OP_ERR,
FLUSH_LOC_LEN_ERR,
FLUSH_FATAL_ERR,
FLUSH_RETRY_EXC_ERR,
FLUSH_MW_BIND_ERR,
};
enum irdma_cmpl_status {
......
......@@ -2507,7 +2507,7 @@ void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp)
struct irdma_qp *qp = sc_qp->qp_uk.back_qp;
struct ib_qp_attr attr;
if (qp->iwdev->reset)
if (qp->iwdev->rf->reset)
return;
attr.qp_state = IB_QPS_ERR;
......
......@@ -535,8 +535,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
irdma_qp_rem_ref(&iwqp->ibqp);
wait_for_completion(&iwqp->free_qp);
irdma_free_lsmm_rsrc(iwqp);
if (!iwdev->reset)
irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
if (!iwqp->user_mode) {
if (iwqp->iwscq) {
......@@ -2035,7 +2034,7 @@ static int irdma_create_cq(struct ib_cq *ibcq,
/* Kmode allocations */
int rsize;
if (entries > rf->max_cqe) {
if (entries < 1 || entries > rf->max_cqe) {
err_code = -EINVAL;
goto cq_free_rsrc;
}
......@@ -3353,6 +3352,10 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode
return IB_WC_LOC_LEN_ERR;
case FLUSH_GENERAL_ERR:
return IB_WC_WR_FLUSH_ERR;
case FLUSH_RETRY_EXC_ERR:
return IB_WC_RETRY_EXC_ERR;
case FLUSH_MW_BIND_ERR:
return IB_WC_MW_BIND_ERR;
case FLUSH_FATAL_ERR:
default:
return IB_WC_FATAL_ERR;
......
......@@ -403,7 +403,7 @@ static ssize_t diagc_attr_store(struct ib_device *ibdev, u32 port_num,
}
#define QIB_DIAGC_ATTR(N) \
static_assert(&((struct qib_ibport *)0)->rvp.n_##N != (u64 *)NULL); \
static_assert(__same_type(((struct qib_ibport *)0)->rvp.n_##N, u64)); \
static struct qib_diagc_attr qib_diagc_attr_##N = { \
.attr = __ATTR(N, 0664, diagc_attr_show, diagc_attr_store), \
.counter = \
......
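
The QIB_DIAGC_ATTR change swaps a NULL-pointer comparison (which clang flags) for a __same_type() check that only asserts the counter member really has type u64. A compile-time sketch of the same pattern using the GCC/Clang builtin that the kernel macro wraps, with a hypothetical struct and member name:

```c
/* Stand-alone sketch of a compile-time member-type check, assuming
 * GCC/Clang builtins (the kernel provides this as __same_type()). */
#include <assert.h>

struct counters {
	unsigned long long n_rc_resends;	/* hypothetical member */
};

#define same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))

/* Fails to compile if the member is not an unsigned long long (u64);
 * the (struct counters *)0 access is never evaluated at runtime. */
static_assert(same_type(((struct counters *)0)->n_rc_resends,
			unsigned long long),
	      "counter must be u64");

int main(void)
{
	return 0;
}
```
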
......@@ -90,7 +90,7 @@ struct usnic_ib_dev {
struct usnic_ib_vf {
struct usnic_ib_dev *pf;
spinlock_t lock;
struct mutex lock;
struct usnic_vnic *vnic;
unsigned int qp_grp_ref_cnt;
struct usnic_ib_pd *pd;
......
......@@ -572,7 +572,7 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev,
}
vf->pf = pf;
spin_lock_init(&vf->lock);
mutex_init(&vf->lock);
mutex_lock(&pf->usdev_lock);
list_add_tail(&vf->link, &pf->vf_dev_list);
/*
......
......@@ -196,7 +196,7 @@ find_free_vf_and_create_qp_grp(struct ib_qp *qp,
for (i = 0; dev_list[i]; i++) {
dev = dev_list[i];
vf = dev_get_drvdata(dev);
spin_lock(&vf->lock);
mutex_lock(&vf->lock);
vnic = vf->vnic;
if (!usnic_vnic_check_room(vnic, res_spec)) {
usnic_dbg("Found used vnic %s from %s\n",
......@@ -208,10 +208,10 @@ find_free_vf_and_create_qp_grp(struct ib_qp *qp,
vf, pd, res_spec,
trans_spec);
spin_unlock(&vf->lock);
mutex_unlock(&vf->lock);
goto qp_grp_check;
}
spin_unlock(&vf->lock);
mutex_unlock(&vf->lock);
}
usnic_uiom_free_dev_list(dev_list);
......@@ -220,7 +220,7 @@ find_free_vf_and_create_qp_grp(struct ib_qp *qp,
/* Try to find resources on an unused vf */
list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
spin_lock(&vf->lock);
mutex_lock(&vf->lock);
vnic = vf->vnic;
if (vf->qp_grp_ref_cnt == 0 &&
usnic_vnic_check_room(vnic, res_spec) == 0) {
......@@ -228,10 +228,10 @@ find_free_vf_and_create_qp_grp(struct ib_qp *qp,
vf, pd, res_spec,
trans_spec);
spin_unlock(&vf->lock);
mutex_unlock(&vf->lock);
goto qp_grp_check;
}
spin_unlock(&vf->lock);
mutex_unlock(&vf->lock);
}
usnic_info("No free qp grp found on %s\n",
......@@ -253,9 +253,9 @@ static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
WARN_ON(qp_grp->state != IB_QPS_RESET);
spin_lock(&vf->lock);
mutex_lock(&vf->lock);
usnic_ib_qp_grp_destroy(qp_grp);
spin_unlock(&vf->lock);
mutex_unlock(&vf->lock);
}
static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd)
......