Commit dd286422 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  RDMA/cxgb3: Wrap the software send queue pointer as needed on flush
  IB/ipath: Change ipath_devdata.ipath_sdma_status to be unsigned long
  IB/ipath: Make ipath_portdata work with struct pid * not pid_t
  IB/ipath: Fix RDMA read response sequence checking
  IB/ipath: Fix many locking issues when switching to error state
  IB/ipath: Fix RC and UC error handling
  RDMA/nes: Fix up nes_lro_max_aggr module parameter
parents 4717df58 a58e58fa
...@@ -405,11 +405,11 @@ int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) ...@@ -405,11 +405,11 @@ int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2); struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
ptr = wq->sq_rptr + count; ptr = wq->sq_rptr + count;
sqp += count; sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
while (ptr != wq->sq_wptr) { while (ptr != wq->sq_wptr) {
insert_sq_cqe(wq, cq, sqp); insert_sq_cqe(wq, cq, sqp);
sqp++;
ptr++; ptr++;
sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
flushed++; flushed++;
} }
return flushed; return flushed;
......
...@@ -1894,7 +1894,7 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) ...@@ -1894,7 +1894,7 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
*/ */
if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
int skip_cancel; int skip_cancel;
u64 *statp = &dd->ipath_sdma_status; unsigned long *statp = &dd->ipath_sdma_status;
spin_lock_irqsave(&dd->ipath_sdma_lock, flags); spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
skip_cancel = skip_cancel =
...@@ -2616,7 +2616,7 @@ int ipath_reset_device(int unit) ...@@ -2616,7 +2616,7 @@ int ipath_reset_device(int unit)
ipath_dbg("unit %u port %d is in use " ipath_dbg("unit %u port %d is in use "
"(PID %u cmd %s), can't reset\n", "(PID %u cmd %s), can't reset\n",
unit, i, unit, i,
dd->ipath_pd[i]->port_pid, pid_nr(dd->ipath_pd[i]->port_pid),
dd->ipath_pd[i]->port_comm); dd->ipath_pd[i]->port_comm);
ret = -EBUSY; ret = -EBUSY;
goto bail; goto bail;
...@@ -2654,19 +2654,21 @@ int ipath_reset_device(int unit) ...@@ -2654,19 +2654,21 @@ int ipath_reset_device(int unit)
static int ipath_signal_procs(struct ipath_devdata *dd, int sig) static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
{ {
int i, sub, any = 0; int i, sub, any = 0;
pid_t pid; struct pid *pid;
if (!dd->ipath_pd) if (!dd->ipath_pd)
return 0; return 0;
for (i = 1; i < dd->ipath_cfgports; i++) { for (i = 1; i < dd->ipath_cfgports; i++) {
if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt || if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
!dd->ipath_pd[i]->port_pid)
continue; continue;
pid = dd->ipath_pd[i]->port_pid; pid = dd->ipath_pd[i]->port_pid;
if (!pid)
continue;
dev_info(&dd->pcidev->dev, "context %d in use " dev_info(&dd->pcidev->dev, "context %d in use "
"(PID %u), sending signal %d\n", "(PID %u), sending signal %d\n",
i, pid, sig); i, pid_nr(pid), sig);
kill_proc(pid, sig, 1); kill_pid(pid, sig, 1);
any++; any++;
for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
pid = dd->ipath_pd[i]->port_subpid[sub]; pid = dd->ipath_pd[i]->port_subpid[sub];
...@@ -2674,8 +2676,8 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig) ...@@ -2674,8 +2676,8 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
continue; continue;
dev_info(&dd->pcidev->dev, "sub-context " dev_info(&dd->pcidev->dev, "sub-context "
"%d:%d in use (PID %u), sending " "%d:%d in use (PID %u), sending "
"signal %d\n", i, sub, pid, sig); "signal %d\n", i, sub, pid_nr(pid), sig);
kill_proc(pid, sig, 1); kill_pid(pid, sig, 1);
any++; any++;
} }
} }
......
...@@ -555,7 +555,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, ...@@ -555,7 +555,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
p = dd->ipath_pageshadow[porttid + tid]; p = dd->ipath_pageshadow[porttid + tid];
dd->ipath_pageshadow[porttid + tid] = NULL; dd->ipath_pageshadow[porttid + tid] = NULL;
ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
pd->port_pid, tid); pid_nr(pd->port_pid), tid);
dd->ipath_f_put_tid(dd, &tidbase[tid], dd->ipath_f_put_tid(dd, &tidbase[tid],
RCVHQ_RCV_TYPE_EXPECTED, RCVHQ_RCV_TYPE_EXPECTED,
dd->ipath_tidinvalid); dd->ipath_tidinvalid);
...@@ -1609,7 +1609,7 @@ static int try_alloc_port(struct ipath_devdata *dd, int port, ...@@ -1609,7 +1609,7 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
port); port);
pd->port_cnt = 1; pd->port_cnt = 1;
port_fp(fp) = pd; port_fp(fp) = pd;
pd->port_pid = current->pid; pd->port_pid = get_pid(task_pid(current));
strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
ipath_stats.sps_ports++; ipath_stats.sps_ports++;
ret = 0; ret = 0;
...@@ -1793,14 +1793,15 @@ static int find_shared_port(struct file *fp, ...@@ -1793,14 +1793,15 @@ static int find_shared_port(struct file *fp,
} }
port_fp(fp) = pd; port_fp(fp) = pd;
subport_fp(fp) = pd->port_cnt++; subport_fp(fp) = pd->port_cnt++;
pd->port_subpid[subport_fp(fp)] = current->pid; pd->port_subpid[subport_fp(fp)] =
get_pid(task_pid(current));
tidcursor_fp(fp) = 0; tidcursor_fp(fp) = 0;
pd->active_slaves |= 1 << subport_fp(fp); pd->active_slaves |= 1 << subport_fp(fp);
ipath_cdbg(PROC, ipath_cdbg(PROC,
"%s[%u] %u sharing %s[%u] unit:port %u:%u\n", "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
current->comm, current->pid, current->comm, current->pid,
subport_fp(fp), subport_fp(fp),
pd->port_comm, pd->port_pid, pd->port_comm, pid_nr(pd->port_pid),
dd->ipath_unit, pd->port_port); dd->ipath_unit, pd->port_port);
ret = 1; ret = 1;
goto done; goto done;
...@@ -2066,7 +2067,8 @@ static int ipath_close(struct inode *in, struct file *fp) ...@@ -2066,7 +2067,8 @@ static int ipath_close(struct inode *in, struct file *fp)
* the slave(s) don't wait for receive data forever. * the slave(s) don't wait for receive data forever.
*/ */
pd->active_slaves &= ~(1 << fd->subport); pd->active_slaves &= ~(1 << fd->subport);
pd->port_subpid[fd->subport] = 0; put_pid(pd->port_subpid[fd->subport]);
pd->port_subpid[fd->subport] = NULL;
mutex_unlock(&ipath_mutex); mutex_unlock(&ipath_mutex);
goto bail; goto bail;
} }
...@@ -2074,7 +2076,7 @@ static int ipath_close(struct inode *in, struct file *fp) ...@@ -2074,7 +2076,7 @@ static int ipath_close(struct inode *in, struct file *fp)
if (pd->port_hdrqfull) { if (pd->port_hdrqfull) {
ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors " ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
"during run\n", pd->port_comm, pd->port_pid, "during run\n", pd->port_comm, pid_nr(pd->port_pid),
pd->port_hdrqfull); pd->port_hdrqfull);
pd->port_hdrqfull = 0; pd->port_hdrqfull = 0;
} }
...@@ -2134,11 +2136,12 @@ static int ipath_close(struct inode *in, struct file *fp) ...@@ -2134,11 +2136,12 @@ static int ipath_close(struct inode *in, struct file *fp)
unlock_expected_tids(pd); unlock_expected_tids(pd);
ipath_stats.sps_ports--; ipath_stats.sps_ports--;
ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
pd->port_comm, pd->port_pid, pd->port_comm, pid_nr(pd->port_pid),
dd->ipath_unit, port); dd->ipath_unit, port);
} }
pd->port_pid = 0; put_pid(pd->port_pid);
pd->port_pid = NULL;
dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */ dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
mutex_unlock(&ipath_mutex); mutex_unlock(&ipath_mutex);
ipath_free_pddata(dd, pd); /* after releasing the mutex */ ipath_free_pddata(dd, pd); /* after releasing the mutex */
......
...@@ -159,8 +159,8 @@ struct ipath_portdata { ...@@ -159,8 +159,8 @@ struct ipath_portdata {
/* saved total number of polled urgent packets for poll edge trigger */ /* saved total number of polled urgent packets for poll edge trigger */
u32 port_urgent_poll; u32 port_urgent_poll;
/* pid of process using this port */ /* pid of process using this port */
pid_t port_pid; struct pid *port_pid;
pid_t port_subpid[INFINIPATH_MAX_SUBPORT]; struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
/* same size as task_struct .comm[] */ /* same size as task_struct .comm[] */
char port_comm[16]; char port_comm[16];
/* pkeys set by this use of this port */ /* pkeys set by this use of this port */
...@@ -483,7 +483,7 @@ struct ipath_devdata { ...@@ -483,7 +483,7 @@ struct ipath_devdata {
/* SendDMA related entries */ /* SendDMA related entries */
spinlock_t ipath_sdma_lock; spinlock_t ipath_sdma_lock;
u64 ipath_sdma_status; unsigned long ipath_sdma_status;
unsigned long ipath_sdma_abort_jiffies; unsigned long ipath_sdma_abort_jiffies;
unsigned long ipath_sdma_abort_intr_timeout; unsigned long ipath_sdma_abort_intr_timeout;
unsigned long ipath_sdma_buf_jiffies; unsigned long ipath_sdma_buf_jiffies;
...@@ -822,8 +822,8 @@ struct ipath_devdata { ...@@ -822,8 +822,8 @@ struct ipath_devdata {
#define IPATH_SDMA_DISARMED 1 #define IPATH_SDMA_DISARMED 1
#define IPATH_SDMA_DISABLED 2 #define IPATH_SDMA_DISABLED 2
#define IPATH_SDMA_LAYERBUF 3 #define IPATH_SDMA_LAYERBUF 3
#define IPATH_SDMA_RUNNING 62 #define IPATH_SDMA_RUNNING 30
#define IPATH_SDMA_SHUTDOWN 63 #define IPATH_SDMA_SHUTDOWN 31
/* bit combinations that correspond to abort states */ /* bit combinations that correspond to abort states */
#define IPATH_SDMA_ABORT_NONE 0 #define IPATH_SDMA_ABORT_NONE 0
......
...@@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) ...@@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
{ {
struct ipath_qp *q, **qpp; struct ipath_qp *q, **qpp;
unsigned long flags; unsigned long flags;
int fnd = 0;
spin_lock_irqsave(&qpt->lock, flags); spin_lock_irqsave(&qpt->lock, flags);
...@@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) ...@@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
*qpp = qp->next; *qpp = qp->next;
qp->next = NULL; qp->next = NULL;
atomic_dec(&qp->refcount); atomic_dec(&qp->refcount);
fnd = 1;
break; break;
} }
} }
spin_unlock_irqrestore(&qpt->lock, flags); spin_unlock_irqrestore(&qpt->lock, flags);
if (!fnd)
return;
free_qpn(qpt, qp->ibqp.qp_num);
wait_event(qp->wait, !atomic_read(&qp->refcount));
} }
/** /**
* ipath_free_all_qps - remove all QPs from the table * ipath_free_all_qps - check for QPs still in use
* @qpt: the QP table to empty * @qpt: the QP table to empty
*
* There should not be any QPs still in use.
* Free memory for table.
*/ */
void ipath_free_all_qps(struct ipath_qp_table *qpt) unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
{ {
unsigned long flags; unsigned long flags;
struct ipath_qp *qp, *nqp; struct ipath_qp *qp;
u32 n; u32 n, qp_inuse = 0;
spin_lock_irqsave(&qpt->lock, flags);
for (n = 0; n < qpt->max; n++) { for (n = 0; n < qpt->max; n++) {
spin_lock_irqsave(&qpt->lock, flags);
qp = qpt->table[n]; qp = qpt->table[n];
qpt->table[n] = NULL; qpt->table[n] = NULL;
spin_unlock_irqrestore(&qpt->lock, flags);
for (; qp; qp = qp->next)
while (qp) { qp_inuse++;
nqp = qp->next;
free_qpn(qpt, qp->ibqp.qp_num);
if (!atomic_dec_and_test(&qp->refcount) ||
!ipath_destroy_qp(&qp->ibqp))
ipath_dbg("QP memory leak!\n");
qp = nqp;
}
} }
spin_unlock_irqrestore(&qpt->lock, flags);
for (n = 0; n < ARRAY_SIZE(qpt->map); n++) { for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
if (qpt->map[n].page) if (qpt->map[n].page)
free_page((unsigned long)qpt->map[n].page); free_page((unsigned long) qpt->map[n].page);
} return qp_inuse;
} }
/** /**
...@@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) ...@@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->remote_qpn = 0; qp->remote_qpn = 0;
qp->qkey = 0; qp->qkey = 0;
qp->qp_access_flags = 0; qp->qp_access_flags = 0;
qp->s_busy = 0; atomic_set(&qp->s_dma_busy, 0);
qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
qp->s_hdrwords = 0; qp->s_hdrwords = 0;
qp->s_wqe = NULL; qp->s_wqe = NULL;
qp->s_pkt_delay = 0; qp->s_pkt_delay = 0;
qp->s_draining = 0;
qp->s_psn = 0; qp->s_psn = 0;
qp->r_psn = 0; qp->r_psn = 0;
qp->r_msn = 0; qp->r_msn = 0;
...@@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) ...@@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
} }
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_nak_state = 0; qp->r_nak_state = 0;
qp->r_wrid_valid = 0; qp->r_aflags = 0;
qp->r_flags = 0;
qp->s_rnr_timeout = 0; qp->s_rnr_timeout = 0;
qp->s_head = 0; qp->s_head = 0;
qp->s_tail = 0; qp->s_tail = 0;
...@@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) ...@@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->s_last = 0; qp->s_last = 0;
qp->s_ssn = 1; qp->s_ssn = 1;
qp->s_lsn = 0; qp->s_lsn = 0;
qp->s_wait_credit = 0;
memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
qp->r_head_ack_queue = 0; qp->r_head_ack_queue = 0;
qp->s_tail_ack_queue = 0; qp->s_tail_ack_queue = 0;
...@@ -370,17 +359,17 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) ...@@ -370,17 +359,17 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->r_rq.wq->head = 0; qp->r_rq.wq->head = 0;
qp->r_rq.wq->tail = 0; qp->r_rq.wq->tail = 0;
} }
qp->r_reuse_sge = 0;
} }
/** /**
* ipath_error_qp - put a QP into an error state * ipath_error_qp - put a QP into the error state
* @qp: the QP to put into an error state * @qp: the QP to put into the error state
* @err: the receive completion error to signal if a RWQE is active * @err: the receive completion error to signal if a RWQE is active
* *
* Flushes both send and receive work queues. * Flushes both send and receive work queues.
* Returns true if last WQE event should be generated. * Returns true if last WQE event should be generated.
* The QP s_lock should be held and interrupts disabled. * The QP s_lock should be held and interrupts disabled.
* If we are already in error state, just return.
*/ */
int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
...@@ -389,8 +378,10 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) ...@@ -389,8 +378,10 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
struct ib_wc wc; struct ib_wc wc;
int ret = 0; int ret = 0;
ipath_dbg("QP%d/%d in error state (%d)\n", if (qp->state == IB_QPS_ERR)
qp->ibqp.qp_num, qp->remote_qpn, err); goto bail;
qp->state = IB_QPS_ERR;
spin_lock(&dev->pending_lock); spin_lock(&dev->pending_lock);
if (!list_empty(&qp->timerwait)) if (!list_empty(&qp->timerwait))
...@@ -399,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) ...@@ -399,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
list_del_init(&qp->piowait); list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock); spin_unlock(&dev->pending_lock);
wc.vendor_err = 0; /* Schedule the sending tasklet to drain the send work queue. */
wc.byte_len = 0; if (qp->s_last != qp->s_head)
wc.imm_data = 0; ipath_schedule_send(qp);
memset(&wc, 0, sizeof(wc));
wc.qp = &qp->ibqp; wc.qp = &qp->ibqp;
wc.src_qp = 0; wc.opcode = IB_WC_RECV;
wc.wc_flags = 0;
wc.pkey_index = 0; if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
wc.slid = 0;
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
if (qp->r_wrid_valid) {
qp->r_wrid_valid = 0;
wc.wr_id = qp->r_wr_id; wc.wr_id = qp->r_wr_id;
wc.opcode = IB_WC_RECV;
wc.status = err; wc.status = err;
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
} }
wc.status = IB_WC_WR_FLUSH_ERR; wc.status = IB_WC_WR_FLUSH_ERR;
while (qp->s_last != qp->s_head) {
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
wc.wr_id = wqe->wr.wr_id;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
}
qp->s_cur = qp->s_tail = qp->s_head;
qp->s_hdrwords = 0;
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
if (qp->r_rq.wq) { if (qp->r_rq.wq) {
struct ipath_rwq *wq; struct ipath_rwq *wq;
u32 head; u32 head;
...@@ -447,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) ...@@ -447,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
tail = wq->tail; tail = wq->tail;
if (tail >= qp->r_rq.size) if (tail >= qp->r_rq.size)
tail = 0; tail = 0;
wc.opcode = IB_WC_RECV;
while (tail != head) { while (tail != head) {
wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
if (++tail >= qp->r_rq.size) if (++tail >= qp->r_rq.size)
...@@ -460,6 +432,7 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) ...@@ -460,6 +432,7 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
} else if (qp->ibqp.event_handler) } else if (qp->ibqp.event_handler)
ret = 1; ret = 1;
bail:
return ret; return ret;
} }
...@@ -478,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -478,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ipath_ibdev *dev = to_idev(ibqp->device); struct ipath_ibdev *dev = to_idev(ibqp->device);
struct ipath_qp *qp = to_iqp(ibqp); struct ipath_qp *qp = to_iqp(ibqp);
enum ib_qp_state cur_state, new_state; enum ib_qp_state cur_state, new_state;
unsigned long flags;
int lastwqe = 0; int lastwqe = 0;
int ret; int ret;
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irq(&qp->s_lock);
cur_state = attr_mask & IB_QP_CUR_STATE ? cur_state = attr_mask & IB_QP_CUR_STATE ?
attr->cur_qp_state : qp->state; attr->cur_qp_state : qp->state;
...@@ -535,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -535,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
switch (new_state) { switch (new_state) {
case IB_QPS_RESET: case IB_QPS_RESET:
if (qp->state != IB_QPS_RESET) {
qp->state = IB_QPS_RESET;
spin_lock(&dev->pending_lock);
if (!list_empty(&qp->timerwait))
list_del_init(&qp->timerwait);
if (!list_empty(&qp->piowait))
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
qp->s_flags &= ~IPATH_S_ANY_WAIT;
spin_unlock_irq(&qp->s_lock);
/* Stop the sending tasklet */
tasklet_kill(&qp->s_task);
wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
spin_lock_irq(&qp->s_lock);
}
ipath_reset_qp(qp, ibqp->qp_type); ipath_reset_qp(qp, ibqp->qp_type);
break; break;
case IB_QPS_SQD:
qp->s_draining = qp->s_last != qp->s_cur;
qp->state = new_state;
break;
case IB_QPS_SQE:
if (qp->ibqp.qp_type == IB_QPT_RC)
goto inval;
qp->state = new_state;
break;
case IB_QPS_ERR: case IB_QPS_ERR:
lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
break; break;
default: default:
qp->state = new_state;
break; break;
} }
if (attr_mask & IB_QP_PKEY_INDEX) if (attr_mask & IB_QP_PKEY_INDEX)
...@@ -597,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -597,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
qp->s_max_rd_atomic = attr->max_rd_atomic; qp->s_max_rd_atomic = attr->max_rd_atomic;
qp->state = new_state; spin_unlock_irq(&qp->s_lock);
spin_unlock_irqrestore(&qp->s_lock, flags);
if (lastwqe) { if (lastwqe) {
struct ib_event ev; struct ib_event ev;
...@@ -612,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -612,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto bail; goto bail;
inval: inval:
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irq(&qp->s_lock);
ret = -EINVAL; ret = -EINVAL;
bail: bail:
...@@ -643,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -643,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->pkey_index = qp->s_pkey_index; attr->pkey_index = qp->s_pkey_index;
attr->alt_pkey_index = 0; attr->alt_pkey_index = 0;
attr->en_sqd_async_notify = 0; attr->en_sqd_async_notify = 0;
attr->sq_draining = 0; attr->sq_draining = qp->s_draining;
attr->max_rd_atomic = qp->s_max_rd_atomic; attr->max_rd_atomic = qp->s_max_rd_atomic;
attr->max_dest_rd_atomic = qp->r_max_rd_atomic; attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
attr->min_rnr_timer = qp->r_min_rnr_timer; attr->min_rnr_timer = qp->r_min_rnr_timer;
...@@ -833,6 +830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, ...@@ -833,6 +830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
spin_lock_init(&qp->r_rq.lock); spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0); atomic_set(&qp->refcount, 0);
init_waitqueue_head(&qp->wait); init_waitqueue_head(&qp->wait);
init_waitqueue_head(&qp->wait_dma);
tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
INIT_LIST_HEAD(&qp->piowait); INIT_LIST_HEAD(&qp->piowait);
INIT_LIST_HEAD(&qp->timerwait); INIT_LIST_HEAD(&qp->timerwait);
...@@ -926,6 +924,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, ...@@ -926,6 +924,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
else else
vfree(qp->r_rq.wq); vfree(qp->r_rq.wq);
ipath_free_qp(&dev->qp_table, qp); ipath_free_qp(&dev->qp_table, qp);
free_qpn(&dev->qp_table, qp->ibqp.qp_num);
bail_qp: bail_qp:
kfree(qp); kfree(qp);
bail_swq: bail_swq:
...@@ -947,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp) ...@@ -947,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
{ {
struct ipath_qp *qp = to_iqp(ibqp); struct ipath_qp *qp = to_iqp(ibqp);
struct ipath_ibdev *dev = to_idev(ibqp->device); struct ipath_ibdev *dev = to_idev(ibqp->device);
unsigned long flags;
spin_lock_irqsave(&qp->s_lock, flags); /* Make sure HW and driver activity is stopped. */
qp->state = IB_QPS_ERR; spin_lock_irq(&qp->s_lock);
spin_unlock_irqrestore(&qp->s_lock, flags); if (qp->state != IB_QPS_RESET) {
spin_lock(&dev->n_qps_lock); qp->state = IB_QPS_RESET;
dev->n_qps_allocated--; spin_lock(&dev->pending_lock);
spin_unlock(&dev->n_qps_lock); if (!list_empty(&qp->timerwait))
list_del_init(&qp->timerwait);
if (!list_empty(&qp->piowait))
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
qp->s_flags &= ~IPATH_S_ANY_WAIT;
spin_unlock_irq(&qp->s_lock);
/* Stop the sending tasklet */
tasklet_kill(&qp->s_task);
wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
} else
spin_unlock_irq(&qp->s_lock);
/* Stop the sending tasklet. */ ipath_free_qp(&dev->qp_table, qp);
tasklet_kill(&qp->s_task);
if (qp->s_tx) { if (qp->s_tx) {
atomic_dec(&qp->refcount); atomic_dec(&qp->refcount);
if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
kfree(qp->s_tx->txreq.map_addr); kfree(qp->s_tx->txreq.map_addr);
spin_lock_irq(&dev->pending_lock);
list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
spin_unlock_irq(&dev->pending_lock);
qp->s_tx = NULL;
} }
/* Make sure the QP isn't on the timeout list. */ wait_event(qp->wait, !atomic_read(&qp->refcount));
spin_lock_irqsave(&dev->pending_lock, flags);
if (!list_empty(&qp->timerwait))
list_del_init(&qp->timerwait);
if (!list_empty(&qp->piowait))
list_del_init(&qp->piowait);
if (qp->s_tx)
list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
spin_unlock_irqrestore(&dev->pending_lock, flags);
/* /* all user's cleaned up, mark it available */
* Make sure that the QP is not in the QPN table so receive free_qpn(&dev->qp_table, qp->ibqp.qp_num);
* interrupts will discard packets for this QP. XXX Also remove QP spin_lock(&dev->n_qps_lock);
* from multicast table. dev->n_qps_allocated--;
*/ spin_unlock(&dev->n_qps_lock);
if (atomic_read(&qp->refcount) != 0)
ipath_free_qp(&dev->qp_table, qp);
if (qp->ip) if (qp->ip)
kref_put(&qp->ip->ref, ipath_release_mmap_info); kref_put(&qp->ip->ref, ipath_release_mmap_info);
...@@ -1025,48 +1027,6 @@ int ipath_init_qp_table(struct ipath_ibdev *idev, int size) ...@@ -1025,48 +1027,6 @@ int ipath_init_qp_table(struct ipath_ibdev *idev, int size)
return ret; return ret;
} }
/**
* ipath_sqerror_qp - put a QP's send queue into an error state
* @qp: QP who's send queue will be put into an error state
* @wc: the WC responsible for putting the QP in this state
*
* Flushes the send work queue.
* The QP s_lock should be held and interrupts disabled.
*/
void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
qp->ibqp.qp_num, qp->remote_qpn, wc->status);
spin_lock(&dev->pending_lock);
if (!list_empty(&qp->timerwait))
list_del_init(&qp->timerwait);
if (!list_empty(&qp->piowait))
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
wc->status = IB_WC_WR_FLUSH_ERR;
while (qp->s_last != qp->s_head) {
wqe = get_swqe_ptr(qp, qp->s_last);
wc->wr_id = wqe->wr.wr_id;
wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
}
qp->s_cur = qp->s_tail = qp->s_head;
qp->state = IB_QPS_SQE;
}
/** /**
* ipath_get_credit - flush the send work queue of a QP * ipath_get_credit - flush the send work queue of a QP
* @qp: the qp who's send work queue to flush * @qp: the qp who's send work queue to flush
...@@ -1093,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth) ...@@ -1093,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
} }
/* Restart sending if it was blocked due to lack of credits. */ /* Restart sending if it was blocked due to lack of credits. */
if (qp->s_cur != qp->s_head && if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
qp->s_cur != qp->s_head &&
(qp->s_lsn == (u32) -1 || (qp->s_lsn == (u32) -1 ||
ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
qp->s_lsn + 1) <= 0)) qp->s_lsn + 1) <= 0))
tasklet_hi_schedule(&qp->s_task); ipath_schedule_send(qp);
} }
...@@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, ...@@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
u32 bth0; u32 bth0;
u32 bth2; u32 bth2;
/* Don't send an ACK if we aren't supposed to. */
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
goto bail;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5; hwords = 5;
...@@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp) ...@@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp)
ipath_make_rc_ack(dev, qp, ohdr, pmtu)) ipath_make_rc_ack(dev, qp, ohdr, pmtu))
goto done; goto done;
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
qp->s_rnr_timeout || qp->s_wait_credit) if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
goto bail; goto bail;
/* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head)
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&qp->s_dma_busy)) {
qp->s_flags |= IPATH_S_WAIT_DMA;
goto bail;
}
wqe = get_swqe_ptr(qp, qp->s_last);
ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
/* Limit the number of packets sent without an ACK. */ /* Leave BUSY set until RNR timeout. */
if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { if (qp->s_rnr_timeout) {
qp->s_wait_credit = 1; qp->s_flags |= IPATH_S_WAITING;
dev->n_rc_stalls++;
goto bail; goto bail;
} }
...@@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp) ...@@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
wqe = get_swqe_ptr(qp, qp->s_cur); wqe = get_swqe_ptr(qp, qp->s_cur);
switch (qp->s_state) { switch (qp->s_state) {
default: default:
if (!(ib_ipath_state_ops[qp->state] &
IPATH_PROCESS_NEXT_SEND_OK))
goto bail;
/* /*
* Resend an old request or start a new one. * Resend an old request or start a new one.
* *
...@@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp) ...@@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
case IB_WR_SEND_WITH_IMM: case IB_WR_SEND_WITH_IMM:
/* If no credit, return. */ /* If no credit, return. */
if (qp->s_lsn != (u32) -1 && if (qp->s_lsn != (u32) -1 &&
ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
goto bail; goto bail;
}
wqe->lpsn = wqe->psn; wqe->lpsn = wqe->psn;
if (len > pmtu) { if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu; wqe->lpsn += (len - 1) / pmtu;
...@@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp) ...@@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */ /* If no credit, return. */
if (qp->s_lsn != (u32) -1 && if (qp->s_lsn != (u32) -1 &&
ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
goto bail; goto bail;
}
ohdr->u.rc.reth.vaddr = ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->wr.wr.rdma.remote_addr); cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
ohdr->u.rc.reth.rkey = ohdr->u.rc.reth.rkey =
...@@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp) ...@@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp)
ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done: done:
ret = 1; ret = 1;
goto unlock;
bail: bail:
qp->s_flags &= ~IPATH_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
return ret; return ret;
} }
...@@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp) ...@@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp)
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
/* Don't try to send ACKs if the link isn't ACTIVE */
dd = dev->dd; dd = dev->dd;
if (!(dd->ipath_flags & IPATH_LINKACTIVE))
goto done;
piobuf = ipath_getpiobuf(dd, 0, NULL); piobuf = ipath_getpiobuf(dd, 0, NULL);
if (!piobuf) { if (!piobuf) {
/* /*
...@@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp) ...@@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp)
goto done; goto done;
queue_ack: queue_ack:
dev->n_rc_qacks++; if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
qp->s_flags |= IPATH_S_ACK_PENDING; dev->n_rc_qacks++;
qp->s_nak_state = qp->r_nak_state; qp->s_flags |= IPATH_S_ACK_PENDING;
qp->s_ack_psn = qp->r_ack_psn; qp->s_nak_state = qp->r_nak_state;
qp->s_ack_psn = qp->r_ack_psn;
/* Schedule the send tasklet. */
ipath_schedule_send(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
/* Call ipath_do_rc_send() in another thread. */
tasklet_hi_schedule(&qp->s_task);
done: done:
return; return;
} }
...@@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn) ...@@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn)
/* /*
* Set the state to restart in the middle of a request. * Set the state to restart in the middle of a request.
* Don't change the s_sge, s_cur_sge, or s_cur_size. * Don't change the s_sge, s_cur_sge, or s_cur_size.
* See ipath_do_rc_send(). * See ipath_make_rc_req().
*/ */
switch (opcode) { switch (opcode) {
case IB_WR_SEND: case IB_WR_SEND:
...@@ -771,27 +802,14 @@ static void reset_psn(struct ipath_qp *qp, u32 psn) ...@@ -771,27 +802,14 @@ static void reset_psn(struct ipath_qp *qp, u32 psn)
* *
* The QP s_lock should be held and interrupts disabled. * The QP s_lock should be held and interrupts disabled.
*/ */
void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
{ {
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
struct ipath_ibdev *dev; struct ipath_ibdev *dev;
if (qp->s_retry == 0) { if (qp->s_retry == 0) {
wc->wr_id = wqe->wr.wr_id; ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
wc->status = IB_WC_RETRY_EXC_ERR; ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc->vendor_err = 0;
wc->byte_len = 0;
wc->qp = &qp->ibqp;
wc->imm_data = 0;
wc->src_qp = qp->remote_qpn;
wc->wc_flags = 0;
wc->pkey_index = 0;
wc->slid = qp->remote_ah_attr.dlid;
wc->sl = qp->remote_ah_attr.sl;
wc->dlid_path_bits = 0;
wc->port_num = 0;
ipath_sqerror_qp(qp, wc);
goto bail; goto bail;
} }
qp->s_retry--; qp->s_retry--;
...@@ -804,6 +822,8 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) ...@@ -804,6 +822,8 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
spin_lock(&dev->pending_lock); spin_lock(&dev->pending_lock);
if (!list_empty(&qp->timerwait)) if (!list_empty(&qp->timerwait))
list_del_init(&qp->timerwait); list_del_init(&qp->timerwait);
if (!list_empty(&qp->piowait))
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock); spin_unlock(&dev->pending_lock);
if (wqe->wr.opcode == IB_WR_RDMA_READ) if (wqe->wr.opcode == IB_WR_RDMA_READ)
...@@ -812,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) ...@@ -812,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
reset_psn(qp, psn); reset_psn(qp, psn);
tasklet_hi_schedule(&qp->s_task); ipath_schedule_send(qp);
bail: bail:
return; return;
...@@ -820,13 +840,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) ...@@ -820,13 +840,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
static inline void update_last_psn(struct ipath_qp *qp, u32 psn) static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
{ {
if (qp->s_last_psn != psn) { qp->s_last_psn = psn;
qp->s_last_psn = psn;
if (qp->s_wait_credit) {
qp->s_wait_credit = 0;
tasklet_hi_schedule(&qp->s_task);
}
}
} }
/** /**
...@@ -845,6 +859,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ...@@ -845,6 +859,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
{ {
struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc; struct ib_wc wc;
enum ib_wc_status status;
struct ipath_swqe *wqe; struct ipath_swqe *wqe;
int ret = 0; int ret = 0;
u32 ack_psn; u32 ack_psn;
...@@ -909,7 +924,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ...@@ -909,7 +924,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
*/ */
update_last_psn(qp, wqe->psn - 1); update_last_psn(qp, wqe->psn - 1);
/* Retry this request. */ /* Retry this request. */
ipath_restart_rc(qp, wqe->psn, &wc); ipath_restart_rc(qp, wqe->psn);
/* /*
* No need to process the ACK/NAK since we are * No need to process the ACK/NAK since we are
* restarting an earlier request. * restarting an earlier request.
...@@ -925,32 +940,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ...@@ -925,32 +940,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
qp->s_num_rd_atomic--; qp->s_num_rd_atomic--;
/* Restart sending task if fence is complete */ /* Restart sending task if fence is complete */
if ((qp->s_flags & IPATH_S_FENCE_PENDING) && if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
!qp->s_num_rd_atomic) { !qp->s_num_rd_atomic) ||
qp->s_flags &= ~IPATH_S_FENCE_PENDING; qp->s_flags & IPATH_S_RDMAR_PENDING)
tasklet_hi_schedule(&qp->s_task); ipath_schedule_send(qp);
} else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
tasklet_hi_schedule(&qp->s_task);
}
} }
/* Post a send completion queue entry if requested. */ /* Post a send completion queue entry if requested. */
if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) { (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
memset(&wc, 0, sizeof wc);
wc.wr_id = wqe->wr.wr_id; wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS; wc.status = IB_WC_SUCCESS;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc.vendor_err = 0;
wc.byte_len = wqe->length; wc.byte_len = wqe->length;
wc.imm_data = 0;
wc.qp = &qp->ibqp; wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn; wc.src_qp = qp->remote_qpn;
wc.wc_flags = 0;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid; wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl; wc.sl = qp->remote_ah_attr.sl;
wc.dlid_path_bits = 0;
wc.port_num = 0;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
} }
qp->s_retry = qp->s_retry_cnt; qp->s_retry = qp->s_retry_cnt;
...@@ -971,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ...@@ -971,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
} else { } else {
if (++qp->s_last >= qp->s_size) if (++qp->s_last >= qp->s_size)
qp->s_last = 0; qp->s_last = 0;
if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
qp->s_draining = 0;
if (qp->s_last == qp->s_tail) if (qp->s_last == qp->s_tail)
break; break;
wqe = get_swqe_ptr(qp, qp->s_last); wqe = get_swqe_ptr(qp, qp->s_last);
...@@ -994,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ...@@ -994,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
*/ */
if (ipath_cmp24(qp->s_psn, psn) <= 0) { if (ipath_cmp24(qp->s_psn, psn) <= 0) {
reset_psn(qp, psn + 1); reset_psn(qp, psn + 1);
tasklet_hi_schedule(&qp->s_task); ipath_schedule_send(qp);
} }
} else if (ipath_cmp24(qp->s_psn, psn) <= 0) { } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
qp->s_state = OP(SEND_LAST); qp->s_state = OP(SEND_LAST);
...@@ -1012,7 +1020,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ...@@ -1012,7 +1020,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
if (qp->s_last == qp->s_tail) if (qp->s_last == qp->s_tail)
goto bail; goto bail;
if (qp->s_rnr_retry == 0) { if (qp->s_rnr_retry == 0) {
wc.status = IB_WC_RNR_RETRY_EXC_ERR; status = IB_WC_RNR_RETRY_EXC_ERR;
goto class_b; goto class_b;
} }
if (qp->s_rnr_retry_cnt < 7) if (qp->s_rnr_retry_cnt < 7)
...@@ -1033,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ...@@ -1033,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) & ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
IPATH_AETH_CREDIT_MASK]; IPATH_AETH_CREDIT_MASK];
ipath_insert_rnr_queue(qp); ipath_insert_rnr_queue(qp);
ipath_schedule_send(qp);
goto bail; goto bail;
case 3: /* NAK */ case 3: /* NAK */
...@@ -1050,37 +1059,25 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ...@@ -1050,37 +1059,25 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
* RDMA READ response which terminates the RDMA * RDMA READ response which terminates the RDMA
* READ. * READ.
*/ */
ipath_restart_rc(qp, psn, &wc); ipath_restart_rc(qp, psn);
break; break;
case 1: /* Invalid Request */ case 1: /* Invalid Request */
wc.status = IB_WC_REM_INV_REQ_ERR; status = IB_WC_REM_INV_REQ_ERR;
dev->n_other_naks++; dev->n_other_naks++;
goto class_b; goto class_b;
case 2: /* Remote Access Error */ case 2: /* Remote Access Error */
wc.status = IB_WC_REM_ACCESS_ERR; status = IB_WC_REM_ACCESS_ERR;
dev->n_other_naks++; dev->n_other_naks++;
goto class_b; goto class_b;
case 3: /* Remote Operation Error */ case 3: /* Remote Operation Error */
wc.status = IB_WC_REM_OP_ERR; status = IB_WC_REM_OP_ERR;
dev->n_other_naks++; dev->n_other_naks++;
class_b: class_b:
wc.wr_id = wqe->wr.wr_id; ipath_send_complete(qp, wqe, status);
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
wc.vendor_err = 0;
wc.byte_len = 0;
wc.qp = &qp->ibqp;
wc.imm_data = 0;
wc.src_qp = qp->remote_qpn;
wc.wc_flags = 0;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
wc.dlid_path_bits = 0;
wc.port_num = 0;
ipath_sqerror_qp(qp, &wc);
break; break;
default: default:
...@@ -1126,8 +1123,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, ...@@ -1126,8 +1123,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
int header_in_data) int header_in_data)
{ {
struct ipath_swqe *wqe; struct ipath_swqe *wqe;
enum ib_wc_status status;
unsigned long flags; unsigned long flags;
struct ib_wc wc;
int diff; int diff;
u32 pad; u32 pad;
u32 aeth; u32 aeth;
...@@ -1135,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, ...@@ -1135,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, flags);
/* Double check we can process this now that we hold the s_lock. */
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
goto ack_done;
/* Ignore invalid responses. */ /* Ignore invalid responses. */
if (ipath_cmp24(psn, qp->s_next_psn) >= 0) if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
goto ack_done; goto ack_done;
...@@ -1159,6 +1160,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, ...@@ -1159,6 +1160,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
if (unlikely(qp->s_last == qp->s_tail)) if (unlikely(qp->s_last == qp->s_tail))
goto ack_done; goto ack_done;
wqe = get_swqe_ptr(qp, qp->s_last); wqe = get_swqe_ptr(qp, qp->s_last);
status = IB_WC_SUCCESS;
switch (opcode) { switch (opcode) {
case OP(ACKNOWLEDGE): case OP(ACKNOWLEDGE):
...@@ -1187,6 +1189,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, ...@@ -1187,6 +1189,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
wqe = get_swqe_ptr(qp, qp->s_last); wqe = get_swqe_ptr(qp, qp->s_last);
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err; goto ack_op_err;
qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
/* /*
* If this is a response to a resent RDMA read, we * If this is a response to a resent RDMA read, we
* have to be careful to copy the data to the right * have to be careful to copy the data to the right
...@@ -1200,7 +1203,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, ...@@ -1200,7 +1203,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
/* no AETH, no ACK */ /* no AETH, no ACK */
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
dev->n_rdma_seq++; dev->n_rdma_seq++;
ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); if (qp->r_flags & IPATH_R_RDMAR_SEQ)
goto ack_done;
qp->r_flags |= IPATH_R_RDMAR_SEQ;
ipath_restart_rc(qp, qp->s_last_psn + 1);
goto ack_done; goto ack_done;
} }
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
...@@ -1261,7 +1267,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, ...@@ -1261,7 +1267,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
/* ACKs READ req. */ /* ACKs READ req. */
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
dev->n_rdma_seq++; dev->n_rdma_seq++;
ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); if (qp->r_flags & IPATH_R_RDMAR_SEQ)
goto ack_done;
qp->r_flags |= IPATH_R_RDMAR_SEQ;
ipath_restart_rc(qp, qp->s_last_psn + 1);
goto ack_done; goto ack_done;
} }
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
...@@ -1291,31 +1300,16 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, ...@@ -1291,31 +1300,16 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
goto ack_done; goto ack_done;
} }
ack_done:
spin_unlock_irqrestore(&qp->s_lock, flags);
goto bail;
ack_op_err: ack_op_err:
wc.status = IB_WC_LOC_QP_OP_ERR; status = IB_WC_LOC_QP_OP_ERR;
goto ack_err; goto ack_err;
ack_len_err: ack_len_err:
wc.status = IB_WC_LOC_LEN_ERR; status = IB_WC_LOC_LEN_ERR;
ack_err: ack_err:
wc.wr_id = wqe->wr.wr_id; ipath_send_complete(qp, wqe, status);
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
wc.vendor_err = 0; ack_done:
wc.byte_len = 0;
wc.imm_data = 0;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.wc_flags = 0;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
wc.dlid_path_bits = 0;
wc.port_num = 0;
ipath_sqerror_qp(qp, &wc);
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
bail: bail:
return; return;
...@@ -1384,7 +1378,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, ...@@ -1384,7 +1378,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
psn &= IPATH_PSN_MASK; psn &= IPATH_PSN_MASK;
e = NULL; e = NULL;
old_req = 1; old_req = 1;
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, flags);
/* Double check we can process this now that we hold the s_lock. */
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
goto unlock_done;
for (i = qp->r_head_ack_queue; ; i = prev) { for (i = qp->r_head_ack_queue; ; i = prev) {
if (i == qp->s_tail_ack_queue) if (i == qp->s_tail_ack_queue)
old_req = 0; old_req = 0;
...@@ -1512,7 +1511,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, ...@@ -1512,7 +1511,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
break; break;
} }
qp->r_nak_state = 0; qp->r_nak_state = 0;
tasklet_hi_schedule(&qp->s_task); ipath_schedule_send(qp);
unlock_done: unlock_done:
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
...@@ -1523,13 +1522,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, ...@@ -1523,13 +1522,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
return 0; return 0;
} }
static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
{ {
unsigned long flags; unsigned long flags;
int lastwqe; int lastwqe;
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, flags);
qp->state = IB_QPS_ERR;
lastwqe = ipath_error_qp(qp, err); lastwqe = ipath_error_qp(qp, err);
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
...@@ -1545,18 +1543,15 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) ...@@ -1545,18 +1543,15 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
{ {
unsigned long flags;
unsigned next; unsigned next;
next = n + 1; next = n + 1;
if (next > IPATH_MAX_RDMA_ATOMIC) if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0; next = 0;
spin_lock_irqsave(&qp->s_lock, flags);
if (n == qp->s_tail_ack_queue) { if (n == qp->s_tail_ack_queue) {
qp->s_tail_ack_queue = next; qp->s_tail_ack_queue = next;
qp->s_ack_state = OP(ACKNOWLEDGE); qp->s_ack_state = OP(ACKNOWLEDGE);
} }
spin_unlock_irqrestore(&qp->s_lock, flags);
} }
/** /**
...@@ -1585,6 +1580,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -1585,6 +1580,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int diff; int diff;
struct ib_reth *reth; struct ib_reth *reth;
int header_in_data; int header_in_data;
unsigned long flags;
/* Validate the SLID. See Ch. 9.6.1.5 */ /* Validate the SLID. See Ch. 9.6.1.5 */
if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
...@@ -1643,11 +1639,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -1643,11 +1639,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
opcode == OP(SEND_LAST) || opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE)) opcode == OP(SEND_LAST_WITH_IMMEDIATE))
break; break;
nack_inv: goto nack_inv;
ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
goto send_ack;
case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_MIDDLE): case OP(RDMA_WRITE_MIDDLE):
...@@ -1673,18 +1665,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -1673,18 +1665,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
break; break;
} }
wc.imm_data = 0; memset(&wc, 0, sizeof wc);
wc.wc_flags = 0;
/* OK, process the packet. */ /* OK, process the packet. */
switch (opcode) { switch (opcode) {
case OP(SEND_FIRST): case OP(SEND_FIRST):
if (!ipath_get_rwqe(qp, 0)) { if (!ipath_get_rwqe(qp, 0))
rnr_nak: goto rnr_nak;
qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
qp->r_ack_psn = qp->r_psn;
goto send_ack;
}
qp->r_rcv_len = 0; qp->r_rcv_len = 0;
/* FALLTHROUGH */ /* FALLTHROUGH */
case OP(SEND_MIDDLE): case OP(SEND_MIDDLE):
...@@ -1741,9 +1728,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -1741,9 +1728,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_inv; goto nack_inv;
ipath_copy_sge(&qp->r_sge, data, tlen); ipath_copy_sge(&qp->r_sge, data, tlen);
qp->r_msn++; qp->r_msn++;
if (!qp->r_wrid_valid) if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
break; break;
qp->r_wrid_valid = 0;
wc.wr_id = qp->r_wr_id; wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS; wc.status = IB_WC_SUCCESS;
if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
...@@ -1751,14 +1737,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -1751,14 +1737,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
else else
wc.opcode = IB_WC_RECV; wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
wc.qp = &qp->ibqp; wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn; wc.src_qp = qp->remote_qpn;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid; wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl; wc.sl = qp->remote_ah_attr.sl;
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */ /* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] & (ohdr->bth[0] &
...@@ -1819,9 +1801,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -1819,9 +1801,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
next = qp->r_head_ack_queue + 1; next = qp->r_head_ack_queue + 1;
if (next > IPATH_MAX_RDMA_ATOMIC) if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0; next = 0;
spin_lock_irqsave(&qp->s_lock, flags);
/* Double check we can process this while holding the s_lock. */
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
goto unlock;
if (unlikely(next == qp->s_tail_ack_queue)) { if (unlikely(next == qp->s_tail_ack_queue)) {
if (!qp->s_ack_queue[next].sent) if (!qp->s_ack_queue[next].sent)
goto nack_inv; goto nack_inv_unlck;
ipath_update_ack_queue(qp, next); ipath_update_ack_queue(qp, next);
} }
e = &qp->s_ack_queue[qp->r_head_ack_queue]; e = &qp->s_ack_queue[qp->r_head_ack_queue];
...@@ -1842,7 +1828,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -1842,7 +1828,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
rkey, IB_ACCESS_REMOTE_READ); rkey, IB_ACCESS_REMOTE_READ);
if (unlikely(!ok)) if (unlikely(!ok))
goto nack_acc; goto nack_acc_unlck;
/* /*
* Update the next expected PSN. We add 1 later * Update the next expected PSN. We add 1 later
* below, so only add the remainder here. * below, so only add the remainder here.
...@@ -1869,13 +1855,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -1869,13 +1855,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
qp->r_psn++; qp->r_psn++;
qp->r_state = opcode; qp->r_state = opcode;
qp->r_nak_state = 0; qp->r_nak_state = 0;
barrier();
qp->r_head_ack_queue = next; qp->r_head_ack_queue = next;
/* Call ipath_do_rc_send() in another thread. */ /* Schedule the send tasklet. */
tasklet_hi_schedule(&qp->s_task); ipath_schedule_send(qp);
goto done; goto unlock;
} }
case OP(COMPARE_SWAP): case OP(COMPARE_SWAP):
...@@ -1894,9 +1879,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -1894,9 +1879,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
next = qp->r_head_ack_queue + 1; next = qp->r_head_ack_queue + 1;
if (next > IPATH_MAX_RDMA_ATOMIC) if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0; next = 0;
spin_lock_irqsave(&qp->s_lock, flags);
/* Double check we can process this while holding the s_lock. */
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
goto unlock;
if (unlikely(next == qp->s_tail_ack_queue)) { if (unlikely(next == qp->s_tail_ack_queue)) {
if (!qp->s_ack_queue[next].sent) if (!qp->s_ack_queue[next].sent)
goto nack_inv; goto nack_inv_unlck;
ipath_update_ack_queue(qp, next); ipath_update_ack_queue(qp, next);
} }
if (!header_in_data) if (!header_in_data)
...@@ -1906,13 +1895,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -1906,13 +1895,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
be32_to_cpu(ateth->vaddr[1]); be32_to_cpu(ateth->vaddr[1]);
if (unlikely(vaddr & (sizeof(u64) - 1))) if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv; goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey); rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */ /* Check rkey & NAK */
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
sizeof(u64), vaddr, rkey, sizeof(u64), vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC))) IB_ACCESS_REMOTE_ATOMIC)))
goto nack_acc; goto nack_acc_unlck;
/* Perform atomic OP and save result. */ /* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr; maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
sdata = be64_to_cpu(ateth->swap_data); sdata = be64_to_cpu(ateth->swap_data);
...@@ -1929,13 +1918,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -1929,13 +1918,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
qp->r_psn++; qp->r_psn++;
qp->r_state = opcode; qp->r_state = opcode;
qp->r_nak_state = 0; qp->r_nak_state = 0;
barrier();
qp->r_head_ack_queue = next; qp->r_head_ack_queue = next;
/* Call ipath_do_rc_send() in another thread. */ /* Schedule the send tasklet. */
tasklet_hi_schedule(&qp->s_task); ipath_schedule_send(qp);
goto done; goto unlock;
} }
default: default:
...@@ -1951,14 +1939,31 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -1951,14 +1939,31 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto send_ack; goto send_ack;
goto done; goto done;
rnr_nak:
qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
qp->r_ack_psn = qp->r_psn;
goto send_ack;
nack_inv_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
goto send_ack;
nack_acc_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc: nack_acc:
ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn; qp->r_ack_psn = qp->r_psn;
send_ack: send_ack:
send_rc_ack(qp); send_rc_ack(qp);
goto done;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
done: done:
return; return;
} }
/* /*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
...@@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = { ...@@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = {
* ipath_insert_rnr_queue - put QP on the RNR timeout list for the device * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
* @qp: the QP * @qp: the QP
* *
* Called with the QP s_lock held and interrupts disabled.
* XXX Use a simple list for now. We might need a priority * XXX Use a simple list for now. We might need a priority
* queue if we have lots of QPs waiting for RNR timeouts * queue if we have lots of QPs waiting for RNR timeouts
* but that should be rare. * but that should be rare.
...@@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = { ...@@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = {
void ipath_insert_rnr_queue(struct ipath_qp *qp) void ipath_insert_rnr_queue(struct ipath_qp *qp)
{ {
struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
unsigned long flags;
spin_lock_irqsave(&dev->pending_lock, flags); /* We already did a spin_lock_irqsave(), so just use spin_lock */
spin_lock(&dev->pending_lock);
if (list_empty(&dev->rnrwait)) if (list_empty(&dev->rnrwait))
list_add(&qp->timerwait, &dev->rnrwait); list_add(&qp->timerwait, &dev->rnrwait);
else { else {
...@@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) ...@@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
nqp->s_rnr_timeout -= qp->s_rnr_timeout; nqp->s_rnr_timeout -= qp->s_rnr_timeout;
list_add(&qp->timerwait, l); list_add(&qp->timerwait, l);
} }
spin_unlock_irqrestore(&dev->pending_lock, flags); spin_unlock(&dev->pending_lock);
} }
/** /**
...@@ -140,20 +141,11 @@ int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, ...@@ -140,20 +141,11 @@ int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
goto bail; goto bail;
bad_lkey: bad_lkey:
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr_id; wc.wr_id = wqe->wr_id;
wc.status = IB_WC_LOC_PROT_ERR; wc.status = IB_WC_LOC_PROT_ERR;
wc.opcode = IB_WC_RECV; wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
wc.byte_len = 0;
wc.imm_data = 0;
wc.qp = &qp->ibqp; wc.qp = &qp->ibqp;
wc.src_qp = 0;
wc.wc_flags = 0;
wc.pkey_index = 0;
wc.slid = 0;
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal solicited completion event. */ /* Signal solicited completion event. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
ret = 0; ret = 0;
...@@ -194,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) ...@@ -194,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
} }
spin_lock_irqsave(&rq->lock, flags); spin_lock_irqsave(&rq->lock, flags);
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
ret = 0;
goto unlock;
}
wq = rq->wq; wq = rq->wq;
tail = wq->tail; tail = wq->tail;
/* Validate tail before using it since it is user writable. */ /* Validate tail before using it since it is user writable. */
...@@ -201,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) ...@@ -201,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
tail = 0; tail = 0;
do { do {
if (unlikely(tail == wq->head)) { if (unlikely(tail == wq->head)) {
spin_unlock_irqrestore(&rq->lock, flags);
ret = 0; ret = 0;
goto bail; goto unlock;
} }
/* Make sure entry is read after head index is read. */ /* Make sure entry is read after head index is read. */
smp_rmb(); smp_rmb();
...@@ -216,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) ...@@ -216,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
wq->tail = tail; wq->tail = tail;
ret = 1; ret = 1;
qp->r_wrid_valid = 1; set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
if (handler) { if (handler) {
u32 n; u32 n;
...@@ -243,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) ...@@ -243,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
goto bail; goto bail;
} }
} }
unlock:
spin_unlock_irqrestore(&rq->lock, flags); spin_unlock_irqrestore(&rq->lock, flags);
bail: bail:
return ret; return ret;
} }
...@@ -270,38 +266,63 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) ...@@ -270,38 +266,63 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
struct ib_wc wc; struct ib_wc wc;
u64 sdata; u64 sdata;
atomic64_t *maddr; atomic64_t *maddr;
enum ib_wc_status send_status;
/*
* Note that we check the responder QP state after
* checking the requester's state.
*/
qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
if (!qp) {
dev->n_pkt_drops++;
return;
}
again:
spin_lock_irqsave(&sqp->s_lock, flags); spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || /* Return if we are already busy processing a work request. */
sqp->s_rnr_timeout) { if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
spin_unlock_irqrestore(&sqp->s_lock, flags); !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
goto done; goto unlock;
}
/* Get the next send request. */ sqp->s_flags |= IPATH_S_BUSY;
if (sqp->s_last == sqp->s_head) {
/* Send work queue is empty. */ again:
spin_unlock_irqrestore(&sqp->s_lock, flags); if (sqp->s_last == sqp->s_head)
goto done; goto clr_busy;
wqe = get_swqe_ptr(sqp, sqp->s_last);
/* Return if it is not OK to start a new work reqeust. */
if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
goto clr_busy;
/* We are in the error state, flush the work request. */
send_status = IB_WC_WR_FLUSH_ERR;
goto flush_send;
} }
/* /*
* We can rely on the entry not changing without the s_lock * We can rely on the entry not changing without the s_lock
* being held until we update s_last. * being held until we update s_last.
* We increment s_cur to indicate s_last is in progress.
*/ */
wqe = get_swqe_ptr(sqp, sqp->s_last); if (sqp->s_last == sqp->s_cur) {
if (++sqp->s_cur >= sqp->s_size)
sqp->s_cur = 0;
}
spin_unlock_irqrestore(&sqp->s_lock, flags); spin_unlock_irqrestore(&sqp->s_lock, flags);
wc.wc_flags = 0; if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
wc.imm_data = 0; dev->n_pkt_drops++;
/*
* For RC, the requester would timeout and retry so
* shortcut the timeouts and just signal too many retries.
*/
if (sqp->ibqp.qp_type == IB_QPT_RC)
send_status = IB_WC_RETRY_EXC_ERR;
else
send_status = IB_WC_SUCCESS;
goto serr;
}
memset(&wc, 0, sizeof wc);
send_status = IB_WC_SUCCESS;
sqp->s_sge.sge = wqe->sg_list[0]; sqp->s_sge.sge = wqe->sg_list[0];
sqp->s_sge.sg_list = wqe->sg_list + 1; sqp->s_sge.sg_list = wqe->sg_list + 1;
...@@ -313,75 +334,33 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) ...@@ -313,75 +334,33 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
wc.imm_data = wqe->wr.ex.imm_data; wc.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */ /* FALLTHROUGH */
case IB_WR_SEND: case IB_WR_SEND:
if (!ipath_get_rwqe(qp, 0)) { if (!ipath_get_rwqe(qp, 0))
rnr_nak: goto rnr_nak;
/* Handle RNR NAK */
if (qp->ibqp.qp_type == IB_QPT_UC)
goto send_comp;
if (sqp->s_rnr_retry == 0) {
wc.status = IB_WC_RNR_RETRY_EXC_ERR;
goto err;
}
if (sqp->s_rnr_retry_cnt < 7)
sqp->s_rnr_retry--;
dev->n_rnr_naks++;
sqp->s_rnr_timeout =
ib_ipath_rnr_table[qp->r_min_rnr_timer];
ipath_insert_rnr_queue(sqp);
goto done;
}
break; break;
case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM:
if (unlikely(!(qp->qp_access_flags & if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
IB_ACCESS_REMOTE_WRITE))) { goto inv_err;
wc.status = IB_WC_REM_INV_REQ_ERR;
goto err;
}
wc.wc_flags = IB_WC_WITH_IMM; wc.wc_flags = IB_WC_WITH_IMM;
wc.imm_data = wqe->wr.ex.imm_data; wc.imm_data = wqe->wr.ex.imm_data;
if (!ipath_get_rwqe(qp, 1)) if (!ipath_get_rwqe(qp, 1))
goto rnr_nak; goto rnr_nak;
/* FALLTHROUGH */ /* FALLTHROUGH */
case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE:
if (unlikely(!(qp->qp_access_flags & if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
IB_ACCESS_REMOTE_WRITE))) { goto inv_err;
wc.status = IB_WC_REM_INV_REQ_ERR;
goto err;
}
if (wqe->length == 0) if (wqe->length == 0)
break; break;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey, wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_WRITE))) { IB_ACCESS_REMOTE_WRITE)))
acc_err: goto acc_err;
wc.status = IB_WC_REM_ACCESS_ERR;
err:
wc.wr_id = wqe->wr.wr_id;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc.vendor_err = 0;
wc.byte_len = 0;
wc.qp = &sqp->ibqp;
wc.src_qp = sqp->remote_qpn;
wc.pkey_index = 0;
wc.slid = sqp->remote_ah_attr.dlid;
wc.sl = sqp->remote_ah_attr.sl;
wc.dlid_path_bits = 0;
wc.port_num = 0;
spin_lock_irqsave(&sqp->s_lock, flags);
ipath_sqerror_qp(sqp, &wc);
spin_unlock_irqrestore(&sqp->s_lock, flags);
goto done;
}
break; break;
case IB_WR_RDMA_READ: case IB_WR_RDMA_READ:
if (unlikely(!(qp->qp_access_flags & if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
IB_ACCESS_REMOTE_READ))) { goto inv_err;
wc.status = IB_WC_REM_INV_REQ_ERR;
goto err;
}
if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey, wqe->wr.wr.rdma.rkey,
...@@ -394,11 +373,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) ...@@ -394,11 +373,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!(qp->qp_access_flags & if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
IB_ACCESS_REMOTE_ATOMIC))) { goto inv_err;
wc.status = IB_WC_REM_INV_REQ_ERR;
goto err;
}
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
wqe->wr.wr.atomic.remote_addr, wqe->wr.wr.atomic.remote_addr,
wqe->wr.wr.atomic.rkey, wqe->wr.wr.atomic.rkey,
...@@ -415,7 +391,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) ...@@ -415,7 +391,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
goto send_comp; goto send_comp;
default: default:
goto done; send_status = IB_WC_LOC_QP_OP_ERR;
goto serr;
} }
sge = &sqp->s_sge.sge; sge = &sqp->s_sge.sge;
...@@ -448,8 +425,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) ...@@ -448,8 +425,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
sqp->s_len -= len; sqp->s_len -= len;
} }
if (wqe->wr.opcode == IB_WR_RDMA_WRITE || if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
wqe->wr.opcode == IB_WR_RDMA_READ)
goto send_comp; goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
...@@ -458,33 +434,89 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) ...@@ -458,33 +434,89 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
wc.opcode = IB_WC_RECV; wc.opcode = IB_WC_RECV;
wc.wr_id = qp->r_wr_id; wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS; wc.status = IB_WC_SUCCESS;
wc.vendor_err = 0;
wc.byte_len = wqe->length; wc.byte_len = wqe->length;
wc.qp = &qp->ibqp; wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn; wc.src_qp = qp->remote_qpn;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid; wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl; wc.sl = qp->remote_ah_attr.sl;
wc.dlid_path_bits = 0;
wc.port_num = 1; wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */ /* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
wqe->wr.send_flags & IB_SEND_SOLICITED); wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp: send_comp:
spin_lock_irqsave(&sqp->s_lock, flags);
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
ipath_send_complete(sqp, wqe, IB_WC_SUCCESS); ipath_send_complete(sqp, wqe, send_status);
goto again; goto again;
rnr_nak:
/* Handle RNR NAK */
if (qp->ibqp.qp_type == IB_QPT_UC)
goto send_comp;
/*
* Note: we don't need the s_lock held since the BUSY flag
* makes this single threaded.
*/
if (sqp->s_rnr_retry == 0) {
send_status = IB_WC_RNR_RETRY_EXC_ERR;
goto serr;
}
if (sqp->s_rnr_retry_cnt < 7)
sqp->s_rnr_retry--;
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
goto clr_busy;
sqp->s_flags |= IPATH_S_WAITING;
dev->n_rnr_naks++;
sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
ipath_insert_rnr_queue(sqp);
goto clr_busy;
inv_err:
send_status = IB_WC_REM_INV_REQ_ERR;
wc.status = IB_WC_LOC_QP_OP_ERR;
goto err;
acc_err:
send_status = IB_WC_REM_ACCESS_ERR;
wc.status = IB_WC_LOC_PROT_ERR;
err:
/* responder goes to error state */
ipath_rc_error(qp, wc.status);
serr:
spin_lock_irqsave(&sqp->s_lock, flags);
ipath_send_complete(sqp, wqe, send_status);
if (sqp->ibqp.qp_type == IB_QPT_RC) {
int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
sqp->s_flags &= ~IPATH_S_BUSY;
spin_unlock_irqrestore(&sqp->s_lock, flags);
if (lastwqe) {
struct ib_event ev;
ev.device = sqp->ibqp.device;
ev.element.qp = &sqp->ibqp;
ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
}
goto done;
}
clr_busy:
sqp->s_flags &= ~IPATH_S_BUSY;
unlock:
spin_unlock_irqrestore(&sqp->s_lock, flags);
done: done:
if (atomic_dec_and_test(&qp->refcount)) if (qp && atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait); wake_up(&qp->wait);
} }
static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
{ {
if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) || if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
qp->ibqp.qp_type == IB_QPT_SMI) { qp->ibqp.qp_type == IB_QPT_SMI) {
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
...@@ -502,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) ...@@ -502,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
* @dev: the device we ran out of buffers on * @dev: the device we ran out of buffers on
* *
* Called when we run out of PIO buffers. * Called when we run out of PIO buffers.
* If we are now in the error state, return zero to flush the
* send work request.
*/ */
static void ipath_no_bufs_available(struct ipath_qp *qp, static int ipath_no_bufs_available(struct ipath_qp *qp,
struct ipath_ibdev *dev) struct ipath_ibdev *dev)
{ {
unsigned long flags; unsigned long flags;
int ret = 1;
/* /*
* Note that as soon as want_buffer() is called and * Note that as soon as want_buffer() is called and
* possibly before it returns, ipath_ib_piobufavail() * possibly before it returns, ipath_ib_piobufavail()
* could be called. If we are still in the tasklet function, * could be called. Therefore, put QP on the piowait list before
* tasklet_hi_schedule() will not call us until the next time * enabling the PIO avail interrupt.
* tasklet_hi_schedule() is called.
* We leave the busy flag set so that another post send doesn't
* try to put the same QP on the piowait list again.
*/ */
spin_lock_irqsave(&dev->pending_lock, flags); spin_lock_irqsave(&qp->s_lock, flags);
list_add_tail(&qp->piowait, &dev->piowait); if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
spin_unlock_irqrestore(&dev->pending_lock, flags); dev->n_piowait++;
want_buffer(dev->dd, qp); qp->s_flags |= IPATH_S_WAITING;
dev->n_piowait++; qp->s_flags &= ~IPATH_S_BUSY;
spin_lock(&dev->pending_lock);
if (list_empty(&qp->piowait))
list_add_tail(&qp->piowait, &dev->piowait);
spin_unlock(&dev->pending_lock);
} else
ret = 0;
spin_unlock_irqrestore(&qp->s_lock, flags);
if (ret)
want_buffer(dev->dd, qp);
return ret;
} }
/** /**
...@@ -597,15 +639,13 @@ void ipath_do_send(unsigned long data) ...@@ -597,15 +639,13 @@ void ipath_do_send(unsigned long data)
struct ipath_qp *qp = (struct ipath_qp *)data; struct ipath_qp *qp = (struct ipath_qp *)data;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
int (*make_req)(struct ipath_qp *qp); int (*make_req)(struct ipath_qp *qp);
unsigned long flags;
if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
goto bail;
if ((qp->ibqp.qp_type == IB_QPT_RC || if ((qp->ibqp.qp_type == IB_QPT_RC ||
qp->ibqp.qp_type == IB_QPT_UC) && qp->ibqp.qp_type == IB_QPT_UC) &&
qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
ipath_ruc_loopback(qp); ipath_ruc_loopback(qp);
goto clear; goto bail;
} }
if (qp->ibqp.qp_type == IB_QPT_RC) if (qp->ibqp.qp_type == IB_QPT_RC)
...@@ -615,6 +655,19 @@ void ipath_do_send(unsigned long data) ...@@ -615,6 +655,19 @@ void ipath_do_send(unsigned long data)
else else
make_req = ipath_make_ud_req; make_req = ipath_make_ud_req;
spin_lock_irqsave(&qp->s_lock, flags);
/* Return if we are already busy processing a work request. */
if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
spin_unlock_irqrestore(&qp->s_lock, flags);
goto bail;
}
qp->s_flags |= IPATH_S_BUSY;
spin_unlock_irqrestore(&qp->s_lock, flags);
again: again:
/* Check for a constructed packet to be sent. */ /* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) { if (qp->s_hdrwords != 0) {
...@@ -624,8 +677,8 @@ void ipath_do_send(unsigned long data) ...@@ -624,8 +677,8 @@ void ipath_do_send(unsigned long data)
*/ */
if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
qp->s_cur_sge, qp->s_cur_size)) { qp->s_cur_sge, qp->s_cur_size)) {
ipath_no_bufs_available(qp, dev); if (ipath_no_bufs_available(qp, dev))
goto bail; goto bail;
} }
dev->n_unicast_xmit++; dev->n_unicast_xmit++;
/* Record that we sent the packet and s_hdr is empty. */ /* Record that we sent the packet and s_hdr is empty. */
...@@ -634,16 +687,20 @@ void ipath_do_send(unsigned long data) ...@@ -634,16 +687,20 @@ void ipath_do_send(unsigned long data)
if (make_req(qp)) if (make_req(qp))
goto again; goto again;
clear:
clear_bit(IPATH_S_BUSY, &qp->s_busy);
bail:; bail:;
} }
/*
* This should be called with s_lock held.
*/
void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
enum ib_wc_status status) enum ib_wc_status status)
{ {
unsigned long flags; u32 old_last, last;
u32 last;
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
return;
/* See ch. 11.2.4.1 and 10.7.3.1 */ /* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
...@@ -651,27 +708,25 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, ...@@ -651,27 +708,25 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
status != IB_WC_SUCCESS) { status != IB_WC_SUCCESS) {
struct ib_wc wc; struct ib_wc wc;
memset(&wc, 0, sizeof wc);
wc.wr_id = wqe->wr.wr_id; wc.wr_id = wqe->wr.wr_id;
wc.status = status; wc.status = status;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc.vendor_err = 0;
wc.byte_len = wqe->length;
wc.imm_data = 0;
wc.qp = &qp->ibqp; wc.qp = &qp->ibqp;
wc.src_qp = 0; if (status == IB_WC_SUCCESS)
wc.wc_flags = 0; wc.byte_len = wqe->length;
wc.pkey_index = 0; ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
wc.slid = 0; status != IB_WC_SUCCESS);
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
} }
spin_lock_irqsave(&qp->s_lock, flags); old_last = last = qp->s_last;
last = qp->s_last;
if (++last >= qp->s_size) if (++last >= qp->s_size)
last = 0; last = 0;
qp->s_last = last; qp->s_last = last;
spin_unlock_irqrestore(&qp->s_lock, flags); if (qp->s_cur == old_last)
qp->s_cur = last;
if (qp->s_tail == old_last)
qp->s_tail = last;
if (qp->state == IB_QPS_SQD && last == qp->s_cur)
qp->s_draining = 0;
} }
/* /*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
...@@ -47,14 +47,30 @@ int ipath_make_uc_req(struct ipath_qp *qp) ...@@ -47,14 +47,30 @@ int ipath_make_uc_req(struct ipath_qp *qp)
{ {
struct ipath_other_headers *ohdr; struct ipath_other_headers *ohdr;
struct ipath_swqe *wqe; struct ipath_swqe *wqe;
unsigned long flags;
u32 hwords; u32 hwords;
u32 bth0; u32 bth0;
u32 len; u32 len;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
int ret = 0; int ret = 0;
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) spin_lock_irqsave(&qp->s_lock, flags);
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head)
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&qp->s_dma_busy)) {
qp->s_flags |= IPATH_S_WAIT_DMA;
goto bail;
}
wqe = get_swqe_ptr(qp, qp->s_last);
ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done; goto done;
}
ohdr = &qp->s_hdr.u.oth; ohdr = &qp->s_hdr.u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
...@@ -69,9 +85,12 @@ int ipath_make_uc_req(struct ipath_qp *qp) ...@@ -69,9 +85,12 @@ int ipath_make_uc_req(struct ipath_qp *qp)
qp->s_wqe = NULL; qp->s_wqe = NULL;
switch (qp->s_state) { switch (qp->s_state) {
default: default:
if (!(ib_ipath_state_ops[qp->state] &
IPATH_PROCESS_NEXT_SEND_OK))
goto bail;
/* Check if send work queue is empty. */ /* Check if send work queue is empty. */
if (qp->s_cur == qp->s_head) if (qp->s_cur == qp->s_head)
goto done; goto bail;
/* /*
* Start a new request. * Start a new request.
*/ */
...@@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp) ...@@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
break; break;
default: default:
goto done; goto bail;
} }
break; break;
...@@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp) ...@@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp)
ipath_make_ruc_header(to_idev(qp->ibqp.device), ipath_make_ruc_header(to_idev(qp->ibqp.device),
qp, ohdr, bth0 | (qp->s_state << 24), qp, ohdr, bth0 | (qp->s_state << 24),
qp->s_next_psn++ & IPATH_PSN_MASK); qp->s_next_psn++ & IPATH_PSN_MASK);
done:
ret = 1; ret = 1;
goto unlock;
done: bail:
qp->s_flags &= ~IPATH_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret; return ret;
} }
...@@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
*/ */
opcode = be32_to_cpu(ohdr->bth[0]) >> 24; opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
wc.imm_data = 0; memset(&wc, 0, sizeof wc);
wc.wc_flags = 0;
/* Compare the PSN verses the expected PSN. */ /* Compare the PSN verses the expected PSN. */
if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
...@@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_ONLY): case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE): case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first: send_first:
if (qp->r_reuse_sge) { if (qp->r_flags & IPATH_R_REUSE_SGE) {
qp->r_reuse_sge = 0; qp->r_flags &= ~IPATH_R_REUSE_SGE;
qp->r_sge = qp->s_rdma_read_sge; qp->r_sge = qp->s_rdma_read_sge;
} else if (!ipath_get_rwqe(qp, 0)) { } else if (!ipath_get_rwqe(qp, 0)) {
dev->n_pkt_drops++; dev->n_pkt_drops++;
...@@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_MIDDLE): case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */ /* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4))) { if (unlikely(tlen != (hdrsize + pmtu + 4))) {
qp->r_reuse_sge = 1; qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++; dev->n_pkt_drops++;
goto done; goto done;
} }
qp->r_rcv_len += pmtu; qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) { if (unlikely(qp->r_rcv_len > qp->r_len)) {
qp->r_reuse_sge = 1; qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++; dev->n_pkt_drops++;
goto done; goto done;
} }
...@@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/* Check for invalid length. */ /* Check for invalid length. */
/* XXX LAST len should be >= 1 */ /* XXX LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4))) { if (unlikely(tlen < (hdrsize + pad + 4))) {
qp->r_reuse_sge = 1; qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++; dev->n_pkt_drops++;
goto done; goto done;
} }
...@@ -380,7 +403,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -380,7 +403,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
tlen -= (hdrsize + pad + 4); tlen -= (hdrsize + pad + 4);
wc.byte_len = tlen + qp->r_rcv_len; wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len)) { if (unlikely(wc.byte_len > qp->r_len)) {
qp->r_reuse_sge = 1; qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++; dev->n_pkt_drops++;
goto done; goto done;
} }
...@@ -390,14 +413,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -390,14 +413,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
wc.wr_id = qp->r_wr_id; wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS; wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV; wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
wc.qp = &qp->ibqp; wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn; wc.src_qp = qp->remote_qpn;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid; wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl; wc.sl = qp->remote_ah_attr.sl;
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */ /* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] & (ohdr->bth[0] &
...@@ -488,8 +507,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -488,8 +507,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
dev->n_pkt_drops++; dev->n_pkt_drops++;
goto done; goto done;
} }
if (qp->r_reuse_sge) if (qp->r_flags & IPATH_R_REUSE_SGE)
qp->r_reuse_sge = 0; qp->r_flags &= ~IPATH_R_REUSE_SGE;
else if (!ipath_get_rwqe(qp, 1)) { else if (!ipath_get_rwqe(qp, 1)) {
dev->n_pkt_drops++; dev->n_pkt_drops++;
goto done; goto done;
......
...@@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) ...@@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
u32 length; u32 length;
qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
if (!qp) { if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
dev->n_pkt_drops++; dev->n_pkt_drops++;
goto send_comp; goto done;
} }
rsge.sg_list = NULL; rsge.sg_list = NULL;
...@@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) ...@@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
* present on the wire. * present on the wire.
*/ */
length = swqe->length; length = swqe->length;
memset(&wc, 0, sizeof wc);
wc.byte_len = length + sizeof(struct ib_grh); wc.byte_len = length + sizeof(struct ib_grh);
if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
wc.wc_flags = IB_WC_WITH_IMM; wc.wc_flags = IB_WC_WITH_IMM;
wc.imm_data = swqe->wr.ex.imm_data; wc.imm_data = swqe->wr.ex.imm_data;
} else {
wc.wc_flags = 0;
wc.imm_data = 0;
} }
/* /*
...@@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) ...@@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
} }
wc.status = IB_WC_SUCCESS; wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV; wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
wc.qp = &qp->ibqp; wc.qp = &qp->ibqp;
wc.src_qp = sqp->ibqp.qp_num; wc.src_qp = sqp->ibqp.qp_num;
/* XXX do we know which pkey matched? Only needed for GSI. */ /* XXX do we know which pkey matched? Only needed for GSI. */
...@@ -248,8 +245,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) ...@@ -248,8 +245,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
kfree(rsge.sg_list); kfree(rsge.sg_list);
if (atomic_dec_and_test(&qp->refcount)) if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait); wake_up(&qp->wait);
send_comp: done:;
ipath_send_complete(sqp, swqe, IB_WC_SUCCESS);
} }
/** /**
...@@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) ...@@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
struct ipath_other_headers *ohdr; struct ipath_other_headers *ohdr;
struct ib_ah_attr *ah_attr; struct ib_ah_attr *ah_attr;
struct ipath_swqe *wqe; struct ipath_swqe *wqe;
unsigned long flags;
u32 nwords; u32 nwords;
u32 extra_bytes; u32 extra_bytes;
u32 bth0; u32 bth0;
...@@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp) ...@@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp)
u16 lid; u16 lid;
int ret = 0; int ret = 0;
if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))) spin_lock_irqsave(&qp->s_lock, flags);
goto bail;
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head)
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&qp->s_dma_busy)) {
qp->s_flags |= IPATH_S_WAIT_DMA;
goto bail;
}
wqe = get_swqe_ptr(qp, qp->s_last);
ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
if (qp->s_cur == qp->s_head) if (qp->s_cur == qp->s_head)
goto bail; goto bail;
wqe = get_swqe_ptr(qp, qp->s_cur); wqe = get_swqe_ptr(qp, qp->s_cur);
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
/* Construct the header. */ /* Construct the header. */
ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
...@@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp) ...@@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp)
dev->n_unicast_xmit++; dev->n_unicast_xmit++;
} else { } else {
dev->n_unicast_xmit++; dev->n_unicast_xmit++;
lid = ah_attr->dlid & lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
~((1 << dev->dd->ipath_lmc) - 1);
if (unlikely(lid == dev->dd->ipath_lid)) { if (unlikely(lid == dev->dd->ipath_lid)) {
/*
* If DMAs are in progress, we can't generate
* a completion for the loopback packet since
* it would be out of order.
* XXX Instead of waiting, we could queue a
* zero length descriptor so we get a callback.
*/
if (atomic_read(&qp->s_dma_busy)) {
qp->s_flags |= IPATH_S_WAIT_DMA;
goto bail;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
ipath_ud_loopback(qp, wqe); ipath_ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, flags);
ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done; goto done;
} }
} }
...@@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp) ...@@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp)
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done: done:
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
ret = 1; ret = 1;
goto unlock;
bail: bail:
qp->s_flags &= ~IPATH_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret; return ret;
} }
...@@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/* /*
* Get the next work request entry to find where to put the data. * Get the next work request entry to find where to put the data.
*/ */
if (qp->r_reuse_sge) if (qp->r_flags & IPATH_R_REUSE_SGE)
qp->r_reuse_sge = 0; qp->r_flags &= ~IPATH_R_REUSE_SGE;
else if (!ipath_get_rwqe(qp, 0)) { else if (!ipath_get_rwqe(qp, 0)) {
/* /*
* Count VL15 packets dropped due to no receive buffer. * Count VL15 packets dropped due to no receive buffer.
...@@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
} }
/* Silently drop packets which are too big. */ /* Silently drop packets which are too big. */
if (wc.byte_len > qp->r_len) { if (wc.byte_len > qp->r_len) {
qp->r_reuse_sge = 1; qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++; dev->n_pkt_drops++;
goto bail; goto bail;
} }
...@@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
ipath_copy_sge(&qp->r_sge, data, ipath_copy_sge(&qp->r_sge, data,
wc.byte_len - sizeof(struct ib_grh)); wc.byte_len - sizeof(struct ib_grh));
qp->r_wrid_valid = 0; if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
goto bail;
wc.wr_id = qp->r_wr_id; wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS; wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV; wc.opcode = IB_WC_RECV;
......
...@@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd, ...@@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
int ipath_user_sdma_make_progress(struct ipath_devdata *dd, int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq); struct ipath_user_sdma_queue *pq);
int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq,
u32 counter);
void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq); struct ipath_user_sdma_queue *pq);
......
...@@ -111,16 +111,24 @@ static unsigned int ib_ipath_disable_sma; ...@@ -111,16 +111,24 @@ static unsigned int ib_ipath_disable_sma;
module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA"); MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
* Note that it is OK to post send work requests in the SQE and ERR
* states; ipath_do_send() will process them and generate error
* completions as per IB 1.2 C10-96.
*/
const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = 0, [IB_QPS_RESET] = 0,
[IB_QPS_INIT] = IPATH_POST_RECV_OK, [IB_QPS_INIT] = IPATH_POST_RECV_OK,
[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
IPATH_PROCESS_NEXT_SEND_OK,
[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
IPATH_POST_SEND_OK, IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
[IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
[IB_QPS_ERR] = 0, IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
[IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
}; };
struct ipath_ucontext { struct ipath_ucontext {
...@@ -230,18 +238,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) ...@@ -230,18 +238,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
} }
} }
static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr)
{
struct ib_wc wc;
memset(&wc, 0, sizeof(wc));
wc.wr_id = wr->wr_id;
wc.status = IB_WC_WR_FLUSH_ERR;
wc.opcode = ib_ipath_wc_opcode[wr->opcode];
wc.qp = &qp->ibqp;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
}
/* /*
* Count the number of DMA descriptors needed to send length bytes of data. * Count the number of DMA descriptors needed to send length bytes of data.
* Don't modify the ipath_sge_state to get the count. * Don't modify the ipath_sge_state to get the count.
...@@ -347,14 +343,8 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) ...@@ -347,14 +343,8 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, flags);
/* Check that state is OK to post send. */ /* Check that state is OK to post send. */
if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) { if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR) goto bail_inval;
goto bail_inval;
/* C10-96 says generate a flushed completion entry. */
ipath_flush_wqe(qp, wr);
ret = 0;
goto bail;
}
/* IB spec says that num_sge == 0 is OK. */ /* IB spec says that num_sge == 0 is OK. */
if (wr->num_sge > qp->s_max_sge) if (wr->num_sge > qp->s_max_sge)
...@@ -677,6 +667,7 @@ bail:; ...@@ -677,6 +667,7 @@ bail:;
static void ipath_ib_timer(struct ipath_ibdev *dev) static void ipath_ib_timer(struct ipath_ibdev *dev)
{ {
struct ipath_qp *resend = NULL; struct ipath_qp *resend = NULL;
struct ipath_qp *rnr = NULL;
struct list_head *last; struct list_head *last;
struct ipath_qp *qp; struct ipath_qp *qp;
unsigned long flags; unsigned long flags;
...@@ -703,7 +694,9 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) ...@@ -703,7 +694,9 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
if (--qp->s_rnr_timeout == 0) { if (--qp->s_rnr_timeout == 0) {
do { do {
list_del_init(&qp->timerwait); list_del_init(&qp->timerwait);
tasklet_hi_schedule(&qp->s_task); qp->timer_next = rnr;
rnr = qp;
atomic_inc(&qp->refcount);
if (list_empty(last)) if (list_empty(last))
break; break;
qp = list_entry(last->next, struct ipath_qp, qp = list_entry(last->next, struct ipath_qp,
...@@ -743,16 +736,31 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) ...@@ -743,16 +736,31 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
spin_unlock_irqrestore(&dev->pending_lock, flags); spin_unlock_irqrestore(&dev->pending_lock, flags);
/* XXX What if timer fires again while this is running? */ /* XXX What if timer fires again while this is running? */
for (qp = resend; qp != NULL; qp = qp->timer_next) { while (resend != NULL) {
struct ib_wc wc; qp = resend;
resend = qp->timer_next;
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, flags);
if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { if (qp->s_last != qp->s_tail &&
ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
dev->n_timeouts++; dev->n_timeouts++;
ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); ipath_restart_rc(qp, qp->s_last_psn + 1);
} }
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify ipath_destroy_qp() if it is waiting. */
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
while (rnr != NULL) {
qp = rnr;
rnr = qp->timer_next;
spin_lock_irqsave(&qp->s_lock, flags);
if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
ipath_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify ipath_destroy_qp() if it is waiting. */ /* Notify ipath_destroy_qp() if it is waiting. */
if (atomic_dec_and_test(&qp->refcount)) if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait); wake_up(&qp->wait);
...@@ -1012,13 +1020,24 @@ static void sdma_complete(void *cookie, int status) ...@@ -1012,13 +1020,24 @@ static void sdma_complete(void *cookie, int status)
struct ipath_verbs_txreq *tx = cookie; struct ipath_verbs_txreq *tx = cookie;
struct ipath_qp *qp = tx->qp; struct ipath_qp *qp = tx->qp;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
unsigned int flags;
enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
/* Generate a completion queue entry if needed */ if (atomic_dec_and_test(&qp->s_dma_busy)) {
if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) { spin_lock_irqsave(&qp->s_lock, flags);
enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? if (tx->wqe)
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; ipath_send_complete(qp, tx->wqe, ibs);
if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
qp->s_last != qp->s_head) ||
(qp->s_flags & IPATH_S_WAIT_DMA))
ipath_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
wake_up(&qp->wait_dma);
} else if (tx->wqe) {
spin_lock_irqsave(&qp->s_lock, flags);
ipath_send_complete(qp, tx->wqe, ibs); ipath_send_complete(qp, tx->wqe, ibs);
spin_unlock_irqrestore(&qp->s_lock, flags);
} }
if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
...@@ -1029,6 +1048,21 @@ static void sdma_complete(void *cookie, int status) ...@@ -1029,6 +1048,21 @@ static void sdma_complete(void *cookie, int status)
wake_up(&qp->wait); wake_up(&qp->wait);
} }
/*
 * Drop one count from qp->s_dma_busy.
 *
 * When the count reaches zero, reschedule the send tasklet if either
 *  - the QP's state has IPATH_FLUSH_SEND set and the send queue still
 *    holds entries (s_last != s_head), or
 *  - IPATH_S_WAIT_DMA is set in s_flags,
 * and then wake up anyone sleeping on qp->wait_dma.
 */
static void decrement_dma_busy(struct ipath_qp *qp)
{
	/*
	 * Must be unsigned long: spin_lock_irqsave() stores the saved
	 * interrupt state here, and a narrower type can truncate it.
	 */
	unsigned long flags;

	if (atomic_dec_and_test(&qp->s_dma_busy)) {
		spin_lock_irqsave(&qp->s_lock, flags);
		if (((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND) &&
		     qp->s_last != qp->s_head) ||
		    (qp->s_flags & IPATH_S_WAIT_DMA))
			ipath_schedule_send(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		wake_up(&qp->wait_dma);
	}
}
/* /*
* Compute the number of clock cycles of delay before sending the next packet. * Compute the number of clock cycles of delay before sending the next packet.
* The multipliers reflect the number of clocks for the fastest rate so * The multipliers reflect the number of clocks for the fastest rate so
...@@ -1067,9 +1101,12 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, ...@@ -1067,9 +1101,12 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
if (tx) { if (tx) {
qp->s_tx = NULL; qp->s_tx = NULL;
/* resend previously constructed packet */ /* resend previously constructed packet */
atomic_inc(&qp->s_dma_busy);
ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
if (ret) if (ret) {
qp->s_tx = tx; qp->s_tx = tx;
decrement_dma_busy(qp);
}
goto bail; goto bail;
} }
...@@ -1120,12 +1157,14 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, ...@@ -1120,12 +1157,14 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
tx->txreq.sg_count = ndesc; tx->txreq.sg_count = ndesc;
tx->map_len = (hdrwords + 2) << 2; tx->map_len = (hdrwords + 2) << 2;
tx->txreq.map_addr = &tx->hdr; tx->txreq.map_addr = &tx->hdr;
atomic_inc(&qp->s_dma_busy);
ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
if (ret) { if (ret) {
/* save ss and length in dwords */ /* save ss and length in dwords */
tx->ss = ss; tx->ss = ss;
tx->len = dwords; tx->len = dwords;
qp->s_tx = tx; qp->s_tx = tx;
decrement_dma_busy(qp);
} }
goto bail; goto bail;
} }
...@@ -1146,6 +1185,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, ...@@ -1146,6 +1185,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
memcpy(piobuf, hdr, hdrwords << 2); memcpy(piobuf, hdr, hdrwords << 2);
ipath_copy_from_sge(piobuf + hdrwords, ss, len); ipath_copy_from_sge(piobuf + hdrwords, ss, len);
atomic_inc(&qp->s_dma_busy);
ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
/* /*
* If we couldn't queue the DMA request, save the info * If we couldn't queue the DMA request, save the info
...@@ -1156,6 +1196,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, ...@@ -1156,6 +1196,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
tx->ss = NULL; tx->ss = NULL;
tx->len = 0; tx->len = 0;
qp->s_tx = tx; qp->s_tx = tx;
decrement_dma_busy(qp);
} }
dev->n_unaligned++; dev->n_unaligned++;
goto bail; goto bail;
...@@ -1179,6 +1220,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, ...@@ -1179,6 +1220,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
unsigned flush_wc; unsigned flush_wc;
u32 control; u32 control;
int ret; int ret;
unsigned int flags;
piobuf = ipath_getpiobuf(dd, plen, NULL); piobuf = ipath_getpiobuf(dd, plen, NULL);
if (unlikely(piobuf == NULL)) { if (unlikely(piobuf == NULL)) {
...@@ -1249,8 +1291,11 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, ...@@ -1249,8 +1291,11 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
} }
copy_io(piobuf, ss, len, flush_wc); copy_io(piobuf, ss, len, flush_wc);
done: done:
if (qp->s_wqe) if (qp->s_wqe) {
spin_lock_irqsave(&qp->s_lock, flags);
ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
ret = 0; ret = 0;
bail: bail:
return ret; return ret;
...@@ -1283,19 +1328,12 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, ...@@ -1283,19 +1328,12 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
* can defer SDMA restart until link goes ACTIVE without * can defer SDMA restart until link goes ACTIVE without
* worrying about just how we got there. * worrying about just how we got there.
*/ */
if (qp->ibqp.qp_type == IB_QPT_SMI) if (qp->ibqp.qp_type == IB_QPT_SMI ||
!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
plen, dwords); plen, dwords);
/* All non-VL15 packets are dropped if link is not ACTIVE */
else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) {
if (qp->s_wqe)
ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
ret = 0;
} else if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
plen, dwords);
else else
ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
plen, dwords); plen, dwords);
return ret; return ret;
...@@ -1403,27 +1441,46 @@ int ipath_get_counters(struct ipath_devdata *dd, ...@@ -1403,27 +1441,46 @@ int ipath_get_counters(struct ipath_devdata *dd,
* This is called from ipath_intr() at interrupt level when a PIO buffer is * This is called from ipath_intr() at interrupt level when a PIO buffer is
* available after ipath_verbs_send() returned an error that no buffers were * available after ipath_verbs_send() returned an error that no buffers were
* available. Return 1 if we consumed all the PIO buffers and we still have * available. Return 1 if we consumed all the PIO buffers and we still have
* QPs waiting for buffers (for now, just do a tasklet_hi_schedule and * QPs waiting for buffers (for now, just restart the send tasklet and
* return zero). * return zero).
*/ */
int ipath_ib_piobufavail(struct ipath_ibdev *dev) int ipath_ib_piobufavail(struct ipath_ibdev *dev)
{ {
struct list_head *list;
struct ipath_qp *qplist;
struct ipath_qp *qp; struct ipath_qp *qp;
unsigned long flags; unsigned long flags;
if (dev == NULL) if (dev == NULL)
goto bail; goto bail;
list = &dev->piowait;
qplist = NULL;
spin_lock_irqsave(&dev->pending_lock, flags); spin_lock_irqsave(&dev->pending_lock, flags);
while (!list_empty(&dev->piowait)) { while (!list_empty(list)) {
qp = list_entry(dev->piowait.next, struct ipath_qp, qp = list_entry(list->next, struct ipath_qp, piowait);
piowait);
list_del_init(&qp->piowait); list_del_init(&qp->piowait);
clear_bit(IPATH_S_BUSY, &qp->s_busy); qp->pio_next = qplist;
tasklet_hi_schedule(&qp->s_task); qplist = qp;
atomic_inc(&qp->refcount);
} }
spin_unlock_irqrestore(&dev->pending_lock, flags); spin_unlock_irqrestore(&dev->pending_lock, flags);
while (qplist != NULL) {
qp = qplist;
qplist = qp->pio_next;
spin_lock_irqsave(&qp->s_lock, flags);
if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
ipath_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify ipath_destroy_qp() if it is waiting. */
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
bail: bail:
return 0; return 0;
} }
...@@ -2145,11 +2202,12 @@ int ipath_register_ib_device(struct ipath_devdata *dd) ...@@ -2145,11 +2202,12 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
void ipath_unregister_ib_device(struct ipath_ibdev *dev) void ipath_unregister_ib_device(struct ipath_ibdev *dev)
{ {
struct ib_device *ibdev = &dev->ibdev; struct ib_device *ibdev = &dev->ibdev;
u32 qps_inuse;
disable_timer(dev->dd);
ib_unregister_device(ibdev); ib_unregister_device(ibdev);
disable_timer(dev->dd);
if (!list_empty(&dev->pending[0]) || if (!list_empty(&dev->pending[0]) ||
!list_empty(&dev->pending[1]) || !list_empty(&dev->pending[1]) ||
!list_empty(&dev->pending[2])) !list_empty(&dev->pending[2]))
...@@ -2164,7 +2222,10 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev) ...@@ -2164,7 +2222,10 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev)
* Note that ipath_unregister_ib_device() can be called before all * Note that ipath_unregister_ib_device() can be called before all
* the QPs are destroyed! * the QPs are destroyed!
*/ */
ipath_free_all_qps(&dev->qp_table); qps_inuse = ipath_free_all_qps(&dev->qp_table);
if (qps_inuse)
ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
qps_inuse);
kfree(dev->qp_table.table); kfree(dev->qp_table.table);
kfree(dev->lk_table.table); kfree(dev->lk_table.table);
kfree(dev->txreq_bufs); kfree(dev->txreq_bufs);
...@@ -2215,17 +2276,14 @@ static ssize_t show_stats(struct device *device, struct device_attribute *attr, ...@@ -2215,17 +2276,14 @@ static ssize_t show_stats(struct device *device, struct device_attribute *attr,
"RC OTH NAKs %d\n" "RC OTH NAKs %d\n"
"RC timeouts %d\n" "RC timeouts %d\n"
"RC RDMA dup %d\n" "RC RDMA dup %d\n"
"RC stalls %d\n"
"piobuf wait %d\n" "piobuf wait %d\n"
"no piobuf %d\n"
"unaligned %d\n" "unaligned %d\n"
"PKT drops %d\n" "PKT drops %d\n"
"WQE errs %d\n", "WQE errs %d\n",
dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
dev->n_other_naks, dev->n_timeouts, dev->n_other_naks, dev->n_timeouts,
dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
dev->n_no_piobuf, dev->n_unaligned,
dev->n_pkt_drops, dev->n_wqe_errs); dev->n_pkt_drops, dev->n_wqe_errs);
for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
const struct ipath_opcode_stats *si = &dev->opstats[i]; const struct ipath_opcode_stats *si = &dev->opstats[i];
......
...@@ -74,6 +74,11 @@ ...@@ -74,6 +74,11 @@
#define IPATH_POST_RECV_OK 0x02 #define IPATH_POST_RECV_OK 0x02
#define IPATH_PROCESS_RECV_OK 0x04 #define IPATH_PROCESS_RECV_OK 0x04
#define IPATH_PROCESS_SEND_OK 0x08 #define IPATH_PROCESS_SEND_OK 0x08
/* Further per-state capability bits OR'ed into ib_ipath_state_ops[] entries. */
#define IPATH_PROCESS_NEXT_SEND_OK 0x10
#define IPATH_FLUSH_SEND 0x20
#define IPATH_FLUSH_RECV 0x40
/* Mask for states that either process or flush send work requests. */
#define IPATH_PROCESS_OR_FLUSH_SEND \
(IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
/* IB Performance Manager status values */ /* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00 #define IB_PMA_SAMPLE_STATUS_DONE 0x00
...@@ -353,12 +358,14 @@ struct ipath_qp { ...@@ -353,12 +358,14 @@ struct ipath_qp {
struct ib_qp ibqp; struct ib_qp ibqp;
struct ipath_qp *next; /* link list for QPN hash table */ struct ipath_qp *next; /* link list for QPN hash table */
struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */ struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */
struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */
struct list_head piowait; /* link for wait PIO buf */ struct list_head piowait; /* link for wait PIO buf */
struct list_head timerwait; /* link for waiting for timeouts */ struct list_head timerwait; /* link for waiting for timeouts */
struct ib_ah_attr remote_ah_attr; struct ib_ah_attr remote_ah_attr;
struct ipath_ib_header s_hdr; /* next packet header to send */ struct ipath_ib_header s_hdr; /* next packet header to send */
atomic_t refcount; atomic_t refcount;
wait_queue_head_t wait; wait_queue_head_t wait;
wait_queue_head_t wait_dma;
struct tasklet_struct s_task; struct tasklet_struct s_task;
struct ipath_mmap_info *ip; struct ipath_mmap_info *ip;
struct ipath_sge_state *s_cur_sge; struct ipath_sge_state *s_cur_sge;
...@@ -369,7 +376,7 @@ struct ipath_qp { ...@@ -369,7 +376,7 @@ struct ipath_qp {
struct ipath_sge_state s_rdma_read_sge; struct ipath_sge_state s_rdma_read_sge;
struct ipath_sge_state r_sge; /* current receive data */ struct ipath_sge_state r_sge; /* current receive data */
spinlock_t s_lock; spinlock_t s_lock;
unsigned long s_busy; atomic_t s_dma_busy;
u16 s_pkt_delay; u16 s_pkt_delay;
u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u16 s_hdrwords; /* size of s_hdr in 32 bit words */
u32 s_cur_size; /* size of send packet in bytes */ u32 s_cur_size; /* size of send packet in bytes */
...@@ -383,6 +390,7 @@ struct ipath_qp { ...@@ -383,6 +390,7 @@ struct ipath_qp {
u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
u64 r_wr_id; /* ID for current receive WQE */ u64 r_wr_id; /* ID for current receive WQE */
unsigned long r_aflags;
u32 r_len; /* total length of r_sge */ u32 r_len; /* total length of r_sge */
u32 r_rcv_len; /* receive data len processed */ u32 r_rcv_len; /* receive data len processed */
u32 r_psn; /* expected rcv packet sequence number */ u32 r_psn; /* expected rcv packet sequence number */
...@@ -394,8 +402,7 @@ struct ipath_qp { ...@@ -394,8 +402,7 @@ struct ipath_qp {
u8 r_state; /* opcode of last packet received */ u8 r_state; /* opcode of last packet received */
u8 r_nak_state; /* non-zero if NAK is pending */ u8 r_nak_state; /* non-zero if NAK is pending */
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
u8 r_reuse_sge; /* for UC receive errors */ u8 r_flags;
u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
u8 r_head_ack_queue; /* index into s_ack_queue[] */ u8 r_head_ack_queue; /* index into s_ack_queue[] */
u8 qp_access_flags; u8 qp_access_flags;
...@@ -404,13 +411,13 @@ struct ipath_qp { ...@@ -404,13 +411,13 @@ struct ipath_qp {
u8 s_rnr_retry_cnt; u8 s_rnr_retry_cnt;
u8 s_retry; /* requester retry counter */ u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */
u8 s_wait_credit; /* limit number of unacked packets sent */
u8 s_pkey_index; /* PKEY index to use */ u8 s_pkey_index; /* PKEY index to use */
u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
u8 s_tail_ack_queue; /* index into s_ack_queue[] */ u8 s_tail_ack_queue; /* index into s_ack_queue[] */
u8 s_flags; u8 s_flags;
u8 s_dmult; u8 s_dmult;
u8 s_draining;
u8 timeout; /* Timeout for this QP */ u8 timeout; /* Timeout for this QP */
enum ib_mtu path_mtu; enum ib_mtu path_mtu;
u32 remote_qpn; u32 remote_qpn;
...@@ -428,16 +435,40 @@ struct ipath_qp { ...@@ -428,16 +435,40 @@ struct ipath_qp {
struct ipath_sge r_sg_list[0]; /* verified SGEs */ struct ipath_sge r_sg_list[0]; /* verified SGEs */
}; };
/* Bit definition for s_busy. */ /*
#define IPATH_S_BUSY 0 * Atomic bit definitions for r_aflags.
*/
#define IPATH_R_WRID_VALID 0
/*
* Bit definitions for r_flags.
*/
#define IPATH_R_REUSE_SGE 0x01
#define IPATH_R_RDMAR_SEQ 0x02
/* /*
* Bit definitions for s_flags. * Bit definitions for s_flags.
*
* IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
* before processing the next SWQE
* IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
* before processing the next SWQE
* IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
* IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
* IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
* next send completion entry not via send DMA.
*/ */
#define IPATH_S_SIGNAL_REQ_WR 0x01 #define IPATH_S_SIGNAL_REQ_WR 0x01
#define IPATH_S_FENCE_PENDING 0x02 #define IPATH_S_FENCE_PENDING 0x02
#define IPATH_S_RDMAR_PENDING 0x04 #define IPATH_S_RDMAR_PENDING 0x04
#define IPATH_S_ACK_PENDING 0x08 #define IPATH_S_ACK_PENDING 0x08
#define IPATH_S_BUSY 0x10
#define IPATH_S_WAITING 0x20
#define IPATH_S_WAIT_SSN_CREDIT 0x40
#define IPATH_S_WAIT_DMA 0x80
#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
#define IPATH_PSN_CREDIT 512 #define IPATH_PSN_CREDIT 512
...@@ -573,13 +604,11 @@ struct ipath_ibdev { ...@@ -573,13 +604,11 @@ struct ipath_ibdev {
u32 n_rnr_naks; u32 n_rnr_naks;
u32 n_other_naks; u32 n_other_naks;
u32 n_timeouts; u32 n_timeouts;
u32 n_rc_stalls;
u32 n_pkt_drops; u32 n_pkt_drops;
u32 n_vl15_dropped; u32 n_vl15_dropped;
u32 n_wqe_errs; u32 n_wqe_errs;
u32 n_rdma_dup_busy; u32 n_rdma_dup_busy;
u32 n_piowait; u32 n_piowait;
u32 n_no_piobuf;
u32 n_unaligned; u32 n_unaligned;
u32 port_cap_flags; u32 port_cap_flags;
u32 pma_sample_start; u32 pma_sample_start;
...@@ -657,6 +686,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev) ...@@ -657,6 +686,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
return container_of(ibdev, struct ipath_ibdev, ibdev); return container_of(ibdev, struct ipath_ibdev, ibdev);
} }
/*
 * Clear every wait condition recorded in s_flags (IPATH_S_ANY_WAIT) and
 * schedule the QP's send tasklet unless IPATH_S_BUSY is set.
 *
 * The caller must hold s_lock.
 */
static inline void ipath_schedule_send(struct ipath_qp *qp)
{
	/* Only write s_flags when a wait bit is actually set. */
	if (qp->s_flags & IPATH_S_ANY_WAIT)
		qp->s_flags &= ~IPATH_S_ANY_WAIT;

	if (qp->s_flags & IPATH_S_BUSY)
		return;

	tasklet_hi_schedule(&qp->s_task);
}
int ipath_process_mad(struct ib_device *ibdev, int ipath_process_mad(struct ib_device *ibdev,
int mad_flags, int mad_flags,
u8 port_num, u8 port_num,
...@@ -706,12 +746,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -706,12 +746,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_qp_init_attr *init_attr); int attr_mask, struct ib_qp_init_attr *init_attr);
void ipath_free_all_qps(struct ipath_qp_table *qpt); unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
int ipath_init_qp_table(struct ipath_ibdev *idev, int size); int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
void ipath_get_credit(struct ipath_qp *qp, u32 aeth); void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
unsigned ipath_ib_rate_to_mult(enum ib_rate rate); unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
...@@ -729,7 +767,9 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ...@@ -729,7 +767,9 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp); int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc); void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr); int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
......
...@@ -91,10 +91,6 @@ unsigned int nes_debug_level = 0; ...@@ -91,10 +91,6 @@ unsigned int nes_debug_level = 0;
module_param_named(debug_level, nes_debug_level, uint, 0644); module_param_named(debug_level, nes_debug_level, uint, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug output level"); MODULE_PARM_DESC(debug_level, "Enable debug output level");
unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR;
module_param(nes_lro_max_aggr, int, NES_LRO_MAX_AGGR);
MODULE_PARM_DESC(nes_mro_max_aggr, " nic LRO MAX packet aggregation");
LIST_HEAD(nes_adapter_list); LIST_HEAD(nes_adapter_list);
static LIST_HEAD(nes_dev_list); static LIST_HEAD(nes_dev_list);
......
...@@ -173,7 +173,6 @@ extern int disable_mpa_crc; ...@@ -173,7 +173,6 @@ extern int disable_mpa_crc;
extern unsigned int send_first; extern unsigned int send_first;
extern unsigned int nes_drv_opt; extern unsigned int nes_drv_opt;
extern unsigned int nes_debug_level; extern unsigned int nes_debug_level;
extern unsigned int nes_lro_max_aggr;
extern struct list_head nes_adapter_list; extern struct list_head nes_adapter_list;
......
...@@ -42,6 +42,10 @@ ...@@ -42,6 +42,10 @@
#include "nes.h" #include "nes.h"
/*
 * LRO aggregation limit, exposed as a read-only (0444) module parameter;
 * defaults to NES_LRO_MAX_AGGR.
 */
static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR;
module_param(nes_lro_max_aggr, uint, 0444);
MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation");
static u32 crit_err_count; static u32 crit_err_count;
u32 int_mod_timer_init; u32 int_mod_timer_init;
u32 int_mod_cq_depth_256; u32 int_mod_cq_depth_256;
...@@ -1738,7 +1742,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) ...@@ -1738,7 +1742,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
jumbomode = 1; jumbomode = 1;
nes_nic_init_timer_defaults(nesdev, jumbomode); nes_nic_init_timer_defaults(nesdev, jumbomode);
} }
nesvnic->lro_mgr.max_aggr = NES_LRO_MAX_AGGR; nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr;
nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS;
nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc;
nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr; nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment