From e509be898d8937634437caa474b57ac12795e5bc Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 May 2008 11:41:29 -0700 Subject: IB/ipath: Fix many locking issues when switching to error state The send DMA hardware queue voided a number of prior assumptions about when a send is complete which led to completions being generated out of order. There were also a number of locking issues when switching the QP to the error or reset states, and we implement the IB_QPS_SQD state. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 183 +++++++++++++------------- drivers/infiniband/hw/ipath/ipath_rc.c | 151 +++++++++++++-------- drivers/infiniband/hw/ipath/ipath_ruc.c | 168 +++++++++++++++-------- drivers/infiniband/hw/ipath/ipath_uc.c | 57 +++++--- drivers/infiniband/hw/ipath/ipath_ud.c | 66 +++++++--- drivers/infiniband/hw/ipath/ipath_user_sdma.h | 2 - drivers/infiniband/hw/ipath/ipath_verbs.c | 174 ++++++++++++++++-------- drivers/infiniband/hw/ipath/ipath_verbs.h | 57 ++++++-- 8 files changed, 554 insertions(+), 304 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 6f98632877e..4715911101e 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) { struct ipath_qp *q, **qpp; unsigned long flags; - int fnd = 0; spin_lock_irqsave(&qpt->lock, flags); @@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) *qpp = qp->next; qp->next = NULL; atomic_dec(&qp->refcount); - fnd = 1; break; } } spin_unlock_irqrestore(&qpt->lock, flags); - - if (!fnd) - return; - - free_qpn(qpt, qp->ibqp.qp_num); - - wait_event(qp->wait, !atomic_read(&qp->refcount)); } /** - * ipath_free_all_qps - remove all QPs from the table + * ipath_free_all_qps - check for QPs still in use + * @qpt: the QP table to empty + * + * There should not be any QPs still in use. + * Free memory for table. 
*/ -void ipath_free_all_qps(struct ipath_qp_table *qpt) +unsigned ipath_free_all_qps(struct ipath_qp_table *qpt) { unsigned long flags; - struct ipath_qp *qp, *nqp; - u32 n; + struct ipath_qp *qp; + u32 n, qp_inuse = 0; + spin_lock_irqsave(&qpt->lock, flags); for (n = 0; n < qpt->max; n++) { - spin_lock_irqsave(&qpt->lock, flags); qp = qpt->table[n]; qpt->table[n] = NULL; - spin_unlock_irqrestore(&qpt->lock, flags); - - while (qp) { - nqp = qp->next; - free_qpn(qpt, qp->ibqp.qp_num); - if (!atomic_dec_and_test(&qp->refcount) || - !ipath_destroy_qp(&qp->ibqp)) - ipath_dbg("QP memory leak!\n"); - qp = nqp; - } + + for (; qp; qp = qp->next) + qp_inuse++; } + spin_unlock_irqrestore(&qpt->lock, flags); - for (n = 0; n < ARRAY_SIZE(qpt->map); n++) { + for (n = 0; n < ARRAY_SIZE(qpt->map); n++) if (qpt->map[n].page) - free_page((unsigned long)qpt->map[n].page); - } + free_page((unsigned long) qpt->map[n].page); + return qp_inuse; } /** @@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; - qp->s_busy = 0; + atomic_set(&qp->s_dma_busy, 0); qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; qp->s_pkt_delay = 0; + qp->s_draining = 0; qp->s_psn = 0; qp->r_psn = 0; qp->r_msn = 0; @@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) } qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_nak_state = 0; - qp->r_wrid_valid = 0; + qp->r_aflags = 0; + qp->r_flags = 0; qp->s_rnr_timeout = 0; qp->s_head = 0; qp->s_tail = 0; @@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; - qp->s_wait_credit = 0; memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); qp->r_head_ack_queue = 0; qp->s_tail_ack_queue = 0; @@ -370,7 +359,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->r_rq.wq->head = 0; qp->r_rq.wq->tail = 0; } - qp->r_reuse_sge = 0; } /** @@ -402,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); - wc.vendor_err = 0; - wc.byte_len = 0; - wc.imm_data = 0; + /* Schedule the sending tasklet to drain the send work queue. 
*/ + if (qp->s_last != qp->s_head) + ipath_schedule_send(qp); + + memset(&wc, 0, sizeof(wc)); wc.qp = &qp->ibqp; - wc.src_qp = 0; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; - if (qp->r_wrid_valid) { - qp->r_wrid_valid = 0; + wc.opcode = IB_WC_RECV; + + if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) { wc.wr_id = qp->r_wr_id; - wc.opcode = IB_WC_RECV; wc.status = err; ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); } wc.status = IB_WC_WR_FLUSH_ERR; - while (qp->s_last != qp->s_head) { - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); - } - qp->s_cur = qp->s_tail = qp->s_head; - qp->s_hdrwords = 0; - qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - if (qp->r_rq.wq) { struct ipath_rwq *wq; u32 head; @@ -450,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) tail = wq->tail; if (tail >= qp->r_rq.size) tail = 0; - wc.opcode = IB_WC_RECV; while (tail != head) { wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; if (++tail >= qp->r_rq.size) @@ -482,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct ipath_ibdev *dev = to_idev(ibqp->device); struct ipath_qp *qp = to_iqp(ibqp); enum ib_qp_state cur_state, new_state; - unsigned long flags; int lastwqe = 0; int ret; - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock_irq(&qp->s_lock); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -539,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, switch (new_state) { case IB_QPS_RESET: + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~IPATH_S_ANY_WAIT; + spin_unlock_irq(&qp->s_lock); + /* Stop the sending tasklet */ + tasklet_kill(&qp->s_task); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + spin_lock_irq(&qp->s_lock); + } ipath_reset_qp(qp, ibqp->qp_type); break; + case IB_QPS_SQD: + qp->s_draining = qp->s_last != qp->s_cur; + qp->state = new_state; + break; + + case IB_QPS_SQE: + if (qp->ibqp.qp_type == IB_QPT_RC) + goto inval; + qp->state = new_state; + break; + case IB_QPS_ERR: lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); break; default: + qp->state = new_state; break; - } if (attr_mask & IB_QP_PKEY_INDEX) @@ -601,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) qp->s_max_rd_atomic = attr->max_rd_atomic; - qp->state = new_state; - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock_irq(&qp->s_lock); if (lastwqe) { struct ib_event ev; @@ -616,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto bail; inval: - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock_irq(&qp->s_lock); ret = -EINVAL; bail: @@ -647,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->pkey_index = qp->s_pkey_index; attr->alt_pkey_index = 0; attr->en_sqd_async_notify = 0; - attr->sq_draining = 0; + attr->sq_draining = qp->s_draining; attr->max_rd_atomic = qp->s_max_rd_atomic; attr->max_dest_rd_atomic = qp->r_max_rd_atomic; attr->min_rnr_timer = qp->r_min_rnr_timer; @@ -837,6 
+830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); init_waitqueue_head(&qp->wait); + init_waitqueue_head(&qp->wait_dma); tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); INIT_LIST_HEAD(&qp->piowait); INIT_LIST_HEAD(&qp->timerwait); @@ -930,6 +924,7 @@ bail_ip: else vfree(qp->r_rq.wq); ipath_free_qp(&dev->qp_table, qp); + free_qpn(&dev->qp_table, qp->ibqp.qp_num); bail_qp: kfree(qp); bail_swq: @@ -951,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp) { struct ipath_qp *qp = to_iqp(ibqp); struct ipath_ibdev *dev = to_idev(ibqp->device); - unsigned long flags; - spin_lock_irqsave(&qp->s_lock, flags); - qp->state = IB_QPS_ERR; - spin_unlock_irqrestore(&qp->s_lock, flags); - spin_lock(&dev->n_qps_lock); - dev->n_qps_allocated--; - spin_unlock(&dev->n_qps_lock); + /* Make sure HW and driver activity is stopped. */ + spin_lock_irq(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~IPATH_S_ANY_WAIT; + spin_unlock_irq(&qp->s_lock); + /* Stop the sending tasklet */ + tasklet_kill(&qp->s_task); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + } else + spin_unlock_irq(&qp->s_lock); - /* Stop the sending tasklet. */ - tasklet_kill(&qp->s_task); + ipath_free_qp(&dev->qp_table, qp); if (qp->s_tx) { atomic_dec(&qp->refcount); if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) kfree(qp->s_tx->txreq.map_addr); + spin_lock_irq(&dev->pending_lock); + list_add(&qp->s_tx->txreq.list, &dev->txreq_free); + spin_unlock_irq(&dev->pending_lock); + qp->s_tx = NULL; } - /* Make sure the QP isn't on the timeout list. */ - spin_lock_irqsave(&dev->pending_lock, flags); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - if (qp->s_tx) - list_add(&qp->s_tx->txreq.list, &dev->txreq_free); - spin_unlock_irqrestore(&dev->pending_lock, flags); + wait_event(qp->wait, !atomic_read(&qp->refcount)); - /* - * Make sure that the QP is not in the QPN table so receive - * interrupts will discard packets for this QP. XXX Also remove QP - * from multicast table. - */ - if (atomic_read(&qp->refcount) != 0) - ipath_free_qp(&dev->qp_table, qp); + /* all user's cleaned up, mark it available */ + free_qpn(&dev->qp_table, qp->ibqp.qp_num); + spin_lock(&dev->n_qps_lock); + dev->n_qps_allocated--; + spin_unlock(&dev->n_qps_lock); if (qp->ip) kref_put(&qp->ip->ref, ipath_release_mmap_info); @@ -1055,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth) } /* Restart sending if it was blocked due to lack of credits. */ - if (qp->s_cur != qp->s_head && + if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) && + qp->s_cur != qp->s_head && (qp->s_lsn == (u32) -1 || ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, qp->s_lsn + 1) <= 0)) - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); } diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index b4b26c3aa61..5b5276a270b 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, u32 bth0; u32 bth2; + /* Don't send an ACK if we aren't supposed to. 
*/ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto bail; + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; @@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp) ipath_make_rc_ack(dev, qp, ohdr, pmtu)) goto done; - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || - qp->s_rnr_timeout || qp->s_wait_credit) - goto bail; + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } - /* Limit the number of packets sent without an ACK. */ - if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { - qp->s_wait_credit = 1; - dev->n_rc_stalls++; + /* Leave BUSY set until RNR timeout. */ + if (qp->s_rnr_timeout) { + qp->s_flags |= IPATH_S_WAITING; goto bail; } @@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp) wqe = get_swqe_ptr(qp, qp->s_cur); switch (qp->s_state) { default: + if (!(ib_ipath_state_ops[qp->state] & + IPATH_PROCESS_NEXT_SEND_OK)) + goto bail; /* * Resend an old request or start a new one. * @@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp) case IB_WR_SEND_WITH_IMM: /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; goto bail; + } wqe->lpsn = wqe->psn; if (len > pmtu) { wqe->lpsn += (len - 1) / pmtu; @@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp) case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; goto bail; + } ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->wr.wr.rdma.remote_addr); ohdr->u.rc.reth.rkey = @@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp) ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); done: ret = 1; + goto unlock; + bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp) spin_unlock_irqrestore(&qp->s_lock, flags); + /* Don't try to send ACKs if the link isn't ACTIVE */ dd = dev->dd; + if (!(dd->ipath_flags & IPATH_LINKACTIVE)) + goto done; + piobuf = ipath_getpiobuf(dd, 0, NULL); if (!piobuf) { /* @@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp) goto done; queue_ack: - dev->n_rc_qacks++; - qp->s_flags |= IPATH_S_ACK_PENDING; - qp->s_nak_state = qp->r_nak_state; - qp->s_ack_psn = qp->r_ack_psn; + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) { + dev->n_rc_qacks++; + qp->s_flags |= IPATH_S_ACK_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); + } spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); - done: return; } @@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn) /* * Set the state to restart in the middle of a request. 
* Don't change the s_sge, s_cur_sge, or s_cur_size. - * See ipath_do_rc_send(). + * See ipath_make_rc_req(). */ switch (opcode) { case IB_WR_SEND: @@ -801,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn) dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; reset_psn(qp, psn); - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); bail: return; @@ -809,13 +840,7 @@ bail: static inline void update_last_psn(struct ipath_qp *qp, u32 psn) { - if (qp->s_last_psn != psn) { - qp->s_last_psn = psn; - if (qp->s_wait_credit) { - qp->s_wait_credit = 0; - tasklet_hi_schedule(&qp->s_task); - } - } + qp->s_last_psn = psn; } /** @@ -915,14 +940,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { qp->s_num_rd_atomic--; /* Restart sending task if fence is complete */ - if ((qp->s_flags & IPATH_S_FENCE_PENDING) && - !qp->s_num_rd_atomic) { - qp->s_flags &= ~IPATH_S_FENCE_PENDING; - tasklet_hi_schedule(&qp->s_task); - } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) { - qp->s_flags &= ~IPATH_S_RDMAR_PENDING; - tasklet_hi_schedule(&qp->s_task); - } + if (((qp->s_flags & IPATH_S_FENCE_PENDING) && + !qp->s_num_rd_atomic) || + qp->s_flags & IPATH_S_RDMAR_PENDING) + ipath_schedule_send(qp); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || @@ -956,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, } else { if (++qp->s_last >= qp->s_size) qp->s_last = 0; + if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur) + qp->s_draining = 0; if (qp->s_last == qp->s_tail) break; wqe = get_swqe_ptr(qp, qp->s_last); @@ -979,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, */ if (ipath_cmp24(qp->s_psn, psn) <= 0) { reset_psn(qp, psn + 1); - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); } } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { qp->s_state = OP(SEND_LAST); @@ -1018,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK]; ipath_insert_rnr_queue(qp); + ipath_schedule_send(qp); goto bail; case 3: /* NAK */ @@ -1108,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this now that we hold the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto ack_done; + /* Ignore invalid responses. */ if (ipath_cmp24(psn, qp->s_next_psn) >= 0) goto ack_done; @@ -1343,7 +1371,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, psn &= IPATH_PSN_MASK; e = NULL; old_req = 1; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this now that we hold the s_lock. 
*/ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock_done; + for (i = qp->r_head_ack_queue; ; i = prev) { if (i == qp->s_tail_ack_queue) old_req = 0; @@ -1471,7 +1504,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, break; } qp->r_nak_state = 0; - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); unlock_done: spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1503,18 +1536,15 @@ void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) { - unsigned long flags; unsigned next; next = n + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; - spin_lock_irqsave(&qp->s_lock, flags); if (n == qp->s_tail_ack_queue) { qp->s_tail_ack_queue = next; qp->s_ack_state = OP(ACKNOWLEDGE); } - spin_unlock_irqrestore(&qp->s_lock, flags); } /** @@ -1543,6 +1573,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int diff; struct ib_reth *reth; int header_in_data; + unsigned long flags; /* Validate the SLID. See Ch. 9.6.1.5 */ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) @@ -1690,9 +1721,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto nack_inv; ipath_copy_sge(&qp->r_sge, data, tlen); qp->r_msn++; - if (!qp->r_wrid_valid) + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) break; - qp->r_wrid_valid = 0; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || @@ -1764,9 +1794,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this while holding the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; if (unlikely(next == qp->s_tail_ack_queue)) { if (!qp->s_ack_queue[next].sent) - goto nack_inv; + goto nack_inv_unlck; ipath_update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; @@ -1787,7 +1821,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) - goto nack_acc; + goto nack_acc_unlck; /* * Update the next expected PSN. We add 1 later * below, so only add the remainder here. @@ -1814,13 +1848,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; - barrier(); qp->r_head_ack_queue = next; - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); - goto done; + goto unlock; } case OP(COMPARE_SWAP): @@ -1839,9 +1872,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this while holding the s_lock. 
*/ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; if (unlikely(next == qp->s_tail_ack_queue)) { if (!qp->s_ack_queue[next].sent) - goto nack_inv; + goto nack_inv_unlck; ipath_update_ack_queue(qp, next); } if (!header_in_data) @@ -1851,13 +1888,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | be32_to_cpu(ateth->vaddr[1]); if (unlikely(vaddr & (sizeof(u64) - 1))) - goto nack_inv; + goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); /* Check rkey & NAK */ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) - goto nack_acc; + goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; sdata = be64_to_cpu(ateth->swap_data); @@ -1874,13 +1911,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; - barrier(); qp->r_head_ack_queue = next; - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); - goto done; + goto unlock; } default: @@ -1901,19 +1937,26 @@ rnr_nak: qp->r_ack_psn = qp->r_psn; goto send_ack; +nack_inv_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); nack_inv: ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); qp->r_nak_state = IB_NAK_INVALID_REQUEST; qp->r_ack_psn = qp->r_psn; goto send_ack; +nack_acc_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); nack_acc: ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; send_ack: send_rc_ack(qp); + goto done; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); done: return; } diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index c716a03dd39..a4b5521567f 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = { * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device * @qp: the QP * + * Called with the QP s_lock held and interrupts disabled. * XXX Use a simple list for now. We might need a priority * queue if we have lots of QPs waiting for RNR timeouts * but that should be rare. @@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = { void ipath_insert_rnr_queue(struct ipath_qp *qp) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - unsigned long flags; - spin_lock_irqsave(&dev->pending_lock, flags); + /* We already did a spin_lock_irqsave(), so just use spin_lock */ + spin_lock(&dev->pending_lock); if (list_empty(&dev->rnrwait)) list_add(&qp->timerwait, &dev->rnrwait); else { @@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) nqp->s_rnr_timeout -= qp->s_rnr_timeout; list_add(&qp->timerwait, l); } - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_unlock(&dev->pending_lock); } /** @@ -185,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) } spin_lock_irqsave(&rq->lock, flags); + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + ret = 0; + goto unlock; + } + wq = rq->wq; tail = wq->tail; /* Validate tail before using it since it is user writable. 
*/ @@ -192,9 +198,8 @@ tail = 0; do { if (unlikely(tail == wq->head)) { - spin_unlock_irqrestore(&rq->lock, flags); ret = 0; - goto bail; + goto unlock; } /* Make sure entry is read after head index is read. */ smp_rmb(); @@ -207,7 +212,7 @@ wq->tail = tail; ret = 1; - qp->r_wrid_valid = 1; + set_bit(IPATH_R_WRID_VALID, &qp->r_aflags); if (handler) { u32 n; @@ -234,8 +239,8 @@ goto bail; } } +unlock: spin_unlock_irqrestore(&rq->lock, flags); - bail: return ret; } @@ -263,35 +268,59 @@ atomic64_t *maddr; enum ib_wc_status send_status; + /* + * Note that we check the responder QP state after + * checking the requester's state. + */ qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); - if (!qp) { - dev->n_pkt_drops++; - return; - } -again: spin_lock_irqsave(&sqp->s_lock, flags); - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || - sqp->s_rnr_timeout) { - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; - } + /* Return if we are already busy processing a work request. */ + if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || + !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + goto unlock; - /* Get the next send request. */ - if (sqp->s_last == sqp->s_head) { - /* Send work queue is empty. */ - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; + sqp->s_flags |= IPATH_S_BUSY; + +again: + if (sqp->s_last == sqp->s_head) + goto clr_busy; + wqe = get_swqe_ptr(sqp, sqp->s_last); + + /* Return if it is not OK to start a new work request. */ + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { + if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND)) + goto clr_busy; + /* We are in the error state, flush the work request. */ + send_status = IB_WC_WR_FLUSH_ERR; + goto flush_send; } /* * We can rely on the entry not changing without the s_lock * being held until we update s_last. + * We increment s_cur to indicate s_last is in progress. */ - wqe = get_swqe_ptr(sqp, sqp->s_last); + if (sqp->s_last == sqp->s_cur) { + if (++sqp->s_cur >= sqp->s_size) + sqp->s_cur = 0; + } spin_unlock_irqrestore(&sqp->s_lock, flags); + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + dev->n_pkt_drops++; + /* + * For RC, the requester would timeout and retry so + * shortcut the timeouts and just signal too many retries. 
+ */ + if (sqp->ibqp.qp_type == IB_QPT_RC) + send_status = IB_WC_RETRY_EXC_ERR; + else + send_status = IB_WC_SUCCESS; + goto serr; + } + memset(&wc, 0, sizeof wc); send_status = IB_WC_SUCCESS; @@ -396,8 +425,7 @@ again: sqp->s_len -= len; } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE || - wqe->wr.opcode == IB_WR_RDMA_READ) + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) goto send_comp; if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) @@ -417,6 +445,8 @@ again: wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: + spin_lock_irqsave(&sqp->s_lock, flags); +flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; ipath_send_complete(sqp, wqe, send_status); goto again; @@ -437,11 +467,12 @@ rnr_nak: sqp->s_rnr_retry--; spin_lock_irqsave(&sqp->s_lock, flags); if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) - goto unlock; + goto clr_busy; + sqp->s_flags |= IPATH_S_WAITING; dev->n_rnr_naks++; sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; ipath_insert_rnr_queue(sqp); - goto unlock; + goto clr_busy; inv_err: send_status = IB_WC_REM_INV_REQ_ERR; @@ -473,17 +504,19 @@ serr: } goto done; } +clr_busy: + sqp->s_flags &= ~IPATH_S_BUSY; unlock: spin_unlock_irqrestore(&sqp->s_lock, flags); done: - if (atomic_dec_and_test(&qp->refcount)) + if (qp && atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) { if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) || - qp->ibqp.qp_type == IB_QPT_SMI) { + qp->ibqp.qp_type == IB_QPT_SMI) { unsigned long flags; spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); @@ -501,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) * @dev: the device we ran out of buffers on * * Called when we run out of PIO buffers. + * If we are now in the error state, return zero to flush the + * send work request. */ -static void ipath_no_bufs_available(struct ipath_qp *qp, +static int ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) { unsigned long flags; + int ret = 1; /* * Note that as soon as want_buffer() is called and * possibly before it returns, ipath_ib_piobufavail() - * could be called. If we are still in the tasklet function, - * tasklet_hi_schedule() will not call us until the next time - * tasklet_hi_schedule() is called. - * We leave the busy flag set so that another post send doesn't - * try to put the same QP on the piowait list again. + * could be called. Therefore, put QP on the piowait list before + * enabling the PIO avail interrupt. 
*/ - spin_lock_irqsave(&dev->pending_lock, flags); - list_add_tail(&qp->piowait, &dev->piowait); - spin_unlock_irqrestore(&dev->pending_lock, flags); - want_buffer(dev->dd, qp); - dev->n_piowait++; + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { + dev->n_piowait++; + qp->s_flags |= IPATH_S_WAITING; + qp->s_flags &= ~IPATH_S_BUSY; + spin_lock(&dev->pending_lock); + if (list_empty(&qp->piowait)) + list_add_tail(&qp->piowait, &dev->piowait); + spin_unlock(&dev->pending_lock); + } else + ret = 0; + spin_unlock_irqrestore(&qp->s_lock, flags); + if (ret) + want_buffer(dev->dd, qp); + return ret; } /** @@ -596,15 +639,13 @@ void ipath_do_send(unsigned long data) struct ipath_qp *qp = (struct ipath_qp *)data; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); int (*make_req)(struct ipath_qp *qp); - - if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) - goto bail; + unsigned long flags; if ((qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) && qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { ipath_ruc_loopback(qp); - goto clear; + goto bail; } if (qp->ibqp.qp_type == IB_QPT_RC) @@ -614,6 +655,19 @@ void ipath_do_send(unsigned long data) else make_req = ipath_make_ud_req; + spin_lock_irqsave(&qp->s_lock, flags); + + /* Return if we are already busy processing a work request. */ + if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || + !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) { + spin_unlock_irqrestore(&qp->s_lock, flags); + goto bail; + } + + qp->s_flags |= IPATH_S_BUSY; + + spin_unlock_irqrestore(&qp->s_lock, flags); + again: /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { @@ -623,8 +677,8 @@ again: */ if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) { - ipath_no_bufs_available(qp, dev); - goto bail; + if (ipath_no_bufs_available(qp, dev)) + goto bail; } dev->n_unicast_xmit++; /* Record that we sent the packet and s_hdr is empty. */ @@ -633,16 +687,20 @@ again: if (make_req(qp)) goto again; -clear: - clear_bit(IPATH_S_BUSY, &qp->s_busy); + bail:; } +/* + * This should be called with s_lock held. + */ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, enum ib_wc_status status) { - unsigned long flags; - u32 last; + u32 old_last, last; + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + return; /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || @@ -661,10 +719,14 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, status != IB_WC_SUCCESS); } - spin_lock_irqsave(&qp->s_lock, flags); - last = qp->s_last; + old_last = last = qp->s_last; if (++last >= qp->s_size) last = 0; qp->s_last = last; - spin_unlock_irqrestore(&qp->s_lock, flags); + if (qp->s_cur == old_last) + qp->s_cur = last; + if (qp->s_tail == old_last) + qp->s_tail = last; + if (qp->state == IB_QPS_SQD && last == qp->s_cur) + qp->s_draining = 0; } diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index bfe8926b551..7fd18e83390 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -47,14 +47,30 @@ int ipath_make_uc_req(struct ipath_qp *qp) { struct ipath_other_headers *ohdr; struct ipath_swqe *wqe; + unsigned long flags; u32 hwords; u32 bth0; u32 len; u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); int ret = 0; - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); goto done; + } ohdr = &qp->s_hdr.u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) @@ -69,9 +85,12 @@ int ipath_make_uc_req(struct ipath_qp *qp) qp->s_wqe = NULL; switch (qp->s_state) { default: + if (!(ib_ipath_state_ops[qp->state] & + IPATH_PROCESS_NEXT_SEND_OK)) + goto bail; /* Check if send work queue is empty. */ if (qp->s_cur == qp->s_head) - goto done; + goto bail; /* * Start a new request. */ @@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp) break; default: - goto done; + goto bail; } break; @@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp) ipath_make_ruc_header(to_idev(qp->ibqp.device), qp, ohdr, bth0 | (qp->s_state << 24), qp->s_next_psn++ & IPATH_PSN_MASK); +done: ret = 1; + goto unlock; -done: +bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, */ opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - wc.imm_data = 0; - wc.wc_flags = 0; + memset(&wc, 0, sizeof wc); /* Compare the PSN verses the expected PSN. */ if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { @@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): send_first: - if (qp->r_reuse_sge) { - qp->r_reuse_sge = 0; + if (qp->r_flags & IPATH_R_REUSE_SGE) { + qp->r_flags &= ~IPATH_R_REUSE_SGE; qp->r_sge = qp->s_rdma_read_sge; } else if (!ipath_get_rwqe(qp, 0)) { dev->n_pkt_drops++; @@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ if (unlikely(tlen != (hdrsize + pmtu + 4))) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } @@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* Check for invalid length. 
*/ /* XXX LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } @@ -380,7 +403,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, tlen -= (hdrsize + pad + 4); wc.byte_len = tlen + qp->r_rcv_len; if (unlikely(wc.byte_len > qp->r_len)) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } @@ -390,14 +413,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -488,8 +507,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, dev->n_pkt_drops++; goto done; } - if (qp->r_reuse_sge) - qp->r_reuse_sge = 0; + if (qp->r_flags & IPATH_R_REUSE_SGE) + qp->r_flags &= ~IPATH_R_REUSE_SGE; else if (!ipath_get_rwqe(qp, 1)) { dev->n_pkt_drops++; goto done; diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 8b6a261c89e..77ca8ca74e7 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) u32 length; qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); - if (!qp) { + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { dev->n_pkt_drops++; - goto send_comp; + goto done; } rsge.sg_list = NULL; @@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) * present on the wire. */ length = swqe->length; + memset(&wc, 0, sizeof wc); wc.byte_len = length + sizeof(struct ib_grh); if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { wc.wc_flags = IB_WC_WITH_IMM; wc.imm_data = swqe->wr.ex.imm_data; - } else { - wc.wc_flags = 0; - wc.imm_data = 0; } /* @@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) } wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; wc.qp = &qp->ibqp; wc.src_qp = sqp->ibqp.qp_num; /* XXX do we know which pkey matched? Only needed for GSI. */ @@ -248,8 +245,7 @@ drop: kfree(rsge.sg_list); if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); -send_comp: - ipath_send_complete(sqp, swqe, IB_WC_SUCCESS); +done:; } /** @@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) struct ipath_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct ipath_swqe *wqe; + unsigned long flags; u32 nwords; u32 extra_bytes; u32 bth0; @@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp) u16 lid; int ret = 0; - if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))) - goto bail; + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. 
*/ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } if (qp->s_cur == qp->s_head) goto bail; wqe = get_swqe_ptr(qp, qp->s_cur); + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; /* Construct the header. */ ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; @@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp) dev->n_unicast_xmit++; } else { dev->n_unicast_xmit++; - lid = ah_attr->dlid & - ~((1 << dev->dd->ipath_lmc) - 1); + lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1); if (unlikely(lid == dev->dd->ipath_lid)) { + /* + * If DMAs are in progress, we can't generate + * a completion for the loopback packet since + * it would be out of order. + * XXX Instead of waiting, we could queue a + * zero length descriptor so we get a callback. + */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + spin_unlock_irqrestore(&qp->s_lock, flags); ipath_ud_loopback(qp, wqe); + spin_lock_irqsave(&qp->s_lock, flags); + ipath_send_complete(qp, wqe, IB_WC_SUCCESS); goto done; } } @@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp) ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); done: - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; ret = 1; + goto unlock; bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* * Get the next work request entry to find where to put the data. */ - if (qp->r_reuse_sge) - qp->r_reuse_sge = 0; + if (qp->r_flags & IPATH_R_REUSE_SGE) + qp->r_flags &= ~IPATH_R_REUSE_SGE; else if (!ipath_get_rwqe(qp, 0)) { /* * Count VL15 packets dropped due to no receive buffer. @@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, } /* Silently drop packets which are too big. 
*/ if (wc.byte_len > qp->r_len) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto bail; } @@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); ipath_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh)); - qp->r_wrid_valid = 0; + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) + goto bail; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h index e70946c1428..fc76316c4a5 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.h +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h @@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd, int ipath_user_sdma_make_progress(struct ipath_devdata *dd, struct ipath_user_sdma_queue *pq); -int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq, - u32 counter); void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, struct ipath_user_sdma_queue *pq); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 22bb42dc8f7..e0ec540042b 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -111,16 +111,24 @@ static unsigned int ib_ipath_disable_sma; module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(disable_sma, "Disable the SMA"); +/* + * Note that it is OK to post send work requests in the SQE and ERR + * states; ipath_do_send() will process them and generate error + * completions as per IB 1.2 C10-96. + */ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = 0, [IB_QPS_INIT] = IPATH_POST_RECV_OK, [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, + IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK | + IPATH_PROCESS_NEXT_SEND_OK, [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK, - [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, - [IB_QPS_ERR] = 0, + IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, + [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | + IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, + [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV | + IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, }; struct ipath_ucontext { @@ -230,18 +238,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) } } -static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr) -{ - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wr->wr_id; - wc.status = IB_WC_WR_FLUSH_ERR; - wc.opcode = ib_ipath_wc_opcode[wr->opcode]; - wc.qp = &qp->ibqp; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); -} - /* * Count the number of DMA descriptors needed to send length bytes of data. * Don't modify the ipath_sge_state to get the count. @@ -347,14 +343,8 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) spin_lock_irqsave(&qp->s_lock, flags); /* Check that state is OK to post send. */ - if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) { - if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR) - goto bail_inval; - /* C10-96 says generate a flushed completion entry. 
*/ - ipath_flush_wqe(qp, wr); - ret = 0; - goto bail; - } + if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) + goto bail_inval; /* IB spec says that num_sge == 0 is OK. */ if (wr->num_sge > qp->s_max_sge) @@ -677,6 +667,7 @@ bail:; static void ipath_ib_timer(struct ipath_ibdev *dev) { struct ipath_qp *resend = NULL; + struct ipath_qp *rnr = NULL; struct list_head *last; struct ipath_qp *qp; unsigned long flags; @@ -703,7 +694,9 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) if (--qp->s_rnr_timeout == 0) { do { list_del_init(&qp->timerwait); - tasklet_hi_schedule(&qp->s_task); + qp->timer_next = rnr; + rnr = qp; + atomic_inc(&qp->refcount); if (list_empty(last)) break; qp = list_entry(last->next, struct ipath_qp, @@ -743,14 +736,31 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) spin_unlock_irqrestore(&dev->pending_lock, flags); /* XXX What if timer fires again while this is running? */ - for (qp = resend; qp != NULL; qp = qp->timer_next) { + while (resend != NULL) { + qp = resend; + resend = qp->timer_next; + spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { + if (qp->s_last != qp->s_tail && + ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { dev->n_timeouts++; ipath_restart_rc(qp, qp->s_last_psn + 1); } spin_unlock_irqrestore(&qp->s_lock, flags); + /* Notify ipath_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + while (rnr != NULL) { + qp = rnr; + rnr = qp->timer_next; + + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + /* Notify ipath_destroy_qp() if it is waiting. */ if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); @@ -1010,13 +1020,24 @@ static void sdma_complete(void *cookie, int status) struct ipath_verbs_txreq *tx = cookie; struct ipath_qp *qp = tx->qp; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + unsigned int flags; + enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; - /* Generate a completion queue entry if needed */ - if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) { - enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? - IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; - + if (atomic_dec_and_test(&qp->s_dma_busy)) { + spin_lock_irqsave(&qp->s_lock, flags); + if (tx->wqe) + ipath_send_complete(qp, tx->wqe, ibs); + if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && + qp->s_last != qp->s_head) || + (qp->s_flags & IPATH_S_WAIT_DMA)) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + wake_up(&qp->wait_dma); + } else if (tx->wqe) { + spin_lock_irqsave(&qp->s_lock, flags); ipath_send_complete(qp, tx->wqe, ibs); + spin_unlock_irqrestore(&qp->s_lock, flags); } if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) @@ -1027,6 +1048,21 @@ static void sdma_complete(void *cookie, int status) wake_up(&qp->wait); } +static void decrement_dma_busy(struct ipath_qp *qp) +{ + unsigned int flags; + + if (atomic_dec_and_test(&qp->s_dma_busy)) { + spin_lock_irqsave(&qp->s_lock, flags); + if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && + qp->s_last != qp->s_head) || + (qp->s_flags & IPATH_S_WAIT_DMA)) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + wake_up(&qp->wait_dma); + } +} + /* * Compute the number of clock cycles of delay before sending the next packet. 
* The multipliers reflect the number of clocks for the fastest rate so @@ -1065,9 +1101,12 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, if (tx) { qp->s_tx = NULL; /* resend previously constructed packet */ + atomic_inc(&qp->s_dma_busy); ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); - if (ret) + if (ret) { qp->s_tx = tx; + decrement_dma_busy(qp); + } goto bail; } @@ -1118,12 +1157,14 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, tx->txreq.sg_count = ndesc; tx->map_len = (hdrwords + 2) << 2; tx->txreq.map_addr = &tx->hdr; + atomic_inc(&qp->s_dma_busy); ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); if (ret) { /* save ss and length in dwords */ tx->ss = ss; tx->len = dwords; qp->s_tx = tx; + decrement_dma_busy(qp); } goto bail; } @@ -1144,6 +1185,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, memcpy(piobuf, hdr, hdrwords << 2); ipath_copy_from_sge(piobuf + hdrwords, ss, len); + atomic_inc(&qp->s_dma_busy); ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); /* * If we couldn't queue the DMA request, save the info @@ -1154,6 +1196,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, tx->ss = NULL; tx->len = 0; qp->s_tx = tx; + decrement_dma_busy(qp); } dev->n_unaligned++; goto bail; @@ -1177,6 +1220,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, unsigned flush_wc; u32 control; int ret; + unsigned int flags; piobuf = ipath_getpiobuf(dd, plen, NULL); if (unlikely(piobuf == NULL)) { @@ -1247,8 +1291,11 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, } copy_io(piobuf, ss, len, flush_wc); done: - if (qp->s_wqe) + if (qp->s_wqe) { + spin_lock_irqsave(&qp->s_lock, flags); ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); + spin_unlock_irqrestore(&qp->s_lock, flags); + } ret = 0; bail: return ret; @@ -1281,19 +1328,12 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, * can defer SDMA restart until link goes ACTIVE without * worrying about just how we got there. */ - if (qp->ibqp.qp_type == IB_QPT_SMI) + if (qp->ibqp.qp_type == IB_QPT_SMI || + !(dd->ipath_flags & IPATH_HAS_SEND_DMA)) ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, plen, dwords); - /* All non-VL15 packets are dropped if link is not ACTIVE */ - else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) { - if (qp->s_wqe) - ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); - ret = 0; - } else if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, - plen, dwords); else - ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, + ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, plen, dwords); return ret; @@ -1401,27 +1441,46 @@ bail: * This is called from ipath_intr() at interrupt level when a PIO buffer is * available after ipath_verbs_send() returned an error that no buffers were * available. Return 1 if we consumed all the PIO buffers and we still have - * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and + * QPs waiting for buffers (for now, just restart the send tasklet and * return zero). 
*/ int ipath_ib_piobufavail(struct ipath_ibdev *dev) { + struct list_head *list; + struct ipath_qp *qplist; struct ipath_qp *qp; unsigned long flags; if (dev == NULL) goto bail; + list = &dev->piowait; + qplist = NULL; + spin_lock_irqsave(&dev->pending_lock, flags); - while (!list_empty(&dev->piowait)) { - qp = list_entry(dev->piowait.next, struct ipath_qp, - piowait); + while (!list_empty(list)) { + qp = list_entry(list->next, struct ipath_qp, piowait); list_del_init(&qp->piowait); - clear_bit(IPATH_S_BUSY, &qp->s_busy); - tasklet_hi_schedule(&qp->s_task); + qp->pio_next = qplist; + qplist = qp; + atomic_inc(&qp->refcount); } spin_unlock_irqrestore(&dev->pending_lock, flags); + while (qplist != NULL) { + qp = qplist; + qplist = qp->pio_next; + + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Notify ipath_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + bail: return 0; } @@ -2143,11 +2202,12 @@ bail: void ipath_unregister_ib_device(struct ipath_ibdev *dev) { struct ib_device *ibdev = &dev->ibdev; - - disable_timer(dev->dd); + u32 qps_inuse; ib_unregister_device(ibdev); + disable_timer(dev->dd); + if (!list_empty(&dev->pending[0]) || !list_empty(&dev->pending[1]) || !list_empty(&dev->pending[2])) @@ -2162,7 +2222,10 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev) * Note that ipath_unregister_ib_device() can be called before all * the QPs are destroyed! */ - ipath_free_all_qps(&dev->qp_table); + qps_inuse = ipath_free_all_qps(&dev->qp_table); + if (qps_inuse) + ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n", + qps_inuse); kfree(dev->qp_table.table); kfree(dev->lk_table.table); kfree(dev->txreq_bufs); @@ -2213,17 +2276,14 @@ static ssize_t show_stats(struct device *device, struct device_attribute *attr, "RC OTH NAKs %d\n" "RC timeouts %d\n" "RC RDMA dup %d\n" - "RC stalls %d\n" "piobuf wait %d\n" - "no piobuf %d\n" "unaligned %d\n" "PKT drops %d\n" "WQE errs %d\n", dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, dev->n_other_naks, dev->n_timeouts, - dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, - dev->n_no_piobuf, dev->n_unaligned, + dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned, dev->n_pkt_drops, dev->n_wqe_errs); for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { const struct ipath_opcode_stats *si = &dev->opstats[i]; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 4c7c2aa8e19..deee02ca7ca 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -74,6 +74,11 @@ #define IPATH_POST_RECV_OK 0x02 #define IPATH_PROCESS_RECV_OK 0x04 #define IPATH_PROCESS_SEND_OK 0x08 +#define IPATH_PROCESS_NEXT_SEND_OK 0x10 +#define IPATH_FLUSH_SEND 0x20 +#define IPATH_FLUSH_RECV 0x40 +#define IPATH_PROCESS_OR_FLUSH_SEND \ + (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND) /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 @@ -353,12 +358,14 @@ struct ipath_qp { struct ib_qp ibqp; struct ipath_qp *next; /* link list for QPN hash table */ struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */ + struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */ struct list_head piowait; /* link for wait PIO buf */ struct list_head timerwait; /* link for waiting for timeouts */ struct ib_ah_attr 
remote_ah_attr; struct ipath_ib_header s_hdr; /* next packet header to send */ atomic_t refcount; wait_queue_head_t wait; + wait_queue_head_t wait_dma; struct tasklet_struct s_task; struct ipath_mmap_info *ip; struct ipath_sge_state *s_cur_sge; @@ -369,7 +376,7 @@ struct ipath_qp { struct ipath_sge_state s_rdma_read_sge; struct ipath_sge_state r_sge; /* current receive data */ spinlock_t s_lock; - unsigned long s_busy; + atomic_t s_dma_busy; u16 s_pkt_delay; u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u32 s_cur_size; /* size of send packet in bytes */ @@ -383,6 +390,7 @@ struct ipath_qp { u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ u64 r_wr_id; /* ID for current receive WQE */ + unsigned long r_aflags; u32 r_len; /* total length of r_sge */ u32 r_rcv_len; /* receive data len processed */ u32 r_psn; /* expected rcv packet sequence number */ @@ -394,8 +402,7 @@ struct ipath_qp { u8 r_state; /* opcode of last packet received */ u8 r_nak_state; /* non-zero if NAK is pending */ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 r_reuse_sge; /* for UC receive errors */ - u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ + u8 r_flags; u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ u8 r_head_ack_queue; /* index into s_ack_queue[] */ u8 qp_access_flags; @@ -404,13 +411,13 @@ struct ipath_qp { u8 s_rnr_retry_cnt; u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_wait_credit; /* limit number of unacked packets sent */ u8 s_pkey_index; /* PKEY index to use */ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ u8 s_tail_ack_queue; /* index into s_ack_queue[] */ u8 s_flags; u8 s_dmult; + u8 s_draining; u8 timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; u32 remote_qpn; @@ -428,16 +435,39 @@ struct ipath_qp { struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; -/* Bit definition for s_busy. */ -#define IPATH_S_BUSY 0 +/* + * Atomic bit definitions for r_aflags. + */ +#define IPATH_R_WRID_VALID 0 + +/* + * Bit definitions for r_flags. + */ +#define IPATH_R_REUSE_SGE 0x01 /* * Bit definitions for s_flags. + * + * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs + * before processing the next SWQE + * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs + * before processing the next SWQE + * IPATH_S_WAITING - waiting for RNR timeout or send buffer available. + * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE + * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating + * next send completion entry not via send DMA. 
*/ #define IPATH_S_SIGNAL_REQ_WR 0x01 #define IPATH_S_FENCE_PENDING 0x02 #define IPATH_S_RDMAR_PENDING 0x04 #define IPATH_S_ACK_PENDING 0x08 +#define IPATH_S_BUSY 0x10 +#define IPATH_S_WAITING 0x20 +#define IPATH_S_WAIT_SSN_CREDIT 0x40 +#define IPATH_S_WAIT_DMA 0x80 + +#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \ IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA) #define IPATH_PSN_CREDIT 512 @@ -573,13 +603,11 @@ struct ipath_ibdev { u32 n_rnr_naks; u32 n_other_naks; u32 n_timeouts; - u32 n_rc_stalls; u32 n_pkt_drops; u32 n_vl15_dropped; u32 n_wqe_errs; u32 n_rdma_dup_busy; u32 n_piowait; - u32 n_no_piobuf; u32 n_unaligned; u32 port_cap_flags; u32 pma_sample_start; @@ -657,6 +685,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev) return container_of(ibdev, struct ipath_ibdev, ibdev); } +/* + * This must be called with s_lock held. + */ +static inline void ipath_schedule_send(struct ipath_qp *qp) +{ + if (qp->s_flags & IPATH_S_ANY_WAIT) + qp->s_flags &= ~IPATH_S_ANY_WAIT; + if (!(qp->s_flags & IPATH_S_BUSY)) + tasklet_hi_schedule(&qp->s_task); +} + int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -706,7 +745,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); -void ipath_free_all_qps(struct ipath_qp_table *qpt); +unsigned ipath_free_all_qps(struct ipath_qp_table *qpt); int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
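
The heart of the patch is the drain handshake built around the new atomic s_dma_busy count, the IPATH_S_ANY_WAIT flag mask, and the wait_dma wait queue: the reset and destroy paths first make the QP unschedulable under s_lock, then sleep until the last in-flight SDMA completion drops s_dma_busy to zero, while sdma_complete() retakes s_lock before waking anyone. The following is a minimal userspace sketch of that handshake, not the kernel code: pthreads and C11 atomics stand in for the spinlock, tasklet, and kernel wait queue, and the S_ANY_WAIT mask value here is made up for illustration.

/*
 * Userspace analogue of the drain protocol in ipath_destroy_qp() /
 * the IB_QPS_RESET transition.  Names mirror the diff; the condvar
 * plays the role of qp->wait_dma.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

struct qp {
	pthread_mutex_t s_lock;		/* stands in for qp->s_lock */
	unsigned s_flags;
	atomic_int s_dma_busy;		/* sends still queued to the DMA engine */
	pthread_cond_t wait_dma;	/* stands in for wait_event(qp->wait_dma, ...) */
};

#define S_ANY_WAIT 0xf0			/* hypothetical mask, cf. IPATH_S_ANY_WAIT */

/* sdma_complete() analogue: the last in-flight send wakes the drain waiter. */
static void *completion_thread(void *arg)
{
	struct qp *qp = arg;

	usleep(10000);			/* pretend the hardware takes a while */
	if (atomic_fetch_sub(&qp->s_dma_busy, 1) == 1) {
		/* Wake under the lock, as sdma_complete() takes s_lock first. */
		pthread_mutex_lock(&qp->s_lock);
		pthread_cond_broadcast(&qp->wait_dma);
		pthread_mutex_unlock(&qp->s_lock);
	}
	return NULL;
}

/* Reset/destroy path: stop new work first, then wait out the old work. */
static void drain(struct qp *qp)
{
	pthread_mutex_lock(&qp->s_lock);
	qp->s_flags &= ~S_ANY_WAIT;	/* QP can no longer be (re)scheduled */
	while (atomic_load(&qp->s_dma_busy))
		pthread_cond_wait(&qp->wait_dma, &qp->s_lock);
	pthread_mutex_unlock(&qp->s_lock);
}

int main(void)
{
	struct qp qp = {
		.s_lock = PTHREAD_MUTEX_INITIALIZER,
		.wait_dma = PTHREAD_COND_INITIALIZER,
	};
	pthread_t t;

	atomic_store(&qp.s_dma_busy, 1);	/* one send outstanding */
	pthread_create(&t, NULL, completion_thread, &qp);
	drain(&qp);				/* returns only after the completion ran */
	pthread_join(&t, NULL);
	puts("drained");
	return 0;
}

Because the completer acquires the lock before broadcasting and the waiter holds it from the s_dma_busy check until it sleeps, there is no window where the count can reach zero without the drain path noticing, which is the same lost-wakeup hazard the patch closes in the kernel paths.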