From 35ff032e65ab5cc03bbba46cefece7376c7c562f Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 27 Apr 2007 11:11:11 -0700 Subject: IB/ipath: Don't call spin_lock_irq() from interrupt context This patch fixes the problem reported by Bernd Schubert with kernel debug options enabled: BUG: at kernel/lockdep.c:1860 trace_hardirqs_on() This was caused by using spin_lock_irq()/spin_unlock_irq() from interrupt context. Fix all the places that might be called from interrupts to use spin_lock_irqsave()/spin_unlock_irqrestore(). Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index b4b88d0b53f..e3e53327635 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -587,6 +587,7 @@ static void send_rc_ack(struct ipath_qp *qp) u32 hwords; struct ipath_ib_header hdr; struct ipath_other_headers *ohdr; + unsigned long flags; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ if (qp->r_head_ack_queue != qp->s_tail_ack_queue) @@ -640,11 +641,11 @@ static void send_rc_ack(struct ipath_qp *qp) dev->n_rc_qacks++; queue_ack: - spin_lock_irq(&qp->s_lock); + spin_lock_irqsave(&qp->s_lock, flags); qp->s_flags |= IPATH_S_ACK_PENDING; qp->s_nak_state = qp->r_nak_state; qp->s_ack_psn = qp->r_ack_psn; - spin_unlock_irq(&qp->s_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); /* Call ipath_do_rc_send() in another thread. 
*/ tasklet_hi_schedule(&qp->s_task); @@ -1294,6 +1295,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, struct ipath_ack_entry *e; u8 i, prev; int old_req; + unsigned long flags; if (diff > 0) { /* @@ -1327,7 +1329,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, psn &= IPATH_PSN_MASK; e = NULL; old_req = 1; - spin_lock_irq(&qp->s_lock); + spin_lock_irqsave(&qp->s_lock, flags); for (i = qp->r_head_ack_queue; ; i = prev) { if (i == qp->s_tail_ack_queue) old_req = 0; @@ -1425,7 +1427,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, * after all the previous RDMA reads and atomics. */ if (i == qp->r_head_ack_queue) { - spin_unlock_irq(&qp->s_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); qp->r_nak_state = 0; qp->r_ack_psn = qp->r_psn - 1; goto send_ack; @@ -1443,7 +1445,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, tasklet_hi_schedule(&qp->s_task); unlock_done: - spin_unlock_irq(&qp->s_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); done: return 1; @@ -1453,10 +1455,12 @@ send_ack: static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) { - spin_lock_irq(&qp->s_lock); + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); qp->state = IB_QPS_ERR; ipath_error_qp(qp, err); - spin_unlock_irq(&qp->s_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); } /** -- cgit v1.2.3 From c3af664adbe06803931dbc7a3c8588982d72fac1 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 27 Apr 2007 11:08:40 -0700 Subject: IB/ipath: Don't put QP in timeout queue if waiting to send This fixes a problem which causes too many RC timeouts and retransmits. 
Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 7 +------ drivers/infiniband/hw/ipath/ipath_verbs.h | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index e3e53327635..9e68c91130d 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -228,18 +228,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, goto done; if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || - qp->s_rnr_timeout) + qp->s_rnr_timeout || qp->s_wait_credit) goto bail; /* Limit the number of packets sent without an ACK. */ if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { qp->s_wait_credit = 1; dev->n_rc_stalls++; - spin_lock(&dev->pending_lock); - if (list_empty(&qp->timerwait)) - list_add_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); - spin_unlock(&dev->pending_lock); goto bail; } diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 7c4929f1cb5..c62f9c5854f 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -422,7 +422,7 @@ struct ipath_qp { #define IPATH_S_RDMAR_PENDING 0x04 #define IPATH_S_ACK_PENDING 0x08 -#define IPATH_PSN_CREDIT 2048 +#define IPATH_PSN_CREDIT 512 /* * Since struct ipath_swqe is not a fixed size, we can't simply index into -- cgit v1.2.3 From 9ba6d5529dd919b442eedf5bef1dd28aca2ee9fe Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 12 Apr 2007 18:10:25 +0300 Subject: IB/mthca: Work around kernel QP starvation With mthca, RC QPs can starve each other and even UD QPs on the same hardware schedule queue. As a result, userspace MPI can starve e.g. IPoIB traffic, with netdev watchdog warnings getting printed out, and TCP connections getting stuck or failing. 
Reduce the chance of this happening by using three separate hardware schedule queues: one for userspace RC QPs, one for kernel RC QPs, and one for all other QPs. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 8fe6fee7a97..fee60c852d1 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -701,6 +701,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); } + if (ibqp->qp_type == IB_QPT_RC && + cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { + u8 sched_queue = ibqp->uobject ? 0x2 : 0x1; + + if (mthca_is_memfree(dev)) + qp_context->rlkey_arbel_sched_queue |= sched_queue; + else + qp_context->tavor_sched_queue |= cpu_to_be32(sched_queue); + + qp_param->opt_param_mask |= + cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE); + } + if (attr_mask & IB_QP_TIMEOUT) { qp_context->pri_path.ackto = attr->timeout << 3; qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT); -- cgit v1.2.3 From 6b66b2da1e821181a001c00b04a807724ad803cd Mon Sep 17 00:00:00 2001 From: Robert Walsh Date: Fri, 27 Apr 2007 21:07:23 -0700 Subject: IB/ipath: Don't corrupt pending mmap list when unmapped objects are freed Fix the pending mmap code so it doesn't corrupt the list of pending mmaps and crash the machine when pending mmaps are destroyed without first being mapped. Also, remove an unused variable, and use standard kernel lists instead of our own homebrewed linked list implementation to keep the pending mmap list. 
Signed-off-by: Robert Walsh Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_cq.c | 51 +++++++++++------------- drivers/infiniband/hw/ipath/ipath_mmap.c | 64 ++++++++++++++++++++++++++++--- drivers/infiniband/hw/ipath/ipath_qp.c | 52 ++++++++++++++----------- drivers/infiniband/hw/ipath/ipath_srq.c | 55 +++++++++++++------------- drivers/infiniband/hw/ipath/ipath_verbs.c | 3 ++ drivers/infiniband/hw/ipath/ipath_verbs.h | 18 +++++++-- 6 files changed, 153 insertions(+), 90 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index ea78e6dddc9..4715f89528c 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -243,33 +243,21 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, * See ipath_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { - struct ipath_mmap_info *ip; - __u64 offset = (__u64) wc; int err; + u32 s = sizeof *wc + sizeof(struct ib_wc) * entries; - err = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (err) { - ret = ERR_PTR(err); + cq->ip = ipath_create_mmap_info(dev, s, context, wc); + if (!cq->ip) { + ret = ERR_PTR(-ENOMEM); goto bail_wc; } - /* Allocate info for ipath_mmap(). 
*/ - ip = kmalloc(sizeof(*ip), GFP_KERNEL); - if (!ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_wc; + err = ib_copy_to_udata(udata, &cq->ip->offset, + sizeof(cq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; } - cq->ip = ip; - ip->context = context; - ip->obj = wc; - kref_init(&ip->ref); - ip->mmap_cnt = 0; - ip->size = PAGE_ALIGN(sizeof(*wc) + - sizeof(struct ib_wc) * entries); - spin_lock_irq(&dev->pending_lock); - ip->next = dev->pending_mmaps; - dev->pending_mmaps = ip; - spin_unlock_irq(&dev->pending_lock); } else cq->ip = NULL; @@ -277,12 +265,18 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, if (dev->n_cqs_allocated == ib_ipath_max_cqs) { spin_unlock(&dev->n_cqs_lock); ret = ERR_PTR(-ENOMEM); - goto bail_wc; + goto bail_ip; } dev->n_cqs_allocated++; spin_unlock(&dev->n_cqs_lock); + if (cq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + /* * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. 
* The number of entries should be >= the number requested or return @@ -301,12 +295,12 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, goto done; +bail_ip: + kfree(cq->ip); bail_wc: vfree(wc); - bail_cq: kfree(cq); - done: return ret; } @@ -443,13 +437,12 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) if (cq->ip) { struct ipath_ibdev *dev = to_idev(ibcq->device); struct ipath_mmap_info *ip = cq->ip; + u32 s = sizeof *wc + sizeof(struct ib_wc) * cqe; - ip->obj = wc; - ip->size = PAGE_ALIGN(sizeof(*wc) + - sizeof(struct ib_wc) * cqe); + ipath_update_mmap_info(dev, ip, s, wc); spin_lock_irq(&dev->pending_lock); - ip->next = dev->pending_mmaps; - dev->pending_mmaps = ip; + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, &dev->pending_mmaps); spin_unlock_irq(&dev->pending_lock); } diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c index a82157db468..937bc3396b5 100644 --- a/drivers/infiniband/hw/ipath/ipath_mmap.c +++ b/drivers/infiniband/hw/ipath/ipath_mmap.c @@ -46,6 +46,11 @@ void ipath_release_mmap_info(struct kref *ref) { struct ipath_mmap_info *ip = container_of(ref, struct ipath_mmap_info, ref); + struct ipath_ibdev *dev = to_idev(ip->context->device); + + spin_lock_irq(&dev->pending_lock); + list_del(&ip->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); vfree(ip->obj); kfree(ip); @@ -60,14 +65,12 @@ static void ipath_vma_open(struct vm_area_struct *vma) struct ipath_mmap_info *ip = vma->vm_private_data; kref_get(&ip->ref); - ip->mmap_cnt++; } static void ipath_vma_close(struct vm_area_struct *vma) { struct ipath_mmap_info *ip = vma->vm_private_data; - ip->mmap_cnt--; kref_put(&ip->ref, ipath_release_mmap_info); } @@ -87,7 +90,7 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) struct ipath_ibdev *dev = to_idev(context->device); unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; unsigned long size = vma->vm_end - 
vma->vm_start; - struct ipath_mmap_info *ip, **pp; + struct ipath_mmap_info *ip, *pp; int ret = -EINVAL; /* @@ -96,15 +99,16 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) * CQ, QP, or SRQ is soon followed by a call to mmap(). */ spin_lock_irq(&dev->pending_lock); - for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) { + list_for_each_entry_safe(ip, pp, &dev->pending_mmaps, + pending_mmaps) { /* Only the creator is allowed to mmap the object */ - if (context != ip->context || (void *) offset != ip->obj) + if (context != ip->context || (__u64) offset != ip->offset) continue; /* Don't allow a mmap larger than the object. */ if (size > ip->size) break; - *pp = ip->next; + list_del_init(&ip->pending_mmaps); spin_unlock_irq(&dev->pending_lock); ret = remap_vmalloc_range(vma, ip->obj, 0); @@ -119,3 +123,51 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) done: return ret; } + +/* + * Allocate information for ipath_mmap + */ +struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev, + u32 size, + struct ib_ucontext *context, + void *obj) { + struct ipath_mmap_info *ip; + + ip = kmalloc(sizeof *ip, GFP_KERNEL); + if (!ip) + goto bail; + + size = PAGE_ALIGN(size); + + spin_lock_irq(&dev->mmap_offset_lock); + if (dev->mmap_offset == 0) + dev->mmap_offset = PAGE_SIZE; + ip->offset = dev->mmap_offset; + dev->mmap_offset += size; + spin_unlock_irq(&dev->mmap_offset_lock); + + INIT_LIST_HEAD(&ip->pending_mmaps); + ip->size = size; + ip->context = context; + ip->obj = obj; + kref_init(&ip->ref); + +bail: + return ip; +} + +void ipath_update_mmap_info(struct ipath_ibdev *dev, + struct ipath_mmap_info *ip, + u32 size, void *obj) { + size = PAGE_ALIGN(size); + + spin_lock_irq(&dev->mmap_offset_lock); + if (dev->mmap_offset == 0) + dev->mmap_offset = PAGE_SIZE; + ip->offset = dev->mmap_offset; + dev->mmap_offset += size; + spin_unlock_irq(&dev->mmap_offset_lock); + + ip->size = size; + ip->obj = obj; +} diff 
--git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 16db9ac0b40..bfef08ecd34 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -844,34 +844,36 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, * See ipath_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { - struct ipath_mmap_info *ip; - __u64 offset = (__u64) qp->r_rq.wq; int err; - err = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_rwq; - } + if (!qp->r_rq.wq) { + __u64 offset = 0; - if (qp->r_rq.wq) { - /* Allocate info for ipath_mmap(). */ - ip = kmalloc(sizeof(*ip), GFP_KERNEL); - if (!ip) { + err = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_rwq; + } + } else { + u32 s = sizeof(struct ipath_rwq) + + qp->r_rq.size * sz; + + qp->ip = + ipath_create_mmap_info(dev, s, + ibpd->uobject->context, + qp->r_rq.wq); + if (!qp->ip) { ret = ERR_PTR(-ENOMEM); goto bail_rwq; } - qp->ip = ip; - ip->context = ibpd->uobject->context; - ip->obj = qp->r_rq.wq; - kref_init(&ip->ref); - ip->mmap_cnt = 0; - ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + - qp->r_rq.size * sz); - spin_lock_irq(&dev->pending_lock); - ip->next = dev->pending_mmaps; - dev->pending_mmaps = ip; - spin_unlock_irq(&dev->pending_lock); + + err = ib_copy_to_udata(udata, &(qp->ip->offset), + sizeof(qp->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } } } @@ -885,6 +887,12 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, dev->n_qps_allocated++; spin_unlock(&dev->n_qps_lock); + if (qp->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + ret = &qp->ibqp; goto bail; diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 94033503400..03acae66ba8 100644 --- 
a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -139,33 +139,24 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, * See ipath_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { - struct ipath_mmap_info *ip; - __u64 offset = (__u64) srq->rq.wq; int err; + u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz; - err = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (err) { - ret = ERR_PTR(err); + srq->ip = + ipath_create_mmap_info(dev, s, + ibpd->uobject->context, + srq->rq.wq); + if (!srq->ip) { + ret = ERR_PTR(-ENOMEM); goto bail_wq; } - /* Allocate info for ipath_mmap(). */ - ip = kmalloc(sizeof(*ip), GFP_KERNEL); - if (!ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_wq; + err = ib_copy_to_udata(udata, &srq->ip->offset, + sizeof(srq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; } - srq->ip = ip; - ip->context = ibpd->uobject->context; - ip->obj = srq->rq.wq; - kref_init(&ip->ref); - ip->mmap_cnt = 0; - ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + - srq->rq.size * sz); - spin_lock_irq(&dev->pending_lock); - ip->next = dev->pending_mmaps; - dev->pending_mmaps = ip; - spin_unlock_irq(&dev->pending_lock); } else srq->ip = NULL; @@ -181,21 +172,27 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, if (dev->n_srqs_allocated == ib_ipath_max_srqs) { spin_unlock(&dev->n_srqs_lock); ret = ERR_PTR(-ENOMEM); - goto bail_wq; + goto bail_ip; } dev->n_srqs_allocated++; spin_unlock(&dev->n_srqs_lock); + if (srq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + ret = &srq->ibsrq; goto done; +bail_ip: + kfree(srq->ip); bail_wq: vfree(srq->rq.wq); - bail_srq: kfree(srq); - done: return ret; } @@ -312,13 +309,13 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, if (srq->ip) { struct ipath_mmap_info *ip = srq->ip; struct ipath_ibdev *dev = to_idev(srq->ibsrq.device); + 
u32 s = sizeof(struct ipath_rwq) + size * sz; - ip->obj = wq; - ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + - size * sz); + ipath_update_mmap_info(dev, ip, s, wq); spin_lock_irq(&dev->pending_lock); - ip->next = dev->pending_mmaps; - dev->pending_mmaps = ip; + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, + &dev->pending_mmaps); spin_unlock_irq(&dev->pending_lock); } } else if (attr_mask & IB_SRQ_LIMIT) { diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 18c6df2052c..b676ea81fc4 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1476,7 +1476,10 @@ int ipath_register_ib_device(struct ipath_devdata *dd) ret = -ENOMEM; goto err_lk; } + INIT_LIST_HEAD(&idev->pending_mmaps); spin_lock_init(&idev->pending_lock); + idev->mmap_offset = PAGE_SIZE; + spin_lock_init(&idev->mmap_offset_lock); INIT_LIST_HEAD(&idev->pending[0]); INIT_LIST_HEAD(&idev->pending[1]); INIT_LIST_HEAD(&idev->pending[2]); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index c62f9c5854f..ac66c00a297 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -173,12 +173,12 @@ struct ipath_ah { * this as its vm_private_data. 
*/ struct ipath_mmap_info { - struct ipath_mmap_info *next; + struct list_head pending_mmaps; struct ib_ucontext *context; void *obj; + __u64 offset; struct kref ref; unsigned size; - unsigned mmap_cnt; }; /* @@ -485,9 +485,10 @@ struct ipath_opcode_stats { struct ipath_ibdev { struct ib_device ibdev; - struct list_head dev_list; struct ipath_devdata *dd; - struct ipath_mmap_info *pending_mmaps; + struct list_head pending_mmaps; + spinlock_t mmap_offset_lock; + u32 mmap_offset; int ib_unit; /* This is the device number */ u16 sm_lid; /* in host order */ u8 sm_sl; @@ -768,6 +769,15 @@ int ipath_dealloc_fmr(struct ib_fmr *ibfmr); void ipath_release_mmap_info(struct kref *ref); +struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev, + u32 size, + struct ib_ucontext *context, + void *obj); + +void ipath_update_mmap_info(struct ipath_ibdev *dev, + struct ipath_mmap_info *ip, + u32 size, void *obj); + int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev); -- cgit v1.2.3 From 4a97d47ef7946cf31b76945c3199b0b5cad6a8ed Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 26 Apr 2007 15:21:02 -0500 Subject: RDMA/cxgb3: Fix TERM codes Fix TERMINATE layer, type, and ecode values based on conformance testing. 
Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_qp.c | 69 +++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 0a472c9b44d..714dddbc9a9 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -471,43 +471,62 @@ int iwch_bind_mw(struct ib_qp *qp, return err; } -static void build_term_codes(int t3err, u8 *layer_type, u8 *ecode, int tagged) +static inline void build_term_codes(struct respQ_msg_t *rsp_msg, + u8 *layer_type, u8 *ecode) { - switch (t3err) { + int status = TPT_ERR_INTERNAL_ERR; + int tagged = 0; + int opcode = -1; + int rqtype = 0; + int send_inv = 0; + + if (rsp_msg) { + status = CQE_STATUS(rsp_msg->cqe); + opcode = CQE_OPCODE(rsp_msg->cqe); + rqtype = RQ_TYPE(rsp_msg->cqe); + send_inv = (opcode == T3_SEND_WITH_INV) || + (opcode == T3_SEND_WITH_SE_INV); + tagged = (opcode == T3_RDMA_WRITE) || + (rqtype && (opcode == T3_READ_RESP)); + } + + switch (status) { case TPT_ERR_STAG: - if (tagged == 1) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_INV_STAG; - } else if (tagged == 2) { + if (send_inv) { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_CANT_INV_STAG; + } else { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_INV_STAG; } break; case TPT_ERR_PDID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + if ((opcode == T3_SEND_WITH_INV) || + (opcode == T3_SEND_WITH_SE_INV)) + *ecode = RDMAP_CANT_INV_STAG; + else + *ecode = RDMAP_STAG_NOT_ASSOC; + break; case TPT_ERR_QPID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_STAG_NOT_ASSOC; + break; case TPT_ERR_ACCESS: - if (tagged == 1) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_STAG_NOT_ASSOC; - } else if (tagged == 2) { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = 
RDMAP_STAG_NOT_ASSOC; - } + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_ACC_VIOL; break; case TPT_ERR_WRAP: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_TO_WRAP; break; case TPT_ERR_BOUND: - if (tagged == 1) { + if (tagged) { *layer_type = LAYER_DDP|DDP_TAGGED_ERR; *ecode = DDPT_BASE_BOUNDS; - } else if (tagged == 2) { + } else { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_BASE_BOUNDS; - } else { - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_MSG_TOOBIG; } break; case TPT_ERR_INVALIDATE_SHARED_MR: @@ -591,8 +610,6 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) { union t3_wr *wqe; struct terminate_message *term; - int status; - int tagged = 0; struct sk_buff *skb; PDBG("%s %d\n", __FUNCTION__, __LINE__); @@ -610,17 +627,7 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) /* immediate data starts here. */ term = (struct terminate_message *)wqe->send.sgl; - if (rsp_msg) { - status = CQE_STATUS(rsp_msg->cqe); - if (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE) - tagged = 1; - if ((CQE_OPCODE(rsp_msg->cqe) == T3_READ_REQ) || - (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) - tagged = 2; - } else { - status = TPT_ERR_INTERNAL_ERR; - } - build_term_codes(status, &term->layer_etype, &term->ecode, tagged); + build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); build_fw_riwrh((void *)wqe, T3_WR_SEND, T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 1, qhp->ep->hwtid, 5); -- cgit v1.2.3 From 1860cdf802310e4a988e0b8fca41cc97da36f779 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 26 Apr 2007 15:21:09 -0500 Subject: RDMA/cxgb3: Fail qp creation if the requested max_inline is too large Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_wr.h | 1 + drivers/infiniband/hw/cxgb3/iwch_provider.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h 
b/drivers/infiniband/hw/cxgb3/cxio_wr.h index 90d7b8972cb..ff7290eacef 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -38,6 +38,7 @@ #include "firmware_exports.h" #define T3_MAX_SGE 4 +#define T3_MAX_INLINE 64 #define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) #define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index af28a317016..93038c00713 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -780,6 +780,9 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, if (rqsize > T3_MAX_RQ_SIZE) return ERR_PTR(-EINVAL); + if (attrs->cap.max_inline_data > T3_MAX_INLINE) + return ERR_PTR(-EINVAL); + /* * NOTE: The SQ and total WQ sizes don't need to be * a power of two. However, all the code assumes -- cgit v1.2.3 From 60be4b5966e22040f97db9dada72841bf90479d1 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 26 Apr 2007 15:21:15 -0500 Subject: RDMA/cxgb3: Initialize cpu_idx field in cpl_close_listserv_req message Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 3b4b0acd707..a02be40ee29 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1189,6 +1189,7 @@ static int listen_stop(struct iwch_listen_ep *ep) } req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req)); req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->cpu_idx = 0; OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid)); skb->priority = 1; ep->com.tdev->send(ep->com.tdev, skb); -- cgit v1.2.3 From aff9e39d97585486764572ab2f3bf5dfce18c660 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 
26 Apr 2007 15:21:20 -0500 Subject: RDMA/cxgb3: Support for new abort logic The HW now posts 2 ABORT_RPL and/or PEER_ABORT_REQ messages. We need to handle them by silently dropping the 1st but mark that we're ready for the final message. This plugs some close races between the uP and HW. Also update the minimum required firmware version. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 18 ++++++++++++++++++ drivers/infiniband/hw/cxgb3/iwch_cm.h | 6 ++++++ 2 files changed, 24 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index a02be40ee29..b2faff5abce 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1109,6 +1109,15 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) PDBG("%s ep %p\n", __FUNCTION__, ep); + /* + * We get 2 abort replies from the HW. The first one must + * be ignored except for scribbling that we need one more. + */ + if (!(ep->flags & ABORT_REQ_IN_PROGRESS)) { + ep->flags |= ABORT_REQ_IN_PROGRESS; + return CPL_RET_BUF_DONE; + } + close_complete_upcall(ep); state_set(&ep->com, DEAD); release_ep_resources(ep); @@ -1476,6 +1485,15 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) int ret; int state; + /* + * We get 2 peer aborts from the HW. The first one must + * be ignored except for scribbling that we need one more. 
+ */ + if (!(ep->flags & PEER_ABORT_IN_PROGRESS)) { + ep->flags |= PEER_ABORT_IN_PROGRESS; + return CPL_RET_BUF_DONE; + } + if (is_neg_adv_abort(req->status)) { PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid); diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index 0c6f281bd4a..21a388c313c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -143,6 +143,11 @@ enum iwch_ep_state { DEAD, }; +enum iwch_ep_flags { + PEER_ABORT_IN_PROGRESS = (1 << 0), + ABORT_REQ_IN_PROGRESS = (1 << 1), +}; + struct iwch_ep_common { struct iw_cm_id *cm_id; struct iwch_qp *qp; @@ -181,6 +186,7 @@ struct iwch_ep { u16 plen; u32 ird; u32 ord; + u32 flags; }; static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id) -- cgit v1.2.3 From 6ed89b9574776d4178f1ad754d20e4f1e5a4b6c8 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Thu, 3 May 2007 12:40:51 -0700 Subject: IB/ipath: Fix two more spin lock problems Fix a missing unlock in ipath_rc_rcv_resp() and remove an extra unlock from ipath_rc_rcv_error(). 
Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 9e68c91130d..f4d729d5dd1 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -1257,6 +1257,7 @@ ack_err: wc.dlid_path_bits = 0; wc.port_num = 0; ipath_sqerror_qp(qp, &wc); + spin_unlock_irqrestore(&qp->s_lock, flags); bail: return; } @@ -1436,7 +1437,6 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, break; } qp->r_nak_state = 0; - spin_unlock_irq(&qp->s_lock); tasklet_hi_schedule(&qp->s_task); unlock_done: -- cgit v1.2.3 From 154257f3626ea6dd96781fac0896c3f27fe2b0a1 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Thu, 3 May 2007 12:43:03 -0700 Subject: IB/ipath: Fix a race condition when generating ACKs Fix a problem where simple ACKs can be sent ahead of RDMA read responses thus implicitly NAKing the RDMA read. Signed-off-by: Ralph Campbell Signed-off-by: Robert Walsh Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index f4d729d5dd1..1915771fd03 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -98,13 +98,21 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, case OP(RDMA_READ_RESPONSE_LAST): case OP(RDMA_READ_RESPONSE_ONLY): case OP(ATOMIC_ACKNOWLEDGE): - qp->s_ack_state = OP(ACKNOWLEDGE); + /* + * We can increment the tail pointer now that the last + * response has been sent instead of only being + * constructed. 
+ */ + if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) + qp->s_tail_ack_queue = 0; /* FALLTHROUGH */ + case OP(SEND_ONLY): case OP(ACKNOWLEDGE): /* Check for no next entry in the queue. */ if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { if (qp->s_flags & IPATH_S_ACK_PENDING) goto normal; + qp->s_ack_state = OP(ACKNOWLEDGE); goto bail; } @@ -117,12 +125,8 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, if (len > pmtu) { len = pmtu; qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); - } else { + } else qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); - if (++qp->s_tail_ack_queue > - IPATH_MAX_RDMA_ATOMIC) - qp->s_tail_ack_queue = 0; - } ohdr->u.aeth = ipath_compute_aeth(qp); hwords++; qp->s_ack_rdma_psn = e->psn; @@ -139,8 +143,6 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, cpu_to_be32(e->atomic_data); hwords += sizeof(ohdr->u.at) / sizeof(u32); bth2 = e->psn; - if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) - qp->s_tail_ack_queue = 0; } bth0 = qp->s_ack_state << 24; break; @@ -156,8 +158,6 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, ohdr->u.aeth = ipath_compute_aeth(qp); hwords++; qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); - if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) - qp->s_tail_ack_queue = 0; } bth0 = qp->s_ack_state << 24; bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; @@ -171,7 +171,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, * the ACK before setting s_ack_state to ACKNOWLEDGE * (see above). */ - qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); + qp->s_ack_state = OP(SEND_ONLY); qp->s_flags &= ~IPATH_S_ACK_PENDING; qp->s_cur_sge = NULL; if (qp->s_nak_state) @@ -223,7 +223,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* Sending responses has higher priority over sending requests. 
*/ if ((qp->r_head_ack_queue != qp->s_tail_ack_queue || (qp->s_flags & IPATH_S_ACK_PENDING) || - qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) && + qp->s_ack_state != OP(ACKNOWLEDGE)) && ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p)) goto done; @@ -585,7 +585,9 @@ static void send_rc_ack(struct ipath_qp *qp) unsigned long flags; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ - if (qp->r_head_ack_queue != qp->s_tail_ack_queue) + if (qp->r_head_ack_queue != qp->s_tail_ack_queue || + (qp->s_flags & IPATH_S_ACK_PENDING) || + qp->s_ack_state != OP(ACKNOWLEDGE)) goto queue_ack; /* Construct the header. */ -- cgit v1.2.3 From f4fd0b224d60044d2da5ca02f8f2b5150c1d8731 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 May 2007 13:48:47 +0300 Subject: IB: Add CQ comp_vector support Add a num_comp_vectors member to struct ib_device and extend ib_create_cq() to pass in a comp_vector parameter -- this parallels the userspace libibverbs API. Update all hardware drivers to set num_comp_vectors to 1 and have all ULPs pass 0 for the comp_vector value. Pass the value of num_comp_vectors to userspace rather than hard-coding a value of 1. We want multiple CQ event vector support (via MSI-X or similar for adapters that can generate multiple interrupts), but it's not clear how many vectors we want, or how we want to deal with policy issues such as how to decide which vector to use or how to set up interrupt affinity. This patch is useful for experimenting, since no core changes will be necessary when updating a driver to support multiple vectors, and we know that we want to make at least these changes anyway. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2_provider.c | 3 ++- drivers/infiniband/hw/cxgb3/iwch_provider.c | 3 ++- drivers/infiniband/hw/ehca/ehca_cq.c | 2 +- drivers/infiniband/hw/ehca/ehca_iverbs.h | 2 +- drivers/infiniband/hw/ehca/ehca_main.c | 3 ++- drivers/infiniband/hw/ipath/ipath_cq.c | 2 +- drivers/infiniband/hw/ipath/ipath_verbs.c | 1 + drivers/infiniband/hw/ipath/ipath_verbs.h | 2 +- drivers/infiniband/hw/mthca/mthca_provider.c | 2 ++ 9 files changed, 13 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 607c09bf764..109166223c0 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -290,7 +290,7 @@ static int c2_destroy_qp(struct ib_qp *ib_qp) return 0; } -static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries, +static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries, int vector, struct ib_ucontext *context, struct ib_udata *udata) { @@ -795,6 +795,7 @@ int c2_register_device(struct c2_dev *dev) memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6); dev->ibdev.phys_port_cnt = 1; + dev->ibdev.num_comp_vectors = 1; dev->ibdev.dma_device = &dev->pcidev->dev; dev->ibdev.query_device = c2_query_device; dev->ibdev.query_port = c2_query_port; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 93038c00713..78a495f5332 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -139,7 +139,7 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq) return 0; } -static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, +static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int vector, struct ib_ucontext *ib_context, 
struct ib_udata *udata) { @@ -1110,6 +1110,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.node_type = RDMA_NODE_RNIC; memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; + dev->ibdev.num_comp_vectors = 1; dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev); dev->ibdev.query_device = iwch_query_device; dev->ibdev.query_port = iwch_query_port; diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index e2cdc1a16fe..67f0670fe3b 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -113,7 +113,7 @@ struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) return ret; } -struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, +struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, struct ib_ucontext *context, struct ib_udata *udata) { diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 95fd59fb452..aff96ac4fd1 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -123,7 +123,7 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); -struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, +struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, struct ib_ucontext *context, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 3b23d677cb8..77bb36bba0d 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -313,6 +313,7 @@ int ehca_init_device(struct ehca_shca *shca) shca->ib_device.node_type = RDMA_NODE_IB_CA; shca->ib_device.phys_port_cnt = shca->num_ports; + shca->ib_device.num_comp_vectors = 1; shca->ib_device.dma_device = 
&shca->ibmebus_dev->ofdev.dev; shca->ib_device.query_device = ehca_query_device; shca->ib_device.query_port = ehca_query_port; @@ -375,7 +376,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) return -EPERM; } - ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10); + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10, 0); if (IS_ERR(ibcq)) { ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); return PTR_ERR(ibcq); diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 4715f89528c..00d3eb9bc69 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -204,7 +204,7 @@ static void send_complete(unsigned long data) * * Called by ib_create_cq() in the generic verbs code. */ -struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, +struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, struct ib_ucontext *context, struct ib_udata *udata) { diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index b676ea81fc4..12933e77c7e 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1561,6 +1561,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; + dev->num_comp_vectors = 1; dev->dma_device = &dd->pcidev->dev; dev->query_device = ipath_query_device; dev->modify_device = ipath_modify_device; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index ac66c00a297..2d734fb6eff 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -735,7 +735,7 @@ int ipath_destroy_srq(struct ib_srq *ibsrq); int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -struct ib_cq *ipath_create_cq(struct 
ib_device *ibdev, int entries, +struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, struct ib_ucontext *context, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 47e6fd46d9c..1c05486c3c6 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -663,6 +663,7 @@ static int mthca_destroy_qp(struct ib_qp *qp) } static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, + int comp_vector, struct ib_ucontext *context, struct ib_udata *udata) { @@ -1292,6 +1293,7 @@ int mthca_register_device(struct mthca_dev *dev) (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; + dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dma_device = &dev->pdev->dev; dev->ib_dev.query_device = mthca_query_device; dev->ib_dev.query_port = mthca_query_port; -- cgit v1.2.3 From ed23a72778f3dbd465e55b06fe31629e7e1dd2f3 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 6 May 2007 21:02:48 -0700 Subject: IB: Return "maybe missed event" hint from ib_req_notify_cq() The semantics defined by the InfiniBand specification say that completion events are only generated when a completion is added to a completion queue (CQ) after completion notification is requested. In other words, this means that the following race is possible: while (CQ is not empty) ib_poll_cq(CQ); // new completion is added after while loop is exited ib_req_notify_cq(CQ); // no event is generated for the existing completion To close this race, the IB spec recommends doing another poll of the CQ after requesting notification. However, it is not always possible to arrange code this way (for example, we have found that NAPI for IPoIB cannot poll after requesting notification).
Also, some hardware (e.g. Mellanox HCAs) actually will generate an event for completions added before the call to ib_req_notify_cq() -- which is allowed by the spec, since there's no way for any upper-layer consumer to know exactly when a completion was really added -- so the extra poll of the CQ is just a waste. Motivated by this, we add a new flag "IB_CQ_REPORT_MISSED_EVENTS" for ib_req_notify_cq() so that it can return a hint about whether a completion may have been added before the request for notification. The return value of ib_req_notify_cq() is extended so: < 0 means an error occurred while requesting notification == 0 means notification was requested successfully, and if IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events were missed and it is safe to wait for another event. > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed in. It means that the consumer must poll the CQ again to make sure it is empty to avoid the race described above. We add a flag to enable this behavior rather than turning it on unconditionally, because checking for missed events may incur significant overhead for some low-level drivers, and consumers that don't care about the results of this test shouldn't be forced to pay for the test.
Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2.h | 2 +- drivers/infiniband/hw/amso1100/c2_cq.c | 16 ++++++++++++---- drivers/infiniband/hw/cxgb3/cxio_hal.c | 3 +++ drivers/infiniband/hw/cxgb3/iwch_provider.c | 8 +++++--- drivers/infiniband/hw/ehca/ehca_iverbs.h | 2 +- drivers/infiniband/hw/ehca/ehca_reqs.c | 14 +++++++++++--- drivers/infiniband/hw/ehca/ipz_pt_fn.h | 8 ++++++++ drivers/infiniband/hw/ipath/ipath_cq.c | 15 +++++++++++---- drivers/infiniband/hw/ipath/ipath_verbs.h | 2 +- drivers/infiniband/hw/mthca/mthca_cq.c | 12 +++++++----- drivers/infiniband/hw/mthca/mthca_dev.h | 4 ++-- 11 files changed, 62 insertions(+), 24 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h index 04a9db5de88..fa58200217a 100644 --- a/drivers/infiniband/hw/amso1100/c2.h +++ b/drivers/infiniband/hw/amso1100/c2.h @@ -519,7 +519,7 @@ extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq); extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index); extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index); extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify); +extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); /* CM */ extern int c2_llp_connect(struct iw_cm_id *cm_id, diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c index 5175c99ee58..d2b3366786d 100644 --- a/drivers/infiniband/hw/amso1100/c2_cq.c +++ b/drivers/infiniband/hw/amso1100/c2_cq.c @@ -217,17 +217,19 @@ int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) return npolled; } -int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) +int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) { struct c2_mq_shared __iomem *shared; struct c2_cq *cq; + unsigned long flags; + int ret = 0; cq = 
to_c2cq(ibcq); shared = cq->mq.peer; - if (notify == IB_CQ_NEXT_COMP) + if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP) writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type); - else if (notify == IB_CQ_SOLICITED) + else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type); else return -EINVAL; @@ -241,7 +243,13 @@ int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) */ readb(&shared->armed); - return 0; + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + spin_lock_irqsave(&cq->lock, flags); + ret = !c2_mq_empty(&cq->mq); + spin_unlock_irqrestore(&cq->lock, flags); + } + + return ret; } static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index f5e9aeec6f6..76049afc765 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -114,7 +114,10 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, return -EIO; } } + + return 1; } + return 0; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 78a495f5332..a891493fd34 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -292,7 +292,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) #endif } -static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) +static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct iwch_dev *rhp; struct iwch_cq *chp; @@ -303,7 +303,7 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) chp = to_iwch_cq(ibcq); rhp = chp->rhp; - if (notify == IB_CQ_SOLICITED) + if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) cq_op = CQ_ARM_SE; else cq_op = CQ_ARM_AN; @@ -317,9 +317,11 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum 
ib_cq_notify notify) PDBG("%s rptr 0x%x\n", __FUNCTION__, chp->cq.rptr); err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0); spin_unlock_irqrestore(&chp->lock, flag); - if (err) + if (err < 0) printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err, chp->cq.cqid); + if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) + err = 0; return err; } diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index aff96ac4fd1..e14b029332c 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -135,7 +135,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify); +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags); struct ib_qp *ehca_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 08d3f892d9f..caec9dee09e 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -634,11 +634,13 @@ poll_cq_exit0: return ret; } -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify) +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + unsigned long spl_flags; + int ret = 0; - switch (cq_notify) { + switch (notify_flags & IB_CQ_SOLICITED_MASK) { case IB_CQ_SOLICITED: hipz_set_cqx_n0(my_cq, 1); break; @@ -649,5 +651,11 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify) return -EINVAL; } - return 0; + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + spin_lock_irqsave(&my_cq->spinlock, spl_flags); + ret = ipz_qeit_is_valid(&my_cq->ipz_queue); + spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + } + + return ret; } diff --git 
a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h index 8199c45768a..57f141a36bc 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -140,6 +140,14 @@ static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) return cqe; } +static inline int ipz_qeit_is_valid(struct ipz_queue *queue) +{ + struct ehca_cqe *cqe = ipz_qeit_get(queue); + u32 cqe_flags = cqe->cqe_flags; + + return cqe_flags >> 7 == (queue->toggle_state & 1); +} + /* * returns and resets Queue Entry iterator * returns address (kv) of first Queue Entry diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 00d3eb9bc69..3e9241badba 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -334,17 +334,18 @@ int ipath_destroy_cq(struct ib_cq *ibcq) /** * ipath_req_notify_cq - change the notification type for a completion queue * @ibcq: the completion queue - * @notify: the type of notification to request + * @notify_flags: the type of notification to request * * Returns 0 for success. * * This may be called from interrupt context. Also called by * ib_req_notify_cq() in the generic verbs code. */ -int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) +int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) { struct ipath_cq *cq = to_icq(ibcq); unsigned long flags; + int ret = 0; spin_lock_irqsave(&cq->lock, flags); /* @@ -352,9 +353,15 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). 
*/ if (cq->notify != IB_CQ_NEXT_COMP) - cq->notify = notify; + cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; + + if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && + cq->queue->head != cq->queue->tail) + ret = 1; + spin_unlock_irqrestore(&cq->lock, flags); - return 0; + + return ret; } /** diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 2d734fb6eff..7064fc22272 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -741,7 +741,7 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vec int ipath_destroy_cq(struct ib_cq *ibcq); -int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify); +int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index efd79ef109a..cf0868f6e96 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -726,11 +726,12 @@ repoll: return err == 0 || err == -EAGAIN ? npolled : err; } -int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) +int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags) { __be32 doorbell[2]; - doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ? + doorbell[0] = cpu_to_be32(((flags & IB_CQ_SOLICITED_MASK) == + IB_CQ_SOLICITED ? 
MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : MTHCA_TAVOR_CQ_DB_REQ_NOT) | to_mcq(cq)->cqn); @@ -743,7 +744,7 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) return 0; } -int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) +int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct mthca_cq *cq = to_mcq(ibcq); __be32 doorbell[2]; @@ -755,7 +756,8 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) doorbell[0] = ci; doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) | - (notify == IB_CQ_SOLICITED ? 1 : 2)); + ((flags & IB_CQ_SOLICITED_MASK) == + IB_CQ_SOLICITED ? 1 : 2)); mthca_write_db_rec(doorbell, cq->arm_db); @@ -766,7 +768,7 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) wmb(); doorbell[0] = cpu_to_be32((sn << 28) | - (notify == IB_CQ_SOLICITED ? + ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL : MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index b7e42efaf43..9bae3cc6060 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -495,8 +495,8 @@ void mthca_unmap_eq_icm(struct mthca_dev *dev); int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); -int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); +int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); +int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); int mthca_init_cq(struct mthca_dev *dev, int nent, struct mthca_ucontext *ctx, u32 pdn, struct mthca_cq *cq); -- cgit v1.2.3