diff options
36 files changed, 759 insertions, 278 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 09a2bec7fd3..d98b05b2826 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -41,6 +41,8 @@ #include <net/neighbour.h> #include <net/route.h> #include <net/netevent.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> #include <rdma/ib_addr.h> MODULE_AUTHOR("Sean Hefty"); @@ -49,8 +51,8 @@ MODULE_LICENSE("Dual BSD/GPL"); struct addr_req { struct list_head list; - struct sockaddr src_addr; - struct sockaddr dst_addr; + struct sockaddr_storage src_addr; + struct sockaddr_storage dst_addr; struct rdma_dev_addr *addr; struct rdma_addr_client *client; void *context; @@ -113,15 +115,32 @@ EXPORT_SYMBOL(rdma_copy_addr); int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) { struct net_device *dev; - __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; - int ret; + int ret = -EADDRNOTAVAIL; - dev = ip_dev_find(&init_net, ip); - if (!dev) - return -EADDRNOTAVAIL; + switch (addr->sa_family) { + case AF_INET: + dev = ip_dev_find(&init_net, + ((struct sockaddr_in *) addr)->sin_addr.s_addr); + + if (!dev) + return ret; - ret = rdma_copy_addr(dev_addr, dev, NULL); - dev_put(dev); + ret = rdma_copy_addr(dev_addr, dev, NULL); + dev_put(dev); + break; + case AF_INET6: + for_each_netdev(&init_net, dev) { + if (ipv6_chk_addr(&init_net, + &((struct sockaddr_in6 *) addr)->sin6_addr, + dev, 1)) { + ret = rdma_copy_addr(dev_addr, dev, NULL); + break; + } + } + break; + default: + break; + } return ret; } EXPORT_SYMBOL(rdma_translate_ip); @@ -156,22 +175,37 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); } -static void addr_send_arp(struct sockaddr_in *dst_in) +static void addr_send_arp(struct sockaddr *dst_in) { struct rtable *rt; struct flowi fl; - __be32 dst_ip = dst_in->sin_addr.s_addr; + struct dst_entry *dst; memset(&fl, 0, sizeof fl); - fl.nl_u.ip4_u.daddr = dst_ip; - if (ip_route_output_key(&init_net, &rt, &fl)) - return; + if (dst_in->sa_family == AF_INET) { + fl.nl_u.ip4_u.daddr = + ((struct sockaddr_in *) dst_in)->sin_addr.s_addr; - neigh_event_send(rt->u.dst.neighbour, NULL); - ip_rt_put(rt); + if (ip_route_output_key(&init_net, &rt, &fl)) + return; + + neigh_event_send(rt->u.dst.neighbour, NULL); + ip_rt_put(rt); + + } else { + fl.nl_u.ip6_u.daddr = + ((struct sockaddr_in6 *) dst_in)->sin6_addr; + + dst = ip6_route_output(&init_net, NULL, &fl); + if (!dst) + return; + + neigh_event_send(dst->neighbour, NULL); + dst_release(dst); + } } -static int addr_resolve_remote(struct sockaddr_in *src_in, +static int addr4_resolve_remote(struct sockaddr_in *src_in, struct sockaddr_in *dst_in, struct rdma_dev_addr *addr) { @@ -220,10 +254,51 @@ out: return ret; } +static int addr6_resolve_remote(struct sockaddr_in6 *src_in, + struct sockaddr_in6 *dst_in, + struct rdma_dev_addr *addr) +{ + struct flowi fl; + struct neighbour *neigh; + struct dst_entry *dst; + int ret = -ENODATA; + + memset(&fl, 0, sizeof fl); + fl.nl_u.ip6_u.daddr = dst_in->sin6_addr; + fl.nl_u.ip6_u.saddr = src_in->sin6_addr; + + dst = ip6_route_output(&init_net, NULL, &fl); + if (!dst) + return ret; + + if (dst->dev->flags & IFF_NOARP) { + ret = rdma_copy_addr(addr, dst->dev, NULL); + } else { + neigh = dst->neighbour; + if (neigh && (neigh->nud_state & NUD_VALID)) + ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); + } + + dst_release(dst); + return ret; +} + +static int addr_resolve_remote(struct sockaddr *src_in, + struct sockaddr *dst_in, + struct rdma_dev_addr *addr) +{ + if (src_in->sa_family == AF_INET) { + return addr4_resolve_remote((struct sockaddr_in *) src_in, + (struct sockaddr_in *) dst_in, addr); + } else + return addr6_resolve_remote((struct sockaddr_in6 *) src_in, + (struct sockaddr_in6 *) dst_in, addr); +} + static void process_req(struct work_struct *work) { struct addr_req *req, *temp_req; - struct sockaddr_in *src_in, *dst_in; + struct sockaddr *src_in, *dst_in; struct list_head done_list; INIT_LIST_HEAD(&done_list); @@ -231,8 +306,8 @@ static void process_req(struct work_struct *work) mutex_lock(&lock); list_for_each_entry_safe(req, temp_req, &req_list, list) { if (req->status == -ENODATA) { - src_in = (struct sockaddr_in *) &req->src_addr; - dst_in = (struct sockaddr_in *) &req->dst_addr; + src_in = (struct sockaddr *) &req->src_addr; + dst_in = (struct sockaddr *) &req->dst_addr; req->status = addr_resolve_remote(src_in, dst_in, req->addr); if (req->status && time_after_eq(jiffies, req->timeout)) @@ -251,41 +326,72 @@ static void process_req(struct work_struct *work) list_for_each_entry_safe(req, temp_req, &done_list, list) { list_del(&req->list); - req->callback(req->status, &req->src_addr, req->addr, - req->context); + req->callback(req->status, (struct sockaddr *) &req->src_addr, + req->addr, req->context); put_client(req->client); kfree(req); } } -static int addr_resolve_local(struct sockaddr_in *src_in, - struct sockaddr_in *dst_in, +static int addr_resolve_local(struct sockaddr *src_in, + struct sockaddr *dst_in, struct rdma_dev_addr *addr) { struct net_device *dev; - __be32 src_ip = src_in->sin_addr.s_addr; - __be32 dst_ip = dst_in->sin_addr.s_addr; int ret; - dev = ip_dev_find(&init_net, dst_ip); - if (!dev) - return -EADDRNOTAVAIL; - - if (ipv4_is_zeronet(src_ip)) { - src_in->sin_family = dst_in->sin_family; - src_in->sin_addr.s_addr = dst_ip; - ret = rdma_copy_addr(addr, dev, dev->dev_addr); - } else if (ipv4_is_loopback(src_ip)) { - ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + if (dst_in->sa_family == AF_INET) { + __be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr; + __be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr; + + dev = ip_dev_find(&init_net, dst_ip); + if (!dev) + return -EADDRNOTAVAIL; + + if (ipv4_is_zeronet(src_ip)) { + src_in->sa_family = dst_in->sa_family; + ((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip; + ret = rdma_copy_addr(addr, dev, dev->dev_addr); + } else if (ipv4_is_loopback(src_ip)) { + ret = rdma_translate_ip(dst_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + } else { + ret = rdma_translate_ip(src_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + } + dev_put(dev); } else { - ret = rdma_translate_ip((struct sockaddr *)src_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + struct in6_addr *a; + + for_each_netdev(&init_net, dev) + if (ipv6_chk_addr(&init_net, + &((struct sockaddr_in6 *) addr)->sin6_addr, + dev, 1)) + break; + + if (!dev) + return -EADDRNOTAVAIL; + + a = &((struct sockaddr_in6 *) src_in)->sin6_addr; + + if (ipv6_addr_any(a)) { + src_in->sa_family = dst_in->sa_family; + ((struct sockaddr_in6 *) src_in)->sin6_addr = + ((struct sockaddr_in6 *) dst_in)->sin6_addr; + ret = rdma_copy_addr(addr, dev, dev->dev_addr); + } else if (ipv6_addr_loopback(a)) { + ret = rdma_translate_ip(dst_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + } else { + ret = rdma_translate_ip(src_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + } } - dev_put(dev); return ret; } @@ -296,7 +402,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client, struct rdma_dev_addr *addr, void *context), void *context) { - struct sockaddr_in *src_in, *dst_in; + struct sockaddr *src_in, *dst_in; struct addr_req *req; int ret = 0; @@ -313,8 +419,8 @@ int rdma_resolve_ip(struct rdma_addr_client *client, req->client = client; atomic_inc(&client->refcount); - src_in = (struct sockaddr_in *) &req->src_addr; - dst_in = (struct sockaddr_in *) &req->dst_addr; + src_in = (struct sockaddr *) &req->src_addr; + dst_in = (struct sockaddr *) &req->dst_addr; req->status = addr_resolve_local(src_in, dst_in, addr); if (req->status == -EADDRNOTAVAIL) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d951896ff7f..2a2e50871b4 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -42,6 +42,7 @@ #include <linux/inetdevice.h> #include <net/tcp.h> +#include <net/ipv6.h> #include <rdma/rdma_cm.h> #include <rdma/rdma_cm_ib.h> @@ -636,7 +637,12 @@ static inline int cma_zero_addr(struct sockaddr *addr) static inline int cma_loopback_addr(struct sockaddr *addr) { - return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); + if (addr->sa_family == AF_INET) + return ipv4_is_loopback( + ((struct sockaddr_in *) addr)->sin_addr.s_addr); + else + return ipv6_addr_loopback( + &((struct sockaddr_in6 *) addr)->sin6_addr); } static inline int cma_any_addr(struct sockaddr *addr) @@ -1467,10 +1473,10 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv) static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af) { - struct sockaddr_in addr_in; + struct sockaddr_storage addr_in; memset(&addr_in, 0, sizeof addr_in); - addr_in.sin_family = af; + addr_in.ss_family = af; return rdma_bind_addr(id, (struct sockaddr *) &addr_in); } @@ -2073,7 +2079,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) struct rdma_id_private *id_priv; int ret; - if (addr->sa_family != AF_INET) + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) return -EAFNOSUPPORT; id_priv = container_of(id, struct rdma_id_private, id); @@ -2113,31 +2119,59 @@ EXPORT_SYMBOL(rdma_bind_addr); static int cma_format_hdr(void *hdr, enum rdma_port_space ps, struct rdma_route *route) { - struct sockaddr_in *src4, *dst4; struct cma_hdr *cma_hdr; struct sdp_hh *sdp_hdr; - src4 = (struct sockaddr_in *) &route->addr.src_addr; - dst4 = (struct sockaddr_in *) &route->addr.dst_addr; - - switch (ps) { - case RDMA_PS_SDP: - sdp_hdr = hdr; - if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) - return -EINVAL; - sdp_set_ip_ver(sdp_hdr, 4); - sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; - sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; - sdp_hdr->port = src4->sin_port; - break; - default: - cma_hdr = hdr; - cma_hdr->cma_version = CMA_VERSION; - cma_set_ip_ver(cma_hdr, 4); - cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; - cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; - cma_hdr->port = src4->sin_port; - break; + if (route->addr.src_addr.ss_family == AF_INET) { + struct sockaddr_in *src4, *dst4; + + src4 = (struct sockaddr_in *) &route->addr.src_addr; + dst4 = (struct sockaddr_in *) &route->addr.dst_addr; + + switch (ps) { + case RDMA_PS_SDP: + sdp_hdr = hdr; + if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) + return -EINVAL; + sdp_set_ip_ver(sdp_hdr, 4); + sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + sdp_hdr->port = src4->sin_port; + break; + default: + cma_hdr = hdr; + cma_hdr->cma_version = CMA_VERSION; + cma_set_ip_ver(cma_hdr, 4); + cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + cma_hdr->port = src4->sin_port; + break; + } + } else { + struct sockaddr_in6 *src6, *dst6; + + src6 = (struct sockaddr_in6 *) &route->addr.src_addr; + dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr; + + switch (ps) { + case RDMA_PS_SDP: + sdp_hdr = hdr; + if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) + return -EINVAL; + sdp_set_ip_ver(sdp_hdr, 6); + sdp_hdr->src_addr.ip6 = src6->sin6_addr; + sdp_hdr->dst_addr.ip6 = dst6->sin6_addr; + sdp_hdr->port = src6->sin6_port; + break; + default: + cma_hdr = hdr; + cma_hdr->cma_version = CMA_VERSION; + cma_set_ip_ver(cma_hdr, 6); + cma_hdr->src_addr.ip6 = src6->sin6_addr; + cma_hdr->dst_addr.ip6 = dst6->sin6_addr; + cma_hdr->port = src6->sin6_port; + break; + } } return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 7fc35cf0cdd..c825142a2fb 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -175,6 +175,13 @@ struct ehca_queue_map { unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */ }; +/* function to calculate the next index for the qmap */ +static inline unsigned int next_index(unsigned int cur_index, unsigned int limit) +{ + unsigned int temp = cur_index + 1; + return (temp == limit) ? 0 : temp; +} + struct ehca_qp { union { struct ib_qp ib_qp; diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 49660dfa186..523e733c630 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -113,7 +113,7 @@ int ehca_create_eq(struct ehca_shca *shca, if (h_ret != H_SUCCESS || vpage) goto create_eq_exit2; } else { - if (h_ret != H_PAGE_REGISTERED || !vpage) + if (h_ret != H_PAGE_REGISTERED) goto create_eq_exit2; } } diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index bec7e024935..3b77b674cbf 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -717,6 +717,7 @@ static int __devinit ehca_probe(struct of_device *dev, const u64 *handle; struct ib_pd *ibpd; int ret, i, eq_size; + unsigned long flags; handle = of_get_property(dev->node, "ibm,hca-handle", NULL); if (!handle) { @@ -830,9 +831,9 @@ static int __devinit ehca_probe(struct of_device *dev, ehca_err(&shca->ib_device, "Cannot create device attributes ret=%d", ret); - spin_lock(&shca_list_lock); + spin_lock_irqsave(&shca_list_lock, flags); list_add(&shca->shca_list, &shca_list); - spin_unlock(&shca_list_lock); + spin_unlock_irqrestore(&shca_list_lock, flags); return 0; @@ -878,6 +879,7 @@ probe1: static int __devexit ehca_remove(struct of_device *dev) { struct ehca_shca *shca = dev->dev.driver_data; + unsigned long flags; int ret; sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp); @@ -915,9 +917,9 @@ static int __devexit ehca_remove(struct of_device *dev) ib_dealloc_device(&shca->ib_device); - spin_lock(&shca_list_lock); + spin_lock_irqsave(&shca_list_lock, flags); list_del(&shca->shca_list); - spin_unlock(&shca_list_lock); + spin_unlock_irqrestore(&shca_list_lock, flags); return ret; } @@ -975,6 +977,7 @@ static int ehca_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { static unsigned long ehca_dmem_warn_time; + unsigned long flags; switch (action) { case MEM_CANCEL_OFFLINE: @@ -985,12 +988,12 @@ static int ehca_mem_notifier(struct notifier_block *nb, case MEM_GOING_ONLINE: case MEM_GOING_OFFLINE: /* only ok if no hca is attached to the lpar */ - spin_lock(&shca_list_lock); + spin_lock_irqsave(&shca_list_lock, flags); if (list_empty(&shca_list)) { - spin_unlock(&shca_list_lock); + spin_unlock_irqrestore(&shca_list_lock, flags); return NOTIFY_OK; } else { - spin_unlock(&shca_list_lock); + spin_unlock_irqrestore(&shca_list_lock, flags); if (printk_timed_ratelimit(&ehca_dmem_warn_time, 30 * 1000)) ehca_gen_err("DMEM operations are not allowed" diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index cadbf0cdd91..f161cf173db 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1138,14 +1138,14 @@ static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, return -EFAULT; } - tail_idx = (qmap->tail + 1) % qmap->entries; + tail_idx = next_index(qmap->tail, qmap->entries); wqe_idx = q_ofs / ipz_queue->qe_size; /* check all processed wqes, whether a cqe is requested or not */ while (tail_idx != wqe_idx) { if (qmap->map[tail_idx].cqe_req) qmap->left_to_poll++; - tail_idx = (tail_idx + 1) % qmap->entries; + tail_idx = next_index(tail_idx, qmap->entries); } /* save index in queue, where we have to start flushing */ qmap->next_wqe_idx = wqe_idx; @@ -1195,14 +1195,14 @@ static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) } else { spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); my_qp->sq_map.left_to_poll = 0; - my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) % - my_qp->sq_map.entries; + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); my_qp->rq_map.left_to_poll = 0; - my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) % - my_qp->rq_map.entries; + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); } diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 00a648f4316..c7112686782 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -726,13 +726,13 @@ repoll: * set left_to_poll to 0 because in error state, we will not * get any additional CQEs */ - my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) % - my_qp->sq_map.entries; + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); my_qp->sq_map.left_to_poll = 0; ehca_add_to_err_list(my_qp, 1); - my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) % - my_qp->rq_map.entries; + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); my_qp->rq_map.left_to_poll = 0; if (HAS_RQ(my_qp)) ehca_add_to_err_list(my_qp, 0); @@ -860,9 +860,8 @@ static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, /* mark as reported and advance next_wqe pointer */ qmap_entry->reported = 1; - qmap->next_wqe_idx++; - if (qmap->next_wqe_idx == qmap->entries) - qmap->next_wqe_idx = 0; + qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, + qmap->entries); qmap_entry = &qmap->map[qmap->next_wqe_idx]; wc++; nr++; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index ad0aab60b05..69c0ce321b4 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -661,6 +661,8 @@ bail: static void __devexit cleanup_device(struct ipath_devdata *dd) { int port; + struct ipath_portdata **tmp; + unsigned long flags; if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { /* can't do anything more with chip; needs re-init */ @@ -742,20 +744,21 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) /* * free any resources still in use (usually just kernel ports) - * at unload; we do for portcnt, not cfgports, because cfgports - * could have changed while we were loaded. + * at unload; we do for portcnt, because that's what we allocate. + * We acquire lock to be really paranoid that ipath_pd isn't being + * accessed from some interrupt-related code (that should not happen, + * but best to be sure). */ + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); + tmp = dd->ipath_pd; + dd->ipath_pd = NULL; + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); for (port = 0; port < dd->ipath_portcnt; port++) { - struct ipath_portdata *pd = dd->ipath_pd[port]; - dd->ipath_pd[port] = NULL; + struct ipath_portdata *pd = tmp[port]; + tmp[port] = NULL; /* debugging paranoia */ ipath_free_pddata(dd, pd); } - kfree(dd->ipath_pd); - /* - * debuggability, in case some cleanup path tries to use it - * after this - */ - dd->ipath_pd = NULL; + kfree(tmp); } static void __devexit ipath_remove_one(struct pci_dev *pdev) @@ -2586,6 +2589,7 @@ int ipath_reset_device(int unit) { int ret, i; struct ipath_devdata *dd = ipath_lookup(unit); + unsigned long flags; if (!dd) { ret = -ENODEV; @@ -2611,18 +2615,21 @@ int ipath_reset_device(int unit) goto bail; } + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); if (dd->ipath_pd) for (i = 1; i < dd->ipath_cfgports; i++) { - if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) { - ipath_dbg("unit %u port %d is in use " - "(PID %u cmd %s), can't reset\n", - unit, i, - pid_nr(dd->ipath_pd[i]->port_pid), - dd->ipath_pd[i]->port_comm); - ret = -EBUSY; - goto bail; - } + if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) + continue; + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); + ipath_dbg("unit %u port %d is in use " + "(PID %u cmd %s), can't reset\n", + unit, i, + pid_nr(dd->ipath_pd[i]->port_pid), + dd->ipath_pd[i]->port_comm); + ret = -EBUSY; + goto bail; } + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); if (dd->ipath_flags & IPATH_HAS_SEND_DMA) teardown_sdma(dd); @@ -2656,9 +2663,12 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig) { int i, sub, any = 0; struct pid *pid; + unsigned long flags; if (!dd->ipath_pd) return 0; + + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); for (i = 1; i < dd->ipath_cfgports; i++) { if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) continue; @@ -2682,6 +2692,7 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig) any++; } } + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); return any; } diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 1af1f3a907c..239d4e8068a 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -223,8 +223,13 @@ static int ipath_get_base_info(struct file *fp, (unsigned long long) kinfo->spi_subport_rcvhdr_base); } - kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / - dd->ipath_palign; + /* + * All user buffers are 2KB buffers. If we ever support + * giving 4KB buffers to user processes, this will need some + * work. + */ + kinfo->spi_pioindex = (kinfo->spi_piobufbase - + (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign; kinfo->spi_pioalign = dd->ipath_palign; kinfo->spi_qpair = IPATH_KD_QP; @@ -2041,7 +2046,9 @@ static int ipath_close(struct inode *in, struct file *fp) struct ipath_filedata *fd; struct ipath_portdata *pd; struct ipath_devdata *dd; + unsigned long flags; unsigned port; + struct pid *pid; ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n", (long)in->i_rdev, fp->private_data); @@ -2074,14 +2081,13 @@ static int ipath_close(struct inode *in, struct file *fp) mutex_unlock(&ipath_mutex); goto bail; } + /* early; no interrupt users after this */ + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); port = pd->port_port; - - if (pd->port_hdrqfull) { - ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors " - "during run\n", pd->port_comm, pid_nr(pd->port_pid), - pd->port_hdrqfull); - pd->port_hdrqfull = 0; - } + dd->ipath_pd[port] = NULL; + pid = pd->port_pid; + pd->port_pid = NULL; + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); if (pd->port_rcvwait_to || pd->port_piowait_to || pd->port_rcvnowait || pd->port_pionowait) { @@ -2138,13 +2144,11 @@ static int ipath_close(struct inode *in, struct file *fp) unlock_expected_tids(pd); ipath_stats.sps_ports--; ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", - pd->port_comm, pid_nr(pd->port_pid), + pd->port_comm, pid_nr(pid), dd->ipath_unit, port); } - put_pid(pd->port_pid); - pd->port_pid = NULL; - dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */ + put_pid(pid); mutex_unlock(&ipath_mutex); ipath_free_pddata(dd, pd); /* after releasing the mutex */ diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 8bb5170b4e4..53912c327bf 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -86,7 +86,7 @@ static int create_file(const char *name, mode_t mode, *dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(dentry)) + if (!IS_ERR(*dentry)) error = ipathfs_mknod(parent->d_inode, *dentry, mode, fops, data); else diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 421cc2af891..fbf8c5379ea 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -721,6 +721,12 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) INFINIPATH_HWE_SERDESPLLFAILED); } + dd->ibdeltainprog = 1; + dd->ibsymsnap = + ipath_read_creg32(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); + dd->iblnkerrsnap = + ipath_read_creg32(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1); @@ -810,6 +816,36 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd) { u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); + if (dd->ibsymdelta || dd->iblnkerrdelta || + dd->ibdeltainprog) { + u64 diagc; + /* enable counter writes */ + diagc = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwdiagctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, + diagc | INFINIPATH_DC_COUNTERWREN); + + if (dd->ibsymdelta || dd->ibdeltainprog) { + val = ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt); + if (dd->ibdeltainprog) + val -= val - dd->ibsymsnap; + val -= dd->ibsymdelta; + ipath_write_creg(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt, val); + } + if (dd->iblnkerrdelta || dd->ibdeltainprog) { + val = ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + if (dd->ibdeltainprog) + val -= val - dd->iblnkerrsnap; + val -= dd->iblnkerrdelta; + ipath_write_creg(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt, val); + } + + /* and disable counter writes */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, diagc); + } val |= INFINIPATH_SERDC0_TXIDLE; ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n", (unsigned long long) val); @@ -1749,6 +1785,31 @@ static void ipath_pe_config_jint(struct ipath_devdata *dd, u16 a, u16 b) static int ipath_pe_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) { + if (ibup) { + if (dd->ibdeltainprog) { + dd->ibdeltainprog = 0; + dd->ibsymdelta += + ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt) - + dd->ibsymsnap; + dd->iblnkerrdelta += + ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt) - + dd->iblnkerrsnap; + } + } else { + dd->ipath_lli_counter = 0; + if (!dd->ibdeltainprog) { + dd->ibdeltainprog = 1; + dd->ibsymsnap = + ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt); + dd->iblnkerrsnap = + ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + } + } + ipath_setup_pe_setextled(dd, ipath_ib_linkstate(dd, ibcs), ipath_ib_linktrstate(dd, ibcs)); return 0; diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c index 9839e20119b..b2a9d4c155d 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba7220.c +++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c @@ -951,6 +951,12 @@ static int ipath_7220_bringup_serdes(struct ipath_devdata *dd) INFINIPATH_HWE_SERDESPLLFAILED); } + dd->ibdeltainprog = 1; + dd->ibsymsnap = + ipath_read_creg32(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); + dd->iblnkerrsnap = + ipath_read_creg32(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); + if (!dd->ipath_ibcddrctrl) { /* not on re-init after reset */ dd->ipath_ibcddrctrl = @@ -1084,6 +1090,37 @@ static void ipath_7220_config_jint(struct ipath_devdata *dd, static void ipath_7220_quiet_serdes(struct ipath_devdata *dd) { u64 val; + if (dd->ibsymdelta || dd->iblnkerrdelta || + dd->ibdeltainprog) { + u64 diagc; + /* enable counter writes */ + diagc = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwdiagctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, + diagc | INFINIPATH_DC_COUNTERWREN); + + if (dd->ibsymdelta || dd->ibdeltainprog) { + val = ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt); + if (dd->ibdeltainprog) + val -= val - dd->ibsymsnap; + val -= dd->ibsymdelta; + ipath_write_creg(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt, val); + } + if (dd->iblnkerrdelta || dd->ibdeltainprog) { + val = ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + if (dd->ibdeltainprog) + val -= val - dd->iblnkerrsnap; + val -= dd->iblnkerrdelta; + ipath_write_creg(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt, val); + } + + /* and disable counter writes */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, diagc); + } + dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG; wake_up(&dd->ipath_autoneg_wait); cancel_delayed_work(&dd->ipath_autoneg_work); @@ -2325,7 +2362,7 @@ static void try_auto_neg(struct ipath_devdata *dd) static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) { - int ret = 0; + int ret = 0, symadj = 0; u32 ltstate = ipath_ib_linkstate(dd, ibcs); dd->ipath_link_width_active = @@ -2368,6 +2405,13 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) ipath_dbg("DDR negotiation try, %u/%u\n", dd->ipath_autoneg_tries, IPATH_AUTONEG_TRIES); + if (!dd->ibdeltainprog) { + dd->ibdeltainprog = 1; + dd->ibsymsnap = ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt); + dd->iblnkerrsnap = ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + } try_auto_neg(dd); ret = 1; /* no other IB status change processing */ } else if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) @@ -2388,6 +2432,7 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) set_speed_fast(dd, dd->ipath_link_speed_enabled); wake_up(&dd->ipath_autoneg_wait); + symadj = 1; } else if (dd->ipath_flags & IPATH_IB_AUTONEG_FAILED) { /* * clear autoneg failure flag, and do setup @@ -2403,22 +2448,28 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) IBA7220_IBC_IBTA_1_2_MASK; ipath_write_kreg(dd, IPATH_KREG_OFFSET(IBNCModeCtrl), 0); + symadj = 1; } } /* - * if we are in 1X, and are in autoneg width, it - * could be due to an xgxs problem, so if we haven't + * if we are in 1X on rev1 only, and are in autoneg width, + * it could be due to an xgxs problem, so if we haven't * already tried, try twice to get to 4X; if we * tried, and couldn't, report it, since it will * probably not be what is desired. */ - if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X | + if (dd->ipath_minrev == 1 && + (dd->ipath_link_width_enabled & (IB_WIDTH_1X | IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X) && dd->ipath_link_width_active == IB_WIDTH_1X && dd->ipath_x1_fix_tries < 3) { - if (++dd->ipath_x1_fix_tries == 3) + if (++dd->ipath_x1_fix_tries == 3) { dev_info(&dd->pcidev->dev, "IB link is in 1X mode\n"); + if (!(dd->ipath_flags & + IPATH_IB_AUTONEG_INPROG)) + symadj = 1; + } else { ipath_cdbg(VERBOSE, "IB 1X in " "auto-width, try %u to be " @@ -2429,7 +2480,8 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) dd->ipath_f_xgxs_reset(dd); ret = 1; /* skip other processing */ } - } + } else if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)) + symadj = 1; if (!ret) { dd->delay_mult = rate_to_delay @@ -2440,6 +2492,25 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) } } + if (symadj) { + if (dd->ibdeltainprog) { + dd->ibdeltainprog = 0; + dd->ibsymdelta += ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt) - + dd->ibsymsnap; + dd->iblnkerrdelta += ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt) - + dd->iblnkerrsnap; + } + } else if (!ibup && !dd->ibdeltainprog + && !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)) { + dd->ibdeltainprog = 1; + dd->ibsymsnap = ipath_read_creg32(dd, + dd->ipath_cregs->cr_ibsymbolerrcnt); + dd->iblnkerrsnap = ipath_read_creg32(dd, + dd->ipath_cregs->cr_iblinkerrrecovcnt); + } + if (!ret) ipath_setup_7220_setextled(dd, ipath_ib_linkstate(dd, ibcs), ltstate); diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 3e5baa43fc8..64aeefbd2a5 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -229,6 +229,7 @@ static int init_chip_first(struct ipath_devdata *dd) spin_lock_init(&dd->ipath_kernel_tid_lock); spin_lock_init(&dd->ipath_user_tid_lock); spin_lock_init(&dd->ipath_sendctrl_lock); + spin_lock_init(&dd->ipath_uctxt_lock); spin_lock_init(&dd->ipath_sdma_lock); spin_lock_init(&dd->ipath_gpio_lock); spin_lock_init(&dd->ipath_eep_st_lock); diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 0bd8bcb184a..6ba4861dd6a 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -355,6 +355,19 @@ struct ipath_devdata { /* errors masked because they occur too fast */ ipath_err_t ipath_maskederrs; u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */ + /* these 5 fields are used to establish deltas for IB Symbol + * errors and linkrecovery errors. They can be reported on + * some chips during link negotiation prior to INIT, and with + * DDR when faking DDR negotiations with non-IBTA switches. + * The chip counters are adjusted at driver unload if there is + * a non-zero delta. + */ + u64 ibdeltainprog; + u64 ibsymdelta; + u64 ibsymsnap; + u64 iblnkerrdelta; + u64 iblnkerrsnap; + /* time in jiffies at which to re-enable maskederrs */ unsigned long ipath_unmasktime; /* count of egrfull errors, combined for all ports */ @@ -464,6 +477,8 @@ struct ipath_devdata { spinlock_t ipath_kernel_tid_lock; spinlock_t ipath_user_tid_lock; spinlock_t ipath_sendctrl_lock; + /* around ipath_pd and (user ports) port_cnt use (intr vs free) */ + spinlock_t ipath_uctxt_lock; /* * IPATH_STATUS_*, diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index 8f32b17a5ee..c0e933fec21 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -132,6 +132,7 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, * (see ipath_get_dma_mr and ipath_dma.c). */ if (sge->lkey == 0) { + /* always a kernel port, no locking needed */ struct ipath_pd *pd = to_ipd(qp->ibqp.pd); if (pd->user) { @@ -211,6 +212,7 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, * (see ipath_get_dma_mr and ipath_dma.c). */ if (rkey == 0) { + /* always a kernel port, no locking needed */ struct ipath_pd *pd = to_ipd(qp->ibqp.pd); if (pd->user) { diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index be4fc9ada8e..17a12319747 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -348,6 +348,7 @@ bail: */ static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys) { + /* always a kernel port, no locking needed */ struct ipath_portdata *pd = dd->ipath_pd[0]; memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); @@ -730,6 +731,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) int i; int changed = 0; + /* always a kernel port, no locking needed */ pd = dd->ipath_pd[0]; for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 4715911101e..3a5a89b609c 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -745,6 +745,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, struct ipath_swqe *swq = NULL; struct ipath_ibdev *dev; size_t sz; + size_t sg_list_sz; struct ib_qp *ret; if (init_attr->create_flags) { @@ -789,19 +790,31 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, goto bail; } sz = sizeof(*qp); + sg_list_sz = 0; if (init_attr->srq) { struct ipath_srq *srq = to_isrq(init_attr->srq); - sz += sizeof(*qp->r_sg_list) * - srq->rq.max_sge; - } else - sz += sizeof(*qp->r_sg_list) * - init_attr->cap.max_recv_sge; - qp = kmalloc(sz, GFP_KERNEL); + if (srq->rq.max_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (srq->rq.max_sge - 1); + } else if (init_attr->cap.max_recv_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (init_attr->cap.max_recv_sge - 1); + qp = kmalloc(sz + sg_list_sz, GFP_KERNEL); if (!qp) { ret = ERR_PTR(-ENOMEM); goto bail_swq; } + if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD || + init_attr->qp_type == IB_QPT_SMI || + init_attr->qp_type == IB_QPT_GSI)) { + qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL); + if (!qp->r_ud_sg_list) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } + } else + qp->r_ud_sg_list = NULL; if (init_attr->srq) { sz = 0; qp->r_rq.size = 0; @@ -818,7 +831,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->r_rq.size * sz); if (!qp->r_rq.wq) { ret = ERR_PTR(-ENOMEM); - goto bail_qp; + goto bail_sg_list; } } @@ -848,7 +861,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, if (err) { ret = ERR_PTR(err); vfree(qp->r_rq.wq); - goto bail_qp; + goto bail_sg_list; } qp->ip = NULL; qp->s_tx = NULL; @@ -925,6 +938,8 @@ bail_ip: vfree(qp->r_rq.wq); ipath_free_qp(&dev->qp_table, qp); free_qpn(&dev->qp_table, qp->ibqp.qp_num); +bail_sg_list: + kfree(qp->r_ud_sg_list); bail_qp: kfree(qp); bail_swq: @@ -989,6 +1004,7 @@ int ipath_destroy_qp(struct ib_qp *ibqp) kref_put(&qp->ip->ref, ipath_release_mmap_info); else vfree(qp->r_rq.wq); + kfree(qp->r_ud_sg_list); vfree(qp->s_wq); kfree(qp); return 0; diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 7b93cda1a4b..9170710b950 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -573,9 +573,8 @@ int ipath_make_rc_req(struct ipath_qp *qp) ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); - bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; + bth2 = qp->s_psn & IPATH_PSN_MASK; + qp->s_psn = wqe->lpsn + 1; ss = NULL; len = 0; qp->s_cur++; diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 284c9bca517..8e255adf5d9 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -698,10 +698,8 @@ retry: addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr, tx->map_len, DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, addr)) { - ret = -EIO; - goto unlock; - } + if (dma_mapping_error(&dd->pcidev->dev, addr)) + goto ioerr; dwoffset = tx->map_len >> 2; make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0); @@ -741,6 +739,8 @@ retry: dw = (len + 3) >> 2; addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, addr)) + goto unmap; make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset); /* SDmaUseLargeBuf has to be set in every descriptor */ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF) @@ -798,7 +798,18 @@ retry: list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist); if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15) vl15_watchdog_enq(dd); - + goto unlock; + +unmap: + while (tail != dd->ipath_sdma_descq_tail) { + if (!tail) + tail = dd->ipath_sdma_descq_cnt - 1; + else + tail--; + unmap_desc(dd, tail); + } +ioerr: + ret = -EIO; unlock: spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); fail: diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index c8e3d65f0de..f63e143e329 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -112,6 +112,14 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) dd->ipath_lastrpkts = val; } val64 = dd->ipath_rpkts; + } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) { + if (dd->ibdeltainprog) + val64 -= val64 - dd->ibsymsnap; + val64 -= dd->ibsymdelta; + } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) { + if (dd->ibdeltainprog) + val64 -= val64 - dd->iblnkerrsnap; + val64 -= dd->iblnkerrdelta; } else val64 = (u64) val; diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 729446f56aa..91c74cc797a 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -70,8 +70,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) goto done; } - rsge.sg_list = NULL; - /* * Check that the qkey matches (except for QP0, see 9.6.1.4.1). * Qkeys with the high order bit set mean use the @@ -115,21 +113,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) rq = &qp->r_rq; } - if (rq->max_sge > 1) { - /* - * XXX We could use GFP_KERNEL if ipath_do_send() - * was always called from the tasklet instead of - * from ipath_post_send(). - */ - rsge.sg_list = kmalloc((rq->max_sge - 1) * - sizeof(struct ipath_sge), - GFP_ATOMIC); - if (!rsge.sg_list) { - dev->n_pkt_drops++; - goto drop; - } - } - /* * Get the next work request entry to find where to put the data. * Note that it is safe to drop the lock after changing rq->tail @@ -147,6 +130,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) goto drop; } wqe = get_rwqe_ptr(rq, tail); + rsge.sg_list = qp->r_ud_sg_list; if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) { spin_unlock_irqrestore(&rq->lock, flags); dev->n_pkt_drops++; @@ -242,7 +226,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, swqe->wr.send_flags & IB_SEND_SOLICITED); drop: - kfree(rsge.sg_list); if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); done:; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index eabc4247860..cdf0e6abd34 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1852,7 +1852,7 @@ unsigned ipath_get_npkeys(struct ipath_devdata *dd) } /** - * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table + * ipath_get_pkey - return the indexed PKEY from the port PKEY table * @dd: the infinipath device * @index: the PKEY index */ @@ -1860,6 +1860,7 @@ unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index) { unsigned ret; + /* always a kernel port, no locking needed */ if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) ret = 0; else diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 9d12ae8a778..11e3f613df9 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -431,6 +431,7 @@ struct ipath_qp { u32 s_lsn; /* limit sequence number (credit) */ struct ipath_swqe *s_wq; /* send work queue */ struct ipath_swqe *s_wqe; + struct ipath_sge *r_ud_sg_list; struct ipath_rq r_rq; /* receive work queue */ struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 18308494a19..8415ecce5c4 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -222,7 +222,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector } err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq, 0); + cq->db.dma, &cq->mcq, vector, 0); if (err) goto err_dbmap; @@ -325,15 +325,17 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq) static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) { - struct mlx4_cqe *cqe; + struct mlx4_cqe *cqe, *new_cqe; int i; i = cq->mcq.cons_index; cqe = get_cqe(cq, i & cq->ibcq.cqe); while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { - memcpy(get_cqe_from_buf(&cq->resize_buf->buf, - (i + 1) & cq->resize_buf->cqe), - get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe)); + new_cqe = get_cqe_from_buf(&cq->resize_buf->buf, + (i + 1) & cq->resize_buf->cqe); + memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe)); + new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) | + (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0); cqe = get_cqe(cq, ++i & cq->ibcq.cqe); } ++cq->mcq.cons_index; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2e80f8f47b0..dcefe1fceb5 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -578,7 +578,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) ibdev->num_ports++; ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; - ibdev->ib_dev.num_comp_vectors = 1; + ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dma_device = &dev->pdev->dev; ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 81a82628a5f..861119593f2 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -252,6 +252,9 @@ struct iser_conn { wait_queue_head_t wait; /* waitq for conn/disconn */ atomic_t post_recv_buf_count; /* posted rx count */ atomic_t post_send_buf_count; /* posted tx count */ + atomic_t unexpected_pdu_count;/* count of received * + * unexpected pdus * + * not yet retired */ char name[ISER_OBJECT_NAME_SIZE]; struct iser_page_vec *page_vec; /* represents SG to fmr maps* * maps serialized as tx is*/ diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index cdd28318904..ed1aff21b7e 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -183,14 +183,8 @@ static int iser_post_receive_control(struct iscsi_conn *conn) struct iser_regd_buf *regd_data; struct iser_dto *recv_dto = NULL; struct iser_device *device = iser_conn->ib_conn->device; - int rx_data_size, err = 0; - - rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); - if (rx_desc == NULL) { - iser_err("Failed to alloc desc for post recv\n"); - return -ENOMEM; - } - rx_desc->type = ISCSI_RX; + int rx_data_size, err; + int posts, outstanding_unexp_pdus; /* for the login sequence we must support rx of upto 8K; login is done * after conn create/bind (connect) and conn stop/bind (reconnect), @@ -201,46 +195,80 @@ static int iser_post_receive_control(struct iscsi_conn *conn) else /* FIXME till user space sets conn->max_recv_dlength correctly */ rx_data_size = 128; - rx_desc->data = kmalloc(rx_data_size, GFP_NOIO); - if (rx_desc->data == NULL) { - iser_err("Failed to alloc data buf for post recv\n"); - err = -ENOMEM; - goto post_rx_kmalloc_failure; - } + outstanding_unexp_pdus = + atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0); - recv_dto = &rx_desc->dto; - recv_dto->ib_conn = iser_conn->ib_conn; - recv_dto->regd_vector_len = 0; + /* + * in addition to the response buffer, replace those consumed by + * unexpected pdus. + */ + for (posts = 0; posts < 1 + outstanding_unexp_pdus; posts++) { + rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); + if (rx_desc == NULL) { + iser_err("Failed to alloc desc for post recv %d\n", + posts); + err = -ENOMEM; + goto post_rx_cache_alloc_failure; + } + rx_desc->type = ISCSI_RX; + rx_desc->data = kmalloc(rx_data_size, GFP_NOIO); + if (rx_desc->data == NULL) { + iser_err("Failed to alloc data buf for post recv %d\n", + posts); + err = -ENOMEM; + goto post_rx_kmalloc_failure; + } - regd_hdr = &rx_desc->hdr_regd_buf; - memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); - regd_hdr->device = device; - regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */ - regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; + recv_dto = &rx_desc->dto; + recv_dto->ib_conn = iser_conn->ib_conn; + recv_dto->regd_vector_len = 0; - iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE); + regd_hdr = &rx_desc->hdr_regd_buf; + memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); + regd_hdr->device = device; + regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */ + regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; - iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0); + iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE); - regd_data = &rx_desc->data_regd_buf; - memset(regd_data, 0, sizeof(struct iser_regd_buf)); - regd_data->device = device; - regd_data->virt_addr = rx_desc->data; - regd_data->data_size = rx_data_size; + iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0); - iser_reg_single(device, regd_data, DMA_FROM_DEVICE); + regd_data = &rx_desc->data_regd_buf; + memset(regd_data, 0, sizeof(struct iser_regd_buf)); + regd_data->device = device; + regd_data->virt_addr = rx_desc->data; + regd_data->data_size = rx_data_size; - iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0); + iser_reg_single(device, regd_data, DMA_FROM_DEVICE); - err = iser_post_recv(rx_desc); - if (!err) - return 0; + iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0); - /* iser_post_recv failed */ + err = iser_post_recv(rx_desc); + if (err) { + iser_err("Failed iser_post_recv for post %d\n", posts); + goto post_rx_post_recv_failure; + } + } + /* all posts successful */ + return 0; + +post_rx_post_recv_failure: iser_dto_buffs_release(recv_dto); kfree(rx_desc->data); post_rx_kmalloc_failure: kmem_cache_free(ig.desc_cache, rx_desc); +post_rx_cache_alloc_failure: + if (posts > 0) { + /* + * response buffer posted, but did not replace all unexpected + * pdu recv bufs. Ignore error, retry occurs next send + */ + outstanding_unexp_pdus -= (posts - 1); + err = 0; + } + atomic_add(outstanding_unexp_pdus, + &iser_conn->ib_conn->unexpected_pdu_count); + return err; } @@ -274,8 +302,10 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) struct iscsi_iser_conn *iser_conn = conn->dd_data; int i; - /* no need to keep it in a var, we are after login so if this should - * be negotiated, by now the result should be available here */ + /* + * FIXME this value should be declared to the target during login with + * the MaxOutstandingUnexpectedPDUs key when supported + */ int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS; iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num); @@ -478,6 +508,7 @@ int iser_send_control(struct iscsi_conn *conn, int err = 0; struct iser_regd_buf *regd_buf; struct iser_device *device; + unsigned char opcode; if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); @@ -512,10 +543,15 @@ int iser_send_control(struct iscsi_conn *conn, data_seg_len); } - if (iser_post_receive_control(conn) != 0) { - iser_err("post_rcv_buff failed!\n"); - err = -ENOMEM; - goto send_control_error; + opcode = task->hdr->opcode & ISCSI_OPCODE_MASK; + + /* post recv buffer for response if one is expected */ + if (!(opcode == ISCSI_OP_NOOP_OUT && task->hdr->itt == RESERVED_ITT)) { + if (iser_post_receive_control(conn) != 0) { + iser_err("post_rcv_buff failed!\n"); + err = -ENOMEM; + goto send_control_error; + } } err = iser_post_send(mdesc); @@ -586,6 +622,20 @@ void iser_rcv_completion(struct iser_desc *rx_desc, * parallel to the execution of iser_conn_term. So the code that waits * * for the posted rx bufs refcount to become zero handles everything */ atomic_dec(&conn->ib_conn->post_recv_buf_count); + + /* + * if an unexpected PDU was received then the recv wr consumed must + * be replaced, this is done in the next send of a control-type PDU + */ + if (opcode == ISCSI_OP_NOOP_IN && hdr->itt == RESERVED_ITT) { + /* nop-in with itt = 0xffffffff */ + atomic_inc(&conn->ib_conn->unexpected_pdu_count); + } + else if (opcode == ISCSI_OP_ASYNC_EVENT) { + /* asyncronous message */ + atomic_inc(&conn->ib_conn->unexpected_pdu_count); + } + /* a reject PDU consumes the recv buf posted for the response */ } void iser_snd_completion(struct iser_desc *tx_desc) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 26ff6214a81..6dc6b174cdd 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -498,6 +498,7 @@ void iser_conn_init(struct iser_conn *ib_conn) init_waitqueue_head(&ib_conn->wait); atomic_set(&ib_conn->post_recv_buf_count, 0); atomic_set(&ib_conn->post_send_buf_count, 0); + atomic_set(&ib_conn->unexpected_pdu_count, 0); atomic_set(&ib_conn->refcount, 1); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c index b7ad2829d67..ac57b6a42c6 100644 --- a/drivers/net/mlx4/cq.c +++ b/drivers/net/mlx4/cq.c @@ -189,7 +189,7 @@ EXPORT_SYMBOL_GPL(mlx4_cq_resize); int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, - int collapsed) + unsigned vector, int collapsed) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cq_table *cq_table = &priv->cq_table; @@ -198,6 +198,11 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, u64 mtt_addr; int err; + if (vector >= dev->caps.num_comp_vectors) + return -EINVAL; + + cq->vector = vector; + cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap); if (cq->cqn == -1) return -ENOMEM; @@ -227,7 +232,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, cq_context->flags = cpu_to_be32(!!collapsed << 18); cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); - cq_context->comp_eqn = priv->eq_table.eq[MLX4_EQ_COMP].eqn; + cq_context->comp_eqn = priv->eq_table.eq[vector].eqn; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; mtt_addr = mlx4_mtt_addr(dev, mtt); @@ -276,7 +281,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); - synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq); + synchronize_irq(priv->eq_table.eq[cq->vector].irq); spin_lock_irq(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c index 1368a8010af..674f836e225 100644 --- a/drivers/net/mlx4/en_cq.c +++ b/drivers/net/mlx4/en_cq.c @@ -51,10 +51,13 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, int err; cq->size = entries; - if (mode == RX) + if (mode == RX) { cq->buf_size = cq->size * sizeof(struct mlx4_cqe); - else + cq->vector = ring % mdev->dev->caps.num_comp_vectors; + } else { cq->buf_size = sizeof(struct mlx4_cqe); + cq->vector = 0; + } cq->ring = ring; cq->is_tx = mode; @@ -86,7 +89,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) memset(cq->buf, 0, cq->buf_size); err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar, - cq->wqres.db.dma, &cq->mcq, cq->is_tx); + cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx); if (err) return err; diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c index 4b9794e97a7..c1c05852a95 100644 --- a/drivers/net/mlx4/en_main.c +++ b/drivers/net/mlx4/en_main.c @@ -170,9 +170,9 @@ static void *mlx4_en_add(struct mlx4_dev *dev) mlx4_info(mdev, "Using %d tx rings for port:%d\n", mdev->profile.prof[i].tx_ring_num, i); if (!mdev->profile.prof[i].rx_ring_num) { - mdev->profile.prof[i].rx_ring_num = 1; + mdev->profile.prof[i].rx_ring_num = dev->caps.num_comp_vectors; mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n", - 1, i); + mdev->profile.prof[i].rx_ring_num, i); } else mlx4_info(mdev, "Using %d rx rings for port:%d\n", mdev->profile.prof[i].rx_ring_num, i); diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index de169338cd9..2c19bff7cba 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -243,10 +243,6 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) * least that often. */ if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) { - /* - * Conditional on hca_type is OK here because - * this is a rare case, not the fast path. - */ eq_set_ci(eq, 0); set_ci = 0; } @@ -266,7 +262,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr) writel(priv->eq_table.clr_mask, priv->eq_table.clr_int); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]); return IRQ_RETVAL(work); @@ -304,6 +300,17 @@ static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, MLX4_CMD_TIME_CLASS_A); } +static int mlx4_num_eq_uar(struct mlx4_dev *dev) +{ + /* + * Each UAR holds 4 EQ doorbells. To figure out how many UARs + * we need to map, take the difference of highest index and + * the lowest index we'll use and add 1. + */ + return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 - + dev->caps.reserved_eqs / 4 + 1; +} + static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -483,9 +490,11 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) if (eq_table->have_irq) free_irq(dev->pdev->irq, dev); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) if (eq_table->eq[i].have_irq) free_irq(eq_table->eq[i].irq, eq_table->eq + i); + + kfree(eq_table->irq_names); } static int mlx4_map_clr_int(struct mlx4_dev *dev) @@ -551,57 +560,93 @@ void mlx4_unmap_eq_icm(struct mlx4_dev *dev) __free_page(priv->eq_table.icm_page); } +int mlx4_alloc_eq_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs, + sizeof *priv->eq_table.eq, GFP_KERNEL); + if (!priv->eq_table.eq) + return -ENOMEM; + + return 0; +} + +void mlx4_free_eq_table(struct mlx4_dev *dev) +{ + kfree(mlx4_priv(dev)->eq_table.eq); +} + int mlx4_init_eq_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; int i; + priv->eq_table.uar_map = kcalloc(sizeof *priv->eq_table.uar_map, + mlx4_num_eq_uar(dev), GFP_KERNEL); + if (!priv->eq_table.uar_map) { + err = -ENOMEM; + goto err_out_free; + } + err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs, dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0); if (err) - return err; + goto err_out_free; - for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i) + for (i = 0; i < mlx4_num_eq_uar(dev); ++i) priv->eq_table.uar_map[i] = NULL; err = mlx4_map_clr_int(dev); if (err) - goto err_out_free; + goto err_out_bitmap; priv->eq_table.clr_mask = swab32(1 << (priv->eq_table.inta_pin & 31)); priv->eq_table.clr_int = priv->clr_base + (priv->eq_table.inta_pin < 32 ? 4 : 0); - err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0, - &priv->eq_table.eq[MLX4_EQ_COMP]); - if (err) - goto err_out_unmap; + priv->eq_table.irq_names = kmalloc(16 * dev->caps.num_comp_vectors, GFP_KERNEL); + if (!priv->eq_table.irq_names) { + err = -ENOMEM; + goto err_out_bitmap; + } + + for (i = 0; i < dev->caps.num_comp_vectors; ++i) { + err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE, + (dev->flags & MLX4_FLAG_MSI_X) ? i : 0, + &priv->eq_table.eq[i]); + if (err) + goto err_out_unmap; + } err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_ASYNC : 0, - &priv->eq_table.eq[MLX4_EQ_ASYNC]); + (dev->flags & MLX4_FLAG_MSI_X) ? dev->caps.num_comp_vectors : 0, + &priv->eq_table.eq[dev->caps.num_comp_vectors]); if (err) goto err_out_comp; if (dev->flags & MLX4_FLAG_MSI_X) { - static const char *eq_name[] = { - [MLX4_EQ_COMP] = DRV_NAME " (comp)", - [MLX4_EQ_ASYNC] = DRV_NAME " (async)" - }; + static const char async_eq_name[] = "mlx4-async"; + const char *eq_name; + + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) { + if (i < dev->caps.num_comp_vectors) { + snprintf(priv->eq_table.irq_names + i * 16, 16, + "mlx4-comp-%d", i); + eq_name = priv->eq_table.irq_names + i * 16; + } else + eq_name = async_eq_name; - for (i = 0; i < MLX4_NUM_EQ; ++i) { err = request_irq(priv->eq_table.eq[i].irq, - mlx4_msi_x_interrupt, - 0, eq_name[i], priv->eq_table.eq + i); + mlx4_msi_x_interrupt, 0, eq_name, + priv->eq_table.eq + i); if (err) goto err_out_async; priv->eq_table.eq[i].have_irq = 1; } - } else { err = request_irq(dev->pdev->irq, mlx4_interrupt, IRQF_SHARED, DRV_NAME, dev); @@ -612,28 +657,36 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) } err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, - priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); + priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); if (err) mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", - priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err); + priv->eq_table.eq[dev->caps.num_comp_vectors].eqn, err); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) eq_set_ci(&priv->eq_table.eq[i], 1); return 0; err_out_async: - mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]); + mlx4_free_eq(dev, &priv->eq_table.eq[dev->caps.num_comp_vectors]); err_out_comp: - mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]); + i = dev->caps.num_comp_vectors - 1; err_out_unmap: + while (i >= 0) { + mlx4_free_eq(dev, &priv->eq_table.eq[i]); + --i; + } mlx4_unmap_clr_int(dev); mlx4_free_irqs(dev); -err_out_free: +err_out_bitmap: mlx4_bitmap_cleanup(&priv->eq_table.bitmap); + +err_out_free: + kfree(priv->eq_table.uar_map); + return err; } @@ -643,18 +696,20 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) int i; mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1, - priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); + priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); mlx4_free_irqs(dev); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) mlx4_free_eq(dev, &priv->eq_table.eq[i]); mlx4_unmap_clr_int(dev); - for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i) + for (i = 0; i < mlx4_num_eq_uar(dev); ++i) if (priv->eq_table.uar_map[i]) iounmap(priv->eq_table.uar_map[i]); mlx4_bitmap_cleanup(&priv->eq_table.bitmap); + + kfree(priv->eq_table.uar_map); } diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 90a0281d15e..710c79e7a2d 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -421,9 +421,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, ((u64) (MLX4_CMPT_TYPE_EQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, - roundup_pow_of_two(MLX4_NUM_EQ + - dev->caps.reserved_eqs), - MLX4_NUM_EQ + dev->caps.reserved_eqs, 0, 0); + dev->caps.num_eqs, dev->caps.num_eqs, 0, 0); if (err) goto err_cq; @@ -810,12 +808,12 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) if (dev->flags & MLX4_FLAG_MSI_X) { mlx4_warn(dev, "NOP command failed to generate MSI-X " "interrupt IRQ %d).\n", - priv->eq_table.eq[MLX4_EQ_ASYNC].irq); + priv->eq_table.eq[dev->caps.num_comp_vectors].irq); mlx4_warn(dev, "Trying again without MSI-X.\n"); } else { mlx4_err(dev, "NOP command failed to generate interrupt " "(IRQ %d), aborting.\n", - priv->eq_table.eq[MLX4_EQ_ASYNC].irq); + priv->eq_table.eq[dev->caps.num_comp_vectors].irq); mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); } @@ -908,31 +906,50 @@ err_uar_table_free: static void mlx4_enable_msi_x(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - struct msix_entry entries[MLX4_NUM_EQ]; + struct msix_entry *entries; + int nreq; int err; int i; if (msi_x) { - for (i = 0; i < MLX4_NUM_EQ; ++i) + nreq = min(dev->caps.num_eqs - dev->caps.reserved_eqs, + num_possible_cpus() + 1); + entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); + if (!entries) + goto no_msi; + + for (i = 0; i < nreq; ++i) entries[i].entry = i; - err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries)); + retry: + err = pci_enable_msix(dev->pdev, entries, nreq); if (err) { - if (err > 0) - mlx4_info(dev, "Only %d MSI-X vectors available, " - "not using MSI-X\n", err); + /* Try again if at least 2 vectors are available */ + if (err > 1) { + mlx4_info(dev, "Requested %d vectors, " + "but only %d MSI-X vectors available, " + "trying again\n", nreq, err); + nreq = err; + goto retry; + } + goto no_msi; } - for (i = 0; i < MLX4_NUM_EQ; ++i) + dev->caps.num_comp_vectors = nreq - 1; + for (i = 0; i < nreq; ++i) priv->eq_table.eq[i].irq = entries[i].vector; dev->flags |= MLX4_FLAG_MSI_X; + + kfree(entries); return; } no_msi: - for (i = 0; i < MLX4_NUM_EQ; ++i) + dev->caps.num_comp_vectors = 1; + + for (i = 0; i < 2; ++i) priv->eq_table.eq[i].irq = dev->pdev->irq; } @@ -1074,6 +1091,10 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_cmd; + err = mlx4_alloc_eq_table(dev); + if (err) + goto err_close; + mlx4_enable_msi_x(dev); err = mlx4_setup_hca(dev); @@ -1084,7 +1105,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) } if (err) - goto err_close; + goto err_free_eq; for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); @@ -1114,6 +1135,9 @@ err_port: mlx4_cleanup_pd_table(dev); mlx4_cleanup_uar_table(dev); +err_free_eq: + mlx4_free_eq_table(dev); + err_close: if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); @@ -1177,6 +1201,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) iounmap(priv->kar); mlx4_uar_free(dev, &priv->driver_uar); mlx4_cleanup_uar_table(dev); + mlx4_free_eq_table(dev); mlx4_close_hca(dev); mlx4_cmd_cleanup(dev); diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 34c909deaff..e0213bad61c 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -63,12 +63,6 @@ enum { }; enum { - MLX4_EQ_ASYNC, - MLX4_EQ_COMP, - MLX4_NUM_EQ -}; - -enum { MLX4_NUM_PDS = 1 << 15 }; @@ -205,10 +199,11 @@ struct mlx4_cq_table { struct mlx4_eq_table { struct mlx4_bitmap bitmap; + char *irq_names; void __iomem *clr_int; - void __iomem *uar_map[(MLX4_NUM_EQ + 6) / 4]; + void __iomem **uar_map; u32 clr_mask; - struct mlx4_eq eq[MLX4_NUM_EQ]; + struct mlx4_eq *eq; u64 icm_virt; struct page *icm_page; dma_addr_t icm_dma; @@ -328,6 +323,9 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap); int mlx4_reset(struct mlx4_dev *dev); +int mlx4_alloc_eq_table(struct mlx4_dev *dev); +void mlx4_free_eq_table(struct mlx4_dev *dev); + int mlx4_init_pd_table(struct mlx4_dev *dev); int mlx4_init_uar_table(struct mlx4_dev *dev); int mlx4_init_mr_table(struct mlx4_dev *dev); diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c index 9ca42b213d5..919fb9eb1b6 100644 --- a/drivers/net/mlx4/profile.c +++ b/drivers/net/mlx4/profile.c @@ -107,7 +107,9 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, profile[MLX4_RES_AUXC].num = request->num_qp; profile[MLX4_RES_SRQ].num = request->num_srq; profile[MLX4_RES_CQ].num = request->num_cq; - profile[MLX4_RES_EQ].num = MLX4_NUM_EQ + dev_cap->reserved_eqs; + profile[MLX4_RES_EQ].num = min(dev_cap->max_eqs, + dev_cap->reserved_eqs + + num_possible_cpus() + 1); profile[MLX4_RES_DMPT].num = request->num_mpt; profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS; profile[MLX4_RES_MTT].num = request->num_mtt; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 371086fd946..8f659cc2996 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -206,6 +206,7 @@ struct mlx4_caps { int reserved_cqs; int num_eqs; int reserved_eqs; + int num_comp_vectors; int num_mpts; int num_mtt_segs; int fmr_reserved_mtts; @@ -328,6 +329,7 @@ struct mlx4_cq { int arm_sn; int cqn; + unsigned vector; atomic_t refcount; struct completion free; @@ -437,7 +439,7 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres, int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, - int collapsed); + unsigned vector, int collapsed); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base); |