diff options
Diffstat (limited to 'drivers/infiniband')
59 files changed, 5225 insertions, 2063 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 325d502e25c..bdf0891a92d 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -33,4 +33,6 @@ source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" +source "drivers/infiniband/ulp/srp/Kconfig" + endmenu diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index d256cf79821..a43fb34cca9 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_INFINIBAND) += core/ obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ +obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 5ac86f566dc..34b724afd28 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -37,58 +37,44 @@ * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $ */ -#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/string.h> -#include <asm/bug.h> +#include "agent.h" +#include "smi.h" -#include <rdma/ib_smi.h> +#define SPFX "ib_agent: " -#include "smi.h" -#include "agent_priv.h" -#include "mad_priv.h" -#include "agent.h" +struct ib_agent_port_private { + struct list_head port_list; + struct ib_mad_agent *agent[2]; +}; -spinlock_t ib_agent_port_list_lock; +static DEFINE_SPINLOCK(ib_agent_port_list_lock); static LIST_HEAD(ib_agent_port_list); -/* - * Caller must hold ib_agent_port_list_lock - */ -static inline struct ib_agent_port_private * -__ib_get_agent_port(struct ib_device *device, int port_num, - struct ib_mad_agent *mad_agent) +static struct ib_agent_port_private * +__ib_get_agent_port(struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; - BUG_ON(!(!!device ^ !!mad_agent)); /* Exactly one MUST be (!NULL) */ - - if (device) { - list_for_each_entry(entry, &ib_agent_port_list, port_list) { - if (entry->smp_agent->device == device && - entry->port_num == port_num) - return entry; - } - } else { - list_for_each_entry(entry, &ib_agent_port_list, port_list) { - if ((entry->smp_agent == mad_agent) || - (entry->perf_mgmt_agent == mad_agent)) - return entry; - } + list_for_each_entry(entry, &ib_agent_port_list, port_list) { + if (entry->agent[0]->device == device && + entry->agent[0]->port_num == port_num) + return entry; } return NULL; } -static inline struct ib_agent_port_private * -ib_get_agent_port(struct ib_device *device, int port_num, - struct ib_mad_agent *mad_agent) +static struct ib_agent_port_private * +ib_get_agent_port(struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; unsigned long flags; spin_lock_irqsave(&ib_agent_port_list_lock, flags); - entry = __ib_get_agent_port(device, port_num, mad_agent); + entry = __ib_get_agent_port(device, port_num); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - return entry; } @@ -100,226 +86,102 @@ int smi_check_local_dr_smp(struct ib_smp *smp, if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return 1; - port_priv = ib_get_agent_port(device, port_num, NULL); + + port_priv = ib_get_agent_port(device, port_num); if (!port_priv) { printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d " - "not open\n", - device->name, port_num); + "not open\n", device->name, port_num); return 1; } - return smi_check_local_smp(port_priv->smp_agent, smp); + return smi_check_local_smp(port_priv->agent[0], smp); } -static int agent_mad_send(struct ib_mad_agent *mad_agent, - struct ib_agent_port_private *port_priv, - struct ib_mad_private *mad_priv, - struct ib_grh *grh, - struct ib_wc *wc) +int agent_send_response(struct ib_mad *mad, struct ib_grh *grh, + struct ib_wc *wc, struct ib_device *device, + int port_num, int qpn) { - struct ib_agent_send_wr *agent_send_wr; - struct ib_sge gather_list; - struct ib_send_wr send_wr; - struct ib_send_wr *bad_send_wr; - struct ib_ah_attr ah_attr; - unsigned long flags; - int ret = 1; - - agent_send_wr = kmalloc(sizeof(*agent_send_wr), GFP_KERNEL); - if (!agent_send_wr) - goto out; - agent_send_wr->mad = mad_priv; - - gather_list.addr = dma_map_single(mad_agent->device->dma_device, - &mad_priv->mad, - sizeof(mad_priv->mad), - DMA_TO_DEVICE); - gather_list.length = sizeof(mad_priv->mad); - gather_list.lkey = mad_agent->mr->lkey; - - send_wr.next = NULL; - send_wr.opcode = IB_WR_SEND; - send_wr.sg_list = &gather_list; - send_wr.num_sge = 1; - send_wr.wr.ud.remote_qpn = wc->src_qp; /* DQPN */ - send_wr.wr.ud.timeout_ms = 0; - send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; + struct ib_agent_port_private *port_priv; + struct ib_mad_agent *agent; + struct ib_mad_send_buf *send_buf; + struct ib_ah *ah; + int ret; - ah_attr.dlid = wc->slid; - ah_attr.port_num = mad_agent->port_num; - ah_attr.src_path_bits = wc->dlid_path_bits; - ah_attr.sl = wc->sl; - ah_attr.static_rate = 0; - ah_attr.ah_flags = 0; /* No GRH */ - if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { - if (wc->wc_flags & IB_WC_GRH) { - ah_attr.ah_flags = IB_AH_GRH; - /* Should sgid be looked up ? */ - ah_attr.grh.sgid_index = 0; - ah_attr.grh.hop_limit = grh->hop_limit; - ah_attr.grh.flow_label = be32_to_cpu( - grh->version_tclass_flow) & 0xfffff; - ah_attr.grh.traffic_class = (be32_to_cpu( - grh->version_tclass_flow) >> 20) & 0xff; - memcpy(ah_attr.grh.dgid.raw, - grh->sgid.raw, - sizeof(ah_attr.grh.dgid)); - } + port_priv = ib_get_agent_port(device, port_num); + if (!port_priv) { + printk(KERN_ERR SPFX "Unable to find port agent\n"); + return -ENODEV; } - agent_send_wr->ah = ib_create_ah(mad_agent->qp->pd, &ah_attr); - if (IS_ERR(agent_send_wr->ah)) { - printk(KERN_ERR SPFX "No memory for address handle\n"); - kfree(agent_send_wr); - goto out; + agent = port_priv->agent[qpn]; + ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + printk(KERN_ERR SPFX "ib_create_ah_from_wc error:%d\n", ret); + return ret; } - send_wr.wr.ud.ah = agent_send_wr->ah; - if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { - send_wr.wr.ud.pkey_index = wc->pkey_index; - send_wr.wr.ud.remote_qkey = IB_QP1_QKEY; - } else { /* for SMPs */ - send_wr.wr.ud.pkey_index = 0; - send_wr.wr.ud.remote_qkey = 0; + send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0, + IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, + GFP_KERNEL); + if (IS_ERR(send_buf)) { + ret = PTR_ERR(send_buf); + printk(KERN_ERR SPFX "ib_create_send_mad error:%d\n", ret); + goto err1; } - send_wr.wr.ud.mad_hdr = &mad_priv->mad.mad.mad_hdr; - send_wr.wr_id = (unsigned long)agent_send_wr; - - pci_unmap_addr_set(agent_send_wr, mapping, gather_list.addr); - /* Send */ - spin_lock_irqsave(&port_priv->send_list_lock, flags); - if (ib_post_send_mad(mad_agent, &send_wr, &bad_send_wr)) { - spin_unlock_irqrestore(&port_priv->send_list_lock, flags); - dma_unmap_single(mad_agent->device->dma_device, - pci_unmap_addr(agent_send_wr, mapping), - sizeof(mad_priv->mad), - DMA_TO_DEVICE); - ib_destroy_ah(agent_send_wr->ah); - kfree(agent_send_wr); - } else { - list_add_tail(&agent_send_wr->send_list, - &port_priv->send_posted_list); - spin_unlock_irqrestore(&port_priv->send_list_lock, flags); - ret = 0; + memcpy(send_buf->mad, mad, sizeof *mad); + send_buf->ah = ah; + if ((ret = ib_post_send_mad(send_buf, NULL))) { + printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret); + goto err2; } - -out: + return 0; +err2: + ib_free_send_mad(send_buf); +err1: + ib_destroy_ah(ah); return ret; } -int agent_send(struct ib_mad_private *mad, - struct ib_grh *grh, - struct ib_wc *wc, - struct ib_device *device, - int port_num) -{ - struct ib_agent_port_private *port_priv; - struct ib_mad_agent *mad_agent; - - port_priv = ib_get_agent_port(device, port_num, NULL); - if (!port_priv) { - printk(KERN_DEBUG SPFX "agent_send %s port %d not open\n", - device->name, port_num); - return 1; - } - - /* Get mad agent based on mgmt_class in MAD */ - switch (mad->mad.mad.mad_hdr.mgmt_class) { - case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: - case IB_MGMT_CLASS_SUBN_LID_ROUTED: - mad_agent = port_priv->smp_agent; - break; - case IB_MGMT_CLASS_PERF_MGMT: - mad_agent = port_priv->perf_mgmt_agent; - break; - default: - return 1; - } - - return agent_mad_send(mad_agent, port_priv, mad, grh, wc); -} - static void agent_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { - struct ib_agent_port_private *port_priv; - struct ib_agent_send_wr *agent_send_wr; - unsigned long flags; - - /* Find matching MAD agent */ - port_priv = ib_get_agent_port(NULL, 0, mad_agent); - if (!port_priv) { - printk(KERN_ERR SPFX "agent_send_handler: no matching MAD " - "agent %p\n", mad_agent); - return; - } - - agent_send_wr = (struct ib_agent_send_wr *)(unsigned long)mad_send_wc->wr_id; - spin_lock_irqsave(&port_priv->send_list_lock, flags); - /* Remove completed send from posted send MAD list */ - list_del(&agent_send_wr->send_list); - spin_unlock_irqrestore(&port_priv->send_list_lock, flags); - - dma_unmap_single(mad_agent->device->dma_device, - pci_unmap_addr(agent_send_wr, mapping), - sizeof(agent_send_wr->mad->mad), - DMA_TO_DEVICE); - - ib_destroy_ah(agent_send_wr->ah); - - /* Release allocated memory */ - kmem_cache_free(ib_mad_cache, agent_send_wr->mad); - kfree(agent_send_wr); + ib_destroy_ah(mad_send_wc->send_buf->ah); + ib_free_send_mad(mad_send_wc->send_buf); } int ib_agent_port_open(struct ib_device *device, int port_num) { - int ret; struct ib_agent_port_private *port_priv; unsigned long flags; - - /* First, check if port already open for SMI */ - port_priv = ib_get_agent_port(device, port_num, NULL); - if (port_priv) { - printk(KERN_DEBUG SPFX "%s port %d already open\n", - device->name, port_num); - return 0; - } + int ret; /* Create new device info */ - port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL); + port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) { printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n"); ret = -ENOMEM; goto error1; } - memset(port_priv, 0, sizeof *port_priv); - port_priv->port_num = port_num; - spin_lock_init(&port_priv->send_list_lock); - INIT_LIST_HEAD(&port_priv->send_posted_list); - - /* Obtain send only MAD agent for SM class (SMI QP) */ - port_priv->smp_agent = ib_register_mad_agent(device, port_num, - IB_QPT_SMI, - NULL, 0, + /* Obtain send only MAD agent for SMI QP */ + port_priv->agent[0] = ib_register_mad_agent(device, port_num, + IB_QPT_SMI, NULL, 0, &agent_send_handler, - NULL, NULL); - - if (IS_ERR(port_priv->smp_agent)) { - ret = PTR_ERR(port_priv->smp_agent); + NULL, NULL); + if (IS_ERR(port_priv->agent[0])) { + ret = PTR_ERR(port_priv->agent[0]); goto error2; } - /* Obtain send only MAD agent for PerfMgmt class (GSI QP) */ - port_priv->perf_mgmt_agent = ib_register_mad_agent(device, port_num, - IB_QPT_GSI, - NULL, 0, - &agent_send_handler, - NULL, NULL); - if (IS_ERR(port_priv->perf_mgmt_agent)) { - ret = PTR_ERR(port_priv->perf_mgmt_agent); + /* Obtain send only MAD agent for GSI QP */ + port_priv->agent[1] = ib_register_mad_agent(device, port_num, + IB_QPT_GSI, NULL, 0, + &agent_send_handler, + NULL, NULL); + if (IS_ERR(port_priv->agent[1])) { + ret = PTR_ERR(port_priv->agent[1]); goto error3; } @@ -330,7 +192,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num) return 0; error3: - ib_unregister_mad_agent(port_priv->smp_agent); + ib_unregister_mad_agent(port_priv->agent[0]); error2: kfree(port_priv); error1: @@ -343,7 +205,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num) unsigned long flags; spin_lock_irqsave(&ib_agent_port_list_lock, flags); - port_priv = __ib_get_agent_port(device, port_num, NULL); + port_priv = __ib_get_agent_port(device, port_num); if (port_priv == NULL) { spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); printk(KERN_ERR SPFX "Port %d not found\n", port_num); @@ -352,9 +214,8 @@ int ib_agent_port_close(struct ib_device *device, int port_num) list_del(&port_priv->port_list); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - ib_unregister_mad_agent(port_priv->perf_mgmt_agent); - ib_unregister_mad_agent(port_priv->smp_agent); + ib_unregister_mad_agent(port_priv->agent[1]); + ib_unregister_mad_agent(port_priv->agent[0]); kfree(port_priv); - return 0; } diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h index d9426842254..86d72fab37b 100644 --- a/drivers/infiniband/core/agent.h +++ b/drivers/infiniband/core/agent.h @@ -39,17 +39,15 @@ #ifndef __AGENT_H_ #define __AGENT_H_ -extern spinlock_t ib_agent_port_list_lock; +#include <linux/err.h> +#include <rdma/ib_mad.h> -extern int ib_agent_port_open(struct ib_device *device, - int port_num); +extern int ib_agent_port_open(struct ib_device *device, int port_num); extern int ib_agent_port_close(struct ib_device *device, int port_num); -extern int agent_send(struct ib_mad_private *mad, - struct ib_grh *grh, - struct ib_wc *wc, - struct ib_device *device, - int port_num); +extern int agent_send_response(struct ib_mad *mad, struct ib_grh *grh, + struct ib_wc *wc, struct ib_device *device, + int port_num, int qpn); #endif /* __AGENT_H_ */ diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h deleted file mode 100644 index 2ec6d7f1b7d..00000000000 --- a/drivers/infiniband/core/agent_priv.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: agent_priv.h 1640 2005-01-24 22:39:02Z halr $ - */ - -#ifndef __IB_AGENT_PRIV_H__ -#define __IB_AGENT_PRIV_H__ - -#include <linux/pci.h> - -#define SPFX "ib_agent: " - -struct ib_agent_send_wr { - struct list_head send_list; - struct ib_ah *ah; - struct ib_mad_private *mad; - DECLARE_PCI_UNMAP_ADDR(mapping) -}; - -struct ib_agent_port_private { - struct list_head port_list; - struct list_head send_posted_list; - spinlock_t send_list_lock; - int port_num; - struct ib_mad_agent *smp_agent; /* SM class */ - struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */ -}; - -#endif /* __IB_AGENT_PRIV_H__ */ diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index f014e639088..c57a3871184 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -38,6 +38,7 @@ #include <linux/module.h> #include <linux/errno.h> #include <linux/slab.h> +#include <linux/sched.h> /* INIT_WORK, schedule_work(), flush_scheduled_work() */ #include <rdma/ib_cache.h> diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 54db6d4831f..02110e00d14 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -135,6 +135,7 @@ struct cm_id_private { __be64 tid; __be32 local_qpn; __be32 remote_qpn; + enum ib_qp_type qp_type; __be32 sq_psn; __be32 rq_psn; int timeout_ms; @@ -175,8 +176,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, cm_id_priv->av.pkey_index, - ah, 0, sizeof(struct ib_mad_hdr), - sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr), + 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); if (IS_ERR(m)) { ib_destroy_ah(ah); @@ -184,7 +184,8 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, } /* Timeout set by caller if response is expected. */ - m->send_wr.wr.ud.retries = cm_id_priv->max_cm_retries; + m->ah = ah; + m->retries = cm_id_priv->max_cm_retries; atomic_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; @@ -205,20 +206,20 @@ static int cm_alloc_response_msg(struct cm_port *port, return PTR_ERR(ah); m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, - ah, 0, sizeof(struct ib_mad_hdr), - sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr), + 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); } + m->ah = ah; *msg = m; return 0; } static void cm_free_msg(struct ib_mad_send_buf *msg) { - ib_destroy_ah(msg->send_wr.wr.ud.ah); + ib_destroy_ah(msg->ah); if (msg->context[0]) cm_deref_id(msg->context[0]); ib_free_send_mad(msg); @@ -366,9 +367,15 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); if ((cur_cm_id_priv->id.service_mask & service_id) == - (service_mask & cur_cm_id_priv->id.service_id)) - return cm_id_priv; - if (service_id < cur_cm_id_priv->id.service_id) + (service_mask & cur_cm_id_priv->id.service_id) && + (cm_id_priv->id.device == cur_cm_id_priv->id.device)) + return cur_cm_id_priv; + + if (cm_id_priv->id.device < cur_cm_id_priv->id.device) + link = &(*link)->rb_left; + else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) + link = &(*link)->rb_right; + else if (service_id < cur_cm_id_priv->id.service_id) link = &(*link)->rb_left; else link = &(*link)->rb_right; @@ -378,7 +385,8 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) return NULL; } -static struct cm_id_private * cm_find_listen(__be64 service_id) +static struct cm_id_private * cm_find_listen(struct ib_device *device, + __be64 service_id) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; @@ -386,9 +394,15 @@ static struct cm_id_private * cm_find_listen(__be64 service_id) while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); if ((cm_id_priv->id.service_mask & service_id) == - (cm_id_priv->id.service_mask & cm_id_priv->id.service_id)) + cm_id_priv->id.service_id && + (cm_id_priv->id.device == device)) return cm_id_priv; - if (service_id < cm_id_priv->id.service_id) + + if (device < cm_id_priv->id.device) + node = node->rb_left; + else if (device > cm_id_priv->id.device) + node = node->rb_right; + else if (service_id < cm_id_priv->id.service_id) node = node->rb_left; else node = node->rb_right; @@ -523,18 +537,19 @@ static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, ib_send_cm_sidr_rep(&cm_id_priv->id, ¶m); } -struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, +struct ib_cm_id *ib_create_cm_id(struct ib_device *device, + ib_cm_handler cm_handler, void *context) { struct cm_id_private *cm_id_priv; int ret; - cm_id_priv = kmalloc(sizeof *cm_id_priv, GFP_KERNEL); + cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL); if (!cm_id_priv) return ERR_PTR(-ENOMEM); - memset(cm_id_priv, 0, sizeof *cm_id_priv); cm_id_priv->id.state = IB_CM_IDLE; + cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.remote_cm_qpn = 1; @@ -605,10 +620,9 @@ static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) { struct cm_timewait_info *timewait_info; - timewait_info = kmalloc(sizeof *timewait_info, GFP_KERNEL); + timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL); if (!timewait_info) return ERR_PTR(-ENOMEM); - memset(timewait_info, 0, sizeof *timewait_info); timewait_info->work.local_id = local_id; INIT_WORK(&timewait_info->work.work, cm_work_handler, @@ -662,8 +676,7 @@ retest: break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irqrestore(&cm_id_priv->lock, flags); break; case IB_CM_SIDR_REQ_RCVD: @@ -674,8 +687,7 @@ retest: case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* Fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: @@ -692,8 +704,7 @@ retest: ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); break; @@ -867,7 +878,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param) { struct cm_id_private *cm_id_priv; - struct ib_send_wr *bad_send_wr; struct cm_req_msg *req_msg; unsigned long flags; int ret; @@ -911,6 +921,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->retry_count = param->retry_count; cm_id_priv->path_mtu = param->primary_path->mtu; + cm_id_priv->qp_type = param->qp_type; ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); if (ret) @@ -919,7 +930,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; cm_format_req(req_msg, cm_id_priv, param); cm_id_priv->tid = req_msg->hdr.tid; - cm_id_priv->msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); @@ -928,8 +939,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_req_get_primary_local_ack_timeout(req_msg); spin_lock_irqsave(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &cm_id_priv->msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(cm_id_priv->msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); goto error2; @@ -952,7 +962,6 @@ static int cm_issue_rej(struct cm_port *port, void *ari, u8 ari_length) { struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; struct cm_rej_msg *rej_msg, *rcv_msg; int ret; @@ -975,7 +984,7 @@ static int cm_issue_rej(struct cm_port *port, memcpy(rej_msg->ari, ari, ari_length); } - ret = ib_post_send_mad(port->mad_agent, &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); @@ -1047,7 +1056,6 @@ static void cm_format_req_event(struct cm_work *work, req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.req_rcvd; param->listen_id = listen_id; - param->device = cm_id_priv->av.port->mad_agent->device; param->port = cm_id_priv->av.port->port_num; param->primary_path = &work->path[0]; if (req_msg->alt_local_lid) @@ -1156,7 +1164,6 @@ static void cm_dup_req_handler(struct cm_work *work, struct cm_id_private *cm_id_priv) { struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1185,8 +1192,7 @@ static void cm_dup_req_handler(struct cm_work *work, } spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, - &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto free; return; @@ -1226,7 +1232,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, } /* Find matching listen request. */ - listen_cm_id_priv = cm_find_listen(req_msg->service_id); + listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, + req_msg->service_id); if (!listen_cm_id_priv) { spin_unlock_irqrestore(&cm.lock, flags); cm_issue_rej(work->port, work->mad_recv_wc, @@ -1254,7 +1261,7 @@ static int cm_req_handler(struct cm_work *work) req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id = ib_create_cm_id(NULL, NULL); + cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); @@ -1305,6 +1312,7 @@ static int cm_req_handler(struct cm_work *work) cm_req_get_primary_local_ack_timeout(req_msg); cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); + cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); cm_process_work(cm_id_priv, work); @@ -1349,7 +1357,6 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; struct cm_rep_msg *rep_msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1371,11 +1378,10 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, rep_msg = (struct cm_rep_msg *) msg->mad; cm_format_rep(rep_msg, cm_id_priv, param); - msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -1413,7 +1419,6 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; void *data; int ret; @@ -1440,8 +1445,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -1486,7 +1490,6 @@ static void cm_dup_rep_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1514,8 +1517,7 @@ static void cm_dup_rep_handler(struct cm_work *work) goto unlock; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, - &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto free; goto deref; @@ -1583,8 +1585,7 @@ static int cm_rep_handler(struct cm_work *work) /* todo: handle peer_to_peer */ - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1618,8 +1619,7 @@ static int cm_establish_handler(struct cm_work *work) goto out; } - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1658,8 +1658,7 @@ static int cm_rtu_handler(struct cm_work *work) } cm_id_priv->id.state = IB_CM_ESTABLISHED; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1696,7 +1695,6 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1718,11 +1716,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id, cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -1756,7 +1753,6 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; void *data; int ret; @@ -1786,8 +1782,7 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id, cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, - &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -1804,7 +1799,6 @@ static int cm_dreq_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_dreq_msg *dreq_msg; struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1823,8 +1817,7 @@ static int cm_dreq_handler(struct cm_work *work) switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); break; case IB_CM_ESTABLISHED: case IB_CM_MRA_REP_RCVD: @@ -1838,8 +1831,7 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv->private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr)) + if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; default: @@ -1886,8 +1878,7 @@ static int cm_drep_handler(struct cm_work *work) } cm_enter_timewait(cm_id_priv); - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1912,7 +1903,6 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1956,8 +1946,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, if (ret) goto out; - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); @@ -2033,8 +2022,7 @@ static int cm_rej_handler(struct cm_work *work) case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: @@ -2044,8 +2032,7 @@ static int cm_rej_handler(struct cm_work *work) cm_reset_to_idle(cm_id_priv); break; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* fall through */ case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: @@ -2080,7 +2067,6 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; void *data; unsigned long flags; int ret; @@ -2104,8 +2090,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REQ, service_timeout, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; cm_id->state = IB_CM_MRA_REQ_SENT; @@ -2118,8 +2103,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REP, service_timeout, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; cm_id->state = IB_CM_MRA_REP_SENT; @@ -2132,8 +2116,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_OTHER, service_timeout, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; cm_id->lap_state = IB_CM_MRA_LAP_SENT; @@ -2195,14 +2178,14 @@ static int cm_mra_handler(struct cm_work *work) case IB_CM_REQ_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || ib_modify_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg, timeout)) + cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; break; case IB_CM_REP_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || ib_modify_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg, timeout)) + cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; break; @@ -2210,7 +2193,7 @@ static int cm_mra_handler(struct cm_work *work) if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || ib_modify_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg, timeout)) + cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; break; @@ -2273,7 +2256,6 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2294,11 +2276,10 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, alternate_path, private_data, private_data_len); - msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2342,7 +2323,6 @@ static int cm_lap_handler(struct cm_work *work) struct cm_lap_msg *lap_msg; struct ib_cm_lap_event_param *param; struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2376,8 +2356,7 @@ static int cm_lap_handler(struct cm_work *work) cm_id_priv->private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr)) + if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; default: @@ -2433,7 +2412,6 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2456,8 +2434,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id, cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, info, info_length, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2496,8 +2473,7 @@ static int cm_apr_handler(struct cm_work *work) goto out; } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_id_priv->msg = NULL; ret = atomic_inc_and_test(&cm_id_priv->work_count); @@ -2572,7 +2548,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2595,13 +2570,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, param); - msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state == IB_CM_IDLE) - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); else ret = -EINVAL; @@ -2629,7 +2603,6 @@ static void cm_format_sidr_req_event(struct cm_work *work, param = &work->cm_event.param.sidr_req_rcvd; param->pkey = __be16_to_cpu(sidr_req_msg->pkey); param->listen_id = listen_id; - param->device = work->port->mad_agent->device; param->port = work->port->port_num; work->cm_event.private_data = &sidr_req_msg->private_data; } @@ -2642,7 +2615,7 @@ static int cm_sidr_req_handler(struct cm_work *work) struct ib_wc *wc; unsigned long flags; - cm_id = ib_create_cm_id(NULL, NULL); + cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -2666,7 +2639,8 @@ static int cm_sidr_req_handler(struct cm_work *work) spin_unlock_irqrestore(&cm.lock, flags); goto out; /* Duplicate message. */ } - cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id); + cur_cm_id_priv = cm_find_listen(cm_id->device, + sidr_req_msg->service_id); if (!cur_cm_id_priv) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); spin_unlock_irqrestore(&cm.lock, flags); @@ -2715,7 +2689,6 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2737,8 +2710,7 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, param); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2791,8 +2763,7 @@ static int cm_sidr_rep_handler(struct cm_work *work) goto out; } cm_id_priv->id.state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_format_sidr_rep_event(work); @@ -2860,9 +2831,7 @@ discard: static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { - struct ib_mad_send_buf *msg; - - msg = (struct ib_mad_send_buf *)(unsigned long)mad_send_wc->wr_id; + struct ib_mad_send_buf *msg = mad_send_wc->send_buf; switch (mad_send_wc->status) { case IB_WC_SUCCESS: @@ -3064,10 +3033,10 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE; if (cm_id_priv->responder_resources) - qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ; + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ; qp_attr->pkey_index = cm_id_priv->av.pkey_index; qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; @@ -3097,14 +3066,18 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | - IB_QP_DEST_QPN | IB_QP_RQ_PSN | - IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; + IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); - qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources; - qp_attr->min_rnr_timer = 0; + if (cm_id_priv->qp_type == IB_QPT_RC) { + *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER; + qp_attr->max_dest_rd_atomic = + cm_id_priv->responder_resources; + qp_attr->min_rnr_timer = 0; + } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; @@ -3133,14 +3106,17 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: - *qp_attr_mask = IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY | IB_QP_SQ_PSN | - IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->timeout = cm_id_priv->local_ack_timeout; - qp_attr->retry_cnt = cm_id_priv->retry_count; - qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; + *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); - qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + if (cm_id_priv->qp_type == IB_QPT_RC) { + *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC; + qp_attr->timeout = cm_id_priv->local_ack_timeout; + qp_attr->retry_cnt = cm_id_priv->retry_count; + qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; + qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_PATH_MIG_STATE; qp_attr->path_mig_state = IB_MIG_REARM; @@ -3323,6 +3299,7 @@ static void __exit ib_cm_cleanup(void) flush_workqueue(cm.wq); destroy_workqueue(cm.wq); ib_unregister_client(&cm_client); + idr_destroy(&cm.local_id_table); } module_init(ib_cm_init); diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 813ab70bf6d..4d3aee90c24 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -186,6 +186,7 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, req_msg->offset40 = cpu_to_be32((be32_to_cpu( req_msg->offset40) & 0xFFFFFFF9) | 0x2); + break; default: req_msg->offset40 = cpu_to_be32(be32_to_cpu( req_msg->offset40) & diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d3cf84e0158..e169e798354 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -161,17 +161,9 @@ static int alloc_name(char *name) */ struct ib_device *ib_alloc_device(size_t size) { - void *dev; - BUG_ON(size < sizeof (struct ib_device)); - dev = kmalloc(size, GFP_KERNEL); - if (!dev) - return NULL; - - memset(dev, 0, size); - - return dev; + return kzalloc(size, GFP_KERNEL); } EXPORT_SYMBOL(ib_alloc_device); @@ -514,6 +506,12 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { + if (device->node_type == IB_NODE_SWITCH) { + if (port_num) + return -EINVAL; + } else if (port_num < 1 || port_num > device->phys_port_cnt) + return -EINVAL; + return device->query_port(device, port_num, port_attr); } EXPORT_SYMBOL(ib_query_port); @@ -583,6 +581,12 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { + if (device->node_type == IB_NODE_SWITCH) { + if (port_num) + return -EINVAL; + } else if (port_num < 1 || port_num > device->phys_port_cnt) + return -EINVAL; + return device->modify_port(device, port_num, port_modify_mask, port_modify); } diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a4a4d9c1eef..d393b504bf2 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -255,12 +255,11 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, } /* Allocate structures */ - mad_agent_priv = kmalloc(sizeof *mad_agent_priv, GFP_KERNEL); + mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL); if (!mad_agent_priv) { ret = ERR_PTR(-ENOMEM); goto error1; } - memset(mad_agent_priv, 0, sizeof *mad_agent_priv); mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd, IB_ACCESS_LOCAL_WRITE); @@ -356,9 +355,9 @@ error4: spin_unlock_irqrestore(&port_priv->reg_lock, flags); kfree(reg_req); error3: - kfree(mad_agent_priv); -error2: ib_dereg_mr(mad_agent_priv->agent.mr); +error2: + kfree(mad_agent_priv); error1: return ret; } @@ -448,14 +447,13 @@ struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, goto error1; } /* Allocate structures */ - mad_snoop_priv = kmalloc(sizeof *mad_snoop_priv, GFP_KERNEL); + mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL); if (!mad_snoop_priv) { ret = ERR_PTR(-ENOMEM); goto error1; } /* Now, fill in the various structures */ - memset(mad_snoop_priv, 0, sizeof *mad_snoop_priv); mad_snoop_priv->qp_info = &port_priv->qp_info[qpn]; mad_snoop_priv->agent.device = device; mad_snoop_priv->agent.recv_handler = recv_handler; @@ -510,8 +508,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) wait_event(mad_agent_priv->wait, !atomic_read(&mad_agent_priv->refcount)); - if (mad_agent_priv->reg_req) - kfree(mad_agent_priv->reg_req); + kfree(mad_agent_priv->reg_req); ib_dereg_mr(mad_agent_priv->agent.mr); kfree(mad_agent_priv); } @@ -579,7 +576,7 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list) } static void snoop_send(struct ib_mad_qp_info *qp_info, - struct ib_send_wr *send_wr, + struct ib_mad_send_buf *send_buf, struct ib_mad_send_wc *mad_send_wc, int mad_snoop_flags) { @@ -597,7 +594,7 @@ static void snoop_send(struct ib_mad_qp_info *qp_info, atomic_inc(&mad_snoop_priv->refcount); spin_unlock_irqrestore(&qp_info->snoop_lock, flags); mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, - send_wr, mad_send_wc); + send_buf, mad_send_wc); if (atomic_dec_and_test(&mad_snoop_priv->refcount)) wake_up(&mad_snoop_priv->wait); spin_lock_irqsave(&qp_info->snoop_lock, flags); @@ -654,10 +651,10 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, * Return < 0 if error */ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, - struct ib_smp *smp, - struct ib_send_wr *send_wr) + struct ib_mad_send_wr_private *mad_send_wr) { int ret; + struct ib_smp *smp = mad_send_wr->send_buf.mad; unsigned long flags; struct ib_mad_local_private *local; struct ib_mad_private *mad_priv; @@ -666,6 +663,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_device *device = mad_agent_priv->agent.device; u8 port_num = mad_agent_priv->agent.port_num; struct ib_wc mad_wc; + struct ib_send_wr *send_wr = &mad_send_wr->send_wr; if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) { ret = -EINVAL; @@ -745,13 +743,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, goto out; } - local->send_wr = *send_wr; - local->send_wr.sg_list = local->sg_list; - memcpy(local->sg_list, send_wr->sg_list, - sizeof *send_wr->sg_list * send_wr->num_sge); - local->send_wr.next = NULL; - local->tid = send_wr->wr.ud.mad_hdr->tid; - local->wr_id = send_wr->wr_id; + local->mad_send_wr = mad_send_wr; /* Reference MAD agent until send side of local completion handled */ atomic_inc(&mad_agent_priv->refcount); /* Queue local completion to local list */ @@ -781,17 +773,17 @@ static int get_buf_length(int hdr_len, int data_len) struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, - struct ib_ah *ah, int rmpp_active, + int rmpp_active, int hdr_len, int data_len, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_buf *send_buf; + struct ib_mad_send_wr_private *mad_send_wr; int buf_size; void *buf; - mad_agent_priv = container_of(mad_agent, - struct ib_mad_agent_private, agent); + mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, + agent); buf_size = get_buf_length(hdr_len, data_len); if ((!mad_agent->rmpp_version && @@ -799,45 +791,39 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, (!rmpp_active && buf_size > sizeof(struct ib_mad))) return ERR_PTR(-EINVAL); - buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask); + buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); - memset(buf, 0, sizeof *send_buf + buf_size); - - send_buf = buf + buf_size; - send_buf->mad = buf; - - send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device, - buf, buf_size, DMA_TO_DEVICE); - pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr); - send_buf->sge.length = buf_size; - send_buf->sge.lkey = mad_agent->mr->lkey; - - send_buf->send_wr.wr_id = (unsigned long) send_buf; - send_buf->send_wr.sg_list = &send_buf->sge; - send_buf->send_wr.num_sge = 1; - send_buf->send_wr.opcode = IB_WR_SEND; - send_buf->send_wr.send_flags = IB_SEND_SIGNALED; - send_buf->send_wr.wr.ud.ah = ah; - send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr; - send_buf->send_wr.wr.ud.remote_qpn = remote_qpn; - send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - send_buf->send_wr.wr.ud.pkey_index = pkey_index; + + mad_send_wr = buf + buf_size; + mad_send_wr->send_buf.mad = buf; + + mad_send_wr->mad_agent_priv = mad_agent_priv; + mad_send_wr->sg_list[0].length = buf_size; + mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey; + + mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; + mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; + mad_send_wr->send_wr.num_sge = 1; + mad_send_wr->send_wr.opcode = IB_WR_SEND; + mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; + mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; + mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; + mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; if (rmpp_active) { - struct ib_rmpp_mad *rmpp_mad; - rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad; + struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad; rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len - - offsetof(struct ib_rmpp_mad, data) + data_len); + IB_MGMT_RMPP_HDR + data_len); rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version; rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); } - send_buf->mad_agent = mad_agent; + mad_send_wr->send_buf.mad_agent = mad_agent; atomic_inc(&mad_agent_priv->refcount); - return send_buf; + return &mad_send_wr->send_buf; } EXPORT_SYMBOL(ib_create_send_mad); @@ -847,10 +833,6 @@ void ib_free_send_mad(struct ib_mad_send_buf *send_buf) mad_agent_priv = container_of(send_buf->mad_agent, struct ib_mad_agent_private, agent); - - dma_unmap_single(send_buf->mad_agent->device->dma_device, - pci_unmap_addr(send_buf, mapping), - send_buf->sge.length, DMA_TO_DEVICE); kfree(send_buf->mad); if (atomic_dec_and_test(&mad_agent_priv->refcount)) @@ -861,8 +843,10 @@ EXPORT_SYMBOL(ib_free_send_mad); int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; - struct ib_send_wr *bad_send_wr; struct list_head *list; + struct ib_send_wr *bad_send_wr; + struct ib_mad_agent *mad_agent; + struct ib_sge *sge; unsigned long flags; int ret; @@ -871,10 +855,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; + mad_agent = mad_send_wr->send_buf.mad_agent; + sge = mad_send_wr->sg_list; + sge->addr = dma_map_single(mad_agent->device->dma_device, + mad_send_wr->send_buf.mad, sge->length, + DMA_TO_DEVICE); + pci_unmap_addr_set(mad_send_wr, mapping, sge->addr); + spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { - ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp, - &mad_send_wr->send_wr, &bad_send_wr); + ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr, + &bad_send_wr); list = &qp_info->send_queue.list; } else { ret = 0; @@ -886,6 +877,11 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) list_add_tail(&mad_send_wr->mad_list.list, list); } spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); + if (ret) + dma_unmap_single(mad_agent->device->dma_device, + pci_unmap_addr(mad_send_wr, mapping), + sge->length, DMA_TO_DEVICE); + return ret; } @@ -893,45 +889,28 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client */ -int ib_post_send_mad(struct ib_mad_agent *mad_agent, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr) +int ib_post_send_mad(struct ib_mad_send_buf *send_buf, + struct ib_mad_send_buf **bad_send_buf) { - int ret = -EINVAL; struct ib_mad_agent_private *mad_agent_priv; - - /* Validate supplied parameters */ - if (!bad_send_wr) - goto error1; - - if (!mad_agent || !send_wr) - goto error2; - - if (!mad_agent->send_handler) - goto error2; - - mad_agent_priv = container_of(mad_agent, - struct ib_mad_agent_private, - agent); + struct ib_mad_send_buf *next_send_buf; + struct ib_mad_send_wr_private *mad_send_wr; + unsigned long flags; + int ret = -EINVAL; /* Walk list of send WRs and post each on send list */ - while (send_wr) { - unsigned long flags; - struct ib_send_wr *next_send_wr; - struct ib_mad_send_wr_private *mad_send_wr; - struct ib_smp *smp; - - /* Validate more parameters */ - if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG) - goto error2; + for (; send_buf; send_buf = next_send_buf) { - if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler) - goto error2; - - if (!send_wr->wr.ud.mad_hdr) { - printk(KERN_ERR PFX "MAD header must be supplied " - "in WR %p\n", send_wr); - goto error2; + mad_send_wr = container_of(send_buf, + struct ib_mad_send_wr_private, + send_buf); + mad_agent_priv = mad_send_wr->mad_agent_priv; + + if (!send_buf->mad_agent->send_handler || + (send_buf->timeout_ms && + !send_buf->mad_agent->recv_handler)) { + ret = -EINVAL; + goto error; } /* @@ -939,40 +918,24 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, * current one completes, and the user modifies the work * request associated with the completion */ - next_send_wr = (struct ib_send_wr *)send_wr->next; + next_send_buf = send_buf->next; + mad_send_wr->send_wr.wr.ud.ah = send_buf->ah; - smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr; - if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - ret = handle_outgoing_dr_smp(mad_agent_priv, smp, - send_wr); + if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + ret = handle_outgoing_dr_smp(mad_agent_priv, + mad_send_wr); if (ret < 0) /* error */ - goto error2; + goto error; else if (ret == 1) /* locally consumed */ - goto next; + continue; } - /* Allocate MAD send WR tracking structure */ - mad_send_wr = kmalloc(sizeof *mad_send_wr, GFP_ATOMIC); - if (!mad_send_wr) { - printk(KERN_ERR PFX "No memory for " - "ib_mad_send_wr_private\n"); - ret = -ENOMEM; - goto error2; - } - memset(mad_send_wr, 0, sizeof *mad_send_wr); - - mad_send_wr->send_wr = *send_wr; - mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; - memcpy(mad_send_wr->sg_list, send_wr->sg_list, - sizeof *send_wr->sg_list * send_wr->num_sge); - mad_send_wr->wr_id = send_wr->wr_id; - mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid; - mad_send_wr->mad_agent_priv = mad_agent_priv; + mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; /* Timeout will be updated after send completes */ - mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr. - ud.timeout_ms); - mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; - /* One reference for each work request to QP + response */ + mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); + mad_send_wr->retries = send_buf->retries; + /* Reference for work request to QP + response */ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; @@ -995,16 +958,13 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, list_del(&mad_send_wr->agent_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); atomic_dec(&mad_agent_priv->refcount); - goto error2; + goto error; } -next: - send_wr = next_send_wr; } return 0; - -error2: - *bad_send_wr = send_wr; -error1: +error: + if (bad_send_buf) + *bad_send_buf = send_buf; return ret; } EXPORT_SYMBOL(ib_post_send_mad); @@ -1075,14 +1035,12 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method, static int allocate_method_table(struct ib_mad_mgmt_method_table **method) { /* Allocate management method table */ - *method = kmalloc(sizeof **method, GFP_ATOMIC); + *method = kzalloc(sizeof **method, GFP_ATOMIC); if (!*method) { printk(KERN_ERR PFX "No memory for " "ib_mad_mgmt_method_table\n"); return -ENOMEM; } - /* Clear management method table */ - memset(*method, 0, sizeof **method); return 0; } @@ -1173,15 +1131,14 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, class = &port_priv->version[mad_reg_req->mgmt_class_version].class; if (!*class) { /* Allocate management class table for "new" class version */ - *class = kmalloc(sizeof **class, GFP_ATOMIC); + *class = kzalloc(sizeof **class, GFP_ATOMIC); if (!*class) { printk(KERN_ERR PFX "No memory for " "ib_mad_mgmt_class_table\n"); ret = -ENOMEM; goto error1; } - /* Clear management class table */ - memset(*class, 0, sizeof(**class)); + /* Allocate method table for this management class */ method = &(*class)->method_table[mgmt_class]; if ((ret = allocate_method_table(method))) @@ -1245,25 +1202,24 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, mad_reg_req->mgmt_class_version].vendor; if (!*vendor_table) { /* Allocate mgmt vendor class table for "new" class version */ - vendor = kmalloc(sizeof *vendor, GFP_ATOMIC); + vendor = kzalloc(sizeof *vendor, GFP_ATOMIC); if (!vendor) { printk(KERN_ERR PFX "No memory for " "ib_mad_mgmt_vendor_class_table\n"); goto error1; } - /* Clear management vendor class table */ - memset(vendor, 0, sizeof(*vendor)); + *vendor_table = vendor; } if (!(*vendor_table)->vendor_class[vclass]) { /* Allocate table for this management vendor class */ - vendor_class = kmalloc(sizeof *vendor_class, GFP_ATOMIC); + vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC); if (!vendor_class) { printk(KERN_ERR PFX "No memory for " "ib_mad_mgmt_vendor_class\n"); goto error2; } - memset(vendor_class, 0, sizeof(*vendor_class)); + (*vendor_table)->vendor_class[vclass] = vendor_class; } for (i = 0; i < MAX_MGMT_OUI; i++) { @@ -1447,8 +1403,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, * of MAD. */ hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32; - list_for_each_entry(entry, &port_priv->agent_list, - agent_list) { + list_for_each_entry(entry, &port_priv->agent_list, agent_list) { if (entry->agent.hi_tid == hi_tid) { mad_agent = entry; break; @@ -1571,8 +1526,7 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid) */ list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, - mad_send_wr->send_wr.wr.ud.mad_hdr) && + if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && mad_send_wr->tid == tid && mad_send_wr->timeout) { /* Verify request has not been canceled */ return (mad_send_wr->status == IB_WC_SUCCESS) ? @@ -1628,14 +1582,14 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Defined behavior is to complete response before request */ - mad_recv_wc->wc->wr_id = mad_send_wr->wr_id; + mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf; mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, mad_recv_wc); atomic_dec(&mad_agent_priv->refcount); mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; - mad_send_wc.wr_id = mad_send_wr->wr_id; + mad_send_wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); } else { mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, @@ -1728,11 +1682,11 @@ local: if (ret & IB_MAD_RESULT_CONSUMED) goto out; if (ret & IB_MAD_RESULT_REPLY) { - /* Send response */ - if (!agent_send(response, &recv->grh, wc, - port_priv->device, - port_priv->port_num)) - response = NULL; + agent_send_response(&response->mad.mad, + &recv->grh, wc, + port_priv->device, + port_priv->port_num, + qp_info->qp->qp_num); goto out; } } @@ -1866,15 +1820,15 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, if (mad_send_wr->status != IB_WC_SUCCESS ) mad_send_wc->status = mad_send_wr->status; - if (ret != IB_RMPP_RESULT_INTERNAL) + if (ret == IB_RMPP_RESULT_INTERNAL) + ib_rmpp_send_handler(mad_send_wc); + else mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, mad_send_wc); /* Release reference on agent taken when sending */ if (atomic_dec_and_test(&mad_agent_priv->refcount)) wake_up(&mad_agent_priv->wait); - - kfree(mad_send_wr); return; done: spin_unlock_irqrestore(&mad_agent_priv->lock, flags); @@ -1888,6 +1842,7 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info; struct ib_mad_queue *send_queue; struct ib_send_wr *bad_send_wr; + struct ib_mad_send_wc mad_send_wc; unsigned long flags; int ret; @@ -1898,6 +1853,9 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, qp_info = send_queue->qp_info; retry: + dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device, + pci_unmap_addr(mad_send_wr, mapping), + mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); queued_send_wr = NULL; spin_lock_irqsave(&send_queue->lock, flags); list_del(&mad_list->list); @@ -1914,17 +1872,17 @@ retry: } spin_unlock_irqrestore(&send_queue->lock, flags); - /* Restore client wr_id in WC and complete send */ - wc->wr_id = mad_send_wr->wr_id; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_send_wc.status = wc->status; + mad_send_wc.vendor_err = wc->vendor_err; if (atomic_read(&qp_info->snoop_count)) - snoop_send(qp_info, &mad_send_wr->send_wr, - (struct ib_mad_send_wc *)wc, + snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); - ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc); + ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, - &bad_send_wr); + &bad_send_wr); if (ret) { printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); mad_send_wr = queued_send_wr; @@ -2066,38 +2024,37 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &cancel_list, agent_list) { - mad_send_wc.wr_id = mad_send_wr->wr_id; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + list_del(&mad_send_wr->agent_list); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); - - list_del(&mad_send_wr->agent_list); - kfree(mad_send_wr); atomic_dec(&mad_agent_priv->refcount); } } static struct ib_mad_send_wr_private* -find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id) +find_send_wr(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_buf *send_buf) { struct ib_mad_send_wr_private *mad_send_wr; list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, agent_list) { - if (mad_send_wr->wr_id == wr_id) + if (&mad_send_wr->send_buf == send_buf) return mad_send_wr; } list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, - mad_send_wr->send_wr.wr.ud.mad_hdr) && - mad_send_wr->wr_id == wr_id) + if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && + &mad_send_wr->send_buf == send_buf) return mad_send_wr; } return NULL; } -int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) +int ib_modify_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, u32 timeout_ms) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; @@ -2107,7 +2064,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); - mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id); + mad_send_wr = find_send_wr(mad_agent_priv, send_buf); if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return -EINVAL; @@ -2119,7 +2076,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } - mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms; + mad_send_wr->send_buf.timeout_ms = timeout_ms; if (active) mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); else @@ -2130,9 +2087,10 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) } EXPORT_SYMBOL(ib_modify_mad); -void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id) +void ib_cancel_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf) { - ib_modify_mad(mad_agent, wr_id, 0); + ib_modify_mad(mad_agent, send_buf, 0); } EXPORT_SYMBOL(ib_cancel_mad); @@ -2166,10 +2124,9 @@ static void local_completions(void *data) * Defined behavior is to complete response * before request */ - build_smp_wc(local->wr_id, + build_smp_wc((unsigned long) local->mad_send_wr, be16_to_cpu(IB_LID_PERMISSIVE), - 0 /* pkey index */, - recv_mad_agent->agent.port_num, &wc); + 0, recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; local->mad_priv->header.recv_wc.mad_len = @@ -2196,11 +2153,11 @@ local_send_completion: /* Complete send */ mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; - mad_send_wc.wr_id = local->wr_id; + mad_send_wc.send_buf = &local->mad_send_wr->send_buf; if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) - snoop_send(mad_agent_priv->qp_info, &local->send_wr, - &mad_send_wc, - IB_MAD_SNOOP_SEND_COMPLETIONS); + snoop_send(mad_agent_priv->qp_info, + &local->mad_send_wr->send_buf, + &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); @@ -2221,8 +2178,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) if (!mad_send_wr->retries--) return -ETIMEDOUT; - mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr. - wr.ud.timeout_ms); + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { ret = ib_retry_rmpp(mad_send_wr); @@ -2285,11 +2241,10 @@ static void timeout_sends(void *data) mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; else mad_send_wc.status = mad_send_wr->status; - mad_send_wc.wr_id = mad_send_wr->wr_id; + mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); - kfree(mad_send_wr); atomic_dec(&mad_agent_priv->refcount); spin_lock_irqsave(&mad_agent_priv->lock, flags); } @@ -2544,8 +2499,7 @@ error: static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) { ib_destroy_qp(qp_info->qp); - if (qp_info->snoop_table) - kfree(qp_info->snoop_table); + kfree(qp_info->snoop_table); } /* @@ -2561,12 +2515,12 @@ static int ib_mad_port_open(struct ib_device *device, char name[sizeof "ib_mad123"]; /* Create new device info */ - port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL); + port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) { printk(KERN_ERR PFX "No memory for ib_mad_port_private\n"); return -ENOMEM; } - memset(port_priv, 0, sizeof *port_priv); + port_priv->device = device; port_priv->port_num = port_num; spin_lock_init(&port_priv->reg_lock); @@ -2683,40 +2637,47 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) static void ib_mad_init_device(struct ib_device *device) { - int num_ports, cur_port, i; + int start, end, i; if (device->node_type == IB_NODE_SWITCH) { - num_ports = 1; - cur_port = 0; + start = 0; + end = 0; } else { - num_ports = device->phys_port_cnt; - cur_port = 1; + start = 1; + end = device->phys_port_cnt; } - for (i = 0; i < num_ports; i++, cur_port++) { - if (ib_mad_port_open(device, cur_port)) { + + for (i = start; i <= end; i++) { + if (ib_mad_port_open(device, i)) { printk(KERN_ERR PFX "Couldn't open %s port %d\n", - device->name, cur_port); - goto error_device_open; + device->name, i); + goto error; } - if (ib_agent_port_open(device, cur_port)) { + if (ib_agent_port_open(device, i)) { printk(KERN_ERR PFX "Couldn't open %s port %d " "for agents\n", - device->name, cur_port); - goto error_device_open; + device->name, i); + goto error_agent; } } return; -error_device_open: - while (i > 0) { - cur_port--; - if (ib_agent_port_close(device, cur_port)) +error_agent: + if (ib_mad_port_close(device, i)) + printk(KERN_ERR PFX "Couldn't close %s port %d\n", + device->name, i); + +error: + i--; + + while (i >= start) { + if (ib_agent_port_close(device, i)) printk(KERN_ERR PFX "Couldn't close %s port %d " "for agents\n", - device->name, cur_port); - if (ib_mad_port_close(device, cur_port)) + device->name, i); + if (ib_mad_port_close(device, i)) printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, cur_port); + device->name, i); i--; } } @@ -2754,7 +2715,6 @@ static int __init ib_mad_init_module(void) int ret; spin_lock_init(&ib_mad_port_list_lock); - spin_lock_init(&ib_agent_port_list_lock); ib_mad_cache = kmem_cache_create("ib_mad", sizeof(struct ib_mad_private), diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index f1ba794e0da..570f78682af 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -118,9 +118,10 @@ struct ib_mad_send_wr_private { struct ib_mad_list_head mad_list; struct list_head agent_list; struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_buf send_buf; + DECLARE_PCI_UNMAP_ADDR(mapping) struct ib_send_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; - u64 wr_id; /* client WR ID */ __be64 tid; unsigned long timeout; int retries; @@ -141,10 +142,7 @@ struct ib_mad_local_private { struct list_head completion_list; struct ib_mad_private *mad_priv; struct ib_mad_agent_private *recv_mad_agent; - struct ib_send_wr send_wr; - struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; - u64 wr_id; /* client WR ID */ - __be64 tid; + struct ib_mad_send_wr_private *mad_send_wr; }; struct ib_mad_mgmt_method_table { diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index e23836d0e21..3249e1d8c07 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -103,12 +103,12 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) static int data_offset(u8 mgmt_class) { if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) - return offsetof(struct ib_sa_mad, data); + return IB_MGMT_SA_HDR; else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) - return offsetof(struct ib_vendor_mad, data); + return IB_MGMT_VENDOR_HDR; else - return offsetof(struct ib_rmpp_mad, data); + return IB_MGMT_RMPP_HDR; } static void format_ack(struct ib_rmpp_mad *ack, @@ -135,55 +135,52 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv, struct ib_mad_recv_wc *recv_wc) { struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; - int hdr_len, ret; + int ret; - hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, rmpp_recv->ah, 1, - hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len, - GFP_KERNEL); + recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR, + IB_MGMT_RMPP_DATA, GFP_KERNEL); if (!msg) return; - format_ack((struct ib_rmpp_mad *) msg->mad, - (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); - ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr, - &bad_send_wr); + format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, + rmpp_recv); + msg->ah = rmpp_recv->ah; + ret = ib_post_send_mad(msg, NULL); if (ret) ib_free_send_mad(msg); } -static int alloc_response_msg(struct ib_mad_agent *agent, - struct ib_mad_recv_wc *recv_wc, - struct ib_mad_send_buf **msg) +static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, + struct ib_mad_recv_wc *recv_wc) { - struct ib_mad_send_buf *m; + struct ib_mad_send_buf *msg; struct ib_ah *ah; - int hdr_len; ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, recv_wc->recv_buf.grh, agent->port_num); if (IS_ERR(ah)) - return PTR_ERR(ah); - - hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); - m = ib_create_send_mad(agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, ah, 1, hdr_len, - sizeof(struct ib_rmpp_mad) - hdr_len, - GFP_KERNEL); - if (IS_ERR(m)) { + return (void *) ah; + + msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, + recv_wc->wc->pkey_index, 1, + IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA, + GFP_KERNEL); + if (IS_ERR(msg)) ib_destroy_ah(ah); - return PTR_ERR(m); - } - *msg = m; - return 0; + else + msg->ah = ah; + + return msg; } -static void free_msg(struct ib_mad_send_buf *msg) +void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) { - ib_destroy_ah(msg->send_wr.wr.ud.ah); - ib_free_send_mad(msg); + struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad; + + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_ACK) + ib_destroy_ah(mad_send_wc->send_buf->ah); + ib_free_send_mad(mad_send_wc->send_buf); } static void nack_recv(struct ib_mad_agent_private *agent, @@ -191,14 +188,13 @@ static void nack_recv(struct ib_mad_agent_private *agent, { struct ib_mad_send_buf *msg; struct ib_rmpp_mad *rmpp_mad; - struct ib_send_wr *bad_send_wr; int ret; - ret = alloc_response_msg(&agent->agent, recv_wc, &msg); - if (ret) + msg = alloc_response_msg(&agent->agent, recv_wc); + if (IS_ERR(msg)) return; - rmpp_mad = (struct ib_rmpp_mad *) msg->mad; + rmpp_mad = msg->mad; memcpy(rmpp_mad, recv_wc->recv_buf.mad, data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class)); @@ -210,9 +206,11 @@ static void nack_recv(struct ib_mad_agent_private *agent, rmpp_mad->rmpp_hdr.seg_num = 0; rmpp_mad->rmpp_hdr.paylen_newwin = 0; - ret = ib_post_send_mad(&agent->agent, &msg->send_wr, &bad_send_wr); - if (ret) - free_msg(msg); + ret = ib_post_send_mad(msg, NULL); + if (ret) { + ib_destroy_ah(msg->ah); + ib_free_send_mad(msg); + } } static void recv_timeout_handler(void *data) @@ -585,7 +583,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) int timeout; u32 paylen; - rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + rmpp_mad = mad_send_wr->send_buf.mad; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num); @@ -612,7 +610,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) } /* 2 seconds for an ACK until we can find the packet lifetime */ - timeout = mad_send_wr->send_wr.wr.ud.timeout_ms; + timeout = mad_send_wr->send_buf.timeout_ms; if (!timeout || timeout > 2000) mad_send_wr->timeout = msecs_to_jiffies(2000); mad_send_wr->seg_num++; @@ -640,7 +638,7 @@ static void abort_send(struct ib_mad_agent_private *agent, __be64 tid, wc.status = IB_WC_REM_ABORT_ERR; wc.vendor_err = rmpp_status; - wc.wr_id = mad_send_wr->wr_id; + wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &wc); return; out: @@ -694,12 +692,12 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, if (seg_num > mad_send_wr->last_ack) { mad_send_wr->last_ack = seg_num; - mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; + mad_send_wr->retries = mad_send_wr->send_buf.retries; } mad_send_wr->newwin = newwin; if (mad_send_wr->last_ack == mad_send_wr->total_seg) { /* If no response is expected, the ACK completes the send */ - if (!mad_send_wr->send_wr.wr.ud.timeout_ms) { + if (!mad_send_wr->send_buf.timeout_ms) { struct ib_mad_send_wc wc; ib_mark_mad_done(mad_send_wr); @@ -707,13 +705,13 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, wc.status = IB_WC_SUCCESS; wc.vendor_err = 0; - wc.wr_id = mad_send_wr->wr_id; + wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &wc); return; } if (mad_send_wr->refcount == 1) - ib_reset_mad_timeout(mad_send_wr, mad_send_wr-> - send_wr.wr.ud.timeout_ms); + ib_reset_mad_timeout(mad_send_wr, + mad_send_wr->send_buf.timeout_ms); } else if (mad_send_wr->refcount == 1 && mad_send_wr->seg_num < mad_send_wr->newwin && mad_send_wr->seg_num <= mad_send_wr->total_seg) { @@ -842,7 +840,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) struct ib_rmpp_mad *rmpp_mad; int i, total_len, ret; - rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; @@ -863,7 +861,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) / (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset); - mad_send_wr->pad = total_len - offsetof(struct ib_rmpp_mad, data) - + mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); /* We need to wait for the final ACK even if there isn't a response */ @@ -878,23 +876,15 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc) { struct ib_rmpp_mad *rmpp_mad; - struct ib_mad_send_buf *msg; int ret; - rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ - if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { - msg = (struct ib_mad_send_buf *) (unsigned long) - mad_send_wc->wr_id; - if (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_ACK) - ib_free_send_mad(msg); - else - free_msg(msg); + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */ - } if (mad_send_wc->status != IB_WC_SUCCESS || mad_send_wr->status != IB_WC_SUCCESS) @@ -905,7 +895,7 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, if (mad_send_wr->last_ack == mad_send_wr->total_seg) { mad_send_wr->timeout = - msecs_to_jiffies(mad_send_wr->send_wr.wr.ud.timeout_ms); + msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); return IB_RMPP_RESULT_PROCESSED; /* Send done */ } @@ -926,7 +916,7 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr) struct ib_rmpp_mad *rmpp_mad; int ret; - rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h index c4924dfb8e7..f0616fd2249 100644 --- a/drivers/infiniband/core/mad_rmpp.h +++ b/drivers/infiniband/core/mad_rmpp.h @@ -51,6 +51,8 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc); +void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc); + void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent); int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr); diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c index 35df5010e72..c972d723576 100644 --- a/drivers/infiniband/core/packer.c +++ b/drivers/infiniband/core/packer.c @@ -33,6 +33,8 @@ * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $ */ +#include <linux/string.h> + #include <rdma/ib_pack.h> static u64 value_read(int offset, int size, void *structure) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 78de2dd1a4f..acda7d63d6f 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -43,6 +43,7 @@ #include <linux/dma-mapping.h> #include <linux/kref.h> #include <linux/idr.h> +#include <linux/workqueue.h> #include <rdma/ib_pack.h> #include <rdma/ib_sa.h> @@ -73,11 +74,10 @@ struct ib_sa_device { struct ib_sa_query { void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); void (*release)(struct ib_sa_query *); - struct ib_sa_port *port; - struct ib_sa_mad *mad; - struct ib_sa_sm_ah *sm_ah; - DECLARE_PCI_UNMAP_ADDR(mapping) - int id; + struct ib_sa_port *port; + struct ib_mad_send_buf *mad_buf; + struct ib_sa_sm_ah *sm_ah; + int id; }; struct ib_sa_service_query { @@ -426,6 +426,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) { unsigned long flags; struct ib_mad_agent *agent; + struct ib_mad_send_buf *mad_buf; spin_lock_irqsave(&idr_lock, flags); if (idr_find(&query_idr, id) != query) { @@ -433,9 +434,10 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) return; } agent = query->port->agent; + mad_buf = query->mad_buf; spin_unlock_irqrestore(&idr_lock, flags); - ib_cancel_mad(agent, id); + ib_cancel_mad(agent, mad_buf); } EXPORT_SYMBOL(ib_sa_cancel_query); @@ -457,71 +459,46 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, int timeout_ms) { - struct ib_sa_port *port = query->port; unsigned long flags; - int ret; - struct ib_sge gather_list; - struct ib_send_wr *bad_wr, wr = { - .opcode = IB_WR_SEND, - .sg_list = &gather_list, - .num_sge = 1, - .send_flags = IB_SEND_SIGNALED, - .wr = { - .ud = { - .mad_hdr = &query->mad->mad_hdr, - .remote_qpn = 1, - .remote_qkey = IB_QP1_QKEY, - .timeout_ms = timeout_ms, - } - } - }; + int ret, id; retry: if (!idr_pre_get(&query_idr, GFP_ATOMIC)) return -ENOMEM; spin_lock_irqsave(&idr_lock, flags); - ret = idr_get_new(&query_idr, query, &query->id); + ret = idr_get_new(&query_idr, query, &id); spin_unlock_irqrestore(&idr_lock, flags); if (ret == -EAGAIN) goto retry; if (ret) return ret; - wr.wr_id = query->id; + query->mad_buf->timeout_ms = timeout_ms; + query->mad_buf->context[0] = query; + query->id = id; - spin_lock_irqsave(&port->ah_lock, flags); - kref_get(&port->sm_ah->ref); - query->sm_ah = port->sm_ah; - wr.wr.ud.ah = port->sm_ah->ah; - spin_unlock_irqrestore(&port->ah_lock, flags); + spin_lock_irqsave(&query->port->ah_lock, flags); + kref_get(&query->port->sm_ah->ref); + query->sm_ah = query->port->sm_ah; + spin_unlock_irqrestore(&query->port->ah_lock, flags); - gather_list.addr = dma_map_single(port->agent->device->dma_device, - query->mad, - sizeof (struct ib_sa_mad), - DMA_TO_DEVICE); - gather_list.length = sizeof (struct ib_sa_mad); - gather_list.lkey = port->agent->mr->lkey; - pci_unmap_addr_set(query, mapping, gather_list.addr); + query->mad_buf->ah = query->sm_ah->ah; - ret = ib_post_send_mad(port->agent, &wr, &bad_wr); + ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { - dma_unmap_single(port->agent->device->dma_device, - pci_unmap_addr(query, mapping), - sizeof (struct ib_sa_mad), - DMA_TO_DEVICE); - kref_put(&query->sm_ah->ref, free_sm_ah); spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, query->id); + idr_remove(&query_idr, id); spin_unlock_irqrestore(&idr_lock, flags); + + kref_put(&query->sm_ah->ref, free_sm_ah); } /* * It's not safe to dereference query any more, because the * send may already have completed and freed the query in - * another context. So use wr.wr_id, which has a copy of the - * query's id. + * another context. */ - return ret ? ret : wr.wr_id; + return ret ? ret : id; } static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, @@ -543,7 +520,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) { - kfree(sa_query->mad); kfree(container_of(sa_query, struct ib_sa_path_query, sa_query)); } @@ -574,7 +550,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_path_rec *resp, void *context), @@ -583,43 +559,58 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, { struct ib_sa_path_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; - struct ib_mad_agent *agent = port->agent; + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; int ret; + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; - query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); - if (!query->sa_query.mad) { - kfree(query); - return -ENOMEM; + + query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, + 0, IB_MGMT_SA_HDR, + IB_MGMT_SA_DATA, gfp_mask); + if (!query->sa_query.mad_buf) { + ret = -ENOMEM; + goto err1; } query->callback = callback; query->context = context; - init_mad(query->sa_query.mad, agent); + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); - query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; - query->sa_query.release = ib_sa_path_rec_release; - query->sa_query.port = port; - query->sa_query.mad->mad_hdr.method = IB_MGMT_METHOD_GET; - query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); - query->sa_query.mad->sa_hdr.comp_mask = comp_mask; + query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; + query->sa_query.release = ib_sa_path_rec_release; + query->sa_query.port = port; + mad->mad_hdr.method = IB_MGMT_METHOD_GET; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); + mad->sa_hdr.comp_mask = comp_mask; - ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), - rec, query->sa_query.mad->data); + ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms); - if (ret < 0) { - *sa_query = NULL; - kfree(query->sa_query.mad); - kfree(query); - } + if (ret < 0) + goto err2; + + return ret; +err2: + *sa_query = NULL; + ib_free_send_mad(query->sa_query.mad_buf); + +err1: + kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_path_rec_get); @@ -643,7 +634,6 @@ static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) { - kfree(sa_query->mad); kfree(container_of(sa_query, struct ib_sa_service_query, sa_query)); } @@ -676,7 +666,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_service_rec *resp, void *context), @@ -685,10 +675,17 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, { struct ib_sa_service_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; - struct ib_mad_agent *agent = port->agent; + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; int ret; + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + if (method != IB_MGMT_METHOD_GET && method != IB_MGMT_METHOD_SET && method != IB_SA_METHOD_DELETE) @@ -697,37 +694,45 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; - query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); - if (!query->sa_query.mad) { - kfree(query); - return -ENOMEM; + + query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, + 0, IB_MGMT_SA_HDR, + IB_MGMT_SA_DATA, gfp_mask); + if (!query->sa_query.mad_buf) { + ret = -ENOMEM; + goto err1; } query->callback = callback; query->context = context; - init_mad(query->sa_query.mad, agent); + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); - query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; - query->sa_query.release = ib_sa_service_rec_release; - query->sa_query.port = port; - query->sa_query.mad->mad_hdr.method = method; - query->sa_query.mad->mad_hdr.attr_id = - cpu_to_be16(IB_SA_ATTR_SERVICE_REC); - query->sa_query.mad->sa_hdr.comp_mask = comp_mask; + query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; + query->sa_query.release = ib_sa_service_rec_release; + query->sa_query.port = port; + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); + mad->sa_hdr.comp_mask = comp_mask; ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), - rec, query->sa_query.mad->data); + rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms); - if (ret < 0) { - *sa_query = NULL; - kfree(query->sa_query.mad); - kfree(query); - } + if (ret < 0) + goto err2; + + return ret; +err2: + *sa_query = NULL; + ib_free_send_mad(query->sa_query.mad_buf); + +err1: + kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_service_rec_query); @@ -751,7 +756,6 @@ static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) { - kfree(sa_query->mad); kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); } @@ -759,7 +763,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, unsigned int __nocast gfp_mask, + int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), @@ -768,60 +772,69 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, { struct ib_sa_mcmember_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; - struct ib_mad_agent *agent = port->agent; + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; int ret; + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; - query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); - if (!query->sa_query.mad) { - kfree(query); - return -ENOMEM; + + query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, + 0, IB_MGMT_SA_HDR, + IB_MGMT_SA_DATA, gfp_mask); + if (!query->sa_query.mad_buf) { + ret = -ENOMEM; + goto err1; } query->callback = callback; query->context = context; - init_mad(query->sa_query.mad, agent); + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); - query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; - query->sa_query.release = ib_sa_mcmember_rec_release; - query->sa_query.port = port; - query->sa_query.mad->mad_hdr.method = method; - query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); - query->sa_query.mad->sa_hdr.comp_mask = comp_mask; + query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; + query->sa_query.release = ib_sa_mcmember_rec_release; + query->sa_query.port = port; + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); + mad->sa_hdr.comp_mask = comp_mask; ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), - rec, query->sa_query.mad->data); + rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms); - if (ret < 0) { - *sa_query = NULL; - kfree(query->sa_query.mad); - kfree(query); - } + if (ret < 0) + goto err2; return ret; + +err2: + *sa_query = NULL; + ib_free_send_mad(query->sa_query.mad_buf); + +err1: + kfree(query); + return ret; } EXPORT_SYMBOL(ib_sa_mcmember_rec_query); static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { - struct ib_sa_query *query; + struct ib_sa_query *query = mad_send_wc->send_buf->context[0]; unsigned long flags; - spin_lock_irqsave(&idr_lock, flags); - query = idr_find(&query_idr, mad_send_wc->wr_id); - spin_unlock_irqrestore(&idr_lock, flags); - - if (!query) - return; - if (query->callback) switch (mad_send_wc->status) { case IB_WC_SUCCESS: @@ -838,30 +851,25 @@ static void send_handler(struct ib_mad_agent *agent, break; } - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(query, mapping), - sizeof (struct ib_sa_mad), - DMA_TO_DEVICE); - kref_put(&query->sm_ah->ref, free_sm_ah); - - query->release(query); - spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, mad_send_wc->wr_id); + idr_remove(&query_idr, query->id); spin_unlock_irqrestore(&idr_lock, flags); + + ib_free_send_mad(mad_send_wc->send_buf); + kref_put(&query->sm_ah->ref, free_sm_ah); + query->release(query); } static void recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_sa_query *query; - unsigned long flags; + struct ib_mad_send_buf *mad_buf; - spin_lock_irqsave(&idr_lock, flags); - query = idr_find(&query_idr, mad_recv_wc->wc->wr_id); - spin_unlock_irqrestore(&idr_lock, flags); + mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id; + query = mad_buf->context[0]; - if (query && query->callback) { + if (query->callback) { if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->callback(query, mad_recv_wc->recv_buf.mad->mad_hdr.status ? @@ -975,6 +983,7 @@ static int __init ib_sa_init(void) static void __exit ib_sa_cleanup(void) { ib_unregister_client(&sa_client); + idr_destroy(&query_idr); } module_init(ib_sa_init); diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index db25503a073..2b3c40198f8 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -39,6 +39,8 @@ #ifndef __SMI_H_ #define __SMI_H_ +#include <rdma/ib_smi.h> + int smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, int port_num, diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 211ba3223f6..08648b1a387 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -36,6 +36,9 @@ #include "core_priv.h" +#include <linux/slab.h> +#include <linux/string.h> + #include <rdma/ib_mad.h> struct ib_port { @@ -65,6 +68,11 @@ struct port_table_attribute { int index; }; +static inline int ibdev_is_alive(const struct ib_device *dev) +{ + return dev->reg_state == IB_DEV_REGISTERED; +} + static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -74,6 +82,8 @@ static ssize_t port_attr_show(struct kobject *kobj, if (!port_attr->show) return -EIO; + if (!ibdev_is_alive(p->ibdev)) + return -ENODEV; return port_attr->show(p, port_attr, buf); } @@ -300,14 +310,13 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, if (!p->ibdev->process_mad) return sprintf(buf, "N/A (no PMA)\n"); - in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); if (!in_mad || !out_mad) { ret = -ENOMEM; goto out; } - memset(in_mad, 0, sizeof *in_mad); in_mad->mad_hdr.base_version = 1; in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT; in_mad->mad_hdr.class_version = 1; @@ -501,10 +510,9 @@ static int add_port(struct ib_device *device, int port_num) if (ret) return ret; - p = kmalloc(sizeof *p, GFP_KERNEL); + p = kzalloc(sizeof *p, GFP_KERNEL); if (!p) return -ENOMEM; - memset(p, 0, sizeof *p); p->ibdev = device; p->port_num = port_num; @@ -581,6 +589,9 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf) { struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); + if (!ibdev_is_alive(dev)) + return -ENODEV; + switch (dev->node_type) { case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type); case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); @@ -595,6 +606,9 @@ static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf) struct ib_device_attr attr; ssize_t ret; + if (!ibdev_is_alive(dev)) + return -ENODEV; + ret = ib_query_device(dev, &attr); if (ret) return ret; @@ -612,6 +626,9 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf) struct ib_device_attr attr; ssize_t ret; + if (!ibdev_is_alive(dev)) + return -ENODEV; + ret = ib_query_device(dev, &attr); if (ret) return ret; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index d0f0b0a2edd..6e15787d1de 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -41,37 +41,81 @@ #include <linux/file.h> #include <linux/mount.h> #include <linux/cdev.h> +#include <linux/idr.h> #include <asm/uaccess.h> -#include "ucm.h" +#include <rdma/ib_cm.h> +#include <rdma/ib_user_cm.h> MODULE_AUTHOR("Libor Michalek"); MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); MODULE_LICENSE("Dual BSD/GPL"); -static int ucm_debug_level; +struct ib_ucm_device { + int devnum; + struct cdev dev; + struct class_device class_dev; + struct ib_device *ib_dev; +}; + +struct ib_ucm_file { + struct semaphore mutex; + struct file *filp; + struct ib_ucm_device *device; + + struct list_head ctxs; + struct list_head events; + wait_queue_head_t poll_wait; +}; + +struct ib_ucm_context { + int id; + wait_queue_head_t wait; + atomic_t ref; + int events_reported; + + struct ib_ucm_file *file; + struct ib_cm_id *cm_id; + __u64 uid; + + struct list_head events; /* list of pending events. */ + struct list_head file_list; /* member in file ctx list */ +}; + +struct ib_ucm_event { + struct ib_ucm_context *ctx; + struct list_head file_list; /* member in file event list */ + struct list_head ctx_list; /* member in ctx event list */ -module_param_named(debug_level, ucm_debug_level, int, 0644); -MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); + struct ib_cm_id *cm_id; + struct ib_ucm_event_resp resp; + void *data; + void *info; + int data_len; + int info_len; +}; enum { IB_UCM_MAJOR = 231, - IB_UCM_MINOR = 255 + IB_UCM_BASE_MINOR = 224, + IB_UCM_MAX_DEVICES = 32 }; -#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR) +#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) -#define PFX "UCM: " +static void ib_ucm_add_one(struct ib_device *device); +static void ib_ucm_remove_one(struct ib_device *device); -#define ucm_dbg(format, arg...) \ - do { \ - if (ucm_debug_level > 0) \ - printk(KERN_DEBUG PFX format, ## arg); \ - } while (0) +static struct ib_client ucm_client = { + .name = "ucm", + .add = ib_ucm_add_one, + .remove = ib_ucm_remove_one +}; -static struct semaphore ctx_id_mutex; -static struct idr ctx_id_table; +static DECLARE_MUTEX(ctx_id_mutex); +static DEFINE_IDR(ctx_id_table); +static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { @@ -128,11 +172,10 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) struct ib_ucm_context *ctx; int result; - ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) return NULL; - memset(ctx, 0, sizeof *ctx); atomic_set(&ctx->ref, 1); init_waitqueue_head(&ctx->wait); ctx->file = file; @@ -152,17 +195,13 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) goto error; list_add_tail(&ctx->file_list, &file->ctxs); - ucm_dbg("Allocated CM ID <%d>\n", ctx->id); return ctx; error: kfree(ctx); return NULL; } -/* - * Event portion of the API, handle CM events - * and allow event polling. - */ + static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath, struct ib_sa_path_rec *kpath) { @@ -209,6 +248,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, ureq->retry_count = kreq->retry_count; ureq->rnr_retry_count = kreq->rnr_retry_count; ureq->srq = kreq->srq; + ureq->port = kreq->port; ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path); ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path); @@ -295,6 +335,8 @@ static int ib_ucm_event_process(struct ib_cm_event *evt, case IB_CM_SIDR_REQ_RECEIVED: uvt->resp.u.sidr_req_resp.pkey = evt->param.sidr_req_rcvd.pkey; + uvt->resp.u.sidr_req_resp.port = + evt->param.sidr_req_rcvd.port; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; case IB_CM_SIDR_REP_RECEIVED: @@ -343,11 +385,10 @@ static int ib_ucm_event_handler(struct ib_cm_id *cm_id, ctx = cm_id->context; - uevent = kmalloc(sizeof(*uevent), GFP_KERNEL); + uevent = kzalloc(sizeof *uevent, GFP_KERNEL); if (!uevent) goto err1; - memset(uevent, 0, sizeof(*uevent)); uevent->ctx = ctx; uevent->cm_id = cm_id; uevent->resp.uid = ctx->uid; @@ -387,9 +428,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - /* - * wait - */ + down(&file->mutex); while (list_empty(&file->events)) { @@ -471,7 +510,6 @@ done: return result; } - static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) @@ -494,29 +532,27 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx); + ctx->cm_id = ib_create_cm_id(file->device->ib_dev, + ib_ucm_event_handler, ctx); if (IS_ERR(ctx->cm_id)) { result = PTR_ERR(ctx->cm_id); - goto err; + goto err1; } resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { result = -EFAULT; - goto err; + goto err2; } - return 0; -err: +err2: + ib_destroy_cm_id(ctx->cm_id); +err1: down(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); up(&ctx_id_mutex); - - if (!IS_ERR(ctx->cm_id)) - ib_destroy_cm_id(ctx->cm_id); - kfree(ctx); return result; } @@ -1184,9 +1220,6 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - ucm_dbg("Write. cmd <%d> in <%d> out <%d> len <%Zu>\n", - hdr.cmd, hdr.in, hdr.out, len); - if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; @@ -1231,8 +1264,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp) filp->private_data = file; file->filp = filp; - - ucm_dbg("Created struct\n"); + file->device = container_of(inode->i_cdev, struct ib_ucm_device, dev); return 0; } @@ -1263,7 +1295,17 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) return 0; } -static struct file_operations ib_ucm_fops = { +static void ib_ucm_release_class_dev(struct class_device *class_dev) +{ + struct ib_ucm_device *dev; + + dev = container_of(class_dev, struct ib_ucm_device, class_dev); + cdev_del(&dev->dev); + clear_bit(dev->devnum, dev_map); + kfree(dev); +} + +static struct file_operations ucm_fops = { .owner = THIS_MODULE, .open = ib_ucm_open, .release = ib_ucm_close, @@ -1271,55 +1313,141 @@ static struct file_operations ib_ucm_fops = { .poll = ib_ucm_poll, }; +static struct class ucm_class = { + .name = "infiniband_cm", + .release = ib_ucm_release_class_dev +}; -static struct class *ib_ucm_class; -static struct cdev ib_ucm_cdev; +static ssize_t show_dev(struct class_device *class_dev, char *buf) +{ + struct ib_ucm_device *dev; + + dev = container_of(class_dev, struct ib_ucm_device, class_dev); + return print_dev_t(buf, dev->dev.dev); +} +static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); -static int __init ib_ucm_init(void) +static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { - int result; + struct ib_ucm_device *dev; + + dev = container_of(class_dev, struct ib_ucm_device, class_dev); + return sprintf(buf, "%s\n", dev->ib_dev->name); +} +static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); - result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm"); - if (result) { - ucm_dbg("Error <%d> registering dev\n", result); - goto err_chr; - } +static void ib_ucm_add_one(struct ib_device *device) +{ + struct ib_ucm_device *ucm_dev; + + if (!device->alloc_ucontext) + return; + + ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); + if (!ucm_dev) + return; - cdev_init(&ib_ucm_cdev, &ib_ucm_fops); + ucm_dev->ib_dev = device; + + ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); + if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES) + goto err; + + set_bit(ucm_dev->devnum, dev_map); + + cdev_init(&ucm_dev->dev, &ucm_fops); + ucm_dev->dev.owner = THIS_MODULE; + kobject_set_name(&ucm_dev->dev.kobj, "ucm%d", ucm_dev->devnum); + if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) + goto err; - result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1); - if (result) { - ucm_dbg("Error <%d> adding cdev\n", result); + ucm_dev->class_dev.class = &ucm_class; + ucm_dev->class_dev.dev = device->dma_device; + snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d", + ucm_dev->devnum); + if (class_device_register(&ucm_dev->class_dev)) goto err_cdev; - } - ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm"); - if (IS_ERR(ib_ucm_class)) { - result = PTR_ERR(ib_ucm_class); - ucm_dbg("Error <%d> creating class\n", result); + if (class_device_create_file(&ucm_dev->class_dev, + &class_device_attr_dev)) + goto err_class; + if (class_device_create_file(&ucm_dev->class_dev, + &class_device_attr_ibdev)) goto err_class; + + ib_set_client_data(device, &ucm_client, ucm_dev); + return; + +err_class: + class_device_unregister(&ucm_dev->class_dev); +err_cdev: + cdev_del(&ucm_dev->dev); + clear_bit(ucm_dev->devnum, dev_map); +err: + kfree(ucm_dev); + return; +} + +static void ib_ucm_remove_one(struct ib_device *device) +{ + struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client); + + if (!ucm_dev) + return; + + class_device_unregister(&ucm_dev->class_dev); +} + +static ssize_t show_abi_version(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION); +} +static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + +static int __init ib_ucm_init(void) +{ + int ret; + + ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, + "infiniband_cm"); + if (ret) { + printk(KERN_ERR "ucm: couldn't register device number\n"); + goto err; } - class_device_create(ib_ucm_class, IB_UCM_DEV, NULL, "ucm"); + ret = class_register(&ucm_class); + if (ret) { + printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n"); + goto err_chrdev; + } - idr_init(&ctx_id_table); - init_MUTEX(&ctx_id_mutex); + ret = class_create_file(&ucm_class, &class_attr_abi_version); + if (ret) { + printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); + goto err_class; + } + ret = ib_register_client(&ucm_client); + if (ret) { + printk(KERN_ERR "ucm: couldn't register client\n"); + goto err_class; + } return 0; + err_class: - cdev_del(&ib_ucm_cdev); -err_cdev: - unregister_chrdev_region(IB_UCM_DEV, 1); -err_chr: - return result; + class_unregister(&ucm_class); +err_chrdev: + unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); +err: + return ret; } static void __exit ib_ucm_cleanup(void) { - class_device_destroy(ib_ucm_class, IB_UCM_DEV); - class_destroy(ib_ucm_class); - cdev_del(&ib_ucm_cdev); - unregister_chrdev_region(IB_UCM_DEV, 1); + ib_unregister_client(&ucm_client); + class_unregister(&ucm_class); + unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); + idr_destroy(&ctx_id_table); } module_init(ib_ucm_init); diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h deleted file mode 100644 index f46f37bc120..00000000000 --- a/drivers/infiniband/core/ucm.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $ - */ - -#ifndef UCM_H -#define UCM_H - -#include <linux/fs.h> -#include <linux/device.h> -#include <linux/cdev.h> -#include <linux/idr.h> - -#include <rdma/ib_cm.h> -#include <rdma/ib_user_cm.h> - -struct ib_ucm_file { - struct semaphore mutex; - struct file *filp; - - struct list_head ctxs; /* list of active connections */ - struct list_head events; /* list of pending events */ - wait_queue_head_t poll_wait; -}; - -struct ib_ucm_context { - int id; - wait_queue_head_t wait; - atomic_t ref; - int events_reported; - - struct ib_ucm_file *file; - struct ib_cm_id *cm_id; - __u64 uid; - - struct list_head events; /* list of pending events. */ - struct list_head file_list; /* member in file ctx list */ -}; - -struct ib_ucm_event { - struct ib_ucm_context *ctx; - struct list_head file_list; /* member in file event list */ - struct list_head ctx_list; /* member in ctx event list */ - - struct ib_cm_id *cm_id; - struct ib_ucm_event_resp resp; - void *data; - void *info; - int data_len; - int info_len; -}; - -#endif /* UCM_H */ diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 527b23450ab..997c07db6d8 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -34,6 +34,7 @@ */ #include <linux/errno.h> +#include <linux/string.h> #include <rdma/ib_pack.h> diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index a64d6b4dcc1..eb7f52537cc 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -31,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: user_mad.c 2814 2005-07-06 19:14:09Z halr $ + * $Id: user_mad.c 4010 2005-11-09 23:11:56Z roland $ */ #include <linux/module.h> @@ -64,18 +64,42 @@ enum { IB_UMAD_MINOR_BASE = 0 }; +/* + * Our lifetime rules for these structs are the following: each time a + * device special file is opened, we look up the corresponding struct + * ib_umad_port by minor in the umad_port[] table while holding the + * port_lock. If this lookup succeeds, we take a reference on the + * ib_umad_port's struct ib_umad_device while still holding the + * port_lock; if the lookup fails, we fail the open(). We drop these + * references in the corresponding close(). + * + * In addition to references coming from open character devices, there + * is one more reference to each ib_umad_device representing the + * module's reference taken when allocating the ib_umad_device in + * ib_umad_add_one(). + * + * When destroying an ib_umad_device, we clear all of its + * ib_umad_ports from umad_port[] while holding port_lock before + * dropping the module's reference to the ib_umad_device. This is + * always safe because any open() calls will either succeed and obtain + * a reference before we clear the umad_port[] entries, or fail after + * we clear the umad_port[] entries. + */ + struct ib_umad_port { - int devnum; - struct cdev dev; - struct class_device class_dev; + struct cdev *dev; + struct class_device *class_dev; - int sm_devnum; - struct cdev sm_dev; - struct class_device sm_class_dev; + struct cdev *sm_dev; + struct class_device *sm_class_dev; struct semaphore sm_sem; + struct rw_semaphore mutex; + struct list_head file_list; + struct ib_device *ib_dev; struct ib_umad_device *umad_dev; + int dev_num; u8 port_num; }; @@ -86,42 +110,59 @@ struct ib_umad_device { }; struct ib_umad_file { - struct ib_umad_port *port; - spinlock_t recv_lock; - struct list_head recv_list; - wait_queue_head_t recv_wait; - struct rw_semaphore agent_mutex; - struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; - struct ib_mr *mr[IB_UMAD_MAX_AGENTS]; + struct ib_umad_port *port; + struct list_head recv_list; + struct list_head port_list; + spinlock_t recv_lock; + wait_queue_head_t recv_wait; + struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; + int agents_dead; }; struct ib_umad_packet { - struct ib_ah *ah; struct ib_mad_send_buf *msg; struct list_head list; int length; - DECLARE_PCI_UNMAP_ADDR(mapping) struct ib_user_mad mad; }; +static struct class *umad_class; + static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); -static spinlock_t map_lock; + +static DEFINE_SPINLOCK(port_lock); +static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS]; static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device); +static void ib_umad_release_dev(struct kref *ref) +{ + struct ib_umad_device *dev = + container_of(ref, struct ib_umad_device, ref); + + kfree(dev); +} + +/* caller must hold port->mutex at least for reading */ +static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) +{ + return file->agents_dead ? NULL : file->agent[id]; +} + static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent, struct ib_umad_packet *packet) { int ret = 1; - down_read(&file->agent_mutex); + down_read(&file->port->mutex); + for (packet->mad.hdr.id = 0; packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; packet->mad.hdr.id++) - if (agent == file->agent[packet->mad.hdr.id]) { + if (agent == __get_agent(file, packet->mad.hdr.id)) { spin_lock_irq(&file->recv_lock); list_add_tail(&packet->list, &file->recv_list); spin_unlock_irq(&file->recv_lock); @@ -130,7 +171,7 @@ static int queue_packet(struct ib_umad_file *file, break; } - up_read(&file->agent_mutex); + up_read(&file->port->mutex); return ret; } @@ -139,22 +180,19 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; - struct ib_umad_packet *timeout, *packet = - (void *) (unsigned long) send_wc->wr_id; + struct ib_umad_packet *timeout; + struct ib_umad_packet *packet = send_wc->send_buf->context[0]; - ib_destroy_ah(packet->msg->send_wr.wr.ud.ah); + ib_destroy_ah(packet->msg->ah); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { - timeout = kmalloc(sizeof *timeout + sizeof (struct ib_mad_hdr), - GFP_KERNEL); + timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL); if (!timeout) goto out; - memset(timeout, 0, sizeof *timeout + sizeof (struct ib_mad_hdr)); - - timeout->length = sizeof (struct ib_mad_hdr); - timeout->mad.hdr.id = packet->mad.hdr.id; + timeout->length = IB_MGMT_MAD_HDR; + timeout->mad.hdr.id = packet->mad.hdr.id; timeout->mad.hdr.status = ETIMEDOUT; memcpy(timeout->mad.data, packet->mad.data, sizeof (struct ib_mad_hdr)); @@ -177,11 +215,10 @@ static void recv_handler(struct ib_mad_agent *agent, goto out; length = mad_recv_wc->mad_len; - packet = kmalloc(sizeof *packet + length, GFP_KERNEL); + packet = kzalloc(sizeof *packet + length, GFP_KERNEL); if (!packet) goto out; - memset(packet, 0, sizeof *packet + length); packet->length = length; ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data); @@ -247,7 +284,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, else ret = -ENOSPC; } else if (copy_to_user(buf, &packet->mad, - packet->length + sizeof (struct ib_user_mad))) + packet->length + sizeof (struct ib_user_mad))) ret = -EFAULT; else ret = packet->length + sizeof (struct ib_user_mad); @@ -268,26 +305,23 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct ib_ah_attr ah_attr; - struct ib_send_wr *bad_wr; + struct ib_ah *ah; struct ib_rmpp_mad *rmpp_mad; u8 method; __be64 *tid; - int ret, length, hdr_len, data_len, rmpp_hdr_size; - int rmpp_active = 0; + int ret, length, hdr_len, copy_offset; + int rmpp_active, has_rmpp_header; - if (count < sizeof (struct ib_user_mad)) + if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR) return -EINVAL; length = count - sizeof (struct ib_user_mad); - packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) + - sizeof(struct ib_rmpp_hdr), GFP_KERNEL); + packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; if (copy_from_user(&packet->mad, buf, - sizeof (struct ib_user_mad) + - sizeof(struct ib_mad_hdr) + - sizeof(struct ib_rmpp_hdr))) { + sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) { ret = -EFAULT; goto err; } @@ -298,11 +332,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err; } - packet->length = length; - - down_read(&file->agent_mutex); + down_read(&file->port->mutex); - agent = file->agent[packet->mad.hdr.id]; + agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { ret = -EINVAL; goto err_up; @@ -321,80 +353,63 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } - packet->ah = ib_create_ah(agent->qp->pd, &ah_attr); - if (IS_ERR(packet->ah)) { - ret = PTR_ERR(packet->ah); + ah = ib_create_ah(agent->qp->pd, &ah_attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); goto err_up; } rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; - if (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE) { - /* RMPP active */ - if (!agent->rmpp_version) { - ret = -EINVAL; - goto err_ah; - } - - /* Validate that the management class can support RMPP */ - if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { - hdr_len = offsetof(struct ib_sa_mad, data); - data_len = length - hdr_len; - } else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && - (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) { - hdr_len = offsetof(struct ib_vendor_mad, data); - data_len = length - hdr_len; - } else { - ret = -EINVAL; - goto err_ah; - } - rmpp_active = 1; + if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { + hdr_len = IB_MGMT_SA_HDR; + copy_offset = IB_MGMT_RMPP_HDR; + has_rmpp_header = 1; + } else if (rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START && + rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END) { + hdr_len = IB_MGMT_VENDOR_HDR; + copy_offset = IB_MGMT_RMPP_HDR; + has_rmpp_header = 1; } else { - if (length > sizeof(struct ib_mad)) { - ret = -EINVAL; - goto err_ah; - } - hdr_len = offsetof(struct ib_mad, data); - data_len = length - hdr_len; + hdr_len = IB_MGMT_MAD_HDR; + copy_offset = IB_MGMT_MAD_HDR; + has_rmpp_header = 0; + } + + if (has_rmpp_header) + rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE; + else + rmpp_active = 0; + + /* Validate that the management class can support RMPP */ + if (rmpp_active && !agent->rmpp_version) { + ret = -EINVAL; + goto err_ah; } packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), - 0, packet->ah, rmpp_active, - hdr_len, data_len, + 0, rmpp_active, + hdr_len, length - hdr_len, GFP_KERNEL); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; } - packet->msg->send_wr.wr.ud.timeout_ms = packet->mad.hdr.timeout_ms; - packet->msg->send_wr.wr.ud.retries = packet->mad.hdr.retries; - - /* Override send WR WRID initialized in ib_create_send_mad */ - packet->msg->send_wr.wr_id = (unsigned long) packet; - - if (!rmpp_active) { - /* Copy message from user into send buffer */ - if (copy_from_user(packet->msg->mad, - buf + sizeof(struct ib_user_mad), length)) { - ret = -EFAULT; - goto err_msg; - } - } else { - rmpp_hdr_size = sizeof(struct ib_mad_hdr) + - sizeof(struct ib_rmpp_hdr); - - /* Only copy MAD headers (RMPP header in place) */ - memcpy(packet->msg->mad, packet->mad.data, - sizeof(struct ib_mad_hdr)); + packet->msg->ah = ah; + packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; + packet->msg->retries = packet->mad.hdr.retries; + packet->msg->context[0] = packet; - /* Now, copy rest of message from user into send buffer */ - if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data, - buf + sizeof (struct ib_user_mad) + rmpp_hdr_size, - length - rmpp_hdr_size)) { - ret = -EFAULT; - goto err_msg; - } + /* Copy MAD headers (RMPP header in place) */ + memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); + /* Now, copy rest of message from user into send buffer */ + if (copy_from_user(packet->msg->mad + copy_offset, + buf + sizeof (struct ib_user_mad) + copy_offset, + length - copy_offset)) { + ret = -EFAULT; + goto err_msg; } /* @@ -403,32 +418,32 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, * transaction ID matches the agent being used to send the * MAD. */ - method = packet->msg->mad->mad_hdr.method; + method = ((struct ib_mad_hdr *) packet->msg->mad)->method; if (!(method & IB_MGMT_METHOD_RESP) && method != IB_MGMT_METHOD_TRAP_REPRESS && method != IB_MGMT_METHOD_SEND) { - tid = &packet->msg->mad->mad_hdr.tid; + tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); } - ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr); + ret = ib_post_send_mad(packet->msg, NULL); if (ret) goto err_msg; - up_read(&file->agent_mutex); + up_read(&file->port->mutex); - return sizeof (struct ib_user_mad_hdr) + packet->length; + return count; err_msg: ib_free_send_mad(packet->msg); err_ah: - ib_destroy_ah(packet->ah); + ib_destroy_ah(ah); err_up: - up_read(&file->agent_mutex); + up_read(&file->port->mutex); err: kfree(packet); @@ -458,7 +473,12 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg) int agent_id; int ret; - down_write(&file->agent_mutex); + down_write(&file->port->mutex); + + if (!file->port->ib_dev) { + ret = -EPIPE; + goto out; + } if (copy_from_user(&ureq, (void __user *) arg, sizeof ureq)) { ret = -EFAULT; @@ -471,7 +491,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg) } for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) - if (!file->agent[agent_id]) + if (!__get_agent(file, agent_id)) goto found; ret = -ENOMEM; @@ -495,58 +515,46 @@ found: goto out; } - file->agent[agent_id] = agent; - - file->mr[agent_id] = ib_get_dma_mr(agent->qp->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(file->mr[agent_id])) { - ret = -ENOMEM; - goto err; - } - if (put_user(agent_id, (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) { ret = -EFAULT; - goto err_mr; + ib_unregister_mad_agent(agent); + goto out; } + file->agent[agent_id] = agent; ret = 0; - goto out; - -err_mr: - ib_dereg_mr(file->mr[agent_id]); - -err: - file->agent[agent_id] = NULL; - ib_unregister_mad_agent(agent); out: - up_write(&file->agent_mutex); + up_write(&file->port->mutex); return ret; } static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg) { + struct ib_mad_agent *agent = NULL; u32 id; int ret = 0; - down_write(&file->agent_mutex); + if (get_user(id, (u32 __user *) arg)) + return -EFAULT; - if (get_user(id, (u32 __user *) arg)) { - ret = -EFAULT; - goto out; - } + down_write(&file->port->mutex); - if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) { + if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; goto out; } - ib_dereg_mr(file->mr[id]); - ib_unregister_mad_agent(file->agent[id]); + agent = file->agent[id]; file->agent[id] = NULL; out: - up_write(&file->agent_mutex); + up_write(&file->port->mutex); + + if (agent) + ib_unregister_mad_agent(agent); + return ret; } @@ -565,43 +573,76 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd, static int ib_umad_open(struct inode *inode, struct file *filp) { - struct ib_umad_port *port = - container_of(inode->i_cdev, struct ib_umad_port, dev); + struct ib_umad_port *port; struct ib_umad_file *file; + int ret = 0; - file = kmalloc(sizeof *file, GFP_KERNEL); - if (!file) - return -ENOMEM; + spin_lock(&port_lock); + port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE]; + if (port) + kref_get(&port->umad_dev->ref); + spin_unlock(&port_lock); + + if (!port) + return -ENXIO; - memset(file, 0, sizeof *file); + down_write(&port->mutex); + + if (!port->ib_dev) { + ret = -ENXIO; + goto out; + } + + file = kzalloc(sizeof *file, GFP_KERNEL); + if (!file) { + kref_put(&port->umad_dev->ref, ib_umad_release_dev); + ret = -ENOMEM; + goto out; + } spin_lock_init(&file->recv_lock); - init_rwsem(&file->agent_mutex); INIT_LIST_HEAD(&file->recv_list); init_waitqueue_head(&file->recv_wait); file->port = port; filp->private_data = file; - return 0; + list_add_tail(&file->port_list, &port->file_list); + +out: + up_write(&port->mutex); + return ret; } static int ib_umad_close(struct inode *inode, struct file *filp) { struct ib_umad_file *file = filp->private_data; + struct ib_umad_device *dev = file->port->umad_dev; struct ib_umad_packet *packet, *tmp; + int already_dead; int i; - for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) - if (file->agent[i]) { - ib_dereg_mr(file->mr[i]); - ib_unregister_mad_agent(file->agent[i]); - } + down_write(&file->port->mutex); + + already_dead = file->agents_dead; + file->agents_dead = 1; list_for_each_entry_safe(packet, tmp, &file->recv_list, list) kfree(packet); + list_del(&file->port_list); + + downgrade_write(&file->port->mutex); + + if (!already_dead) + for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) + if (file->agent[i]) + ib_unregister_mad_agent(file->agent[i]); + + up_read(&file->port->mutex); + kfree(file); + kref_put(&dev->ref, ib_umad_release_dev); return 0; } @@ -619,30 +660,46 @@ static struct file_operations umad_fops = { static int ib_umad_sm_open(struct inode *inode, struct file *filp) { - struct ib_umad_port *port = - container_of(inode->i_cdev, struct ib_umad_port, sm_dev); + struct ib_umad_port *port; struct ib_port_modify props = { .set_port_cap_mask = IB_PORT_SM }; int ret; + spin_lock(&port_lock); + port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS]; + if (port) + kref_get(&port->umad_dev->ref); + spin_unlock(&port_lock); + + if (!port) + return -ENXIO; + if (filp->f_flags & O_NONBLOCK) { - if (down_trylock(&port->sm_sem)) - return -EAGAIN; + if (down_trylock(&port->sm_sem)) { + ret = -EAGAIN; + goto fail; + } } else { - if (down_interruptible(&port->sm_sem)) - return -ERESTARTSYS; + if (down_interruptible(&port->sm_sem)) { + ret = -ERESTARTSYS; + goto fail; + } } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); if (ret) { up(&port->sm_sem); - return ret; + goto fail; } filp->private_data = port; return 0; + +fail: + kref_put(&port->umad_dev->ref, ib_umad_release_dev); + return ret; } static int ib_umad_sm_close(struct inode *inode, struct file *filp) @@ -651,11 +708,17 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) struct ib_port_modify props = { .clr_port_cap_mask = IB_PORT_SM }; - int ret; + int ret = 0; + + down_write(&port->mutex); + if (port->ib_dev) + ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); + up_write(&port->mutex); - ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); up(&port->sm_sem); + kref_put(&port->umad_dev->ref, ib_umad_release_dev); + return ret; } @@ -671,21 +734,13 @@ static struct ib_client umad_client = { .remove = ib_umad_remove_one }; -static ssize_t show_dev(struct class_device *class_dev, char *buf) -{ - struct ib_umad_port *port = class_get_devdata(class_dev); - - if (class_dev == &port->class_dev) - return print_dev_t(buf, port->dev.dev); - else - return print_dev_t(buf, port->sm_dev.dev); -} -static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); - static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_umad_port *port = class_get_devdata(class_dev); + if (!port) + return -ENODEV; + return sprintf(buf, "%s\n", port->ib_dev->name); } static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); @@ -694,38 +749,13 @@ static ssize_t show_port(struct class_device *class_dev, char *buf) { struct ib_umad_port *port = class_get_devdata(class_dev); + if (!port) + return -ENODEV; + return sprintf(buf, "%d\n", port->port_num); } static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL); -static void ib_umad_release_dev(struct kref *ref) -{ - struct ib_umad_device *dev = - container_of(ref, struct ib_umad_device, ref); - - kfree(dev); -} - -static void ib_umad_release_port(struct class_device *class_dev) -{ - struct ib_umad_port *port = class_get_devdata(class_dev); - - if (class_dev == &port->class_dev) { - cdev_del(&port->dev); - clear_bit(port->devnum, dev_map); - } else { - cdev_del(&port->sm_dev); - clear_bit(port->sm_devnum, dev_map); - } - - kref_put(&port->umad_dev->ref, ib_umad_release_dev); -} - -static struct class umad_class = { - .name = "infiniband_mad", - .release = ib_umad_release_port -}; - static ssize_t show_abi_version(struct class *class, char *buf) { return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); @@ -735,91 +765,144 @@ static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_port *port) { - spin_lock(&map_lock); - port->devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); - if (port->devnum >= IB_UMAD_MAX_PORTS) { - spin_unlock(&map_lock); - return -1; - } - port->sm_devnum = find_next_zero_bit(dev_map, IB_UMAD_MAX_PORTS * 2, IB_UMAD_MAX_PORTS); - if (port->sm_devnum >= IB_UMAD_MAX_PORTS * 2) { - spin_unlock(&map_lock); + spin_lock(&port_lock); + port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); + if (port->dev_num >= IB_UMAD_MAX_PORTS) { + spin_unlock(&port_lock); return -1; } - set_bit(port->devnum, dev_map); - set_bit(port->sm_devnum, dev_map); - spin_unlock(&map_lock); + set_bit(port->dev_num, dev_map); + spin_unlock(&port_lock); port->ib_dev = device; port->port_num = port_num; init_MUTEX(&port->sm_sem); + init_rwsem(&port->mutex); + INIT_LIST_HEAD(&port->file_list); - cdev_init(&port->dev, &umad_fops); - port->dev.owner = THIS_MODULE; - kobject_set_name(&port->dev.kobj, "umad%d", port->devnum); - if (cdev_add(&port->dev, base_dev + port->devnum, 1)) + port->dev = cdev_alloc(); + if (!port->dev) return -1; - - port->class_dev.class = &umad_class; - port->class_dev.dev = device->dma_device; - - snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum); - - if (class_device_register(&port->class_dev)) + port->dev->owner = THIS_MODULE; + port->dev->ops = &umad_fops; + kobject_set_name(&port->dev->kobj, "umad%d", port->dev_num); + if (cdev_add(port->dev, base_dev + port->dev_num, 1)) goto err_cdev; - class_set_devdata(&port->class_dev, port); - kref_get(&port->umad_dev->ref); + port->class_dev = class_device_create(umad_class, NULL, port->dev->dev, + device->dma_device, + "umad%d", port->dev_num); + if (IS_ERR(port->class_dev)) + goto err_cdev; - if (class_device_create_file(&port->class_dev, &class_device_attr_dev)) - goto err_class; - if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev)) + if (class_device_create_file(port->class_dev, &class_device_attr_ibdev)) goto err_class; - if (class_device_create_file(&port->class_dev, &class_device_attr_port)) + if (class_device_create_file(port->class_dev, &class_device_attr_port)) goto err_class; - cdev_init(&port->sm_dev, &umad_sm_fops); - port->sm_dev.owner = THIS_MODULE; - kobject_set_name(&port->dev.kobj, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); - if (cdev_add(&port->sm_dev, base_dev + port->sm_devnum, 1)) - return -1; - - port->sm_class_dev.class = &umad_class; - port->sm_class_dev.dev = device->dma_device; - - snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); + port->sm_dev = cdev_alloc(); + if (!port->sm_dev) + goto err_class; + port->sm_dev->owner = THIS_MODULE; + port->sm_dev->ops = &umad_sm_fops; + kobject_set_name(&port->sm_dev->kobj, "issm%d", port->dev_num); + if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) + goto err_sm_cdev; - if (class_device_register(&port->sm_class_dev)) + port->sm_class_dev = class_device_create(umad_class, NULL, port->sm_dev->dev, + device->dma_device, + "issm%d", port->dev_num); + if (IS_ERR(port->sm_class_dev)) goto err_sm_cdev; - class_set_devdata(&port->sm_class_dev, port); - kref_get(&port->umad_dev->ref); + class_set_devdata(port->class_dev, port); + class_set_devdata(port->sm_class_dev, port); - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev)) + if (class_device_create_file(port->sm_class_dev, &class_device_attr_ibdev)) goto err_sm_class; - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev)) - goto err_sm_class; - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port)) + if (class_device_create_file(port->sm_class_dev, &class_device_attr_port)) goto err_sm_class; + spin_lock(&port_lock); + umad_port[port->dev_num] = port; + spin_unlock(&port_lock); + return 0; err_sm_class: - class_device_unregister(&port->sm_class_dev); + class_device_destroy(umad_class, port->sm_dev->dev); err_sm_cdev: - cdev_del(&port->sm_dev); + cdev_del(port->sm_dev); err_class: - class_device_unregister(&port->class_dev); + class_device_destroy(umad_class, port->dev->dev); err_cdev: - cdev_del(&port->dev); - clear_bit(port->devnum, dev_map); + cdev_del(port->dev); + clear_bit(port->dev_num, dev_map); return -1; } +static void ib_umad_kill_port(struct ib_umad_port *port) +{ + struct ib_umad_file *file; + int id; + + class_set_devdata(port->class_dev, NULL); + class_set_devdata(port->sm_class_dev, NULL); + + class_device_destroy(umad_class, port->dev->dev); + class_device_destroy(umad_class, port->sm_dev->dev); + + cdev_del(port->dev); + cdev_del(port->sm_dev); + + spin_lock(&port_lock); + umad_port[port->dev_num] = NULL; + spin_unlock(&port_lock); + + down_write(&port->mutex); + + port->ib_dev = NULL; + + /* + * Now go through the list of files attached to this port and + * unregister all of their MAD agents. We need to hold + * port->mutex while doing this to avoid racing with + * ib_umad_close(), but we can't hold the mutex for writing + * while calling ib_unregister_mad_agent(), since that might + * deadlock by calling back into queue_packet(). So we + * downgrade our lock to a read lock, and then drop and + * reacquire the write lock for the next iteration. + * + * We do list_del_init() on the file's list_head so that the + * list_del in ib_umad_close() is still OK, even after the + * file is removed from the list. + */ + while (!list_empty(&port->file_list)) { + file = list_entry(port->file_list.next, struct ib_umad_file, + port_list); + + file->agents_dead = 1; + list_del_init(&file->port_list); + + downgrade_write(&port->mutex); + + for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) + if (file->agent[id]) + ib_unregister_mad_agent(file->agent[id]); + + up_read(&port->mutex); + down_write(&port->mutex); + } + + up_write(&port->mutex); + + clear_bit(port->dev_num, dev_map); +} + static void ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; @@ -832,15 +915,12 @@ static void ib_umad_add_one(struct ib_device *device) e = device->phys_port_cnt; } - umad_dev = kmalloc(sizeof *umad_dev + + umad_dev = kzalloc(sizeof *umad_dev + (e - s + 1) * sizeof (struct ib_umad_port), GFP_KERNEL); if (!umad_dev) return; - memset(umad_dev, 0, sizeof *umad_dev + - (e - s + 1) * sizeof (struct ib_umad_port)); - kref_init(&umad_dev->ref); umad_dev->start_port = s; @@ -858,10 +938,8 @@ static void ib_umad_add_one(struct ib_device *device) return; err: - while (--i >= s) { - class_device_unregister(&umad_dev->port[i - s].class_dev); - class_device_unregister(&umad_dev->port[i - s].sm_class_dev); - } + while (--i >= s) + ib_umad_kill_port(&umad_dev->port[i - s]); kref_put(&umad_dev->ref, ib_umad_release_dev); } @@ -874,10 +952,8 @@ static void ib_umad_remove_one(struct ib_device *device) if (!umad_dev) return; - for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) { - class_device_unregister(&umad_dev->port[i].class_dev); - class_device_unregister(&umad_dev->port[i].sm_class_dev); - } + for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) + ib_umad_kill_port(&umad_dev->port[i]); kref_put(&umad_dev->ref, ib_umad_release_dev); } @@ -886,8 +962,6 @@ static int __init ib_umad_init(void) { int ret; - spin_lock_init(&map_lock); - ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { @@ -895,13 +969,14 @@ static int __init ib_umad_init(void) goto out; } - ret = class_register(&umad_class); - if (ret) { + umad_class = class_create(THIS_MODULE, "infiniband_mad"); + if (IS_ERR(umad_class)) { + ret = PTR_ERR(umad_class); printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n"); goto out_chrdev; } - ret = class_create_file(&umad_class, &class_attr_abi_version); + ret = class_create_file(umad_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); goto out_class; @@ -916,7 +991,7 @@ static int __init ib_umad_init(void) return 0; out_class: - class_unregister(&umad_class); + class_destroy(umad_class); out_chrdev: unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); @@ -928,7 +1003,7 @@ out: static void __exit ib_umad_cleanup(void) { ib_unregister_client(&umad_client); - class_unregister(&umad_class); + class_destroy(umad_class); unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index b1897bed14a..7114e3fbab0 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -3,6 +3,7 @@ * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -38,29 +39,47 @@ #ifndef UVERBS_H #define UVERBS_H -/* Include device.h and fs.h until cdev.h is self-sufficient */ -#include <linux/fs.h> -#include <linux/device.h> -#include <linux/cdev.h> #include <linux/kref.h> #include <linux/idr.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> +/* + * Our lifetime rules for these structs are the following: + * + * struct ib_uverbs_device: One reference is held by the module and + * released in ib_uverbs_remove_one(). Another reference is taken by + * ib_uverbs_open() each time the character special file is opened, + * and released in ib_uverbs_release_file() when the file is released. + * + * struct ib_uverbs_file: One reference is held by the VFS and + * released when the file is closed. Another reference is taken when + * an asynchronous event queue file is created and released when the + * event file is closed. + * + * struct ib_uverbs_event_file: One reference is held by the VFS and + * released when the file is closed. For asynchronous event files, + * another reference is held by the corresponding main context file + * and released when that file is closed. For completion event files, + * a reference is taken when a CQ is created that uses the file, and + * released when the CQ is destroyed. + */ + struct ib_uverbs_device { + struct kref ref; int devnum; - struct cdev dev; - struct class_device class_dev; + struct cdev *dev; + struct class_device *class_dev; struct ib_device *ib_dev; - int num_comp; + int num_comp_vectors; }; struct ib_uverbs_event_file { struct kref ref; + struct file *file; struct ib_uverbs_file *uverbs_file; spinlock_t lock; - int fd; int is_async; wait_queue_head_t poll_wait; struct fasync_struct *async_queue; @@ -69,11 +88,11 @@ struct ib_uverbs_event_file { struct ib_uverbs_file { struct kref ref; + struct semaphore mutex; struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; - struct ib_uverbs_event_file async_file; - struct ib_uverbs_event_file comp_file[1]; + struct ib_uverbs_event_file *async_file; }; struct ib_uverbs_event { @@ -86,14 +105,26 @@ struct ib_uverbs_event { u32 *counter; }; +struct ib_uverbs_mcast_entry { + struct list_head list; + union ib_gid gid; + u16 lid; +}; + struct ib_uevent_object { struct ib_uobject uobject; struct list_head event_list; u32 events_reported; }; +struct ib_uqp_object { + struct ib_uevent_object uevent; + struct list_head mcast_list; +}; + struct ib_ucq_object { struct ib_uobject uobject; + struct ib_uverbs_file *uverbs_file; struct list_head comp_list; struct list_head async_list; u32 comp_events_reported; @@ -109,10 +140,23 @@ extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; +struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + int is_async, int *fd); +void ib_uverbs_release_event_file(struct kref *ref); +struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); + +void ib_uverbs_release_ucq(struct ib_uverbs_file *file, + struct ib_uverbs_event_file *ev_file, + struct ib_ucq_object *uobj); +void ib_uverbs_release_uevent(struct ib_uverbs_file *file, + struct ib_uevent_object *uobj); + void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event); int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, void *addr, size_t size, int write); @@ -124,21 +168,26 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem); const char __user *buf, int in_len, \ int out_len) -IB_UVERBS_DECLARE_CMD(query_params); IB_UVERBS_DECLARE_CMD(get_context); IB_UVERBS_DECLARE_CMD(query_device); IB_UVERBS_DECLARE_CMD(query_port); -IB_UVERBS_DECLARE_CMD(query_gid); -IB_UVERBS_DECLARE_CMD(query_pkey); IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); +IB_UVERBS_DECLARE_CMD(poll_cq); +IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); +IB_UVERBS_DECLARE_CMD(post_send); +IB_UVERBS_DECLARE_CMD(post_recv); +IB_UVERBS_DECLARE_CMD(post_srq_recv); +IB_UVERBS_DECLARE_CMD(create_ah); +IB_UVERBS_DECLARE_CMD(destroy_ah); IB_UVERBS_DECLARE_CMD(attach_mcast); IB_UVERBS_DECLARE_CMD(detach_mcast); IB_UVERBS_DECLARE_CMD(create_srq); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e91ebde4648..a57d021d435 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,6 +34,9 @@ * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $ */ +#include <linux/file.h> +#include <linux/fs.h> + #include <asm/uaccess.h> #include "uverbs.h" @@ -45,29 +49,6 @@ (udata)->outlen = (olen); \ } while (0) -ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_params cmd; - struct ib_uverbs_query_params_resp resp; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - resp.num_cq_events = file->device->num_comp; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -76,8 +57,9 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_uverbs_get_context_resp resp; struct ib_udata udata; struct ib_device *ibdev = file->device->ib_dev; - int i; - int ret = in_len; + struct ib_ucontext *ucontext; + struct file *filp; + int ret; if (out_len < sizeof resp) return -ENOSPC; @@ -85,45 +67,72 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + down(&file->mutex); + + if (file->ucontext) { + ret = -EINVAL; + goto err; + } + INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - file->ucontext = ibdev->alloc_ucontext(ibdev, &udata); - if (IS_ERR(file->ucontext)) { - ret = PTR_ERR(file->ucontext); - file->ucontext = NULL; - return ret; - } + ucontext = ibdev->alloc_ucontext(ibdev, &udata); + if (IS_ERR(ucontext)) + return PTR_ERR(file->ucontext); - file->ucontext->device = ibdev; - INIT_LIST_HEAD(&file->ucontext->pd_list); - INIT_LIST_HEAD(&file->ucontext->mr_list); - INIT_LIST_HEAD(&file->ucontext->mw_list); - INIT_LIST_HEAD(&file->ucontext->cq_list); - INIT_LIST_HEAD(&file->ucontext->qp_list); - INIT_LIST_HEAD(&file->ucontext->srq_list); - INIT_LIST_HEAD(&file->ucontext->ah_list); - spin_lock_init(&file->ucontext->lock); - - resp.async_fd = file->async_file.fd; - for (i = 0; i < file->device->num_comp; ++i) - if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab + - i * sizeof (__u32), - &file->comp_file[i].fd, sizeof (__u32))) - goto err; + ucontext->device = ibdev; + INIT_LIST_HEAD(&ucontext->pd_list); + INIT_LIST_HEAD(&ucontext->mr_list); + INIT_LIST_HEAD(&ucontext->mw_list); + INIT_LIST_HEAD(&ucontext->cq_list); + INIT_LIST_HEAD(&ucontext->qp_list); + INIT_LIST_HEAD(&ucontext->srq_list); + INIT_LIST_HEAD(&ucontext->ah_list); + + resp.num_comp_vectors = file->device->num_comp_vectors; + + filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd); + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto err_free; + } if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - goto err; + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_file; + } + + file->async_file = filp->private_data; + + INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev, + ib_uverbs_event_handler); + ret = ib_register_event_handler(&file->event_handler); + if (ret) + goto err_file; + + kref_get(&file->async_file->ref); + kref_get(&file->ref); + file->ucontext = ucontext; + + fd_install(resp.async_fd, filp); + + up(&file->mutex); return in_len; -err: - ibdev->dealloc_ucontext(file->ucontext); - file->ucontext = NULL; +err_file: + put_unused_fd(resp.async_fd); + fput(filp); - return -EFAULT; +err_free: + ibdev->dealloc_ucontext(ucontext); + +err: + up(&file->mutex); + return ret; } ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, @@ -243,62 +252,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_gid cmd; - struct ib_uverbs_query_gid_resp resp; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index, - (union ib_gid *) resp.gid); - if (ret) - return ret; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - -ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_pkey cmd; - struct ib_uverbs_query_pkey_resp resp; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index, - &resp.pkey); - if (ret) - return ret; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -337,24 +290,20 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, pd->uobject = uobj; atomic_set(&pd->usecnt, 0); + down(&ib_uverbs_idr_mutex); + retry: if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) { ret = -ENOMEM; - goto err_pd; + goto err_up; } - down(&ib_uverbs_idr_mutex); ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id); - up(&ib_uverbs_idr_mutex); if (ret == -EAGAIN) goto retry; if (ret) - goto err_pd; - - spin_lock_irq(&file->ucontext->lock); - list_add_tail(&uobj->list, &file->ucontext->pd_list); - spin_unlock_irq(&file->ucontext->lock); + goto err_up; memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; @@ -362,21 +311,22 @@ retry: if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } - return in_len; + down(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->pd_list); + up(&file->mutex); -err_list: - spin_lock_irq(&file->ucontext->lock); - list_del(&uobj->list); - spin_unlock_irq(&file->ucontext->lock); + up(&ib_uverbs_idr_mutex); - down(&ib_uverbs_idr_mutex); + return in_len; + +err_idr: idr_remove(&ib_uverbs_pd_idr, uobj->id); - up(&ib_uverbs_idr_mutex); -err_pd: +err_up: + up(&ib_uverbs_idr_mutex); ib_dealloc_pd(pd); err: @@ -410,9 +360,9 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); kfree(uobj); @@ -447,6 +397,14 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; + /* + * Local write permission is required if remote write or + * remote atomic permission is also requested. + */ + if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && + !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) + return -EINVAL; + obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; @@ -512,24 +470,22 @@ retry: resp.mr_handle = obj->uobject.id; - spin_lock_irq(&file->ucontext->lock); - list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); - spin_unlock_irq(&file->ucontext->lock); - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } + down(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); + up(&file->mutex); + up(&ib_uverbs_idr_mutex); return in_len; -err_list: - spin_lock_irq(&file->ucontext->lock); - list_del(&obj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); +err_idr: + idr_remove(&ib_uverbs_mr_idr, obj->uobject.id); err_unreg: ib_dereg_mr(mr); @@ -570,9 +526,9 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&memobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); ib_umem_release(file->device->ib_dev, &memobj->umem); kfree(memobj); @@ -583,6 +539,35 @@ out: return ret ? ret : in_len; } +ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_comp_channel cmd; + struct ib_uverbs_create_comp_channel_resp resp; + struct file *filp; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + put_unused_fd(resp.fd); + fput(filp); + return -EFAULT; + } + + fd_install(resp.fd, filp); + return in_len; +} + ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -591,6 +576,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, struct ib_uverbs_create_cq_resp resp; struct ib_udata udata; struct ib_ucq_object *uobj; + struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; @@ -604,15 +590,19 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - if (cmd.event_handler >= file->device->num_comp) + if (cmd.comp_vector >= file->device->num_comp_vectors) return -EINVAL; + if (cmd.comp_channel >= 0) + ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; uobj->uobject.user_handle = cmd.user_handle; uobj->uobject.context = file->ucontext; + uobj->uverbs_file = file; uobj->comp_events_reported = 0; uobj->async_events_reported = 0; INIT_LIST_HEAD(&uobj->comp_list); @@ -629,27 +619,23 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, cq->uobject = &uobj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = file; + cq->cq_context = ev_file; atomic_set(&cq->usecnt, 0); + down(&ib_uverbs_idr_mutex); + retry: if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) { ret = -ENOMEM; - goto err_cq; + goto err_up; } - down(&ib_uverbs_idr_mutex); ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id); - up(&ib_uverbs_idr_mutex); if (ret == -EAGAIN) goto retry; if (ret) - goto err_cq; - - spin_lock_irq(&file->ucontext->lock); - list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); - spin_unlock_irq(&file->ucontext->lock); + goto err_up; memset(&resp, 0, sizeof resp); resp.cq_handle = uobj->uobject.id; @@ -658,21 +644,22 @@ retry: if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } - return in_len; + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); + up(&file->mutex); -err_list: - spin_lock_irq(&file->ucontext->lock); - list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&ib_uverbs_idr_mutex); - down(&ib_uverbs_idr_mutex); + return in_len; + +err_idr: idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); - up(&ib_uverbs_idr_mutex); -err_cq: +err_up: + up(&ib_uverbs_idr_mutex); ib_destroy_cq(cq); err: @@ -680,6 +667,93 @@ err: return ret; } +ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_poll_cq cmd; + struct ib_uverbs_poll_cq_resp *resp; + struct ib_cq *cq; + struct ib_wc *wc; + int ret = 0; + int i; + int rsize; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL); + if (!wc) + return -ENOMEM; + + rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc); + resp = kmalloc(rsize, GFP_KERNEL); + if (!resp) { + ret = -ENOMEM; + goto out_wc; + } + + down(&ib_uverbs_idr_mutex); + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (!cq || cq->uobject->context != file->ucontext) { + ret = -EINVAL; + goto out; + } + + resp->count = ib_poll_cq(cq, cmd.ne, wc); + + for (i = 0; i < resp->count; i++) { + resp->wc[i].wr_id = wc[i].wr_id; + resp->wc[i].status = wc[i].status; + resp->wc[i].opcode = wc[i].opcode; + resp->wc[i].vendor_err = wc[i].vendor_err; + resp->wc[i].byte_len = wc[i].byte_len; + resp->wc[i].imm_data = (__u32 __force) wc[i].imm_data; + resp->wc[i].qp_num = wc[i].qp_num; + resp->wc[i].src_qp = wc[i].src_qp; + resp->wc[i].wc_flags = wc[i].wc_flags; + resp->wc[i].pkey_index = wc[i].pkey_index; + resp->wc[i].slid = wc[i].slid; + resp->wc[i].sl = wc[i].sl; + resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits; + resp->wc[i].port_num = wc[i].port_num; + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + kfree(resp); + +out_wc: + kfree(wc); + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_req_notify_cq cmd; + struct ib_cq *cq; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (cq && cq->uobject->context == file->ucontext) { + ib_req_notify_cq(cq, cmd.solicited_only ? + IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); + ret = in_len; + } + up(&ib_uverbs_idr_mutex); + + return ret; +} + ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -688,7 +762,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_cq_resp resp; struct ib_cq *cq; struct ib_ucq_object *uobj; - struct ib_uverbs_event *evt, *tmp; + struct ib_uverbs_event_file *ev_file; u64 user_handle; int ret = -EINVAL; @@ -704,7 +778,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, goto out; user_handle = cq->uobject->user_handle; - uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + ev_file = cq->cq_context; ret = ib_destroy_cq(cq); if (ret) @@ -712,23 +787,11 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); - spin_lock_irq(&file->comp_file[0].lock); - list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->comp_file[0].lock); - - spin_lock_irq(&file->async_file.lock); - list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file.lock); + ib_uverbs_release_ucq(file, ev_file, uobj); resp.comp_events_reported = uobj->comp_events_reported; resp.async_events_reported = uobj->async_events_reported; @@ -752,7 +815,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, struct ib_uverbs_create_qp cmd; struct ib_uverbs_create_qp_resp resp; struct ib_udata udata; - struct ib_uevent_object *uobj; + struct ib_uqp_object *uobj; struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; @@ -803,10 +866,11 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.cap.max_recv_sge = cmd.max_recv_sge; attr.cap.max_inline_data = cmd.max_inline_data; - uobj->uobject.user_handle = cmd.user_handle; - uobj->uobject.context = file->ucontext; - uobj->events_reported = 0; - INIT_LIST_HEAD(&uobj->event_list); + uobj->uevent.uobject.user_handle = cmd.user_handle; + uobj->uevent.uobject.context = file->ucontext; + uobj->uevent.events_reported = 0; + INIT_LIST_HEAD(&uobj->uevent.event_list); + INIT_LIST_HEAD(&uobj->mcast_list); qp = pd->device->create_qp(pd, &attr, &udata); if (IS_ERR(qp)) { @@ -819,7 +883,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, qp->send_cq = attr.send_cq; qp->recv_cq = attr.recv_cq; qp->srq = attr.srq; - qp->uobject = &uobj->uobject; + qp->uobject = &uobj->uevent.uobject; qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; @@ -838,33 +902,36 @@ retry: goto err_destroy; } - ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->uobject.id); + ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->uevent.uobject.id); if (ret == -EAGAIN) goto retry; if (ret) goto err_destroy; - resp.qp_handle = uobj->uobject.id; - - spin_lock_irq(&file->ucontext->lock); - list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list); - spin_unlock_irq(&file->ucontext->lock); + resp.qp_handle = uobj->uevent.uobject.id; + resp.max_recv_sge = attr.cap.max_recv_sge; + resp.max_send_sge = attr.cap.max_send_sge; + resp.max_recv_wr = attr.cap.max_recv_wr; + resp.max_send_wr = attr.cap.max_send_wr; + resp.max_inline_data = attr.cap.max_inline_data; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } + down(&file->mutex); + list_add_tail(&uobj->uevent.uobject.list, &file->ucontext->qp_list); + up(&file->mutex); + up(&ib_uverbs_idr_mutex); return in_len; -err_list: - spin_lock_irq(&file->ucontext->lock); - list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); +err_idr: + idr_remove(&ib_uverbs_qp_idr, uobj->uevent.uobject.id); err_destroy: ib_destroy_qp(qp); @@ -966,8 +1033,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, struct ib_uverbs_destroy_qp cmd; struct ib_uverbs_destroy_qp_resp resp; struct ib_qp *qp; - struct ib_uevent_object *uobj; - struct ib_uverbs_event *evt, *tmp; + struct ib_uqp_object *uobj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -981,7 +1047,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, if (!qp || qp->uobject->context != file->ucontext) goto out; - uobj = container_of(qp->uobject, struct ib_uevent_object, uobject); + uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + + if (!list_empty(&uobj->mcast_list)) { + ret = -EBUSY; + goto out; + } ret = ib_destroy_qp(qp); if (ret) @@ -989,18 +1060,13 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle); - spin_lock_irq(&file->ucontext->lock); - list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + down(&file->mutex); + list_del(&uobj->uevent.uobject.list); + up(&file->mutex); - spin_lock_irq(&file->async_file.lock); - list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file.lock); + ib_uverbs_release_uevent(file, &uobj->uevent); - resp.events_reported = uobj->events_reported; + resp.events_reported = uobj->uevent.events_reported; kfree(uobj); @@ -1014,12 +1080,476 @@ out: return ret ? ret : in_len; } +ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_send cmd; + struct ib_uverbs_post_send_resp resp; + struct ib_uverbs_send_wr *user_wr; + struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; + struct ib_qp *qp; + int i, sg_ind; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + + cmd.sge_count * sizeof (struct ib_uverbs_sge)) + return -EINVAL; + + if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) + return -EINVAL; + + user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); + if (!user_wr) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + sg_ind = 0; + last = NULL; + for (i = 0; i < cmd.wr_count; ++i) { + if (copy_from_user(user_wr, + buf + sizeof cmd + i * cmd.wqe_size, + cmd.wqe_size)) { + ret = -EFAULT; + goto out; + } + + if (user_wr->num_sge + sg_ind > cmd.sge_count) { + ret = -EINVAL; + goto out; + } + + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto out; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + next->opcode = user_wr->opcode; + next->send_flags = user_wr->send_flags; + next->imm_data = (__be32 __force) user_wr->imm_data; + + if (qp->qp_type == IB_QPT_UD) { + next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, + user_wr->wr.ud.ah); + if (!next->wr.ud.ah) { + ret = -EINVAL; + goto out; + } + next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; + next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; + } else { + switch (next->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + case IB_WR_RDMA_READ: + next->wr.rdma.remote_addr = + user_wr->wr.rdma.remote_addr; + next->wr.rdma.rkey = + user_wr->wr.rdma.rkey; + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + next->wr.atomic.remote_addr = + user_wr->wr.atomic.remote_addr; + next->wr.atomic.compare_add = + user_wr->wr.atomic.compare_add; + next->wr.atomic.swap = user_wr->wr.atomic.swap; + next->wr.atomic.rkey = user_wr->wr.atomic.rkey; + break; + default: + break; + } + } + + if (next->num_sge) { + next->sg_list = (void *) next + + ALIGN(sizeof *next, sizeof (struct ib_sge)); + if (copy_from_user(next->sg_list, + buf + sizeof cmd + + cmd.wr_count * cmd.wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto out; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + resp.bad_wr = 0; + ret = qp->device->post_send(qp, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + kfree(user_wr); + + return ret ? ret : in_len; +} + +static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, + int in_len, + u32 wr_count, + u32 sge_count, + u32 wqe_size) +{ + struct ib_uverbs_recv_wr *user_wr; + struct ib_recv_wr *wr = NULL, *last, *next; + int sg_ind; + int i; + int ret; + + if (in_len < wqe_size * wr_count + + sge_count * sizeof (struct ib_uverbs_sge)) + return ERR_PTR(-EINVAL); + + if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) + return ERR_PTR(-EINVAL); + + user_wr = kmalloc(wqe_size, GFP_KERNEL); + if (!user_wr) + return ERR_PTR(-ENOMEM); + + sg_ind = 0; + last = NULL; + for (i = 0; i < wr_count; ++i) { + if (copy_from_user(user_wr, buf + i * wqe_size, + wqe_size)) { + ret = -EFAULT; + goto err; + } + + if (user_wr->num_sge + sg_ind > sge_count) { + ret = -EINVAL; + goto err; + } + + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto err; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + + if (next->num_sge) { + next->sg_list = (void *) next + + ALIGN(sizeof *next, sizeof (struct ib_sge)); + if (copy_from_user(next->sg_list, + buf + wr_count * wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto err; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + kfree(user_wr); + return wr; + +err: + kfree(user_wr); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ERR_PTR(ret); +} + +ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_recv cmd; + struct ib_uverbs_post_recv_resp resp; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_qp *qp; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + resp.bad_wr = 0; + ret = qp->device->post_recv(qp, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_srq_recv cmd; + struct ib_uverbs_post_srq_recv_resp resp; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_srq *srq; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + + down(&ib_uverbs_idr_mutex); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (!srq || srq->uobject->context != file->ucontext) + goto out; + + resp.bad_wr = 0; + ret = srq->device->post_srq_recv(srq, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_ah cmd; + struct ib_uverbs_create_ah_resp resp; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_ah *ah; + struct ib_ah_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + attr.dlid = cmd.attr.dlid; + attr.sl = cmd.attr.sl; + attr.src_path_bits = cmd.attr.src_path_bits; + attr.static_rate = cmd.attr.static_rate; + attr.port_num = cmd.attr.port_num; + attr.grh.flow_label = cmd.attr.grh.flow_label; + attr.grh.sgid_index = cmd.attr.grh.sgid_index; + attr.grh.hop_limit = cmd.attr.grh.hop_limit; + attr.grh.traffic_class = cmd.attr.grh.traffic_class; + memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); + + ah = ib_create_ah(pd, &attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + goto err_up; + } + + ah->uobject = uobj; + +retry: + if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_destroy; + } + + ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_destroy; + + resp.ah_handle = uobj->id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_idr; + } + + down(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->ah_list); + up(&file->mutex); + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_idr: + idr_remove(&ib_uverbs_ah_idr, uobj->id); + +err_destroy: + ib_destroy_ah(ah); + +err_up: + up(&ib_uverbs_idr_mutex); + + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_destroy_ah cmd; + struct ib_ah *ah; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle); + if (!ah || ah->uobject->context != file->ucontext) + goto out; + + uobj = ah->uobject; + + ret = ib_destroy_ah(ah); + if (ret) + goto out; + + idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle); + + down(&file->mutex); + list_del(&uobj->list); + up(&file->mutex); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_attach_mcast cmd; struct ib_qp *qp; + struct ib_uqp_object *uobj; + struct ib_uverbs_mcast_entry *mcast; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1028,9 +1558,36 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, down(&ib_uverbs_idr_mutex); qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (qp && qp->uobject->context == file->ucontext) - ret = ib_attach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + list_for_each_entry(mcast, &uobj->mcast_list, list) + if (cmd.mlid == mcast->lid && + !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { + ret = 0; + goto out; + } + + mcast = kmalloc(sizeof *mcast, GFP_KERNEL); + if (!mcast) { + ret = -ENOMEM; + goto out; + } + + mcast->lid = cmd.mlid; + memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw); + + ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid); + if (!ret) { + uobj = container_of(qp->uobject, struct ib_uqp_object, + uevent.uobject); + list_add_tail(&mcast->list, &uobj->mcast_list); + } else + kfree(mcast); + +out: up(&ib_uverbs_idr_mutex); return ret ? ret : in_len; @@ -1041,7 +1598,9 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_detach_mcast cmd; + struct ib_uqp_object *uobj; struct ib_qp *qp; + struct ib_uverbs_mcast_entry *mcast; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1050,9 +1609,24 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, down(&ib_uverbs_idr_mutex); qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (qp && qp->uobject->context == file->ucontext) - ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); + if (ret) + goto out; + uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + + list_for_each_entry(mcast, &uobj->mcast_list, list) + if (cmd.mlid == mcast->lid && + !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { + list_del(&mcast->list); + kfree(mcast); + break; + } + +out: up(&ib_uverbs_idr_mutex); return ret ? ret : in_len; @@ -1136,24 +1710,22 @@ retry: resp.srq_handle = uobj->uobject.id; - spin_lock_irq(&file->ucontext->lock); - list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); - spin_unlock_irq(&file->ucontext->lock); - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); + up(&file->mutex); + up(&ib_uverbs_idr_mutex); return in_len; -err_list: - spin_lock_irq(&file->ucontext->lock); - list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); +err_idr: + idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id); err_destroy: ib_destroy_srq(srq); @@ -1186,7 +1758,6 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, } attr.max_wr = cmd.max_wr; - attr.max_sge = cmd.max_sge; attr.srq_limit = cmd.srq_limit; ret = ib_modify_srq(srq, &attr, cmd.attr_mask); @@ -1205,7 +1776,6 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_srq_resp resp; struct ib_srq *srq; struct ib_uevent_object *uobj; - struct ib_uverbs_event *evt, *tmp; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1227,16 +1797,11 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); - spin_lock_irq(&file->async_file.lock); - list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file.lock); + ib_uverbs_release_uevent(file, uobj); resp.events_reported = uobj->events_reported; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index ce5bdb7af30..81737bd6fae 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -3,6 +3,7 @@ * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -43,6 +44,7 @@ #include <linux/poll.h> #include <linux/file.h> #include <linux/mount.h> +#include <linux/cdev.h> #include <asm/uaccess.h> @@ -62,6 +64,8 @@ enum { #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) +static struct class *uverbs_class; + DECLARE_MUTEX(ib_uverbs_idr_mutex); DEFINE_IDR(ib_uverbs_pd_idr); DEFINE_IDR(ib_uverbs_mr_idr); @@ -72,31 +76,37 @@ DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); static spinlock_t map_lock; +static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES]; static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) = { - [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params, - [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, - [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, - [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, - [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid, - [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey, - [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, - [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, - [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, - [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, - [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, - [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, - [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, - [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, - [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, - [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, - [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, - [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, - [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, - [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, + [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, + [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, + [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, + [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, + [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, + [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, + [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, + [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, + [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, + [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, + [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, + [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, + [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, + [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, + [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, + [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, + [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, + [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, }; static struct vfsmount *uverbs_event_mnt; @@ -104,7 +114,66 @@ static struct vfsmount *uverbs_event_mnt; static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); -static int ib_dealloc_ucontext(struct ib_ucontext *context) +static void ib_uverbs_release_dev(struct kref *ref) +{ + struct ib_uverbs_device *dev = + container_of(ref, struct ib_uverbs_device, ref); + + kfree(dev); +} + +void ib_uverbs_release_ucq(struct ib_uverbs_file *file, + struct ib_uverbs_event_file *ev_file, + struct ib_ucq_object *uobj) +{ + struct ib_uverbs_event *evt, *tmp; + + if (ev_file) { + spin_lock_irq(&ev_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&ev_file->lock); + + kref_put(&ev_file->ref, ib_uverbs_release_event_file); + } + + spin_lock_irq(&file->async_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file->lock); +} + +void ib_uverbs_release_uevent(struct ib_uverbs_file *file, + struct ib_uevent_object *uobj) +{ + struct ib_uverbs_event *evt, *tmp; + + spin_lock_irq(&file->async_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file->lock); +} + +static void ib_uverbs_detach_umcast(struct ib_qp *qp, + struct ib_uqp_object *uobj) +{ + struct ib_uverbs_mcast_entry *mcast, *tmp; + + list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) { + ib_detach_mcast(qp, &mcast->gid, mcast->lid); + list_del(&mcast->list); + kfree(mcast); + } +} + +static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, + struct ib_ucontext *context) { struct ib_uobject *uobj, *tmp; @@ -113,30 +182,47 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context) down(&ib_uverbs_idr_mutex); - /* XXX Free AHs */ + list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { + struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id); + idr_remove(&ib_uverbs_ah_idr, uobj->id); + ib_destroy_ah(ah); + list_del(&uobj->list); + kfree(uobj); + } list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); + struct ib_uqp_object *uqp = + container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove(&ib_uverbs_qp_idr, uobj->id); + ib_uverbs_detach_umcast(qp, uqp); ib_destroy_qp(qp); list_del(&uobj->list); - kfree(container_of(uobj, struct ib_uevent_object, uobject)); + ib_uverbs_release_uevent(file, &uqp->uevent); + kfree(uqp); } list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id); + struct ib_uverbs_event_file *ev_file = cq->cq_context; + struct ib_ucq_object *ucq = + container_of(uobj, struct ib_ucq_object, uobject); idr_remove(&ib_uverbs_cq_idr, uobj->id); ib_destroy_cq(cq); list_del(&uobj->list); - kfree(container_of(uobj, struct ib_ucq_object, uobject)); + ib_uverbs_release_ucq(file, ev_file, ucq); + kfree(ucq); } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id); + struct ib_uevent_object *uevent = + container_of(uobj, struct ib_uevent_object, uobject); idr_remove(&ib_uverbs_srq_idr, uobj->id); ib_destroy_srq(srq); list_del(&uobj->list); - kfree(container_of(uobj, struct ib_uevent_object, uobject)); + ib_uverbs_release_uevent(file, uevent); + kfree(uevent); } /* XXX Free MWs */ @@ -175,6 +261,8 @@ static void ib_uverbs_release_file(struct kref *ref) container_of(ref, struct ib_uverbs_file, ref); module_put(file->device->ib_dev->owner); + kref_put(&file->device->ref, ib_uverbs_release_dev); + kfree(file); } @@ -188,25 +276,19 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, spin_lock_irq(&file->lock); - while (list_empty(&file->event_list) && file->fd >= 0) { + while (list_empty(&file->event_list)) { spin_unlock_irq(&file->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, - !list_empty(&file->event_list) || - file->fd < 0)) + !list_empty(&file->event_list))) return -ERESTARTSYS; spin_lock_irq(&file->lock); } - if (file->fd < 0) { - spin_unlock_irq(&file->lock); - return -ENODEV; - } - event = list_entry(file->event_list.next, struct ib_uverbs_event, list); if (file->is_async) @@ -248,26 +330,19 @@ static unsigned int ib_uverbs_event_poll(struct file *filp, poll_wait(filp, &file->poll_wait, wait); spin_lock_irq(&file->lock); - if (file->fd < 0) - pollflags = POLLERR; - else if (!list_empty(&file->event_list)) + if (!list_empty(&file->event_list)) pollflags = POLLIN | POLLRDNORM; spin_unlock_irq(&file->lock); return pollflags; } -static void ib_uverbs_event_release(struct ib_uverbs_event_file *file) +void ib_uverbs_release_event_file(struct kref *ref) { - struct ib_uverbs_event *entry, *tmp; + struct ib_uverbs_event_file *file = + container_of(ref, struct ib_uverbs_event_file, ref); - spin_lock_irq(&file->lock); - if (file->fd != -1) { - file->fd = -1; - list_for_each_entry_safe(entry, tmp, &file->event_list, list) - kfree(entry); - } - spin_unlock_irq(&file->lock); + kfree(file); } static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) @@ -280,21 +355,30 @@ static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) static int ib_uverbs_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; + struct ib_uverbs_event *entry, *tmp; + + spin_lock_irq(&file->lock); + file->file = NULL; + list_for_each_entry_safe(entry, tmp, &file->event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + kfree(entry); + } + spin_unlock_irq(&file->lock); - ib_uverbs_event_release(file); ib_uverbs_event_fasync(-1, filp, 0); - kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + + if (file->is_async) { + ib_unregister_event_handler(&file->uverbs_file->event_handler); + kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + } + kref_put(&file->ref, ib_uverbs_release_event_file); return 0; } static struct file_operations uverbs_event_fops = { - /* - * No .owner field since we artificially create event files, - * so there is no increment to the module reference count in - * the open path. All event files come from a uverbs command - * file, which already takes a module reference, so this is OK. - */ + .owner = THIS_MODULE, .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, @@ -303,27 +387,37 @@ static struct file_operations uverbs_event_fops = { void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { - struct ib_uverbs_file *file = cq_context; - struct ib_ucq_object *uobj; - struct ib_uverbs_event *entry; - unsigned long flags; + struct ib_uverbs_event_file *file = cq_context; + struct ib_ucq_object *uobj; + struct ib_uverbs_event *entry; + unsigned long flags; + + if (!file) + return; + + spin_lock_irqsave(&file->lock, flags); + if (!file->file) { + spin_unlock_irqrestore(&file->lock, flags); + return; + } entry = kmalloc(sizeof *entry, GFP_ATOMIC); - if (!entry) + if (!entry) { + spin_unlock_irqrestore(&file->lock, flags); return; + } uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); entry->desc.comp.cq_handle = cq->uobject->user_handle; entry->counter = &uobj->comp_events_reported; - spin_lock_irqsave(&file->comp_file[0].lock, flags); - list_add_tail(&entry->list, &file->comp_file[0].event_list); + list_add_tail(&entry->list, &file->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); - spin_unlock_irqrestore(&file->comp_file[0].lock, flags); + spin_unlock_irqrestore(&file->lock, flags); - wake_up_interruptible(&file->comp_file[0].poll_wait); - kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->poll_wait); + kill_fasync(&file->async_queue, SIGIO, POLL_IN); } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, @@ -334,32 +428,37 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, struct ib_uverbs_event *entry; unsigned long flags; + spin_lock_irqsave(&file->async_file->lock, flags); + if (!file->async_file->file) { + spin_unlock_irqrestore(&file->async_file->lock, flags); + return; + } + entry = kmalloc(sizeof *entry, GFP_ATOMIC); - if (!entry) + if (!entry) { + spin_unlock_irqrestore(&file->async_file->lock, flags); return; + } entry->desc.async.element = element; entry->desc.async.event_type = event; entry->counter = counter; - spin_lock_irqsave(&file->async_file.lock, flags); - list_add_tail(&entry->list, &file->async_file.event_list); + list_add_tail(&entry->list, &file->async_file->event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); - spin_unlock_irqrestore(&file->async_file.lock, flags); + spin_unlock_irqrestore(&file->async_file->lock, flags); - wake_up_interruptible(&file->async_file.poll_wait); - kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->async_file->poll_wait); + kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_ucq_object *uobj; + struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, + struct ib_ucq_object, uobject); - uobj = container_of(event->element.cq->uobject, - struct ib_ucq_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); @@ -389,8 +488,8 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) &uobj->events_reported); } -static void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event) +void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) { struct ib_uverbs_file *file = container_of(handler, struct ib_uverbs_file, event_handler); @@ -399,38 +498,90 @@ static void ib_uverbs_event_handler(struct ib_event_handler *handler, NULL, NULL); } -static int ib_uverbs_event_init(struct ib_uverbs_event_file *file, - struct ib_uverbs_file *uverbs_file) +struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + int is_async, int *fd) { + struct ib_uverbs_event_file *ev_file; struct file *filp; + int ret; - spin_lock_init(&file->lock); - INIT_LIST_HEAD(&file->event_list); - init_waitqueue_head(&file->poll_wait); - file->uverbs_file = uverbs_file; - file->async_queue = NULL; - - file->fd = get_unused_fd(); - if (file->fd < 0) - return file->fd; + ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL); + if (!ev_file) + return ERR_PTR(-ENOMEM); + + kref_init(&ev_file->ref); + spin_lock_init(&ev_file->lock); + INIT_LIST_HEAD(&ev_file->event_list); + init_waitqueue_head(&ev_file->poll_wait); + ev_file->uverbs_file = uverbs_file; + ev_file->async_queue = NULL; + ev_file->is_async = is_async; + + *fd = get_unused_fd(); + if (*fd < 0) { + ret = *fd; + goto err; + } filp = get_empty_filp(); if (!filp) { - put_unused_fd(file->fd); - return -ENFILE; + ret = -ENFILE; + goto err_fd; } - filp->f_op = &uverbs_event_fops; + ev_file->file = filp; + + /* + * fops_get() can't fail here, because we're coming from a + * system call on a uverbs file, which will already have a + * module reference. + */ + filp->f_op = fops_get(&uverbs_event_fops); filp->f_vfsmnt = mntget(uverbs_event_mnt); filp->f_dentry = dget(uverbs_event_mnt->mnt_root); filp->f_mapping = filp->f_dentry->d_inode->i_mapping; filp->f_flags = O_RDONLY; filp->f_mode = FMODE_READ; - filp->private_data = file; + filp->private_data = ev_file; - fd_install(file->fd, filp); + return filp; - return 0; +err_fd: + put_unused_fd(*fd); + +err: + kfree(ev_file); + return ERR_PTR(ret); +} + +/* + * Look up a completion event file by FD. If lookup is successful, + * takes a ref to the event file struct that it returns; if + * unsuccessful, returns NULL. + */ +struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) +{ + struct ib_uverbs_event_file *ev_file = NULL; + struct file *filp; + + filp = fget(fd); + if (!filp) + return NULL; + + if (filp->f_op != &uverbs_event_fops) + goto out; + + ev_file = filp->private_data; + if (ev_file->is_async) { + ev_file = NULL; + goto out; + } + + kref_get(&ev_file->ref); + +out: + fput(filp); + return ev_file; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, @@ -448,11 +599,13 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (hdr.in_words * 4 != count) return -EINVAL; - if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table)) + if (hdr.command < 0 || + hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || + !uverbs_cmd_table[hdr.command] || + !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) return -EINVAL; - if (!file->ucontext && - hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS && + if (!file->ucontext && hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) return -EINVAL; @@ -472,60 +625,45 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) static int ib_uverbs_open(struct inode *inode, struct file *filp) { - struct ib_uverbs_device *dev = - container_of(inode->i_cdev, struct ib_uverbs_device, dev); + struct ib_uverbs_device *dev; struct ib_uverbs_file *file; - int i = 0; int ret; - if (!try_module_get(dev->ib_dev->owner)) - return -ENODEV; - - file = kmalloc(sizeof *file + - (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file), - GFP_KERNEL); - if (!file) - return -ENOMEM; - - file->device = dev; - kref_init(&file->ref); + spin_lock(&map_lock); + dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR]; + if (dev) + kref_get(&dev->ref); + spin_unlock(&map_lock); - file->ucontext = NULL; + if (!dev) + return -ENXIO; - ret = ib_uverbs_event_init(&file->async_file, file); - if (ret) + if (!try_module_get(dev->ib_dev->owner)) { + ret = -ENODEV; goto err; + } - file->async_file.is_async = 1; - - kref_get(&file->ref); - - for (i = 0; i < dev->num_comp; ++i) { - ret = ib_uverbs_event_init(&file->comp_file[i], file); - if (ret) - goto err_async; - kref_get(&file->ref); - file->comp_file[i].is_async = 0; + file = kmalloc(sizeof *file, GFP_KERNEL); + if (!file) { + ret = -ENOMEM; + goto err_module; } + file->device = dev; + file->ucontext = NULL; + file->async_file = NULL; + kref_init(&file->ref); + init_MUTEX(&file->mutex); filp->private_data = file; - INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev, - ib_uverbs_event_handler); - if (ib_register_event_handler(&file->event_handler)) - goto err_async; - return 0; -err_async: - while (i--) - ib_uverbs_event_release(&file->comp_file[i]); - - ib_uverbs_event_release(&file->async_file); +err_module: + module_put(dev->ib_dev->owner); err: - kref_put(&file->ref, ib_uverbs_release_file); + kref_put(&dev->ref, ib_uverbs_release_dev); return ret; } @@ -533,14 +671,11 @@ err: static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - int i; - ib_unregister_event_handler(&file->event_handler); - ib_uverbs_event_release(&file->async_file); - ib_dealloc_ucontext(file->ucontext); + ib_uverbs_cleanup_ucontext(file, file->ucontext); - for (i = 0; i < file->device->num_comp; ++i) - ib_uverbs_event_release(&file->comp_file[i]); + if (file->async_file) + kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); @@ -570,27 +705,25 @@ static struct ib_client uverbs_client = { static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { - struct ib_uverbs_device *dev = - container_of(class_dev, struct ib_uverbs_device, class_dev); + struct ib_uverbs_device *dev = class_get_devdata(class_dev); + + if (!dev) + return -ENODEV; return sprintf(buf, "%s\n", dev->ib_dev->name); } static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); -static void ib_uverbs_release_class_dev(struct class_device *class_dev) +static ssize_t show_dev_abi_version(struct class_device *class_dev, char *buf) { - struct ib_uverbs_device *dev = - container_of(class_dev, struct ib_uverbs_device, class_dev); + struct ib_uverbs_device *dev = class_get_devdata(class_dev); - cdev_del(&dev->dev); - clear_bit(dev->devnum, dev_map); - kfree(dev); -} + if (!dev) + return -ENODEV; -static struct class uverbs_class = { - .name = "infiniband_verbs", - .release = ib_uverbs_release_class_dev -}; + return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver); +} +static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); static ssize_t show_abi_version(struct class *class, char *buf) { @@ -605,11 +738,11 @@ static void ib_uverbs_add_one(struct ib_device *device) if (!device->alloc_ucontext) return; - uverbs_dev = kmalloc(sizeof *uverbs_dev, GFP_KERNEL); + uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL); if (!uverbs_dev) return; - memset(uverbs_dev, 0, sizeof *uverbs_dev); + kref_init(&uverbs_dev->ref); spin_lock(&map_lock); uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); @@ -620,41 +753,49 @@ static void ib_uverbs_add_one(struct ib_device *device) set_bit(uverbs_dev->devnum, dev_map); spin_unlock(&map_lock); - uverbs_dev->ib_dev = device; - uverbs_dev->num_comp = 1; + uverbs_dev->ib_dev = device; + uverbs_dev->num_comp_vectors = 1; - if (device->mmap) - cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops); - else - cdev_init(&uverbs_dev->dev, &uverbs_fops); - uverbs_dev->dev.owner = THIS_MODULE; - kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum); - if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) + uverbs_dev->dev = cdev_alloc(); + if (!uverbs_dev->dev) goto err; + uverbs_dev->dev->owner = THIS_MODULE; + uverbs_dev->dev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; + kobject_set_name(&uverbs_dev->dev->kobj, "uverbs%d", uverbs_dev->devnum); + if (cdev_add(uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) + goto err_cdev; - uverbs_dev->class_dev.class = &uverbs_class; - uverbs_dev->class_dev.dev = device->dma_device; - uverbs_dev->class_dev.devt = uverbs_dev->dev.dev; - snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum); - if (class_device_register(&uverbs_dev->class_dev)) + uverbs_dev->class_dev = class_device_create(uverbs_class, NULL, + uverbs_dev->dev->dev, + device->dma_device, + "uverbs%d", uverbs_dev->devnum); + if (IS_ERR(uverbs_dev->class_dev)) goto err_cdev; - if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev)) + class_set_devdata(uverbs_dev->class_dev, uverbs_dev); + + if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_ibdev)) + goto err_class; + if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_abi_version)) goto err_class; + spin_lock(&map_lock); + dev_table[uverbs_dev->devnum] = uverbs_dev; + spin_unlock(&map_lock); + ib_set_client_data(device, &uverbs_client, uverbs_dev); return; err_class: - class_device_unregister(&uverbs_dev->class_dev); + class_device_destroy(uverbs_class, uverbs_dev->dev->dev); err_cdev: - cdev_del(&uverbs_dev->dev); + cdev_del(uverbs_dev->dev); clear_bit(uverbs_dev->devnum, dev_map); err: - kfree(uverbs_dev); + kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); return; } @@ -665,7 +806,16 @@ static void ib_uverbs_remove_one(struct ib_device *device) if (!uverbs_dev) return; - class_device_unregister(&uverbs_dev->class_dev); + class_set_devdata(uverbs_dev->class_dev, NULL); + class_device_destroy(uverbs_class, uverbs_dev->dev->dev); + cdev_del(uverbs_dev->dev); + + spin_lock(&map_lock); + dev_table[uverbs_dev->devnum] = NULL; + spin_unlock(&map_lock); + + clear_bit(uverbs_dev->devnum, dev_map); + kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); } static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags, @@ -695,13 +845,14 @@ static int __init ib_uverbs_init(void) goto out; } - ret = class_register(&uverbs_class); - if (ret) { + uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); + if (IS_ERR(uverbs_class)) { + ret = PTR_ERR(uverbs_class); printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } - ret = class_create_file(&uverbs_class, &class_attr_abi_version); + ret = class_create_file(uverbs_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); goto out_class; @@ -735,7 +886,7 @@ out_fs: unregister_filesystem(&uverbs_event_fs); out_class: - class_unregister(&uverbs_class); + class_destroy(uverbs_class); out_chrdev: unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); @@ -749,8 +900,15 @@ static void __exit ib_uverbs_cleanup(void) ib_unregister_client(&uverbs_client); mntput(uverbs_event_mnt); unregister_filesystem(&uverbs_event_fs); - class_unregister(&uverbs_class); + class_destroy(uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); + idr_destroy(&ib_uverbs_pd_idr); + idr_destroy(&ib_uverbs_mr_idr); + idr_destroy(&ib_uverbs_mw_idr); + idr_destroy(&ib_uverbs_ah_idr); + idr_destroy(&ib_uverbs_cq_idr); + idr_destroy(&ib_uverbs_qp_idr); + idr_destroy(&ib_uverbs_srq_idr); } module_init(ib_uverbs_init); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5081d903e56..4c15e112736 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -40,6 +40,7 @@ #include <linux/errno.h> #include <linux/err.h> +#include <linux/string.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> @@ -324,16 +325,8 @@ EXPORT_SYMBOL(ib_destroy_cq); int ib_resize_cq(struct ib_cq *cq, int cqe) { - int ret; - - if (!cq->device->resize_cq) - return -ENOSYS; - - ret = cq->device->resize_cq(cq, &cqe); - if (!ret) - cq->cqe = cqe; - - return ret; + return cq->device->resize_cq ? + cq->device->resize_cq(cq, cqe) : -ENOSYS; } EXPORT_SYMBOL(ib_resize_cq); @@ -523,16 +516,22 @@ EXPORT_SYMBOL(ib_dealloc_fmr); int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { - return qp->device->attach_mcast ? - qp->device->attach_mcast(qp, gid, lid) : - -ENOSYS; + if (!qp->device->attach_mcast) + return -ENOSYS; + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + return -EINVAL; + + return qp->device->attach_mcast(qp, gid, lid); } EXPORT_SYMBOL(ib_attach_mcast); int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { - return qp->device->detach_mcast ? - qp->device->detach_mcast(qp, gid, lid) : - -ENOSYS; + if (!qp->device->detach_mcast) + return -ENOSYS; + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + return -EINVAL; + + return qp->device->detach_mcast(qp, gid, lid); } EXPORT_SYMBOL(ib_detach_mcast); diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile index c44f7bae542..47ec5a7cba0 100644 --- a/drivers/infiniband/hw/mthca/Makefile +++ b/drivers/infiniband/hw/mthca/Makefile @@ -7,4 +7,5 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \ mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \ - mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o + mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o \ + mthca_catas.o diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 889e8509673..22fdc446f25 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -34,6 +34,8 @@ */ #include <linux/init.h> +#include <linux/string.h> +#include <linux/slab.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c new file mode 100644 index 00000000000..c3bec7490f5 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include <linux/jiffies.h> +#include <linux/timer.h> + +#include "mthca_dev.h" + +enum { + MTHCA_CATAS_POLL_INTERVAL = 5 * HZ, + + MTHCA_CATAS_TYPE_INTERNAL = 0, + MTHCA_CATAS_TYPE_UPLINK = 3, + MTHCA_CATAS_TYPE_DDR = 4, + MTHCA_CATAS_TYPE_PARITY = 5, +}; + +static DEFINE_SPINLOCK(catas_lock); + +static void handle_catas(struct mthca_dev *dev) +{ + struct ib_event event; + const char *type; + int i; + + event.device = &dev->ib_dev; + event.event = IB_EVENT_DEVICE_FATAL; + event.element.port_num = 0; + + ib_dispatch_event(&event); + + switch (swab32(readl(dev->catas_err.map)) >> 24) { + case MTHCA_CATAS_TYPE_INTERNAL: + type = "internal error"; + break; + case MTHCA_CATAS_TYPE_UPLINK: + type = "uplink bus error"; + break; + case MTHCA_CATAS_TYPE_DDR: + type = "DDR data error"; + break; + case MTHCA_CATAS_TYPE_PARITY: + type = "internal parity error"; + break; + default: + type = "unknown error"; + break; + } + + mthca_err(dev, "Catastrophic error detected: %s\n", type); + for (i = 0; i < dev->catas_err.size; ++i) + mthca_err(dev, " buf[%02x]: %08x\n", + i, swab32(readl(dev->catas_err.map + i))); +} + +static void poll_catas(unsigned long dev_ptr) +{ + struct mthca_dev *dev = (struct mthca_dev *) dev_ptr; + unsigned long flags; + int i; + + for (i = 0; i < dev->catas_err.size; ++i) + if (readl(dev->catas_err.map + i)) { + handle_catas(dev); + return; + } + + spin_lock_irqsave(&catas_lock, flags); + if (!dev->catas_err.stop) + mod_timer(&dev->catas_err.timer, + jiffies + MTHCA_CATAS_POLL_INTERVAL); + spin_unlock_irqrestore(&catas_lock, flags); + + return; +} + +void mthca_start_catas_poll(struct mthca_dev *dev) +{ + unsigned long addr; + + init_timer(&dev->catas_err.timer); + dev->catas_err.stop = 0; + dev->catas_err.map = NULL; + + addr = pci_resource_start(dev->pdev, 0) + + ((pci_resource_len(dev->pdev, 0) - 1) & + dev->catas_err.addr); + + if (!request_mem_region(addr, dev->catas_err.size * 4, + DRV_NAME)) { + mthca_warn(dev, "couldn't request catastrophic error region " + "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); + return; + } + + dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); + if (!dev->catas_err.map) { + mthca_warn(dev, "couldn't map catastrophic error region " + "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); + release_mem_region(addr, dev->catas_err.size * 4); + return; + } + + dev->catas_err.timer.data = (unsigned long) dev; + dev->catas_err.timer.function = poll_catas; + dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL; + add_timer(&dev->catas_err.timer); +} + +void mthca_stop_catas_poll(struct mthca_dev *dev) +{ + spin_lock_irq(&catas_lock); + dev->catas_err.stop = 1; + spin_unlock_irq(&catas_lock); + + del_timer_sync(&dev->catas_err.timer); + + if (dev->catas_err.map) { + iounmap(dev->catas_err.map); + release_mem_region(pci_resource_start(dev->pdev, 0) + + ((pci_resource_len(dev->pdev, 0) - 1) & + dev->catas_err.addr), + dev->catas_err.size * 4); + } +} diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index cc758a2d2bc..9ed34587fc5 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -524,7 +525,7 @@ void mthca_cmd_use_polling(struct mthca_dev *dev) } struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, - unsigned int gfp_mask) + gfp_t gfp_mask) { struct mthca_mailbox *mailbox; @@ -605,7 +606,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, err = -EINVAL; goto out; } - for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) { + for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i) { if (virt != -1) { pages[nent * 2] = cpu_to_be64(virt); virt += 1 << lg; @@ -616,7 +617,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, ts += 1 << (lg - 10); ++tc; - if (nent == MTHCA_MAILBOX_SIZE / 16) { + if (++nent == MTHCA_MAILBOX_SIZE / 16) { err = mthca_cmd(dev, mailbox->dma, nent, 0, op, CMD_TIME_CLASS_B, status); if (err || *status) @@ -706,9 +707,13 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); dev->cmd.max_cmds = 1 << lg; + MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET); + MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET); mthca_dbg(dev, "FW version %012llx, max commands %d\n", (unsigned long long) dev->fw_ver, dev->cmd.max_cmds); + mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n", + (unsigned long long) dev->catas_err.addr, dev->catas_err.size); if (mthca_is_memfree(dev)) { MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET); @@ -933,9 +938,9 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, goto out; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); - dev_lim->max_srq_sz = 1 << field; + dev_lim->max_srq_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); - dev_lim->max_qp_sz = 1 << field; + dev_lim->max_qp_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET); dev_lim->reserved_qps = 1 << (field & 0xf); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET); @@ -1045,6 +1050,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars); mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", dev_lim->max_pds, dev_lim->reserved_mgms); + mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", + dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz); mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags); @@ -1053,6 +1060,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, dev_lim->hca.arbel.resize_srq = field & 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET); dev_lim->max_sg = min_t(int, field, dev_lim->max_sg); + MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET); + dev_lim->max_desc_sz = min_t(int, size, dev_lim->max_desc_sz); MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET); dev_lim->mpt_entry_sz = size; MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET); diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index 65f976a13e0..18175bec84c 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -248,7 +248,7 @@ void mthca_cmd_event(struct mthca_dev *dev, u16 token, u8 status, u64 out_param); struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, - unsigned int gfp_mask); + gfp_t gfp_mask); void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox); int mthca_SYS_EN(struct mthca_dev *dev, u8 *status); diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 8600b6c3e0c..4a8adcef207 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -208,7 +208,7 @@ static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, } } -void mthca_cq_event(struct mthca_dev *dev, u32 cqn) +void mthca_cq_completion(struct mthca_dev *dev, u32 cqn) { struct mthca_cq *cq; @@ -224,12 +224,41 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } +void mthca_cq_event(struct mthca_dev *dev, u32 cqn, + enum ib_event_type event_type) +{ + struct mthca_cq *cq; + struct ib_event event; + + spin_lock(&dev->cq_table.lock); + + cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); + + if (cq) + atomic_inc(&cq->refcount); + spin_unlock(&dev->cq_table.lock); + + if (!cq) { + mthca_warn(dev, "Async event for bogus CQ %08x\n", cqn); + return; + } + + event.device = &dev->ib_dev; + event.event = event_type; + event.element.cq = &cq->ibcq; + if (cq->ibcq.event_handler) + cq->ibcq.event_handler(&event, cq->ibcq.cq_context); + + if (atomic_dec_and_test(&cq->refcount)) + wake_up(&cq->wait); +} + void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, struct mthca_srq *srq) { struct mthca_cq *cq; struct mthca_cqe *cqe; - int prod_index; + u32 prod_index; int nfreed = 0; spin_lock_irq(&dev->cq_table.lock); @@ -264,19 +293,15 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, * Now sweep backwards through the CQ, removing CQ entries * that match our QP by copying older entries on top of them. */ - while (prod_index > cq->cons_index) { - cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe); + while ((int) --prod_index - (int) cq->cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); if (cqe->my_qpn == cpu_to_be32(qpn)) { if (srq) mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe)); ++nfreed; - } - else if (nfreed) - memcpy(get_cqe(cq, (prod_index - 1 + nfreed) & - cq->ibcq.cqe), - cqe, - MTHCA_CQ_ENTRY_SIZE); - --prod_index; + } else if (nfreed) + memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe), + cqe, MTHCA_CQ_ENTRY_SIZE); } if (nfreed) { diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7bff5a8425f..497ff794ef6 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -83,6 +83,8 @@ enum { /* Arbel FW gives us these, but we need them for Tavor */ MTHCA_MPT_ENTRY_SIZE = 0x40, MTHCA_MTT_SEG_SIZE = 0x40, + + MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) }; enum { @@ -128,12 +130,17 @@ struct mthca_limits { int num_uars; int max_sg; int num_qps; + int max_wqes; + int max_desc_sz; + int max_qp_init_rdma; int reserved_qps; int num_srqs; + int max_srq_wqes; int reserved_srqs; int num_eecs; int reserved_eecs; int num_cqs; + int max_cqes; int reserved_cqs; int num_eqs; int reserved_eqs; @@ -148,6 +155,8 @@ struct mthca_limits { int reserved_mcgs; int num_pds; int reserved_pds; + u32 page_size_cap; + u32 flags; u8 port_width_cap; }; @@ -251,6 +260,14 @@ struct mthca_mcg_table { struct mthca_icm_table *table; }; +struct mthca_catas_err { + u64 addr; + u32 __iomem *map; + unsigned long stop; + u32 size; + struct timer_list timer; +}; + struct mthca_dev { struct ib_device ib_dev; struct pci_dev *pdev; @@ -311,6 +328,8 @@ struct mthca_dev { struct mthca_av_table av_table; struct mthca_mcg_table mcg_table; + struct mthca_catas_err catas_err; + struct mthca_uar driver_uar; struct mthca_db_table *db_tab; struct mthca_pd driver_pd; @@ -398,6 +417,9 @@ void mthca_cleanup_mcg_table(struct mthca_dev *dev); int mthca_register_device(struct mthca_dev *dev); void mthca_unregister_device(struct mthca_dev *dev); +void mthca_start_catas_poll(struct mthca_dev *dev); +void mthca_stop_catas_poll(struct mthca_dev *dev); + int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); @@ -440,13 +462,17 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, struct mthca_cq *cq); void mthca_free_cq(struct mthca_dev *dev, struct mthca_cq *cq); -void mthca_cq_event(struct mthca_dev *dev, u32 cqn); +void mthca_cq_completion(struct mthca_dev *dev, u32 cqn); +void mthca_cq_event(struct mthca_dev *dev, u32 cqn, + enum ib_event_type event_type); void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, struct mthca_srq *srq); int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, struct ib_srq_attr *attr, struct mthca_srq *srq); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); +int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask); void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type); void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 78152a8ad17..34d68e5a72d 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -83,7 +83,8 @@ enum { MTHCA_EVENT_TYPE_PATH_MIG = 0x01, MTHCA_EVENT_TYPE_COMM_EST = 0x02, MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, - MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14, MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, @@ -110,8 +111,9 @@ enum { (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \ (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT)) -#define MTHCA_SRQ_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ - (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE) +#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT)) #define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD) #define MTHCA_EQ_DB_INC_CI (1 << 24) @@ -142,6 +144,9 @@ struct mthca_eqe { __be32 qpn; } __attribute__((packed)) qp; struct { + __be32 srqn; + } __attribute__((packed)) srq; + struct { __be32 cqn; u32 reserved1; u8 reserved2[3]; @@ -287,7 +292,7 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) case MTHCA_EVENT_TYPE_COMP: disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff; disarm_cq(dev, eq->eqn, disarm_cqn); - mthca_cq_event(dev, disarm_cqn); + mthca_cq_completion(dev, disarm_cqn); break; case MTHCA_EVENT_TYPE_PATH_MIG: @@ -305,6 +310,16 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) IB_EVENT_SQ_DRAINED); break; + case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_LAST_WQE_REACHED); + break; + + case MTHCA_EVENT_TYPE_SRQ_LIMIT: + mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff, + IB_EVENT_SRQ_LIMIT_REACHED); + break; + case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR: mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, IB_EVENT_QP_FATAL); @@ -349,6 +364,8 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) eqe->event.cq_err.syndrome == 1 ? "overrun" : "access violation", be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff); + mthca_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn), + IB_EVENT_CQ_ERR); break; case MTHCA_EVENT_TYPE_EQ_OVERFLOW: @@ -396,20 +413,21 @@ static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); ecr = readl(dev->eq_regs.tavor.ecr_base + 4); - if (ecr) { - writel(ecr, dev->eq_regs.tavor.ecr_base + - MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4); + if (!ecr) + return IRQ_NONE; - for (i = 0; i < MTHCA_NUM_EQ; ++i) - if (ecr & dev->eq_table.eq[i].eqn_mask && - mthca_eq_int(dev, &dev->eq_table.eq[i])) { + writel(ecr, dev->eq_regs.tavor.ecr_base + + MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (ecr & dev->eq_table.eq[i].eqn_mask) { + if (mthca_eq_int(dev, &dev->eq_table.eq[i])) tavor_set_eq_ci(dev, &dev->eq_table.eq[i], dev->eq_table.eq[i].cons_index); - tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn); - } - } + tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn); + } - return IRQ_RETVAL(ecr); + return IRQ_HANDLED; } static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr, @@ -836,7 +854,7 @@ int __devinit mthca_init_eq_table(struct mthca_dev *dev) dev->eq_table.clr_mask = swab32(1 << (dev->eq_table.inta_pin & 31)); dev->eq_table.clr_int = dev->clr_base + - (dev->eq_table.inta_pin < 31 ? 4 : 0); + (dev->eq_table.inta_pin < 32 ? 4 : 0); } dev->eq_table.arm_mask = 0; diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 9804174f7f3..1229c604c6e 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -34,6 +34,9 @@ * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $ */ +#include <linux/string.h> +#include <linux/slab.h> + #include <rdma/ib_verbs.h> #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> @@ -46,11 +49,6 @@ enum { MTHCA_VENDOR_CLASS2 = 0xa }; -struct mthca_trap_mad { - struct ib_mad *mad; - DECLARE_PCI_UNMAP_ADDR(mapping) -}; - static void update_sm_ah(struct mthca_dev *dev, u8 port_num, u16 lid, u8 sl) { @@ -116,49 +114,14 @@ static void forward_trap(struct mthca_dev *dev, struct ib_mad *mad) { int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; - struct mthca_trap_mad *tmad; - struct ib_sge gather_list; - struct ib_send_wr *bad_wr, wr = { - .opcode = IB_WR_SEND, - .sg_list = &gather_list, - .num_sge = 1, - .send_flags = IB_SEND_SIGNALED, - .wr = { - .ud = { - .remote_qpn = qpn, - .remote_qkey = qpn ? IB_QP1_QKEY : 0, - .timeout_ms = 0 - } - } - }; + struct ib_mad_send_buf *send_buf; struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; int ret; unsigned long flags; if (agent) { - tmad = kmalloc(sizeof *tmad, GFP_KERNEL); - if (!tmad) - return; - - tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL); - if (!tmad->mad) { - kfree(tmad); - return; - } - - memcpy(tmad->mad, mad, sizeof *mad); - - wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr; - wr.wr_id = (unsigned long) tmad; - - gather_list.addr = dma_map_single(agent->device->dma_device, - tmad->mad, - sizeof *tmad->mad, - DMA_TO_DEVICE); - gather_list.length = sizeof *tmad->mad; - gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey; - pci_unmap_addr_set(tmad, mapping, gather_list.addr); - + send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, + IB_MGMT_MAD_DATA, GFP_ATOMIC); /* * We rely here on the fact that MLX QPs don't use the * address handle after the send is posted (this is @@ -166,21 +129,15 @@ static void forward_trap(struct mthca_dev *dev, * it's OK for our devices). */ spin_lock_irqsave(&dev->sm_lock, flags); - wr.wr.ud.ah = dev->sm_ah[port_num - 1]; - if (wr.wr.ud.ah) - ret = ib_post_send_mad(agent, &wr, &bad_wr); + memcpy(send_buf->mad, mad, sizeof *mad); + if ((send_buf->ah = dev->sm_ah[port_num - 1])) + ret = ib_post_send_mad(send_buf, NULL); else ret = -EINVAL; spin_unlock_irqrestore(&dev->sm_lock, flags); - if (ret) { - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(tmad, mapping), - sizeof *tmad->mad, - DMA_TO_DEVICE); - kfree(tmad->mad); - kfree(tmad); - } + if (ret) + ib_free_send_mad(send_buf); } } @@ -267,15 +224,7 @@ int mthca_process_mad(struct ib_device *ibdev, static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { - struct mthca_trap_mad *tmad = - (void *) (unsigned long) mad_send_wc->wr_id; - - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(tmad, mapping), - sizeof *tmad->mad, - DMA_TO_DEVICE); - kfree(tmad->mad); - kfree(tmad); + ib_free_send_mad(mad_send_wc->send_buf); } int mthca_create_agents(struct mthca_dev *dev) diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index ffbcd40418d..6f94b25f3ac 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -162,9 +162,19 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.pkey_table_len = dev_lim->max_pkeys; mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; mdev->limits.max_sg = dev_lim->max_sg; + mdev->limits.max_wqes = dev_lim->max_qp_sz; + mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp; mdev->limits.reserved_qps = dev_lim->reserved_qps; + mdev->limits.max_srq_wqes = dev_lim->max_srq_sz; mdev->limits.reserved_srqs = dev_lim->reserved_srqs; mdev->limits.reserved_eecs = dev_lim->reserved_eecs; + mdev->limits.max_desc_sz = dev_lim->max_desc_sz; + /* + * Subtract 1 from the limit because we need to allocate a + * spare CQE so the HCA HW can tell the difference between an + * empty CQ and a full CQ. + */ + mdev->limits.max_cqes = dev_lim->max_cq_sz - 1; mdev->limits.reserved_cqs = dev_lim->reserved_cqs; mdev->limits.reserved_eqs = dev_lim->reserved_eqs; mdev->limits.reserved_mtts = dev_lim->reserved_mtts; @@ -172,6 +182,8 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.reserved_uars = dev_lim->reserved_uars; mdev->limits.reserved_pds = dev_lim->reserved_pds; mdev->limits.port_width_cap = dev_lim->max_port_width; + mdev->limits.page_size_cap = ~(u32) (dev_lim->min_page_sz - 1); + mdev->limits.flags = dev_lim->flags; /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. May be doable since hardware supports it for SRQ. @@ -503,6 +515,25 @@ err_free_aux: return err; } +static void mthca_free_icms(struct mthca_dev *mdev) +{ + u8 status; + + mthca_free_icm_table(mdev, mdev->mcg_table.table); + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); + mthca_free_icm_table(mdev, mdev->cq_table.table); + mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); + mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); + mthca_free_icm_table(mdev, mdev->qp_table.qp_table); + mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); + mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); + mthca_unmap_eq_icm(mdev); + + mthca_UNMAP_ICM_AUX(mdev, &status); + mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); +} + static int __devinit mthca_init_arbel(struct mthca_dev *mdev) { struct mthca_dev_lim dev_lim; @@ -580,18 +611,7 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev) return 0; err_free_icm: - if (mdev->mthca_flags & MTHCA_FLAG_SRQ) - mthca_free_icm_table(mdev, mdev->srq_table.table); - mthca_free_icm_table(mdev, mdev->cq_table.table); - mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); - mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); - mthca_free_icm_table(mdev, mdev->qp_table.qp_table); - mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); - mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); - mthca_unmap_eq_icm(mdev); - - mthca_UNMAP_ICM_AUX(mdev, &status); - mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); + mthca_free_icms(mdev); err_stop_fw: mthca_UNMAP_FA(mdev, &status); @@ -611,18 +631,7 @@ static void mthca_close_hca(struct mthca_dev *mdev) mthca_CLOSE_HCA(mdev, 0, &status); if (mthca_is_memfree(mdev)) { - if (mdev->mthca_flags & MTHCA_FLAG_SRQ) - mthca_free_icm_table(mdev, mdev->srq_table.table); - mthca_free_icm_table(mdev, mdev->cq_table.table); - mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); - mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); - mthca_free_icm_table(mdev, mdev->qp_table.qp_table); - mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); - mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); - mthca_unmap_eq_icm(mdev); - - mthca_UNMAP_ICM_AUX(mdev, &status); - mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); + mthca_free_icms(mdev); mthca_UNMAP_FA(mdev, &status); mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); @@ -1050,7 +1059,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, goto err_cmd; if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) { - mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n", + mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), (int) (mthca_hca_table[id->driver_data].latest_fw >> 32), diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index a2707605f4c..2fc449da418 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -33,14 +33,12 @@ */ #include <linux/init.h> +#include <linux/string.h> +#include <linux/slab.h> #include "mthca_dev.h" #include "mthca_cmd.h" -enum { - MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) -}; - struct mthca_mgm { __be32 next_gid_index; u32 reserved[3]; @@ -189,7 +187,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } for (i = 0; i < MTHCA_QP_PER_MGM; ++i) - if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { + if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) { + mthca_dbg(dev, "QP %06x already a member of MGM\n", + ibqp->qp_num); + err = 0; + goto out; + } else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31)); break; } diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 1827400f189..d72fe95cba0 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -82,7 +82,7 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm) } struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, - unsigned int gfp_mask) + gfp_t gfp_mask) { struct mthca_icm *icm; struct mthca_icm_chunk *chunk = NULL; @@ -290,7 +290,7 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, int i; u8 status; - num_icm = obj_size * nobj / MTHCA_TABLE_CHUNK_SIZE; + num_icm = (obj_size * nobj + MTHCA_TABLE_CHUNK_SIZE - 1) / MTHCA_TABLE_CHUNK_SIZE; table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL); if (!table) @@ -487,7 +487,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, } } -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, + u32 qn, __be32 **db) { int group; int start, end, dir; @@ -529,12 +530,25 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) goto found; } + for (i = start; i != end; i += dir) + if (!dev->db_tab->page[i].db_rec) { + page = dev->db_tab->page + i; + goto alloc; + } + if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) { ret = -ENOMEM; goto out; } + if (group == 0) + ++dev->db_tab->max_group1; + else + --dev->db_tab->min_group2; + page = dev->db_tab->page + end; + +alloc: page->db_rec = dma_alloc_coherent(&dev->pdev->dev, 4096, &page->mapping, GFP_KERNEL); if (!page->db_rec) { @@ -554,10 +568,6 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) } bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE); - if (group == 0) - ++dev->db_tab->max_group1; - else - --dev->db_tab->min_group2; found: j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE); diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index bafa51544aa..4fdca26eea8 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -77,7 +77,7 @@ struct mthca_icm_iter { struct mthca_dev; struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, - unsigned int gfp_mask); + gfp_t gfp_mask); void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm); struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, @@ -173,7 +173,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, int mthca_init_db_tab(struct mthca_dev *dev); void mthca_cleanup_db_tab(struct mthca_dev *dev); -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db); +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, + u32 qn, __be32 **db); void mthca_free_db(struct mthca_dev *dev, int type, int db_index); #endif /* MTHCA_MEMFREE_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 1f97a44477f..e995e2aa016 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -140,13 +140,11 @@ static int __devinit mthca_buddy_init(struct mthca_buddy *buddy, int max_order) buddy->max_order = max_order; spin_lock_init(&buddy->lock); - buddy->bits = kmalloc((buddy->max_order + 1) * sizeof (long *), + buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), GFP_KERNEL); if (!buddy->bits) goto err_out; - memset(buddy->bits, 0, (buddy->max_order + 1) * sizeof (long *)); - for (i = 0; i <= buddy->max_order; ++i) { s = BITS_TO_LONGS(1 << (buddy->max_order - i)); buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL); diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 0576056b34f..08a909371b0 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -35,6 +35,8 @@ #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/string.h> +#include <linux/slab.h> #include "mthca_profile.h" @@ -80,12 +82,10 @@ u64 mthca_make_profile(struct mthca_dev *dev, struct mthca_resource tmp; int i, j; - profile = kmalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL); + profile = kzalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL); if (!profile) return -ENOMEM; - memset(profile, 0, MTHCA_RES_NUM * sizeof *profile); - profile[MTHCA_RES_QP].size = dev_lim->qpc_entry_sz; profile[MTHCA_RES_EEC].size = dev_lim->eec_entry_sz; profile[MTHCA_RES_SRQ].size = dev_lim->srq_entry_sz; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 1c1c2e23087..4cc7e2846df 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -37,6 +37,7 @@ */ #include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> #include <linux/mm.h> #include "mthca_dev.h" @@ -84,21 +85,33 @@ static int mthca_query_device(struct ib_device *ibdev, props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); - props->hw_ver = be16_to_cpup((__be16 *) (out_mad->data + 32)); + props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); memcpy(&props->node_guid, out_mad->data + 12, 8); props->max_mr_size = ~0ull; + props->page_size_cap = mdev->limits.page_size_cap; props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; - props->max_qp_wr = 0xffff; + props->max_qp_wr = mdev->limits.max_wqes; props->max_sge = mdev->limits.max_sg; props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; - props->max_cqe = 0xffff; + props->max_cqe = mdev->limits.max_cqes; props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds; props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift; - props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift; + props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma; + props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; + props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs; + props->max_srq_wr = mdev->limits.max_srq_wqes; + props->max_srq_sge = mdev->limits.max_sg; props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; + props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? + IB_ATOMIC_HCA : IB_ATOMIC_NONE; + props->max_pkeys = mdev->limits.pkey_table_len; + props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms; + props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; err = 0; out: @@ -150,9 +163,13 @@ static int mthca_query_port(struct ib_device *ibdev, props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; props->max_msg_sz = 0x80000000; props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; + props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; out: kfree(in_mad); @@ -599,11 +616,11 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, return ERR_PTR(err); } - init_attr->cap.max_inline_data = 0; init_attr->cap.max_send_wr = qp->sq.max; init_attr->cap.max_recv_wr = qp->rq.max; init_attr->cap.max_send_sge = qp->sq.max_gs; init_attr->cap.max_recv_sge = qp->rq.max_gs; + init_attr->cap.max_inline_data = qp->max_inline_data; return &qp->ibqp; } @@ -634,6 +651,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, int nent; int err; + if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) + return ERR_PTR(-EINVAL); + if (context) { if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) return ERR_PTR(-EFAULT); @@ -1009,7 +1029,7 @@ static ssize_t show_rev(struct class_device *cdev, char *buf) static ssize_t show_fw_ver(struct class_device *cdev, char *buf) { struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev); - return sprintf(buf, "%x.%x.%x\n", (int) (dev->fw_ver >> 32), + return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32), (int) (dev->fw_ver >> 16) & 0xffff, (int) dev->fw_ver & 0xffff); } @@ -1058,6 +1078,26 @@ int mthca_register_device(struct mthca_dev *dev) strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION; + dev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ib_dev.node_type = IB_NODE_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; @@ -1077,6 +1117,7 @@ int mthca_register_device(struct mthca_dev *dev) if (dev->mthca_flags & MTHCA_FLAG_SRQ) { dev->ib_dev.create_srq = mthca_create_srq; + dev->ib_dev.modify_srq = mthca_modify_srq; dev->ib_dev.destroy_srq = mthca_destroy_srq; if (mthca_is_memfree(dev)) @@ -1135,10 +1176,13 @@ int mthca_register_device(struct mthca_dev *dev) } } + mthca_start_catas_poll(dev); + return 0; } void mthca_unregister_device(struct mthca_dev *dev) { + mthca_stop_catas_poll(dev); ib_unregister_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index bcd4b01a339..1e73947b470 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -251,6 +251,7 @@ struct mthca_qp { struct mthca_wq sq; enum ib_sig_type sq_policy; int send_wqe_offset; + int max_inline_data; u64 *wrid; union mthca_buf queue; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 5fa00669f9b..7450550db73 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -36,6 +36,8 @@ */ #include <linux/init.h> +#include <linux/string.h> +#include <linux/slab.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> @@ -338,8 +340,7 @@ static const struct { [UC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | - IB_QP_RQ_PSN | - IB_QP_MAX_DEST_RD_ATOMIC), + IB_QP_RQ_PSN), [RC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | @@ -368,8 +369,7 @@ static const struct { .trans = MTHCA_TRANS_RTR2RTS, .req_param = { [UD] = IB_QP_SQ_PSN, - [UC] = (IB_QP_SQ_PSN | - IB_QP_MAX_QP_RD_ATOMIC), + [UC] = IB_QP_SQ_PSN, [RC] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | @@ -446,8 +446,6 @@ static const struct { [UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [UC] = (IB_QP_AV | - IB_QP_MAX_QP_RD_ATOMIC | - IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | @@ -478,7 +476,7 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), - [UC] = (IB_QP_CUR_STATE), + [UC] = IB_QP_CUR_STATE, [RC] = (IB_QP_CUR_STATE | IB_QP_MIN_RNR_TIMER), [MLX] = (IB_QP_CUR_STATE | @@ -586,6 +584,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) return -EINVAL; } + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= dev->limits.pkey_table_len) { + mthca_dbg(dev, "PKey index (%u) too large. max is %d\n", + attr->pkey_index,dev->limits.pkey_table_len-1); + return -EINVAL; + } + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); @@ -725,15 +730,16 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } if (attr_mask & IB_QP_ACCESS_FLAGS) { + qp_context->params2 |= + cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ? + MTHCA_QP_BIT_RWE : 0); + /* - * Only enable RDMA/atomics if we have responder - * resources set to a non-zero value. + * Only enable RDMA reads and atomics if we have + * responder resources set to a non-zero value. */ if (qp->resp_depth) { qp_context->params2 |= - cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ? - MTHCA_QP_BIT_RWE : 0); - qp_context->params2 |= cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ? MTHCA_QP_BIT_RRE : 0); qp_context->params2 |= @@ -754,31 +760,27 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) if (qp->resp_depth && !attr->max_dest_rd_atomic) { /* * Lowering our responder resources to zero. - * Turn off RDMA/atomics as responder. - * (RWE/RRE/RAE in params2 already zero) + * Turn off reads RDMA and atomics as responder. + * (RRE/RAE in params2 already zero) */ - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE | - MTHCA_QP_OPTPAR_RRE | + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE | MTHCA_QP_OPTPAR_RAE); } if (!qp->resp_depth && attr->max_dest_rd_atomic) { /* * Increasing our responder resources from - * zero. Turn on RDMA/atomics as appropriate. + * zero. Turn on RDMA reads and atomics as + * appropriate. */ qp_context->params2 |= - cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_WRITE ? - MTHCA_QP_BIT_RWE : 0); - qp_context->params2 |= cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ? MTHCA_QP_BIT_RRE : 0); qp_context->params2 |= cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_QP_BIT_RAE : 0); - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE | - MTHCA_QP_OPTPAR_RRE | + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE | MTHCA_QP_OPTPAR_RAE); } @@ -869,7 +871,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); mthca_wq_init(&qp->sq); + qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); + mthca_wq_init(&qp->rq); + qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1); if (mthca_is_memfree(dev)) { *qp->sq.db = 0; @@ -880,6 +885,50 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) return err; } +static void mthca_adjust_qp_caps(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_qp *qp) +{ + int max_data_size; + + /* + * Calculate the maximum size of WQE s/g segments, excluding + * the next segment and other non-data segments. + */ + max_data_size = min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift) - + sizeof (struct mthca_next_seg); + + switch (qp->transport) { + case MLX: + max_data_size -= 2 * sizeof (struct mthca_data_seg); + break; + + case UD: + if (mthca_is_memfree(dev)) + max_data_size -= sizeof (struct mthca_arbel_ud_seg); + else + max_data_size -= sizeof (struct mthca_tavor_ud_seg); + break; + + default: + max_data_size -= sizeof (struct mthca_raddr_seg); + break; + } + + /* We don't support inline data for kernel QPs (yet). */ + if (!pd->ibpd.uobject) + qp->max_inline_data = 0; + else + qp->max_inline_data = max_data_size - MTHCA_INLINE_HEADER_SIZE; + + qp->sq.max_gs = min_t(int, dev->limits.max_sg, + max_data_size / sizeof (struct mthca_data_seg)); + qp->rq.max_gs = min_t(int, dev->limits.max_sg, + (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) - + sizeof (struct mthca_next_seg)) / + sizeof (struct mthca_data_seg)); +} + /* * Allocate and register buffer for WQEs. qp->rq.max, sq.max, * rq.max_gs and sq.max_gs must all be assigned. @@ -897,27 +946,53 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, size = sizeof (struct mthca_next_seg) + qp->rq.max_gs * sizeof (struct mthca_data_seg); + if (size > dev->limits.max_desc_sz) + return -EINVAL; + for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size; qp->rq.wqe_shift++) ; /* nothing */ - size = sizeof (struct mthca_next_seg) + - qp->sq.max_gs * sizeof (struct mthca_data_seg); + size = qp->sq.max_gs * sizeof (struct mthca_data_seg); switch (qp->transport) { case MLX: size += 2 * sizeof (struct mthca_data_seg); break; + case UD: - if (mthca_is_memfree(dev)) - size += sizeof (struct mthca_arbel_ud_seg); - else - size += sizeof (struct mthca_tavor_ud_seg); + size += mthca_is_memfree(dev) ? + sizeof (struct mthca_arbel_ud_seg) : + sizeof (struct mthca_tavor_ud_seg); + break; + + case UC: + size += sizeof (struct mthca_raddr_seg); + break; + + case RC: + size += sizeof (struct mthca_raddr_seg); + /* + * An atomic op will require an atomic segment, a + * remote address segment and one scatter entry. + */ + size = max_t(int, size, + sizeof (struct mthca_atomic_seg) + + sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_data_seg)); break; + default: - /* bind seg is as big as atomic + raddr segs */ - size += sizeof (struct mthca_bind_seg); + break; } + /* Make sure that we have enough space for a bind request */ + size = max_t(int, size, sizeof (struct mthca_bind_seg)); + + size += sizeof (struct mthca_next_seg); + + if (size > dev->limits.max_desc_sz) + return -EINVAL; + for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size; qp->sq.wqe_shift++) ; /* nothing */ @@ -1061,6 +1136,8 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, return ret; } + mthca_adjust_qp_caps(dev, pd, qp); + /* * If this is a userspace QP, we're done now. The doorbells * will be allocated and buffers will be initialized in @@ -1112,8 +1189,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, struct mthca_qp *qp) { /* Sanity check QP size before proceeding */ - if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 || - cap->max_send_sge > 64 || cap->max_recv_sge > 64) + if (cap->max_send_wr > dev->limits.max_wqes || + cap->max_recv_wr > dev->limits.max_wqes || + cap->max_send_sge > dev->limits.max_sg || + cap->max_recv_sge > dev->limits.max_sg) return -EINVAL; if (mthca_is_memfree(dev)) { @@ -1479,8 +1558,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } wqe += sizeof (struct mthca_atomic_seg); - size += sizeof (struct mthca_raddr_seg) / 16 + - sizeof (struct mthca_atomic_seg); + size += (sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_atomic_seg)) / 16; break; case IB_WR_RDMA_WRITE: @@ -1630,6 +1709,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); + __be32 doorbell[2]; unsigned long flags; int err = 0; int nreq; @@ -1647,6 +1727,22 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, ind = qp->rq.next_ind; for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); + doorbell[1] = cpu_to_be32(qp->qpn << 8); + + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + + qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; + size0 = 0; + } + if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { mthca_err(dev, "RQ %06x full (%u head, %u tail," " %d max, %d nreq)\n", qp->qpn, @@ -1704,8 +1800,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, out: if (likely(nreq)) { - __be32 doorbell[2]; - doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq); @@ -1728,6 +1822,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); + __be32 doorbell[2]; void *wqe; void *prev_wqe; unsigned long flags; @@ -1747,6 +1842,34 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ind = qp->sq.head & (qp->sq.max - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) { + nreq = 0; + + doorbell[0] = cpu_to_be32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) | + ((qp->sq.head & 0xffff) << 8) | + f0 | op0); + doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); + + qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB; + size0 = 0; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff); + + /* + * Make sure doorbell record is written before we + * write MMIO send doorbell. + */ + wmb(); + mthca_write64(doorbell, + dev->kar + MTHCA_SEND_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { mthca_err(dev, "SQ %06x full (%u head, %u tail," " %d max, %d nreq)\n", qp->qpn, @@ -1799,8 +1922,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } wqe += sizeof (struct mthca_atomic_seg); - size += sizeof (struct mthca_raddr_seg) / 16 + - sizeof (struct mthca_atomic_seg); + size += (sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_atomic_seg)) / 16; break; case IB_WR_RDMA_READ: @@ -1923,8 +2046,6 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, out: if (likely(nreq)) { - __be32 doorbell[2]; - doorbell[0] = cpu_to_be32((nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0); diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c index 4f995391dd1..df5e494a9d3 100644 --- a/drivers/infiniband/hw/mthca/mthca_reset.c +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -37,6 +37,7 @@ #include <linux/errno.h> #include <linux/pci.h> #include <linux/delay.h> +#include <linux/slab.h> #include "mthca_dev.h" #include "mthca_cmd.h" diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 18998d48c53..f7d234295ef 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -32,6 +32,9 @@ * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $ */ +#include <linux/slab.h> +#include <linux/string.h> + #include "mthca_dev.h" #include "mthca_cmd.h" #include "mthca_memfree.h" @@ -75,15 +78,16 @@ static void *get_wqe(struct mthca_srq *srq, int n) /* * Return a pointer to the location within a WQE that we're using as a - * link when the WQE is in the free list. We use an offset of 4 - * because in the Tavor case, posting a WQE may overwrite the first - * four bytes of the previous WQE. The offset avoids corrupting our - * free list if the WQE has already completed and been put on the free - * list when we post the next WQE. + * link when the WQE is in the free list. We use the imm field + * because in the Tavor case, posting a WQE may overwrite the next + * segment of the previous WQE, but a receive WQE will never touch the + * imm field. This avoids corrupting our free list if the previous + * WQE has already completed and been put on the free list when we + * post the next WQE. */ static inline int *wqe_to_link(void *wqe) { - return (int *) (wqe + 4); + return (int *) (wqe + offsetof(struct mthca_next_seg, imm)); } static void mthca_tavor_init_srq_context(struct mthca_dev *dev, @@ -186,7 +190,8 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, int err; /* Sanity check SRQ size before proceeding */ - if (attr->max_wr > 16 << 20 || attr->max_sge > 64) + if (attr->max_wr > dev->limits.max_srq_wqes || + attr->max_sge > dev->limits.max_sg) return -EINVAL; srq->max = attr->max_wr; @@ -332,6 +337,29 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) mthca_free_mailbox(dev, mailbox); } +int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + int ret; + u8 status; + + /* We don't support resizing SRQs (yet?) */ + if (attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + + if (attr_mask & IB_SRQ_LIMIT) { + ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status); + if (ret) + return ret; + if (status) + return -EINVAL; + } + + return 0; +} + void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type) { @@ -354,7 +382,7 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn, event.device = &dev->ib_dev; event.event = event_type; - event.element.srq = &srq->ibsrq; + event.element.srq = &srq->ibsrq; srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); out: @@ -389,6 +417,7 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); + __be32 doorbell[2]; unsigned long flags; int err = 0; int first_ind; @@ -404,6 +433,25 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, first_ind = srq->first_free; for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); + doorbell[1] = cpu_to_be32(srq->srqn << 8); + + /* + * Make sure that descriptors are written + * before doorbell is rung. + */ + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + + first_ind = srq->first_free; + } + ind = srq->first_free; if (ind < 0) { @@ -415,6 +463,14 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); + + if (next_ind < 0) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + break; + } + prev_wqe = srq->last; srq->last = wqe; @@ -458,8 +514,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } if (likely(nreq)) { - __be32 doorbell[2]; - doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq); @@ -506,6 +560,13 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); + if (next_ind < 0) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + break; + } + ((struct mthca_next_seg *) wqe)->nda_op = cpu_to_be32((next_ind << srq->wqe_shift) | 1); ((struct mthca_next_seg *) wqe)->ee_nds = 0; diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c index 1c8791ded6f..8e9219842be 100644 --- a/drivers/infiniband/hw/mthca/mthca_uar.c +++ b/drivers/infiniband/hw/mthca/mthca_uar.c @@ -32,6 +32,8 @@ * $Id$ */ +#include <asm/page.h> /* PAGE_SHIFT */ + #include "mthca_dev.h" #include "mthca_memfree.h" diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h index 41613ec8a04..bb015c6494c 100644 --- a/drivers/infiniband/hw/mthca/mthca_user.h +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -38,6 +38,12 @@ #include <linux/types.h> /* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define MTHCA_UVERBS_ABI_VERSION 1 + +/* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h index 1f4c0ff28f7..e7d2c1e8619 100644 --- a/drivers/infiniband/hw/mthca/mthca_wqe.h +++ b/drivers/infiniband/hw/mthca/mthca_wqe.h @@ -49,7 +49,9 @@ enum { }; enum { - MTHCA_INVAL_LKEY = 0x100 + MTHCA_INVAL_LKEY = 0x100, + MTHCA_TAVOR_MAX_WQES_PER_RECV_DB = 256, + MTHCA_ARBEL_MAX_WQES_PER_SEND_DB = 255 }; struct mthca_next_seg { diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4ea1c1ca85b..9923a15a999 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -100,7 +100,12 @@ struct ipoib_pseudoheader { struct ipoib_mcast; -struct ipoib_buf { +struct ipoib_rx_buf { + struct sk_buff *skb; + dma_addr_t mapping; +}; + +struct ipoib_tx_buf { struct sk_buff *skb; DECLARE_PCI_UNMAP_ADDR(mapping) }; @@ -150,14 +155,14 @@ struct ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; - struct ipoib_buf *rx_ring; + struct ipoib_rx_buf *rx_ring; - spinlock_t tx_lock; - struct ipoib_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; - struct ib_sge tx_sge; - struct ib_send_wr tx_wr; + spinlock_t tx_lock; + struct ipoib_tx_buf *tx_ring; + unsigned tx_head; + unsigned tx_tail; + struct ib_sge tx_sge; + struct ib_send_wr tx_wr; struct ib_wc ibwc[IPOIB_NUM_WC]; @@ -174,6 +179,7 @@ struct ipoib_dev_priv { #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct list_head fs_list; struct dentry *mcg_dentry; + struct dentry *path_dentry; #endif }; @@ -230,6 +236,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah) kref_put(&ah->ref, ipoib_free_ah); } +int ipoib_open(struct net_device *dev); int ipoib_add_pkey_attr(struct net_device *dev); void ipoib_send(struct net_device *dev, struct sk_buff *skb, @@ -262,8 +269,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush); void ipoib_mcast_dev_down(struct net_device *dev); void ipoib_mcast_dev_flush(struct net_device *dev); +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev); -void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter); int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter); void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, union ib_gid *gid, @@ -272,12 +279,18 @@ void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, unsigned int *complete, unsigned int *send_only); +struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev); +int ipoib_path_iter_next(struct ipoib_path_iter *iter); +void ipoib_path_iter_read(struct ipoib_path_iter *iter, + struct ipoib_path *path); +#endif + int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid); int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid); -int ipoib_qp_create(struct net_device *dev); +int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); void ipoib_transport_dev_cleanup(struct net_device *dev); @@ -291,13 +304,13 @@ void ipoib_pkey_poll(void *dev); int ipoib_pkey_dev_delay_open(struct net_device *dev); #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG -int ipoib_create_debug_file(struct net_device *dev); -void ipoib_delete_debug_file(struct net_device *dev); +void ipoib_create_debug_files(struct net_device *dev); +void ipoib_delete_debug_files(struct net_device *dev); int ipoib_register_debugfs(void); void ipoib_unregister_debugfs(void); #else -static inline int ipoib_create_debug_file(struct net_device *dev) { return 0; } -static inline void ipoib_delete_debug_file(struct net_device *dev) { } +static inline void ipoib_create_debug_files(struct net_device *dev) { } +static inline void ipoib_delete_debug_files(struct net_device *dev) { } static inline int ipoib_register_debugfs(void) { return 0; } static inline void ipoib_unregister_debugfs(void) { } #endif diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 38b150f775e..685258e3403 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -43,6 +43,18 @@ struct file_operations; static struct dentry *ipoib_root; +static void format_gid(union ib_gid *gid, char *buf) +{ + int i, n; + + for (n = 0, i = 0; i < 8; ++i) { + n += sprintf(buf + n, "%x", + be16_to_cpu(((__be16 *) gid->raw)[i])); + if (i < 7) + buf[n++] = ':'; + } +} + static void *ipoib_mcg_seq_start(struct seq_file *file, loff_t *pos) { struct ipoib_mcast_iter *iter; @@ -54,7 +66,7 @@ static void *ipoib_mcg_seq_start(struct seq_file *file, loff_t *pos) while (n--) { if (ipoib_mcast_iter_next(iter)) { - ipoib_mcast_iter_free(iter); + kfree(iter); return NULL; } } @@ -70,7 +82,7 @@ static void *ipoib_mcg_seq_next(struct seq_file *file, void *iter_ptr, (*pos)++; if (ipoib_mcast_iter_next(iter)) { - ipoib_mcast_iter_free(iter); + kfree(iter); return NULL; } @@ -87,32 +99,32 @@ static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr) struct ipoib_mcast_iter *iter = iter_ptr; char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"]; union ib_gid mgid; - int i, n; unsigned long created; unsigned int queuelen, complete, send_only; - if (iter) { - ipoib_mcast_iter_read(iter, &mgid, &created, &queuelen, - &complete, &send_only); + if (!iter) + return 0; - for (n = 0, i = 0; i < sizeof mgid / 2; ++i) { - n += sprintf(gid_buf + n, "%x", - be16_to_cpu(((__be16 *) mgid.raw)[i])); - if (i < sizeof mgid / 2 - 1) - gid_buf[n++] = ':'; - } - } + ipoib_mcast_iter_read(iter, &mgid, &created, &queuelen, + &complete, &send_only); - seq_printf(file, "GID: %*s", -(1 + (int) sizeof gid_buf), gid_buf); + format_gid(&mgid, gid_buf); seq_printf(file, - " created: %10ld queuelen: %4d complete: %d send_only: %d\n", - created, queuelen, complete, send_only); + "GID: %s\n" + " created: %10ld\n" + " queuelen: %9d\n" + " complete: %9s\n" + " send_only: %8s\n" + "\n", + gid_buf, created, queuelen, + complete ? "yes" : "no", + send_only ? "yes" : "no"); return 0; } -static struct seq_operations ipoib_seq_ops = { +static struct seq_operations ipoib_mcg_seq_ops = { .start = ipoib_mcg_seq_start, .next = ipoib_mcg_seq_next, .stop = ipoib_mcg_seq_stop, @@ -124,7 +136,7 @@ static int ipoib_mcg_open(struct inode *inode, struct file *file) struct seq_file *seq; int ret; - ret = seq_open(file, &ipoib_seq_ops); + ret = seq_open(file, &ipoib_mcg_seq_ops); if (ret) return ret; @@ -134,7 +146,7 @@ static int ipoib_mcg_open(struct inode *inode, struct file *file) return 0; } -static struct file_operations ipoib_fops = { +static struct file_operations ipoib_mcg_fops = { .owner = THIS_MODULE, .open = ipoib_mcg_open, .read = seq_read, @@ -142,25 +154,138 @@ static struct file_operations ipoib_fops = { .release = seq_release }; -int ipoib_create_debug_file(struct net_device *dev) +static void *ipoib_path_seq_start(struct seq_file *file, loff_t *pos) +{ + struct ipoib_path_iter *iter; + loff_t n = *pos; + + iter = ipoib_path_iter_init(file->private); + if (!iter) + return NULL; + + while (n--) { + if (ipoib_path_iter_next(iter)) { + kfree(iter); + return NULL; + } + } + + return iter; +} + +static void *ipoib_path_seq_next(struct seq_file *file, void *iter_ptr, + loff_t *pos) +{ + struct ipoib_path_iter *iter = iter_ptr; + + (*pos)++; + + if (ipoib_path_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +static void ipoib_path_seq_stop(struct seq_file *file, void *iter_ptr) +{ + /* nothing for now */ +} + +static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr) +{ + struct ipoib_path_iter *iter = iter_ptr; + char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"]; + struct ipoib_path path; + int rate; + + if (!iter) + return 0; + + ipoib_path_iter_read(iter, &path); + + format_gid(&path.pathrec.dgid, gid_buf); + + seq_printf(file, + "GID: %s\n" + " complete: %6s\n", + gid_buf, path.pathrec.dlid ? "yes" : "no"); + + if (path.pathrec.dlid) { + rate = ib_sa_rate_enum_to_int(path.pathrec.rate) * 25; + + seq_printf(file, + " DLID: 0x%04x\n" + " SL: %12d\n" + " rate: %*d%s Gb/sec\n", + be16_to_cpu(path.pathrec.dlid), + path.pathrec.sl, + 10 - ((rate % 10) ? 2 : 0), + rate / 10, rate % 10 ? ".5" : ""); + } + + seq_putc(file, '\n'); + + return 0; +} + +static struct seq_operations ipoib_path_seq_ops = { + .start = ipoib_path_seq_start, + .next = ipoib_path_seq_next, + .stop = ipoib_path_seq_stop, + .show = ipoib_path_seq_show, +}; + +static int ipoib_path_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &ipoib_path_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->u.generic_ip; + + return 0; +} + +static struct file_operations ipoib_path_fops = { + .owner = THIS_MODULE, + .open = ipoib_path_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +void ipoib_create_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - char name[IFNAMSIZ + sizeof "_mcg"]; + char name[IFNAMSIZ + sizeof "_path"]; snprintf(name, sizeof name, "%s_mcg", dev->name); - priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, - ipoib_root, dev, &ipoib_fops); - - return priv->mcg_dentry ? 0 : -ENOMEM; + ipoib_root, dev, &ipoib_mcg_fops); + if (!priv->mcg_dentry) + ipoib_warn(priv, "failed to create mcg debug file\n"); + + snprintf(name, sizeof name, "%s_path", dev->name); + priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, + ipoib_root, dev, &ipoib_path_fops); + if (!priv->path_dentry) + ipoib_warn(priv, "failed to create path debug file\n"); } -void ipoib_delete_debug_file(struct net_device *dev) +void ipoib_delete_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); if (priv->mcg_dentry) debugfs_remove(priv->mcg_dentry); + if (priv->path_dentry) + debugfs_remove(priv->path_dentry); } int ipoib_register_debugfs(void) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f7440096b5e..23885801b6d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -95,57 +95,65 @@ void ipoib_free_ah(struct kref *kref) } } -static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv, - unsigned int wr_id, - dma_addr_t addr) +static int ipoib_ib_post_receive(struct net_device *dev, int id) { - struct ib_sge list = { - .addr = addr, - .length = IPOIB_BUF_SIZE, - .lkey = priv->mr->lkey, - }; - struct ib_recv_wr param = { - .wr_id = wr_id | IPOIB_OP_RECV, - .sg_list = &list, - .num_sge = 1, - }; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_sge list; + struct ib_recv_wr param; struct ib_recv_wr *bad_wr; + int ret; + + list.addr = priv->rx_ring[id].mapping; + list.length = IPOIB_BUF_SIZE; + list.lkey = priv->mr->lkey; + + param.next = NULL; + param.wr_id = id | IPOIB_OP_RECV; + param.sg_list = &list; + param.num_sge = 1; + + ret = ib_post_recv(priv->qp, ¶m, &bad_wr); + if (unlikely(ret)) { + ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); + dma_unmap_single(priv->ca->dma_device, + priv->rx_ring[id].mapping, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + dev_kfree_skb_any(priv->rx_ring[id].skb); + priv->rx_ring[id].skb = NULL; + } - return ib_post_recv(priv->qp, ¶m, &bad_wr); + return ret; } -static int ipoib_ib_post_receive(struct net_device *dev, int id) +static int ipoib_alloc_rx_skb(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; dma_addr_t addr; - int ret; skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); - if (!skb) { - ipoib_warn(priv, "failed to allocate receive buffer\n"); - - priv->rx_ring[id].skb = NULL; + if (!skb) return -ENOMEM; - } - skb_reserve(skb, 4); /* 16 byte align IP header */ - priv->rx_ring[id].skb = skb; + + /* + * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte + * header. So we need 4 more bytes to get to 48 and align the + * IP header to a multiple of 16. + */ + skb_reserve(skb, 4); + addr = dma_map_single(priv->ca->dma_device, skb->data, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr); - - ret = ipoib_ib_receive(priv, id, addr); - if (ret) { - ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n", - id, ret); - dma_unmap_single(priv->ca->dma_device, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(addr))) { dev_kfree_skb_any(skb); - priv->rx_ring[id].skb = NULL; + return -EIO; } - return ret; + priv->rx_ring[id].skb = skb; + priv->rx_ring[id].mapping = addr; + + return 0; } static int ipoib_ib_post_receives(struct net_device *dev) @@ -154,6 +162,10 @@ static int ipoib_ib_post_receives(struct net_device *dev) int i; for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) { + if (ipoib_alloc_rx_skb(dev, i)) { + ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); + return -ENOMEM; + } if (ipoib_ib_post_receive(dev, i)) { ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); return -EIO; @@ -176,28 +188,36 @@ static void ipoib_ib_handle_wc(struct net_device *dev, wr_id &= ~IPOIB_OP_RECV; if (wr_id < IPOIB_RX_RING_SIZE) { - struct sk_buff *skb = priv->rx_ring[wr_id].skb; - - priv->rx_ring[wr_id].skb = NULL; - - dma_unmap_single(priv->ca->dma_device, - pci_unmap_addr(&priv->rx_ring[wr_id], - mapping), - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); + struct sk_buff *skb = priv->rx_ring[wr_id].skb; + dma_addr_t addr = priv->rx_ring[wr_id].mapping; - if (wc->status != IB_WC_SUCCESS) { + if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); + priv->rx_ring[wr_id].skb = NULL; return; } + /* + * If we can't allocate a new RX buffer, dump + * this packet and reuse the old buffer. + */ + if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + ++priv->stats.rx_dropped; + goto repost; + } + ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + skb_put(skb, wc->byte_len); skb_pull(skb, IB_GRH_BYTES); @@ -220,8 +240,8 @@ static void ipoib_ib_handle_wc(struct net_device *dev, dev_kfree_skb_any(skb); } - /* repost receive */ - if (ipoib_ib_post_receive(dev, wr_id)) + repost: + if (unlikely(ipoib_ib_post_receive(dev, wr_id))) ipoib_warn(priv, "ipoib_ib_post_receive failed " "for buf %d\n", wr_id); } else @@ -229,7 +249,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev, wr_id); } else { - struct ipoib_buf *tx_req; + struct ipoib_tx_buf *tx_req; unsigned long flags; if (wr_id >= IPOIB_TX_RING_SIZE) { @@ -302,7 +322,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_buf *tx_req; + struct ipoib_tx_buf *tx_req; dma_addr_t addr; if (skb->len > dev->mtu + INFINIBAND_ALEN) { @@ -387,9 +407,9 @@ int ipoib_ib_dev_open(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; - ret = ipoib_qp_create(dev); + ret = ipoib_init_qp(dev); if (ret) { - ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret); + ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); return -1; } @@ -466,15 +486,16 @@ int ipoib_ib_dev_stop(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_attr qp_attr; - int attr_mask; unsigned long begin; - struct ipoib_buf *tx_req; + struct ipoib_tx_buf *tx_req; int i; - /* Kill the existing QP and allocate a new one */ + /* + * Move our QP to the error state and then reinitialize in + * when all work requests have completed or have been flushed. + */ qp_attr.qp_state = IB_QPS_ERR; - attr_mask = IB_QP_STATE; - if (ib_modify_qp(priv->qp, &qp_attr, attr_mask)) + if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) ipoib_warn(priv, "Failed to modify QP to ERROR state\n"); /* Wait for all sends and receives to complete */ @@ -521,8 +542,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) timeout: qp_attr.qp_state = IB_QPS_RESET; - attr_mask = IB_QP_STATE; - if (ib_modify_qp(priv->qp, &qp_attr, attr_mask)) + if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) ipoib_warn(priv, "Failed to modify QP to RESET state\n"); /* Wait for all AHs to be reaped */ @@ -588,9 +608,13 @@ void ipoib_ib_dev_flush(void *_dev) if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) ipoib_ib_dev_up(dev); + down(&priv->vlan_mutex); + /* Flush any child interfaces too */ list_for_each_entry(cpriv, &priv->child_intfs, list) ipoib_ib_dev_flush(&cpriv->dev); + + up(&priv->vlan_mutex); } void ipoib_ib_dev_cleanup(struct net_device *dev) @@ -616,7 +640,6 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) * Bug #2507. This implementation will probably be removed when the P_Key * change async notification is available. */ -int ipoib_open(struct net_device *dev); static void ipoib_pkey_dev_check_presence(struct net_device *dev) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 704f48e0b6a..475d98fa9e2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -58,6 +58,11 @@ module_param_named(debug_level, ipoib_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); #endif +struct ipoib_path_iter { + struct net_device *dev; + struct ipoib_path path; +}; + static const u8 ipv4_bcast_addr[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, @@ -89,8 +94,10 @@ int ipoib_open(struct net_device *dev) if (ipoib_ib_dev_open(dev)) return -EINVAL; - if (ipoib_ib_dev_up(dev)) + if (ipoib_ib_dev_up(dev)) { + ipoib_ib_dev_stop(dev); return -EINVAL; + } if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; @@ -250,6 +257,64 @@ static void path_free(struct net_device *dev, struct ipoib_path *path) kfree(path); } +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG + +struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev) +{ + struct ipoib_path_iter *iter; + + iter = kmalloc(sizeof *iter, GFP_KERNEL); + if (!iter) + return NULL; + + iter->dev = dev; + memset(iter->path.pathrec.dgid.raw, 0, 16); + + if (ipoib_path_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +int ipoib_path_iter_next(struct ipoib_path_iter *iter) +{ + struct ipoib_dev_priv *priv = netdev_priv(iter->dev); + struct rb_node *n; + struct ipoib_path *path; + int ret = 1; + + spin_lock_irq(&priv->lock); + + n = rb_first(&priv->path_tree); + + while (n) { + path = rb_entry(n, struct ipoib_path, rb_node); + + if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw, + sizeof (union ib_gid)) < 0) { + iter->path = *path; + ret = 0; + break; + } + + n = rb_next(n); + } + + spin_unlock_irq(&priv->lock); + + return ret; +} + +void ipoib_path_iter_read(struct ipoib_path_iter *iter, + struct ipoib_path *path) +{ + *path = iter->path; +} + +#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ + void ipoib_flush_paths(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -335,9 +400,9 @@ static void path_rec_completion(int status, while ((skb = __skb_dequeue(&neigh->queue))) __skb_queue_tail(&skqueue, skb); } - } else - path->query = NULL; + } + path->query = NULL; complete(&path->done); spin_unlock_irqrestore(&priv->lock, flags); @@ -356,19 +421,15 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; - path = kmalloc(sizeof *path, GFP_ATOMIC); + path = kzalloc(sizeof *path, GFP_ATOMIC); if (!path) return NULL; - path->dev = dev; - path->pathrec.dlid = 0; - path->ah = NULL; + path->dev = dev; skb_queue_head_init(&path->queue); INIT_LIST_HEAD(&path->neigh_list); - path->query = NULL; - init_completion(&path->done); memcpy(path->pathrec.dgid.raw, gid->raw, sizeof (union ib_gid)); path->pathrec.sgid = priv->local_gid; @@ -386,6 +447,8 @@ static int path_rec_start(struct net_device *dev, ipoib_dbg(priv, "Start path record lookup for " IPOIB_GID_FMT "\n", IPOIB_GID_ARG(path->pathrec.dgid)); + init_completion(&path->done); + path->query_id = ib_sa_path_rec_get(priv->ca, priv->port, &path->pathrec, @@ -474,7 +537,7 @@ err: spin_unlock(&priv->lock); } -static void path_lookup(struct sk_buff *skb, struct net_device *dev) +static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(skb->dev); @@ -551,11 +614,8 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) struct ipoib_neigh *neigh; unsigned long flags; - local_irq_save(flags); - if (!spin_trylock(&priv->tx_lock)) { - local_irq_restore(flags); + if (!spin_trylock_irqsave(&priv->tx_lock, flags)) return NETDEV_TX_LOCKED; - } /* * Check if our queue is stopped. Since we have the LLTX bit @@ -569,7 +629,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) if (skb->dst && skb->dst->neighbour) { if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { - path_lookup(skb, dev); + ipoib_path_lookup(skb, dev); goto out; } @@ -637,8 +697,11 @@ static void ipoib_timeout(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_warn(priv, "transmit timeout: latency %ld\n", - jiffies - dev->trans_start); + ipoib_warn(priv, "transmit timeout: latency %d msecs\n", + jiffies_to_msecs(jiffies - dev->trans_start)); + ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n", + netif_queue_stopped(dev), + priv->tx_head, priv->tx_tail); /* XXX reset QP, etc. */ } @@ -729,25 +792,21 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) /* Allocate RX/TX "rings" to hold queued skbs */ - priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf), + priv->rx_ring = kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf), GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", ca->name, IPOIB_RX_RING_SIZE); goto out; } - memset(priv->rx_ring, 0, - IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf)); - priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf), + priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf), GFP_KERNEL); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", ca->name, IPOIB_TX_RING_SIZE); goto out_rx_ring_cleanup; } - memset(priv->tx_ring, 0, - IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf)); /* priv->tx_head & tx_tail are already 0 */ @@ -770,7 +829,7 @@ void ipoib_dev_cleanup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv; - ipoib_delete_debug_file(dev); + ipoib_delete_debug_files(dev); /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { @@ -804,10 +863,6 @@ static void ipoib_setup(struct net_device *dev) dev->watchdog_timeo = HZ; - dev->rebuild_header = NULL; - dev->set_mac_address = NULL; - dev->header_cache_update = NULL; - dev->flags |= IFF_BROADCAST | IFF_MULTICAST; /* @@ -983,8 +1038,7 @@ static struct net_device *ipoib_add_port(const char *format, goto register_failed; } - if (ipoib_create_debug_file(priv->dev)) - goto debug_failed; + ipoib_create_debug_files(priv->dev); if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; @@ -998,9 +1052,7 @@ static struct net_device *ipoib_add_port(const char *format, return priv->dev; sysfs_failed: - ipoib_delete_debug_file(priv->dev); - -debug_failed: + ipoib_delete_debug_files(priv->dev); unregister_netdev(priv->dev); register_failed: diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 36ce29836bf..ef3ee035bbc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -120,12 +120,8 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) if (mcast->ah) ipoib_put_ah(mcast->ah); - while (!skb_queue_empty(&mcast->pkt_queue)) { - struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); - - skb->dev = dev; - dev_kfree_skb_any(skb); - } + while (!skb_queue_empty(&mcast->pkt_queue)) + dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); kfree(mcast); } @@ -135,26 +131,18 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, { struct ipoib_mcast *mcast; - mcast = kmalloc(sizeof (*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC); + mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); if (!mcast) return NULL; - memset(mcast, 0, sizeof (*mcast)); - - init_completion(&mcast->done); - mcast->dev = dev; mcast->created = jiffies; mcast->backoff = 1; - mcast->logcount = 0; INIT_LIST_HEAD(&mcast->list); INIT_LIST_HEAD(&mcast->neigh_list); skb_queue_head_init(&mcast->pkt_queue); - mcast->ah = NULL; - mcast->query = NULL; - return mcast; } @@ -319,13 +307,8 @@ ipoib_mcast_sendonly_join_complete(int status, IPOIB_GID_ARG(mcast->mcmember.mgid), status); /* Flush out any queued packets */ - while (!skb_queue_empty(&mcast->pkt_queue)) { - struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); - - skb->dev = dev; - - dev_kfree_skb_any(skb); - } + while (!skb_queue_empty(&mcast->pkt_queue)) + dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); /* Clear the busy flag so we try again */ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); @@ -361,6 +344,8 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) rec.port_gid = priv->local_gid; rec.pkey = cpu_to_be16(priv->pkey); + init_completion(&mcast->done); + ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | @@ -480,6 +465,8 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, rec.traffic_class = priv->broadcast->mcmember.traffic_class; } + init_completion(&mcast->done); + ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask, mcast->backoff * 1000, GFP_ATOMIC, ipoib_mcast_join_complete, @@ -919,6 +906,8 @@ void ipoib_mcast_restart_task(void *dev_ptr) ipoib_mcast_start_thread(dev); } +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG + struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) { struct ipoib_mcast_iter *iter; @@ -928,21 +917,16 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) return NULL; iter->dev = dev; - memset(iter->mgid.raw, 0, sizeof iter->mgid); + memset(iter->mgid.raw, 0, 16); if (ipoib_mcast_iter_next(iter)) { - ipoib_mcast_iter_free(iter); + kfree(iter); return NULL; } return iter; } -void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter) -{ - kfree(iter); -} - int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) { struct ipoib_dev_priv *priv = netdev_priv(iter->dev); @@ -991,3 +975,5 @@ void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, *complete = iter->complete; *send_only = iter->send_only; } + +#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 79f59d0563e..e829e10400e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -41,7 +41,6 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_attr *qp_attr; - int attr_mask; int ret; u16 pkey_index; @@ -59,8 +58,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) /* set correct QKey for QP */ qp_attr->qkey = priv->qkey; - attr_mask = IB_QP_QKEY; - ret = ib_modify_qp(priv->qp, qp_attr, attr_mask); + ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); if (ret) { ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); goto out; @@ -92,7 +90,7 @@ int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) return ret; } -int ipoib_qp_create(struct net_device *dev) +int ipoib_init_qp(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; @@ -149,10 +147,11 @@ int ipoib_qp_create(struct net_device *dev) return 0; out_fail: - ib_destroy_qp(priv->qp); - priv->qp = NULL; + qp_attr.qp_state = IB_QPS_RESET; + if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) + ipoib_warn(priv, "Failed to modify QP to RESET state\n"); - return -EINVAL; + return ret; } int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 332d730e60c..d280b341a37 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -113,8 +113,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) priv->parent = ppriv->dev; - if (ipoib_create_debug_file(priv->dev)) - goto debug_failed; + ipoib_create_debug_files(priv->dev); if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; @@ -130,9 +129,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) return 0; sysfs_failed: - ipoib_delete_debug_file(priv->dev); - -debug_failed: + ipoib_delete_debug_files(priv->dev); unregister_netdev(priv->dev); register_failed: diff --git a/drivers/infiniband/ulp/srp/Kbuild b/drivers/infiniband/ulp/srp/Kbuild new file mode 100644 index 00000000000..a16c73c667c --- /dev/null +++ b/drivers/infiniband/ulp/srp/Kbuild @@ -0,0 +1 @@ +obj-$(CONFIG_INFINIBAND_SRP) += ib_srp.o diff --git a/drivers/infiniband/ulp/srp/Kconfig b/drivers/infiniband/ulp/srp/Kconfig new file mode 100644 index 00000000000..8fe3be4e991 --- /dev/null +++ b/drivers/infiniband/ulp/srp/Kconfig @@ -0,0 +1,11 @@ +config INFINIBAND_SRP + tristate "InfiniBand SCSI RDMA Protocol" + depends on INFINIBAND && SCSI + ---help--- + Support for the SCSI RDMA Protocol over InfiniBand. This + allows you to access storage devices that speak SRP over + InfiniBand. + + The SRP protocol is defined by the INCITS T10 technical + committee. See <http://www.t10.org/>. + diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c new file mode 100644 index 00000000000..ee9fe226ae9 --- /dev/null +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -0,0 +1,1704 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_srp.c 3932 2005-11-01 17:19:29Z roland $ + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/parser.h> +#include <linux/random.h> + +#include <asm/atomic.h> + +#include <scsi/scsi.h> +#include <scsi/scsi_device.h> +#include <scsi/scsi_dbg.h> +#include <scsi/srp.h> + +#include <rdma/ib_cache.h> + +#include "ib_srp.h" + +#define DRV_NAME "ib_srp" +#define PFX DRV_NAME ": " +#define DRV_VERSION "0.2" +#define DRV_RELDATE "November 1, 2005" + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator " + "v" DRV_VERSION " (" DRV_RELDATE ")"); +MODULE_LICENSE("Dual BSD/GPL"); + +static int topspin_workarounds = 1; + +module_param(topspin_workarounds, int, 0444); +MODULE_PARM_DESC(topspin_workarounds, + "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); + +static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad }; + +static void srp_add_one(struct ib_device *device); +static void srp_remove_one(struct ib_device *device); +static void srp_completion(struct ib_cq *cq, void *target_ptr); +static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); + +static struct ib_client srp_client = { + .name = "srp", + .add = srp_add_one, + .remove = srp_remove_one +}; + +static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) +{ + return (struct srp_target_port *) host->hostdata; +} + +static const char *srp_target_info(struct Scsi_Host *host) +{ + return host_to_target(host)->target_name; +} + +static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size, + gfp_t gfp_mask, + enum dma_data_direction direction) +{ + struct srp_iu *iu; + + iu = kmalloc(sizeof *iu, gfp_mask); + if (!iu) + goto out; + + iu->buf = kzalloc(size, gfp_mask); + if (!iu->buf) + goto out_free_iu; + + iu->dma = dma_map_single(host->dev->dma_device, iu->buf, size, direction); + if (dma_mapping_error(iu->dma)) + goto out_free_buf; + + iu->size = size; + iu->direction = direction; + + return iu; + +out_free_buf: + kfree(iu->buf); +out_free_iu: + kfree(iu); +out: + return NULL; +} + +static void srp_free_iu(struct srp_host *host, struct srp_iu *iu) +{ + if (!iu) + return; + + dma_unmap_single(host->dev->dma_device, iu->dma, iu->size, iu->direction); + kfree(iu->buf); + kfree(iu); +} + +static void srp_qp_event(struct ib_event *event, void *context) +{ + printk(KERN_ERR PFX "QP event %d\n", event->event); +} + +static int srp_init_qp(struct srp_target_port *target, + struct ib_qp *qp) +{ + struct ib_qp_attr *attr; + int ret; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) + return -ENOMEM; + + ret = ib_find_cached_pkey(target->srp_host->dev, + target->srp_host->port, + be16_to_cpu(target->path.pkey), + &attr->pkey_index); + if (ret) + goto out; + + attr->qp_state = IB_QPS_INIT; + attr->qp_access_flags = (IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE); + attr->port_num = target->srp_host->port; + + ret = ib_modify_qp(qp, attr, + IB_QP_STATE | + IB_QP_PKEY_INDEX | + IB_QP_ACCESS_FLAGS | + IB_QP_PORT); + +out: + kfree(attr); + return ret; +} + +static int srp_create_target_ib(struct srp_target_port *target) +{ + struct ib_qp_init_attr *init_attr; + int ret; + + init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); + if (!init_attr) + return -ENOMEM; + + target->cq = ib_create_cq(target->srp_host->dev, srp_completion, + NULL, target, SRP_CQ_SIZE); + if (IS_ERR(target->cq)) { + ret = PTR_ERR(target->cq); + goto out; + } + + ib_req_notify_cq(target->cq, IB_CQ_NEXT_COMP); + + init_attr->event_handler = srp_qp_event; + init_attr->cap.max_send_wr = SRP_SQ_SIZE; + init_attr->cap.max_recv_wr = SRP_RQ_SIZE; + init_attr->cap.max_recv_sge = 1; + init_attr->cap.max_send_sge = 1; + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->qp_type = IB_QPT_RC; + init_attr->send_cq = target->cq; + init_attr->recv_cq = target->cq; + + target->qp = ib_create_qp(target->srp_host->pd, init_attr); + if (IS_ERR(target->qp)) { + ret = PTR_ERR(target->qp); + ib_destroy_cq(target->cq); + goto out; + } + + ret = srp_init_qp(target, target->qp); + if (ret) { + ib_destroy_qp(target->qp); + ib_destroy_cq(target->cq); + goto out; + } + +out: + kfree(init_attr); + return ret; +} + +static void srp_free_target_ib(struct srp_target_port *target) +{ + int i; + + ib_destroy_qp(target->qp); + ib_destroy_cq(target->cq); + + for (i = 0; i < SRP_RQ_SIZE; ++i) + srp_free_iu(target->srp_host, target->rx_ring[i]); + for (i = 0; i < SRP_SQ_SIZE + 1; ++i) + srp_free_iu(target->srp_host, target->tx_ring[i]); +} + +static void srp_path_rec_completion(int status, + struct ib_sa_path_rec *pathrec, + void *target_ptr) +{ + struct srp_target_port *target = target_ptr; + + target->status = status; + if (status) + printk(KERN_ERR PFX "Got failed path rec status %d\n", status); + else + target->path = *pathrec; + complete(&target->done); +} + +static int srp_lookup_path(struct srp_target_port *target) +{ + target->path.numb_path = 1; + + init_completion(&target->done); + + target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev, + target->srp_host->port, + &target->path, + IB_SA_PATH_REC_DGID | + IB_SA_PATH_REC_SGID | + IB_SA_PATH_REC_NUMB_PATH | + IB_SA_PATH_REC_PKEY, + SRP_PATH_REC_TIMEOUT_MS, + GFP_KERNEL, + srp_path_rec_completion, + target, &target->path_query); + if (target->path_query_id < 0) + return target->path_query_id; + + wait_for_completion(&target->done); + + if (target->status < 0) + printk(KERN_WARNING PFX "Path record query failed\n"); + + return target->status; +} + +static int srp_send_req(struct srp_target_port *target) +{ + struct { + struct ib_cm_req_param param; + struct srp_login_req priv; + } *req = NULL; + int status; + + req = kzalloc(sizeof *req, GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->param.primary_path = &target->path; + req->param.alternate_path = NULL; + req->param.service_id = target->service_id; + req->param.qp_num = target->qp->qp_num; + req->param.qp_type = target->qp->qp_type; + req->param.private_data = &req->priv; + req->param.private_data_len = sizeof req->priv; + req->param.flow_control = 1; + + get_random_bytes(&req->param.starting_psn, 4); + req->param.starting_psn &= 0xffffff; + + /* + * Pick some arbitrary defaults here; we could make these + * module parameters if anyone cared about setting them. + */ + req->param.responder_resources = 4; + req->param.remote_cm_response_timeout = 20; + req->param.local_cm_response_timeout = 20; + req->param.retry_count = 7; + req->param.rnr_retry_count = 7; + req->param.max_cm_retries = 15; + + req->priv.opcode = SRP_LOGIN_REQ; + req->priv.tag = 0; + req->priv.req_it_iu_len = cpu_to_be32(SRP_MAX_IU_LEN); + req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | + SRP_BUF_FORMAT_INDIRECT); + memcpy(req->priv.initiator_port_id, target->srp_host->initiator_port_id, 16); + /* + * Topspin/Cisco SRP targets will reject our login unless we + * zero out the first 8 bytes of our initiator port ID. The + * second 8 bytes must be our local node GUID, but we always + * use that anyway. + */ + if (topspin_workarounds && !memcmp(&target->ioc_guid, topspin_oui, 3)) { + printk(KERN_DEBUG PFX "Topspin/Cisco initiator port ID workaround " + "activated for target GUID %016llx\n", + (unsigned long long) be64_to_cpu(target->ioc_guid)); + memset(req->priv.initiator_port_id, 0, 8); + } + memcpy(req->priv.target_port_id, &target->id_ext, 8); + memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8); + + status = ib_send_cm_req(target->cm_id, &req->param); + + kfree(req); + + return status; +} + +static void srp_disconnect_target(struct srp_target_port *target) +{ + /* XXX should send SRP_I_LOGOUT request */ + + init_completion(&target->done); + ib_send_cm_dreq(target->cm_id, NULL, 0); + wait_for_completion(&target->done); +} + +static void srp_remove_work(void *target_ptr) +{ + struct srp_target_port *target = target_ptr; + + spin_lock_irq(target->scsi_host->host_lock); + if (target->state != SRP_TARGET_DEAD) { + spin_unlock_irq(target->scsi_host->host_lock); + scsi_host_put(target->scsi_host); + return; + } + target->state = SRP_TARGET_REMOVED; + spin_unlock_irq(target->scsi_host->host_lock); + + down(&target->srp_host->target_mutex); + list_del(&target->list); + up(&target->srp_host->target_mutex); + + scsi_remove_host(target->scsi_host); + ib_destroy_cm_id(target->cm_id); + srp_free_target_ib(target); + scsi_host_put(target->scsi_host); + /* And another put to really free the target port... */ + scsi_host_put(target->scsi_host); +} + +static int srp_connect_target(struct srp_target_port *target) +{ + int ret; + + ret = srp_lookup_path(target); + if (ret) + return ret; + + while (1) { + init_completion(&target->done); + ret = srp_send_req(target); + if (ret) + return ret; + wait_for_completion(&target->done); + + /* + * The CM event handling code will set status to + * SRP_PORT_REDIRECT if we get a port redirect REJ + * back, or SRP_DLID_REDIRECT if we get a lid/qp + * redirect REJ back. + */ + switch (target->status) { + case 0: + return 0; + + case SRP_PORT_REDIRECT: + ret = srp_lookup_path(target); + if (ret) + return ret; + break; + + case SRP_DLID_REDIRECT: + break; + + default: + return target->status; + } + } +} + +static int srp_reconnect_target(struct srp_target_port *target) +{ + struct ib_cm_id *new_cm_id; + struct ib_qp_attr qp_attr; + struct srp_request *req; + struct ib_wc wc; + int ret; + int i; + + spin_lock_irq(target->scsi_host->host_lock); + if (target->state != SRP_TARGET_LIVE) { + spin_unlock_irq(target->scsi_host->host_lock); + return -EAGAIN; + } + target->state = SRP_TARGET_CONNECTING; + spin_unlock_irq(target->scsi_host->host_lock); + + srp_disconnect_target(target); + /* + * Now get a new local CM ID so that we avoid confusing the + * target in case things are really fouled up. + */ + new_cm_id = ib_create_cm_id(target->srp_host->dev, + srp_cm_handler, target); + if (IS_ERR(new_cm_id)) { + ret = PTR_ERR(new_cm_id); + goto err; + } + ib_destroy_cm_id(target->cm_id); + target->cm_id = new_cm_id; + + qp_attr.qp_state = IB_QPS_RESET; + ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE); + if (ret) + goto err; + + ret = srp_init_qp(target, target->qp); + if (ret) + goto err; + + while (ib_poll_cq(target->cq, 1, &wc) > 0) + ; /* nothing */ + + list_for_each_entry(req, &target->req_queue, list) { + req->scmnd->result = DID_RESET << 16; + req->scmnd->scsi_done(req->scmnd); + } + + target->rx_head = 0; + target->tx_head = 0; + target->tx_tail = 0; + target->req_head = 0; + for (i = 0; i < SRP_SQ_SIZE - 1; ++i) + target->req_ring[i].next = i + 1; + target->req_ring[SRP_SQ_SIZE - 1].next = -1; + INIT_LIST_HEAD(&target->req_queue); + + ret = srp_connect_target(target); + if (ret) + goto err; + + spin_lock_irq(target->scsi_host->host_lock); + if (target->state == SRP_TARGET_CONNECTING) { + ret = 0; + target->state = SRP_TARGET_LIVE; + } else + ret = -EAGAIN; + spin_unlock_irq(target->scsi_host->host_lock); + + return ret; + +err: + printk(KERN_ERR PFX "reconnect failed (%d), removing target port.\n", ret); + + /* + * We couldn't reconnect, so kill our target port off. + * However, we have to defer the real removal because we might + * be in the context of the SCSI error handler now, which + * would deadlock if we call scsi_remove_host(). + */ + spin_lock_irq(target->scsi_host->host_lock); + if (target->state == SRP_TARGET_CONNECTING) { + target->state = SRP_TARGET_DEAD; + INIT_WORK(&target->work, srp_remove_work, target); + schedule_work(&target->work); + } + spin_unlock_irq(target->scsi_host->host_lock); + + return ret; +} + +static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, + struct srp_request *req) +{ + struct srp_cmd *cmd = req->cmd->buf; + int len; + u8 fmt; + + if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE) + return sizeof (struct srp_cmd); + + if (scmnd->sc_data_direction != DMA_FROM_DEVICE && + scmnd->sc_data_direction != DMA_TO_DEVICE) { + printk(KERN_WARNING PFX "Unhandled data direction %d\n", + scmnd->sc_data_direction); + return -EINVAL; + } + + if (scmnd->use_sg) { + struct scatterlist *scat = scmnd->request_buffer; + int n; + int i; + + n = dma_map_sg(target->srp_host->dev->dma_device, + scat, scmnd->use_sg, scmnd->sc_data_direction); + + if (n == 1) { + struct srp_direct_buf *buf = (void *) cmd->add_data; + + fmt = SRP_DATA_DESC_DIRECT; + + buf->va = cpu_to_be64(sg_dma_address(scat)); + buf->key = cpu_to_be32(target->srp_host->mr->rkey); + buf->len = cpu_to_be32(sg_dma_len(scat)); + + len = sizeof (struct srp_cmd) + + sizeof (struct srp_direct_buf); + } else { + struct srp_indirect_buf *buf = (void *) cmd->add_data; + u32 datalen = 0; + + fmt = SRP_DATA_DESC_INDIRECT; + + if (scmnd->sc_data_direction == DMA_TO_DEVICE) + cmd->data_out_desc_cnt = n; + else + cmd->data_in_desc_cnt = n; + + buf->table_desc.va = cpu_to_be64(req->cmd->dma + + sizeof *cmd + + sizeof *buf); + buf->table_desc.key = + cpu_to_be32(target->srp_host->mr->rkey); + buf->table_desc.len = + cpu_to_be32(n * sizeof (struct srp_direct_buf)); + + for (i = 0; i < n; ++i) { + buf->desc_list[i].va = cpu_to_be64(sg_dma_address(&scat[i])); + buf->desc_list[i].key = + cpu_to_be32(target->srp_host->mr->rkey); + buf->desc_list[i].len = cpu_to_be32(sg_dma_len(&scat[i])); + + datalen += sg_dma_len(&scat[i]); + } + + buf->len = cpu_to_be32(datalen); + + len = sizeof (struct srp_cmd) + + sizeof (struct srp_indirect_buf) + + n * sizeof (struct srp_direct_buf); + } + } else { + struct srp_direct_buf *buf = (void *) cmd->add_data; + dma_addr_t dma; + + dma = dma_map_single(target->srp_host->dev->dma_device, + scmnd->request_buffer, scmnd->request_bufflen, + scmnd->sc_data_direction); + if (dma_mapping_error(dma)) { + printk(KERN_WARNING PFX "unable to map %p/%d (dir %d)\n", + scmnd->request_buffer, (int) scmnd->request_bufflen, + scmnd->sc_data_direction); + return -EINVAL; + } + + pci_unmap_addr_set(req, direct_mapping, dma); + + buf->va = cpu_to_be64(dma); + buf->key = cpu_to_be32(target->srp_host->mr->rkey); + buf->len = cpu_to_be32(scmnd->request_bufflen); + + fmt = SRP_DATA_DESC_DIRECT; + + len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); + } + + if (scmnd->sc_data_direction == DMA_TO_DEVICE) + cmd->buf_fmt = fmt << 4; + else + cmd->buf_fmt = fmt; + + + return len; +} + +static void srp_unmap_data(struct scsi_cmnd *scmnd, + struct srp_target_port *target, + struct srp_request *req) +{ + if (!scmnd->request_buffer || + (scmnd->sc_data_direction != DMA_TO_DEVICE && + scmnd->sc_data_direction != DMA_FROM_DEVICE)) + return; + + if (scmnd->use_sg) + dma_unmap_sg(target->srp_host->dev->dma_device, + (struct scatterlist *) scmnd->request_buffer, + scmnd->use_sg, scmnd->sc_data_direction); + else + dma_unmap_single(target->srp_host->dev->dma_device, + pci_unmap_addr(req, direct_mapping), + scmnd->request_bufflen, + scmnd->sc_data_direction); +} + +static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) +{ + struct srp_request *req; + struct scsi_cmnd *scmnd; + unsigned long flags; + s32 delta; + + delta = (s32) be32_to_cpu(rsp->req_lim_delta); + + spin_lock_irqsave(target->scsi_host->host_lock, flags); + + target->req_lim += delta; + + req = &target->req_ring[rsp->tag & ~SRP_TAG_TSK_MGMT]; + + if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { + if (be32_to_cpu(rsp->resp_data_len) < 4) + req->tsk_status = -1; + else + req->tsk_status = rsp->data[3]; + complete(&req->done); + } else { + scmnd = req->scmnd; + if (!scmnd) + printk(KERN_ERR "Null scmnd for RSP w/tag %016llx\n", + (unsigned long long) rsp->tag); + scmnd->result = rsp->status; + + if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { + memcpy(scmnd->sense_buffer, rsp->data + + be32_to_cpu(rsp->resp_data_len), + min_t(int, be32_to_cpu(rsp->sense_data_len), + SCSI_SENSE_BUFFERSIZE)); + } + + if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER)) + scmnd->resid = be32_to_cpu(rsp->data_out_res_cnt); + else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER)) + scmnd->resid = be32_to_cpu(rsp->data_in_res_cnt); + + srp_unmap_data(scmnd, target, req); + + if (!req->tsk_mgmt) { + req->scmnd = NULL; + scmnd->host_scribble = (void *) -1L; + scmnd->scsi_done(scmnd); + + list_del(&req->list); + req->next = target->req_head; + target->req_head = rsp->tag & ~SRP_TAG_TSK_MGMT; + } else + req->cmd_done = 1; + } + + spin_unlock_irqrestore(target->scsi_host->host_lock, flags); +} + +static void srp_reconnect_work(void *target_ptr) +{ + struct srp_target_port *target = target_ptr; + + srp_reconnect_target(target); +} + +static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) +{ + struct srp_iu *iu; + u8 opcode; + + iu = target->rx_ring[wc->wr_id & ~SRP_OP_RECV]; + + dma_sync_single_for_cpu(target->srp_host->dev->dma_device, iu->dma, + target->max_ti_iu_len, DMA_FROM_DEVICE); + + opcode = *(u8 *) iu->buf; + + if (0) { + int i; + + printk(KERN_ERR PFX "recv completion, opcode 0x%02x\n", opcode); + + for (i = 0; i < wc->byte_len; ++i) { + if (i % 8 == 0) + printk(KERN_ERR " [%02x] ", i); + printk(" %02x", ((u8 *) iu->buf)[i]); + if ((i + 1) % 8 == 0) + printk("\n"); + } + + if (wc->byte_len % 8) + printk("\n"); + } + + switch (opcode) { + case SRP_RSP: + srp_process_rsp(target, iu->buf); + break; + + case SRP_T_LOGOUT: + /* XXX Handle target logout */ + printk(KERN_WARNING PFX "Got target logout request\n"); + break; + + default: + printk(KERN_WARNING PFX "Unhandled SRP opcode 0x%02x\n", opcode); + break; + } + + dma_sync_single_for_device(target->srp_host->dev->dma_device, iu->dma, + target->max_ti_iu_len, DMA_FROM_DEVICE); +} + +static void srp_completion(struct ib_cq *cq, void *target_ptr) +{ + struct srp_target_port *target = target_ptr; + struct ib_wc wc; + unsigned long flags; + + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + while (ib_poll_cq(cq, 1, &wc) > 0) { + if (wc.status) { + printk(KERN_ERR PFX "failed %s status %d\n", + wc.wr_id & SRP_OP_RECV ? "receive" : "send", + wc.status); + spin_lock_irqsave(target->scsi_host->host_lock, flags); + if (target->state == SRP_TARGET_LIVE) + schedule_work(&target->work); + spin_unlock_irqrestore(target->scsi_host->host_lock, flags); + break; + } + + if (wc.wr_id & SRP_OP_RECV) + srp_handle_recv(target, &wc); + else + ++target->tx_tail; + } +} + +static int __srp_post_recv(struct srp_target_port *target) +{ + struct srp_iu *iu; + struct ib_sge list; + struct ib_recv_wr wr, *bad_wr; + unsigned int next; + int ret; + + next = target->rx_head & (SRP_RQ_SIZE - 1); + wr.wr_id = next | SRP_OP_RECV; + iu = target->rx_ring[next]; + + list.addr = iu->dma; + list.length = iu->size; + list.lkey = target->srp_host->mr->lkey; + + wr.next = NULL; + wr.sg_list = &list; + wr.num_sge = 1; + + ret = ib_post_recv(target->qp, &wr, &bad_wr); + if (!ret) + ++target->rx_head; + + return ret; +} + +static int srp_post_recv(struct srp_target_port *target) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(target->scsi_host->host_lock, flags); + ret = __srp_post_recv(target); + spin_unlock_irqrestore(target->scsi_host->host_lock, flags); + + return ret; +} + +/* + * Must be called with target->scsi_host->host_lock held to protect + * req_lim and tx_head. Lock cannot be dropped between call here and + * call to __srp_post_send(). + */ +static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target) +{ + if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE) + return NULL; + + if (unlikely(target->req_lim < 1)) { + if (printk_ratelimit()) + printk(KERN_DEBUG PFX "Target has req_lim %d\n", + target->req_lim); + return NULL; + } + + return target->tx_ring[target->tx_head & SRP_SQ_SIZE]; +} + +/* + * Must be called with target->scsi_host->host_lock held to protect + * req_lim and tx_head. + */ +static int __srp_post_send(struct srp_target_port *target, + struct srp_iu *iu, int len) +{ + struct ib_sge list; + struct ib_send_wr wr, *bad_wr; + int ret = 0; + + list.addr = iu->dma; + list.length = len; + list.lkey = target->srp_host->mr->lkey; + + wr.next = NULL; + wr.wr_id = target->tx_head & SRP_SQ_SIZE; + wr.sg_list = &list; + wr.num_sge = 1; + wr.opcode = IB_WR_SEND; + wr.send_flags = IB_SEND_SIGNALED; + + ret = ib_post_send(target->qp, &wr, &bad_wr); + + if (!ret) { + ++target->tx_head; + --target->req_lim; + } + + return ret; +} + +static int srp_queuecommand(struct scsi_cmnd *scmnd, + void (*done)(struct scsi_cmnd *)) +{ + struct srp_target_port *target = host_to_target(scmnd->device->host); + struct srp_request *req; + struct srp_iu *iu; + struct srp_cmd *cmd; + long req_index; + int len; + + if (target->state == SRP_TARGET_CONNECTING) + goto err; + + if (target->state == SRP_TARGET_DEAD || + target->state == SRP_TARGET_REMOVED) { + scmnd->result = DID_BAD_TARGET << 16; + done(scmnd); + return 0; + } + + iu = __srp_get_tx_iu(target); + if (!iu) + goto err; + + dma_sync_single_for_cpu(target->srp_host->dev->dma_device, iu->dma, + SRP_MAX_IU_LEN, DMA_TO_DEVICE); + + req_index = target->req_head; + + scmnd->scsi_done = done; + scmnd->result = 0; + scmnd->host_scribble = (void *) req_index; + + cmd = iu->buf; + memset(cmd, 0, sizeof *cmd); + + cmd->opcode = SRP_CMD; + cmd->lun = cpu_to_be64((u64) scmnd->device->lun << 48); + cmd->tag = req_index; + memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); + + req = &target->req_ring[req_index]; + + req->scmnd = scmnd; + req->cmd = iu; + req->cmd_done = 0; + req->tsk_mgmt = NULL; + + len = srp_map_data(scmnd, target, req); + if (len < 0) { + printk(KERN_ERR PFX "Failed to map data\n"); + goto err; + } + + if (__srp_post_recv(target)) { + printk(KERN_ERR PFX "Recv failed\n"); + goto err_unmap; + } + + dma_sync_single_for_device(target->srp_host->dev->dma_device, iu->dma, + SRP_MAX_IU_LEN, DMA_TO_DEVICE); + + if (__srp_post_send(target, iu, len)) { + printk(KERN_ERR PFX "Send failed\n"); + goto err_unmap; + } + + target->req_head = req->next; + list_add_tail(&req->list, &target->req_queue); + + return 0; + +err_unmap: + srp_unmap_data(scmnd, target, req); + +err: + return SCSI_MLQUEUE_HOST_BUSY; +} + +static int srp_alloc_iu_bufs(struct srp_target_port *target) +{ + int i; + + for (i = 0; i < SRP_RQ_SIZE; ++i) { + target->rx_ring[i] = srp_alloc_iu(target->srp_host, + target->max_ti_iu_len, + GFP_KERNEL, DMA_FROM_DEVICE); + if (!target->rx_ring[i]) + goto err; + } + + for (i = 0; i < SRP_SQ_SIZE + 1; ++i) { + target->tx_ring[i] = srp_alloc_iu(target->srp_host, + SRP_MAX_IU_LEN, + GFP_KERNEL, DMA_TO_DEVICE); + if (!target->tx_ring[i]) + goto err; + } + + return 0; + +err: + for (i = 0; i < SRP_RQ_SIZE; ++i) { + srp_free_iu(target->srp_host, target->rx_ring[i]); + target->rx_ring[i] = NULL; + } + + for (i = 0; i < SRP_SQ_SIZE + 1; ++i) { + srp_free_iu(target->srp_host, target->tx_ring[i]); + target->tx_ring[i] = NULL; + } + + return -ENOMEM; +} + +static void srp_cm_rej_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event, + struct srp_target_port *target) +{ + struct ib_class_port_info *cpi; + int opcode; + + switch (event->param.rej_rcvd.reason) { + case IB_CM_REJ_PORT_CM_REDIRECT: + cpi = event->param.rej_rcvd.ari; + target->path.dlid = cpi->redirect_lid; + target->path.pkey = cpi->redirect_pkey; + cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; + memcpy(target->path.dgid.raw, cpi->redirect_gid, 16); + + target->status = target->path.dlid ? + SRP_DLID_REDIRECT : SRP_PORT_REDIRECT; + break; + + case IB_CM_REJ_PORT_REDIRECT: + if (topspin_workarounds && + !memcmp(&target->ioc_guid, topspin_oui, 3)) { + /* + * Topspin/Cisco SRP gateways incorrectly send + * reject reason code 25 when they mean 24 + * (port redirect). + */ + memcpy(target->path.dgid.raw, + event->param.rej_rcvd.ari, 16); + + printk(KERN_DEBUG PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", + (unsigned long long) be64_to_cpu(target->path.dgid.global.subnet_prefix), + (unsigned long long) be64_to_cpu(target->path.dgid.global.interface_id)); + + target->status = SRP_PORT_REDIRECT; + } else { + printk(KERN_WARNING " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); + target->status = -ECONNRESET; + } + break; + + case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: + printk(KERN_WARNING " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); + target->status = -ECONNRESET; + break; + + case IB_CM_REJ_CONSUMER_DEFINED: + opcode = *(u8 *) event->private_data; + if (opcode == SRP_LOGIN_REJ) { + struct srp_login_rej *rej = event->private_data; + u32 reason = be32_to_cpu(rej->reason); + + if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) + printk(KERN_WARNING PFX + "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); + else + printk(KERN_WARNING PFX + "SRP LOGIN REJECTED, reason 0x%08x\n", reason); + } else + printk(KERN_WARNING " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," + " opcode 0x%02x\n", opcode); + target->status = -ECONNRESET; + break; + + default: + printk(KERN_WARNING " REJ reason 0x%x\n", + event->param.rej_rcvd.reason); + target->status = -ECONNRESET; + } +} + +static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +{ + struct srp_target_port *target = cm_id->context; + struct ib_qp_attr *qp_attr = NULL; + int attr_mask = 0; + int comp = 0; + int opcode = 0; + + switch (event->event) { + case IB_CM_REQ_ERROR: + printk(KERN_DEBUG PFX "Sending CM REQ failed\n"); + comp = 1; + target->status = -ECONNRESET; + break; + + case IB_CM_REP_RECEIVED: + comp = 1; + opcode = *(u8 *) event->private_data; + + if (opcode == SRP_LOGIN_RSP) { + struct srp_login_rsp *rsp = event->private_data; + + target->max_ti_iu_len = be32_to_cpu(rsp->max_ti_iu_len); + target->req_lim = be32_to_cpu(rsp->req_lim_delta); + + target->scsi_host->can_queue = min(target->req_lim, + target->scsi_host->can_queue); + } else { + printk(KERN_WARNING PFX "Unhandled RSP opcode %#x\n", opcode); + target->status = -ECONNRESET; + break; + } + + target->status = srp_alloc_iu_bufs(target); + if (target->status) + break; + + qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) { + target->status = -ENOMEM; + break; + } + + qp_attr->qp_state = IB_QPS_RTR; + target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); + if (target->status) + break; + + target->status = ib_modify_qp(target->qp, qp_attr, attr_mask); + if (target->status) + break; + + target->status = srp_post_recv(target); + if (target->status) + break; + + qp_attr->qp_state = IB_QPS_RTS; + target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); + if (target->status) + break; + + target->status = ib_modify_qp(target->qp, qp_attr, attr_mask); + if (target->status) + break; + + target->status = ib_send_cm_rtu(cm_id, NULL, 0); + if (target->status) + break; + + break; + + case IB_CM_REJ_RECEIVED: + printk(KERN_DEBUG PFX "REJ received\n"); + comp = 1; + + srp_cm_rej_handler(cm_id, event, target); + break; + + case IB_CM_MRA_RECEIVED: + printk(KERN_ERR PFX "MRA received\n"); + break; + + case IB_CM_DREP_RECEIVED: + break; + + case IB_CM_TIMEWAIT_EXIT: + printk(KERN_ERR PFX "connection closed\n"); + + comp = 1; + target->status = 0; + break; + + default: + printk(KERN_WARNING PFX "Unhandled CM event %d\n", event->event); + break; + } + + if (comp) + complete(&target->done); + + kfree(qp_attr); + + return 0; +} + +static int srp_send_tsk_mgmt(struct scsi_cmnd *scmnd, u8 func) +{ + struct srp_target_port *target = host_to_target(scmnd->device->host); + struct srp_request *req; + struct srp_iu *iu; + struct srp_tsk_mgmt *tsk_mgmt; + int req_index; + int ret = FAILED; + + spin_lock_irq(target->scsi_host->host_lock); + + if (scmnd->host_scribble == (void *) -1L) + goto out; + + req_index = (long) scmnd->host_scribble; + printk(KERN_ERR "Abort for req_index %d\n", req_index); + + req = &target->req_ring[req_index]; + init_completion(&req->done); + + iu = __srp_get_tx_iu(target); + if (!iu) + goto out; + + tsk_mgmt = iu->buf; + memset(tsk_mgmt, 0, sizeof *tsk_mgmt); + + tsk_mgmt->opcode = SRP_TSK_MGMT; + tsk_mgmt->lun = cpu_to_be64((u64) scmnd->device->lun << 48); + tsk_mgmt->tag = req_index | SRP_TAG_TSK_MGMT; + tsk_mgmt->tsk_mgmt_func = func; + tsk_mgmt->task_tag = req_index; + + if (__srp_post_send(target, iu, sizeof *tsk_mgmt)) + goto out; + + req->tsk_mgmt = iu; + + spin_unlock_irq(target->scsi_host->host_lock); + if (!wait_for_completion_timeout(&req->done, + msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS))) + return FAILED; + spin_lock_irq(target->scsi_host->host_lock); + + if (req->cmd_done) { + list_del(&req->list); + req->next = target->req_head; + target->req_head = req_index; + + scmnd->scsi_done(scmnd); + } else if (!req->tsk_status) { + scmnd->result = DID_ABORT << 16; + ret = SUCCESS; + } + +out: + spin_unlock_irq(target->scsi_host->host_lock); + return ret; +} + +static int srp_abort(struct scsi_cmnd *scmnd) +{ + printk(KERN_ERR "SRP abort called\n"); + + return srp_send_tsk_mgmt(scmnd, SRP_TSK_ABORT_TASK); +} + +static int srp_reset_device(struct scsi_cmnd *scmnd) +{ + printk(KERN_ERR "SRP reset_device called\n"); + + return srp_send_tsk_mgmt(scmnd, SRP_TSK_LUN_RESET); +} + +static int srp_reset_host(struct scsi_cmnd *scmnd) +{ + struct srp_target_port *target = host_to_target(scmnd->device->host); + int ret = FAILED; + + printk(KERN_ERR PFX "SRP reset_host called\n"); + + if (!srp_reconnect_target(target)) + ret = SUCCESS; + + return ret; +} + +static struct scsi_host_template srp_template = { + .module = THIS_MODULE, + .name = DRV_NAME, + .info = srp_target_info, + .queuecommand = srp_queuecommand, + .eh_abort_handler = srp_abort, + .eh_device_reset_handler = srp_reset_device, + .eh_host_reset_handler = srp_reset_host, + .can_queue = SRP_SQ_SIZE, + .this_id = -1, + .sg_tablesize = SRP_MAX_INDIRECT, + .cmd_per_lun = SRP_SQ_SIZE, + .use_clustering = ENABLE_CLUSTERING +}; + +static int srp_add_target(struct srp_host *host, struct srp_target_port *target) +{ + sprintf(target->target_name, "SRP.T10:%016llX", + (unsigned long long) be64_to_cpu(target->id_ext)); + + if (scsi_add_host(target->scsi_host, host->dev->dma_device)) + return -ENODEV; + + down(&host->target_mutex); + list_add_tail(&target->list, &host->target_list); + up(&host->target_mutex); + + target->state = SRP_TARGET_LIVE; + + /* XXX: are we supposed to have a definition of SCAN_WILD_CARD ?? */ + scsi_scan_target(&target->scsi_host->shost_gendev, + 0, target->scsi_id, ~0, 0); + + return 0; +} + +static void srp_release_class_dev(struct class_device *class_dev) +{ + struct srp_host *host = + container_of(class_dev, struct srp_host, class_dev); + + complete(&host->released); +} + +static struct class srp_class = { + .name = "infiniband_srp", + .release = srp_release_class_dev +}; + +/* + * Target ports are added by writing + * + * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>, + * pkey=<P_Key>,service_id=<service ID> + * + * to the add_target sysfs attribute. + */ +enum { + SRP_OPT_ERR = 0, + SRP_OPT_ID_EXT = 1 << 0, + SRP_OPT_IOC_GUID = 1 << 1, + SRP_OPT_DGID = 1 << 2, + SRP_OPT_PKEY = 1 << 3, + SRP_OPT_SERVICE_ID = 1 << 4, + SRP_OPT_MAX_SECT = 1 << 5, + SRP_OPT_ALL = (SRP_OPT_ID_EXT | + SRP_OPT_IOC_GUID | + SRP_OPT_DGID | + SRP_OPT_PKEY | + SRP_OPT_SERVICE_ID), +}; + +static match_table_t srp_opt_tokens = { + { SRP_OPT_ID_EXT, "id_ext=%s" }, + { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, + { SRP_OPT_DGID, "dgid=%s" }, + { SRP_OPT_PKEY, "pkey=%x" }, + { SRP_OPT_SERVICE_ID, "service_id=%s" }, + { SRP_OPT_MAX_SECT, "max_sect=%d" }, + { SRP_OPT_ERR, NULL } +}; + +static int srp_parse_options(const char *buf, struct srp_target_port *target) +{ + char *options, *sep_opt; + char *p; + char dgid[3]; + substring_t args[MAX_OPT_ARGS]; + int opt_mask = 0; + int token; + int ret = -EINVAL; + int i; + + options = kstrdup(buf, GFP_KERNEL); + if (!options) + return -ENOMEM; + + sep_opt = options; + while ((p = strsep(&sep_opt, ",")) != NULL) { + if (!*p) + continue; + + token = match_token(p, srp_opt_tokens, args); + opt_mask |= token; + + switch (token) { + case SRP_OPT_ID_EXT: + p = match_strdup(args); + target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); + kfree(p); + break; + + case SRP_OPT_IOC_GUID: + p = match_strdup(args); + target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16)); + kfree(p); + break; + + case SRP_OPT_DGID: + p = match_strdup(args); + if (strlen(p) != 32) { + printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p); + goto out; + } + + for (i = 0; i < 16; ++i) { + strlcpy(dgid, p + i * 2, 3); + target->path.dgid.raw[i] = simple_strtoul(dgid, NULL, 16); + } + break; + + case SRP_OPT_PKEY: + if (match_hex(args, &token)) { + printk(KERN_WARNING PFX "bad P_Key parameter '%s'\n", p); + goto out; + } + target->path.pkey = cpu_to_be16(token); + break; + + case SRP_OPT_SERVICE_ID: + p = match_strdup(args); + target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16)); + kfree(p); + break; + + case SRP_OPT_MAX_SECT: + if (match_int(args, &token)) { + printk(KERN_WARNING PFX "bad max sect parameter '%s'\n", p); + goto out; + } + target->scsi_host->max_sectors = token; + break; + + default: + printk(KERN_WARNING PFX "unknown parameter or missing value " + "'%s' in target creation request\n", p); + goto out; + } + } + + if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL) + ret = 0; + else + for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i) + if ((srp_opt_tokens[i].token & SRP_OPT_ALL) && + !(srp_opt_tokens[i].token & opt_mask)) + printk(KERN_WARNING PFX "target creation request is " + "missing parameter '%s'\n", + srp_opt_tokens[i].pattern); + +out: + kfree(options); + return ret; +} + +static ssize_t srp_create_target(struct class_device *class_dev, + const char *buf, size_t count) +{ + struct srp_host *host = + container_of(class_dev, struct srp_host, class_dev); + struct Scsi_Host *target_host; + struct srp_target_port *target; + int ret; + int i; + + target_host = scsi_host_alloc(&srp_template, + sizeof (struct srp_target_port)); + if (!target_host) + return -ENOMEM; + + target_host->max_lun = SRP_MAX_LUN; + + target = host_to_target(target_host); + memset(target, 0, sizeof *target); + + target->scsi_host = target_host; + target->srp_host = host; + + INIT_WORK(&target->work, srp_reconnect_work, target); + + for (i = 0; i < SRP_SQ_SIZE - 1; ++i) + target->req_ring[i].next = i + 1; + target->req_ring[SRP_SQ_SIZE - 1].next = -1; + INIT_LIST_HEAD(&target->req_queue); + + ret = srp_parse_options(buf, target); + if (ret) + goto err; + + ib_get_cached_gid(host->dev, host->port, 0, &target->path.sgid); + + printk(KERN_DEBUG PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " + "service_id %016llx dgid %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + (unsigned long long) be64_to_cpu(target->id_ext), + (unsigned long long) be64_to_cpu(target->ioc_guid), + be16_to_cpu(target->path.pkey), + (unsigned long long) be64_to_cpu(target->service_id), + (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[0]), + (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[2]), + (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[4]), + (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[6]), + (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[8]), + (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[10]), + (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[12]), + (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[14])); + + ret = srp_create_target_ib(target); + if (ret) + goto err; + + target->cm_id = ib_create_cm_id(host->dev, srp_cm_handler, target); + if (IS_ERR(target->cm_id)) { + ret = PTR_ERR(target->cm_id); + goto err_free; + } + + ret = srp_connect_target(target); + if (ret) { + printk(KERN_ERR PFX "Connection failed\n"); + goto err_cm_id; + } + + ret = srp_add_target(host, target); + if (ret) + goto err_disconnect; + + return count; + +err_disconnect: + srp_disconnect_target(target); + +err_cm_id: + ib_destroy_cm_id(target->cm_id); + +err_free: + srp_free_target_ib(target); + +err: + scsi_host_put(target_host); + + return ret; +} + +static CLASS_DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target); + +static ssize_t show_ibdev(struct class_device *class_dev, char *buf) +{ + struct srp_host *host = + container_of(class_dev, struct srp_host, class_dev); + + return sprintf(buf, "%s\n", host->dev->name); +} + +static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); + +static ssize_t show_port(struct class_device *class_dev, char *buf) +{ + struct srp_host *host = + container_of(class_dev, struct srp_host, class_dev); + + return sprintf(buf, "%d\n", host->port); +} + +static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL); + +static struct srp_host *srp_add_port(struct ib_device *device, + __be64 node_guid, u8 port) +{ + struct srp_host *host; + + host = kzalloc(sizeof *host, GFP_KERNEL); + if (!host) + return NULL; + + INIT_LIST_HEAD(&host->target_list); + init_MUTEX(&host->target_mutex); + init_completion(&host->released); + host->dev = device; + host->port = port; + + host->initiator_port_id[7] = port; + memcpy(host->initiator_port_id + 8, &node_guid, 8); + + host->pd = ib_alloc_pd(device); + if (IS_ERR(host->pd)) + goto err_free; + + host->mr = ib_get_dma_mr(host->pd, + IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE); + if (IS_ERR(host->mr)) + goto err_pd; + + host->class_dev.class = &srp_class; + host->class_dev.dev = device->dma_device; + snprintf(host->class_dev.class_id, BUS_ID_SIZE, "srp-%s-%d", + device->name, port); + + if (class_device_register(&host->class_dev)) + goto err_mr; + if (class_device_create_file(&host->class_dev, &class_device_attr_add_target)) + goto err_class; + if (class_device_create_file(&host->class_dev, &class_device_attr_ibdev)) + goto err_class; + if (class_device_create_file(&host->class_dev, &class_device_attr_port)) + goto err_class; + + return host; + +err_class: + class_device_unregister(&host->class_dev); + +err_mr: + ib_dereg_mr(host->mr); + +err_pd: + ib_dealloc_pd(host->pd); + +err_free: + kfree(host); + + return NULL; +} + +static void srp_add_one(struct ib_device *device) +{ + struct list_head *dev_list; + struct srp_host *host; + struct ib_device_attr *dev_attr; + int s, e, p; + + dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); + if (!dev_attr) + return; + + if (ib_query_device(device, dev_attr)) { + printk(KERN_WARNING PFX "Couldn't query node GUID for %s.\n", + device->name); + goto out; + } + + dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); + if (!dev_list) + goto out; + + INIT_LIST_HEAD(dev_list); + + if (device->node_type == IB_NODE_SWITCH) { + s = 0; + e = 0; + } else { + s = 1; + e = device->phys_port_cnt; + } + + for (p = s; p <= e; ++p) { + host = srp_add_port(device, dev_attr->node_guid, p); + if (host) + list_add_tail(&host->list, dev_list); + } + + ib_set_client_data(device, &srp_client, dev_list); + +out: + kfree(dev_attr); +} + +static void srp_remove_one(struct ib_device *device) +{ + struct list_head *dev_list; + struct srp_host *host, *tmp_host; + LIST_HEAD(target_list); + struct srp_target_port *target, *tmp_target; + unsigned long flags; + + dev_list = ib_get_client_data(device, &srp_client); + + list_for_each_entry_safe(host, tmp_host, dev_list, list) { + class_device_unregister(&host->class_dev); + /* + * Wait for the sysfs entry to go away, so that no new + * target ports can be created. + */ + wait_for_completion(&host->released); + + /* + * Mark all target ports as removed, so we stop queueing + * commands and don't try to reconnect. + */ + down(&host->target_mutex); + list_for_each_entry_safe(target, tmp_target, + &host->target_list, list) { + spin_lock_irqsave(target->scsi_host->host_lock, flags); + if (target->state != SRP_TARGET_REMOVED) + target->state = SRP_TARGET_REMOVED; + spin_unlock_irqrestore(target->scsi_host->host_lock, flags); + } + up(&host->target_mutex); + + /* + * Wait for any reconnection tasks that may have + * started before we marked our target ports as + * removed, and any target port removal tasks. + */ + flush_scheduled_work(); + + list_for_each_entry_safe(target, tmp_target, + &host->target_list, list) { + scsi_remove_host(target->scsi_host); + srp_disconnect_target(target); + ib_destroy_cm_id(target->cm_id); + srp_free_target_ib(target); + scsi_host_put(target->scsi_host); + } + + ib_dereg_mr(host->mr); + ib_dealloc_pd(host->pd); + kfree(host); + } + + kfree(dev_list); +} + +static int __init srp_init_module(void) +{ + int ret; + + ret = class_register(&srp_class); + if (ret) { + printk(KERN_ERR PFX "couldn't register class infiniband_srp\n"); + return ret; + } + + ret = ib_register_client(&srp_client); + if (ret) { + printk(KERN_ERR PFX "couldn't register IB client\n"); + class_unregister(&srp_class); + return ret; + } + + return 0; +} + +static void __exit srp_cleanup_module(void) +{ + ib_unregister_client(&srp_client); + class_unregister(&srp_class); +} + +module_init(srp_init_module); +module_exit(srp_cleanup_module); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h new file mode 100644 index 00000000000..b564f18caf7 --- /dev/null +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_srp.h 3932 2005-11-01 17:19:29Z roland $ + */ + +#ifndef IB_SRP_H +#define IB_SRP_H + +#include <linux/types.h> +#include <linux/list.h> + +#include <asm/semaphore.h> + +#include <scsi/scsi_host.h> +#include <scsi/scsi_cmnd.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_sa.h> +#include <rdma/ib_cm.h> + +enum { + SRP_PATH_REC_TIMEOUT_MS = 1000, + SRP_ABORT_TIMEOUT_MS = 5000, + + SRP_PORT_REDIRECT = 1, + SRP_DLID_REDIRECT = 2, + + SRP_MAX_LUN = 512, + SRP_MAX_IU_LEN = 256, + + SRP_RQ_SHIFT = 6, + SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT, + SRP_SQ_SIZE = SRP_RQ_SIZE - 1, + SRP_CQ_SIZE = SRP_SQ_SIZE + SRP_RQ_SIZE, + + SRP_TAG_TSK_MGMT = 1 << (SRP_RQ_SHIFT + 1) +}; + +#define SRP_OP_RECV (1 << 31) +#define SRP_MAX_INDIRECT ((SRP_MAX_IU_LEN - \ + sizeof (struct srp_cmd) - \ + sizeof (struct srp_indirect_buf)) / 16) + +enum srp_target_state { + SRP_TARGET_LIVE, + SRP_TARGET_CONNECTING, + SRP_TARGET_DEAD, + SRP_TARGET_REMOVED +}; + +struct srp_host { + u8 initiator_port_id[16]; + struct ib_device *dev; + u8 port; + struct ib_pd *pd; + struct ib_mr *mr; + struct class_device class_dev; + struct list_head target_list; + struct semaphore target_mutex; + struct completion released; + struct list_head list; +}; + +struct srp_request { + struct list_head list; + struct scsi_cmnd *scmnd; + struct srp_iu *cmd; + struct srp_iu *tsk_mgmt; + DECLARE_PCI_UNMAP_ADDR(direct_mapping) + struct completion done; + short next; + u8 cmd_done; + u8 tsk_status; +}; + +struct srp_target_port { + __be64 id_ext; + __be64 ioc_guid; + __be64 service_id; + struct srp_host *srp_host; + struct Scsi_Host *scsi_host; + char target_name[32]; + unsigned int scsi_id; + + struct ib_sa_path_rec path; + struct ib_sa_query *path_query; + int path_query_id; + + struct ib_cm_id *cm_id; + struct ib_cq *cq; + struct ib_qp *qp; + + int max_ti_iu_len; + s32 req_lim; + + unsigned rx_head; + struct srp_iu *rx_ring[SRP_RQ_SIZE]; + + unsigned tx_head; + unsigned tx_tail; + struct srp_iu *tx_ring[SRP_SQ_SIZE + 1]; + + int req_head; + struct list_head req_queue; + struct srp_request req_ring[SRP_SQ_SIZE]; + + struct work_struct work; + + struct list_head list; + struct completion done; + int status; + enum srp_target_state state; +}; + +struct srp_iu { + dma_addr_t dma; + void *buf; + size_t size; + enum dma_data_direction direction; +}; + +#endif /* IB_SRP_H */ |