diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2006-09-22 15:47:06 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-09-22 15:47:06 -0700 |
commit | db392219c5f572610645696e3672f6ea38783a65 (patch) | |
tree | 240878f6367e528840cc4fe92f6b3a251651572f /drivers/infiniband/core | |
parent | 3e2ab46df697048e222cdc046a2fa97149ca32ba (diff) | |
parent | 9cd330d36b32ed48d49561b165842db20bd153cc (diff) |
Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband: (65 commits)
IB: Fix typo in kerneldoc for ib_set_client_data()
IPoIB: Add some likely/unlikely annotations in hot path
IPoIB: Remove unused include of vmalloc.h
IPoIB: Rejoin all multicast groups after a port event
IPoIB: Create MCGs with all attributes required by RFC
IB/sa: fix ib_sa_selector names
IB/iser: INFINIBAND_ISER depends on INET
IB/mthca: Simplify calls to mthca_cq_clean()
RDMA/cma: Document rdma_accept() error handling
IB/mthca: Recover from catastrophic errors
RDMA/cma: Document rdma_destroy_id() function
IB/cm: Do not track remote QPN in timewait state
IB/sa: Require SA registration
IPoIB: Refactor completion handling
IB/iser: Do not use FMR for a single dma entry sg
IB/iser: fix some debug prints
IB/iser: make FMR "page size" be 4K and not PAGE_SIZE
IB/iser: Limit the max size of a scsi command
IB/iser: fix a check of SG alignment for RDMA
RDMA/cma: Protect against adding device during destruction
...
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r-- | drivers/infiniband/core/Makefile | 4 | ||||
-rw-r--r-- | drivers/infiniband/core/addr.c | 22 | ||||
-rw-r--r-- | drivers/infiniband/core/cache.c | 5 | ||||
-rw-r--r-- | drivers/infiniband/core/cm.c | 66 | ||||
-rw-r--r-- | drivers/infiniband/core/cma.c | 403 | ||||
-rw-r--r-- | drivers/infiniband/core/device.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/core/iwcm.c | 1019 | ||||
-rw-r--r-- | drivers/infiniband/core/iwcm.h | 62 | ||||
-rw-r--r-- | drivers/infiniband/core/mad.c | 19 | ||||
-rw-r--r-- | drivers/infiniband/core/mad_priv.h | 1 | ||||
-rw-r--r-- | drivers/infiniband/core/mad_rmpp.c | 94 | ||||
-rw-r--r-- | drivers/infiniband/core/sa_query.c | 67 | ||||
-rw-r--r-- | drivers/infiniband/core/smi.c | 16 | ||||
-rw-r--r-- | drivers/infiniband/core/sysfs.c | 13 | ||||
-rw-r--r-- | drivers/infiniband/core/ucm.c | 9 | ||||
-rw-r--r-- | drivers/infiniband/core/user_mad.c | 7 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_cmd.c | 64 | ||||
-rw-r--r-- | drivers/infiniband/core/verbs.c | 21 |
18 files changed, 1731 insertions, 167 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 68e73ec2d1f..163d991eb8c 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,7 +1,7 @@ infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ - ib_cm.o $(infiniband-y) + ib_cm.o iw_cm.o $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o @@ -14,6 +14,8 @@ ib_sa-y := sa_query.o ib_cm-y := cm.o +iw_cm-y := iwcm.o + rdma_cm-y := cma.o ib_addr-y := addr.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1205e802782..9cbf09e2052 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -61,12 +61,15 @@ static LIST_HEAD(req_list); static DECLARE_WORK(work, process_req, NULL); static struct workqueue_struct *addr_wq; -static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, - unsigned char *dst_dev_addr) +int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, + const unsigned char *dst_dev_addr) { switch (dev->type) { case ARPHRD_INFINIBAND: - dev_addr->dev_type = IB_NODE_CA; + dev_addr->dev_type = RDMA_NODE_IB_CA; + break; + case ARPHRD_ETHER: + dev_addr->dev_type = RDMA_NODE_RNIC; break; default: return -EADDRNOTAVAIL; @@ -78,6 +81,7 @@ static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); return 0; } +EXPORT_SYMBOL(rdma_copy_addr); int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) { @@ -89,7 +93,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) if (!dev) return -EADDRNOTAVAIL; - ret = copy_addr(dev_addr, dev, NULL); + ret = rdma_copy_addr(dev_addr, dev, NULL); dev_put(dev); return ret; } @@ -161,7 +165,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in, /* If the device does ARP internally, return 'done' */ if (rt->idev->dev->flags & IFF_NOARP) { - copy_addr(addr, rt->idev->dev, NULL); + rdma_copy_addr(addr, rt->idev->dev, NULL); goto put; } @@ -181,7 +185,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in, src_in->sin_addr.s_addr = rt->rt_src; } - ret = copy_addr(addr, neigh->dev, neigh->ha); + ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); release: neigh_release(neigh); put: @@ -245,7 +249,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in, if (ZERONET(src_ip)) { src_in->sin_family = dst_in->sin_family; src_in->sin_addr.s_addr = dst_ip; - ret = copy_addr(addr, dev, dev->dev_addr); + ret = rdma_copy_addr(addr, dev, dev->dev_addr); } else if (LOOPBACK(src_ip)) { ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); if (!ret) @@ -327,10 +331,10 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr) } EXPORT_SYMBOL(rdma_addr_cancel); -static int netevent_callback(struct notifier_block *self, unsigned long event, +static int netevent_callback(struct notifier_block *self, unsigned long event, void *ctx) { - if (event == NETEVENT_NEIGH_UPDATE) { + if (event == NETEVENT_NEIGH_UPDATE) { struct neighbour *neigh = ctx; if (neigh->dev->type == ARPHRD_INFINIBAND && diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 75313ade2e0..20e9f64e67a 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -62,12 +62,13 @@ struct ib_update_work { static inline int start_port(struct ib_device *device) { - return device->node_type == IB_NODE_SWITCH ? 0 : 1; + return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; } static inline int end_port(struct ib_device *device) { - return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt; + return (device->node_type == RDMA_NODE_IB_SWITCH) ? + 0 : device->phys_port_cnt; } int ib_get_cached_gid(struct ib_device *device, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0de335b7bfc..f35fcc4c063 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004-2006 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -41,6 +41,7 @@ #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/pci.h> +#include <linux/random.h> #include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/workqueue.h> @@ -73,6 +74,7 @@ static struct ib_cm { struct rb_root remote_id_table; struct rb_root remote_sidr_table; struct idr local_id_table; + __be32 random_id_operand; struct workqueue_struct *wq; } cm; @@ -177,7 +179,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, if (IS_ERR(ah)) return PTR_ERR(ah); - m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, + m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, cm_id_priv->av.pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); @@ -299,15 +301,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; - int ret; + int ret, id; static int next_id; do { spin_lock_irqsave(&cm.lock, flags); - ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++, - (__force int *) &cm_id_priv->id.local_id); + ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, + next_id++, &id); spin_unlock_irqrestore(&cm.lock, flags); } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); + + cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand); return ret; } @@ -316,7 +320,8 @@ static void cm_free_id(__be32 local_id) unsigned long flags; spin_lock_irqsave(&cm.lock, flags); - idr_remove(&cm.local_id_table, (__force int) local_id); + idr_remove(&cm.local_id_table, + (__force int) (local_id ^ cm.random_id_operand)); spin_unlock_irqrestore(&cm.lock, flags); } @@ -324,7 +329,8 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; - cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id); + cm_id_priv = idr_find(&cm.local_id_table, + (__force int) (local_id ^ cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -679,6 +685,8 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; + cm_cleanup_timewait(cm_id_priv->timewait_info); + /* * The cm_id could be destroyed by the user before we exit timewait. * To protect against this, we search for the cm_id after exiting @@ -1354,7 +1362,7 @@ static int cm_req_handler(struct cm_work *work) id.local_id); if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); - goto error1; + goto destroy; } cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; @@ -1363,7 +1371,8 @@ static int cm_req_handler(struct cm_work *work) listen_cm_id_priv = cm_match_req(work, cm_id_priv); if (!listen_cm_id_priv) { ret = -EINVAL; - goto error2; + kfree(cm_id_priv->timewait_info); + goto destroy; } cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; @@ -1373,12 +1382,22 @@ static int cm_req_handler(struct cm_work *work) cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); - if (ret) - goto error3; + if (ret) { + ib_get_cached_gid(work->port->cm_dev->device, + work->port->port_num, 0, &work->path[0].sgid); + ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, + &work->path[0].sgid, sizeof work->path[0].sgid, + NULL, 0); + goto rejected; + } if (req_msg->alt_local_lid) { ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av); - if (ret) - goto error3; + if (ret) { + ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, + &work->path[0].sgid, + sizeof work->path[0].sgid, NULL, 0); + goto rejected; + } } cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( @@ -1400,12 +1419,11 @@ static int cm_req_handler(struct cm_work *work) cm_deref_id(listen_cm_id_priv); return 0; -error3: atomic_dec(&cm_id_priv->refcount); +rejected: + atomic_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); - cm_cleanup_timewait(cm_id_priv->timewait_info); -error2: kfree(cm_id_priv->timewait_info); - cm_id_priv->timewait_info = NULL; -error1: ib_destroy_cm_id(&cm_id_priv->id); +destroy: + ib_destroy_cm_id(cm_id); return ret; } @@ -2072,8 +2090,9 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) spin_unlock_irqrestore(&cm.lock, flags); return NULL; } - cm_id_priv = idr_find(&cm.local_id_table, - (__force int) timewait_info->work.local_id); + cm_id_priv = idr_find(&cm.local_id_table, (__force int) + (timewait_info->work.local_id ^ + cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -3125,7 +3144,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE; if (cm_id_priv->responder_resources) - qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ; + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_ATOMIC; qp_attr->pkey_index = cm_id_priv->av.pkey_index; qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; @@ -3262,6 +3282,9 @@ static void cm_add_one(struct ib_device *device) int ret; u8 i; + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) * device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) @@ -3349,6 +3372,7 @@ static int __init ib_cm_init(void) cm.remote_qp_table = RB_ROOT; cm.remote_sidr_table = RB_ROOT; idr_init(&cm.local_id_table); + get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); idr_pre_get(&cm.local_id_table, GFP_KERNEL); cm.wq = create_workqueue("ib_cm"); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 5d625a81193..1178bd434d1 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -35,6 +35,7 @@ #include <linux/mutex.h> #include <linux/random.h> #include <linux/idr.h> +#include <linux/inetdevice.h> #include <net/tcp.h> @@ -43,6 +44,7 @@ #include <rdma/ib_cache.h> #include <rdma/ib_cm.h> #include <rdma/ib_sa.h> +#include <rdma/iw_cm.h> MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("Generic RDMA CM Agent"); @@ -60,6 +62,7 @@ static struct ib_client cma_client = { .remove = cma_remove_one }; +static struct ib_sa_client sa_client; static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); @@ -124,6 +127,7 @@ struct rdma_id_private { int query_id; union { struct ib_cm_id *ib; + struct iw_cm_id *iw; } cm_id; u32 seq_num; @@ -259,15 +263,24 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv) id_priv->cma_dev = NULL; } -static int cma_acquire_ib_dev(struct rdma_id_private *id_priv) +static int cma_acquire_dev(struct rdma_id_private *id_priv) { + enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type; struct cma_device *cma_dev; union ib_gid gid; int ret = -ENODEV; - ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid), + switch (rdma_node_get_transport(dev_type)) { + case RDMA_TRANSPORT_IB: + ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + break; + case RDMA_TRANSPORT_IWARP: + iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + break; + default: + return -ENODEV; + } - mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) { ret = ib_find_cached_gid(cma_dev->device, &gid, &id_priv->id.port_num, NULL); @@ -276,20 +289,9 @@ static int cma_acquire_ib_dev(struct rdma_id_private *id_priv) break; } } - mutex_unlock(&lock); return ret; } -static int cma_acquire_dev(struct rdma_id_private *id_priv) -{ - switch (id_priv->id.route.addr.dev_addr.dev_type) { - case IB_NODE_CA: - return cma_acquire_ib_dev(id_priv); - default: - return -ENODEV; - } -} - static void cma_deref_id(struct rdma_id_private *id_priv) { if (atomic_dec_and_test(&id_priv->refcount)) @@ -347,6 +349,16 @@ static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) IB_QP_PKEY_INDEX | IB_QP_PORT); } +static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + qp_attr.qp_state = IB_QPS_INIT; + qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; + + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS); +} + int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { @@ -362,10 +374,13 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, if (IS_ERR(qp)) return PTR_ERR(qp); - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = cma_init_ib_qp(id_priv, qp); break; + case RDMA_TRANSPORT_IWARP: + ret = cma_init_iw_qp(id_priv, qp); + break; default: ret = -ENOSYS; break; @@ -451,13 +466,17 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int ret; id_priv = container_of(id, struct rdma_id_private, id); - switch (id_priv->id.device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { + case RDMA_TRANSPORT_IB: ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, qp_attr_mask); if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; break; + case RDMA_TRANSPORT_IWARP: + ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, + qp_attr_mask); + break; default: ret = -ENOSYS; break; @@ -590,8 +609,8 @@ static int cma_notify_user(struct rdma_id_private *id_priv, static void cma_cancel_route(struct rdma_id_private *id_priv) { - switch (id_priv->id.device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { + case RDMA_TRANSPORT_IB: if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); break; @@ -611,11 +630,15 @@ static void cma_destroy_listen(struct rdma_id_private *id_priv) cma_exch(id_priv, CMA_DESTROYING); if (id_priv->cma_dev) { - switch (id_priv->id.device->node_type) { - case IB_NODE_CA: - if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { + case RDMA_TRANSPORT_IB: + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) ib_destroy_cm_id(id_priv->cm_id.ib); break; + case RDMA_TRANSPORT_IWARP: + if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw)) + iw_destroy_cm_id(id_priv->cm_id.iw); + break; default: break; } @@ -689,19 +712,25 @@ void rdma_destroy_id(struct rdma_cm_id *id) state = cma_exch(id_priv, CMA_DESTROYING); cma_cancel_operation(id_priv, state); + mutex_lock(&lock); if (id_priv->cma_dev) { - switch (id->device->node_type) { - case IB_NODE_CA: - if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + mutex_unlock(&lock); + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) ib_destroy_cm_id(id_priv->cm_id.ib); break; + case RDMA_TRANSPORT_IWARP: + if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw)) + iw_destroy_cm_id(id_priv->cm_id.iw); + break; default: break; } - mutex_lock(&lock); + mutex_lock(&lock); cma_detach_from_dev(id_priv); - mutex_unlock(&lock); } + mutex_unlock(&lock); cma_release_port(id_priv); cma_deref_id(id_priv); @@ -869,7 +898,7 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id, ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); - rt->addr.dev_addr.dev_type = IB_NODE_CA; + rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA; id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; @@ -898,7 +927,9 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) } atomic_inc(&conn_id->dev_remove); - ret = cma_acquire_ib_dev(conn_id); + mutex_lock(&lock); + ret = cma_acquire_dev(conn_id); + mutex_unlock(&lock); if (ret) { ret = -ENODEV; cma_release_remove(conn_id); @@ -982,6 +1013,130 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, } } +static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) +{ + struct rdma_id_private *id_priv = iw_id->context; + enum rdma_cm_event_type event = 0; + struct sockaddr_in *sin; + int ret = 0; + + atomic_inc(&id_priv->dev_remove); + + switch (iw_event->event) { + case IW_CM_EVENT_CLOSE: + event = RDMA_CM_EVENT_DISCONNECTED; + break; + case IW_CM_EVENT_CONNECT_REPLY: + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + *sin = iw_event->local_addr; + sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; + *sin = iw_event->remote_addr; + if (iw_event->status) + event = RDMA_CM_EVENT_REJECTED; + else + event = RDMA_CM_EVENT_ESTABLISHED; + break; + case IW_CM_EVENT_ESTABLISHED: + event = RDMA_CM_EVENT_ESTABLISHED; + break; + default: + BUG_ON(1); + } + + ret = cma_notify_user(id_priv, event, iw_event->status, + iw_event->private_data, + iw_event->private_data_len); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + id_priv->cm_id.iw = NULL; + cma_exch(id_priv, CMA_DESTROYING); + cma_release_remove(id_priv); + rdma_destroy_id(&id_priv->id); + return ret; + } + + cma_release_remove(id_priv); + return ret; +} + +static int iw_conn_req_handler(struct iw_cm_id *cm_id, + struct iw_cm_event *iw_event) +{ + struct rdma_cm_id *new_cm_id; + struct rdma_id_private *listen_id, *conn_id; + struct sockaddr_in *sin; + struct net_device *dev = NULL; + int ret; + + listen_id = cm_id->context; + atomic_inc(&listen_id->dev_remove); + if (!cma_comp(listen_id, CMA_LISTEN)) { + ret = -ECONNABORTED; + goto out; + } + + /* Create a new RDMA id for the new IW CM ID */ + new_cm_id = rdma_create_id(listen_id->id.event_handler, + listen_id->id.context, + RDMA_PS_TCP); + if (!new_cm_id) { + ret = -ENOMEM; + goto out; + } + conn_id = container_of(new_cm_id, struct rdma_id_private, id); + atomic_inc(&conn_id->dev_remove); + conn_id->state = CMA_CONNECT; + + dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr); + if (!dev) { + ret = -EADDRNOTAVAIL; + cma_release_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); + if (ret) { + cma_release_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + + mutex_lock(&lock); + ret = cma_acquire_dev(conn_id); + mutex_unlock(&lock); + if (ret) { + cma_release_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + + conn_id->cm_id.iw = cm_id; + cm_id->context = conn_id; + cm_id->cm_handler = cma_iw_handler; + + sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr; + *sin = iw_event->local_addr; + sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; + *sin = iw_event->remote_addr; + + ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0, + iw_event->private_data, + iw_event->private_data_len); + if (ret) { + /* User wants to destroy the CM ID */ + conn_id->cm_id.iw = NULL; + cma_exch(conn_id, CMA_DESTROYING); + cma_release_remove(conn_id); + rdma_destroy_id(&conn_id->id); + } + +out: + if (dev) + dev_put(dev); + cma_release_remove(listen_id); + return ret; +} + static int cma_ib_listen(struct rdma_id_private *id_priv) { struct ib_cm_compare_data compare_data; @@ -1011,6 +1166,30 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) return ret; } +static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) +{ + int ret; + struct sockaddr_in *sin; + + id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device, + iw_conn_req_handler, + id_priv); + if (IS_ERR(id_priv->cm_id.iw)) + return PTR_ERR(id_priv->cm_id.iw); + + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + id_priv->cm_id.iw->local_addr = *sin; + + ret = iw_cm_listen(id_priv->cm_id.iw, backlog); + + if (ret) { + iw_destroy_cm_id(id_priv->cm_id.iw); + id_priv->cm_id.iw = NULL; + } + + return ret; +} + static int cma_listen_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { @@ -1087,12 +1266,17 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) id_priv->backlog = backlog; if (id->device) { - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = cma_ib_listen(id_priv); if (ret) goto err; break; + case RDMA_TRANSPORT_IWARP: + ret = cma_iw_listen(id_priv, backlog); + if (ret) + goto err; + break; default: ret = -ENOSYS; goto err; @@ -1140,7 +1324,7 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr)); path_rec.numb_path = 1; - id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device, + id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, id_priv->id.port_num, &path_rec, IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH, @@ -1231,6 +1415,23 @@ err: } EXPORT_SYMBOL(rdma_set_ib_paths); +static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) +{ + struct cma_work *work; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler, work); + work->old_state = CMA_ROUTE_QUERY; + work->new_state = CMA_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + queue_work(cma_wq, &work->work); + return 0; +} + int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) { struct rdma_id_private *id_priv; @@ -1241,10 +1442,13 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) return -EINVAL; atomic_inc(&id_priv->refcount); - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = cma_resolve_ib_route(id_priv, timeout_ms); break; + case RDMA_TRANSPORT_IWARP: + ret = cma_resolve_iw_route(id_priv, timeout_ms); + break; default: ret = -ENOSYS; break; @@ -1309,16 +1513,26 @@ static void addr_handler(int status, struct sockaddr *src_addr, enum rdma_cm_event_type event; atomic_inc(&id_priv->dev_remove); - if (!id_priv->cma_dev && !status) + + /* + * Grab mutex to block rdma_destroy_id() from removing the device while + * we're trying to acquire it. + */ + mutex_lock(&lock); + if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) { + mutex_unlock(&lock); + goto out; + } + + if (!status && !id_priv->cma_dev) status = cma_acquire_dev(id_priv); + mutex_unlock(&lock); if (status) { - if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND)) + if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND)) goto out; event = RDMA_CM_EVENT_ADDR_ERROR; } else { - if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) - goto out; memcpy(&id_priv->id.route.addr.src_addr, src_addr, ip_addr_size(src_addr)); event = RDMA_CM_EVENT_ADDR_RESOLVED; @@ -1492,7 +1706,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { if (cma_any_addr(&cur_id->id.route.addr.src_addr)) return -EADDRNOTAVAIL; - + cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr; if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr) return -EADDRINUSE; @@ -1542,8 +1756,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (!cma_any_addr(addr)) { ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); - if (!ret) + if (!ret) { + mutex_lock(&lock); ret = cma_acquire_dev(id_priv); + mutex_unlock(&lock); + } if (ret) goto err; } @@ -1649,6 +1866,47 @@ out: return ret; } +static int cma_connect_iw(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct iw_cm_id *cm_id; + struct sockaddr_in* sin; + int ret; + struct iw_cm_conn_param iw_param; + + cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); + if (IS_ERR(cm_id)) { + ret = PTR_ERR(cm_id); + goto out; + } + + id_priv->cm_id.iw = cm_id; + + sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr; + cm_id->local_addr = *sin; + + sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; + cm_id->remote_addr = *sin; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) { + iw_destroy_cm_id(cm_id); + return ret; + } + + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + if (id_priv->id.qp) + iw_param.qpn = id_priv->qp_num; + else + iw_param.qpn = conn_param->qp_num; + ret = iw_cm_connect(cm_id, &iw_param); +out: + return ret; +} + int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; @@ -1664,10 +1922,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) id_priv->srq = conn_param->srq; } - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = cma_connect_ib(id_priv, conn_param); break; + case RDMA_TRANSPORT_IWARP: + ret = cma_connect_iw(id_priv, conn_param); + break; default: ret = -ENOSYS; break; @@ -1708,6 +1969,28 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, return ib_send_cm_rep(id_priv->cm_id.ib, &rep); } +static int cma_accept_iw(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct iw_cm_conn_param iw_param; + int ret; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) + return ret; + + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + if (id_priv->id.qp) { + iw_param.qpn = id_priv->qp_num; + } else + iw_param.qpn = conn_param->qp_num; + + return iw_cm_accept(id_priv->cm_id.iw, &iw_param); +} + int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; @@ -1723,13 +2006,16 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) id_priv->srq = conn_param->srq; } - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: if (conn_param) ret = cma_accept_ib(id_priv, conn_param); else ret = cma_rep_recv(id_priv); break; + case RDMA_TRANSPORT_IWARP: + ret = cma_accept_iw(id_priv, conn_param); + break; default: ret = -ENOSYS; break; @@ -1756,12 +2042,16 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, if (!cma_comp(id_priv, CMA_CONNECT)) return -EINVAL; - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); break; + case RDMA_TRANSPORT_IWARP: + ret = iw_cm_reject(id_priv->cm_id.iw, + private_data, private_data_len); + break; default: ret = -ENOSYS; break; @@ -1780,17 +2070,20 @@ int rdma_disconnect(struct rdma_cm_id *id) !cma_comp(id_priv, CMA_DISCONNECT)) return -EINVAL; - ret = cma_modify_qp_err(id); - if (ret) - goto out; - - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + ret = cma_modify_qp_err(id); + if (ret) + goto out; /* Initiate or respond to a disconnect. */ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); break; + case RDMA_TRANSPORT_IWARP: + ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); + break; default: + ret = -EINVAL; break; } out: @@ -1907,12 +2200,15 @@ static int cma_init(void) if (!cma_wq) return -ENOMEM; + ib_sa_register_client(&sa_client); + ret = ib_register_client(&cma_client); if (ret) goto err; return 0; err: + ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); return ret; } @@ -1920,6 +2216,7 @@ err: static void cma_cleanup(void) { ib_unregister_client(&cma_client); + ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); idr_destroy(&sdp_ps); idr_destroy(&tcp_ps); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b2f3cb91d9b..63d2a39fb82 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -385,7 +385,7 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client) EXPORT_SYMBOL(ib_get_client_data); /** - * ib_set_client_data - Get IB client context + * ib_set_client_data - Set IB client context * @device:Device to set context for * @client:Client to set context for * @data:Context to set @@ -505,7 +505,7 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { if (port_num) return -EINVAL; } else if (port_num < 1 || port_num > device->phys_port_cnt) @@ -580,7 +580,7 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { if (port_num) return -EINVAL; } else if (port_num < 1 || port_num > device->phys_port_cnt) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c new file mode 100644 index 00000000000..c3fb304a4e8 --- /dev/null +++ b/drivers/infiniband/core/iwcm.c @@ -0,0 +1,1019 @@ +/* + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/idr.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/rbtree.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <linux/completion.h> + +#include <rdma/iw_cm.h> +#include <rdma/ib_addr.h> + +#include "iwcm.h" + +MODULE_AUTHOR("Tom Tucker"); +MODULE_DESCRIPTION("iWARP CM"); +MODULE_LICENSE("Dual BSD/GPL"); + +static struct workqueue_struct *iwcm_wq; +struct iwcm_work { + struct work_struct work; + struct iwcm_id_private *cm_id; + struct list_head list; + struct iw_cm_event event; + struct list_head free_list; +}; + +/* + * The following services provide a mechanism for pre-allocating iwcm_work + * elements. The design pre-allocates them based on the cm_id type: + * LISTENING IDS: Get enough elements preallocated to handle the + * listen backlog. + * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE + * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE + * + * Allocating them in connect and listen avoids having to deal + * with allocation failures on the event upcall from the provider (which + * is called in the interrupt context). + * + * One exception is when creating the cm_id for incoming connection requests. + * There are two cases: + * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If + * the backlog is exceeded, then no more connection request events will + * be processed. cm_event_handler() returns -ENOMEM in this case. Its up + * to the provider to reject the connectino request. + * 2) in the connection request workqueue handler, cm_conn_req_handler(). + * If work elements cannot be allocated for the new connect request cm_id, + * then IWCM will call the provider reject method. This is ok since + * cm_conn_req_handler() runs in the workqueue thread context. + */ + +static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) +{ + struct iwcm_work *work; + + if (list_empty(&cm_id_priv->work_free_list)) + return NULL; + work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, + free_list); + list_del_init(&work->free_list); + return work; +} + +static void put_work(struct iwcm_work *work) +{ + list_add(&work->free_list, &work->cm_id->work_free_list); +} + +static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) +{ + struct list_head *e, *tmp; + + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + kfree(list_entry(e, struct iwcm_work, free_list)); +} + +static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) +{ + struct iwcm_work *work; + + BUG_ON(!list_empty(&cm_id_priv->work_free_list)); + while (count--) { + work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); + if (!work) { + dealloc_work_entries(cm_id_priv); + return -ENOMEM; + } + work->cm_id = cm_id_priv; + INIT_LIST_HEAD(&work->list); + put_work(work); + } + return 0; +} + +/* + * Save private data from incoming connection requests in the + * cm_id_priv so the low level driver doesn't have to. Adjust + * the event ptr to point to the local copy. + */ +static int copy_private_data(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *event) +{ + void *p; + + p = kmalloc(event->private_data_len, GFP_ATOMIC); + if (!p) + return -ENOMEM; + memcpy(p, event->private_data, event->private_data_len); + event->private_data = p; + return 0; +} + +/* + * Release a reference on cm_id. If the last reference is being removed + * and iw_destroy_cm_id is waiting, wake up the waiting thread. + */ +static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) +{ + int ret = 0; + + BUG_ON(atomic_read(&cm_id_priv->refcount)==0); + if (atomic_dec_and_test(&cm_id_priv->refcount)) { + BUG_ON(!list_empty(&cm_id_priv->work_list)); + if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) { + BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING); + BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, + &cm_id_priv->flags)); + ret = 1; + } + complete(&cm_id_priv->destroy_comp); + } + + return ret; +} + +static void add_ref(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + atomic_inc(&cm_id_priv->refcount); +} + +static void rem_ref(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + iwcm_deref_id(cm_id_priv); +} + +static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); + +struct iw_cm_id *iw_create_cm_id(struct ib_device *device, + iw_cm_handler cm_handler, + void *context) +{ + struct iwcm_id_private *cm_id_priv; + + cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); + if (!cm_id_priv) + return ERR_PTR(-ENOMEM); + + cm_id_priv->state = IW_CM_STATE_IDLE; + cm_id_priv->id.device = device; + cm_id_priv->id.cm_handler = cm_handler; + cm_id_priv->id.context = context; + cm_id_priv->id.event_handler = cm_event_handler; + cm_id_priv->id.add_ref = add_ref; + cm_id_priv->id.rem_ref = rem_ref; + spin_lock_init(&cm_id_priv->lock); + atomic_set(&cm_id_priv->refcount, 1); + init_waitqueue_head(&cm_id_priv->connect_wait); + init_completion(&cm_id_priv->destroy_comp); + INIT_LIST_HEAD(&cm_id_priv->work_list); + INIT_LIST_HEAD(&cm_id_priv->work_free_list); + + return &cm_id_priv->id; +} +EXPORT_SYMBOL(iw_create_cm_id); + + +static int iwcm_modify_qp_err(struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + if (!qp) + return -EINVAL; + + qp_attr.qp_state = IB_QPS_ERR; + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); +} + +/* + * This is really the RDMAC CLOSING state. It is most similar to the + * IB SQD QP state. + */ +static int iwcm_modify_qp_sqd(struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + BUG_ON(qp == NULL); + qp_attr.qp_state = IB_QPS_SQD; + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); +} + +/* + * CM_ID <-- CLOSING + * + * Block if a passive or active connection is currenlty being processed. Then + * process the event as follows: + * - If we are ESTABLISHED, move to CLOSING and modify the QP state + * based on the abrupt flag + * - If the connection is already in the CLOSING or IDLE state, the peer is + * disconnecting concurrently with us and we've already seen the + * DISCONNECT event -- ignore the request and return 0 + * - Disconnect on a listening endpoint returns -EINVAL + */ +int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + struct ib_qp *qp = NULL; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + /* Wait if we're currently in a connect or accept downcall */ + wait_event(cm_id_priv->connect_wait, + !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_ESTABLISHED: + cm_id_priv->state = IW_CM_STATE_CLOSING; + + /* QP could be <nul> for user-mode client */ + if (cm_id_priv->qp) + qp = cm_id_priv->qp; + else + ret = -EINVAL; + break; + case IW_CM_STATE_LISTEN: + ret = -EINVAL; + break; + case IW_CM_STATE_CLOSING: + /* remote peer closed first */ + case IW_CM_STATE_IDLE: + /* accept or connect returned !0 */ + break; + case IW_CM_STATE_CONN_RECV: + /* + * App called disconnect before/without calling accept after + * connect_request event delivered. + */ + break; + case IW_CM_STATE_CONN_SENT: + /* Can only get here if wait above fails */ + default: + BUG(); + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (qp) { + if (abrupt) + ret = iwcm_modify_qp_err(qp); + else + ret = iwcm_modify_qp_sqd(qp); + + /* + * If both sides are disconnecting the QP could + * already be in ERR or SQD states + */ + ret = 0; + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_disconnect); + +/* + * CM_ID <-- DESTROYING + * + * Clean up all resources associated with the connection and release + * the initial reference taken by iw_create_cm_id. + */ +static void destroy_cm_id(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + /* + * Wait if we're currently in a connect or accept downcall. A + * listening endpoint should never block here. + */ + wait_event(cm_id_priv->connect_wait, + !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_LISTEN: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + /* destroy the listening endpoint */ + ret = cm_id->device->iwcm->destroy_listen(cm_id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_ESTABLISHED: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + /* Abrupt close of the connection */ + (void)iwcm_modify_qp_err(cm_id_priv->qp); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CLOSING: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + break; + case IW_CM_STATE_CONN_RECV: + /* + * App called destroy before/without calling accept after + * receiving connection request event notification. + */ + cm_id_priv->state = IW_CM_STATE_DESTROYING; + break; + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_DESTROYING: + default: + BUG(); + break; + } + if (cm_id_priv->qp) { + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + (void)iwcm_deref_id(cm_id_priv); +} + +/* + * This function is only called by the application thread and cannot + * be called by the event thread. The function will wait for all + * references to be released on the cm_id and then kfree the cm_id + * object. + */ +void iw_destroy_cm_id(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)); + + destroy_cm_id(cm_id); + + wait_for_completion(&cm_id_priv->destroy_comp); + + dealloc_work_entries(cm_id_priv); + + kfree(cm_id_priv); +} +EXPORT_SYMBOL(iw_destroy_cm_id); + +/* + * CM_ID <-- LISTEN + * + * Start listening for connect requests. Generates one CONNECT_REQUEST + * event for each inbound connect request. + */ +int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + ret = alloc_work_entries(cm_id_priv, backlog); + if (ret) + return ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + cm_id_priv->state = IW_CM_STATE_LISTEN; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id->device->iwcm->create_listen(cm_id, backlog); + if (ret) + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + default: + ret = -EINVAL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + return ret; +} +EXPORT_SYMBOL(iw_cm_listen); + +/* + * CM_ID <-- IDLE + * + * Rejects an inbound connection request. No events are generated. + */ +int iw_cm_reject(struct iw_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return -EINVAL; + } + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->reject(cm_id, private_data, + private_data_len); + + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + + return ret; +} +EXPORT_SYMBOL(iw_cm_reject); + +/* + * CM_ID <-- ESTABLISHED + * + * Accepts an inbound connection request and generates an ESTABLISHED + * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block + * until the ESTABLISHED event is received from the provider. + */ +int iw_cm_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *iw_param) +{ + struct iwcm_id_private *cm_id_priv; + struct ib_qp *qp; + unsigned long flags; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return -EINVAL; + } + /* Get the ib_qp given the QPN */ + qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + if (!qp) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return -EINVAL; + } + cm_id->device->iwcm->add_ref(qp); + cm_id_priv->qp = qp; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->accept(cm_id, iw_param); + if (ret) { + /* An error on accept precludes provider events */ + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->qp) { + cm_id->device->iwcm->rem_ref(qp); + cm_id_priv->qp = NULL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_accept); + +/* + * Active Side: CM_ID <-- CONN_SENT + * + * If successful, results in the generation of a CONNECT_REPLY + * event. iw_cm_disconnect and iw_cm_destroy will block until the + * CONNECT_REPLY event is received from the provider. + */ +int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + struct iwcm_id_private *cm_id_priv; + int ret = 0; + unsigned long flags; + struct ib_qp *qp; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + ret = alloc_work_entries(cm_id_priv, 4); + if (ret) + return ret; + + set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + spin_lock_irqsave(&cm_id_priv->lock, flags); + + if (cm_id_priv->state != IW_CM_STATE_IDLE) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return -EINVAL; + } + + /* Get the ib_qp given the QPN */ + qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + if (!qp) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return -EINVAL; + } + cm_id->device->iwcm->add_ref(qp); + cm_id_priv->qp = qp; + cm_id_priv->state = IW_CM_STATE_CONN_SENT; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->connect(cm_id, iw_param); + if (ret) { + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->qp) { + cm_id->device->iwcm->rem_ref(qp); + cm_id_priv->qp = NULL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); + cm_id_priv->state = IW_CM_STATE_IDLE; + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_connect); + +/* + * Passive Side: new CM_ID <-- CONN_RECV + * + * Handles an inbound connect request. The function creates a new + * iw_cm_id to represent the new connection and inherits the client + * callback function and other attributes from the listening parent. + * + * The work item contains a pointer to the listen_cm_id and the event. The + * listen_cm_id contains the client cm_handler, context and + * device. These are copied when the device is cloned. The event + * contains the new four tuple. + * + * An error on the child should not affect the parent, so this + * function does not return a value. + */ +static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + struct iw_cm_id *cm_id; + struct iwcm_id_private *cm_id_priv; + int ret; + + /* + * The provider should never generate a connection request + * event with a bad status. + */ + BUG_ON(iw_event->status); + + /* + * We could be destroying the listening id. If so, ignore this + * upcall. + */ + spin_lock_irqsave(&listen_id_priv->lock, flags); + if (listen_id_priv->state != IW_CM_STATE_LISTEN) { + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + return; + } + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + + cm_id = iw_create_cm_id(listen_id_priv->id.device, + listen_id_priv->id.cm_handler, + listen_id_priv->id.context); + /* If the cm_id could not be created, ignore the request */ + if (IS_ERR(cm_id)) + return; + + cm_id->provider_data = iw_event->provider_data; + cm_id->local_addr = iw_event->local_addr; + cm_id->remote_addr = iw_event->remote_addr; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + cm_id_priv->state = IW_CM_STATE_CONN_RECV; + + ret = alloc_work_entries(cm_id_priv, 3); + if (ret) { + iw_cm_reject(cm_id, NULL, 0); + iw_destroy_cm_id(cm_id); + return; + } + + /* Call the client CM handler */ + ret = cm_id->cm_handler(cm_id, iw_event); + if (ret) { + set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); + destroy_cm_id(cm_id); + if (atomic_read(&cm_id_priv->refcount)==0) + kfree(cm_id); + } + + if (iw_event->private_data_len) + kfree(iw_event->private_data); +} + +/* + * Passive Side: CM_ID <-- ESTABLISHED + * + * The provider generated an ESTABLISHED event which means that + * the MPA negotion has completed successfully and we are now in MPA + * FPDU mode. + * + * This event can only be received in the CONN_RECV state. If the + * remote peer closed, the ESTABLISHED event would be received followed + * by the CLOSE event. If the app closes, it will block until we wake + * it up after processing this event. + */ +static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + + /* + * We clear the CONNECT_WAIT bit here to allow the callback + * function to call iw_cm_disconnect. Calling iw_destroy_cm_id + * from a callback handler is not allowed. + */ + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); + cm_id_priv->state = IW_CM_STATE_ESTABLISHED; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + wake_up_all(&cm_id_priv->connect_wait); + + return ret; +} + +/* + * Active Side: CM_ID <-- ESTABLISHED + * + * The app has called connect and is waiting for the established event to + * post it's requests to the server. This event will wake up anyone + * blocked in iw_cm_disconnect or iw_destroy_id. + */ +static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + /* + * Clear the connect wait bit so a callback function calling + * iw_cm_disconnect will not wait and deadlock this thread + */ + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); + if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) { + cm_id_priv->id.local_addr = iw_event->local_addr; + cm_id_priv->id.remote_addr = iw_event->remote_addr; + cm_id_priv->state = IW_CM_STATE_ESTABLISHED; + } else { + /* REJECTED or RESET */ + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + cm_id_priv->state = IW_CM_STATE_IDLE; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + + if (iw_event->private_data_len) + kfree(iw_event->private_data); + + /* Wake up waiters on connect complete */ + wake_up_all(&cm_id_priv->connect_wait); + + return ret; +} + +/* + * CM_ID <-- CLOSING + * + * If in the ESTABLISHED state, move to CLOSING. + */ +static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) + cm_id_priv->state = IW_CM_STATE_CLOSING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); +} + +/* + * CM_ID <-- IDLE + * + * If in the ESTBLISHED or CLOSING states, the QP will have have been + * moved by the provider to the ERR state. Disassociate the CM_ID from + * the QP, move to IDLE, and remove the 'connected' reference. + * + * If in some other state, the cm_id was destroyed asynchronously. + * This is the last reference that will result in waking up + * the app thread blocked in iw_destroy_cm_id. + */ +static int cm_close_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + int ret = 0; + spin_lock_irqsave(&cm_id_priv->lock, flags); + + if (cm_id_priv->qp) { + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + } + switch (cm_id_priv->state) { + case IW_CM_STATE_ESTABLISHED: + case IW_CM_STATE_CLOSING: + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_DESTROYING: + break; + default: + BUG(); + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + return ret; +} + +static int process_event(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + int ret = 0; + + switch (iw_event->event) { + case IW_CM_EVENT_CONNECT_REQUEST: + cm_conn_req_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_CONNECT_REPLY: + ret = cm_conn_rep_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_ESTABLISHED: + ret = cm_conn_est_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_DISCONNECT: + cm_disconnect_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_CLOSE: + ret = cm_close_handler(cm_id_priv, iw_event); + break; + default: + BUG(); + } + + return ret; +} + +/* + * Process events on the work_list for the cm_id. If the callback + * function requests that the cm_id be deleted, a flag is set in the + * cm_id flags to indicate that when the last reference is + * removed, the cm_id is to be destroyed. This is necessary to + * distinguish between an object that will be destroyed by the app + * thread asleep on the destroy_comp list vs. an object destroyed + * here synchronously when the last reference is removed. + */ +static void cm_work_handler(void *arg) +{ + struct iwcm_work *work = arg, lwork; + struct iwcm_id_private *cm_id_priv = work->cm_id; + unsigned long flags; + int empty; + int ret = 0; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + empty = list_empty(&cm_id_priv->work_list); + while (!empty) { + work = list_entry(cm_id_priv->work_list.next, + struct iwcm_work, list); + list_del_init(&work->list); + empty = list_empty(&cm_id_priv->work_list); + lwork = *work; + put_work(work); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = process_event(cm_id_priv, &work->event); + if (ret) { + set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); + destroy_cm_id(&cm_id_priv->id); + } + BUG_ON(atomic_read(&cm_id_priv->refcount)==0); + if (iwcm_deref_id(cm_id_priv)) + return; + + if (atomic_read(&cm_id_priv->refcount)==0 && + test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) { + dealloc_work_entries(cm_id_priv); + kfree(cm_id_priv); + return; + } + spin_lock_irqsave(&cm_id_priv->lock, flags); + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); +} + +/* + * This function is called on interrupt context. Schedule events on + * the iwcm_wq thread to allow callback functions to downcall into + * the CM and/or block. Events are queued to a per-CM_ID + * work_list. If this is the first event on the work_list, the work + * element is also queued on the iwcm_wq thread. + * + * Each event holds a reference on the cm_id. Until the last posted + * event has been delivered and processed, the cm_id cannot be + * deleted. + * + * Returns: + * 0 - the event was handled. + * -ENOMEM - the event was not handled due to lack of resources. + */ +static int cm_event_handler(struct iw_cm_id *cm_id, + struct iw_cm_event *iw_event) +{ + struct iwcm_work *work; + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + work = get_work(cm_id_priv); + if (!work) { + ret = -ENOMEM; + goto out; + } + + INIT_WORK(&work->work, cm_work_handler, work); + work->cm_id = cm_id_priv; + work->event = *iw_event; + + if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || + work->event.event == IW_CM_EVENT_CONNECT_REPLY) && + work->event.private_data_len) { + ret = copy_private_data(cm_id_priv, &work->event); + if (ret) { + put_work(work); + goto out; + } + } + + atomic_inc(&cm_id_priv->refcount); + if (list_empty(&cm_id_priv->work_list)) { + list_add_tail(&work->list, &cm_id_priv->work_list); + queue_work(iwcm_wq, &work->work); + } else + list_add_tail(&work->list, &cm_id_priv->work_list); +out: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_CONN_RECV: + case IW_CM_STATE_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE| + IB_ACCESS_REMOTE_READ; + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_CONN_RECV: + case IW_CM_STATE_ESTABLISHED: + *qp_attr_mask = 0; + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + struct iwcm_id_private *cm_id_priv; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + switch (qp_attr->qp_state) { + case IB_QPS_INIT: + case IB_QPS_RTR: + ret = iwcm_init_qp_init_attr(cm_id_priv, + qp_attr, qp_attr_mask); + break; + case IB_QPS_RTS: + ret = iwcm_init_qp_rts_attr(cm_id_priv, + qp_attr, qp_attr_mask); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} +EXPORT_SYMBOL(iw_cm_init_qp_attr); + +static int __init iw_cm_init(void) +{ + iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); + if (!iwcm_wq) + return -ENOMEM; + + return 0; +} + +static void __exit iw_cm_cleanup(void) +{ + destroy_workqueue(iwcm_wq); +} + +module_init(iw_cm_init); +module_exit(iw_cm_cleanup); diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h new file mode 100644 index 00000000000..3f6cc82564c --- /dev/null +++ b/drivers/infiniband/core/iwcm.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef IWCM_H +#define IWCM_H + +enum iw_cm_state { + IW_CM_STATE_IDLE, /* unbound, inactive */ + IW_CM_STATE_LISTEN, /* listen waiting for connect */ + IW_CM_STATE_CONN_RECV, /* inbound waiting for user accept */ + IW_CM_STATE_CONN_SENT, /* outbound waiting for peer accept */ + IW_CM_STATE_ESTABLISHED, /* established */ + IW_CM_STATE_CLOSING, /* disconnect */ + IW_CM_STATE_DESTROYING /* object being deleted */ +}; + +struct iwcm_id_private { + struct iw_cm_id id; + enum iw_cm_state state; + unsigned long flags; + struct ib_qp *qp; + struct completion destroy_comp; + wait_queue_head_t connect_wait; + struct list_head work_list; + spinlock_t lock; + atomic_t refcount; + struct list_head work_free_list; +}; + +#define IWCM_F_CALLBACK_DESTROY 1 +#define IWCM_F_CONNECT_WAIT 2 + +#endif /* IWCM_H */ diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 1c3cfbbe6a9..082f03c158f 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1246,8 +1246,8 @@ static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, int i; for (i = 0; i < MAX_MGMT_OUI; i++) - /* Is there matching OUI for this vendor class ? */ - if (!memcmp(vendor_class->oui[i], oui, 3)) + /* Is there matching OUI for this vendor class ? */ + if (!memcmp(vendor_class->oui[i], oui, 3)) return i; return -1; @@ -2237,7 +2237,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &mad_agent_priv->send_list, agent_list) { if (mad_send_wr->status == IB_WC_SUCCESS) { - mad_send_wr->status = IB_WC_WR_FLUSH_ERR; + mad_send_wr->status = IB_WC_WR_FLUSH_ERR; mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } } @@ -2528,10 +2528,10 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, } } sg_list.addr = dma_map_single(qp_info->port_priv-> - device->dma_device, + device->dma_device, &mad_priv->grh, sizeof *mad_priv - - sizeof mad_priv->header, + sizeof mad_priv->header, DMA_FROM_DEVICE); pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr); recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list; @@ -2606,7 +2606,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) struct ib_qp *qp; attr = kmalloc(sizeof *attr, GFP_KERNEL); - if (!attr) { + if (!attr) { printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n"); return -ENOMEM; } @@ -2876,7 +2876,10 @@ static void ib_mad_init_device(struct ib_device *device) { int start, end, i; - if (device->node_type == IB_NODE_SWITCH) { + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) { start = 0; end = 0; } else { @@ -2923,7 +2926,7 @@ static void ib_mad_remove_device(struct ib_device *device) { int i, num_ports, cur_port; - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { num_ports = 1; cur_port = 0; } else { diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index d147f3bad2c..1da9adbccae 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -39,7 +39,6 @@ #include <linux/completion.h> #include <linux/pci.h> -#include <linux/kthread.h> #include <linux/workqueue.h> #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index ebcd5b18177..1ef79d015a1 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -33,8 +33,6 @@ * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $ */ -#include <linux/dma-mapping.h> - #include "mad_priv.h" #include "mad_rmpp.h" @@ -60,6 +58,7 @@ struct mad_rmpp_recv { int last_ack; int seg_num; int newwin; + int repwin; __be64 tid; u32 src_qp; @@ -170,6 +169,32 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, return msg; } +static void ack_ds_ack(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *recv_wc) +{ + struct ib_mad_send_buf *msg; + struct ib_rmpp_mad *rmpp_mad; + int ret; + + msg = alloc_response_msg(&agent->agent, recv_wc); + if (IS_ERR(msg)) + return; + + rmpp_mad = msg->mad; + memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len); + + rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + rmpp_mad->rmpp_hdr.seg_num = 0; + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1); + + ret = ib_post_send_mad(msg, NULL); + if (ret) { + ib_destroy_ah(msg->ah); + ib_free_send_mad(msg); + } +} + void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) { struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad; @@ -271,6 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent, rmpp_recv->newwin = 1; rmpp_recv->seg_num = 1; rmpp_recv->last_ack = 0; + rmpp_recv->repwin = 1; mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; rmpp_recv->tid = mad_hdr->tid; @@ -365,7 +391,7 @@ static inline int window_size(struct ib_mad_agent_private *agent) static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list, int seg_num) { - struct ib_mad_recv_buf *seg_buf; + struct ib_mad_recv_buf *seg_buf; int cur_seg_num; list_for_each_entry_reverse(seg_buf, rmpp_list, list) { @@ -591,6 +617,16 @@ static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr, break; } +static void process_ds_ack(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc, int newwin) +{ + struct mad_rmpp_recv *rmpp_recv; + + rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); + if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE) + rmpp_recv->repwin = newwin; +} + static void process_rmpp_ack(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { @@ -616,8 +652,18 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, spin_lock_irqsave(&agent->lock, flags); mad_send_wr = ib_find_send_mad(agent, mad_recv_wc); - if (!mad_send_wr) - goto out; /* Unmatched ACK */ + if (!mad_send_wr) { + if (!seg_num) + process_ds_ack(agent, mad_recv_wc, newwin); + goto out; /* Unmatched or DS RMPP ACK */ + } + + if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) && + (mad_send_wr->timeout)) { + spin_unlock_irqrestore(&agent->lock, flags); + ack_ds_ack(agent, mad_recv_wc); + return; /* Repeated ACK for DS RMPP transaction */ + } if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) @@ -656,6 +702,9 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, if (mad_send_wr->refcount == 1) ib_reset_mad_timeout(mad_send_wr, mad_send_wr->send_buf.timeout_ms); + spin_unlock_irqrestore(&agent->lock, flags); + ack_ds_ack(agent, mad_recv_wc); + return; } else if (mad_send_wr->refcount == 1 && mad_send_wr->seg_num < mad_send_wr->newwin && mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) { @@ -772,6 +821,39 @@ out: return NULL; } +static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv; + struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad; + struct mad_rmpp_recv *rmpp_recv; + struct ib_ah_attr ah_attr; + unsigned long flags; + int newwin = 1; + + if (!(mad_hdr->method & IB_MGMT_METHOD_RESP)) + goto out; + + spin_lock_irqsave(&agent->lock, flags); + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + if (rmpp_recv->tid != mad_hdr->tid || + rmpp_recv->mgmt_class != mad_hdr->mgmt_class || + rmpp_recv->class_version != mad_hdr->class_version || + (rmpp_recv->method & IB_MGMT_METHOD_RESP)) + continue; + + if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr)) + continue; + + if (rmpp_recv->slid == ah_attr.dlid) { + newwin = rmpp_recv->repwin; + break; + } + } + spin_unlock_irqrestore(&agent->lock, flags); +out: + return newwin; +} + int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_mad *rmpp_mad; @@ -787,7 +869,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) return IB_RMPP_RESULT_INTERNAL; } - mad_send_wr->newwin = 1; + mad_send_wr->newwin = init_newwin(mad_send_wr); /* We need to wait for the final ACK even if there isn't a response */ mad_send_wr->refcount += (mad_send_wr->timeout == 0); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index d6b84226bba..1706d3c7e95 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -75,6 +76,7 @@ struct ib_sa_device { struct ib_sa_query { void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); void (*release)(struct ib_sa_query *); + struct ib_sa_client *client; struct ib_sa_port *port; struct ib_mad_send_buf *mad_buf; struct ib_sa_sm_ah *sm_ah; @@ -415,6 +417,31 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event } } +void ib_sa_register_client(struct ib_sa_client *client) +{ + atomic_set(&client->users, 1); + init_completion(&client->comp); +} +EXPORT_SYMBOL(ib_sa_register_client); + +static inline void ib_sa_client_get(struct ib_sa_client *client) +{ + atomic_inc(&client->users); +} + +static inline void ib_sa_client_put(struct ib_sa_client *client) +{ + if (atomic_dec_and_test(&client->users)) + complete(&client->comp); +} + +void ib_sa_unregister_client(struct ib_sa_client *client) +{ + ib_sa_client_put(client); + wait_for_completion(&client->comp); +} +EXPORT_SYMBOL(ib_sa_unregister_client); + /** * ib_sa_cancel_query - try to cancel an SA query * @id:ID of query to cancel @@ -557,6 +584,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) /** * ib_sa_path_rec_get - Start a Path get query + * @client:SA client * @device:device to send query on * @port_num: port number to send query on * @rec:Path Record to send in query @@ -579,7 +607,8 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) * error code. Otherwise it is a query ID that can be used to cancel * the query. */ -int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, +int ib_sa_path_rec_get(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, @@ -614,8 +643,10 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, goto err1; } - query->callback = callback; - query->context = context; + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); @@ -639,6 +670,7 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, err2: *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); ib_free_send_mad(query->sa_query.mad_buf); err1: @@ -671,6 +703,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) /** * ib_sa_service_rec_query - Start Service Record operation + * @client:SA client * @device:device to send request on * @port_num: port number to send request on * @method:SA method - should be get, set, or delete @@ -695,7 +728,8 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) * error code. Otherwise it is a request ID that can be used to cancel * the query. */ -int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, +int ib_sa_service_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, @@ -735,8 +769,10 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, goto err1; } - query->callback = callback; - query->context = context; + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); @@ -761,6 +797,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, err2: *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); ib_free_send_mad(query->sa_query.mad_buf); err1: @@ -791,7 +828,8 @@ static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); } -int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, +int ib_sa_mcmember_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, @@ -827,8 +865,10 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, goto err1; } - query->callback = callback; - query->context = context; + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); @@ -853,6 +893,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, err2: *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); ib_free_send_mad(query->sa_query.mad_buf); err1: @@ -887,8 +928,9 @@ static void send_handler(struct ib_mad_agent *agent, idr_remove(&query_idr, query->id); spin_unlock_irqrestore(&idr_lock, flags); - ib_free_send_mad(mad_send_wc->send_buf); + ib_free_send_mad(mad_send_wc->send_buf); kref_put(&query->sm_ah->ref, free_sm_ah); + ib_sa_client_put(query->client); query->release(query); } @@ -919,7 +961,10 @@ static void ib_sa_add_one(struct ib_device *device) struct ib_sa_device *sa_dev; int s, e, i; - if (device->node_type == IB_NODE_SWITCH) + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) s = e = 0; else { s = 1; diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index 35852e794e2..54b81e17ad5 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -64,7 +64,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, /* C14-9:2 */ if (hop_ptr && hop_ptr < hop_cnt) { - if (node_type != IB_NODE_SWITCH) + if (node_type != RDMA_NODE_IB_SWITCH) return 0; /* smp->return_path set when received */ @@ -77,7 +77,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, if (hop_ptr == hop_cnt) { /* smp->return_path set when received */ smp->hop_ptr++; - return (node_type == IB_NODE_SWITCH || + return (node_type == RDMA_NODE_IB_SWITCH || smp->dr_dlid == IB_LID_PERMISSIVE); } @@ -95,7 +95,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, /* C14-13:2 */ if (2 <= hop_ptr && hop_ptr <= hop_cnt) { - if (node_type != IB_NODE_SWITCH) + if (node_type != RDMA_NODE_IB_SWITCH) return 0; smp->hop_ptr--; @@ -107,7 +107,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, if (hop_ptr == 1) { smp->hop_ptr--; /* C14-13:3 -- SMPs destined for SM shouldn't be here */ - return (node_type == IB_NODE_SWITCH || + return (node_type == RDMA_NODE_IB_SWITCH || smp->dr_slid == IB_LID_PERMISSIVE); } @@ -142,7 +142,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, /* C14-9:2 -- intermediate hop */ if (hop_ptr && hop_ptr < hop_cnt) { - if (node_type != IB_NODE_SWITCH) + if (node_type != RDMA_NODE_IB_SWITCH) return 0; smp->return_path[hop_ptr] = port_num; @@ -156,7 +156,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, smp->return_path[hop_ptr] = port_num; /* smp->hop_ptr updated when sending */ - return (node_type == IB_NODE_SWITCH || + return (node_type == RDMA_NODE_IB_SWITCH || smp->dr_dlid == IB_LID_PERMISSIVE); } @@ -175,7 +175,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, /* C14-13:2 */ if (2 <= hop_ptr && hop_ptr <= hop_cnt) { - if (node_type != IB_NODE_SWITCH) + if (node_type != RDMA_NODE_IB_SWITCH) return 0; /* smp->hop_ptr updated when sending */ @@ -190,7 +190,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, return 1; } /* smp->hop_ptr updated when sending */ - return (node_type == IB_NODE_SWITCH); + return (node_type == RDMA_NODE_IB_SWITCH); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 21f9282c1b2..709323c14c5 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -68,7 +68,7 @@ struct port_table_attribute { int index; }; -static inline int ibdev_is_alive(const struct ib_device *dev) +static inline int ibdev_is_alive(const struct ib_device *dev) { return dev->reg_state == IB_DEV_REGISTERED; } @@ -589,10 +589,11 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf) return -ENODEV; switch (dev->node_type) { - case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type); - case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); - case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); - default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); + case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); + case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); + case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); + case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); + default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); } } @@ -708,7 +709,7 @@ int ib_device_register_sysfs(struct ib_device *device) if (ret) goto err_put; - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { ret = add_port(device, 0); if (ret) goto err_put; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index c1c6fda9452..ad4f4d5c292 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -309,9 +309,9 @@ static int ib_ucm_event_process(struct ib_cm_event *evt, info = evt->param.apr_rcvd.apr_info; break; case IB_CM_SIDR_REQ_RECEIVED: - uvt->resp.u.sidr_req_resp.pkey = + uvt->resp.u.sidr_req_resp.pkey = evt->param.sidr_req_rcvd.pkey; - uvt->resp.u.sidr_req_resp.port = + uvt->resp.u.sidr_req_resp.port = evt->param.sidr_req_rcvd.port; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; @@ -1237,7 +1237,7 @@ static struct class ucm_class = { static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_ucm_device *dev; - + dev = container_of(class_dev, struct ib_ucm_device, class_dev); return sprintf(buf, "%s\n", dev->ib_dev->name); } @@ -1247,7 +1247,8 @@ static void ib_ucm_add_one(struct ib_device *device) { struct ib_ucm_device *ucm_dev; - if (!device->alloc_ucontext) + if (!device->alloc_ucontext || + rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 1273f8807e8..807fbd6b841 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -1032,7 +1032,10 @@ static void ib_umad_add_one(struct ib_device *device) struct ib_umad_device *umad_dev; int s, e, i; - if (device->node_type == IB_NODE_SWITCH) + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) s = e = 0; else { s = 1; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 30923eb68ec..b72c7f69ca9 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -155,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, } static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, - struct ib_ucontext *context) + struct ib_ucontext *context, int nested) { struct ib_uobject *uobj; @@ -163,7 +163,10 @@ static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, if (!uobj) return NULL; - down_read(&uobj->mutex); + if (nested) + down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING); + else + down_read(&uobj->mutex); if (!uobj->live) { put_uobj_read(uobj); return NULL; @@ -190,17 +193,18 @@ static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, return uobj; } -static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context) +static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context, + int nested) { struct ib_uobject *uobj; - uobj = idr_read_uobj(idr, id, context); + uobj = idr_read_uobj(idr, id, context, nested); return uobj ? uobj->object : NULL; } static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context); + return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0); } static void put_pd_read(struct ib_pd *pd) @@ -208,9 +212,9 @@ static void put_pd_read(struct ib_pd *pd) put_uobj_read(pd->uobject); } -static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context) +static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested) { - return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context); + return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested); } static void put_cq_read(struct ib_cq *cq) @@ -220,7 +224,7 @@ static void put_cq_read(struct ib_cq *cq) static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context); + return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0); } static void put_ah_read(struct ib_ah *ah) @@ -230,7 +234,7 @@ static void put_ah_read(struct ib_ah *ah) static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context); + return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); } static void put_qp_read(struct ib_qp *qp) @@ -240,7 +244,7 @@ static void put_qp_read(struct ib_qp *qp) static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context); + return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); } static void put_srq_read(struct ib_srq *srq) @@ -837,7 +841,6 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, err_copy: idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); - err_free: ib_destroy_cq(cq); @@ -867,7 +870,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - cq = idr_read_cq(cmd.cq_handle, file->ucontext); + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!cq) return -EINVAL; @@ -875,11 +878,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, if (ret) goto out; - memset(&resp, 0, sizeof resp); resp.cqe = cq->cqe; if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + &resp, sizeof resp.cqe)) ret = -EFAULT; out: @@ -894,7 +896,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, { struct ib_uverbs_poll_cq cmd; struct ib_uverbs_poll_cq_resp *resp; - struct ib_uobject *uobj; struct ib_cq *cq; struct ib_wc *wc; int ret = 0; @@ -915,16 +916,15 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, goto out_wc; } - uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); - if (!uobj) { + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) { ret = -EINVAL; goto out; } - cq = uobj->object; resp->count = ib_poll_cq(cq, cmd.ne, wc); - put_uobj_read(uobj); + put_cq_read(cq); for (i = 0; i < resp->count; i++) { resp->wc[i].wr_id = wc[i].wr_id; @@ -959,21 +959,19 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_req_notify_cq cmd; - struct ib_uobject *uobj; struct ib_cq *cq; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); - if (!uobj) + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) return -EINVAL; - cq = uobj->object; ib_req_notify_cq(cq, cmd.solicited_only ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - put_uobj_read(uobj); + put_cq_read(cq); return in_len; } @@ -1064,9 +1062,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; pd = idr_read_pd(cmd.pd_handle, file->ucontext); - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext); + scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); rcq = cmd.recv_cq_handle == cmd.send_cq_handle ? - scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext); + scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) { ret = -EINVAL; @@ -1274,6 +1272,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_modify_qp cmd; + struct ib_udata udata; struct ib_qp *qp; struct ib_qp_attr *attr; int ret; @@ -1281,6 +1280,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, + out_len); + attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) return -ENOMEM; @@ -1337,7 +1339,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; - ret = ib_modify_qp(qp, attr, cmd.attr_mask); + ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata); put_qp_read(qp); @@ -1674,7 +1676,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, break; } - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; @@ -1724,7 +1725,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, break; } - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; @@ -2055,6 +2055,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_modify_srq cmd; + struct ib_udata udata; struct ib_srq *srq; struct ib_srq_attr attr; int ret; @@ -2062,6 +2063,9 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, + out_len); + srq = idr_read_srq(cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; @@ -2069,7 +2073,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, attr.max_wr = cmd.max_wr; attr.srq_limit = cmd.srq_limit; - ret = ib_modify_srq(srq, &attr, cmd.attr_mask); + ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); put_srq_read(srq); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 468999c3880..8b5dd3649bb 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -79,6 +79,23 @@ enum ib_rate mult_to_ib_rate(int mult) } EXPORT_SYMBOL(mult_to_ib_rate); +enum rdma_transport_type +rdma_node_get_transport(enum rdma_node_type node_type) +{ + switch (node_type) { + case RDMA_NODE_IB_CA: + case RDMA_NODE_IB_SWITCH: + case RDMA_NODE_IB_ROUTER: + return RDMA_TRANSPORT_IB; + case RDMA_NODE_RNIC: + return RDMA_TRANSPORT_IWARP; + default: + BUG(); + return 0; + } +} +EXPORT_SYMBOL(rdma_node_get_transport); + /* Protection domains */ struct ib_pd *ib_alloc_pd(struct ib_device *device) @@ -231,7 +248,7 @@ int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask) { - return srq->device->modify_srq(srq, srq_attr, srq_attr_mask); + return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_srq); @@ -547,7 +564,7 @@ int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { - return qp->device->modify_qp(qp, qp_attr, qp_attr_mask); + return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); |