From dd5bdff83b19d9174126e0398b47117c3a80e22d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 22 Jul 2008 14:14:22 -0700 Subject: RDMA/cma: Add RDMA_CM_EVENT_ADDR_CHANGE event Add an RDMA_CM_EVENT_ADDR_CHANGE event can be used by rdma-cm consumers that wish to have their RDMA sessions always use the same links (eg ) as the IP stack does. In the current code, this does not happen when bonding is used and fail-over happened but the IB link used by an already existing session is operating fine. Use the netevent notification for sensing that a change has happened in the IP stack, then scan the rdma-cm ID list to see if there is an ID that is "misaligned" with respect to the IP stack, and deliver RDMA_CM_EVENT_ADDR_CHANGE for this ID. The consumer can act on the event or just ignore it. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 92 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index ae11d5cc74d..79792c92e6f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -168,6 +168,12 @@ struct cma_work { struct rdma_cm_event event; }; +struct cma_ndev_work { + struct work_struct work; + struct rdma_id_private *id; + struct rdma_cm_event event; +}; + union cma_ip_addr { struct in6_addr ip6; struct { @@ -1598,6 +1604,30 @@ out: kfree(work); } +static void cma_ndev_work_handler(struct work_struct *_work) +{ + struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); + struct rdma_id_private *id_priv = work->id; + int destroy = 0; + + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state == CMA_DESTROYING || + id_priv->state == CMA_DEVICE_REMOVAL) + goto out; + + if (id_priv->id.event_handler(&id_priv->id, &work->event)) { + cma_exch(id_priv, CMA_DESTROYING); + destroy = 1; + } + +out: + mutex_unlock(&id_priv->handler_mutex); + cma_deref_id(id_priv); + if (destroy) + rdma_destroy_id(&id_priv->id); + kfree(work); +} + static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) { struct rdma_route *route = &id_priv->id.route; @@ -2723,6 +2753,65 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) } EXPORT_SYMBOL(rdma_leave_multicast); +static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv) +{ + struct rdma_dev_addr *dev_addr; + struct cma_ndev_work *work; + + dev_addr = &id_priv->id.route.addr.dev_addr; + + if ((dev_addr->src_dev == ndev) && + memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { + printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", + ndev->name, &id_priv->id); + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + INIT_WORK(&work->work, cma_ndev_work_handler); + work->id = id_priv; + work->event.event = RDMA_CM_EVENT_ADDR_CHANGE; + atomic_inc(&id_priv->refcount); + queue_work(cma_wq, &work->work); + } + + return 0; +} + +static int cma_netdev_callback(struct notifier_block *self, unsigned long event, + void *ctx) +{ + struct net_device *ndev = (struct net_device *)ctx; + struct cma_device *cma_dev; + struct rdma_id_private *id_priv; + int ret = NOTIFY_DONE; + + if (dev_net(ndev) != &init_net) + return NOTIFY_DONE; + + if (event != NETDEV_BONDING_FAILOVER) + return NOTIFY_DONE; + + if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING)) + return NOTIFY_DONE; + + mutex_lock(&lock); + list_for_each_entry(cma_dev, &dev_list, list) + list_for_each_entry(id_priv, &cma_dev->id_list, list) { + ret = cma_netdev_change(ndev, id_priv); + if (ret) + goto out; + } + +out: + mutex_unlock(&lock); + return ret; +} + +static struct notifier_block cma_nb = { + .notifier_call = cma_netdev_callback +}; + static void cma_add_one(struct ib_device *device) { struct cma_device *cma_dev; @@ -2831,6 +2920,7 @@ static int cma_init(void) ib_sa_register_client(&sa_client); rdma_addr_register_client(&addr_client); + register_netdevice_notifier(&cma_nb); ret = ib_register_client(&cma_client); if (ret) @@ -2838,6 +2928,7 @@ static int cma_init(void) return 0; err: + unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); @@ -2847,6 +2938,7 @@ err: static void cma_cleanup(void) { ib_unregister_client(&cma_client); + unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); -- cgit v1.2.3 From 38ca83a588662f0af684ba2567dd910a564268ab Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 22 Jul 2008 14:14:23 -0700 Subject: RDMA/cma: Add RDMA_CM_EVENT_TIMEWAIT_EXIT event Consumers that want to re-use their QPs in new connections need to know when the QP has exited the timewait state. Report the timewait event through the rdma_cm. Signed-off-by: Amir Vadai Acked-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 79792c92e6f..e980ff3335d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -920,7 +920,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_cm_event event; int ret = 0; - if (cma_disable_callback(id_priv, CMA_CONNECT)) + if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && + cma_disable_callback(id_priv, CMA_CONNECT)) || + (ib_event->event == IB_CM_TIMEWAIT_EXIT && + cma_disable_callback(id_priv, CMA_DISCONNECT))) return 0; memset(&event, 0, sizeof event); @@ -956,6 +959,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IB_CM_TIMEWAIT_EXIT: + event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; + break; case IB_CM_MRA_RECEIVED: /* ignore event */ goto out; -- cgit v1.2.3 From 64b784b583061ebfe1d484dd1fdc5a26c6d4293f Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 22 Jul 2008 14:18:33 -0700 Subject: IB/sa_query: Check if sm_ah is NULL in ib_sa_remove_one() If update_sm_ah() fails, it leaves the port's sm_ah as NULL. Then if the device or module is removed, ib_sa_remove_one() will dereference a NULL pointer when it calls kref_put(). Fix this by testing if sm_ah is NULL before dropping the reference. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/core/sa_query.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 1341de793e5..7863a50d56f 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1064,7 +1064,8 @@ static void ib_sa_remove_one(struct ib_device *device) for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { ib_unregister_mad_agent(sa_dev->port[i].agent); - kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); + if (sa_dev->port[i].sm_ah) + kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); } kfree(sa_dev); -- cgit v1.2.3 From 1ca8d15619f725e223c19137350b0336b9196193 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Tue, 22 Jul 2008 14:18:34 -0700 Subject: RDMA/iwcm: Remove IB_ACCESS_LOCAL_WRITE from remote QP attributes Remove IB_ACCESS_LOCAL_WRITE from qp.qp_access_flags because this attribute is only used to set remote permissions. Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- drivers/infiniband/core/iwcm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 81c9195b512..8f9509e1ebf 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -942,8 +942,7 @@ static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE| + qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| IB_ACCESS_REMOTE_READ; ret = 0; break; -- cgit v1.2.3