From cd80ec6f81db89d109187a673470c04af4c09a63 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 15 May 2008 15:28:55 -0700 Subject: IB/ipath: Fix printk format for ipath_sdma_status Commit f018c7e1 ("IB/ipath: Change ipath_devdata.ipath_sdma_status to be unsigned long") changed ipath_sdma_status to be unsigned long, but left a few debug messages that printed it out with a %016llx format, which generates the warnings drivers/infiniband/hw/ipath/ipath_sdma.c:348: warning: format '%016llx' expects type 'long long unsigned int', but argument 3 has type 'long unsigned int' drivers/infiniband/hw/ipath/ipath_sdma.c:618: warning: format '%016llx' expects type 'long long unsigned int', but argument 3 has type 'long unsigned int' Fix this by changing the format used to print out the value to %08lx (8 hex digits are now sufficient, because the highest bit used is 31). Warnings reported by Randy Dunlap . Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_sdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 3697449c1ba..0a8c1b8091a 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -345,7 +345,7 @@ resched: * state change */ if (jiffies > dd->ipath_sdma_abort_jiffies) { - ipath_dbg("looping with status 0x%016llx\n", + ipath_dbg("looping with status 0x%08lx\n", dd->ipath_sdma_status); dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ; } @@ -615,7 +615,7 @@ void ipath_restart_sdma(struct ipath_devdata *dd) } spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); if (!needed) { - ipath_dbg("invalid attempt to restart SDMA, status 0x%016llx\n", + ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n", dd->ipath_sdma_status); goto bail; } -- cgit v1.2.3 From df3f0da8db6b5e7e8f0585221c8b1cd8ff806d35 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Thu, 15 May 2008 16:37:25 -0700 Subject: IB/ipath: Fix UC receive completion opcode for RDMA WRITE with immediate When I fixed the RC receive completion opcode in 2bfc8e9e ("IB/ipath: Return the correct opcode for RDMA WRITE with immediate"), I forgot to fix UC, which had the same problem for RDMA write with immediate returning the wrong opcode. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_uc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 7fd18e83390..0596ec16fcb 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -407,12 +407,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, dev->n_pkt_drops++; goto done; } - /* XXX Need to free SGEs */ + wc.opcode = IB_WC_RECV; last_imm: ipath_copy_sge(&qp->r_sge, data, tlen); wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; - wc.opcode = IB_WC_RECV; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; wc.slid = qp->remote_ah_attr.dlid; @@ -514,6 +513,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto done; } wc.byte_len = qp->r_len; + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; goto last_imm; case OP(RDMA_WRITE_LAST): -- cgit v1.2.3 From a3d8e1591dc90d359d444c759dfda2c6fc605251 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 16 May 2008 14:28:30 -0700 Subject: IB/mlx4: Fix uninitialized-var warning in mlx4_ib_post_send() drivers/infiniband/hw/mlx4/qp.c: In function 'mlx4_ib_post_send': drivers/infiniband/hw/mlx4/qp.c:1460: warning: 'seglen' may be used uninitialized in this function This is the dopey gcc-doesn't-know-that-foo(&var)-writes-to-var problem. Signed-off-by: Andrew Morton Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8e02ecfec18..cec030e118d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1457,7 +1457,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned ind; int uninitialized_var(stamp); int uninitialized_var(size); - unsigned seglen; + unsigned uninitialized_var(seglen); int i; spin_lock_irqsave(&qp->sq.lock, flags); -- cgit v1.2.3 From 21609ae3efa42f4118ce741f7e55d66d716cb17c Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 16 May 2008 14:58:40 -0700 Subject: RDMA/cxgb3: Fix uninitialized variable warning in iwch_post_send() drivers/infiniband/hw/cxgb3/iwch_qp.c: In function 'iwch_post_send': drivers/infiniband/hw/cxgb3/iwch_qp.c:232: warning: 't3_wr_flit_cnt' may be used uninitialized in this function This is what akpm describes as "the dopey gcc-doesn't-know-that-foo(&var)-writes-to-var problem." Signed-off-by: Roland Dreier Acked-by: Steve Wise --- drivers/infiniband/hw/cxgb3/iwch_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 79dbe5beae5..99261379922 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -229,7 +229,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { int err = 0; - u8 t3_wr_flit_cnt; + u8 uninitialized_var(t3_wr_flit_cnt); enum t3_wr_opcode t3_wr_opcode = 0; enum t3_wr_flags t3_wr_flags; struct iwch_qp *qhp; -- cgit v1.2.3 From 12103dca52e79e23afe2fbcaf3d9e7fc9ceb6b18 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 16 May 2008 14:58:44 -0700 Subject: IB/mthca: Fix max_sge value returned by query_device The mthca driver returns the maximum number of scatter/gather entries returned by the firmware as the max_sge value when device properties are queried. However, the firmware also reports a limit on the maximum descriptor size allowed, and because mthca takes into account the worst case send request overhead when checking whether to allow a QP to be created, the largest number of scatter/gather entries that can be used with mthca may be limited by the maximum descriptor size rather than just by the actual s/g entry limit. This means that applications cannot actually create QPs with max_send_sge equal to the limit returned by ib_query_device(). Fix this by checking if the maximum descriptor size imposes a lower limit and if so returning that lower limit. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_main.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 9ebadd6e0cf..200cf13fc9b 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -45,6 +45,7 @@ #include "mthca_cmd.h" #include "mthca_profile.h" #include "mthca_memfree.h" +#include "mthca_wqe.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver"); @@ -200,7 +201,18 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) mdev->limits.gid_table_len = dev_lim->max_gids; mdev->limits.pkey_table_len = dev_lim->max_pkeys; mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; - mdev->limits.max_sg = dev_lim->max_sg; + /* + * Need to allow for worst case send WQE overhead and check + * whether max_desc_sz imposes a lower limit than max_sg; UD + * send has the biggest overhead. + */ + mdev->limits.max_sg = min_t(int, dev_lim->max_sg, + (dev_lim->max_desc_sz - + sizeof (struct mthca_next_seg) - + (mthca_is_memfree(mdev) ? + sizeof (struct mthca_arbel_ud_seg) : + sizeof (struct mthca_tavor_ud_seg))) / + sizeof (struct mthca_data_seg)); mdev->limits.max_wqes = dev_lim->max_qp_sz; mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp; mdev->limits.reserved_qps = dev_lim->reserved_qps; -- cgit v1.2.3 From 6c06aec2487f7568cf57471a20f422568f25d551 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 16 May 2008 17:55:12 -0700 Subject: IB: fix race in device_create There is a race from when a device is created with device_create() and then the drvdata is set with a call to dev_set_drvdata() in which a sysfs file could be open, yet the drvdata will be NULL, causing all sorts of bad things to happen. This patch fixes the problem by using the new function, device_create_drvdata(). Cc: Kay Sievers Reviewed-by: Roland Dreier Cc: Sean Hefty Cc: Hal Rosenstock Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/user_mad.c | 14 ++++++-------- drivers/infiniband/core/uverbs_main.c | 11 ++++++----- 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 3aa2db54eae..840ede9ae96 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1005,8 +1005,9 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (cdev_add(port->cdev, base_dev + port->dev_num, 1)) goto err_cdev; - port->dev = device_create(umad_class, device->dma_device, - port->cdev->dev, "umad%d", port->dev_num); + port->dev = device_create_drvdata(umad_class, device->dma_device, + port->cdev->dev, port, + "umad%d", port->dev_num); if (IS_ERR(port->dev)) goto err_cdev; @@ -1024,15 +1025,12 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) goto err_sm_cdev; - port->sm_dev = device_create(umad_class, device->dma_device, - port->sm_cdev->dev, - "issm%d", port->dev_num); + port->sm_dev = device_create_drvdata(umad_class, device->dma_device, + port->sm_cdev->dev, port, + "issm%d", port->dev_num); if (IS_ERR(port->sm_dev)) goto err_sm_cdev; - dev_set_drvdata(port->dev, port); - dev_set_drvdata(port->sm_dev, port); - if (device_create_file(port->sm_dev, &dev_attr_ibdev)) goto err_sm_dev; if (device_create_file(port->sm_dev, &dev_attr_port)) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index cc1afa28c18..f806da184b5 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -755,14 +755,15 @@ static void ib_uverbs_add_one(struct ib_device *device) if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) goto err_cdev; - uverbs_dev->dev = device_create(uverbs_class, device->dma_device, - uverbs_dev->cdev->dev, - "uverbs%d", uverbs_dev->devnum); + uverbs_dev->dev = device_create_drvdata(uverbs_class, + device->dma_device, + uverbs_dev->cdev->dev, + uverbs_dev, + "uverbs%d", + uverbs_dev->devnum); if (IS_ERR(uverbs_dev->dev)) goto err_cdev; - dev_set_drvdata(uverbs_dev->dev, uverbs_dev); - if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) goto err_class; if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) -- cgit v1.2.3 From cd155c1c7c9e64df6afb5504d292fef7cb783a4f Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 20 May 2008 14:00:02 -0700 Subject: IB/mlx4: Fix creation of kernel QP with max number of send s/g entries When creating a kernel QP where the consumer asked for a send queue with lots of scatter/gater entries, set_kernel_sq_size() incorrectly returned an error if the send queue stride is larger than the hardware's maximum send work request descriptor size. This is not a problem; the only issue is to make sure that the actual descriptors used do not overflow the maximum descriptor size, so check this instead. Clamp the returned max_send_sge value to be no bigger than what query_device returns for the max_sge to avoid confusing hapless users, even if the hardware is capable of handling a few more s/g entries. This bug caused NFS/RDMA mounts to fail when the server adapter used the mlx4 driver. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index cec030e118d..a80df22deae 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -333,6 +333,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) + send_wqe_overhead(type, qp->flags); + if (s > dev->dev->caps.max_sq_desc_sz) + return -EINVAL; + /* * Hermon supports shrinking WQEs, such that a single work * request can include multiple units of 1 << wqe_shift. This @@ -372,9 +375,6 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); for (;;) { - if (1 << qp->sq.wqe_shift > dev->dev->caps.max_sq_desc_sz) - return -EINVAL; - qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift); /* @@ -395,7 +395,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, ++qp->sq.wqe_shift; } - qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) - + qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz, + (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) - send_wqe_overhead(type, qp->flags)) / sizeof (struct mlx4_wqe_data_seg); @@ -411,7 +412,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, cap->max_send_wr = qp->sq.max_post = (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr; - cap->max_send_sge = qp->sq.max_gs; + cap->max_send_sge = min(qp->sq.max_gs, + min(dev->dev->caps.max_sq_sg, + dev->dev->caps.max_rq_sg)); /* We don't support inline sends for kernel QPs (yet) */ cap->max_inline_data = 0; -- cgit v1.2.3 From e1d50dce5af77cb6d33555af70e2b8748dd84009 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 20 May 2008 15:41:09 -0700 Subject: IPoIB: Test for NULL broadcast object in ipiob_mcast_join_finish() We saw a kernel oops in our regression testing when a multicast "join finish" occurred just after the interface was -- this is . The test randomly causes the HCA physical port to go down then up. The cause of this is that ipoib_mcast_join_finish() processing happen just after ipoib_mcast_dev_flush() was invoked (in which case the broadcast pointer is NULL). This patch tests for and handles the case where priv->broadcast is NULL. Cc: Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index d00a2c174ae..3f663fb852c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -194,7 +194,13 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, /* Set the cached Q_Key before we attach if it's the broadcast group */ if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, sizeof (union ib_gid))) { + spin_lock_irq(&priv->lock); + if (!priv->broadcast) { + spin_unlock_irq(&priv->lock); + return -EAGAIN; + } priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); + spin_unlock_irq(&priv->lock); priv->tx_wr.wr.ud.remote_qkey = priv->qkey; } -- cgit v1.2.3 From 5a4f2b675210718aceb4abf41617a3af31bba718 Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Fri, 23 May 2008 10:52:59 -0700 Subject: IB/mad: Fix kernel crash when .process_mad() returns SUCCESS|CONSUMED If a low-level driver returns IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED, handle_outgoing_dr_smp() doesn't clean up properly. The fix is to kfree the local data and break, rather than falling through. This was observed with the ipath driver, but could happen with any driver. This fixes . Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index fbe16d5250a..1adf2efd3cb 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -747,7 +747,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, break; case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED: kmem_cache_free(ib_mad_cache, mad_priv); - break; + kfree(local); + ret = 1; + goto out; case IB_MAD_RESULT_SUCCESS: /* Treat like an incoming receive MAD */ port_priv = ib_get_mad_port(mad_agent_priv->agent.device, -- cgit v1.2.3 From e8ffef73c8dd2c2d00287829db87cdaf229d3859 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 26 May 2008 15:20:34 -0700 Subject: IB/ipath: Avoid test_bit() on u64 SDMA status value Gabriel C pointed out that when the x86 bitops are updated to operate on unsigned long, the code in sdma_abort_task() will produce warnings: drivers/infiniband/hw/ipath/ipath_sdma.c: In function 'sdma_abort_task': drivers/infiniband/hw/ipath/ipath_sdma.c:267: warning: passing argument 2 of 'constant_test_bit' from incompatible pointer type and so on, because it uses test_bit() to operation on a u64 value (returned by ipath_read_kref64() for a hardware register). Fix up these warnings by converting the test_bit() operations to &ing with appropriate symbolic defines of the bits within the hardware register. This has the benign side-effect of making the code more self-documenting as well. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_kernel.h | 5 +++++ drivers/infiniband/hw/ipath/ipath_sdma.c | 12 ++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 59a8b254b97..0bd8bcb184a 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -232,6 +232,11 @@ struct ipath_sdma_desc { #define IPATH_SDMA_TXREQ_S_ABORTED 2 #define IPATH_SDMA_TXREQ_S_SHUTDOWN 3 +#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG (1ull << 63) +#define IPATH_SDMA_STATUS_ABORT_IN_PROG (1ull << 62) +#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE (1ull << 61) +#define IPATH_SDMA_STATUS_SCB_EMPTY (1ull << 30) + /* max dwords in small buffer packet */ #define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2) diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 0a8c1b8091a..eaba03273e4 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -263,14 +263,10 @@ static void sdma_abort_task(unsigned long opaque) hwstatus = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus); - if (/* ScoreBoardDrainInProg */ - test_bit(63, &hwstatus) || - /* AbortInProg */ - test_bit(62, &hwstatus) || - /* InternalSDmaEnable */ - test_bit(61, &hwstatus) || - /* ScbEmpty */ - !test_bit(30, &hwstatus)) { + if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG | + IPATH_SDMA_STATUS_ABORT_IN_PROG | + IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) || + !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) { if (dd->ipath_sdma_reset_wait > 0) { /* not done shutting down sdma */ --dd->ipath_sdma_reset_wait; -- cgit v1.2.3 From 03031f71c7e64aada1add057ccc4a8bc6a79924c Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 26 May 2008 15:22:17 -0700 Subject: IB/ipath: Fix device capability flags The driver supports a few features (RNR NAK, port active event, SRQ resize) that were not reported in the device capability flags. This patch fixes that. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index e0ec540042b..7779165b2c2 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1494,7 +1494,8 @@ static int ipath_query_device(struct ib_device *ibdev, props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | - IB_DEVICE_SYS_IMAGE_GUID; + IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; props->page_size_cap = PAGE_SIZE; props->vendor_id = dev->dd->ipath_vendorid; props->vendor_part_id = dev->dd->ipath_deviceid; -- cgit v1.2.3 From 088af1543c611f4200658250b6a4467b7eb496a6 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Fri, 6 Jun 2008 11:21:33 -0700 Subject: IB/ehca: Reject send WRs only for RESET, INIT and RTR state Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_reqs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index bbe0436f4f7..f093b0033da 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -421,8 +421,10 @@ int ehca_post_send(struct ib_qp *qp, int ret = 0; unsigned long flags; - if (unlikely(my_qp->state != IB_QPS_RTS)) { - ehca_err(qp->device, "QP not in RTS state qpn=%x", qp->qp_num); + /* Reject WR if QP is in RESET, INIT or RTR state */ + if (unlikely(my_qp->state < IB_QPS_RTS)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); return -EINVAL; } -- cgit v1.2.3 From 27676a3e166b352928a8ef7b1c0e322f3c471a3e Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 6 Jun 2008 11:23:29 -0700 Subject: IB/ipath: Fix SM trap forwarding SM/SMA traps received by the ipath driver should be forwarded to the SM if it is running on the host. The ib_ipath driver was incorrectly replying with "bad method." Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_mad.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 1ff46ae7dd9..5f9315d77a4 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -1492,6 +1492,10 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, goto bail; } + case IB_MGMT_METHOD_TRAP: + case IB_MGMT_METHOD_REPORT: + case IB_MGMT_METHOD_REPORT_RESP: + case IB_MGMT_METHOD_TRAP_REPRESS: case IB_MGMT_METHOD_GET_RESP: /* * The ib_mad module will call us to process responses -- cgit v1.2.3 From 8079ffa0e18baaf2940e52e0c118eef420a473a4 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 6 Jun 2008 21:38:37 -0700 Subject: IB/umem: Avoid sign problems when demoting npages to integer On a 64-bit architecture, if ib_umem_get() is called with a size value that is so big that npages is negative when cast to int, then the length of the page list passed to get_user_pages(), namely min_t(int, npages, PAGE_SIZE / sizeof (struct page *)) will be negative, and get_user_pages() will immediately return 0 (at least since 900cf086, "Be more robust about bad arguments in get_user_pages()"). This leads to an infinite loop in ib_umem_get(), since the code boils down to: while (npages) { ret = get_user_pages(...); npages -= ret; } Fix this by taking the minimum as unsigned longs, so that the value of npages is never truncated. The impact of this bug isn't too severe, since the value of npages is checked against RLIMIT_MEMLOCK, so a process would need to have an astronomical limit or have CAP_IPC_LOCK to be able to trigger this, and such a process could already cause lots of mischief. But it does let buggy userspace code cause a kernel lock-up; for example I hit this with code that passes a negative value into a memory registartion function where it is promoted to a huge u64 value. Cc: Signed-off-by: Roland Dreier --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index fe78f7d2509..a1768dbb072 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -150,7 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, ret = 0; while (npages) { ret = get_user_pages(current, current->mm, cur_base, - min_t(int, npages, + min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), 1, !umem->writable, page_list, vma_list); -- cgit v1.2.3 From 4c0283fc561d79a4f94ab48ec37282e15273d1f8 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 9 Jun 2008 09:58:42 -0700 Subject: IB/core: Remove IB_DEVICE_SEND_W_INV capability flag In 2.6.26, we added some support for send with invalidate work requests, including a device capability flag to indicate whether a device supports such requests. However, the support was incomplete: the completion structure was not extended with a field for the key contained in incoming send with invalidate requests. Full support for memory management extensions (send with invalidate, local invalidate, fast register through a send queue, etc) is planned for 2.6.27. Since send with invalidate is not very useful by itself, just remove the IB_DEVICE_SEND_W_INV bit before the 2.6.26 final release; we will add an IB_DEVICE_MEM_MGT_EXTENSIONS bit in 2.6.27, which makes things simpler for applications, since they will not have quite as confusing an array of fine-grained bits to check. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2_rnic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c index 9a054c6941a..b1441aeb60c 100644 --- a/drivers/infiniband/hw/amso1100/c2_rnic.c +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -455,8 +455,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev) IB_DEVICE_CURR_QP_STATE_MOD | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_ZERO_STAG | - IB_DEVICE_MEM_WINDOW | - IB_DEVICE_SEND_W_INV); + IB_DEVICE_MEM_WINDOW); /* Allocate the qptr_array */ c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *)); -- cgit v1.2.3 From 24797a344293601f14f49e2d259c3ca447c4f802 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 10 Jun 2008 12:29:49 -0700 Subject: RDMA/nes: Fix off-by-one in nes_reg_user_mr() error path nes_reg_user_mr() should fail if page_count becomes >= 1024 * 512 rather than just testing for strict >, because page_count is essentially used as an index into an array with 1024 * 512 entries, so allowing the loop to continue with page_count == 1024 * 512 means that memory after the end of the array is corrupted. This leads to a crash triggerable by a userspace application that requests registration of a too-big region. Also get rid of the call to pci_free_consistent() here to avoid corrupting state with a double free, since the same memory will be freed in the code jumped to at reg_user_mr_err. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 99b3c4ae86e..d617da9bd35 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2456,10 +2456,8 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length) goto enough_pages; if ((page_count&0x01FF) == 0) { - if (page_count>(1024*512)) { + if (page_count >= 1024 * 512) { ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); kfree(nesmr); -- cgit v1.2.3 From fb77bcef9f7be78e3e11543cb5abbcb1b1fac53e Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 18 Jun 2008 15:36:38 -0700 Subject: IB/uverbs: Fix check of is_closed flag check in ib_uverbs_async_handler() Commit 1ae5c187 ("IB/uverbs: Don't store struct file * for event files") changed the way that closed files are handled in the uverbs code. However, after the conversion, is_closed flag is checked incorrectly in ib_uverbs_async_handler(). As a result, no async events are ever passed to applications. Found by: Ronni Zimmerman Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index f806da184b5..caed42bf7ef 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -423,7 +423,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, unsigned long flags; spin_lock_irqsave(&file->async_file->lock, flags); - if (!file->async_file->is_closed) { + if (file->async_file->is_closed) { spin_unlock_irqrestore(&file->async_file->lock, flags); return; } -- cgit v1.2.3 From 87afd448b186c885d67a08b7417cd46253b6a9d6 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 23 Jun 2008 09:29:58 -0700 Subject: IB/mthca: Clear ICM pages before handing to FW Current memfree FW has a bug which in some cases, assumes that ICM pages passed to it are cleared. This patch uses __GFP_ZERO to allocate all ICM pages passed to the FW. Once firmware with a fix is released, we can make the workaround conditional on firmware version. This fixes the bug reported by Arthur Kepner here: http://lists.openfabrics.org/pipermail/general/2008-May/050026.html Cc: Signed-off-by: Eli Cohen [ Rewritten to be a one-liner using __GFP_ZERO instead of vmap()ing ICM memory and memset()ing it to 0. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_memfree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index b224079d4e1..d5862e5d99a 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -109,7 +109,11 @@ static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_m { struct page *page; - page = alloc_pages(gfp_mask, order); + /* + * Use __GFP_ZERO because buggy firmware assumes ICM pages are + * cleared, and subtle failures are seen if they aren't. + */ + page = alloc_pages(gfp_mask | __GFP_ZERO, order); if (!page) return -ENOMEM; -- cgit v1.2.3