From 5b673b71c8ca0fbdb99dc1b1434cfb554212d6ff Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Tue, 22 Jul 2008 14:18:07 -0700 Subject: IB/ehca: Filter PATH_MIG events if QP was never armed Certain firmware versions sometimes cause spurious PATH_MIG events to occur during QP creation. Filter these events by making sure PATH_MIG events are only handed down when they actually make sense (i.e. when the QP has been armed at least once). Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes.h | 1 + drivers/infiniband/hw/ehca/ehca_irq.c | 4 ++++ drivers/infiniband/hw/ehca/ehca_qp.c | 2 ++ 3 files changed, 7 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 1e9e99a1393..0b0618edd64 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -194,6 +194,7 @@ struct ehca_qp { u32 packet_count; atomic_t nr_events; /* events seen */ wait_queue_head_t wait_completion; + int mig_armed; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 0792d930c48..99642a6e17c 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -178,6 +178,10 @@ static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, { struct ib_event event; + /* PATH_MIG without the QP ever having been armed is false alarm */ + if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) + return; + event.device = &shca->ib_device; event.event = event_type; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 3f59587338e..ea13efddf17 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1460,6 +1460,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit2; } mqpcb->path_migration_state = attr->path_mig_state + 1; + if (attr->path_mig_state == IB_MIG_REARM) + my_qp->mig_armed = 1; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); } -- cgit v1.2.3 From 593e4d4a05c8263a6dbd5452c21d47c5bdadd40c Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Tue, 22 Jul 2008 14:18:08 -0700 Subject: IB/ehca: Use default value for Local CA ACK Delay if FW returns 0 Some firmware versions report a Local CA ACK Delay of 0. In that case, return a more sensible default value of 12 (-> 16 msec) instead. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_hca.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index bc3b37d2070..46288220cfb 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -114,7 +114,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) } props->max_pkeys = 16; - props->local_ca_ack_delay = min_t(u8, rblock->local_ca_ack_delay, 255); + /* Some FW versions say 0 here; insert sensible value in that case */ + props->local_ca_ack_delay = rblock->local_ca_ack_delay ? + min_t(u8, rblock->local_ca_ack_delay, 255) : 12; props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); -- cgit v1.2.3 From 1a867c33bb65f2921351a9bdd98548bb96f0ff8c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 22 Jul 2008 14:18:10 -0700 Subject: IB/ehca: Release mutex in error path of alloc_small_queue_page() The pd->lock mutex is released on a successful return, so it should be released on an error return as well. The semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @@ expression l; @@ mutex_lock(l); ... when != mutex_unlock(l) when any when strict ( if (...) { ... when != mutex_unlock(l) + mutex_unlock(l); return ...; } | mutex_unlock(l); ) // Signed-off-by: Julia Lawall Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ipz_pt_fn.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index 661f8db6270..c3a32846543 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -163,6 +163,7 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) out: ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); + mutex_unlock(&pd->lock); return 0; } -- cgit v1.2.3 From 64b784b583061ebfe1d484dd1fdc5a26c6d4293f Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 22 Jul 2008 14:18:33 -0700 Subject: IB/sa_query: Check if sm_ah is NULL in ib_sa_remove_one() If update_sm_ah() fails, it leaves the port's sm_ah as NULL. Then if the device or module is removed, ib_sa_remove_one() will dereference a NULL pointer when it calls kref_put(). Fix this by testing if sm_ah is NULL before dropping the reference. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/core/sa_query.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 1341de793e5..7863a50d56f 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1064,7 +1064,8 @@ static void ib_sa_remove_one(struct ib_device *device) for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { ib_unregister_mad_agent(sa_dev->port[i].agent); - kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); + if (sa_dev->port[i].sm_ah) + kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); } kfree(sa_dev); -- cgit v1.2.3 From 01b3fc8b15432f7931e40fe099839e1559fb0e09 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 22 Jul 2008 14:18:34 -0700 Subject: IPoIB: Include err code in trace message for ib_sa_path_rec_get() failures Print the return code of ib_sa_path_rec_get() if it fails to help debug errors. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 8be9ea0436e..f51201b17bf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -548,7 +548,7 @@ static int path_rec_start(struct net_device *dev, path_rec_completion, path, &path->query); if (path->query_id < 0) { - ipoib_warn(priv, "ib_sa_path_rec_get failed\n"); + ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id); path->query = NULL; return path->query_id; } -- cgit v1.2.3 From 1ca8d15619f725e223c19137350b0336b9196193 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Tue, 22 Jul 2008 14:18:34 -0700 Subject: RDMA/iwcm: Remove IB_ACCESS_LOCAL_WRITE from remote QP attributes Remove IB_ACCESS_LOCAL_WRITE from qp.qp_access_flags because this attribute is only used to set remote permissions. Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- drivers/infiniband/core/iwcm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 81c9195b512..8f9509e1ebf 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -942,8 +942,7 @@ static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE| + qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| IB_ACCESS_REMOTE_READ; ret = 0; break; -- cgit v1.2.3 From 47b374752aed1c029f995473c7c463ee3ae5fbaa Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 22 Jul 2008 14:19:39 -0700 Subject: IB/mlx4: Rename struct mlx4_lso_seg to mlx4_wqe_lso_seg Make the struct name consistent with other WQE segment struct types defined in . Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 89eb6cbe592..bda0859a5ac 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1395,7 +1395,7 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr, +static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, struct mlx4_ib_qp *qp, unsigned *lso_seg_len) { unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); -- cgit v1.2.3 From e8bb4beb2b1f90d499134f2849727ed04c3bedc4 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 22 Jul 2008 14:20:05 -0700 Subject: IB/mthca: Keep free count for MTT buddy allocator MTT entries are allocated with a buddy allocator, which just keeps bitmaps for each level of the buddy table. However, all free space starts out at the highest order, and small allocations start scanning from the lowest order. When the lowest order tables have no free space, this can lead to scanning potentially millions of bits before finding a free entry at a higher order. We can avoid this by just keeping a count of how many free entries each order has, and skipping the bitmap scan when an order is completely empty. This provides a nice performance boost for a negligible increase in memory usage. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_dev.h | 1 + drivers/infiniband/hw/mthca/mthca_mr.c | 26 ++++++++++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index ee4d073c889..252590116df 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -202,6 +202,7 @@ struct mthca_pd_table { struct mthca_buddy { unsigned long **bits; + int *num_free; int max_order; spinlock_t lock; }; diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 8489b1e81c0..882e6b73591 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -89,23 +89,26 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order) spin_lock(&buddy->lock); - for (o = order; o <= buddy->max_order; ++o) { - m = 1 << (buddy->max_order - o); - seg = find_first_bit(buddy->bits[o], m); - if (seg < m) - goto found; - } + for (o = order; o <= buddy->max_order; ++o) + if (buddy->num_free[o]) { + m = 1 << (buddy->max_order - o); + seg = find_first_bit(buddy->bits[o], m); + if (seg < m) + goto found; + } spin_unlock(&buddy->lock); return -1; found: clear_bit(seg, buddy->bits[o]); + --buddy->num_free[o]; while (o > order) { --o; seg <<= 1; set_bit(seg ^ 1, buddy->bits[o]); + ++buddy->num_free[o]; } spin_unlock(&buddy->lock); @@ -123,11 +126,13 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order) while (test_bit(seg ^ 1, buddy->bits[order])) { clear_bit(seg ^ 1, buddy->bits[order]); + --buddy->num_free[order]; seg >>= 1; ++order; } set_bit(seg, buddy->bits[order]); + ++buddy->num_free[order]; spin_unlock(&buddy->lock); } @@ -141,7 +146,9 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), GFP_KERNEL); - if (!buddy->bits) + buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *), + GFP_KERNEL); + if (!buddy->bits || !buddy->num_free) goto err_out; for (i = 0; i <= buddy->max_order; ++i) { @@ -154,6 +161,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) } set_bit(0, buddy->bits[buddy->max_order]); + buddy->num_free[buddy->max_order] = 1; return 0; @@ -161,9 +169,10 @@ err_out_free: for (i = 0; i <= buddy->max_order; ++i) kfree(buddy->bits[i]); +err_out: kfree(buddy->bits); + kfree(buddy->num_free); -err_out: return -ENOMEM; } @@ -175,6 +184,7 @@ static void mthca_buddy_cleanup(struct mthca_buddy *buddy) kfree(buddy->bits[i]); kfree(buddy->bits); + kfree(buddy->num_free); } static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order, -- cgit v1.2.3 From 95d04f0735b4fc837bff9aedcc3f3efb20ddc3d1 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 23 Jul 2008 08:12:26 -0700 Subject: IB/mlx4: Add support for memory management extensions and local DMA L_Key Add support for the following operations to mlx4 when device firmware supports them: - Send with invalidate and local invalidate send queue work requests; - Allocate/free fast register MRs; - Allocate/free fast register MR page lists; - Fast register MR send queue work requests; - Local DMA L_Key. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 12 ++++++ drivers/infiniband/hw/mlx4/main.c | 11 ++++++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 15 ++++++++ drivers/infiniband/hw/mlx4/mr.c | 70 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx4/qp.c | 72 +++++++++++++++++++++++++++++++++--- 5 files changed, 175 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 299f20832ab..0b191a4842c 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -637,6 +637,7 @@ repoll: case MLX4_OPCODE_SEND_IMM: wc->wc_flags |= IB_WC_WITH_IMM; case MLX4_OPCODE_SEND: + case MLX4_OPCODE_SEND_INVAL: wc->opcode = IB_WC_SEND; break; case MLX4_OPCODE_RDMA_READ: @@ -657,6 +658,12 @@ repoll: case MLX4_OPCODE_LSO: wc->opcode = IB_WC_LSO; break; + case MLX4_OPCODE_FMR: + wc->opcode = IB_WC_FAST_REG_MR; + break; + case MLX4_OPCODE_LOCAL_INVAL: + wc->opcode = IB_WC_LOCAL_INV; + break; } } else { wc->byte_len = be32_to_cpu(cqe->byte_cnt); @@ -667,6 +674,11 @@ repoll: wc->wc_flags = IB_WC_WITH_IMM; wc->ex.imm_data = cqe->immed_rss_invalid; break; + case MLX4_RECV_OPCODE_SEND_INVAL: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid); + break; case MLX4_RECV_OPCODE_SEND: wc->opcode = IB_WC_RECV; wc->wc_flags = 0; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index bcf50648fa1..38d6907ab52 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -104,6 +104,12 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; if (dev->dev->caps.max_gso_sz) props->device_cap_flags |= IB_DEVICE_UD_TSO; + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) + props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; + if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && + (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && + (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; @@ -127,6 +133,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; + props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64); props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; @@ -565,6 +572,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; + ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports; ibdev->ib_dev.num_comp_vectors = 1; ibdev->ib_dev.dma_device = &dev->pdev->dev; @@ -627,6 +635,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; + ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; + ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; + ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index c4cf5b69eef..d26a91317d4 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -83,6 +83,11 @@ struct mlx4_ib_mr { struct ib_umem *umem; }; +struct mlx4_ib_fast_reg_page_list { + struct ib_fast_reg_page_list ibfrpl; + dma_addr_t map; +}; + struct mlx4_ib_fmr { struct ib_fmr ibfmr; struct mlx4_fmr mfmr; @@ -199,6 +204,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx4_ib_mr, ibmr); } +static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) +{ + return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); +} + static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) { return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); @@ -239,6 +249,11 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); +struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len); +struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len); +void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 68e92485fc7..db2086faa4e 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -183,6 +183,76 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr) return 0; } +struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_mr *mr; + int err; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0, + max_page_list_len, 0, &mr->mmr); + if (err) + goto err_free; + + err = mlx4_mr_enable(dev->dev, &mr->mmr); + if (err) + goto err_mr; + + return &mr->ibmr; + +err_mr: + mlx4_mr_free(dev->dev, &mr->mmr); + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_fast_reg_page_list *mfrpl; + int size = page_list_len * sizeof (u64); + + if (size > PAGE_SIZE) + return ERR_PTR(-EINVAL); + + mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); + if (!mfrpl) + return ERR_PTR(-ENOMEM); + + mfrpl->ibfrpl.page_list = dma_alloc_coherent(&dev->dev->pdev->dev, + size, &mfrpl->map, + GFP_KERNEL); + if (!mfrpl->ibfrpl.page_list) + goto err_free; + + WARN_ON(mfrpl->map & 0x3f); + + return &mfrpl->ibfrpl; + +err_free: + kfree(mfrpl); + return ERR_PTR(-ENOMEM); +} + +void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) +{ + struct mlx4_ib_dev *dev = to_mdev(page_list->device); + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); + int size = page_list->max_page_list_len * sizeof (u64); + + dma_free_coherent(&dev->dev->pdev->dev, size, page_list->page_list, + mfrpl->map); + kfree(mfrpl); +} + struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, struct ib_fmr_attr *fmr_attr) { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index bda0859a5ac..02a99bc4442 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -78,6 +78,9 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ), [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), + [IB_WR_SEND_WITH_INV] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL), + [IB_WR_LOCAL_INV] = __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), + [IB_WR_FAST_REG_MR] = __constant_cpu_to_be32(MLX4_OPCODE_FMR), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) @@ -976,6 +979,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); + /* Set "fast registration enabled" for all kernel QPs */ + if (!qp->ibqp.uobject) + context->params1 |= cpu_to_be32(1 << 11); + if (attr_mask & IB_QP_RNR_RETRY) { context->params1 |= cpu_to_be32(attr->rnr_retry << 13); optpar |= MLX4_QP_OPTPAR_RNR_RETRY; @@ -1322,6 +1329,38 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq return cur + nreq >= wq->max_post; } +static __be32 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) | + (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | + cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); +} + +static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) +{ + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); + + fseg->flags = convert_access(wr->wr.fast_reg.access_flags); + fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); + fseg->buf_list = cpu_to_be64(mfrpl->map); + fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); + fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length); + fseg->offset = 0; /* XXX -- is this just for ZBVA? */ + fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); + fseg->reserved[0] = 0; + fseg->reserved[1] = 0; +} + +static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) +{ + iseg->flags = 0; + iseg->mem_key = cpu_to_be32(rkey); + iseg->guest_id = 0; + iseg->pa = 0; +} + static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, u64 remote_addr, u32 rkey) { @@ -1423,6 +1462,21 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, return 0; } +static __be32 send_ieth(struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return wr->ex.imm_data; + + case IB_WR_SEND_WITH_INV: + return cpu_to_be32(wr->ex.invalidate_rkey); + + default: + return 0; + } +} + int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1469,11 +1523,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | qp->sq_signal_bits; - if (wr->opcode == IB_WR_SEND_WITH_IMM || - wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) - ctrl->imm = wr->ex.imm_data; - else - ctrl->imm = 0; + ctrl->imm = send_ieth(wr); wqe += sizeof *ctrl; size = sizeof *ctrl / 16; @@ -1505,6 +1555,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_raddr_seg) / 16; break; + case IB_WR_LOCAL_INV: + set_local_inv_seg(wqe, wr->ex.invalidate_rkey); + wqe += sizeof (struct mlx4_wqe_local_inval_seg); + size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; + break; + + case IB_WR_FAST_REG_MR: + set_fmr_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_fmr_seg); + size += sizeof (struct mlx4_wqe_fmr_seg) / 16; + break; + default: /* No extra segments required for sends */ break; -- cgit v1.2.3