From 00f7ec36c9324928e4cd23f02e6d8550f30c32ca Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 14 Jul 2008 23:48:45 -0700 Subject: RDMA/core: Add memory management extensions support This patch adds support for the IB "base memory management extension" (BMME) and the equivalent iWARP operations (which the iWARP verbs mandate all devices must implement). The new operations are: - Allocate an ib_mr for use in fast register work requests. - Allocate/free physical buffer lists for use in fast register work requests. This allows device drivers to allocate this memory as needed for use in posting send requests (e.g. via dma_alloc_coherent). - New send queue work requests: * send with remote invalidate * fast register memory region * local invalidate memory region * RDMA read with invalidate local memory region (iWARP only) Consumer interface details: - A new device capability flag IB_DEVICE_MEM_MGT_EXTENSIONS is added to indicate device support for these features. - New send work request opcodes IB_WR_FAST_REG_MR, IB_WR_LOCAL_INV, IB_WR_RDMA_READ_WITH_INV are added. - A new consumer API function, ib_alloc_mr(), is added to allocate fast register memory regions. - New consumer API functions, ib_alloc_fast_reg_page_list() and ib_free_fast_reg_page_list() are added to allocate and free device-specific memory for fast registration page lists. - A new consumer API function, ib_update_fast_reg_key(), is added to allow the key portion of the R_Key and L_Key of a fast registration MR to be updated. Consumers call this if desired before posting an IB_WR_FAST_REG_MR work request. Consumers can use this as follows: - MR is allocated with ib_alloc_mr(). - Page list memory is allocated with ib_alloc_fast_reg_page_list(). - MR R_Key/L_Key "key" field is updated with ib_update_fast_reg_key(). 
- MR made VALID and bound to a specific page list via ib_post_send(IB_WR_FAST_REG_MR) - MR made INVALID via ib_post_send(IB_WR_LOCAL_INV), ib_post_send(IB_WR_RDMA_READ_WITH_INV) or an incoming send with invalidate operation. - MR is deallocated with ib_dereg_mr() - page lists deallocated via ib_free_fast_reg_page_list(). Applications can allocate a fast register MR once, and then can repeatedly bind the MR to different physical block lists (PBLs) via posting work requests to a send queue (SQ). For each outstanding MR-to-PBL binding in the SQ pipe, a fast_reg_page_list needs to be allocated (the fast_reg_page_list is owned by the low-level driver from the consumer posting a work request until the request completes). Thus pipelining can be achieved while still allowing device-specific page_list processing. The 32-bit fast register memory key/STag is composed of a 24-bit index and an 8-bit key. The application can change the key each time it fast registers, thus allowing more control over the peer's use of the key/STag (i.e. it can effectively be changed each time the rkey is rebound to a page list). 
Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_reqs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/ehca') diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index f093b0033da..b799b271021 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -681,7 +681,7 @@ poll_cq_one_read_cqe: wc->dlid_path_bits = cqe->dlid; wc->src_qp = cqe->remote_qp_number; wc->wc_flags = cqe->w_completion_flags; - wc->imm_data = cpu_to_be32(cqe->immediate_data); + wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); wc->sl = cqe->service_level; poll_cq_one_exit0: -- cgit v1.2.3 From 3e255eac561672cbc92844b9f16cae9304c2a783 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Mon, 14 Jul 2008 23:48:47 -0700 Subject: IB/ehca: Reject receive work requests if QP is in RESET state Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_reqs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw/ehca') diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index b799b271021..dd9bc68f1c7 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -544,8 +544,16 @@ int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { - return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp), - qp->device, recv_wr, bad_recv_wr); + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + + /* Reject WR if QP is in RESET state */ + if (unlikely(my_qp->state == IB_QPS_RESET)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + return -EINVAL; + } + + return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); } int ehca_post_srq_recv(struct ib_srq *srq, -- cgit v1.2.3 
From 6f7bc01a7382641c61ec036d68ff3a9140b48a1c Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Mon, 14 Jul 2008 23:48:47 -0700 Subject: IB/ehca: In case of lost interrupts, trigger EOI to reenable interrupts During corner case testing, we noticed that some versions of ehca do not properly transition to interrupt done in special load situations. This can be resolved by periodically triggering EOI through H_EOI, if EQEs are pending. Signed-off-by: Stefan Roscher Acked-by: Benjamin Herrenschmidt Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_irq.c | 9 +++++++-- drivers/infiniband/hw/ehca/hcp_if.c | 10 ++++++++++ drivers/infiniband/hw/ehca/hcp_if.h | 1 + 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw/ehca') diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index ce1ab0571be..0792d930c48 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -531,7 +531,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) { struct ehca_eq *eq = &shca->eq; struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; - u64 eqe_value; + u64 eqe_value, ret; unsigned long flags; int eqe_cnt, i; int eq_empty = 0; @@ -583,8 +583,13 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) ehca_dbg(&shca->ib_device, "No eqe found for irq event"); goto unlock_irq_spinlock; - } else if (!is_irq) + } else if (!is_irq) { + ret = hipz_h_eoi(eq->ist); + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, + "bad return code EOI -rc = %ld\n", ret); ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + } if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); /* enable irq for new packets */ diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 5245e13c3a3..415d3a465de 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c 
@@ -933,3 +933,13 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, r_cb, 0, 0, 0, 0); } + +u64 hipz_h_eoi(int irq) +{ + unsigned long xirr; + + iosync(); + xirr = (0xffULL << 24) | irq; + + return plpar_hcall_norets(H_EOI, xirr); +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 60ce02b7066..2c3c6e0ea5c 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -260,5 +260,6 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, const u64 ressource_handle, void *rblock, unsigned long *byte_count); +u64 hipz_h_eoi(int irq); #endif /* __HCP_IF_H__ */ -- cgit v1.2.3 From 038919f29682b00ea95506e959210fc72d1aaf64 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Mon, 14 Jul 2008 23:48:49 -0700 Subject: IB/ehca: Make device table externally visible This gives ehca an autogenerated modalias and therefore enables automatic loading. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw/ehca') diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 482103eb6ea..598844d2edc 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -923,6 +923,7 @@ static struct of_device_id ehca_device_table[] = }, {}, }; +MODULE_DEVICE_TABLE(of, ehca_device_table); static struct of_platform_driver ehca_driver = { .name = "ehca", -- cgit v1.2.3