From f7c6a7b5d59980b076abbf2ceeb8735591290285 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 4 Mar 2007 16:15:11 -0800 Subject: IB/uverbs: Export ib_umem_get()/ib_umem_release() to modules Export ib_umem_get()/ib_umem_release() and put low-level drivers in control of when to call ib_umem_get() to pin and DMA map userspace, rather than always calling it in ib_uverbs_reg_mr() before calling the low-level driver's reg_user_mr method. Also move these functions to be in the ib_core module instead of ib_uverbs, so that driver modules using them do not depend on ib_uverbs. This has a number of advantages: - It is better design from the standpoint of making generic code a library that can be used or overridden by device-specific code as the details of specific devices dictate. - Drivers that do not need to pin userspace memory regions do not need to take the performance hit of calling ib_mem_get(). For example, although I have not tried to implement it in this patch, the ipath driver should be able to avoid pinning memory and just use copy_{to,from}_user() to access userspace memory regions. - Buffers that need special mapping treatment can be identified by the low-level driver. For example, it may be possible to solve some Altix-specific memory ordering issues with mthca CQs in userspace by mapping CQ buffers with extra flags. - Drivers that need to pin and DMA map userspace memory for things other than memory regions can use ib_umem_get() directly, instead of hacks using extra parameters to their reg_phys_mr method. For example, the mlx4 driver that is pending being merged needs to pin and DMA map QP and CQ buffers, but it does not need to create a memory key for these buffers. So the cleanest solution is for mlx4 to call ib_umem_get() in the create_qp and create_cq methods. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes.h | 1 + drivers/infiniband/hw/ehca/ehca_iverbs.h | 3 +- drivers/infiniband/hw/ehca/ehca_mrmw.c | 69 +++++++++++++++++-------------- 3 files changed, 40 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband/hw/ehca') diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 10fb8fbafa0..f64d42b0867 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -176,6 +176,7 @@ struct ehca_mr { struct ib_mr ib_mr; /* must always be first in ehca_mr */ struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ } ib; + struct ib_umem *umem; spinlock_t mrlock; enum ehca_mr_flag flags; diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index e14b029332c..37e7fe0908c 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -78,8 +78,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, int num_phys_buf, int mr_access_flags, u64 *iova_start); -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, - struct ib_umem *region, +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int mr_access_flags, struct ib_udata *udata); int ehca_rereg_phys_mr(struct ib_mr *mr, diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index d22ab563633..84c5bb49856 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -39,6 +39,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include + #include #include "ehca_iverbs.h" @@ -238,10 +240,8 @@ reg_phys_mr_exit0: /*----------------------------------------------------------------------*/ -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, - struct ib_umem *region, - int mr_access_flags, - struct ib_udata *udata) +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, + int mr_access_flags, struct ib_udata *udata) { struct ib_mr *ib_mr; struct ehca_mr *e_mr; @@ -257,11 +257,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, ehca_gen_err("bad pd=%p", pd); return ERR_PTR(-EFAULT); } - if (!region) { - ehca_err(pd->device, "bad input values: region=%p", region); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit0; - } + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && @@ -275,17 +271,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, ib_mr = ERR_PTR(-EINVAL); goto reg_user_mr_exit0; } - if (region->page_size != PAGE_SIZE) { - ehca_err(pd->device, "page size not supported, " - "region->page_size=%x", region->page_size); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit0; - } - if ((region->length == 0) || - ((region->virt_base + region->length) < region->virt_base)) { + if (length == 0 || virt + length < virt) { ehca_err(pd->device, "bad input values: length=%lx " - "virt_base=%lx", region->length, region->virt_base); + "virt_base=%lx", length, virt); ib_mr = ERR_PTR(-EINVAL); goto reg_user_mr_exit0; } @@ -297,40 +286,55 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, goto reg_user_mr_exit0; } + e_mr->umem = ib_umem_get(pd->uobject->context, start, length, + mr_access_flags); + if (IS_ERR(e_mr->umem)) { + ib_mr = (void *) e_mr->umem; + goto reg_user_mr_exit1; + } + + if (e_mr->umem->page_size != PAGE_SIZE) { + ehca_err(pd->device, "page size not supported, " + "e_mr->umem->page_size=%x", e_mr->umem->page_size); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit2; + } + /* determine number of MR pages */ - num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) / + PAGE_SIZE); + num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) / + EHCA_PAGESIZE); /* register MR on HCA */ pginfo.type = EHCA_MR_PGI_USER; pginfo.num_pages = num_pages_mr; pginfo.num_4k = num_pages_4k; - pginfo.region = region; - pginfo.next_4k = region->offset / EHCA_PAGESIZE; + pginfo.region = e_mr->umem; + pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE; pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, - (®ion->chunk_list), + (&e_mr->umem->chunk_list), list); - ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base, - region->length, mr_access_flags, e_pd, &pginfo, - &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); + ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd, + &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); if (ret) { ib_mr = ERR_PTR(ret); - goto reg_user_mr_exit1; + goto reg_user_mr_exit2; } /* successful registration of all pages */ return &e_mr->ib.ib_mr; +reg_user_mr_exit2: + ib_umem_release(e_mr->umem); reg_user_mr_exit1: ehca_mr_delete(e_mr); reg_user_mr_exit0: if (IS_ERR(ib_mr)) - ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x" + ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x" " udata=%p", - PTR_ERR(ib_mr), pd, region, mr_access_flags, udata); + PTR_ERR(ib_mr), pd, mr_access_flags, udata); return ib_mr; } /* end ehca_reg_user_mr() */ @@ -596,6 +600,9 @@ int ehca_dereg_mr(struct ib_mr *mr) goto dereg_mr_exit0; } + if (e_mr->umem) + ib_umem_release(e_mr->umem); + /* successful deregistration */ ehca_mr_delete(e_mr); -- cgit v1.2.3