diff options
Diffstat (limited to 'drivers/infiniband')
28 files changed, 4055 insertions, 860 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 3cc3ff0cccb..79c8e2dd9c3 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -7,6 +7,16 @@ config INFINIBAND any protocols you wish to use as well as drivers for your InfiniBand hardware. +config INFINIBAND_USER_VERBS + tristate "InfiniBand userspace verbs support" + depends on INFINIBAND + ---help--- + Userspace InfiniBand verbs support. This is the kernel side + of userspace verbs, which allows userspace processes to + directly access InfiniBand hardware for fast-path + operations. You will also need libibverbs and a hardware + driver library from <http://www.openib.org>. + source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index d2dbfb52c0a..e1a7cf3e863 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,6 +1,7 @@ EXTRA_CFLAGS += -Idrivers/infiniband/include -obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o +obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o +obj-$(CONFIG_INFINIBAND_USER_VERBS) += ib_uverbs.o ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ device.o fmr_pool.o cache.o @@ -10,3 +11,5 @@ ib_mad-y := mad.o smi.o agent.o ib_sa-y := sa_query.o ib_umad-y := user_mad.o + +ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c index 5f15feffeae..eb5ff54c10d 100644 --- a/drivers/infiniband/core/packer.c +++ b/drivers/infiniband/core/packer.c @@ -96,7 +96,7 @@ void ib_pack(const struct ib_field *desc, else val = 0; - mask = cpu_to_be64(((1ull << desc[i].size_bits) - 1) << shift); + mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift); addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words); *addr = (*addr & ~mask) | (cpu_to_be64(val) & mask); } else { @@ -176,7 +176,7 @@ void ib_unpack(const struct ib_field *desc, __be64 *addr; shift = 64 - desc[i].offset_bits - desc[i].size_bits; - mask = ((1ull << desc[i].size_bits) - 1) << shift; + mask = (~0ull >> (64 - desc[i].size_bits)) << shift; addr = (__be64 *) buf + desc[i].offset_words; val = (be64_to_cpup(addr) & mask) >> shift; value_write(desc[i].struct_offset_bytes, diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 276e1a53010..5a08e81fa82 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -507,7 +507,13 @@ retry: spin_unlock_irqrestore(&idr_lock, flags); } - return ret; + /* + * It's not safe to dereference query any more, because the + * send may already have completed and freed the query in + * another context. So use wr.wr_id, which has a copy of the + * query's id. + */ + return ret ? ret : wr.wr_id; } static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, @@ -598,14 +604,15 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, rec, query->sa_query.mad->data); *sa_query = &query->sa_query; + ret = send_mad(&query->sa_query, timeout_ms); - if (ret) { + if (ret < 0) { *sa_query = NULL; kfree(query->sa_query.mad); kfree(query); } - return ret ? ret : query->sa_query.id; + return ret; } EXPORT_SYMBOL(ib_sa_path_rec_get); @@ -674,14 +681,15 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, rec, query->sa_query.mad->data); *sa_query = &query->sa_query; + ret = send_mad(&query->sa_query, timeout_ms); - if (ret) { + if (ret < 0) { *sa_query = NULL; kfree(query->sa_query.mad); kfree(query); } - return ret ? ret : query->sa_query.id; + return ret; } EXPORT_SYMBOL(ib_sa_mcmember_rec_query); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h new file mode 100644 index 00000000000..57347f1e82c --- /dev/null +++ b/drivers/infiniband/core/uverbs.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs.h 2559 2005-06-06 19:43:16Z roland $ + */ + +#ifndef UVERBS_H +#define UVERBS_H + +/* Include device.h and fs.h until cdev.h is self-sufficient */ +#include <linux/fs.h> +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/kref.h> +#include <linux/idr.h> + +#include <ib_verbs.h> +#include <ib_user_verbs.h> + +struct ib_uverbs_device { + int devnum; + struct cdev dev; + struct class_device class_dev; + struct ib_device *ib_dev; + int num_comp; +}; + +struct ib_uverbs_event_file { + struct kref ref; + struct ib_uverbs_file *uverbs_file; + spinlock_t lock; + int fd; + int is_async; + wait_queue_head_t poll_wait; + struct list_head event_list; +}; + +struct ib_uverbs_file { + struct kref ref; + struct ib_uverbs_device *device; + struct ib_ucontext *ucontext; + struct ib_event_handler event_handler; + struct ib_uverbs_event_file async_file; + struct ib_uverbs_event_file comp_file[1]; +}; + +struct ib_uverbs_async_event { + struct ib_uverbs_async_event_desc desc; + struct list_head list; +}; + +struct ib_uverbs_comp_event { + struct ib_uverbs_comp_event_desc desc; + struct list_head list; +}; + +struct ib_uobject_mr { + struct ib_uobject uobj; + struct page *page_list; + struct scatterlist *sg_list; +}; + +extern struct semaphore ib_uverbs_idr_mutex; +extern struct idr ib_uverbs_pd_idr; +extern struct idr ib_uverbs_mr_idr; +extern struct idr ib_uverbs_mw_idr; +extern struct idr ib_uverbs_ah_idr; +extern struct idr ib_uverbs_cq_idr; +extern struct idr ib_uverbs_qp_idr; + +void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); +void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); + +int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, + void *addr, size_t size, int write); +void ib_umem_release(struct ib_device *dev, struct ib_umem *umem); +void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem); + +#define IB_UVERBS_DECLARE_CMD(name) \ + ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ + const char __user *buf, int in_len, \ + int out_len) + +IB_UVERBS_DECLARE_CMD(query_params); +IB_UVERBS_DECLARE_CMD(get_context); +IB_UVERBS_DECLARE_CMD(query_device); +IB_UVERBS_DECLARE_CMD(query_port); +IB_UVERBS_DECLARE_CMD(query_gid); +IB_UVERBS_DECLARE_CMD(query_pkey); +IB_UVERBS_DECLARE_CMD(alloc_pd); +IB_UVERBS_DECLARE_CMD(dealloc_pd); +IB_UVERBS_DECLARE_CMD(reg_mr); +IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(create_cq); +IB_UVERBS_DECLARE_CMD(destroy_cq); +IB_UVERBS_DECLARE_CMD(create_qp); +IB_UVERBS_DECLARE_CMD(modify_qp); +IB_UVERBS_DECLARE_CMD(destroy_qp); +IB_UVERBS_DECLARE_CMD(attach_mcast); +IB_UVERBS_DECLARE_CMD(detach_mcast); + +#endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c new file mode 100644 index 00000000000..5f2bbcda4c7 --- /dev/null +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -0,0 +1,1006 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $ + */ + +#include <asm/uaccess.h> + +#include "uverbs.h" + +#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ + do { \ + (udata)->inbuf = (void __user *) (ibuf); \ + (udata)->outbuf = (void __user *) (obuf); \ + (udata)->inlen = (ilen); \ + (udata)->outlen = (olen); \ + } while (0) + +ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_params cmd; + struct ib_uverbs_query_params_resp resp; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + resp.num_cq_events = file->device->num_comp; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_get_context cmd; + struct ib_uverbs_get_context_resp resp; + struct ib_udata udata; + struct ib_device *ibdev = file->device->ib_dev; + int i; + int ret = in_len; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + file->ucontext = ibdev->alloc_ucontext(ibdev, &udata); + if (IS_ERR(file->ucontext)) { + ret = PTR_ERR(file->ucontext); + file->ucontext = NULL; + return ret; + } + + file->ucontext->device = ibdev; + INIT_LIST_HEAD(&file->ucontext->pd_list); + INIT_LIST_HEAD(&file->ucontext->mr_list); + INIT_LIST_HEAD(&file->ucontext->mw_list); + INIT_LIST_HEAD(&file->ucontext->cq_list); + INIT_LIST_HEAD(&file->ucontext->qp_list); + INIT_LIST_HEAD(&file->ucontext->srq_list); + INIT_LIST_HEAD(&file->ucontext->ah_list); + spin_lock_init(&file->ucontext->lock); + + resp.async_fd = file->async_file.fd; + for (i = 0; i < file->device->num_comp; ++i) + if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab + + i * sizeof (__u32), + &file->comp_file[i].fd, sizeof (__u32))) + goto err; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + goto err; + + return in_len; + +err: + ibdev->dealloc_ucontext(file->ucontext); + file->ucontext = NULL; + + return -EFAULT; +} + +ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_device cmd; + struct ib_uverbs_query_device_resp resp; + struct ib_device_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = ib_query_device(file->device->ib_dev, &attr); + if (ret) + return ret; + + memset(&resp, 0, sizeof resp); + + resp.fw_ver = attr.fw_ver; + resp.node_guid = attr.node_guid; + resp.sys_image_guid = attr.sys_image_guid; + resp.max_mr_size = attr.max_mr_size; + resp.page_size_cap = attr.page_size_cap; + resp.vendor_id = attr.vendor_id; + resp.vendor_part_id = attr.vendor_part_id; + resp.hw_ver = attr.hw_ver; + resp.max_qp = attr.max_qp; + resp.max_qp_wr = attr.max_qp_wr; + resp.device_cap_flags = attr.device_cap_flags; + resp.max_sge = attr.max_sge; + resp.max_sge_rd = attr.max_sge_rd; + resp.max_cq = attr.max_cq; + resp.max_cqe = attr.max_cqe; + resp.max_mr = attr.max_mr; + resp.max_pd = attr.max_pd; + resp.max_qp_rd_atom = attr.max_qp_rd_atom; + resp.max_ee_rd_atom = attr.max_ee_rd_atom; + resp.max_res_rd_atom = attr.max_res_rd_atom; + resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom; + resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom; + resp.atomic_cap = attr.atomic_cap; + resp.max_ee = attr.max_ee; + resp.max_rdd = attr.max_rdd; + resp.max_mw = attr.max_mw; + resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp; + resp.max_raw_ethy_qp = attr.max_raw_ethy_qp; + resp.max_mcast_grp = attr.max_mcast_grp; + resp.max_mcast_qp_attach = attr.max_mcast_qp_attach; + resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach; + resp.max_ah = attr.max_ah; + resp.max_fmr = attr.max_fmr; + resp.max_map_per_fmr = attr.max_map_per_fmr; + resp.max_srq = attr.max_srq; + resp.max_srq_wr = attr.max_srq_wr; + resp.max_srq_sge = attr.max_srq_sge; + resp.max_pkeys = attr.max_pkeys; + resp.local_ca_ack_delay = attr.local_ca_ack_delay; + resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_port cmd; + struct ib_uverbs_query_port_resp resp; + struct ib_port_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr); + if (ret) + return ret; + + memset(&resp, 0, sizeof resp); + + resp.state = attr.state; + resp.max_mtu = attr.max_mtu; + resp.active_mtu = attr.active_mtu; + resp.gid_tbl_len = attr.gid_tbl_len; + resp.port_cap_flags = attr.port_cap_flags; + resp.max_msg_sz = attr.max_msg_sz; + resp.bad_pkey_cntr = attr.bad_pkey_cntr; + resp.qkey_viol_cntr = attr.qkey_viol_cntr; + resp.pkey_tbl_len = attr.pkey_tbl_len; + resp.lid = attr.lid; + resp.sm_lid = attr.sm_lid; + resp.lmc = attr.lmc; + resp.max_vl_num = attr.max_vl_num; + resp.sm_sl = attr.sm_sl; + resp.subnet_timeout = attr.subnet_timeout; + resp.init_type_reply = attr.init_type_reply; + resp.active_width = attr.active_width; + resp.active_speed = attr.active_speed; + resp.phys_state = attr.phys_state; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_gid cmd; + struct ib_uverbs_query_gid_resp resp; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index, + (union ib_gid *) resp.gid); + if (ret) + return ret; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_pkey cmd; + struct ib_uverbs_query_pkey_resp resp; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index, + &resp.pkey); + if (ret) + return ret; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_alloc_pd cmd; + struct ib_uverbs_alloc_pd_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_pd *pd; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + uobj->context = file->ucontext; + + pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, + file->ucontext, &udata); + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); + goto err; + } + + pd->device = file->device->ib_dev; + pd->uobject = uobj; + atomic_set(&pd->usecnt, 0); + +retry: + if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_pd; + } + + down(&ib_uverbs_idr_mutex); + ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id); + up(&ib_uverbs_idr_mutex); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_pd; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&uobj->list, &file->ucontext->pd_list); + spin_unlock_irq(&file->ucontext->lock); + + memset(&resp, 0, sizeof resp); + resp.pd_handle = uobj->id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + down(&ib_uverbs_idr_mutex); + idr_remove(&ib_uverbs_pd_idr, uobj->id); + up(&ib_uverbs_idr_mutex); + +err_pd: + ib_dealloc_pd(pd); + +err: + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_dealloc_pd cmd; + struct ib_pd *pd; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) + goto out; + + uobj = pd->uobject; + + ret = ib_dealloc_pd(pd); + if (ret) + goto out; + + idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_reg_mr cmd; + struct ib_uverbs_reg_mr_resp resp; + struct ib_udata udata; + struct ib_umem_object *obj; + struct ib_pd *pd; + struct ib_mr *mr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) + return -EINVAL; + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + obj->uobject.context = file->ucontext; + + /* + * We ask for writable memory if any access flags other than + * "remote read" are set. "Local write" and "remote write" + * obviously require write access. "Remote atomic" can do + * things like fetch and add, which will modify memory, and + * "MW bind" can change permissions by binding a window. + */ + ret = ib_umem_get(file->device->ib_dev, &obj->umem, + (void *) (unsigned long) cmd.start, cmd.length, + !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ)); + if (ret) + goto err_free; + + obj->umem.virt_base = cmd.hca_va; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + if (!pd->device->reg_user_mr) { + ret = -ENOSYS; + goto err_up; + } + + mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); + if (IS_ERR(mr)) { + ret = PTR_ERR(mr); + goto err_up; + } + + mr->device = pd->device; + mr->pd = pd; + mr->uobject = &obj->uobject; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + + memset(&resp, 0, sizeof resp); + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; + +retry: + if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_unreg; + } + + ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_unreg; + + resp.mr_handle = obj->uobject.id; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); + spin_unlock_irq(&file->ucontext->lock); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&obj->uobject.list); + spin_unlock_irq(&file->ucontext->lock); + +err_unreg: + ib_dereg_mr(mr); + +err_up: + up(&ib_uverbs_idr_mutex); + + ib_umem_release(file->device->ib_dev, &obj->umem); + +err_free: + kfree(obj); + return ret; +} + +ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_dereg_mr cmd; + struct ib_mr *mr; + struct ib_umem_object *memobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle); + if (!mr || mr->uobject->context != file->ucontext) + goto out; + + memobj = container_of(mr->uobject, struct ib_umem_object, uobject); + + ret = ib_dereg_mr(mr); + if (ret) + goto out; + + idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&memobj->uobject.list); + spin_unlock_irq(&file->ucontext->lock); + + ib_umem_release(file->device->ib_dev, &memobj->umem); + kfree(memobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_cq cmd; + struct ib_uverbs_create_cq_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_cq *cq; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + if (cmd.event_handler >= file->device->num_comp) + return -EINVAL; + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, + file->ucontext, &udata); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto err; + } + + cq->device = file->device->ib_dev; + cq->uobject = uobj; + cq->comp_handler = ib_uverbs_comp_handler; + cq->event_handler = ib_uverbs_cq_event_handler; + cq->cq_context = file; + atomic_set(&cq->usecnt, 0); + +retry: + if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_cq; + } + + down(&ib_uverbs_idr_mutex); + ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->id); + up(&ib_uverbs_idr_mutex); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_cq; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&uobj->list, &file->ucontext->cq_list); + spin_unlock_irq(&file->ucontext->lock); + + memset(&resp, 0, sizeof resp); + resp.cq_handle = uobj->id; + resp.cqe = cq->cqe; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + down(&ib_uverbs_idr_mutex); + idr_remove(&ib_uverbs_cq_idr, uobj->id); + up(&ib_uverbs_idr_mutex); + +err_cq: + ib_destroy_cq(cq); + +err: + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_cq cmd; + struct ib_cq *cq; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (!cq || cq->uobject->context != file->ucontext) + goto out; + + uobj = cq->uobject; + + ret = ib_destroy_cq(cq); + if (ret) + goto out; + + idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_qp cmd; + struct ib_uverbs_create_qp_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_cq *scq, *rcq; + struct ib_qp *qp; + struct ib_qp_init_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle); + rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle); + + if (!pd || pd->uobject->context != file->ucontext || + !scq || scq->uobject->context != file->ucontext || + !rcq || rcq->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + attr.event_handler = ib_uverbs_qp_event_handler; + attr.qp_context = file; + attr.send_cq = scq; + attr.recv_cq = rcq; + attr.srq = NULL; + attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + attr.qp_type = cmd.qp_type; + + attr.cap.max_send_wr = cmd.max_send_wr; + attr.cap.max_recv_wr = cmd.max_recv_wr; + attr.cap.max_send_sge = cmd.max_send_sge; + attr.cap.max_recv_sge = cmd.max_recv_sge; + attr.cap.max_inline_data = cmd.max_inline_data; + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + qp = pd->device->create_qp(pd, &attr, &udata); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_up; + } + + qp->device = pd->device; + qp->pd = pd; + qp->send_cq = attr.send_cq; + qp->recv_cq = attr.recv_cq; + qp->srq = attr.srq; + qp->uobject = uobj; + qp->event_handler = attr.event_handler; + qp->qp_context = attr.qp_context; + qp->qp_type = attr.qp_type; + atomic_inc(&pd->usecnt); + atomic_inc(&attr.send_cq->usecnt); + atomic_inc(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_inc(&attr.srq->usecnt); + + memset(&resp, 0, sizeof resp); + resp.qpn = qp->qp_num; + +retry: + if (!idr_pre_get(&ib_uverbs_qp_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_destroy; + } + + ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_destroy; + + resp.qp_handle = uobj->id; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&uobj->list, &file->ucontext->qp_list); + spin_unlock_irq(&file->ucontext->lock); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + +err_destroy: + ib_destroy_qp(qp); + +err_up: + up(&ib_uverbs_idr_mutex); + + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_modify_qp cmd; + struct ib_qp *qp; + struct ib_qp_attr *attr; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) { + ret = -EINVAL; + goto out; + } + + attr->qp_state = cmd.qp_state; + attr->cur_qp_state = cmd.cur_qp_state; + attr->path_mtu = cmd.path_mtu; + attr->path_mig_state = cmd.path_mig_state; + attr->qkey = cmd.qkey; + attr->rq_psn = cmd.rq_psn; + attr->sq_psn = cmd.sq_psn; + attr->dest_qp_num = cmd.dest_qp_num; + attr->qp_access_flags = cmd.qp_access_flags; + attr->pkey_index = cmd.pkey_index; + attr->alt_pkey_index = cmd.pkey_index; + attr->en_sqd_async_notify = cmd.en_sqd_async_notify; + attr->max_rd_atomic = cmd.max_rd_atomic; + attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; + attr->min_rnr_timer = cmd.min_rnr_timer; + attr->port_num = cmd.port_num; + attr->timeout = cmd.timeout; + attr->retry_cnt = cmd.retry_cnt; + attr->rnr_retry = cmd.rnr_retry; + attr->alt_port_num = cmd.alt_port_num; + attr->alt_timeout = cmd.alt_timeout; + + memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); + attr->ah_attr.grh.flow_label = cmd.dest.flow_label; + attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; + attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; + attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; + attr->ah_attr.dlid = cmd.dest.dlid; + attr->ah_attr.sl = cmd.dest.sl; + attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; + attr->ah_attr.static_rate = cmd.dest.static_rate; + attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; + attr->ah_attr.port_num = cmd.dest.port_num; + + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); + attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; + attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; + attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; + attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; + attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; + attr->alt_ah_attr.sl = cmd.alt_dest.sl; + attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; + attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; + attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; + attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; + + ret = ib_modify_qp(qp, attr, cmd.attr_mask); + if (ret) + goto out; + + ret = in_len; + +out: + up(&ib_uverbs_idr_mutex); + kfree(attr); + + return ret; +} + +ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_qp cmd; + struct ib_qp *qp; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + uobj = qp->uobject; + + ret = ib_destroy_qp(qp); + if (ret) + goto out; + + idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_attach_mcast cmd; + struct ib_qp *qp; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (qp && qp->uobject->context == file->ucontext) + ret = ib_attach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); + + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_detach_mcast cmd; + struct ib_qp *qp; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (qp && qp->uobject->context == file->ucontext) + ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); + + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c new file mode 100644 index 00000000000..fbbe03d8c90 --- /dev/null +++ b/drivers/infiniband/core/uverbs_main.c @@ -0,0 +1,698 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $ + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/file.h> +#include <linux/mount.h> + +#include <asm/uaccess.h> + +#include "uverbs.h" + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("InfiniBand userspace verbs access"); +MODULE_LICENSE("Dual BSD/GPL"); + +#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */ + +enum { + IB_UVERBS_MAJOR = 231, + IB_UVERBS_BASE_MINOR = 192, + IB_UVERBS_MAX_DEVICES = 32 +}; + +#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) + +DECLARE_MUTEX(ib_uverbs_idr_mutex); +DEFINE_IDR(ib_uverbs_pd_idr); +DEFINE_IDR(ib_uverbs_mr_idr); +DEFINE_IDR(ib_uverbs_mw_idr); +DEFINE_IDR(ib_uverbs_ah_idr); +DEFINE_IDR(ib_uverbs_cq_idr); +DEFINE_IDR(ib_uverbs_qp_idr); + +static spinlock_t map_lock; +static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); + +static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) = { + [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params, + [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, + [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, + [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, + [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid, + [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey, + [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, + [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, + [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, + [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, + [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, + [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, +}; + +static struct vfsmount *uverbs_event_mnt; + +static void ib_uverbs_add_one(struct ib_device *device); +static void ib_uverbs_remove_one(struct ib_device *device); + +static int ib_dealloc_ucontext(struct ib_ucontext *context) +{ + struct ib_uobject *uobj, *tmp; + + if (!context) + return 0; + + down(&ib_uverbs_idr_mutex); + + /* XXX Free AHs */ + + list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { + struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); + idr_remove(&ib_uverbs_qp_idr, uobj->id); + ib_destroy_qp(qp); + list_del(&uobj->list); + kfree(uobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { + struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id); + idr_remove(&ib_uverbs_cq_idr, uobj->id); + ib_destroy_cq(cq); + list_del(&uobj->list); + kfree(uobj); + } + + /* XXX Free SRQs */ + /* XXX Free MWs */ + + list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { + struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id); + struct ib_umem_object *memobj; + + idr_remove(&ib_uverbs_mr_idr, uobj->id); + ib_dereg_mr(mr); + + memobj = container_of(uobj, struct ib_umem_object, uobject); + ib_umem_release_on_close(mr->device, &memobj->umem); + + list_del(&uobj->list); + kfree(memobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { + struct ib_pd *pd = idr_find(&ib_uverbs_pd_idr, uobj->id); + idr_remove(&ib_uverbs_pd_idr, uobj->id); + ib_dealloc_pd(pd); + list_del(&uobj->list); + kfree(uobj); + } + + up(&ib_uverbs_idr_mutex); + + return context->device->dealloc_ucontext(context); +} + +static void ib_uverbs_release_file(struct kref *ref) +{ + struct ib_uverbs_file *file = + container_of(ref, struct ib_uverbs_file, ref); + + module_put(file->device->ib_dev->owner); + kfree(file); +} + +static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_event_file *file = filp->private_data; + void *event; + int eventsz; + int ret = 0; + + spin_lock_irq(&file->lock); + + while (list_empty(&file->event_list) && file->fd >= 0) { + spin_unlock_irq(&file->lock); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(file->poll_wait, + !list_empty(&file->event_list) || + file->fd < 0)) + return -ERESTARTSYS; + + spin_lock_irq(&file->lock); + } + + if (file->fd < 0) { + spin_unlock_irq(&file->lock); + return -ENODEV; + } + + if (file->is_async) { + event = list_entry(file->event_list.next, + struct ib_uverbs_async_event, list); + eventsz = sizeof (struct ib_uverbs_async_event_desc); + } else { + event = list_entry(file->event_list.next, + struct ib_uverbs_comp_event, list); + eventsz = sizeof (struct ib_uverbs_comp_event_desc); + } + + if (eventsz > count) { + ret = -EINVAL; + event = NULL; + } else + list_del(file->event_list.next); + + spin_unlock_irq(&file->lock); + + if (event) { + if (copy_to_user(buf, event, eventsz)) + ret = -EFAULT; + else + ret = eventsz; + } + + kfree(event); + + return ret; +} + +static unsigned int ib_uverbs_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + unsigned int pollflags = 0; + struct ib_uverbs_event_file *file = filp->private_data; + + poll_wait(filp, &file->poll_wait, wait); + + spin_lock_irq(&file->lock); + if (file->fd < 0) + pollflags = POLLERR; + else if (!list_empty(&file->event_list)) + pollflags = POLLIN | POLLRDNORM; + spin_unlock_irq(&file->lock); + + return pollflags; +} + +static void ib_uverbs_event_release(struct ib_uverbs_event_file *file) +{ + struct list_head *entry, *tmp; + + spin_lock_irq(&file->lock); + if (file->fd != -1) { + file->fd = -1; + list_for_each_safe(entry, tmp, &file->event_list) + if (file->is_async) + kfree(list_entry(entry, struct ib_uverbs_async_event, list)); + else + kfree(list_entry(entry, struct ib_uverbs_comp_event, list)); + } + spin_unlock_irq(&file->lock); +} + +static int ib_uverbs_event_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_event_file *file = filp->private_data; + + ib_uverbs_event_release(file); + kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + + return 0; +} + +static struct file_operations uverbs_event_fops = { + /* + * No .owner field since we artificially create event files, + * so there is no increment to the module reference count in + * the open path. All event files come from a uverbs command + * file, which already takes a module reference, so this is OK. + */ + .read = ib_uverbs_event_read, + .poll = ib_uverbs_event_poll, + .release = ib_uverbs_event_close +}; + +void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) +{ + struct ib_uverbs_file *file = cq_context; + struct ib_uverbs_comp_event *entry; + unsigned long flags; + + entry = kmalloc(sizeof *entry, GFP_ATOMIC); + if (!entry) + return; + + entry->desc.cq_handle = cq->uobject->user_handle; + + spin_lock_irqsave(&file->comp_file[0].lock, flags); + list_add_tail(&entry->list, &file->comp_file[0].event_list); + spin_unlock_irqrestore(&file->comp_file[0].lock, flags); + + wake_up_interruptible(&file->comp_file[0].poll_wait); +} + +static void ib_uverbs_async_handler(struct ib_uverbs_file *file, + __u64 element, __u64 event) +{ + struct ib_uverbs_async_event *entry; + unsigned long flags; + + entry = kmalloc(sizeof *entry, GFP_ATOMIC); + if (!entry) + return; + + entry->desc.element = element; + entry->desc.event_type = event; + + spin_lock_irqsave(&file->async_file.lock, flags); + list_add_tail(&entry->list, &file->async_file.event_list); + spin_unlock_irqrestore(&file->async_file.lock, flags); + + wake_up_interruptible(&file->async_file.poll_wait); +} + +void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) +{ + ib_uverbs_async_handler(context_ptr, + event->element.cq->uobject->user_handle, + event->event); +} + +void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) +{ + ib_uverbs_async_handler(context_ptr, + event->element.qp->uobject->user_handle, + event->event); +} + +static void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct ib_uverbs_file *file = + container_of(handler, struct ib_uverbs_file, event_handler); + + ib_uverbs_async_handler(file, event->element.port_num, event->event); +} + +static int ib_uverbs_event_init(struct ib_uverbs_event_file *file, + struct ib_uverbs_file *uverbs_file) +{ + struct file *filp; + + spin_lock_init(&file->lock); + INIT_LIST_HEAD(&file->event_list); + init_waitqueue_head(&file->poll_wait); + file->uverbs_file = uverbs_file; + + file->fd = get_unused_fd(); + if (file->fd < 0) + return file->fd; + + filp = get_empty_filp(); + if (!filp) { + put_unused_fd(file->fd); + return -ENFILE; + } + + filp->f_op = &uverbs_event_fops; + filp->f_vfsmnt = mntget(uverbs_event_mnt); + filp->f_dentry = dget(uverbs_event_mnt->mnt_root); + filp->f_mapping = filp->f_dentry->d_inode->i_mapping; + filp->f_flags = O_RDONLY; + filp->f_mode = FMODE_READ; + filp->private_data = file; + + fd_install(file->fd, filp); + + return 0; +} + +static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_cmd_hdr hdr; + + if (count < sizeof hdr) + return -EINVAL; + + if (copy_from_user(&hdr, buf, sizeof hdr)) + return -EFAULT; + + if (hdr.in_words * 4 != count) + return -EINVAL; + + if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table)) + return -EINVAL; + + if (!file->ucontext && + hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS && + hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) + return -EINVAL; + + return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr, + hdr.in_words * 4, hdr.out_words * 4); +} + +static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct ib_uverbs_file *file = filp->private_data; + + if (!file->ucontext) + return -ENODEV; + else + return file->device->ib_dev->mmap(file->ucontext, vma); +} + +static int ib_uverbs_open(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_device *dev = + container_of(inode->i_cdev, struct ib_uverbs_device, dev); + struct ib_uverbs_file *file; + int i = 0; + int ret; + + if (!try_module_get(dev->ib_dev->owner)) + return -ENODEV; + + file = kmalloc(sizeof *file + + (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file), + GFP_KERNEL); + if (!file) + return -ENOMEM; + + file->device = dev; + kref_init(&file->ref); + + file->ucontext = NULL; + + ret = ib_uverbs_event_init(&file->async_file, file); + if (ret) + goto err; + + file->async_file.is_async = 1; + + kref_get(&file->ref); + + for (i = 0; i < dev->num_comp; ++i) { + ret = ib_uverbs_event_init(&file->comp_file[i], file); + if (ret) + goto err_async; + kref_get(&file->ref); + file->comp_file[i].is_async = 0; + } + + + filp->private_data = file; + + INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev, + ib_uverbs_event_handler); + if (ib_register_event_handler(&file->event_handler)) + goto err_async; + + return 0; + +err_async: + while (i--) + ib_uverbs_event_release(&file->comp_file[i]); + + ib_uverbs_event_release(&file->async_file); + +err: + kref_put(&file->ref, ib_uverbs_release_file); + + return ret; +} + +static int ib_uverbs_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_file *file = filp->private_data; + int i; + + ib_unregister_event_handler(&file->event_handler); + ib_uverbs_event_release(&file->async_file); + ib_dealloc_ucontext(file->ucontext); + + for (i = 0; i < file->device->num_comp; ++i) + ib_uverbs_event_release(&file->comp_file[i]); + + kref_put(&file->ref, ib_uverbs_release_file); + + return 0; +} + +static struct file_operations uverbs_fops = { + .owner = THIS_MODULE, + .write = ib_uverbs_write, + .open = ib_uverbs_open, + .release = ib_uverbs_close +}; + +static struct file_operations uverbs_mmap_fops = { + .owner = THIS_MODULE, + .write = ib_uverbs_write, + .mmap = ib_uverbs_mmap, + .open = ib_uverbs_open, + .release = ib_uverbs_close +}; + +static struct ib_client uverbs_client = { + .name = "uverbs", + .add = ib_uverbs_add_one, + .remove = ib_uverbs_remove_one +}; + +static ssize_t show_ibdev(struct class_device *class_dev, char *buf) +{ + struct ib_uverbs_device *dev = + container_of(class_dev, struct ib_uverbs_device, class_dev); + + return sprintf(buf, "%s\n", dev->ib_dev->name); +} +static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); + +static void ib_uverbs_release_class_dev(struct class_device *class_dev) +{ + struct ib_uverbs_device *dev = + container_of(class_dev, struct ib_uverbs_device, class_dev); + + cdev_del(&dev->dev); + clear_bit(dev->devnum, dev_map); + kfree(dev); +} + +static struct class uverbs_class = { + .name = "infiniband_verbs", + .release = ib_uverbs_release_class_dev +}; + +static ssize_t show_abi_version(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION); +} +static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + +static void ib_uverbs_add_one(struct ib_device *device) +{ + struct ib_uverbs_device *uverbs_dev; + + if (!device->alloc_ucontext) + return; + + uverbs_dev = kmalloc(sizeof *uverbs_dev, GFP_KERNEL); + if (!uverbs_dev) + return; + + memset(uverbs_dev, 0, sizeof *uverbs_dev); + + spin_lock(&map_lock); + uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); + if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) { + spin_unlock(&map_lock); + goto err; + } + set_bit(uverbs_dev->devnum, dev_map); + spin_unlock(&map_lock); + + uverbs_dev->ib_dev = device; + uverbs_dev->num_comp = 1; + + if (device->mmap) + cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops); + else + cdev_init(&uverbs_dev->dev, &uverbs_fops); + uverbs_dev->dev.owner = THIS_MODULE; + kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum); + if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) + goto err; + + uverbs_dev->class_dev.class = &uverbs_class; + uverbs_dev->class_dev.dev = device->dma_device; + uverbs_dev->class_dev.devt = uverbs_dev->dev.dev; + snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum); + if (class_device_register(&uverbs_dev->class_dev)) + goto err_cdev; + + if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev)) + goto err_class; + + ib_set_client_data(device, &uverbs_client, uverbs_dev); + + return; + +err_class: + class_device_unregister(&uverbs_dev->class_dev); + +err_cdev: + cdev_del(&uverbs_dev->dev); + clear_bit(uverbs_dev->devnum, dev_map); + +err: + kfree(uverbs_dev); + return; +} + +static void ib_uverbs_remove_one(struct ib_device *device) +{ + struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); + + if (!uverbs_dev) + return; + + class_device_unregister(&uverbs_dev->class_dev); +} + +static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return get_sb_pseudo(fs_type, "infinibandevent:", NULL, + INFINIBANDEVENTFS_MAGIC); +} + +static struct file_system_type uverbs_event_fs = { + /* No owner field so module can be unloaded */ + .name = "infinibandeventfs", + .get_sb = uverbs_event_get_sb, + .kill_sb = kill_litter_super +}; + +static int __init ib_uverbs_init(void) +{ + int ret; + + spin_lock_init(&map_lock); + + ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, + "infiniband_verbs"); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't register device number\n"); + goto out; + } + + ret = class_register(&uverbs_class); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); + goto out_chrdev; + } + + ret = class_create_file(&uverbs_class, &class_attr_abi_version); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); + goto out_class; + } + + ret = register_filesystem(&uverbs_event_fs); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n"); + goto out_class; + } + + uverbs_event_mnt = kern_mount(&uverbs_event_fs); + if (IS_ERR(uverbs_event_mnt)) { + ret = PTR_ERR(uverbs_event_mnt); + printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n"); + goto out_fs; + } + + ret = ib_register_client(&uverbs_client); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't register client\n"); + goto out_mnt; + } + + return 0; + +out_mnt: + mntput(uverbs_event_mnt); + +out_fs: + unregister_filesystem(&uverbs_event_fs); + +out_class: + class_unregister(&uverbs_class); + +out_chrdev: + unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); + +out: + return ret; +} + +static void __exit ib_uverbs_cleanup(void) +{ + ib_unregister_client(&uverbs_client); + mntput(uverbs_event_mnt); + unregister_filesystem(&uverbs_event_fs); + class_unregister(&uverbs_class); + unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); +} + +module_init(ib_uverbs_init); +module_exit(ib_uverbs_cleanup); diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c new file mode 100644 index 00000000000..ed550f6595b --- /dev/null +++ b/drivers/infiniband/core/uverbs_mem.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $ + */ + +#include <linux/mm.h> +#include <linux/dma-mapping.h> + +#include "uverbs.h" + +struct ib_umem_account_work { + struct work_struct work; + struct mm_struct *mm; + unsigned long diff; +}; + + +static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) +{ + struct ib_umem_chunk *chunk, *tmp; + int i; + + list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) { + dma_unmap_sg(dev->dma_device, chunk->page_list, + chunk->nents, DMA_BIDIRECTIONAL); + for (i = 0; i < chunk->nents; ++i) { + if (umem->writable && dirty) + set_page_dirty_lock(chunk->page_list[i].page); + put_page(chunk->page_list[i].page); + } + + kfree(chunk); + } +} + +int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, + void *addr, size_t size, int write) +{ + struct page **page_list; + struct ib_umem_chunk *chunk; + unsigned long locked; + unsigned long lock_limit; + unsigned long cur_base; + unsigned long npages; + int ret = 0; + int off; + int i; + + if (!can_do_mlock()) + return -EPERM; + + page_list = (struct page **) __get_free_page(GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + mem->user_base = (unsigned long) addr; + mem->length = size; + mem->offset = (unsigned long) addr & ~PAGE_MASK; + mem->page_size = PAGE_SIZE; + mem->writable = write; + + INIT_LIST_HEAD(&mem->chunk_list); + + npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT; + + down_write(¤t->mm->mmap_sem); + + locked = npages + current->mm->locked_vm; + lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + ret = -ENOMEM; + goto out; + } + + cur_base = (unsigned long) addr & PAGE_MASK; + + while (npages) { + ret = get_user_pages(current, current->mm, cur_base, + min_t(int, npages, + PAGE_SIZE / sizeof (struct page *)), + 1, !write, page_list, NULL); + + if (ret < 0) + goto out; + + cur_base += ret * PAGE_SIZE; + npages -= ret; + + off = 0; + + while (ret) { + chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) * + min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK), + GFP_KERNEL); + if (!chunk) { + ret = -ENOMEM; + goto out; + } + + chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK); + for (i = 0; i < chunk->nents; ++i) { + chunk->page_list[i].page = page_list[i + off]; + chunk->page_list[i].offset = 0; + chunk->page_list[i].length = PAGE_SIZE; + } + + chunk->nmap = dma_map_sg(dev->dma_device, + &chunk->page_list[0], + chunk->nents, + DMA_BIDIRECTIONAL); + if (chunk->nmap <= 0) { + for (i = 0; i < chunk->nents; ++i) + put_page(chunk->page_list[i].page); + kfree(chunk); + + ret = -ENOMEM; + goto out; + } + + ret -= chunk->nents; + off += chunk->nents; + list_add_tail(&chunk->list, &mem->chunk_list); + } + + ret = 0; + } + +out: + if (ret < 0) + __ib_umem_release(dev, mem, 0); + else + current->mm->locked_vm = locked; + + up_write(¤t->mm->mmap_sem); + free_page((unsigned long) page_list); + + return ret; +} + +void ib_umem_release(struct ib_device *dev, struct ib_umem *umem) +{ + __ib_umem_release(dev, umem, 1); + + down_write(¤t->mm->mmap_sem); + current->mm->locked_vm -= + PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; + up_write(¤t->mm->mmap_sem); +} + +static void ib_umem_account(void *work_ptr) +{ + struct ib_umem_account_work *work = work_ptr; + + down_write(&work->mm->mmap_sem); + work->mm->locked_vm -= work->diff; + up_write(&work->mm->mmap_sem); + mmput(work->mm); + kfree(work); +} + +void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem) +{ + struct ib_umem_account_work *work; + struct mm_struct *mm; + + __ib_umem_release(dev, umem, 1); + + mm = get_task_mm(current); + if (!mm) + return; + + /* + * We may be called with the mm's mmap_sem already held. This + * can happen when a userspace munmap() is the call that drops + * the last reference to our file and calls our release + * method. If there are memory regions to destroy, we'll end + * up here and not be able to take the mmap_sem. Therefore we + * defer the vm_locked accounting to the system workqueue. + */ + + work = kmalloc(sizeof *work, GFP_KERNEL); + if (!work) + return; + + INIT_WORK(&work->work, ib_umem_account, work); + work->mm = mm; + work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; + + schedule_work(&work->work); +} diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 7c08ed0cd7d..2516f964651 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -4,6 +4,7 @@ * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -47,10 +48,11 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device) { struct ib_pd *pd; - pd = device->alloc_pd(device); + pd = device->alloc_pd(device, NULL, NULL); if (!IS_ERR(pd)) { - pd->device = device; + pd->device = device; + pd->uobject = NULL; atomic_set(&pd->usecnt, 0); } @@ -76,8 +78,9 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) ah = pd->device->create_ah(pd, ah_attr); if (!IS_ERR(ah)) { - ah->device = pd->device; - ah->pd = pd; + ah->device = pd->device; + ah->pd = pd; + ah->uobject = NULL; atomic_inc(&pd->usecnt); } @@ -122,7 +125,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, { struct ib_qp *qp; - qp = pd->device->create_qp(pd, qp_init_attr); + qp = pd->device->create_qp(pd, qp_init_attr, NULL); if (!IS_ERR(qp)) { qp->device = pd->device; @@ -130,6 +133,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->send_cq = qp_init_attr->send_cq; qp->recv_cq = qp_init_attr->recv_cq; qp->srq = qp_init_attr->srq; + qp->uobject = NULL; qp->event_handler = qp_init_attr->event_handler; qp->qp_context = qp_init_attr->qp_context; qp->qp_type = qp_init_attr->qp_type; @@ -197,10 +201,11 @@ struct ib_cq *ib_create_cq(struct ib_device *device, { struct ib_cq *cq; - cq = device->create_cq(device, cqe); + cq = device->create_cq(device, cqe, NULL, NULL); if (!IS_ERR(cq)) { cq->device = device; + cq->uobject = NULL; cq->comp_handler = comp_handler; cq->event_handler = event_handler; cq->cq_context = cq_context; @@ -245,8 +250,9 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) mr = pd->device->get_dma_mr(pd, mr_access_flags); if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); } @@ -267,8 +273,9 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, mr_access_flags, iova_start); if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); } @@ -344,8 +351,9 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd) mw = pd->device->alloc_mw(pd); if (!IS_ERR(mw)) { - mw->device = pd->device; - mw->pd = pd; + mw->device = pd->device; + mw->pd = pd; + mw->uobject = NULL; atomic_inc(&pd->usecnt); } diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 085baf393ca..d58dcbe6648 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index cd9ed958d92..1557a522d83 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -431,6 +431,36 @@ static int mthca_cmd_imm(struct mthca_dev *dev, timeout, status); } +int mthca_cmd_init(struct mthca_dev *dev) +{ + sema_init(&dev->cmd.hcr_sem, 1); + sema_init(&dev->cmd.poll_sem, 1); + dev->cmd.use_events = 0; + + dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE, + MTHCA_HCR_SIZE); + if (!dev->hcr) { + mthca_err(dev, "Couldn't map command register."); + return -ENOMEM; + } + + dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev, + MTHCA_MAILBOX_SIZE, + MTHCA_MAILBOX_SIZE, 0); + if (!dev->cmd.pool) { + iounmap(dev->hcr); + return -ENOMEM; + } + + return 0; +} + +void mthca_cmd_cleanup(struct mthca_dev *dev) +{ + pci_pool_destroy(dev->cmd.pool); + iounmap(dev->hcr); +} + /* * Switch to using events to issue FW commands (should be called after * event queue to command events has been initialized). @@ -489,6 +519,33 @@ void mthca_cmd_use_polling(struct mthca_dev *dev) up(&dev->cmd.poll_sem); } +struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, + unsigned int gfp_mask) +{ + struct mthca_mailbox *mailbox; + + mailbox = kmalloc(sizeof *mailbox, gfp_mask); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma); + if (!mailbox->buf) { + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + + return mailbox; +} + +void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox) +{ + if (!mailbox) + return; + + pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} + int mthca_SYS_EN(struct mthca_dev *dev, u8 *status) { u64 out; @@ -513,20 +570,20 @@ int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status) static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, u64 virt, u8 *status) { - u32 *inbox; - dma_addr_t indma; + struct mthca_mailbox *mailbox; struct mthca_icm_iter iter; + __be64 *pages; int lg; int nent = 0; int i; int err = 0; int ts = 0, tc = 0; - inbox = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &indma); - if (!inbox) - return -ENOMEM; - - memset(inbox, 0, PAGE_SIZE); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + memset(mailbox->buf, 0, MTHCA_MAILBOX_SIZE); + pages = mailbox->buf; for (mthca_icm_first(icm, &iter); !mthca_icm_last(&iter); @@ -546,19 +603,17 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, } for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) { if (virt != -1) { - *((__be64 *) (inbox + nent * 4)) = - cpu_to_be64(virt); + pages[nent * 2] = cpu_to_be64(virt); virt += 1 << lg; } - *((__be64 *) (inbox + nent * 4 + 2)) = - cpu_to_be64((mthca_icm_addr(&iter) + - (i << lg)) | (lg - 12)); + pages[nent * 2 + 1] = cpu_to_be64((mthca_icm_addr(&iter) + + (i << lg)) | (lg - 12)); ts += 1 << (lg - 10); ++tc; - if (nent == PAGE_SIZE / 16) { - err = mthca_cmd(dev, indma, nent, 0, op, + if (nent == MTHCA_MAILBOX_SIZE / 16) { + err = mthca_cmd(dev, mailbox->dma, nent, 0, op, CMD_TIME_CLASS_B, status); if (err || *status) goto out; @@ -568,7 +623,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, } if (nent) - err = mthca_cmd(dev, indma, nent, 0, op, + err = mthca_cmd(dev, mailbox->dma, nent, 0, op, CMD_TIME_CLASS_B, status); switch (op) { @@ -585,7 +640,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, } out: - pci_free_consistent(dev->pdev, PAGE_SIZE, inbox, indma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -606,8 +661,8 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status) int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) { + struct mthca_mailbox *mailbox; u32 *outbox; - dma_addr_t outdma; int err = 0; u8 lg; @@ -625,12 +680,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) #define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40 #define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48 - outbox = pci_alloc_consistent(dev->pdev, QUERY_FW_OUT_SIZE, &outdma); - if (!outbox) { - return -ENOMEM; - } + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_FW, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW, CMD_TIME_CLASS_A, status); if (err) @@ -681,15 +736,15 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) } out: - pci_free_consistent(dev->pdev, QUERY_FW_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) { + struct mthca_mailbox *mailbox; u8 info; u32 *outbox; - dma_addr_t outdma; int err = 0; #define ENABLE_LAM_OUT_SIZE 0x100 @@ -700,11 +755,12 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) #define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4) #define ENABLE_LAM_INFO_ECC_MASK 0x3 - outbox = pci_alloc_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, &outdma); - if (!outbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_ENABLE_LAM, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM, CMD_TIME_CLASS_C, status); if (err) @@ -733,7 +789,7 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) (unsigned long long) dev->ddr_end); out: - pci_free_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -744,9 +800,9 @@ int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status) int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) { + struct mthca_mailbox *mailbox; u8 info; u32 *outbox; - dma_addr_t outdma; int err = 0; #define QUERY_DDR_OUT_SIZE 0x100 @@ -757,11 +813,12 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) #define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4) #define QUERY_DDR_INFO_ECC_MASK 0x3 - outbox = pci_alloc_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, &outdma); - if (!outbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DDR, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR, CMD_TIME_CLASS_A, status); if (err) @@ -787,15 +844,15 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) (unsigned long long) dev->ddr_end); out: - pci_free_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, struct mthca_dev_lim *dev_lim, u8 *status) { + struct mthca_mailbox *mailbox; u32 *outbox; - dma_addr_t outdma; u8 field; u16 size; int err; @@ -860,11 +917,12 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, #define QUERY_DEV_LIM_LAMR_OFFSET 0x9f #define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0 - outbox = pci_alloc_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, &outdma); - if (!outbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DEV_LIM, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM, CMD_TIME_CLASS_A, status); if (err) @@ -1020,15 +1078,15 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, } out: - pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_QUERY_ADAPTER(struct mthca_dev *dev, struct mthca_adapter *adapter, u8 *status) { + struct mthca_mailbox *mailbox; u32 *outbox; - dma_addr_t outdma; int err; #define QUERY_ADAPTER_OUT_SIZE 0x100 @@ -1037,23 +1095,24 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev, #define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08 #define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 - outbox = pci_alloc_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, &outdma); - if (!outbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_ADAPTER, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER, CMD_TIME_CLASS_A, status); if (err) goto out; - MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET); - MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET); + MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET); + MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET); MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET); - MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); + MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); out: - pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -1061,8 +1120,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev, struct mthca_init_hca_param *param, u8 *status) { + struct mthca_mailbox *mailbox; u32 *inbox; - dma_addr_t indma; int err; #define INIT_HCA_IN_SIZE 0x200 @@ -1102,9 +1161,10 @@ int mthca_INIT_HCA(struct mthca_dev *dev, #define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10) #define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18) - inbox = pci_alloc_consistent(dev->pdev, INIT_HCA_IN_SIZE, &indma); - if (!inbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; memset(inbox, 0, INIT_HCA_IN_SIZE); @@ -1167,10 +1227,9 @@ int mthca_INIT_HCA(struct mthca_dev *dev, MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET); } - err = mthca_cmd(dev, indma, 0, 0, CMD_INIT_HCA, - HZ, status); + err = mthca_cmd(dev, mailbox->dma, 0, 0, CMD_INIT_HCA, HZ, status); - pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -1178,8 +1237,8 @@ int mthca_INIT_IB(struct mthca_dev *dev, struct mthca_init_ib_param *param, int port, u8 *status) { + struct mthca_mailbox *mailbox; u32 *inbox; - dma_addr_t indma; int err; u32 flags; @@ -1199,9 +1258,10 @@ int mthca_INIT_IB(struct mthca_dev *dev, #define INIT_IB_NODE_GUID_OFFSET 0x18 #define INIT_IB_SI_GUID_OFFSET 0x20 - inbox = pci_alloc_consistent(dev->pdev, INIT_IB_IN_SIZE, &indma); - if (!inbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; memset(inbox, 0, INIT_IB_IN_SIZE); @@ -1221,10 +1281,10 @@ int mthca_INIT_IB(struct mthca_dev *dev, MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET); MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET); - err = mthca_cmd(dev, indma, port, 0, CMD_INIT_IB, + err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB, CMD_TIME_CLASS_A, status); - pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -1241,8 +1301,8 @@ int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status) int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, int port, u8 *status) { + struct mthca_mailbox *mailbox; u32 *inbox; - dma_addr_t indma; int err; u32 flags = 0; @@ -1253,9 +1313,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, #define SET_IB_CAP_MASK_OFFSET 0x04 #define SET_IB_SI_GUID_OFFSET 0x08 - inbox = pci_alloc_consistent(dev->pdev, SET_IB_IN_SIZE, &indma); - if (!inbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; memset(inbox, 0, SET_IB_IN_SIZE); @@ -1266,10 +1327,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET); MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET); - err = mthca_cmd(dev, indma, port, 0, CMD_SET_IB, + err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB, CMD_TIME_CLASS_B, status); - pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -1280,20 +1341,22 @@ int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *st int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status) { + struct mthca_mailbox *mailbox; u64 *inbox; - dma_addr_t indma; int err; - inbox = pci_alloc_consistent(dev->pdev, 16, &indma); - if (!inbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; inbox[0] = cpu_to_be64(virt); inbox[1] = cpu_to_be64(dma_addr); - err = mthca_cmd(dev, indma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status); + err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM, + CMD_TIME_CLASS_B, status); - pci_free_consistent(dev->pdev, 16, inbox, indma); + mthca_free_mailbox(dev, mailbox); if (!err) mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n", @@ -1338,69 +1401,26 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, return 0; } -int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry, +int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int mpt_index, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, mpt_entry, - MTHCA_MPT_ENTRY_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, mpt_index, 0, CMD_SW2HW_MPT, - CMD_TIME_CLASS_B, status); - - pci_unmap_single(dev->pdev, indma, - MTHCA_MPT_ENTRY_SIZE, PCI_DMA_TODEVICE); - return err; + return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT, + CMD_TIME_CLASS_B, status); } -int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry, +int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int mpt_index, u8 *status) { - dma_addr_t outdma = 0; - int err; - - if (mpt_entry) { - outdma = pci_map_single(dev->pdev, mpt_entry, - MTHCA_MPT_ENTRY_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - } - - err = mthca_cmd_box(dev, 0, outdma, mpt_index, !mpt_entry, - CMD_HW2SW_MPT, - CMD_TIME_CLASS_B, status); - - if (mpt_entry) - pci_unmap_single(dev->pdev, outdma, - MTHCA_MPT_ENTRY_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index, + !mailbox, CMD_HW2SW_MPT, + CMD_TIME_CLASS_B, status); } -int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry, +int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int num_mtt, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, mtt_entry, - (num_mtt + 2) * 8, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, num_mtt, 0, CMD_WRITE_MTT, - CMD_TIME_CLASS_B, status); - - pci_unmap_single(dev->pdev, indma, - (num_mtt + 2) * 8, PCI_DMA_TODEVICE); - return err; + return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT, + CMD_TIME_CLASS_B, status); } int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status) @@ -1418,92 +1438,38 @@ int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status); } -int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context, +int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int eq_num, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, eq_context, - MTHCA_EQ_CONTEXT_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, eq_num, 0, CMD_SW2HW_EQ, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, indma, - MTHCA_EQ_CONTEXT_SIZE, PCI_DMA_TODEVICE); - return err; + return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ, + CMD_TIME_CLASS_A, status); } -int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context, +int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int eq_num, u8 *status) { - dma_addr_t outdma = 0; - int err; - - outdma = pci_map_single(dev->pdev, eq_context, - MTHCA_EQ_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - - err = mthca_cmd_box(dev, 0, outdma, eq_num, 0, - CMD_HW2SW_EQ, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, outdma, - MTHCA_EQ_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0, + CMD_HW2SW_EQ, + CMD_TIME_CLASS_A, status); } -int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context, +int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, cq_context, - MTHCA_CQ_CONTEXT_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, cq_num, 0, CMD_SW2HW_CQ, + return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ, CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, indma, - MTHCA_CQ_CONTEXT_SIZE, PCI_DMA_TODEVICE); - return err; } -int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context, +int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status) { - dma_addr_t outdma = 0; - int err; - - outdma = pci_map_single(dev->pdev, cq_context, - MTHCA_CQ_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - - err = mthca_cmd_box(dev, 0, outdma, cq_num, 0, - CMD_HW2SW_CQ, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, outdma, - MTHCA_CQ_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0, + CMD_HW2SW_CQ, + CMD_TIME_CLASS_A, status); } int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, - int is_ee, void *qp_context, u32 optmask, + int is_ee, struct mthca_mailbox *mailbox, u32 optmask, u8 *status) { static const u16 op[] = { @@ -1520,36 +1486,34 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, [MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE }; u8 op_mod = 0; - - dma_addr_t indma; + int my_mailbox = 0; int err; if (trans < 0 || trans >= ARRAY_SIZE(op)) return -EINVAL; if (trans == MTHCA_TRANS_ANY2RST) { - indma = 0; op_mod = 3; /* don't write outbox, any->reset */ /* For debugging */ - qp_context = pci_alloc_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE, - &indma); - op_mod = 2; /* write outbox, any->reset */ + if (!mailbox) { + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (!IS_ERR(mailbox)) { + my_mailbox = 1; + op_mod = 2; /* write outbox, any->reset */ + } else + mailbox = NULL; + } } else { - indma = pci_map_single(dev->pdev, qp_context, - MTHCA_QP_CONTEXT_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - if (0) { int i; mthca_dbg(dev, "Dumping QP context:\n"); - printk(" opt param mask: %08x\n", be32_to_cpup(qp_context)); + printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf)); for (i = 0; i < 0x100 / 4; ++i) { if (i % 8 == 0) printk(" [%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2])); + printk(" %08x", + be32_to_cpu(((u32 *) mailbox->buf)[i + 2])); if ((i + 1) % 8 == 0) printk("\n"); } @@ -1557,55 +1521,39 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, } if (trans == MTHCA_TRANS_ANY2RST) { - err = mthca_cmd_box(dev, 0, indma, (!!is_ee << 24) | num, - op_mod, op[trans], CMD_TIME_CLASS_C, status); + err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, + (!!is_ee << 24) | num, op_mod, + op[trans], CMD_TIME_CLASS_C, status); - if (0) { + if (0 && mailbox) { int i; mthca_dbg(dev, "Dumping QP context:\n"); - printk(" %08x\n", be32_to_cpup(qp_context)); + printk(" %08x\n", be32_to_cpup(mailbox->buf)); for (i = 0; i < 0x100 / 4; ++i) { if (i % 8 == 0) printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2])); + printk(" %08x", + be32_to_cpu(((u32 *) mailbox->buf)[i + 2])); if ((i + 1) % 8 == 0) printk("\n"); } } } else - err = mthca_cmd(dev, indma, (!!is_ee << 24) | num, + err = mthca_cmd(dev, mailbox->dma, (!!is_ee << 24) | num, op_mod, op[trans], CMD_TIME_CLASS_C, status); - if (trans != MTHCA_TRANS_ANY2RST) - pci_unmap_single(dev->pdev, indma, - MTHCA_QP_CONTEXT_SIZE, PCI_DMA_TODEVICE); - else - pci_free_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE, - qp_context, indma); + if (my_mailbox) + mthca_free_mailbox(dev, mailbox); + return err; } int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, - void *qp_context, u8 *status) + struct mthca_mailbox *mailbox, u8 *status) { - dma_addr_t outdma = 0; - int err; - - outdma = pci_map_single(dev->pdev, qp_context, - MTHCA_QP_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - - err = mthca_cmd_box(dev, 0, outdma, (!!is_ee << 24) | num, 0, - CMD_QUERY_QPEE, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, outdma, - MTHCA_QP_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0, + CMD_QUERY_QPEE, CMD_TIME_CLASS_A, status); } int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, @@ -1635,11 +1583,11 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, } int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc* in_wc, struct ib_grh* in_grh, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad, u8 *status) { - void *box; - dma_addr_t dma; + struct mthca_mailbox *inmailbox, *outmailbox; + void *inbox; int err; u32 in_modifier = port; u8 op_modifier = 0; @@ -1653,11 +1601,18 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, #define MAD_IFC_PKEY_OFFSET 0x10e #define MAD_IFC_GRH_OFFSET 0x140 - box = pci_alloc_consistent(dev->pdev, MAD_IFC_BOX_SIZE, &dma); - if (!box) - return -ENOMEM; + inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(inmailbox)) + return PTR_ERR(inmailbox); + inbox = inmailbox->buf; - memcpy(box, in_mad, 256); + outmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(outmailbox)) { + mthca_free_mailbox(dev, inmailbox); + return PTR_ERR(outmailbox); + } + + memcpy(inbox, in_mad, 256); /* * Key check traps can't be generated unless we have in_wc to @@ -1671,97 +1626,65 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, if (in_wc) { u8 val; - memset(box + 256, 0, 256); + memset(inbox + 256, 0, 256); - MTHCA_PUT(box, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET); - MTHCA_PUT(box, in_wc->src_qp, MAD_IFC_RQPN_OFFSET); + MTHCA_PUT(inbox, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET); + MTHCA_PUT(inbox, in_wc->src_qp, MAD_IFC_RQPN_OFFSET); val = in_wc->sl << 4; - MTHCA_PUT(box, val, MAD_IFC_SL_OFFSET); + MTHCA_PUT(inbox, val, MAD_IFC_SL_OFFSET); val = in_wc->dlid_path_bits | (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); - MTHCA_PUT(box, val, MAD_IFC_GRH_OFFSET); + MTHCA_PUT(inbox, val, MAD_IFC_GRH_OFFSET); - MTHCA_PUT(box, in_wc->slid, MAD_IFC_RLID_OFFSET); - MTHCA_PUT(box, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); + MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET); + MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); if (in_grh) - memcpy((u8 *) box + MAD_IFC_GRH_OFFSET, in_grh, 40); + memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40); op_modifier |= 0x10; in_modifier |= in_wc->slid << 16; } - err = mthca_cmd_box(dev, dma, dma + 512, in_modifier, op_modifier, + err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma, + in_modifier, op_modifier, CMD_MAD_IFC, CMD_TIME_CLASS_C, status); if (!err && !*status) - memcpy(response_mad, box + 512, 256); + memcpy(response_mad, outmailbox->buf, 256); - pci_free_consistent(dev->pdev, MAD_IFC_BOX_SIZE, box, dma); + mthca_free_mailbox(dev, inmailbox); + mthca_free_mailbox(dev, outmailbox); return err; } -int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm, - u8 *status) +int mthca_READ_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status) { - dma_addr_t outdma = 0; - int err; - - outdma = pci_map_single(dev->pdev, mgm, - MTHCA_MGM_ENTRY_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - - err = mthca_cmd_box(dev, 0, outdma, index, 0, - CMD_READ_MGM, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, outdma, - MTHCA_MGM_ENTRY_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox->dma, index, 0, + CMD_READ_MGM, CMD_TIME_CLASS_A, status); } -int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm, - u8 *status) +int mthca_WRITE_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, mgm, - MTHCA_MGM_ENTRY_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, index, 0, CMD_WRITE_MGM, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, indma, - MTHCA_MGM_ENTRY_SIZE, PCI_DMA_TODEVICE); - return err; + return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM, + CMD_TIME_CLASS_A, status); } -int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash, - u8 *status) +int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + u16 *hash, u8 *status) { - dma_addr_t indma; u64 imm; int err; - indma = pci_map_single(dev->pdev, gid, 16, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd_imm(dev, indma, &imm, 0, 0, CMD_MGID_HASH, + err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH, CMD_TIME_CLASS_A, status); - *hash = imm; - pci_unmap_single(dev->pdev, indma, 16, PCI_DMA_TODEVICE); + *hash = imm; return err; } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index adf039b3c54..ed517f175dd 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -37,8 +37,7 @@ #include <ib_verbs.h> -#define MTHCA_CMD_MAILBOX_ALIGN 16UL -#define MTHCA_CMD_MAILBOX_EXTRA (MTHCA_CMD_MAILBOX_ALIGN - 1) +#define MTHCA_MAILBOX_SIZE 4096 enum { /* command completed successfully: */ @@ -112,6 +111,11 @@ enum { DEV_LIM_FLAG_UD_MULTI = 1 << 21, }; +struct mthca_mailbox { + dma_addr_t dma; + void *buf; +}; + struct mthca_dev_lim { int max_srq_sz; int max_qp_sz; @@ -235,11 +239,17 @@ struct mthca_set_ib_param { u32 cap_mask; }; +int mthca_cmd_init(struct mthca_dev *dev); +void mthca_cmd_cleanup(struct mthca_dev *dev); int mthca_cmd_use_events(struct mthca_dev *dev); void mthca_cmd_use_polling(struct mthca_dev *dev); void mthca_cmd_event(struct mthca_dev *dev, u16 token, u8 status, u64 out_param); +struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, + unsigned int gfp_mask); +void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox); + int mthca_SYS_EN(struct mthca_dev *dev, u8 *status); int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status); int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status); @@ -270,41 +280,39 @@ int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status); int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status); int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, u8 *status); -int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry, +int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int mpt_index, u8 *status); -int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry, +int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int mpt_index, u8 *status); -int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry, +int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int num_mtt, u8 *status); int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status); int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, int eq_num, u8 *status); -int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context, +int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int eq_num, u8 *status); -int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context, +int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int eq_num, u8 *status); -int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context, +int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status); -int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context, +int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status); int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, - int is_ee, void *qp_context, u32 optmask, + int is_ee, struct mthca_mailbox *mailbox, u32 optmask, u8 *status); int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, - void *qp_context, u8 *status); + struct mthca_mailbox *mailbox, u8 *status); int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, u8 *status); int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc* in_wc, struct ib_grh* in_grh, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad, u8 *status); -int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm, - u8 *status); -int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm, - u8 *status); -int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash, - u8 *status); +int mthca_READ_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status); +int mthca_WRITE_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status); +int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + u16 *hash, u8 *status); int mthca_NOP(struct mthca_dev *dev, u8 *status); -#define MAILBOX_ALIGN(x) ((void *) ALIGN((unsigned long) (x), MTHCA_CMD_MAILBOX_ALIGN)) - #endif /* MTHCA_CMD_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 2bf347b84c3..b5aea7b869f 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -171,6 +173,17 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe) cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW; } +static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr) +{ + __be32 *cqe = cqe_ptr; + + (void) cqe; /* avoid warning if mthca_dbg compiled away... */ + mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", + be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]), + be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]), + be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7])); +} + /* * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index * should be correct before calling update_cons_index(). @@ -280,16 +293,12 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, int dbd; u32 new_wqe; - if (1 && cqe->syndrome != SYNDROME_WR_FLUSH_ERR) { - int j; - - mthca_dbg(dev, "%x/%d: error CQE -> QPN %06x, WQE @ %08x\n", - cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), - be32_to_cpu(cqe->wqe)); - - for (j = 0; j < 8; ++j) - printk(KERN_DEBUG " [%2x] %08x\n", - j * 4, be32_to_cpu(((u32 *) cqe)[j])); + if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) { + mthca_dbg(dev, "local QP operation err " + "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n", + be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe), + cq->cqn, cq->cons_index); + dump_cqe(dev, cqe); } /* @@ -377,15 +386,6 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, return 0; } -static void dump_cqe(struct mthca_cqe *cqe) -{ - int j; - - for (j = 0; j < 8; ++j) - printk(KERN_DEBUG " [%2x] %08x\n", - j * 4, be32_to_cpu(((u32 *) cqe)[j])); -} - static inline int mthca_poll_one(struct mthca_dev *dev, struct mthca_cq *cq, struct mthca_qp **cur_qp, @@ -414,8 +414,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n", cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe)); - - dump_cqe(cqe); + dump_cqe(dev, cqe); } is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == @@ -638,19 +637,19 @@ static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq) int size; if (cq->is_direct) - pci_free_consistent(dev->pdev, - (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, - cq->queue.direct.buf, - pci_unmap_addr(&cq->queue.direct, - mapping)); + dma_free_coherent(&dev->pdev->dev, + (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, + cq->queue.direct.buf, + pci_unmap_addr(&cq->queue.direct, + mapping)); else { size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE; for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) if (cq->queue.page_list[i].buf) - pci_free_consistent(dev->pdev, PAGE_SIZE, - cq->queue.page_list[i].buf, - pci_unmap_addr(&cq->queue.page_list[i], - mapping)); + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + cq->queue.page_list[i].buf, + pci_unmap_addr(&cq->queue.page_list[i], + mapping)); kfree(cq->queue.page_list); } @@ -670,8 +669,8 @@ static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size, npages = 1; shift = get_order(size) + PAGE_SHIFT; - cq->queue.direct.buf = pci_alloc_consistent(dev->pdev, - size, &t); + cq->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev, + size, &t, GFP_KERNEL); if (!cq->queue.direct.buf) return -ENOMEM; @@ -709,7 +708,8 @@ static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size, for (i = 0; i < npages; ++i) { cq->queue.page_list[i].buf = - pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t); + dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + &t, GFP_KERNEL); if (!cq->queue.page_list[i].buf) goto err_free; @@ -743,10 +743,11 @@ err_out: } int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, struct mthca_cq *cq) { int size = nent * MTHCA_CQ_ENTRY_SIZE; - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_cq_context *cq_context; int err = -ENOMEM; u8 status; @@ -754,45 +755,49 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, might_sleep(); - cq->ibcq.cqe = nent - 1; + cq->ibcq.cqe = nent - 1; + cq->is_kernel = !ctx; cq->cqn = mthca_alloc(&dev->cq_table.alloc); if (cq->cqn == -1) return -ENOMEM; if (mthca_is_memfree(dev)) { - cq->arm_sn = 1; - err = mthca_table_get(dev, dev->cq_table.table, cq->cqn); if (err) goto err_out; - err = -ENOMEM; + if (cq->is_kernel) { + cq->arm_sn = 1; - cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, - cq->cqn, &cq->set_ci_db); - if (cq->set_ci_db_index < 0) - goto err_out_icm; + err = -ENOMEM; - cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, - cq->cqn, &cq->arm_db); - if (cq->arm_db_index < 0) - goto err_out_ci; + cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, + cq->cqn, &cq->set_ci_db); + if (cq->set_ci_db_index < 0) + goto err_out_icm; + + cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, + cq->cqn, &cq->arm_db); + if (cq->arm_db_index < 0) + goto err_out_ci; + } } - mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) - goto err_out_mailbox; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + goto err_out_arm; - cq_context = MAILBOX_ALIGN(mailbox); + cq_context = mailbox->buf; - err = mthca_alloc_cq_buf(dev, size, cq); - if (err) - goto err_out_mailbox; + if (cq->is_kernel) { + err = mthca_alloc_cq_buf(dev, size, cq); + if (err) + goto err_out_mailbox; - for (i = 0; i < nent; ++i) - set_cqe_hw(get_cqe(cq, i)); + for (i = 0; i < nent; ++i) + set_cqe_hw(get_cqe(cq, i)); + } spin_lock_init(&cq->lock); atomic_set(&cq->refcount, 1); @@ -803,11 +808,14 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, MTHCA_CQ_STATE_DISARMED | MTHCA_CQ_FLAG_TR); cq_context->start = cpu_to_be64(0); - cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 | - dev->driver_uar.index); + cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24); + if (ctx) + cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index); + else + cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index); cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); - cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num); + cq_context->pd = cpu_to_be32(pdn); cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey); cq_context->cqn = cpu_to_be32(cq->cqn); @@ -816,7 +824,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq_context->state_db = cpu_to_be32(cq->arm_db_index); } - err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status); + err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status); if (err) { mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err); goto err_out_free_mr; @@ -840,22 +848,25 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq->cons_index = 0; - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return 0; err_out_free_mr: - mthca_free_mr(dev, &cq->mr); - mthca_free_cq_buf(dev, cq); + if (cq->is_kernel) { + mthca_free_mr(dev, &cq->mr); + mthca_free_cq_buf(dev, cq); + } err_out_mailbox: - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); - if (mthca_is_memfree(dev)) +err_out_arm: + if (cq->is_kernel && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); err_out_ci: - if (mthca_is_memfree(dev)) + if (cq->is_kernel && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); err_out_icm: @@ -870,32 +881,31 @@ err_out: void mthca_free_cq(struct mthca_dev *dev, struct mthca_cq *cq) { - void *mailbox; + struct mthca_mailbox *mailbox; int err; u8 status; might_sleep(); - mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) { + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { mthca_warn(dev, "No memory for mailbox to free CQ.\n"); return; } - err = mthca_HW2SW_CQ(dev, MAILBOX_ALIGN(mailbox), cq->cqn, &status); + err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status); if (err) mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err); else if (status) - mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", - status); + mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status); if (0) { - u32 *ctx = MAILBOX_ALIGN(mailbox); + u32 *ctx = mailbox->buf; int j; printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n", - cq->cqn, cq->cons_index, !!next_cqe_sw(cq)); + cq->cqn, cq->cons_index, + cq->is_kernel ? !!next_cqe_sw(cq) : 0); for (j = 0; j < 16; ++j) printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j])); } @@ -913,17 +923,18 @@ void mthca_free_cq(struct mthca_dev *dev, atomic_dec(&cq->refcount); wait_event(cq->wait, !atomic_read(&cq->refcount)); - mthca_free_mr(dev, &cq->mr); - mthca_free_cq_buf(dev, cq); - - if (mthca_is_memfree(dev)) { - mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); - mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); - mthca_table_put(dev, dev->cq_table.table, cq->cqn); + if (cq->is_kernel) { + mthca_free_mr(dev, &cq->mr); + mthca_free_cq_buf(dev, cq); + if (mthca_is_memfree(dev)) { + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); + } } + mthca_table_put(dev, dev->cq_table.table, cq->cqn); mthca_free(&dev->cq_table.alloc, cq->cqn); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); } int __devinit mthca_init_cq_table(struct mthca_dev *dev) diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index e3d79e267dc..5ecdd2eeeb0 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -46,8 +48,8 @@ #define DRV_NAME "ib_mthca" #define PFX DRV_NAME ": " -#define DRV_VERSION "0.06-pre" -#define DRV_RELDATE "November 8, 2004" +#define DRV_VERSION "0.06" +#define DRV_RELDATE "June 23, 2005" enum { MTHCA_FLAG_DDR_HIDDEN = 1 << 1, @@ -98,6 +100,7 @@ enum { }; struct mthca_cmd { + struct pci_pool *pool; int use_events; struct semaphore hcr_sem; struct semaphore poll_sem; @@ -376,9 +379,15 @@ void mthca_unregister_device(struct mthca_dev *dev); int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); -int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd); +int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd); void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd); +struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size); +void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt); +int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len); +int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, + u64 iova, u64 total_size, u32 access, struct mthca_mr *mr); int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, u32 access, struct mthca_mr *mr); int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, @@ -405,6 +414,7 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, struct mthca_cq *cq); void mthca_free_cq(struct mthca_dev *dev, struct mthca_cq *cq); @@ -430,12 +440,14 @@ int mthca_alloc_qp(struct mthca_dev *dev, struct mthca_cq *recv_cq, enum ib_qp_type type, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, struct mthca_qp *qp); int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, int qpn, int port, struct mthca_sqp *sqp); diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h index 821039a4904..535fad7710f 100644 --- a/drivers/infiniband/hw/mthca/mthca_doorbell.h +++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index f46d615d396..cbcf2b4722e 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -469,7 +469,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, PAGE_SIZE; u64 *dma_list = NULL; dma_addr_t t; - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_eq_context *eq_context; int err = -ENOMEM; int i; @@ -494,17 +494,16 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, if (!dma_list) goto err_out_free; - mailbox = kmalloc(sizeof *eq_context + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) goto err_out_free; - eq_context = MAILBOX_ALIGN(mailbox); + eq_context = mailbox->buf; for (i = 0; i < npages; ++i) { - eq->page_list[i].buf = pci_alloc_consistent(dev->pdev, - PAGE_SIZE, &t); + eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, + PAGE_SIZE, &t, GFP_KERNEL); if (!eq->page_list[i].buf) - goto err_out_free; + goto err_out_free_pages; dma_list[i] = t; pci_unmap_addr_set(&eq->page_list[i], mapping, t); @@ -517,7 +516,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, eq->eqn = mthca_alloc(&dev->eq_table.alloc); if (eq->eqn == -1) - goto err_out_free; + goto err_out_free_pages; err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, dma_list, PAGE_SHIFT, npages, @@ -548,7 +547,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, eq_context->intr = intr; eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey); - err = mthca_SW2HW_EQ(dev, eq_context, eq->eqn, &status); + err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status); if (err) { mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err); goto err_out_free_mr; @@ -561,7 +560,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, } kfree(dma_list); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); eq->eqn_mask = swab32(1 << eq->eqn); eq->cons_index = 0; @@ -579,17 +578,19 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, err_out_free_eq: mthca_free(&dev->eq_table.alloc, eq->eqn); - err_out_free: + err_out_free_pages: for (i = 0; i < npages; ++i) if (eq->page_list[i].buf) - pci_free_consistent(dev->pdev, PAGE_SIZE, - eq->page_list[i].buf, - pci_unmap_addr(&eq->page_list[i], - mapping)); + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + eq->page_list[i].buf, + pci_unmap_addr(&eq->page_list[i], + mapping)); + + mthca_free_mailbox(dev, mailbox); + err_out_free: kfree(eq->page_list); kfree(dma_list); - kfree(mailbox); err_out: return err; @@ -598,25 +599,22 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, static void mthca_free_eq(struct mthca_dev *dev, struct mthca_eq *eq) { - void *mailbox = NULL; + struct mthca_mailbox *mailbox; int err; u8 status; int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / PAGE_SIZE; int i; - mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) return; - err = mthca_HW2SW_EQ(dev, MAILBOX_ALIGN(mailbox), - eq->eqn, &status); + err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status); if (err) mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err); if (status) - mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", - status); + mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status); dev->eq_table.arm_mask &= ~eq->eqn_mask; @@ -625,7 +623,7 @@ static void mthca_free_eq(struct mthca_dev *dev, for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) { if (i % 4 == 0) printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpup(MAILBOX_ALIGN(mailbox) + i * 4)); + printk(" %08x", be32_to_cpup(mailbox->buf + i * 4)); if ((i + 1) % 4 == 0) printk("\n"); } @@ -638,7 +636,7 @@ static void mthca_free_eq(struct mthca_dev *dev, pci_unmap_addr(&eq->page_list[i], mapping)); kfree(eq->page_list); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); } static void mthca_free_irqs(struct mthca_dev *dev) @@ -709,8 +707,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & dev->fw.arbel.eq_arm_base) + 4, 4, &dev->eq_regs.arbel.eq_arm)) { - mthca_err(dev, "Couldn't map interrupt clear register, " - "aborting.\n"); + mthca_err(dev, "Couldn't map EQ arm register, aborting.\n"); mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, dev->clr_base); @@ -721,8 +718,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) dev->fw.arbel.eq_set_ci_base, MTHCA_EQ_SET_CI_SIZE, &dev->eq_regs.arbel.eq_set_ci_base)) { - mthca_err(dev, "Couldn't map interrupt clear register, " - "aborting.\n"); + mthca_err(dev, "Couldn't map EQ CI register, aborting.\n"); mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & dev->fw.arbel.eq_arm_base) + 4, 4, dev->eq_regs.arbel.eq_arm); diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index d40590356df..2ef916859e1 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -69,7 +70,7 @@ MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero"); #endif /* CONFIG_PCI_MSI */ static const char mthca_version[] __devinitdata = - "ib_mthca: Mellanox InfiniBand HCA driver v" + DRV_NAME ": Mellanox InfiniBand HCA driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; static struct mthca_profile default_profile = { @@ -664,7 +665,7 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev) goto err_pd_table_free; } - err = mthca_pd_alloc(dev, &dev->driver_pd); + err = mthca_pd_alloc(dev, 1, &dev->driver_pd); if (err) { mthca_err(dev, "Failed to create driver PD, " "aborting.\n"); @@ -927,13 +928,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, */ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || pci_resource_len(pdev, 0) != 1 << 20) { - dev_err(&pdev->dev, "Missing DCS, aborting."); + dev_err(&pdev->dev, "Missing DCS, aborting.\n"); err = -ENODEV; goto err_disable_pdev; } if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) || pci_resource_len(pdev, 2) != 1 << 23) { - dev_err(&pdev->dev, "Missing UAR, aborting."); + dev_err(&pdev->dev, "Missing UAR, aborting.\n"); err = -ENODEV; goto err_disable_pdev; } @@ -1004,25 +1005,18 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, !pci_enable_msi(pdev)) mdev->mthca_flags |= MTHCA_FLAG_MSI; - sema_init(&mdev->cmd.hcr_sem, 1); - sema_init(&mdev->cmd.poll_sem, 1); - mdev->cmd.use_events = 0; - - mdev->hcr = ioremap(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, MTHCA_HCR_SIZE); - if (!mdev->hcr) { - mthca_err(mdev, "Couldn't map command register, " - "aborting.\n"); - err = -ENOMEM; + if (mthca_cmd_init(mdev)) { + mthca_err(mdev, "Failed to init command interface, aborting.\n"); goto err_free_dev; } err = mthca_tune_pci(mdev); if (err) - goto err_iounmap; + goto err_cmd; err = mthca_init_hca(mdev); if (err) - goto err_iounmap; + goto err_cmd; if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) { mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n", @@ -1070,8 +1064,8 @@ err_cleanup: err_close: mthca_close_hca(mdev); -err_iounmap: - iounmap(mdev->hcr); +err_cmd: + mthca_cmd_cleanup(mdev); err_free_dev: if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) @@ -1118,10 +1112,8 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev) iounmap(mdev->kar); mthca_uar_free(mdev, &mdev->driver_uar); mthca_cleanup_uar_table(mdev); - mthca_close_hca(mdev); - - iounmap(mdev->hcr); + mthca_cmd_cleanup(mdev); if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) pci_disable_msix(pdev); @@ -1163,7 +1155,7 @@ static struct pci_device_id mthca_pci_table[] = { MODULE_DEVICE_TABLE(pci, mthca_pci_table); static struct pci_driver mthca_driver = { - .name = "ib_mthca", + .name = DRV_NAME, .id_table = mthca_pci_table, .probe = mthca_init_one, .remove = __devexit_p(mthca_remove_one) diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index 70a6553a588..5be7d949dbf 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -66,22 +66,23 @@ static const u8 zero_gid[16]; /* automatically initialized to 0 */ * entry in hash chain and *mgm holds end of hash chain. */ static int find_mgm(struct mthca_dev *dev, - u8 *gid, struct mthca_mgm *mgm, + u8 *gid, struct mthca_mailbox *mgm_mailbox, u16 *hash, int *prev, int *index) { - void *mailbox; + struct mthca_mailbox *mailbox; + struct mthca_mgm *mgm = mgm_mailbox->buf; u8 *mgid; int err; u8 status; - mailbox = kmalloc(16 + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); - if (!mailbox) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) return -ENOMEM; - mgid = MAILBOX_ALIGN(mailbox); + mgid = mailbox->buf; memcpy(mgid, gid, 16); - err = mthca_MGID_HASH(dev, mgid, hash, &status); + err = mthca_MGID_HASH(dev, mailbox, hash, &status); if (err) goto out; if (status) { @@ -103,7 +104,7 @@ static int find_mgm(struct mthca_dev *dev, *prev = -1; do { - err = mthca_READ_MGM(dev, *index, mgm, &status); + err = mthca_READ_MGM(dev, *index, mgm_mailbox, &status); if (err) goto out; if (status) { @@ -129,14 +130,14 @@ static int find_mgm(struct mthca_dev *dev, *index = -1; out: - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mthca_dev *dev = to_mdev(ibqp->device); - void *mailbox; + struct mthca_mailbox *mailbox; struct mthca_mgm *mgm; u16 hash; int index, prev; @@ -145,15 +146,15 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int err; u8 status; - mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); - if (!mailbox) - return -ENOMEM; - mgm = MAILBOX_ALIGN(mailbox); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; if (down_interruptible(&dev->mcg_table.sem)) return -EINTR; - err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index); + err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); if (err) goto out; @@ -170,7 +171,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; } - err = mthca_READ_MGM(dev, index, mgm, &status); + err = mthca_READ_MGM(dev, index, mailbox, &status); if (err) goto out; if (status) { @@ -195,7 +196,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; } - err = mthca_WRITE_MGM(dev, index, mgm, &status); + err = mthca_WRITE_MGM(dev, index, mailbox, &status); if (err) goto out; if (status) { @@ -206,7 +207,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (!link) goto out; - err = mthca_READ_MGM(dev, prev, mgm, &status); + err = mthca_READ_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { @@ -217,7 +218,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) mgm->next_gid_index = cpu_to_be32(index << 5); - err = mthca_WRITE_MGM(dev, prev, mgm, &status); + err = mthca_WRITE_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { @@ -227,14 +228,14 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) out: up(&dev->mcg_table.sem); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mthca_dev *dev = to_mdev(ibqp->device); - void *mailbox; + struct mthca_mailbox *mailbox; struct mthca_mgm *mgm; u16 hash; int prev, index; @@ -242,15 +243,15 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int err; u8 status; - mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); - if (!mailbox) - return -ENOMEM; - mgm = MAILBOX_ALIGN(mailbox); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; if (down_interruptible(&dev->mcg_table.sem)) return -EINTR; - err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index); + err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); if (err) goto out; @@ -285,7 +286,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) mgm->qp[loc] = mgm->qp[i - 1]; mgm->qp[i - 1] = 0; - err = mthca_WRITE_MGM(dev, index, mgm, &status); + err = mthca_WRITE_MGM(dev, index, mailbox, &status); if (err) goto out; if (status) { @@ -304,7 +305,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (be32_to_cpu(mgm->next_gid_index) >> 5) { err = mthca_READ_MGM(dev, be32_to_cpu(mgm->next_gid_index) >> 5, - mgm, &status); + mailbox, &status); if (err) goto out; if (status) { @@ -316,7 +317,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } else memset(mgm->gid, 0, 16); - err = mthca_WRITE_MGM(dev, index, mgm, &status); + err = mthca_WRITE_MGM(dev, index, mailbox, &status); if (err) goto out; if (status) { @@ -327,7 +328,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } else { /* Remove entry from AMGM */ index = be32_to_cpu(mgm->next_gid_index) >> 5; - err = mthca_READ_MGM(dev, prev, mgm, &status); + err = mthca_READ_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { @@ -338,7 +339,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) mgm->next_gid_index = cpu_to_be32(index << 5); - err = mthca_WRITE_MGM(dev, prev, mgm, &status); + err = mthca_WRITE_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { @@ -350,7 +351,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) out: up(&dev->mcg_table.sem); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return err; } diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 637b30e3559..2a864615035 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -47,6 +48,15 @@ enum { MTHCA_TABLE_CHUNK_SIZE = 1 << 18 }; +struct mthca_user_db_table { + struct semaphore mutex; + struct { + u64 uvirt; + struct scatterlist mem; + int refcount; + } page[0]; +}; + void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm) { struct mthca_icm_chunk *chunk, *tmp; @@ -179,9 +189,14 @@ out: void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj) { - int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; + int i; u8 status; + if (!mthca_is_memfree(dev)) + return; + + i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; + down(&table->mutex); if (--table->icm[i]->refcount == 0) { @@ -256,6 +271,9 @@ void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table, { int i; + if (!mthca_is_memfree(dev)) + return; + for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size) mthca_table_put(dev, table, i); } @@ -336,13 +354,133 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table) kfree(table); } -static u64 mthca_uarc_virt(struct mthca_dev *dev, int page) +static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page) { return dev->uar_table.uarc_base + - dev->driver_uar.index * dev->uar_table.uarc_size + + uar->index * dev->uar_table.uarc_size + page * 4096; } +int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr) +{ + int ret = 0; + u8 status; + int i; + + if (!mthca_is_memfree(dev)) + return 0; + + if (index < 0 || index > dev->uar_table.uarc_size / 8) + return -EINVAL; + + down(&db_tab->mutex); + + i = index / MTHCA_DB_REC_PER_PAGE; + + if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) || + (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) || + (uaddr & 4095)) { + ret = -EINVAL; + goto out; + } + + if (db_tab->page[i].refcount) { + ++db_tab->page[i].refcount; + goto out; + } + + ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0, + &db_tab->page[i].mem.page, NULL); + if (ret < 0) + goto out; + + db_tab->page[i].mem.length = 4096; + db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK; + + ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + if (ret < 0) { + put_page(db_tab->page[i].mem.page); + goto out; + } + + ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem), + mthca_uarc_virt(dev, uar, i), &status); + if (!ret && status) + ret = -EINVAL; + if (ret) { + pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(db_tab->page[i].mem.page); + goto out; + } + + db_tab->page[i].uvirt = uaddr; + db_tab->page[i].refcount = 1; + +out: + up(&db_tab->mutex); + return ret; +} + +void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index) +{ + if (!mthca_is_memfree(dev)) + return; + + /* + * To make our bookkeeping simpler, we don't unmap DB + * pages until we clean up the whole db table. + */ + + down(&db_tab->mutex); + + --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount; + + up(&db_tab->mutex); +} + +struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev) +{ + struct mthca_user_db_table *db_tab; + int npages; + int i; + + if (!mthca_is_memfree(dev)) + return NULL; + + npages = dev->uar_table.uarc_size / 4096; + db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL); + if (!db_tab) + return ERR_PTR(-ENOMEM); + + init_MUTEX(&db_tab->mutex); + for (i = 0; i < npages; ++i) { + db_tab->page[i].refcount = 0; + db_tab->page[i].uvirt = 0; + } + + return db_tab; +} + +void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab) +{ + int i; + u8 status; + + if (!mthca_is_memfree(dev)) + return; + + for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) { + if (db_tab->page[i].uvirt) { + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status); + pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(db_tab->page[i].mem.page); + } + } +} + int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db) { int group; @@ -399,7 +537,8 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db) } memset(page->db_rec, 0, 4096); - ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status); + ret = mthca_MAP_ICM_page(dev, page->mapping, + mthca_uarc_virt(dev, &dev->driver_uar, i), &status); if (!ret && status) ret = -EINVAL; if (ret) { @@ -453,7 +592,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index) if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) && i >= dev->db_tab->max_group1 - 1) { - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status); + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); dma_free_coherent(&dev->pdev->dev, 4096, page->db_rec, page->mapping); @@ -522,7 +661,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev) if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE)) mthca_warn(dev, "Kernel UARC page %d not empty\n", i); - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status); + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); dma_free_coherent(&dev->pdev->dev, 4096, dev->db_tab->page[i].db_rec, diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index fe7be2a6bc4..4761d844cb5 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -148,7 +149,7 @@ struct mthca_db_table { struct semaphore mutex; }; -enum { +enum mthca_db_type { MTHCA_DB_TYPE_INVALID = 0x0, MTHCA_DB_TYPE_CQ_SET_CI = 0x1, MTHCA_DB_TYPE_CQ_ARM = 0x2, @@ -158,6 +159,17 @@ enum { MTHCA_DB_TYPE_GROUP_SEP = 0x7 }; +struct mthca_user_db_table; +struct mthca_uar; + +int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr); +void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index); +struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev); +void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab); + int mthca_init_db_tab(struct mthca_dev *dev); void mthca_cleanup_db_tab(struct mthca_dev *dev); int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db); diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 8960fc2306b..cbe50feaf68 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -40,6 +40,12 @@ #include "mthca_cmd.h" #include "mthca_memfree.h" +struct mthca_mtt { + struct mthca_buddy *buddy; + int order; + u32 first_seg; +}; + /* * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. */ @@ -173,8 +179,8 @@ static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy) kfree(buddy->bits); } -static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order, - struct mthca_buddy *buddy) +static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order, + struct mthca_buddy *buddy) { u32 seg = mthca_buddy_alloc(buddy, order); @@ -191,14 +197,102 @@ static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order, return seg; } -static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order, - struct mthca_buddy* buddy) +static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size, + struct mthca_buddy *buddy) { - mthca_buddy_free(buddy, seg, order); + struct mthca_mtt *mtt; + int i; - if (mthca_is_memfree(dev)) - mthca_table_put_range(dev, dev->mr_table.mtt_table, seg, - seg + (1 << order) - 1); + if (size <= 0) + return ERR_PTR(-EINVAL); + + mtt = kmalloc(sizeof *mtt, GFP_KERNEL); + if (!mtt) + return ERR_PTR(-ENOMEM); + + mtt->buddy = buddy; + mtt->order = 0; + for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1) + ++mtt->order; + + mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy); + if (mtt->first_seg == -1) { + kfree(mtt); + return ERR_PTR(-ENOMEM); + } + + return mtt; +} + +struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size) +{ + return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy); +} + +void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt) +{ + if (!mtt) + return; + + mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order); + + mthca_table_put_range(dev, dev->mr_table.mtt_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + + kfree(mtt); +} + +int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len) +{ + struct mthca_mailbox *mailbox; + u64 *mtt_entry; + int err = 0; + u8 status; + int i; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mtt_entry = mailbox->buf; + + while (list_len > 0) { + mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + + mtt->first_seg * MTHCA_MTT_SEG_SIZE + + start_index * 8); + mtt_entry[1] = 0; + for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i) + mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] | + MTHCA_MTT_FLAG_PRESENT); + + /* + * If we have an odd number of entries to write, add + * one more dummy entry for firmware efficiency. + */ + if (i & 1) + mtt_entry[i + 2] = 0; + + err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status); + if (err) { + mthca_warn(dev, "WRITE_MTT failed (%d)\n", err); + goto out; + } + if (status) { + mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n", + status); + err = -EINVAL; + goto out; + } + + list_len -= i; + start_index += i; + buffer_list += i; + } + +out: + mthca_free_mailbox(dev, mailbox); + return err; } static inline u32 tavor_hw_index_to_key(u32 ind) @@ -237,91 +331,18 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key) return tavor_key_to_hw_index(key); } -int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, - u32 access, struct mthca_mr *mr) +int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, + u64 iova, u64 total_size, u32 access, struct mthca_mr *mr) { - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_mpt_entry *mpt_entry; u32 key; + int i; int err; u8 status; might_sleep(); - mr->order = -1; - key = mthca_alloc(&dev->mr_table.mpt_alloc); - if (key == -1) - return -ENOMEM; - mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); - - if (mthca_is_memfree(dev)) { - err = mthca_table_get(dev, dev->mr_table.mpt_table, key); - if (err) - goto err_out_mpt_free; - } - - mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) { - err = -ENOMEM; - goto err_out_table; - } - mpt_entry = MAILBOX_ALIGN(mailbox); - - mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | - MTHCA_MPT_FLAG_MIO | - MTHCA_MPT_FLAG_PHYSICAL | - MTHCA_MPT_FLAG_REGION | - access); - mpt_entry->page_size = 0; - mpt_entry->key = cpu_to_be32(key); - mpt_entry->pd = cpu_to_be32(pd); - mpt_entry->start = 0; - mpt_entry->length = ~0ULL; - - memset(&mpt_entry->lkey, 0, - sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); - - err = mthca_SW2HW_MPT(dev, mpt_entry, - key & (dev->limits.num_mpts - 1), - &status); - if (err) { - mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); - goto err_out_table; - } else if (status) { - mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", - status); - err = -EINVAL; - goto err_out_table; - } - - kfree(mailbox); - return err; - -err_out_table: - if (mthca_is_memfree(dev)) - mthca_table_put(dev, dev->mr_table.mpt_table, key); - -err_out_mpt_free: - mthca_free(&dev->mr_table.mpt_alloc, key); - kfree(mailbox); - return err; -} - -int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, - u64 *buffer_list, int buffer_size_shift, - int list_len, u64 iova, u64 total_size, - u32 access, struct mthca_mr *mr) -{ - void *mailbox; - u64 *mtt_entry; - struct mthca_mpt_entry *mpt_entry; - u32 key; - int err = -ENOMEM; - u8 status; - int i; - - might_sleep(); WARN_ON(buffer_size_shift >= 32); key = mthca_alloc(&dev->mr_table.mpt_alloc); @@ -335,75 +356,33 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, goto err_out_mpt_free; } - for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0; - i < list_len; - i <<= 1, ++mr->order) - ; /* nothing */ - - mr->first_seg = mthca_alloc_mtt(dev, mr->order, - &dev->mr_table.mtt_buddy); - if (mr->first_seg == -1) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); goto err_out_table; - - /* - * If list_len is odd, we add one more dummy entry for - * firmware efficiency. - */ - mailbox = kmalloc(max(sizeof *mpt_entry, - (size_t) 8 * (list_len + (list_len & 1) + 2)) + - MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) - goto err_out_free_mtt; - - mtt_entry = MAILBOX_ALIGN(mailbox); - - mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + - mr->first_seg * MTHCA_MTT_SEG_SIZE); - mtt_entry[1] = 0; - for (i = 0; i < list_len; ++i) - mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] | - MTHCA_MTT_FLAG_PRESENT); - if (list_len & 1) { - mtt_entry[i + 2] = 0; - ++list_len; - } - - if (0) { - mthca_dbg(dev, "Dumping MPT entry\n"); - for (i = 0; i < list_len + 2; ++i) - printk(KERN_ERR "[%2d] %016llx\n", - i, (unsigned long long) be64_to_cpu(mtt_entry[i])); - } - - err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status); - if (err) { - mthca_warn(dev, "WRITE_MTT failed (%d)\n", err); - goto err_out_mailbox_free; - } - if (status) { - mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n", - status); - err = -EINVAL; - goto err_out_mailbox_free; } - - mpt_entry = MAILBOX_ALIGN(mailbox); + mpt_entry = mailbox->buf; mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | MTHCA_MPT_FLAG_MIO | MTHCA_MPT_FLAG_REGION | access); + if (!mr->mtt) + mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL); mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12); mpt_entry->key = cpu_to_be32(key); mpt_entry->pd = cpu_to_be32(pd); mpt_entry->start = cpu_to_be64(iova); mpt_entry->length = cpu_to_be64(total_size); + memset(&mpt_entry->lkey, 0, sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); - mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + - mr->first_seg * MTHCA_MTT_SEG_SIZE); + + if (mr->mtt) + mpt_entry->mtt_seg = + cpu_to_be64(dev->mr_table.mtt_base + + mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE); if (0) { mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); @@ -416,45 +395,70 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, } } - err = mthca_SW2HW_MPT(dev, mpt_entry, + err = mthca_SW2HW_MPT(dev, mailbox, key & (dev->limits.num_mpts - 1), &status); - if (err) + if (err) { mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); - else if (status) { + goto err_out_mailbox; + } else if (status) { mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", status); err = -EINVAL; + goto err_out_mailbox; } - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return err; -err_out_mailbox_free: - kfree(mailbox); - -err_out_free_mtt: - mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy); +err_out_mailbox: + mthca_free_mailbox(dev, mailbox); err_out_table: - if (mthca_is_memfree(dev)) - mthca_table_put(dev, dev->mr_table.mpt_table, key); + mthca_table_put(dev, dev->mr_table.mpt_table, key); err_out_mpt_free: mthca_free(&dev->mr_table.mpt_alloc, key); return err; } -/* Free mr or fmr */ -static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order, - u32 first_seg, struct mthca_buddy *buddy) +int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, + u32 access, struct mthca_mr *mr) { - if (order >= 0) - mthca_free_mtt(dev, first_seg, order, buddy); + mr->mtt = NULL; + return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr); +} - if (mthca_is_memfree(dev)) - mthca_table_put(dev, dev->mr_table.mpt_table, - arbel_key_to_hw_index(lkey)); +int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, + u64 *buffer_list, int buffer_size_shift, + int list_len, u64 iova, u64 total_size, + u32 access, struct mthca_mr *mr) +{ + int err; + + mr->mtt = mthca_alloc_mtt(dev, list_len); + if (IS_ERR(mr->mtt)) + return PTR_ERR(mr->mtt); + + err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len); + if (err) { + mthca_free_mtt(dev, mr->mtt); + return err; + } + + err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova, + total_size, access, mr); + if (err) + mthca_free_mtt(dev, mr->mtt); + + return err; +} + +/* Free mr or fmr */ +static void mthca_free_region(struct mthca_dev *dev, u32 lkey) +{ + mthca_table_put(dev, dev->mr_table.mpt_table, + arbel_key_to_hw_index(lkey)); mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey)); } @@ -476,15 +480,15 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr) mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n", status); - mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg, - &dev->mr_table.mtt_buddy); + mthca_free_region(dev, mr->ibmr.lkey); + mthca_free_mtt(dev, mr->mtt); } int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, u32 access, struct mthca_fmr *mr) { struct mthca_mpt_entry *mpt_entry; - void *mailbox; + struct mthca_mailbox *mailbox; u64 mtt_seg; u32 key, idx; u8 status; @@ -522,31 +526,24 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base + sizeof *(mr->mem.tavor.mpt) * idx; - for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0; - i < list_len; - i <<= 1, ++mr->order) - ; /* nothing */ - - mr->first_seg = mthca_alloc_mtt(dev, mr->order, - dev->mr_table.fmr_mtt_buddy); - if (mr->first_seg == -1) + mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy); + if (IS_ERR(mr->mtt)) goto err_out_table; - mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE; + mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE; if (mthca_is_memfree(dev)) { mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, - mr->first_seg); + mr->mtt->first_seg); BUG_ON(!mr->mem.arbel.mtts); } else mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg; - mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) goto err_out_free_mtt; - mpt_entry = MAILBOX_ALIGN(mailbox); + mpt_entry = mailbox->buf; mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | MTHCA_MPT_FLAG_MIO | @@ -571,7 +568,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, } } - err = mthca_SW2HW_MPT(dev, mpt_entry, + err = mthca_SW2HW_MPT(dev, mailbox, key & (dev->limits.num_mpts - 1), &status); if (err) { @@ -585,19 +582,17 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, goto err_out_mailbox_free; } - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return 0; err_out_mailbox_free: - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); err_out_free_mtt: - mthca_free_mtt(dev, mr->first_seg, mr->order, - dev->mr_table.fmr_mtt_buddy); + mthca_free_mtt(dev, mr->mtt); err_out_table: - if (mthca_is_memfree(dev)) - mthca_table_put(dev, dev->mr_table.mpt_table, key); + mthca_table_put(dev, dev->mr_table.mpt_table, key); err_out_mpt_free: mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey); @@ -609,8 +604,9 @@ int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr) if (fmr->maps) return -EBUSY; - mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg, - dev->mr_table.fmr_mtt_buddy); + mthca_free_region(dev, fmr->ibmr.lkey); + mthca_free_mtt(dev, fmr->mtt); + return 0; } @@ -826,7 +822,8 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev) if (dev->limits.reserved_mtts) { i = fls(dev->limits.reserved_mtts - 1); - if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) { + if (mthca_alloc_mtt_range(dev, i, + dev->mr_table.fmr_mtt_buddy) == -1) { mthca_warn(dev, "MTT table of order %d is too small.\n", dev->mr_table.fmr_mtt_buddy->max_order); err = -ENOMEM; diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c index ea66847e4ea..c2c899844e9 100644 --- a/drivers/infiniband/hw/mthca/mthca_pd.c +++ b/drivers/infiniband/hw/mthca/mthca_pd.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -37,23 +38,27 @@ #include "mthca_dev.h" -int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd) +int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd) { - int err; + int err = 0; might_sleep(); + pd->privileged = privileged; + atomic_set(&pd->sqp_count, 0); pd->pd_num = mthca_alloc(&dev->pd_table.alloc); if (pd->pd_num == -1) return -ENOMEM; - err = mthca_mr_alloc_notrans(dev, pd->pd_num, - MTHCA_MPT_FLAG_LOCAL_READ | - MTHCA_MPT_FLAG_LOCAL_WRITE, - &pd->ntmr); - if (err) - mthca_free(&dev->pd_table.alloc, pd->pd_num); + if (privileged) { + err = mthca_mr_alloc_notrans(dev, pd->pd_num, + MTHCA_MPT_FLAG_LOCAL_READ | + MTHCA_MPT_FLAG_LOCAL_WRITE, + &pd->ntmr); + if (err) + mthca_free(&dev->pd_table.alloc, pd->pd_num); + } return err; } @@ -61,7 +66,8 @@ int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd) void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) { might_sleep(); - mthca_free_mr(dev, &pd->ntmr); + if (pd->privileged) + mthca_free_mr(dev, &pd->ntmr); mthca_free(&dev->pd_table.alloc, pd->pd_num); } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 159f4e6c312..7a58ce90e17 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,9 +35,12 @@ */ #include <ib_smi.h> +#include <linux/mm.h> #include "mthca_dev.h" #include "mthca_cmd.h" +#include "mthca_user.h" +#include "mthca_memfree.h" static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) @@ -52,7 +57,7 @@ static int mthca_query_device(struct ib_device *ibdev, if (!in_mad || !out_mad) goto out; - memset(props, 0, sizeof props); + memset(props, 0, sizeof *props); props->fw_ver = mdev->fw_ver; @@ -283,7 +288,78 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port, return err; } -static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev) +static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct mthca_alloc_ucontext_resp uresp; + struct mthca_ucontext *context; + int err; + + memset(&uresp, 0, sizeof uresp); + + uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; + if (mthca_is_memfree(to_mdev(ibdev))) + uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size; + else + uresp.uarc_size = 0; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + err = mthca_uar_alloc(to_mdev(ibdev), &context->uar); + if (err) { + kfree(context); + return ERR_PTR(err); + } + + context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev)); + if (IS_ERR(context->db_tab)) { + err = PTR_ERR(context->db_tab); + mthca_uar_free(to_mdev(ibdev), &context->uar); + kfree(context); + return ERR_PTR(err); + } + + if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { + mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab); + mthca_uar_free(to_mdev(ibdev), &context->uar); + kfree(context); + return ERR_PTR(-EFAULT); + } + + return &context->ibucontext; +} + +static int mthca_dealloc_ucontext(struct ib_ucontext *context) +{ + mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab); + mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar); + kfree(to_mucontext(context)); + + return 0; +} + +static int mthca_mmap_uar(struct ib_ucontext *context, + struct vm_area_struct *vma) +{ + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (remap_pfn_range(vma, vma->vm_start, + to_mucontext(context)->uar.pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) { struct mthca_pd *pd; int err; @@ -292,12 +368,20 @@ static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev) if (!pd) return ERR_PTR(-ENOMEM); - err = mthca_pd_alloc(to_mdev(ibdev), pd); + err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); if (err) { kfree(pd); return ERR_PTR(err); } + if (context) { + if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) { + mthca_pd_free(to_mdev(ibdev), pd); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } + return &pd->ibpd; } @@ -337,8 +421,10 @@ static int mthca_ah_destroy(struct ib_ah *ah) } static struct ib_qp *mthca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr) + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { + struct mthca_create_qp ucmd; struct mthca_qp *qp; int err; @@ -347,41 +433,82 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, case IB_QPT_UC: case IB_QPT_UD: { + struct mthca_ucontext *context; + qp = kmalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - qp->sq.max = init_attr->cap.max_send_wr; - qp->rq.max = init_attr->cap.max_recv_wr; - qp->sq.max_gs = init_attr->cap.max_send_sge; - qp->rq.max_gs = init_attr->cap.max_recv_sge; + if (pd->uobject) { + context = to_mucontext(pd->uobject->context); + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, + ucmd.sq_db_index, ucmd.sq_db_page); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, + ucmd.rq_db_index, ucmd.rq_db_page); + if (err) { + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.sq_db_index); + kfree(qp); + return ERR_PTR(err); + } + + qp->mr.ibmr.lkey = ucmd.lkey; + qp->sq.db_index = ucmd.sq_db_index; + qp->rq.db_index = ucmd.rq_db_index; + } err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd), to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), init_attr->qp_type, init_attr->sq_sig_type, - qp); + &init_attr->cap, qp); + + if (err && pd->uobject) { + context = to_mucontext(pd->uobject->context); + + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.sq_db_index); + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.rq_db_index); + } + qp->ibqp.qp_num = qp->qpn; break; } case IB_QPT_SMI: case IB_QPT_GSI: { + /* Don't allow userspace to create special QPs */ + if (pd->uobject) + return ERR_PTR(-EINVAL); + qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - qp->sq.max = init_attr->cap.max_send_wr; - qp->rq.max = init_attr->cap.max_recv_wr; - qp->sq.max_gs = init_attr->cap.max_send_sge; - qp->rq.max_gs = init_attr->cap.max_recv_sge; - qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd), to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), - init_attr->sq_sig_type, + init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, to_msqp(qp)); break; @@ -396,42 +523,115 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, return ERR_PTR(err); } - init_attr->cap.max_inline_data = 0; + init_attr->cap.max_inline_data = 0; + init_attr->cap.max_send_wr = qp->sq.max; + init_attr->cap.max_recv_wr = qp->rq.max; + init_attr->cap.max_send_sge = qp->sq.max_gs; + init_attr->cap.max_recv_sge = qp->rq.max_gs; return &qp->ibqp; } static int mthca_destroy_qp(struct ib_qp *qp) { + if (qp->uobject) { + mthca_unmap_user_db(to_mdev(qp->device), + &to_mucontext(qp->uobject->context)->uar, + to_mucontext(qp->uobject->context)->db_tab, + to_mqp(qp)->sq.db_index); + mthca_unmap_user_db(to_mdev(qp->device), + &to_mucontext(qp->uobject->context)->uar, + to_mucontext(qp->uobject->context)->db_tab, + to_mqp(qp)->rq.db_index); + } mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); kfree(qp); return 0; } -static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries) +static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, + struct ib_ucontext *context, + struct ib_udata *udata) { + struct mthca_create_cq ucmd; struct mthca_cq *cq; int nent; int err; + if (context) { + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + + err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.set_db_index, ucmd.set_db_page); + if (err) + return ERR_PTR(err); + + err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.arm_db_index, ucmd.arm_db_page); + if (err) + goto err_unmap_set; + } + cq = kmalloc(sizeof *cq, GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); + if (!cq) { + err = -ENOMEM; + goto err_unmap_arm; + } + + if (context) { + cq->mr.ibmr.lkey = ucmd.lkey; + cq->set_ci_db_index = ucmd.set_db_index; + cq->arm_db_index = ucmd.arm_db_index; + } for (nent = 1; nent <= entries; nent <<= 1) ; /* nothing */ - err = mthca_init_cq(to_mdev(ibdev), nent, cq); - if (err) { - kfree(cq); - cq = ERR_PTR(err); + err = mthca_init_cq(to_mdev(ibdev), nent, + context ? to_mucontext(context) : NULL, + context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, + cq); + if (err) + goto err_free; + + if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { + mthca_free_cq(to_mdev(ibdev), cq); + goto err_free; } return &cq->ibcq; + +err_free: + kfree(cq); + +err_unmap_arm: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.arm_db_index); + +err_unmap_set: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.set_db_index); + + return ERR_PTR(err); } static int mthca_destroy_cq(struct ib_cq *cq) { + if (cq->uobject) { + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->uobject->context)->uar, + to_mucontext(cq->uobject->context)->db_tab, + to_mcq(cq)->arm_db_index); + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->uobject->context)->uar, + to_mucontext(cq->uobject->context)->db_tab, + to_mcq(cq)->set_ci_db_index); + } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); kfree(cq); @@ -558,6 +758,7 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, convert_access(acc), mr); if (err) { + kfree(page_list); kfree(mr); return ERR_PTR(err); } @@ -566,6 +767,87 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, return &mr->ibmr; } +static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, + int acc, struct ib_udata *udata) +{ + struct mthca_dev *dev = to_mdev(pd->device); + struct ib_umem_chunk *chunk; + struct mthca_mr *mr; + u64 *pages; + int shift, n, len; + int i, j, k; + int err = 0; + + shift = ffs(region->page_size) - 1; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + n = 0; + list_for_each_entry(chunk, ®ion->chunk_list, list) + n += chunk->nents; + + mr->mtt = mthca_alloc_mtt(dev, n); + if (IS_ERR(mr->mtt)) { + err = PTR_ERR(mr->mtt); + goto err; + } + + pages = (u64 *) __get_free_page(GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err_mtt; + } + + i = n = 0; + + list_for_each_entry(chunk, ®ion->chunk_list, list) + for (j = 0; j < chunk->nmap; ++j) { + len = sg_dma_len(&chunk->page_list[j]) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(&chunk->page_list[j]) + + region->page_size * k; + /* + * Be friendly to WRITE_MTT command + * and leave two empty slots for the + * index and reserved fields of the + * mailbox. + */ + if (i == PAGE_SIZE / sizeof (u64) - 2) { + err = mthca_write_mtt(dev, mr->mtt, + n, pages, i); + if (err) + goto mtt_done; + n += i; + i = 0; + } + } + } + + if (i) + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); +mtt_done: + free_page((unsigned long) pages); + if (err) + goto err_mtt; + + err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base, + region->length, convert_access(acc), mr); + + if (err) + goto err_mtt; + + return &mr->ibmr; + +err_mtt: + mthca_free_mtt(dev, mr->mtt); + +err: + kfree(mr); + return ERR_PTR(err); +} + static int mthca_dereg_mr(struct ib_mr *mr) { struct mthca_mr *mmr = to_mmr(mr); @@ -690,6 +972,8 @@ int mthca_register_device(struct mthca_dev *dev) int i; strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); + dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.node_type = IB_NODE_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; @@ -699,6 +983,9 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.modify_port = mthca_modify_port; dev->ib_dev.query_pkey = mthca_query_pkey; dev->ib_dev.query_gid = mthca_query_gid; + dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; + dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; + dev->ib_dev.mmap = mthca_mmap_uar; dev->ib_dev.alloc_pd = mthca_alloc_pd; dev->ib_dev.dealloc_pd = mthca_dealloc_pd; dev->ib_dev.create_ah = mthca_ah_create; @@ -711,6 +998,7 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.poll_cq = mthca_poll_cq; dev->ib_dev.get_dma_mr = mthca_get_dma_mr; dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; + dev->ib_dev.reg_user_mr = mthca_reg_user_mr; dev->ib_dev.dereg_mr = mthca_dereg_mr; if (dev->mthca_flags & MTHCA_FLAG_FMR) { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 619710f95a8..1d032791cc8 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -54,18 +55,26 @@ struct mthca_uar { int index; }; +struct mthca_user_db_table; + +struct mthca_ucontext { + struct ib_ucontext ibucontext; + struct mthca_uar uar; + struct mthca_user_db_table *db_tab; +}; + +struct mthca_mtt; + struct mthca_mr { - struct ib_mr ibmr; - int order; - u32 first_seg; + struct ib_mr ibmr; + struct mthca_mtt *mtt; }; struct mthca_fmr { - struct ib_fmr ibmr; + struct ib_fmr ibmr; struct ib_fmr_attr attr; - int order; - u32 first_seg; - int maps; + struct mthca_mtt *mtt; + int maps; union { struct { struct mthca_mpt_entry __iomem *mpt; @@ -83,6 +92,7 @@ struct mthca_pd { u32 pd_num; atomic_t sqp_count; struct mthca_mr ntmr; + int privileged; }; struct mthca_eq { @@ -167,6 +177,7 @@ struct mthca_cq { int cqn; u32 cons_index; int is_direct; + int is_kernel; /* Next fields are Arbel only */ int set_ci_db_index; @@ -236,6 +247,11 @@ struct mthca_sqp { dma_addr_t header_dma; }; +static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct mthca_ucontext, ibucontext); +} + static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr) { return container_of(ibmr, struct mthca_fmr, ibmr); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index ca73bab11a0..f7126b14d5a 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -46,7 +47,9 @@ enum { MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, MTHCA_ACK_REQ_FREQ = 10, MTHCA_FLIGHT_LIMIT = 9, - MTHCA_UD_HEADER_SIZE = 72 /* largest UD header possible */ + MTHCA_UD_HEADER_SIZE = 72, /* largest UD header possible */ + MTHCA_INLINE_HEADER_SIZE = 4, /* data segment overhead for inline */ + MTHCA_INLINE_CHUNK_SIZE = 16 /* inline data segment chunk */ }; enum { @@ -357,6 +360,9 @@ static const struct { [UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), + [UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), @@ -378,6 +384,9 @@ static const struct { [UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), + [UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), @@ -388,6 +397,11 @@ static const struct { [IB_QPS_RTR] = { .trans = MTHCA_TRANS_INIT2RTR, .req_param = { + [UC] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC), [RC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | @@ -398,6 +412,9 @@ static const struct { .opt_param = { [UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), + [UC] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), [RC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), @@ -413,6 +430,8 @@ static const struct { .trans = MTHCA_TRANS_RTR2RTS, .req_param = { [UD] = IB_QP_SQ_PSN, + [UC] = (IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC), [RC] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | @@ -423,6 +442,11 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [UC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | @@ -442,6 +466,9 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [UC] = (IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | @@ -462,6 +489,10 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [UC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | @@ -476,6 +507,14 @@ static const struct { .opt_param = { [UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), + [UC] = (IB_QP_AV | + IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_AV | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | @@ -501,6 +540,7 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [UC] = (IB_QP_CUR_STATE), [RC] = (IB_QP_CUR_STATE | IB_QP_MIN_RNR_TIMER), [MLX] = (IB_QP_CUR_STATE | @@ -552,7 +592,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_qp_param *qp_param; struct mthca_qp_context *qp_context; u32 req_param, opt_param; @@ -609,10 +649,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) return -EINVAL; } - mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); - if (!mailbox) - return -ENOMEM; - qp_param = MAILBOX_ALIGN(mailbox); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + qp_param = mailbox->buf; qp_context = &qp_param->context; memset(qp_param, 0, sizeof *qp_param); @@ -652,7 +692,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) /* leave arbel_sched_queue as 0 */ - qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); + if (qp->ibqp.uobject) + qp_context->usr_page = + cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index); + else + qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); qp_context->local_qpn = cpu_to_be32(qp->qpn); if (attr_mask & IB_QP_DEST_QPN) { qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num); @@ -683,7 +727,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) if (attr_mask & IB_QP_AV) { qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f; qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid); - qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3; + qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate; if (attr->ah_attr.ah_flags & IB_AH_GRH) { qp_context->pri_path.g_mylmc |= 1 << 7; qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index; @@ -724,9 +768,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT); } - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { - qp_context->params1 |= cpu_to_be32(min(attr->max_dest_rd_atomic ? - ffs(attr->max_dest_rd_atomic) - 1 : 0, + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ? + ffs(attr->max_rd_atomic) - 1 : 0, 7) << 21); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX); } @@ -764,10 +808,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp->atomic_rd_en = attr->qp_access_flags; } - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { u8 rra_max; - if (qp->resp_depth && !attr->max_rd_atomic) { + if (qp->resp_depth && !attr->max_dest_rd_atomic) { /* * Lowering our responder resources to zero. * Turn off RDMA/atomics as responder. @@ -778,7 +822,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) MTHCA_QP_OPTPAR_RAE); } - if (!qp->resp_depth && attr->max_rd_atomic) { + if (!qp->resp_depth && attr->max_dest_rd_atomic) { /* * Increasing our responder resources from * zero. Turn on RDMA/atomics as appropriate. @@ -799,7 +843,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } for (rra_max = 0; - 1 << rra_max < attr->max_rd_atomic && + 1 << rra_max < attr->max_dest_rd_atomic && rra_max < dev->qp_table.rdb_shift; ++rra_max) ; /* nothing */ @@ -807,7 +851,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_context->params2 |= cpu_to_be32(rra_max << 21); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX); - qp->resp_depth = attr->max_rd_atomic; + qp->resp_depth = attr->max_dest_rd_atomic; } qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC); @@ -835,7 +879,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans, - qp->qpn, 0, qp_param, 0, &status); + qp->qpn, 0, mailbox, 0, &status); if (status) { mthca_warn(dev, "modify QP %d returned status %02x.\n", state_table[cur_state][new_state].trans, status); @@ -845,7 +889,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) if (!err) qp->state = new_state; - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); if (is_sqp(dev, qp)) store_attrs(to_msqp(qp), attr, attr_mask); @@ -917,6 +961,15 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift, 1 << qp->sq.wqe_shift); + + /* + * If this is a userspace QP, we don't actually have to + * allocate anything. All we need is to calculate the WQE + * sizes and the send_wqe_offset, so we're done now. + */ + if (pd->ibpd.uobject) + return 0; + size = PAGE_ALIGN(qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift)); @@ -934,7 +987,8 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n", size, shift); - qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t); + qp->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev, size, + &t, GFP_KERNEL); if (!qp->queue.direct.buf) goto err_out; @@ -973,7 +1027,8 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, for (i = 0; i < npages; ++i) { qp->queue.page_list[i].buf = - pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t); + dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + &t, GFP_KERNEL); if (!qp->queue.page_list[i].buf) goto err_out_free; @@ -996,16 +1051,15 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, err_out_free: if (qp->is_direct) { - pci_free_consistent(dev->pdev, size, - qp->queue.direct.buf, - pci_unmap_addr(&qp->queue.direct, mapping)); + dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf, + pci_unmap_addr(&qp->queue.direct, mapping)); } else for (i = 0; i < npages; ++i) { if (qp->queue.page_list[i].buf) - pci_free_consistent(dev->pdev, PAGE_SIZE, - qp->queue.page_list[i].buf, - pci_unmap_addr(&qp->queue.page_list[i], - mapping)); + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + qp->queue.page_list[i].buf, + pci_unmap_addr(&qp->queue.page_list[i], + mapping)); } @@ -1015,10 +1069,32 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, return err; } -static int mthca_alloc_memfree(struct mthca_dev *dev, +static void mthca_free_wqe_buf(struct mthca_dev *dev, struct mthca_qp *qp) { - int ret = 0; + int i; + int size = PAGE_ALIGN(qp->send_wqe_offset + + (qp->sq.max << qp->sq.wqe_shift)); + + if (qp->is_direct) { + dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf, + pci_unmap_addr(&qp->queue.direct, mapping)); + } else { + for (i = 0; i < size / PAGE_SIZE; ++i) { + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + qp->queue.page_list[i].buf, + pci_unmap_addr(&qp->queue.page_list[i], + mapping)); + } + } + + kfree(qp->wrid); +} + +static int mthca_map_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + int ret; if (mthca_is_memfree(dev)) { ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn); @@ -1029,35 +1105,15 @@ static int mthca_alloc_memfree(struct mthca_dev *dev, if (ret) goto err_qpc; - ret = mthca_table_get(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - if (ret) - goto err_eqpc; - - qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ, - qp->qpn, &qp->rq.db); - if (qp->rq.db_index < 0) { - ret = -ENOMEM; - goto err_rdb; - } + ret = mthca_table_get(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + if (ret) + goto err_eqpc; - qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ, - qp->qpn, &qp->sq.db); - if (qp->sq.db_index < 0) { - ret = -ENOMEM; - goto err_rq_db; - } } return 0; -err_rq_db: - mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); - -err_rdb: - mthca_table_put(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - err_eqpc: mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); @@ -1067,16 +1123,41 @@ err_qpc: return ret; } +static void mthca_unmap_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + mthca_table_put(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); + mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); +} + +static int mthca_alloc_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + int ret = 0; + + if (mthca_is_memfree(dev)) { + qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ, + qp->qpn, &qp->rq.db); + if (qp->rq.db_index < 0) + return ret; + + qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ, + qp->qpn, &qp->sq.db); + if (qp->sq.db_index < 0) + mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); + } + + return ret; +} + static void mthca_free_memfree(struct mthca_dev *dev, struct mthca_qp *qp) { if (mthca_is_memfree(dev)) { mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index); mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); - mthca_table_put(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); - mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); } } @@ -1108,13 +1189,28 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, mthca_wq_init(&qp->sq); mthca_wq_init(&qp->rq); - ret = mthca_alloc_memfree(dev, qp); + ret = mthca_map_memfree(dev, qp); if (ret) return ret; ret = mthca_alloc_wqe_buf(dev, pd, qp); if (ret) { - mthca_free_memfree(dev, qp); + mthca_unmap_memfree(dev, qp); + return ret; + } + + /* + * If this is a userspace QP, we're done now. The doorbells + * will be allocated and buffers will be initialized in + * userspace. + */ + if (pd->ibpd.uobject) + return 0; + + ret = mthca_alloc_memfree(dev, qp); + if (ret) { + mthca_free_wqe_buf(dev, qp); + mthca_unmap_memfree(dev, qp); return ret; } @@ -1147,22 +1243,39 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, return 0; } -static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp) +static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, + struct mthca_qp *qp) { - int i; - - if (!mthca_is_memfree(dev)) - return; + /* Sanity check QP size before proceeding */ + if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 || + cap->max_send_sge > 64 || cap->max_recv_sge > 64) + return -EINVAL; - for (i = 0; 1 << i < qp->rq.max; ++i) - ; /* nothing */ + if (mthca_is_memfree(dev)) { + qp->rq.max = cap->max_recv_wr ? + roundup_pow_of_two(cap->max_recv_wr) : 0; + qp->sq.max = cap->max_send_wr ? + roundup_pow_of_two(cap->max_send_wr) : 0; + } else { + qp->rq.max = cap->max_recv_wr; + qp->sq.max = cap->max_send_wr; + } - qp->rq.max = 1 << i; + qp->rq.max_gs = cap->max_recv_sge; + qp->sq.max_gs = max_t(int, cap->max_send_sge, + ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE, + MTHCA_INLINE_CHUNK_SIZE) / + sizeof (struct mthca_data_seg)); - for (i = 0; 1 << i < qp->sq.max; ++i) - ; /* nothing */ + /* + * For MLX transport we need 2 extra S/G entries: + * one for the header and one for the checksum at the end + */ + if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) || + qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg) + return -EINVAL; - qp->sq.max = 1 << i; + return 0; } int mthca_alloc_qp(struct mthca_dev *dev, @@ -1171,11 +1284,14 @@ int mthca_alloc_qp(struct mthca_dev *dev, struct mthca_cq *recv_cq, enum ib_qp_type type, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, struct mthca_qp *qp) { int err; - mthca_align_qp_size(dev, qp); + err = mthca_set_qp_size(dev, cap, qp); + if (err) + return err; switch (type) { case IB_QPT_RC: qp->transport = RC; break; @@ -1208,14 +1324,17 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, int qpn, int port, struct mthca_sqp *sqp) { - int err = 0; u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; + int err; - mthca_align_qp_size(dev, &sqp->qp); + err = mthca_set_qp_size(dev, cap, &sqp->qp); + if (err) + return err; sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size, @@ -1274,8 +1393,6 @@ void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp) { u8 status; - int size; - int i; struct mthca_cq *send_cq; struct mthca_cq *recv_cq; @@ -1305,31 +1422,22 @@ void mthca_free_qp(struct mthca_dev *dev, if (qp->state != IB_QPS_RESET) mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status); - mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn); - if (qp->ibqp.send_cq != qp->ibqp.recv_cq) - mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn); - - mthca_free_mr(dev, &qp->mr); - - size = PAGE_ALIGN(qp->send_wqe_offset + - (qp->sq.max << qp->sq.wqe_shift)); + /* + * If this is a userspace QP, the buffers, MR, CQs and so on + * will be cleaned up in userspace, so all we have to do is + * unref the mem-free tables and free the QPN in our table. + */ + if (!qp->ibqp.uobject) { + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn); + if (qp->ibqp.send_cq != qp->ibqp.recv_cq) + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn); - if (qp->is_direct) { - pci_free_consistent(dev->pdev, size, - qp->queue.direct.buf, - pci_unmap_addr(&qp->queue.direct, mapping)); - } else { - for (i = 0; i < size / PAGE_SIZE; ++i) { - pci_free_consistent(dev->pdev, PAGE_SIZE, - qp->queue.page_list[i].buf, - pci_unmap_addr(&qp->queue.page_list[i], - mapping)); - } + mthca_free_mr(dev, &qp->mr); + mthca_free_memfree(dev, qp); + mthca_free_wqe_buf(dev, qp); } - kfree(qp->wrid); - - mthca_free_memfree(dev, qp); + mthca_unmap_memfree(dev, qp); if (is_sqp(dev, qp)) { atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); @@ -1529,6 +1637,26 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; + case UC: + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cpu_to_be64(wr->wr.rdma.remote_addr); + ((struct mthca_raddr_seg *) wqe)->rkey = + cpu_to_be32(wr->wr.rdma.rkey); + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + case UD: ((struct mthca_tavor_ud_seg *) wqe)->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key); @@ -1814,9 +1942,29 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, sizeof (struct mthca_atomic_seg); break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cpu_to_be64(wr->wr.rdma.remote_addr); + ((struct mthca_raddr_seg *) wqe)->rkey = + cpu_to_be32(wr->wr.rdma.rkey); + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case UC: + switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - case IB_WR_RDMA_READ: ((struct mthca_raddr_seg *) wqe)->raddr = cpu_to_be64(wr->wr.rdma.remote_addr); ((struct mthca_raddr_seg *) wqe)->rkey = diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h new file mode 100644 index 00000000000..3024c1b4547 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef MTHCA_USER_H +#define MTHCA_USER_H + +#include <linux/types.h> + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct mthca_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 uarc_size; +}; + +struct mthca_alloc_pd_resp { + __u32 pdn; + __u32 reserved; +}; + +struct mthca_create_cq { + __u32 lkey; + __u32 pdn; + __u64 arm_db_page; + __u64 set_db_page; + __u32 arm_db_index; + __u32 set_db_index; +}; + +struct mthca_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct mthca_create_qp { + __u32 lkey; + __u32 reserved; + __u64 sq_db_page; + __u64 rq_db_page; + __u32 sq_db_index; + __u32 rq_db_index; +}; + +#endif /* MTHCA_USER_H */ diff --git a/drivers/infiniband/include/ib_user_verbs.h b/drivers/infiniband/include/ib_user_verbs.h new file mode 100644 index 00000000000..7c613706af7 --- /dev/null +++ b/drivers/infiniband/include/ib_user_verbs.h @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_user_verbs.h 2708 2005-06-24 17:27:21Z roland $ + */ + +#ifndef IB_USER_VERBS_H +#define IB_USER_VERBS_H + +#include <linux/types.h> + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define IB_USER_VERBS_ABI_VERSION 1 + +enum { + IB_USER_VERBS_CMD_QUERY_PARAMS, + IB_USER_VERBS_CMD_GET_CONTEXT, + IB_USER_VERBS_CMD_QUERY_DEVICE, + IB_USER_VERBS_CMD_QUERY_PORT, + IB_USER_VERBS_CMD_QUERY_GID, + IB_USER_VERBS_CMD_QUERY_PKEY, + IB_USER_VERBS_CMD_ALLOC_PD, + IB_USER_VERBS_CMD_DEALLOC_PD, + IB_USER_VERBS_CMD_CREATE_AH, + IB_USER_VERBS_CMD_MODIFY_AH, + IB_USER_VERBS_CMD_QUERY_AH, + IB_USER_VERBS_CMD_DESTROY_AH, + IB_USER_VERBS_CMD_REG_MR, + IB_USER_VERBS_CMD_REG_SMR, + IB_USER_VERBS_CMD_REREG_MR, + IB_USER_VERBS_CMD_QUERY_MR, + IB_USER_VERBS_CMD_DEREG_MR, + IB_USER_VERBS_CMD_ALLOC_MW, + IB_USER_VERBS_CMD_BIND_MW, + IB_USER_VERBS_CMD_DEALLOC_MW, + IB_USER_VERBS_CMD_CREATE_CQ, + IB_USER_VERBS_CMD_RESIZE_CQ, + IB_USER_VERBS_CMD_DESTROY_CQ, + IB_USER_VERBS_CMD_POLL_CQ, + IB_USER_VERBS_CMD_PEEK_CQ, + IB_USER_VERBS_CMD_REQ_NOTIFY_CQ, + IB_USER_VERBS_CMD_CREATE_QP, + IB_USER_VERBS_CMD_QUERY_QP, + IB_USER_VERBS_CMD_MODIFY_QP, + IB_USER_VERBS_CMD_DESTROY_QP, + IB_USER_VERBS_CMD_POST_SEND, + IB_USER_VERBS_CMD_POST_RECV, + IB_USER_VERBS_CMD_ATTACH_MCAST, + IB_USER_VERBS_CMD_DETACH_MCAST +}; + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct ib_uverbs_async_event_desc { + __u64 element; + __u32 event_type; /* enum ib_event_type */ + __u32 reserved; +}; + +struct ib_uverbs_comp_event_desc { + __u64 cq_handle; +}; + +/* + * All commands from userspace should start with a __u32 command field + * followed by __u16 in_words and out_words fields (which give the + * length of the command block and response buffer if any in 32-bit + * words). The kernel driver will read these fields first and read + * the rest of the command struct based on these value. + */ + +struct ib_uverbs_cmd_hdr { + __u32 command; + __u16 in_words; + __u16 out_words; +}; + +/* + * No driver_data for "query params" command, since this is intended + * to be a core function with no possible device dependence. + */ +struct ib_uverbs_query_params { + __u64 response; +}; + +struct ib_uverbs_query_params_resp { + __u32 num_cq_events; +}; + +struct ib_uverbs_get_context { + __u64 response; + __u64 cq_fd_tab; + __u64 driver_data[0]; +}; + +struct ib_uverbs_get_context_resp { + __u32 async_fd; + __u32 reserved; +}; + +struct ib_uverbs_query_device { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_device_resp { + __u64 fw_ver; + __u64 node_guid; + __u64 sys_image_guid; + __u64 max_mr_size; + __u64 page_size_cap; + __u32 vendor_id; + __u32 vendor_part_id; + __u32 hw_ver; + __u32 max_qp; + __u32 max_qp_wr; + __u32 device_cap_flags; + __u32 max_sge; + __u32 max_sge_rd; + __u32 max_cq; + __u32 max_cqe; + __u32 max_mr; + __u32 max_pd; + __u32 max_qp_rd_atom; + __u32 max_ee_rd_atom; + __u32 max_res_rd_atom; + __u32 max_qp_init_rd_atom; + __u32 max_ee_init_rd_atom; + __u32 atomic_cap; + __u32 max_ee; + __u32 max_rdd; + __u32 max_mw; + __u32 max_raw_ipv6_qp; + __u32 max_raw_ethy_qp; + __u32 max_mcast_grp; + __u32 max_mcast_qp_attach; + __u32 max_total_mcast_qp_attach; + __u32 max_ah; + __u32 max_fmr; + __u32 max_map_per_fmr; + __u32 max_srq; + __u32 max_srq_wr; + __u32 max_srq_sge; + __u16 max_pkeys; + __u8 local_ca_ack_delay; + __u8 phys_port_cnt; + __u8 reserved[4]; +}; + +struct ib_uverbs_query_port { + __u64 response; + __u8 port_num; + __u8 reserved[7]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_port_resp { + __u32 port_cap_flags; + __u32 max_msg_sz; + __u32 bad_pkey_cntr; + __u32 qkey_viol_cntr; + __u32 gid_tbl_len; + __u16 pkey_tbl_len; + __u16 lid; + __u16 sm_lid; + __u8 state; + __u8 max_mtu; + __u8 active_mtu; + __u8 lmc; + __u8 max_vl_num; + __u8 sm_sl; + __u8 subnet_timeout; + __u8 init_type_reply; + __u8 active_width; + __u8 active_speed; + __u8 phys_state; + __u8 reserved[3]; +}; + +struct ib_uverbs_query_gid { + __u64 response; + __u8 port_num; + __u8 index; + __u8 reserved[6]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_gid_resp { + __u8 gid[16]; +}; + +struct ib_uverbs_query_pkey { + __u64 response; + __u8 port_num; + __u8 index; + __u8 reserved[6]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_pkey_resp { + __u16 pkey; + __u16 reserved; +}; + +struct ib_uverbs_alloc_pd { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_alloc_pd_resp { + __u32 pd_handle; +}; + +struct ib_uverbs_dealloc_pd { + __u32 pd_handle; +}; + +struct ib_uverbs_reg_mr { + __u64 response; + __u64 start; + __u64 length; + __u64 hca_va; + __u32 pd_handle; + __u32 access_flags; + __u64 driver_data[0]; +}; + +struct ib_uverbs_reg_mr_resp { + __u32 mr_handle; + __u32 lkey; + __u32 rkey; +}; + +struct ib_uverbs_dereg_mr { + __u32 mr_handle; +}; + +struct ib_uverbs_create_cq { + __u64 response; + __u64 user_handle; + __u32 cqe; + __u32 event_handler; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_cq_resp { + __u32 cq_handle; + __u32 cqe; +}; + +struct ib_uverbs_destroy_cq { + __u32 cq_handle; +}; + +struct ib_uverbs_create_qp { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 send_cq_handle; + __u32 recv_cq_handle; + __u32 srq_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u8 is_srq; + __u8 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_qp_resp { + __u32 qp_handle; + __u32 qpn; +}; + +/* + * This struct needs to remain a multiple of 8 bytes to keep the + * alignment of the modify QP parameters. + */ +struct ib_uverbs_qp_dest { + __u8 dgid[16]; + __u32 flow_label; + __u16 dlid; + __u16 reserved; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; +}; + +struct ib_uverbs_modify_qp { + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 qp_handle; + __u32 attr_mask; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[2]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_modify_qp_resp { +}; + +struct ib_uverbs_destroy_qp { + __u32 qp_handle; +}; + +struct ib_uverbs_attach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_detach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +#endif /* IB_USER_VERBS_H */ diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h index cf01f044a22..e5bd9a10c20 100644 --- a/drivers/infiniband/include/ib_verbs.h +++ b/drivers/infiniband/include/ib_verbs.h @@ -4,6 +4,7 @@ * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -41,7 +42,10 @@ #include <linux/types.h> #include <linux/device.h> + #include <asm/atomic.h> +#include <asm/scatterlist.h> +#include <asm/uaccess.h> union ib_gid { u8 raw[16]; @@ -544,7 +548,7 @@ struct ib_send_wr { int num_sge; enum ib_wr_opcode opcode; int send_flags; - u32 imm_data; + __be32 imm_data; union { struct { u64 remote_addr; @@ -618,29 +622,86 @@ struct ib_fmr_attr { u8 page_size; }; +struct ib_ucontext { + struct ib_device *device; + struct list_head pd_list; + struct list_head mr_list; + struct list_head mw_list; + struct list_head cq_list; + struct list_head qp_list; + struct list_head srq_list; + struct list_head ah_list; + spinlock_t lock; +}; + +struct ib_uobject { + u64 user_handle; /* handle given to us by userspace */ + struct ib_ucontext *context; /* associated user context */ + struct list_head list; /* link to context's list */ + u32 id; /* index into kernel idr */ +}; + +struct ib_umem { + unsigned long user_base; + unsigned long virt_base; + size_t length; + int offset; + int page_size; + int writable; + struct list_head chunk_list; +}; + +struct ib_umem_chunk { + struct list_head list; + int nents; + int nmap; + struct scatterlist page_list[0]; +}; + +struct ib_udata { + void __user *inbuf; + void __user *outbuf; + size_t inlen; + size_t outlen; +}; + +#define IB_UMEM_MAX_PAGE_CHUNK \ + ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \ + ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ + (void *) &((struct ib_umem_chunk *) 0)->page_list[0])) + +struct ib_umem_object { + struct ib_uobject uobject; + struct ib_umem umem; +}; + struct ib_pd { - struct ib_device *device; - atomic_t usecnt; /* count all resources */ + struct ib_device *device; + struct ib_uobject *uobject; + atomic_t usecnt; /* count all resources */ }; struct ib_ah { struct ib_device *device; struct ib_pd *pd; + struct ib_uobject *uobject; }; typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); struct ib_cq { - struct ib_device *device; - ib_comp_handler comp_handler; - void (*event_handler)(struct ib_event *, void *); - void * cq_context; - int cqe; - atomic_t usecnt; /* count number of work queues */ + struct ib_device *device; + struct ib_uobject *uobject; + ib_comp_handler comp_handler; + void (*event_handler)(struct ib_event *, void *); + void * cq_context; + int cqe; + atomic_t usecnt; /* count number of work queues */ }; struct ib_srq { struct ib_device *device; + struct ib_uobject *uobject; struct ib_pd *pd; void *srq_context; atomic_t usecnt; @@ -652,6 +713,7 @@ struct ib_qp { struct ib_cq *send_cq; struct ib_cq *recv_cq; struct ib_srq *srq; + struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; u32 qp_num; @@ -659,16 +721,18 @@ struct ib_qp { }; struct ib_mr { - struct ib_device *device; - struct ib_pd *pd; - u32 lkey; - u32 rkey; - atomic_t usecnt; /* count number of MWs */ + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; + u32 lkey; + u32 rkey; + atomic_t usecnt; /* count number of MWs */ }; struct ib_mw { struct ib_device *device; struct ib_pd *pd; + struct ib_uobject *uobject; u32 rkey; }; @@ -737,7 +801,14 @@ struct ib_device { int (*modify_port)(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify); - struct ib_pd * (*alloc_pd)(struct ib_device *device); + struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device, + struct ib_udata *udata); + int (*dealloc_ucontext)(struct ib_ucontext *context); + int (*mmap)(struct ib_ucontext *context, + struct vm_area_struct *vma); + struct ib_pd * (*alloc_pd)(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); int (*dealloc_pd)(struct ib_pd *pd); struct ib_ah * (*create_ah)(struct ib_pd *pd, struct ib_ah_attr *ah_attr); @@ -747,7 +818,8 @@ struct ib_device { struct ib_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah); struct ib_qp * (*create_qp)(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask); @@ -762,8 +834,9 @@ struct ib_device { int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr); - struct ib_cq * (*create_cq)(struct ib_device *device, - int cqe); + struct ib_cq * (*create_cq)(struct ib_device *device, int cqe, + struct ib_ucontext *context, + struct ib_udata *udata); int (*destroy_cq)(struct ib_cq *cq); int (*resize_cq)(struct ib_cq *cq, int *cqe); int (*poll_cq)(struct ib_cq *cq, int num_entries, @@ -780,6 +853,10 @@ struct ib_device { int num_phys_buf, int mr_access_flags, u64 *iova_start); + struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, + struct ib_umem *region, + int mr_access_flags, + struct ib_udata *udata); int (*query_mr)(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); @@ -817,6 +894,7 @@ struct ib_device { struct ib_mad *in_mad, struct ib_mad *out_mad); + struct module *owner; struct class_device class_dev; struct kobject ports_parent; struct list_head port_list; @@ -852,6 +930,16 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client); void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data); +static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) +{ + return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; +} + +static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) +{ + return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; +} + int ib_register_event_handler (struct ib_event_handler *event_handler); int ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(struct ib_event *event); |