aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig10
-rw-r--r--drivers/infiniband/core/Makefile5
-rw-r--r--drivers/infiniband/core/packer.c4
-rw-r--r--drivers/infiniband/core/sa_query.c18
-rw-r--r--drivers/infiniband/core/uverbs.h132
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c1006
-rw-r--r--drivers/infiniband/core/uverbs_main.c698
-rw-r--r--drivers/infiniband/core/uverbs_mem.c221
-rw-r--r--drivers/infiniband/core/verbs.c32
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c531
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.h48
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c177
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h18
-rw-r--r--drivers/infiniband/hw/mthca/mthca_doorbell.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c58
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c34
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mcg.c63
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c151
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h14
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c367
-rw-r--r--drivers/infiniband/hw/mthca/mthca_pd.c24
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c334
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h30
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c344
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h81
-rw-r--r--drivers/infiniband/include/ib_user_verbs.h389
-rw-r--r--drivers/infiniband/include/ib_verbs.h124
28 files changed, 4055 insertions, 860 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 3cc3ff0cccb..79c8e2dd9c3 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -7,6 +7,16 @@ config INFINIBAND
any protocols you wish to use as well as drivers for your
InfiniBand hardware.
+config INFINIBAND_USER_VERBS
+ tristate "InfiniBand userspace verbs support"
+ depends on INFINIBAND
+ ---help---
+ Userspace InfiniBand verbs support. This is the kernel side
+ of userspace verbs, which allows userspace processes to
+ directly access InfiniBand hardware for fast-path
+ operations. You will also need libibverbs and a hardware
+ driver library from <http://www.openib.org>.
+
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/ulp/ipoib/Kconfig"
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d2dbfb52c0a..e1a7cf3e863 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,6 +1,7 @@
EXTRA_CFLAGS += -Idrivers/infiniband/include
-obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o
+obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o
+obj-$(CONFIG_INFINIBAND_USER_VERBS) += ib_uverbs.o
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
device.o fmr_pool.o cache.o
@@ -10,3 +11,5 @@ ib_mad-y := mad.o smi.o agent.o
ib_sa-y := sa_query.o
ib_umad-y := user_mad.o
+
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o
diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c
index 5f15feffeae..eb5ff54c10d 100644
--- a/drivers/infiniband/core/packer.c
+++ b/drivers/infiniband/core/packer.c
@@ -96,7 +96,7 @@ void ib_pack(const struct ib_field *desc,
else
val = 0;
- mask = cpu_to_be64(((1ull << desc[i].size_bits) - 1) << shift);
+ mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift);
addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words);
*addr = (*addr & ~mask) | (cpu_to_be64(val) & mask);
} else {
@@ -176,7 +176,7 @@ void ib_unpack(const struct ib_field *desc,
__be64 *addr;
shift = 64 - desc[i].offset_bits - desc[i].size_bits;
- mask = ((1ull << desc[i].size_bits) - 1) << shift;
+ mask = (~0ull >> (64 - desc[i].size_bits)) << shift;
addr = (__be64 *) buf + desc[i].offset_words;
val = (be64_to_cpup(addr) & mask) >> shift;
value_write(desc[i].struct_offset_bytes,
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 276e1a53010..5a08e81fa82 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -507,7 +507,13 @@ retry:
spin_unlock_irqrestore(&idr_lock, flags);
}
- return ret;
+ /*
+ * It's not safe to dereference query any more, because the
+ * send may already have completed and freed the query in
+ * another context. So use wr.wr_id, which has a copy of the
+ * query's id.
+ */
+ return ret ? ret : wr.wr_id;
}
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
@@ -598,14 +604,15 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
rec, query->sa_query.mad->data);
*sa_query = &query->sa_query;
+
ret = send_mad(&query->sa_query, timeout_ms);
- if (ret) {
+ if (ret < 0) {
*sa_query = NULL;
kfree(query->sa_query.mad);
kfree(query);
}
- return ret ? ret : query->sa_query.id;
+ return ret;
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
@@ -674,14 +681,15 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
rec, query->sa_query.mad->data);
*sa_query = &query->sa_query;
+
ret = send_mad(&query->sa_query, timeout_ms);
- if (ret) {
+ if (ret < 0) {
*sa_query = NULL;
kfree(query->sa_query.mad);
kfree(query);
}
- return ret ? ret : query->sa_query.id;
+ return ret;
}
EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
new file mode 100644
index 00000000000..57347f1e82c
--- /dev/null
+++ b/drivers/infiniband/core/uverbs.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs.h 2559 2005-06-06 19:43:16Z roland $
+ */
+
+#ifndef UVERBS_H
+#define UVERBS_H
+
+/* Include device.h and fs.h until cdev.h is self-sufficient */
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/kref.h>
+#include <linux/idr.h>
+
+#include <ib_verbs.h>
+#include <ib_user_verbs.h>
+
+struct ib_uverbs_device {
+ int devnum;
+ struct cdev dev;
+ struct class_device class_dev;
+ struct ib_device *ib_dev;
+ int num_comp;
+};
+
+struct ib_uverbs_event_file {
+ struct kref ref;
+ struct ib_uverbs_file *uverbs_file;
+ spinlock_t lock;
+ int fd;
+ int is_async;
+ wait_queue_head_t poll_wait;
+ struct list_head event_list;
+};
+
+struct ib_uverbs_file {
+ struct kref ref;
+ struct ib_uverbs_device *device;
+ struct ib_ucontext *ucontext;
+ struct ib_event_handler event_handler;
+ struct ib_uverbs_event_file async_file;
+ struct ib_uverbs_event_file comp_file[1];
+};
+
+struct ib_uverbs_async_event {
+ struct ib_uverbs_async_event_desc desc;
+ struct list_head list;
+};
+
+struct ib_uverbs_comp_event {
+ struct ib_uverbs_comp_event_desc desc;
+ struct list_head list;
+};
+
+struct ib_uobject_mr {
+ struct ib_uobject uobj;
+ struct page *page_list;
+ struct scatterlist *sg_list;
+};
+
+extern struct semaphore ib_uverbs_idr_mutex;
+extern struct idr ib_uverbs_pd_idr;
+extern struct idr ib_uverbs_mr_idr;
+extern struct idr ib_uverbs_mw_idr;
+extern struct idr ib_uverbs_ah_idr;
+extern struct idr ib_uverbs_cq_idr;
+extern struct idr ib_uverbs_qp_idr;
+
+void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
+void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
+
+int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
+ void *addr, size_t size, int write);
+void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
+void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
+
+#define IB_UVERBS_DECLARE_CMD(name) \
+ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
+ const char __user *buf, int in_len, \
+ int out_len)
+
+IB_UVERBS_DECLARE_CMD(query_params);
+IB_UVERBS_DECLARE_CMD(get_context);
+IB_UVERBS_DECLARE_CMD(query_device);
+IB_UVERBS_DECLARE_CMD(query_port);
+IB_UVERBS_DECLARE_CMD(query_gid);
+IB_UVERBS_DECLARE_CMD(query_pkey);
+IB_UVERBS_DECLARE_CMD(alloc_pd);
+IB_UVERBS_DECLARE_CMD(dealloc_pd);
+IB_UVERBS_DECLARE_CMD(reg_mr);
+IB_UVERBS_DECLARE_CMD(dereg_mr);
+IB_UVERBS_DECLARE_CMD(create_cq);
+IB_UVERBS_DECLARE_CMD(destroy_cq);
+IB_UVERBS_DECLARE_CMD(create_qp);
+IB_UVERBS_DECLARE_CMD(modify_qp);
+IB_UVERBS_DECLARE_CMD(destroy_qp);
+IB_UVERBS_DECLARE_CMD(attach_mcast);
+IB_UVERBS_DECLARE_CMD(detach_mcast);
+
+#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
new file mode 100644
index 00000000000..5f2bbcda4c7
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -0,0 +1,1006 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $
+ */
+
+#include <asm/uaccess.h>
+
+#include "uverbs.h"
+
+#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
+ do { \
+ (udata)->inbuf = (void __user *) (ibuf); \
+ (udata)->outbuf = (void __user *) (obuf); \
+ (udata)->inlen = (ilen); \
+ (udata)->outlen = (olen); \
+ } while (0)
+
+ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_params cmd;
+ struct ib_uverbs_query_params_resp resp;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ memset(&resp, 0, sizeof resp);
+
+ resp.num_cq_events = file->device->num_comp;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_get_context cmd;
+ struct ib_uverbs_get_context_resp resp;
+ struct ib_udata udata;
+ struct ib_device *ibdev = file->device->ib_dev;
+ int i;
+ int ret = in_len;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ file->ucontext = ibdev->alloc_ucontext(ibdev, &udata);
+ if (IS_ERR(file->ucontext)) {
+ ret = PTR_ERR(file->ucontext);
+ file->ucontext = NULL;
+ return ret;
+ }
+
+ file->ucontext->device = ibdev;
+ INIT_LIST_HEAD(&file->ucontext->pd_list);
+ INIT_LIST_HEAD(&file->ucontext->mr_list);
+ INIT_LIST_HEAD(&file->ucontext->mw_list);
+ INIT_LIST_HEAD(&file->ucontext->cq_list);
+ INIT_LIST_HEAD(&file->ucontext->qp_list);
+ INIT_LIST_HEAD(&file->ucontext->srq_list);
+ INIT_LIST_HEAD(&file->ucontext->ah_list);
+ spin_lock_init(&file->ucontext->lock);
+
+ resp.async_fd = file->async_file.fd;
+ for (i = 0; i < file->device->num_comp; ++i)
+ if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab +
+ i * sizeof (__u32),
+ &file->comp_file[i].fd, sizeof (__u32)))
+ goto err;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ goto err;
+
+ return in_len;
+
+err:
+ ibdev->dealloc_ucontext(file->ucontext);
+ file->ucontext = NULL;
+
+ return -EFAULT;
+}
+
+ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_device cmd;
+ struct ib_uverbs_query_device_resp resp;
+ struct ib_device_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ ret = ib_query_device(file->device->ib_dev, &attr);
+ if (ret)
+ return ret;
+
+ memset(&resp, 0, sizeof resp);
+
+ resp.fw_ver = attr.fw_ver;
+ resp.node_guid = attr.node_guid;
+ resp.sys_image_guid = attr.sys_image_guid;
+ resp.max_mr_size = attr.max_mr_size;
+ resp.page_size_cap = attr.page_size_cap;
+ resp.vendor_id = attr.vendor_id;
+ resp.vendor_part_id = attr.vendor_part_id;
+ resp.hw_ver = attr.hw_ver;
+ resp.max_qp = attr.max_qp;
+ resp.max_qp_wr = attr.max_qp_wr;
+ resp.device_cap_flags = attr.device_cap_flags;
+ resp.max_sge = attr.max_sge;
+ resp.max_sge_rd = attr.max_sge_rd;
+ resp.max_cq = attr.max_cq;
+ resp.max_cqe = attr.max_cqe;
+ resp.max_mr = attr.max_mr;
+ resp.max_pd = attr.max_pd;
+ resp.max_qp_rd_atom = attr.max_qp_rd_atom;
+ resp.max_ee_rd_atom = attr.max_ee_rd_atom;
+ resp.max_res_rd_atom = attr.max_res_rd_atom;
+ resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom;
+ resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom;
+ resp.atomic_cap = attr.atomic_cap;
+ resp.max_ee = attr.max_ee;
+ resp.max_rdd = attr.max_rdd;
+ resp.max_mw = attr.max_mw;
+ resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp;
+ resp.max_raw_ethy_qp = attr.max_raw_ethy_qp;
+ resp.max_mcast_grp = attr.max_mcast_grp;
+ resp.max_mcast_qp_attach = attr.max_mcast_qp_attach;
+ resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach;
+ resp.max_ah = attr.max_ah;
+ resp.max_fmr = attr.max_fmr;
+ resp.max_map_per_fmr = attr.max_map_per_fmr;
+ resp.max_srq = attr.max_srq;
+ resp.max_srq_wr = attr.max_srq_wr;
+ resp.max_srq_sge = attr.max_srq_sge;
+ resp.max_pkeys = attr.max_pkeys;
+ resp.local_ca_ack_delay = attr.local_ca_ack_delay;
+ resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_port cmd;
+ struct ib_uverbs_query_port_resp resp;
+ struct ib_port_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr);
+ if (ret)
+ return ret;
+
+ memset(&resp, 0, sizeof resp);
+
+ resp.state = attr.state;
+ resp.max_mtu = attr.max_mtu;
+ resp.active_mtu = attr.active_mtu;
+ resp.gid_tbl_len = attr.gid_tbl_len;
+ resp.port_cap_flags = attr.port_cap_flags;
+ resp.max_msg_sz = attr.max_msg_sz;
+ resp.bad_pkey_cntr = attr.bad_pkey_cntr;
+ resp.qkey_viol_cntr = attr.qkey_viol_cntr;
+ resp.pkey_tbl_len = attr.pkey_tbl_len;
+ resp.lid = attr.lid;
+ resp.sm_lid = attr.sm_lid;
+ resp.lmc = attr.lmc;
+ resp.max_vl_num = attr.max_vl_num;
+ resp.sm_sl = attr.sm_sl;
+ resp.subnet_timeout = attr.subnet_timeout;
+ resp.init_type_reply = attr.init_type_reply;
+ resp.active_width = attr.active_width;
+ resp.active_speed = attr.active_speed;
+ resp.phys_state = attr.phys_state;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_gid cmd;
+ struct ib_uverbs_query_gid_resp resp;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ memset(&resp, 0, sizeof resp);
+
+ ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index,
+ (union ib_gid *) resp.gid);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_pkey cmd;
+ struct ib_uverbs_query_pkey_resp resp;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ memset(&resp, 0, sizeof resp);
+
+ ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index,
+ &resp.pkey);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_alloc_pd cmd;
+ struct ib_uverbs_alloc_pd_resp resp;
+ struct ib_udata udata;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ uobj->context = file->ucontext;
+
+ pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
+ file->ucontext, &udata);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
+ goto err;
+ }
+
+ pd->device = file->device->ib_dev;
+ pd->uobject = uobj;
+ atomic_set(&pd->usecnt, 0);
+
+retry:
+ if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_pd;
+ }
+
+ down(&ib_uverbs_idr_mutex);
+ ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id);
+ up(&ib_uverbs_idr_mutex);
+
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ goto err_pd;
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_add_tail(&uobj->list, &file->ucontext->pd_list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ memset(&resp, 0, sizeof resp);
+ resp.pd_handle = uobj->id;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_list;
+ }
+
+ return in_len;
+
+err_list:
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ down(&ib_uverbs_idr_mutex);
+ idr_remove(&ib_uverbs_pd_idr, uobj->id);
+ up(&ib_uverbs_idr_mutex);
+
+err_pd:
+ ib_dealloc_pd(pd);
+
+err:
+ kfree(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_dealloc_pd cmd;
+ struct ib_pd *pd;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+ if (!pd || pd->uobject->context != file->ucontext)
+ goto out;
+
+ uobj = pd->uobject;
+
+ ret = ib_dealloc_pd(pd);
+ if (ret)
+ goto out;
+
+ idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle);
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ kfree(uobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_reg_mr cmd;
+ struct ib_uverbs_reg_mr_resp resp;
+ struct ib_udata udata;
+ struct ib_umem_object *obj;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
+ return -EINVAL;
+
+ obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ obj->uobject.context = file->ucontext;
+
+ /*
+ * We ask for writable memory if any access flags other than
+ * "remote read" are set. "Local write" and "remote write"
+ * obviously require write access. "Remote atomic" can do
+ * things like fetch and add, which will modify memory, and
+ * "MW bind" can change permissions by binding a window.
+ */
+ ret = ib_umem_get(file->device->ib_dev, &obj->umem,
+ (void *) (unsigned long) cmd.start, cmd.length,
+ !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
+ if (ret)
+ goto err_free;
+
+ obj->umem.virt_base = cmd.hca_va;
+
+ down(&ib_uverbs_idr_mutex);
+
+ pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+ if (!pd || pd->uobject->context != file->ucontext) {
+ ret = -EINVAL;
+ goto err_up;
+ }
+
+ if (!pd->device->reg_user_mr) {
+ ret = -ENOSYS;
+ goto err_up;
+ }
+
+ mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
+ if (IS_ERR(mr)) {
+ ret = PTR_ERR(mr);
+ goto err_up;
+ }
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = &obj->uobject;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
+
+ memset(&resp, 0, sizeof resp);
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+
+retry:
+ if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_unreg;
+ }
+
+ ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id);
+
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ goto err_unreg;
+
+ resp.mr_handle = obj->uobject.id;
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_list;
+ }
+
+ up(&ib_uverbs_idr_mutex);
+
+ return in_len;
+
+err_list:
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&obj->uobject.list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+err_unreg:
+ ib_dereg_mr(mr);
+
+err_up:
+ up(&ib_uverbs_idr_mutex);
+
+ ib_umem_release(file->device->ib_dev, &obj->umem);
+
+err_free:
+ kfree(obj);
+ return ret;
+}
+
+ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_dereg_mr cmd;
+ struct ib_mr *mr;
+ struct ib_umem_object *memobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle);
+ if (!mr || mr->uobject->context != file->ucontext)
+ goto out;
+
+ memobj = container_of(mr->uobject, struct ib_umem_object, uobject);
+
+ ret = ib_dereg_mr(mr);
+ if (ret)
+ goto out;
+
+ idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle);
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&memobj->uobject.list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ ib_umem_release(file->device->ib_dev, &memobj->umem);
+ kfree(memobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_cq cmd;
+ struct ib_uverbs_create_cq_resp resp;
+ struct ib_udata udata;
+ struct ib_uobject *uobj;
+ struct ib_cq *cq;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ if (cmd.event_handler >= file->device->num_comp)
+ return -EINVAL;
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ uobj->user_handle = cmd.user_handle;
+ uobj->context = file->ucontext;
+
+ cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe,
+ file->ucontext, &udata);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto err;
+ }
+
+ cq->device = file->device->ib_dev;
+ cq->uobject = uobj;
+ cq->comp_handler = ib_uverbs_comp_handler;
+ cq->event_handler = ib_uverbs_cq_event_handler;
+ cq->cq_context = file;
+ atomic_set(&cq->usecnt, 0);
+
+retry:
+ if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_cq;
+ }
+
+ down(&ib_uverbs_idr_mutex);
+ ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->id);
+ up(&ib_uverbs_idr_mutex);
+
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ goto err_cq;
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_add_tail(&uobj->list, &file->ucontext->cq_list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ memset(&resp, 0, sizeof resp);
+ resp.cq_handle = uobj->id;
+ resp.cqe = cq->cqe;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_list;
+ }
+
+ return in_len;
+
+err_list:
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ down(&ib_uverbs_idr_mutex);
+ idr_remove(&ib_uverbs_cq_idr, uobj->id);
+ up(&ib_uverbs_idr_mutex);
+
+err_cq:
+ ib_destroy_cq(cq);
+
+err:
+ kfree(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_destroy_cq cmd;
+ struct ib_cq *cq;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+ if (!cq || cq->uobject->context != file->ucontext)
+ goto out;
+
+ uobj = cq->uobject;
+
+ ret = ib_destroy_cq(cq);
+ if (ret)
+ goto out;
+
+ idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle);
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ kfree(uobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_qp cmd;
+ struct ib_uverbs_create_qp_resp resp;
+ struct ib_udata udata;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ struct ib_cq *scq, *rcq;
+ struct ib_qp *qp;
+ struct ib_qp_init_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ down(&ib_uverbs_idr_mutex);
+
+ pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+ scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle);
+ rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle);
+
+ if (!pd || pd->uobject->context != file->ucontext ||
+ !scq || scq->uobject->context != file->ucontext ||
+ !rcq || rcq->uobject->context != file->ucontext) {
+ ret = -EINVAL;
+ goto err_up;
+ }
+
+ attr.event_handler = ib_uverbs_qp_event_handler;
+ attr.qp_context = file;
+ attr.send_cq = scq;
+ attr.recv_cq = rcq;
+ attr.srq = NULL;
+ attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+ attr.qp_type = cmd.qp_type;
+
+ attr.cap.max_send_wr = cmd.max_send_wr;
+ attr.cap.max_recv_wr = cmd.max_recv_wr;
+ attr.cap.max_send_sge = cmd.max_send_sge;
+ attr.cap.max_recv_sge = cmd.max_recv_sge;
+ attr.cap.max_inline_data = cmd.max_inline_data;
+
+ uobj->user_handle = cmd.user_handle;
+ uobj->context = file->ucontext;
+
+ qp = pd->device->create_qp(pd, &attr, &udata);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto err_up;
+ }
+
+ qp->device = pd->device;
+ qp->pd = pd;
+ qp->send_cq = attr.send_cq;
+ qp->recv_cq = attr.recv_cq;
+ qp->srq = attr.srq;
+ qp->uobject = uobj;
+ qp->event_handler = attr.event_handler;
+ qp->qp_context = attr.qp_context;
+ qp->qp_type = attr.qp_type;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&attr.send_cq->usecnt);
+ atomic_inc(&attr.recv_cq->usecnt);
+ if (attr.srq)
+ atomic_inc(&attr.srq->usecnt);
+
+ memset(&resp, 0, sizeof resp);
+ resp.qpn = qp->qp_num;
+
+retry:
+ if (!idr_pre_get(&ib_uverbs_qp_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_destroy;
+ }
+
+ ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->id);
+
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ goto err_destroy;
+
+ resp.qp_handle = uobj->id;
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_add_tail(&uobj->list, &file->ucontext->qp_list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_list;
+ }
+
+ up(&ib_uverbs_idr_mutex);
+
+ return in_len;
+
+err_list:
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+err_destroy:
+ ib_destroy_qp(qp);
+
+err_up:
+ up(&ib_uverbs_idr_mutex);
+
+ kfree(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_modify_qp cmd;
+ struct ib_qp *qp;
+ struct ib_qp_attr *attr;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ if (!attr)
+ return -ENOMEM;
+
+ down(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (!qp || qp->uobject->context != file->ucontext) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ attr->qp_state = cmd.qp_state;
+ attr->cur_qp_state = cmd.cur_qp_state;
+ attr->path_mtu = cmd.path_mtu;
+ attr->path_mig_state = cmd.path_mig_state;
+ attr->qkey = cmd.qkey;
+ attr->rq_psn = cmd.rq_psn;
+ attr->sq_psn = cmd.sq_psn;
+ attr->dest_qp_num = cmd.dest_qp_num;
+ attr->qp_access_flags = cmd.qp_access_flags;
+ attr->pkey_index = cmd.pkey_index;
+ attr->alt_pkey_index = cmd.pkey_index;
+ attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
+ attr->max_rd_atomic = cmd.max_rd_atomic;
+ attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
+ attr->min_rnr_timer = cmd.min_rnr_timer;
+ attr->port_num = cmd.port_num;
+ attr->timeout = cmd.timeout;
+ attr->retry_cnt = cmd.retry_cnt;
+ attr->rnr_retry = cmd.rnr_retry;
+ attr->alt_port_num = cmd.alt_port_num;
+ attr->alt_timeout = cmd.alt_timeout;
+
+ memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
+ attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
+ attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
+ attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
+ attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
+ attr->ah_attr.dlid = cmd.dest.dlid;
+ attr->ah_attr.sl = cmd.dest.sl;
+ attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
+ attr->ah_attr.static_rate = cmd.dest.static_rate;
+ attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
+ attr->ah_attr.port_num = cmd.dest.port_num;
+
+ memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
+ attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
+ attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
+ attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
+ attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
+ attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
+ attr->alt_ah_attr.sl = cmd.alt_dest.sl;
+ attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
+ attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
+ attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
+ attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
+
+ ret = ib_modify_qp(qp, attr, cmd.attr_mask);
+ if (ret)
+ goto out;
+
+ ret = in_len;
+
+out:
+ up(&ib_uverbs_idr_mutex);
+ kfree(attr);
+
+ return ret;
+}
+
+ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_destroy_qp cmd;
+ struct ib_qp *qp;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (!qp || qp->uobject->context != file->ucontext)
+ goto out;
+
+ uobj = qp->uobject;
+
+ ret = ib_destroy_qp(qp);
+ if (ret)
+ goto out;
+
+ idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle);
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ kfree(uobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_attach_mcast cmd;
+ struct ib_qp *qp;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (qp && qp->uobject->context == file->ucontext)
+ ret = ib_attach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
+
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_detach_mcast cmd;
+ struct ib_qp *qp;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (qp && qp->uobject->context == file->ucontext)
+ ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
+
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
new file mode 100644
index 00000000000..fbbe03d8c90
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -0,0 +1,698 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+
+#include <asm/uaccess.h>
+
+#include "uverbs.h"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("InfiniBand userspace verbs access");
+MODULE_LICENSE("Dual BSD/GPL");
+
+#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */
+
+enum {
+ IB_UVERBS_MAJOR = 231,
+ IB_UVERBS_BASE_MINOR = 192,
+ IB_UVERBS_MAX_DEVICES = 32
+};
+
+#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
+
+DECLARE_MUTEX(ib_uverbs_idr_mutex);
+DEFINE_IDR(ib_uverbs_pd_idr);
+DEFINE_IDR(ib_uverbs_mr_idr);
+DEFINE_IDR(ib_uverbs_mw_idr);
+DEFINE_IDR(ib_uverbs_ah_idr);
+DEFINE_IDR(ib_uverbs_cq_idr);
+DEFINE_IDR(ib_uverbs_qp_idr);
+
+static spinlock_t map_lock;
+static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
+
+static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len) = {
+ [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params,
+ [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
+ [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
+ [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
+ [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid,
+ [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey,
+ [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
+ [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
+ [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
+ [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
+ [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
+ [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
+ [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
+ [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
+ [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
+ [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
+ [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
+};
+
+static struct vfsmount *uverbs_event_mnt;
+
+static void ib_uverbs_add_one(struct ib_device *device);
+static void ib_uverbs_remove_one(struct ib_device *device);
+
+static int ib_dealloc_ucontext(struct ib_ucontext *context)
+{
+ struct ib_uobject *uobj, *tmp;
+
+ if (!context)
+ return 0;
+
+ down(&ib_uverbs_idr_mutex);
+
+ /* XXX Free AHs */
+
+ list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
+ struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id);
+ idr_remove(&ib_uverbs_qp_idr, uobj->id);
+ ib_destroy_qp(qp);
+ list_del(&uobj->list);
+ kfree(uobj);
+ }
+
+ list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
+ struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id);
+ idr_remove(&ib_uverbs_cq_idr, uobj->id);
+ ib_destroy_cq(cq);
+ list_del(&uobj->list);
+ kfree(uobj);
+ }
+
+ /* XXX Free SRQs */
+ /* XXX Free MWs */
+
+ list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
+ struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id);
+ struct ib_umem_object *memobj;
+
+ idr_remove(&ib_uverbs_mr_idr, uobj->id);
+ ib_dereg_mr(mr);
+
+ memobj = container_of(uobj, struct ib_umem_object, uobject);
+ ib_umem_release_on_close(mr->device, &memobj->umem);
+
+ list_del(&uobj->list);
+ kfree(memobj);
+ }
+
+ list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
+ struct ib_pd *pd = idr_find(&ib_uverbs_pd_idr, uobj->id);
+ idr_remove(&ib_uverbs_pd_idr, uobj->id);
+ ib_dealloc_pd(pd);
+ list_del(&uobj->list);
+ kfree(uobj);
+ }
+
+ up(&ib_uverbs_idr_mutex);
+
+ return context->device->dealloc_ucontext(context);
+}
+
+static void ib_uverbs_release_file(struct kref *ref)
+{
+ struct ib_uverbs_file *file =
+ container_of(ref, struct ib_uverbs_file, ref);
+
+ module_put(file->device->ib_dev->owner);
+ kfree(file);
+}
+
+static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_event_file *file = filp->private_data;
+ void *event;
+ int eventsz;
+ int ret = 0;
+
+ spin_lock_irq(&file->lock);
+
+ while (list_empty(&file->event_list) && file->fd >= 0) {
+ spin_unlock_irq(&file->lock);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(file->poll_wait,
+ !list_empty(&file->event_list) ||
+ file->fd < 0))
+ return -ERESTARTSYS;
+
+ spin_lock_irq(&file->lock);
+ }
+
+ if (file->fd < 0) {
+ spin_unlock_irq(&file->lock);
+ return -ENODEV;
+ }
+
+ if (file->is_async) {
+ event = list_entry(file->event_list.next,
+ struct ib_uverbs_async_event, list);
+ eventsz = sizeof (struct ib_uverbs_async_event_desc);
+ } else {
+ event = list_entry(file->event_list.next,
+ struct ib_uverbs_comp_event, list);
+ eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+ }
+
+ if (eventsz > count) {
+ ret = -EINVAL;
+ event = NULL;
+ } else
+ list_del(file->event_list.next);
+
+ spin_unlock_irq(&file->lock);
+
+ if (event) {
+ if (copy_to_user(buf, event, eventsz))
+ ret = -EFAULT;
+ else
+ ret = eventsz;
+ }
+
+ kfree(event);
+
+ return ret;
+}
+
+static unsigned int ib_uverbs_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ unsigned int pollflags = 0;
+ struct ib_uverbs_event_file *file = filp->private_data;
+
+ poll_wait(filp, &file->poll_wait, wait);
+
+ spin_lock_irq(&file->lock);
+ if (file->fd < 0)
+ pollflags = POLLERR;
+ else if (!list_empty(&file->event_list))
+ pollflags = POLLIN | POLLRDNORM;
+ spin_unlock_irq(&file->lock);
+
+ return pollflags;
+}
+
+static void ib_uverbs_event_release(struct ib_uverbs_event_file *file)
+{
+ struct list_head *entry, *tmp;
+
+ spin_lock_irq(&file->lock);
+ if (file->fd != -1) {
+ file->fd = -1;
+ list_for_each_safe(entry, tmp, &file->event_list)
+ if (file->is_async)
+ kfree(list_entry(entry, struct ib_uverbs_async_event, list));
+ else
+ kfree(list_entry(entry, struct ib_uverbs_comp_event, list));
+ }
+ spin_unlock_irq(&file->lock);
+}
+
+static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_event_file *file = filp->private_data;
+
+ ib_uverbs_event_release(file);
+ kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+
+ return 0;
+}
+
+static struct file_operations uverbs_event_fops = {
+ /*
+ * No .owner field since we artificially create event files,
+ * so there is no increment to the module reference count in
+ * the open path. All event files come from a uverbs command
+ * file, which already takes a module reference, so this is OK.
+ */
+ .read = ib_uverbs_event_read,
+ .poll = ib_uverbs_event_poll,
+ .release = ib_uverbs_event_close
+};
+
+void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+ struct ib_uverbs_file *file = cq_context;
+ struct ib_uverbs_comp_event *entry;
+ unsigned long flags;
+
+ entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ if (!entry)
+ return;
+
+ entry->desc.cq_handle = cq->uobject->user_handle;
+
+ spin_lock_irqsave(&file->comp_file[0].lock, flags);
+ list_add_tail(&entry->list, &file->comp_file[0].event_list);
+ spin_unlock_irqrestore(&file->comp_file[0].lock, flags);
+
+ wake_up_interruptible(&file->comp_file[0].poll_wait);
+}
+
+static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
+ __u64 element, __u64 event)
+{
+ struct ib_uverbs_async_event *entry;
+ unsigned long flags;
+
+ entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ if (!entry)
+ return;
+
+ entry->desc.element = element;
+ entry->desc.event_type = event;
+
+ spin_lock_irqsave(&file->async_file.lock, flags);
+ list_add_tail(&entry->list, &file->async_file.event_list);
+ spin_unlock_irqrestore(&file->async_file.lock, flags);
+
+ wake_up_interruptible(&file->async_file.poll_wait);
+}
+
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+{
+ ib_uverbs_async_handler(context_ptr,
+ event->element.cq->uobject->user_handle,
+ event->event);
+}
+
+void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
+{
+ ib_uverbs_async_handler(context_ptr,
+ event->element.qp->uobject->user_handle,
+ event->event);
+}
+
+static void ib_uverbs_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ struct ib_uverbs_file *file =
+ container_of(handler, struct ib_uverbs_file, event_handler);
+
+ ib_uverbs_async_handler(file, event->element.port_num, event->event);
+}
+
+static int ib_uverbs_event_init(struct ib_uverbs_event_file *file,
+ struct ib_uverbs_file *uverbs_file)
+{
+ struct file *filp;
+
+ spin_lock_init(&file->lock);
+ INIT_LIST_HEAD(&file->event_list);
+ init_waitqueue_head(&file->poll_wait);
+ file->uverbs_file = uverbs_file;
+
+ file->fd = get_unused_fd();
+ if (file->fd < 0)
+ return file->fd;
+
+ filp = get_empty_filp();
+ if (!filp) {
+ put_unused_fd(file->fd);
+ return -ENFILE;
+ }
+
+ filp->f_op = &uverbs_event_fops;
+ filp->f_vfsmnt = mntget(uverbs_event_mnt);
+ filp->f_dentry = dget(uverbs_event_mnt->mnt_root);
+ filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
+ filp->f_flags = O_RDONLY;
+ filp->f_mode = FMODE_READ;
+ filp->private_data = file;
+
+ fd_install(file->fd, filp);
+
+ return 0;
+}
+
+static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+ struct ib_uverbs_cmd_hdr hdr;
+
+ if (count < sizeof hdr)
+ return -EINVAL;
+
+ if (copy_from_user(&hdr, buf, sizeof hdr))
+ return -EFAULT;
+
+ if (hdr.in_words * 4 != count)
+ return -EINVAL;
+
+ if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table))
+ return -EINVAL;
+
+ if (!file->ucontext &&
+ hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS &&
+ hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
+ return -EINVAL;
+
+ return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
+ hdr.in_words * 4, hdr.out_words * 4);
+}
+
+static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+
+ if (!file->ucontext)
+ return -ENODEV;
+ else
+ return file->device->ib_dev->mmap(file->ucontext, vma);
+}
+
+static int ib_uverbs_open(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_device *dev =
+ container_of(inode->i_cdev, struct ib_uverbs_device, dev);
+ struct ib_uverbs_file *file;
+ int i = 0;
+ int ret;
+
+ if (!try_module_get(dev->ib_dev->owner))
+ return -ENODEV;
+
+ file = kmalloc(sizeof *file +
+ (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file),
+ GFP_KERNEL);
+ if (!file)
+ return -ENOMEM;
+
+ file->device = dev;
+ kref_init(&file->ref);
+
+ file->ucontext = NULL;
+
+ ret = ib_uverbs_event_init(&file->async_file, file);
+ if (ret)
+ goto err;
+
+ file->async_file.is_async = 1;
+
+ kref_get(&file->ref);
+
+ for (i = 0; i < dev->num_comp; ++i) {
+ ret = ib_uverbs_event_init(&file->comp_file[i], file);
+ if (ret)
+ goto err_async;
+ kref_get(&file->ref);
+ file->comp_file[i].is_async = 0;
+ }
+
+
+ filp->private_data = file;
+
+ INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev,
+ ib_uverbs_event_handler);
+ if (ib_register_event_handler(&file->event_handler))
+ goto err_async;
+
+ return 0;
+
+err_async:
+ while (i--)
+ ib_uverbs_event_release(&file->comp_file[i]);
+
+ ib_uverbs_event_release(&file->async_file);
+
+err:
+ kref_put(&file->ref, ib_uverbs_release_file);
+
+ return ret;
+}
+
+static int ib_uverbs_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+ int i;
+
+ ib_unregister_event_handler(&file->event_handler);
+ ib_uverbs_event_release(&file->async_file);
+ ib_dealloc_ucontext(file->ucontext);
+
+ for (i = 0; i < file->device->num_comp; ++i)
+ ib_uverbs_event_release(&file->comp_file[i]);
+
+ kref_put(&file->ref, ib_uverbs_release_file);
+
+ return 0;
+}
+
+static struct file_operations uverbs_fops = {
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
+ .open = ib_uverbs_open,
+ .release = ib_uverbs_close
+};
+
+static struct file_operations uverbs_mmap_fops = {
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
+ .mmap = ib_uverbs_mmap,
+ .open = ib_uverbs_open,
+ .release = ib_uverbs_close
+};
+
+static struct ib_client uverbs_client = {
+ .name = "uverbs",
+ .add = ib_uverbs_add_one,
+ .remove = ib_uverbs_remove_one
+};
+
+static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
+{
+ struct ib_uverbs_device *dev =
+ container_of(class_dev, struct ib_uverbs_device, class_dev);
+
+ return sprintf(buf, "%s\n", dev->ib_dev->name);
+}
+static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+
+static void ib_uverbs_release_class_dev(struct class_device *class_dev)
+{
+ struct ib_uverbs_device *dev =
+ container_of(class_dev, struct ib_uverbs_device, class_dev);
+
+ cdev_del(&dev->dev);
+ clear_bit(dev->devnum, dev_map);
+ kfree(dev);
+}
+
+static struct class uverbs_class = {
+ .name = "infiniband_verbs",
+ .release = ib_uverbs_release_class_dev
+};
+
+static ssize_t show_abi_version(struct class *class, char *buf)
+{
+ return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
+}
+static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static void ib_uverbs_add_one(struct ib_device *device)
+{
+ struct ib_uverbs_device *uverbs_dev;
+
+ if (!device->alloc_ucontext)
+ return;
+
+ uverbs_dev = kmalloc(sizeof *uverbs_dev, GFP_KERNEL);
+ if (!uverbs_dev)
+ return;
+
+ memset(uverbs_dev, 0, sizeof *uverbs_dev);
+
+ spin_lock(&map_lock);
+ uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
+ if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
+ spin_unlock(&map_lock);
+ goto err;
+ }
+ set_bit(uverbs_dev->devnum, dev_map);
+ spin_unlock(&map_lock);
+
+ uverbs_dev->ib_dev = device;
+ uverbs_dev->num_comp = 1;
+
+ if (device->mmap)
+ cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops);
+ else
+ cdev_init(&uverbs_dev->dev, &uverbs_fops);
+ uverbs_dev->dev.owner = THIS_MODULE;
+ kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum);
+ if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+ goto err;
+
+ uverbs_dev->class_dev.class = &uverbs_class;
+ uverbs_dev->class_dev.dev = device->dma_device;
+ uverbs_dev->class_dev.devt = uverbs_dev->dev.dev;
+ snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum);
+ if (class_device_register(&uverbs_dev->class_dev))
+ goto err_cdev;
+
+ if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev))
+ goto err_class;
+
+ ib_set_client_data(device, &uverbs_client, uverbs_dev);
+
+ return;
+
+err_class:
+ class_device_unregister(&uverbs_dev->class_dev);
+
+err_cdev:
+ cdev_del(&uverbs_dev->dev);
+ clear_bit(uverbs_dev->devnum, dev_map);
+
+err:
+ kfree(uverbs_dev);
+ return;
+}
+
+static void ib_uverbs_remove_one(struct ib_device *device)
+{
+ struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
+
+ if (!uverbs_dev)
+ return;
+
+ class_device_unregister(&uverbs_dev->class_dev);
+}
+
+static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
+{
+ return get_sb_pseudo(fs_type, "infinibandevent:", NULL,
+ INFINIBANDEVENTFS_MAGIC);
+}
+
+static struct file_system_type uverbs_event_fs = {
+ /* No owner field so module can be unloaded */
+ .name = "infinibandeventfs",
+ .get_sb = uverbs_event_get_sb,
+ .kill_sb = kill_litter_super
+};
+
+static int __init ib_uverbs_init(void)
+{
+ int ret;
+
+ spin_lock_init(&map_lock);
+
+ ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
+ "infiniband_verbs");
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register device number\n");
+ goto out;
+ }
+
+ ret = class_register(&uverbs_class);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
+ goto out_chrdev;
+ }
+
+ ret = class_create_file(&uverbs_class, &class_attr_abi_version);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
+ goto out_class;
+ }
+
+ ret = register_filesystem(&uverbs_event_fs);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n");
+ goto out_class;
+ }
+
+ uverbs_event_mnt = kern_mount(&uverbs_event_fs);
+ if (IS_ERR(uverbs_event_mnt)) {
+ ret = PTR_ERR(uverbs_event_mnt);
+ printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n");
+ goto out_fs;
+ }
+
+ ret = ib_register_client(&uverbs_client);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register client\n");
+ goto out_mnt;
+ }
+
+ return 0;
+
+out_mnt:
+ mntput(uverbs_event_mnt);
+
+out_fs:
+ unregister_filesystem(&uverbs_event_fs);
+
+out_class:
+ class_unregister(&uverbs_class);
+
+out_chrdev:
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+
+out:
+ return ret;
+}
+
+static void __exit ib_uverbs_cleanup(void)
+{
+ ib_unregister_client(&uverbs_client);
+ mntput(uverbs_event_mnt);
+ unregister_filesystem(&uverbs_event_fs);
+ class_unregister(&uverbs_class);
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+}
+
+module_init(ib_uverbs_init);
+module_exit(ib_uverbs_cleanup);
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c
new file mode 100644
index 00000000000..ed550f6595b
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_mem.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+
+#include "uverbs.h"
+
+struct ib_umem_account_work {
+ struct work_struct work;
+ struct mm_struct *mm;
+ unsigned long diff;
+};
+
+
+static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
+{
+ struct ib_umem_chunk *chunk, *tmp;
+ int i;
+
+ list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
+ dma_unmap_sg(dev->dma_device, chunk->page_list,
+ chunk->nents, DMA_BIDIRECTIONAL);
+ for (i = 0; i < chunk->nents; ++i) {
+ if (umem->writable && dirty)
+ set_page_dirty_lock(chunk->page_list[i].page);
+ put_page(chunk->page_list[i].page);
+ }
+
+ kfree(chunk);
+ }
+}
+
+int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
+ void *addr, size_t size, int write)
+{
+ struct page **page_list;
+ struct ib_umem_chunk *chunk;
+ unsigned long locked;
+ unsigned long lock_limit;
+ unsigned long cur_base;
+ unsigned long npages;
+ int ret = 0;
+ int off;
+ int i;
+
+ if (!can_do_mlock())
+ return -EPERM;
+
+ page_list = (struct page **) __get_free_page(GFP_KERNEL);
+ if (!page_list)
+ return -ENOMEM;
+
+ mem->user_base = (unsigned long) addr;
+ mem->length = size;
+ mem->offset = (unsigned long) addr & ~PAGE_MASK;
+ mem->page_size = PAGE_SIZE;
+ mem->writable = write;
+
+ INIT_LIST_HEAD(&mem->chunk_list);
+
+ npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;
+
+ down_write(&current->mm->mmap_sem);
+
+ locked = npages + current->mm->locked_vm;
+ lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+
+ if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ cur_base = (unsigned long) addr & PAGE_MASK;
+
+ while (npages) {
+ ret = get_user_pages(current, current->mm, cur_base,
+ min_t(int, npages,
+ PAGE_SIZE / sizeof (struct page *)),
+ 1, !write, page_list, NULL);
+
+ if (ret < 0)
+ goto out;
+
+ cur_base += ret * PAGE_SIZE;
+ npages -= ret;
+
+ off = 0;
+
+ while (ret) {
+ chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
+ min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
+ GFP_KERNEL);
+ if (!chunk) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
+ for (i = 0; i < chunk->nents; ++i) {
+ chunk->page_list[i].page = page_list[i + off];
+ chunk->page_list[i].offset = 0;
+ chunk->page_list[i].length = PAGE_SIZE;
+ }
+
+ chunk->nmap = dma_map_sg(dev->dma_device,
+ &chunk->page_list[0],
+ chunk->nents,
+ DMA_BIDIRECTIONAL);
+ if (chunk->nmap <= 0) {
+ for (i = 0; i < chunk->nents; ++i)
+ put_page(chunk->page_list[i].page);
+ kfree(chunk);
+
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret -= chunk->nents;
+ off += chunk->nents;
+ list_add_tail(&chunk->list, &mem->chunk_list);
+ }
+
+ ret = 0;
+ }
+
+out:
+ if (ret < 0)
+ __ib_umem_release(dev, mem, 0);
+ else
+ current->mm->locked_vm = locked;
+
+ up_write(&current->mm->mmap_sem);
+ free_page((unsigned long) page_list);
+
+ return ret;
+}
+
+void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
+{
+ __ib_umem_release(dev, umem, 1);
+
+ down_write(&current->mm->mmap_sem);
+ current->mm->locked_vm -=
+ PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+ up_write(&current->mm->mmap_sem);
+}
+
+static void ib_umem_account(void *work_ptr)
+{
+ struct ib_umem_account_work *work = work_ptr;
+
+ down_write(&work->mm->mmap_sem);
+ work->mm->locked_vm -= work->diff;
+ up_write(&work->mm->mmap_sem);
+ mmput(work->mm);
+ kfree(work);
+}
+
+void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
+{
+ struct ib_umem_account_work *work;
+ struct mm_struct *mm;
+
+ __ib_umem_release(dev, umem, 1);
+
+ mm = get_task_mm(current);
+ if (!mm)
+ return;
+
+ /*
+ * We may be called with the mm's mmap_sem already held. This
+ * can happen when a userspace munmap() is the call that drops
+ * the last reference to our file and calls our release
+ * method. If there are memory regions to destroy, we'll end
+ * up here and not be able to take the mmap_sem. Therefore we
+ * defer the vm_locked accounting to the system workqueue.
+ */
+
+ work = kmalloc(sizeof *work, GFP_KERNEL);
+ if (!work)
+ return;
+
+ INIT_WORK(&work->work, ib_umem_account, work);
+ work->mm = mm;
+ work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+
+ schedule_work(&work->work);
+}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 7c08ed0cd7d..2516f964651 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -4,6 +4,7 @@
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -47,10 +48,11 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device)
{
struct ib_pd *pd;
- pd = device->alloc_pd(device);
+ pd = device->alloc_pd(device, NULL, NULL);
if (!IS_ERR(pd)) {
- pd->device = device;
+ pd->device = device;
+ pd->uobject = NULL;
atomic_set(&pd->usecnt, 0);
}
@@ -76,8 +78,9 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
ah = pd->device->create_ah(pd, ah_attr);
if (!IS_ERR(ah)) {
- ah->device = pd->device;
- ah->pd = pd;
+ ah->device = pd->device;
+ ah->pd = pd;
+ ah->uobject = NULL;
atomic_inc(&pd->usecnt);
}
@@ -122,7 +125,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
{
struct ib_qp *qp;
- qp = pd->device->create_qp(pd, qp_init_attr);
+ qp = pd->device->create_qp(pd, qp_init_attr, NULL);
if (!IS_ERR(qp)) {
qp->device = pd->device;
@@ -130,6 +133,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
qp->send_cq = qp_init_attr->send_cq;
qp->recv_cq = qp_init_attr->recv_cq;
qp->srq = qp_init_attr->srq;
+ qp->uobject = NULL;
qp->event_handler = qp_init_attr->event_handler;
qp->qp_context = qp_init_attr->qp_context;
qp->qp_type = qp_init_attr->qp_type;
@@ -197,10 +201,11 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
{
struct ib_cq *cq;
- cq = device->create_cq(device, cqe);
+ cq = device->create_cq(device, cqe, NULL, NULL);
if (!IS_ERR(cq)) {
cq->device = device;
+ cq->uobject = NULL;
cq->comp_handler = comp_handler;
cq->event_handler = event_handler;
cq->cq_context = cq_context;
@@ -245,8 +250,9 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
mr = pd->device->get_dma_mr(pd, mr_access_flags);
if (!IS_ERR(mr)) {
- mr->device = pd->device;
- mr->pd = pd;
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
}
@@ -267,8 +273,9 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
mr_access_flags, iova_start);
if (!IS_ERR(mr)) {
- mr->device = pd->device;
- mr->pd = pd;
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
}
@@ -344,8 +351,9 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
mw = pd->device->alloc_mw(pd);
if (!IS_ERR(mw)) {
- mw->device = pd->device;
- mw->pd = pd;
+ mw->device = pd->device;
+ mw->pd = pd;
+ mw->uobject = NULL;
atomic_inc(&pd->usecnt);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index 085baf393ca..d58dcbe6648 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index cd9ed958d92..1557a522d83 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -431,6 +431,36 @@ static int mthca_cmd_imm(struct mthca_dev *dev,
timeout, status);
}
+int mthca_cmd_init(struct mthca_dev *dev)
+{
+ sema_init(&dev->cmd.hcr_sem, 1);
+ sema_init(&dev->cmd.poll_sem, 1);
+ dev->cmd.use_events = 0;
+
+ dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE,
+ MTHCA_HCR_SIZE);
+ if (!dev->hcr) {
+ mthca_err(dev, "Couldn't map command register.");
+ return -ENOMEM;
+ }
+
+ dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev,
+ MTHCA_MAILBOX_SIZE,
+ MTHCA_MAILBOX_SIZE, 0);
+ if (!dev->cmd.pool) {
+ iounmap(dev->hcr);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void mthca_cmd_cleanup(struct mthca_dev *dev)
+{
+ pci_pool_destroy(dev->cmd.pool);
+ iounmap(dev->hcr);
+}
+
/*
* Switch to using events to issue FW commands (should be called after
* event queue to command events has been initialized).
@@ -489,6 +519,33 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
up(&dev->cmd.poll_sem);
}
+struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
+ unsigned int gfp_mask)
+{
+ struct mthca_mailbox *mailbox;
+
+ mailbox = kmalloc(sizeof *mailbox, gfp_mask);
+ if (!mailbox)
+ return ERR_PTR(-ENOMEM);
+
+ mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma);
+ if (!mailbox->buf) {
+ kfree(mailbox);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return mailbox;
+}
+
+void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox)
+{
+ if (!mailbox)
+ return;
+
+ pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
+ kfree(mailbox);
+}
+
int mthca_SYS_EN(struct mthca_dev *dev, u8 *status)
{
u64 out;
@@ -513,20 +570,20 @@ int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status)
static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
u64 virt, u8 *status)
{
- u32 *inbox;
- dma_addr_t indma;
+ struct mthca_mailbox *mailbox;
struct mthca_icm_iter iter;
+ __be64 *pages;
int lg;
int nent = 0;
int i;
int err = 0;
int ts = 0, tc = 0;
- inbox = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &indma);
- if (!inbox)
- return -ENOMEM;
-
- memset(inbox, 0, PAGE_SIZE);
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ memset(mailbox->buf, 0, MTHCA_MAILBOX_SIZE);
+ pages = mailbox->buf;
for (mthca_icm_first(icm, &iter);
!mthca_icm_last(&iter);
@@ -546,19 +603,17 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
}
for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) {
if (virt != -1) {
- *((__be64 *) (inbox + nent * 4)) =
- cpu_to_be64(virt);
+ pages[nent * 2] = cpu_to_be64(virt);
virt += 1 << lg;
}
- *((__be64 *) (inbox + nent * 4 + 2)) =
- cpu_to_be64((mthca_icm_addr(&iter) +
- (i << lg)) | (lg - 12));
+ pages[nent * 2 + 1] = cpu_to_be64((mthca_icm_addr(&iter) +
+ (i << lg)) | (lg - 12));
ts += 1 << (lg - 10);
++tc;
- if (nent == PAGE_SIZE / 16) {
- err = mthca_cmd(dev, indma, nent, 0, op,
+ if (nent == MTHCA_MAILBOX_SIZE / 16) {
+ err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
CMD_TIME_CLASS_B, status);
if (err || *status)
goto out;
@@ -568,7 +623,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
}
if (nent)
- err = mthca_cmd(dev, indma, nent, 0, op,
+ err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
CMD_TIME_CLASS_B, status);
switch (op) {
@@ -585,7 +640,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
}
out:
- pci_free_consistent(dev->pdev, PAGE_SIZE, inbox, indma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
@@ -606,8 +661,8 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)
int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
{
+ struct mthca_mailbox *mailbox;
u32 *outbox;
- dma_addr_t outdma;
int err = 0;
u8 lg;
@@ -625,12 +680,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
#define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40
#define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48
- outbox = pci_alloc_consistent(dev->pdev, QUERY_FW_OUT_SIZE, &outdma);
- if (!outbox) {
- return -ENOMEM;
- }
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
- err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_FW,
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW,
CMD_TIME_CLASS_A, status);
if (err)
@@ -681,15 +736,15 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
}
out:
- pci_free_consistent(dev->pdev, QUERY_FW_OUT_SIZE, outbox, outdma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
{
+ struct mthca_mailbox *mailbox;
u8 info;
u32 *outbox;
- dma_addr_t outdma;
int err = 0;
#define ENABLE_LAM_OUT_SIZE 0x100
@@ -700,11 +755,12 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
#define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4)
#define ENABLE_LAM_INFO_ECC_MASK 0x3
- outbox = pci_alloc_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, &outdma);
- if (!outbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
- err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_ENABLE_LAM,
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM,
CMD_TIME_CLASS_C, status);
if (err)
@@ -733,7 +789,7 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
(unsigned long long) dev->ddr_end);
out:
- pci_free_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, outbox, outdma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
@@ -744,9 +800,9 @@ int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status)
int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
{
+ struct mthca_mailbox *mailbox;
u8 info;
u32 *outbox;
- dma_addr_t outdma;
int err = 0;
#define QUERY_DDR_OUT_SIZE 0x100
@@ -757,11 +813,12 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
#define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4)
#define QUERY_DDR_INFO_ECC_MASK 0x3
- outbox = pci_alloc_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, &outdma);
- if (!outbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
- err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DDR,
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR,
CMD_TIME_CLASS_A, status);
if (err)
@@ -787,15 +844,15 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
(unsigned long long) dev->ddr_end);
out:
- pci_free_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, outbox, outdma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
struct mthca_dev_lim *dev_lim, u8 *status)
{
+ struct mthca_mailbox *mailbox;
u32 *outbox;
- dma_addr_t outdma;
u8 field;
u16 size;
int err;
@@ -860,11 +917,12 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
#define QUERY_DEV_LIM_LAMR_OFFSET 0x9f
#define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0
- outbox = pci_alloc_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, &outdma);
- if (!outbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
- err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DEV_LIM,
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM,
CMD_TIME_CLASS_A, status);
if (err)
@@ -1020,15 +1078,15 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
}
out:
- pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
struct mthca_adapter *adapter, u8 *status)
{
+ struct mthca_mailbox *mailbox;
u32 *outbox;
- dma_addr_t outdma;
int err;
#define QUERY_ADAPTER_OUT_SIZE 0x100
@@ -1037,23 +1095,24 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
#define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08
#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10
- outbox = pci_alloc_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, &outdma);
- if (!outbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
- err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_ADAPTER,
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER,
CMD_TIME_CLASS_A, status);
if (err)
goto out;
- MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET);
- MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET);
+ MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET);
+ MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET);
MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
- MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
+ MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
out:
- pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
@@ -1061,8 +1120,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
struct mthca_init_hca_param *param,
u8 *status)
{
+ struct mthca_mailbox *mailbox;
u32 *inbox;
- dma_addr_t indma;
int err;
#define INIT_HCA_IN_SIZE 0x200
@@ -1102,9 +1161,10 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
#define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10)
#define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18)
- inbox = pci_alloc_consistent(dev->pdev, INIT_HCA_IN_SIZE, &indma);
- if (!inbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
memset(inbox, 0, INIT_HCA_IN_SIZE);
@@ -1167,10 +1227,9 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET);
}
- err = mthca_cmd(dev, indma, 0, 0, CMD_INIT_HCA,
- HZ, status);
+ err = mthca_cmd(dev, mailbox->dma, 0, 0, CMD_INIT_HCA, HZ, status);
- pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
@@ -1178,8 +1237,8 @@ int mthca_INIT_IB(struct mthca_dev *dev,
struct mthca_init_ib_param *param,
int port, u8 *status)
{
+ struct mthca_mailbox *mailbox;
u32 *inbox;
- dma_addr_t indma;
int err;
u32 flags;
@@ -1199,9 +1258,10 @@ int mthca_INIT_IB(struct mthca_dev *dev,
#define INIT_IB_NODE_GUID_OFFSET 0x18
#define INIT_IB_SI_GUID_OFFSET 0x20
- inbox = pci_alloc_consistent(dev->pdev, INIT_IB_IN_SIZE, &indma);
- if (!inbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
memset(inbox, 0, INIT_IB_IN_SIZE);
@@ -1221,10 +1281,10 @@ int mthca_INIT_IB(struct mthca_dev *dev,
MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET);
MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET);
- err = mthca_cmd(dev, indma, port, 0, CMD_INIT_IB,
+ err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB,
CMD_TIME_CLASS_A, status);
- pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
@@ -1241,8 +1301,8 @@ int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status)
int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
int port, u8 *status)
{
+ struct mthca_mailbox *mailbox;
u32 *inbox;
- dma_addr_t indma;
int err;
u32 flags = 0;
@@ -1253,9 +1313,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
#define SET_IB_CAP_MASK_OFFSET 0x04
#define SET_IB_SI_GUID_OFFSET 0x08
- inbox = pci_alloc_consistent(dev->pdev, SET_IB_IN_SIZE, &indma);
- if (!inbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
memset(inbox, 0, SET_IB_IN_SIZE);
@@ -1266,10 +1327,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET);
MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET);
- err = mthca_cmd(dev, indma, port, 0, CMD_SET_IB,
+ err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB,
CMD_TIME_CLASS_B, status);
- pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
@@ -1280,20 +1341,22 @@ int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *st
int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status)
{
+ struct mthca_mailbox *mailbox;
u64 *inbox;
- dma_addr_t indma;
int err;
- inbox = pci_alloc_consistent(dev->pdev, 16, &indma);
- if (!inbox)
- return -ENOMEM;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
inbox[0] = cpu_to_be64(virt);
inbox[1] = cpu_to_be64(dma_addr);
- err = mthca_cmd(dev, indma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status);
+ err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM,
+ CMD_TIME_CLASS_B, status);
- pci_free_consistent(dev->pdev, 16, inbox, indma);
+ mthca_free_mailbox(dev, mailbox);
if (!err)
mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n",
@@ -1338,69 +1401,26 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
return 0;
}
-int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
+int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int mpt_index, u8 *status)
{
- dma_addr_t indma;
- int err;
-
- indma = pci_map_single(dev->pdev, mpt_entry,
- MTHCA_MPT_ENTRY_SIZE,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
-
- err = mthca_cmd(dev, indma, mpt_index, 0, CMD_SW2HW_MPT,
- CMD_TIME_CLASS_B, status);
-
- pci_unmap_single(dev->pdev, indma,
- MTHCA_MPT_ENTRY_SIZE, PCI_DMA_TODEVICE);
- return err;
+ return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT,
+ CMD_TIME_CLASS_B, status);
}
-int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
+int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int mpt_index, u8 *status)
{
- dma_addr_t outdma = 0;
- int err;
-
- if (mpt_entry) {
- outdma = pci_map_single(dev->pdev, mpt_entry,
- MTHCA_MPT_ENTRY_SIZE,
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(outdma))
- return -ENOMEM;
- }
-
- err = mthca_cmd_box(dev, 0, outdma, mpt_index, !mpt_entry,
- CMD_HW2SW_MPT,
- CMD_TIME_CLASS_B, status);
-
- if (mpt_entry)
- pci_unmap_single(dev->pdev, outdma,
- MTHCA_MPT_ENTRY_SIZE,
- PCI_DMA_FROMDEVICE);
- return err;
+ return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
+ !mailbox, CMD_HW2SW_MPT,
+ CMD_TIME_CLASS_B, status);
}
-int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
+int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int num_mtt, u8 *status)
{
- dma_addr_t indma;
- int err;
-
- indma = pci_map_single(dev->pdev, mtt_entry,
- (num_mtt + 2) * 8,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
-
- err = mthca_cmd(dev, indma, num_mtt, 0, CMD_WRITE_MTT,
- CMD_TIME_CLASS_B, status);
-
- pci_unmap_single(dev->pdev, indma,
- (num_mtt + 2) * 8, PCI_DMA_TODEVICE);
- return err;
+ return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT,
+ CMD_TIME_CLASS_B, status);
}
int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status)
@@ -1418,92 +1438,38 @@ int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status);
}
-int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
+int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int eq_num, u8 *status)
{
- dma_addr_t indma;
- int err;
-
- indma = pci_map_single(dev->pdev, eq_context,
- MTHCA_EQ_CONTEXT_SIZE,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
-
- err = mthca_cmd(dev, indma, eq_num, 0, CMD_SW2HW_EQ,
- CMD_TIME_CLASS_A, status);
-
- pci_unmap_single(dev->pdev, indma,
- MTHCA_EQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
- return err;
+ return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ,
+ CMD_TIME_CLASS_A, status);
}
-int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context,
+int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int eq_num, u8 *status)
{
- dma_addr_t outdma = 0;
- int err;
-
- outdma = pci_map_single(dev->pdev, eq_context,
- MTHCA_EQ_CONTEXT_SIZE,
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(outdma))
- return -ENOMEM;
-
- err = mthca_cmd_box(dev, 0, outdma, eq_num, 0,
- CMD_HW2SW_EQ,
- CMD_TIME_CLASS_A, status);
-
- pci_unmap_single(dev->pdev, outdma,
- MTHCA_EQ_CONTEXT_SIZE,
- PCI_DMA_FROMDEVICE);
- return err;
+ return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0,
+ CMD_HW2SW_EQ,
+ CMD_TIME_CLASS_A, status);
}
-int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context,
+int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int cq_num, u8 *status)
{
- dma_addr_t indma;
- int err;
-
- indma = pci_map_single(dev->pdev, cq_context,
- MTHCA_CQ_CONTEXT_SIZE,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
-
- err = mthca_cmd(dev, indma, cq_num, 0, CMD_SW2HW_CQ,
+ return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ,
CMD_TIME_CLASS_A, status);
-
- pci_unmap_single(dev->pdev, indma,
- MTHCA_CQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
- return err;
}
-int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context,
+int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int cq_num, u8 *status)
{
- dma_addr_t outdma = 0;
- int err;
-
- outdma = pci_map_single(dev->pdev, cq_context,
- MTHCA_CQ_CONTEXT_SIZE,
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(outdma))
- return -ENOMEM;
-
- err = mthca_cmd_box(dev, 0, outdma, cq_num, 0,
- CMD_HW2SW_CQ,
- CMD_TIME_CLASS_A, status);
-
- pci_unmap_single(dev->pdev, outdma,
- MTHCA_CQ_CONTEXT_SIZE,
- PCI_DMA_FROMDEVICE);
- return err;
+ return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0,
+ CMD_HW2SW_CQ,
+ CMD_TIME_CLASS_A, status);
}
int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
- int is_ee, void *qp_context, u32 optmask,
+ int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
u8 *status)
{
static const u16 op[] = {
@@ -1520,36 +1486,34 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
[MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE
};
u8 op_mod = 0;
-
- dma_addr_t indma;
+ int my_mailbox = 0;
int err;
if (trans < 0 || trans >= ARRAY_SIZE(op))
return -EINVAL;
if (trans == MTHCA_TRANS_ANY2RST) {
- indma = 0;
op_mod = 3; /* don't write outbox, any->reset */
/* For debugging */
- qp_context = pci_alloc_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
- &indma);
- op_mod = 2; /* write outbox, any->reset */
+ if (!mailbox) {
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (!IS_ERR(mailbox)) {
+ my_mailbox = 1;
+ op_mod = 2; /* write outbox, any->reset */
+ } else
+ mailbox = NULL;
+ }
} else {
- indma = pci_map_single(dev->pdev, qp_context,
- MTHCA_QP_CONTEXT_SIZE,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
-
if (0) {
int i;
mthca_dbg(dev, "Dumping QP context:\n");
- printk(" opt param mask: %08x\n", be32_to_cpup(qp_context));
+ printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf));
for (i = 0; i < 0x100 / 4; ++i) {
if (i % 8 == 0)
printk(" [%02x] ", i * 4);
- printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
+ printk(" %08x",
+ be32_to_cpu(((u32 *) mailbox->buf)[i + 2]));
if ((i + 1) % 8 == 0)
printk("\n");
}
@@ -1557,55 +1521,39 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
}
if (trans == MTHCA_TRANS_ANY2RST) {
- err = mthca_cmd_box(dev, 0, indma, (!!is_ee << 24) | num,
- op_mod, op[trans], CMD_TIME_CLASS_C, status);
+ err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
+ (!!is_ee << 24) | num, op_mod,
+ op[trans], CMD_TIME_CLASS_C, status);
- if (0) {
+ if (0 && mailbox) {
int i;
mthca_dbg(dev, "Dumping QP context:\n");
- printk(" %08x\n", be32_to_cpup(qp_context));
+ printk(" %08x\n", be32_to_cpup(mailbox->buf));
for (i = 0; i < 0x100 / 4; ++i) {
if (i % 8 == 0)
printk("[%02x] ", i * 4);
- printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
+ printk(" %08x",
+ be32_to_cpu(((u32 *) mailbox->buf)[i + 2]));
if ((i + 1) % 8 == 0)
printk("\n");
}
}
} else
- err = mthca_cmd(dev, indma, (!!is_ee << 24) | num,
+ err = mthca_cmd(dev, mailbox->dma, (!!is_ee << 24) | num,
op_mod, op[trans], CMD_TIME_CLASS_C, status);
- if (trans != MTHCA_TRANS_ANY2RST)
- pci_unmap_single(dev->pdev, indma,
- MTHCA_QP_CONTEXT_SIZE, PCI_DMA_TODEVICE);
- else
- pci_free_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
- qp_context, indma);
+ if (my_mailbox)
+ mthca_free_mailbox(dev, mailbox);
+
return err;
}
int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
- void *qp_context, u8 *status)
+ struct mthca_mailbox *mailbox, u8 *status)
{
- dma_addr_t outdma = 0;
- int err;
-
- outdma = pci_map_single(dev->pdev, qp_context,
- MTHCA_QP_CONTEXT_SIZE,
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(outdma))
- return -ENOMEM;
-
- err = mthca_cmd_box(dev, 0, outdma, (!!is_ee << 24) | num, 0,
- CMD_QUERY_QPEE,
- CMD_TIME_CLASS_A, status);
-
- pci_unmap_single(dev->pdev, outdma,
- MTHCA_QP_CONTEXT_SIZE,
- PCI_DMA_FROMDEVICE);
- return err;
+ return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0,
+ CMD_QUERY_QPEE, CMD_TIME_CLASS_A, status);
}
int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
@@ -1635,11 +1583,11 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
}
int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
- int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
+ int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad, u8 *status)
{
- void *box;
- dma_addr_t dma;
+ struct mthca_mailbox *inmailbox, *outmailbox;
+ void *inbox;
int err;
u32 in_modifier = port;
u8 op_modifier = 0;
@@ -1653,11 +1601,18 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
#define MAD_IFC_PKEY_OFFSET 0x10e
#define MAD_IFC_GRH_OFFSET 0x140
- box = pci_alloc_consistent(dev->pdev, MAD_IFC_BOX_SIZE, &dma);
- if (!box)
- return -ENOMEM;
+ inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(inmailbox))
+ return PTR_ERR(inmailbox);
+ inbox = inmailbox->buf;
- memcpy(box, in_mad, 256);
+ outmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(outmailbox)) {
+ mthca_free_mailbox(dev, inmailbox);
+ return PTR_ERR(outmailbox);
+ }
+
+ memcpy(inbox, in_mad, 256);
/*
* Key check traps can't be generated unless we have in_wc to
@@ -1671,97 +1626,65 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
if (in_wc) {
u8 val;
- memset(box + 256, 0, 256);
+ memset(inbox + 256, 0, 256);
- MTHCA_PUT(box, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET);
- MTHCA_PUT(box, in_wc->src_qp, MAD_IFC_RQPN_OFFSET);
+ MTHCA_PUT(inbox, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET);
+ MTHCA_PUT(inbox, in_wc->src_qp, MAD_IFC_RQPN_OFFSET);
val = in_wc->sl << 4;
- MTHCA_PUT(box, val, MAD_IFC_SL_OFFSET);
+ MTHCA_PUT(inbox, val, MAD_IFC_SL_OFFSET);
val = in_wc->dlid_path_bits |
(in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
- MTHCA_PUT(box, val, MAD_IFC_GRH_OFFSET);
+ MTHCA_PUT(inbox, val, MAD_IFC_GRH_OFFSET);
- MTHCA_PUT(box, in_wc->slid, MAD_IFC_RLID_OFFSET);
- MTHCA_PUT(box, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
+ MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET);
+ MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
if (in_grh)
- memcpy((u8 *) box + MAD_IFC_GRH_OFFSET, in_grh, 40);
+ memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40);
op_modifier |= 0x10;
in_modifier |= in_wc->slid << 16;
}
- err = mthca_cmd_box(dev, dma, dma + 512, in_modifier, op_modifier,
+ err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma,
+ in_modifier, op_modifier,
CMD_MAD_IFC, CMD_TIME_CLASS_C, status);
if (!err && !*status)
- memcpy(response_mad, box + 512, 256);
+ memcpy(response_mad, outmailbox->buf, 256);
- pci_free_consistent(dev->pdev, MAD_IFC_BOX_SIZE, box, dma);
+ mthca_free_mailbox(dev, inmailbox);
+ mthca_free_mailbox(dev, outmailbox);
return err;
}
-int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
- u8 *status)
+int mthca_READ_MGM(struct mthca_dev *dev, int index,
+ struct mthca_mailbox *mailbox, u8 *status)
{
- dma_addr_t outdma = 0;
- int err;
-
- outdma = pci_map_single(dev->pdev, mgm,
- MTHCA_MGM_ENTRY_SIZE,
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(outdma))
- return -ENOMEM;
-
- err = mthca_cmd_box(dev, 0, outdma, index, 0,
- CMD_READ_MGM,
- CMD_TIME_CLASS_A, status);
-
- pci_unmap_single(dev->pdev, outdma,
- MTHCA_MGM_ENTRY_SIZE,
- PCI_DMA_FROMDEVICE);
- return err;
+ return mthca_cmd_box(dev, 0, mailbox->dma, index, 0,
+ CMD_READ_MGM, CMD_TIME_CLASS_A, status);
}
-int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm,
- u8 *status)
+int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
+ struct mthca_mailbox *mailbox, u8 *status)
{
- dma_addr_t indma;
- int err;
-
- indma = pci_map_single(dev->pdev, mgm,
- MTHCA_MGM_ENTRY_SIZE,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
-
- err = mthca_cmd(dev, indma, index, 0, CMD_WRITE_MGM,
- CMD_TIME_CLASS_A, status);
-
- pci_unmap_single(dev->pdev, indma,
- MTHCA_MGM_ENTRY_SIZE, PCI_DMA_TODEVICE);
- return err;
+ return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM,
+ CMD_TIME_CLASS_A, status);
}
-int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
- u8 *status)
+int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ u16 *hash, u8 *status)
{
- dma_addr_t indma;
u64 imm;
int err;
- indma = pci_map_single(dev->pdev, gid, 16, PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(indma))
- return -ENOMEM;
-
- err = mthca_cmd_imm(dev, indma, &imm, 0, 0, CMD_MGID_HASH,
+ err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH,
CMD_TIME_CLASS_A, status);
- *hash = imm;
- pci_unmap_single(dev->pdev, indma, 16, PCI_DMA_TODEVICE);
+ *hash = imm;
return err;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index adf039b3c54..ed517f175dd 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -37,8 +37,7 @@
#include <ib_verbs.h>
-#define MTHCA_CMD_MAILBOX_ALIGN 16UL
-#define MTHCA_CMD_MAILBOX_EXTRA (MTHCA_CMD_MAILBOX_ALIGN - 1)
+#define MTHCA_MAILBOX_SIZE 4096
enum {
/* command completed successfully: */
@@ -112,6 +111,11 @@ enum {
DEV_LIM_FLAG_UD_MULTI = 1 << 21,
};
+struct mthca_mailbox {
+ dma_addr_t dma;
+ void *buf;
+};
+
struct mthca_dev_lim {
int max_srq_sz;
int max_qp_sz;
@@ -235,11 +239,17 @@ struct mthca_set_ib_param {
u32 cap_mask;
};
+int mthca_cmd_init(struct mthca_dev *dev);
+void mthca_cmd_cleanup(struct mthca_dev *dev);
int mthca_cmd_use_events(struct mthca_dev *dev);
void mthca_cmd_use_polling(struct mthca_dev *dev);
void mthca_cmd_event(struct mthca_dev *dev, u16 token,
u8 status, u64 out_param);
+struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
+ unsigned int gfp_mask);
+void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
+
int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);
int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status);
int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
@@ -270,41 +280,39 @@ int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status);
int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
u8 *status);
-int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
+int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int mpt_index, u8 *status);
-int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
+int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int mpt_index, u8 *status);
-int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
+int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int num_mtt, u8 *status);
int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status);
int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
int eq_num, u8 *status);
-int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
+int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int eq_num, u8 *status);
-int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context,
+int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int eq_num, u8 *status);
-int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context,
+int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int cq_num, u8 *status);
-int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context,
+int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int cq_num, u8 *status);
int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
- int is_ee, void *qp_context, u32 optmask,
+ int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
u8 *status);
int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
- void *qp_context, u8 *status);
+ struct mthca_mailbox *mailbox, u8 *status);
int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
u8 *status);
int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
- int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
+ int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad, u8 *status);
-int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
- u8 *status);
-int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm,
- u8 *status);
-int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
- u8 *status);
+int mthca_READ_MGM(struct mthca_dev *dev, int index,
+ struct mthca_mailbox *mailbox, u8 *status);
+int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
+ struct mthca_mailbox *mailbox, u8 *status);
+int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ u16 *hash, u8 *status);
int mthca_NOP(struct mthca_dev *dev, u8 *status);
-#define MAILBOX_ALIGN(x) ((void *) ALIGN((unsigned long) (x), MTHCA_CMD_MAILBOX_ALIGN))
-
#endif /* MTHCA_CMD_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 2bf347b84c3..b5aea7b869f 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -1,5 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -171,6 +173,17 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe)
cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW;
}
+static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
+{
+ __be32 *cqe = cqe_ptr;
+
+ (void) cqe; /* avoid warning if mthca_dbg compiled away... */
+ mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]),
+ be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]),
+ be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7]));
+}
+
/*
* incr is ignored in native Arbel (mem-free) mode, so cq->cons_index
* should be correct before calling update_cons_index().
@@ -280,16 +293,12 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
int dbd;
u32 new_wqe;
- if (1 && cqe->syndrome != SYNDROME_WR_FLUSH_ERR) {
- int j;
-
- mthca_dbg(dev, "%x/%d: error CQE -> QPN %06x, WQE @ %08x\n",
- cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
- be32_to_cpu(cqe->wqe));
-
- for (j = 0; j < 8; ++j)
- printk(KERN_DEBUG " [%2x] %08x\n",
- j * 4, be32_to_cpu(((u32 *) cqe)[j]));
+ if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) {
+ mthca_dbg(dev, "local QP operation err "
+ "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n",
+ be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe),
+ cq->cqn, cq->cons_index);
+ dump_cqe(dev, cqe);
}
/*
@@ -377,15 +386,6 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
return 0;
}
-static void dump_cqe(struct mthca_cqe *cqe)
-{
- int j;
-
- for (j = 0; j < 8; ++j)
- printk(KERN_DEBUG " [%2x] %08x\n",
- j * 4, be32_to_cpu(((u32 *) cqe)[j]));
-}
-
static inline int mthca_poll_one(struct mthca_dev *dev,
struct mthca_cq *cq,
struct mthca_qp **cur_qp,
@@ -414,8 +414,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n",
cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
be32_to_cpu(cqe->wqe));
-
- dump_cqe(cqe);
+ dump_cqe(dev, cqe);
}
is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
@@ -638,19 +637,19 @@ static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq)
int size;
if (cq->is_direct)
- pci_free_consistent(dev->pdev,
- (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
- cq->queue.direct.buf,
- pci_unmap_addr(&cq->queue.direct,
- mapping));
+ dma_free_coherent(&dev->pdev->dev,
+ (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
+ cq->queue.direct.buf,
+ pci_unmap_addr(&cq->queue.direct,
+ mapping));
else {
size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE;
for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
if (cq->queue.page_list[i].buf)
- pci_free_consistent(dev->pdev, PAGE_SIZE,
- cq->queue.page_list[i].buf,
- pci_unmap_addr(&cq->queue.page_list[i],
- mapping));
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ cq->queue.page_list[i].buf,
+ pci_unmap_addr(&cq->queue.page_list[i],
+ mapping));
kfree(cq->queue.page_list);
}
@@ -670,8 +669,8 @@ static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size,
npages = 1;
shift = get_order(size) + PAGE_SHIFT;
- cq->queue.direct.buf = pci_alloc_consistent(dev->pdev,
- size, &t);
+ cq->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev,
+ size, &t, GFP_KERNEL);
if (!cq->queue.direct.buf)
return -ENOMEM;
@@ -709,7 +708,8 @@ static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size,
for (i = 0; i < npages; ++i) {
cq->queue.page_list[i].buf =
- pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
+ dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+ &t, GFP_KERNEL);
if (!cq->queue.page_list[i].buf)
goto err_free;
@@ -743,10 +743,11 @@ err_out:
}
int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq)
{
int size = nent * MTHCA_CQ_ENTRY_SIZE;
- void *mailbox = NULL;
+ struct mthca_mailbox *mailbox;
struct mthca_cq_context *cq_context;
int err = -ENOMEM;
u8 status;
@@ -754,45 +755,49 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
might_sleep();
- cq->ibcq.cqe = nent - 1;
+ cq->ibcq.cqe = nent - 1;
+ cq->is_kernel = !ctx;
cq->cqn = mthca_alloc(&dev->cq_table.alloc);
if (cq->cqn == -1)
return -ENOMEM;
if (mthca_is_memfree(dev)) {
- cq->arm_sn = 1;
-
err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
if (err)
goto err_out;
- err = -ENOMEM;
+ if (cq->is_kernel) {
+ cq->arm_sn = 1;
- cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
- cq->cqn, &cq->set_ci_db);
- if (cq->set_ci_db_index < 0)
- goto err_out_icm;
+ err = -ENOMEM;
- cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
- cq->cqn, &cq->arm_db);
- if (cq->arm_db_index < 0)
- goto err_out_ci;
+ cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
+ cq->cqn, &cq->set_ci_db);
+ if (cq->set_ci_db_index < 0)
+ goto err_out_icm;
+
+ cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
+ cq->cqn, &cq->arm_db);
+ if (cq->arm_db_index < 0)
+ goto err_out_ci;
+ }
}
- mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox)
- goto err_out_mailbox;
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ goto err_out_arm;
- cq_context = MAILBOX_ALIGN(mailbox);
+ cq_context = mailbox->buf;
- err = mthca_alloc_cq_buf(dev, size, cq);
- if (err)
- goto err_out_mailbox;
+ if (cq->is_kernel) {
+ err = mthca_alloc_cq_buf(dev, size, cq);
+ if (err)
+ goto err_out_mailbox;
- for (i = 0; i < nent; ++i)
- set_cqe_hw(get_cqe(cq, i));
+ for (i = 0; i < nent; ++i)
+ set_cqe_hw(get_cqe(cq, i));
+ }
spin_lock_init(&cq->lock);
atomic_set(&cq->refcount, 1);
@@ -803,11 +808,14 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
MTHCA_CQ_STATE_DISARMED |
MTHCA_CQ_FLAG_TR);
cq_context->start = cpu_to_be64(0);
- cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 |
- dev->driver_uar.index);
+ cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
+ if (ctx)
+ cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
+ else
+ cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
- cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num);
+ cq_context->pd = cpu_to_be32(pdn);
cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey);
cq_context->cqn = cpu_to_be32(cq->cqn);
@@ -816,7 +824,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
cq_context->state_db = cpu_to_be32(cq->arm_db_index);
}
- err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status);
+ err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status);
if (err) {
mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err);
goto err_out_free_mr;
@@ -840,22 +848,25 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
cq->cons_index = 0;
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
return 0;
err_out_free_mr:
- mthca_free_mr(dev, &cq->mr);
- mthca_free_cq_buf(dev, cq);
+ if (cq->is_kernel) {
+ mthca_free_mr(dev, &cq->mr);
+ mthca_free_cq_buf(dev, cq);
+ }
err_out_mailbox:
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
- if (mthca_is_memfree(dev))
+err_out_arm:
+ if (cq->is_kernel && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
err_out_ci:
- if (mthca_is_memfree(dev))
+ if (cq->is_kernel && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
err_out_icm:
@@ -870,32 +881,31 @@ err_out:
void mthca_free_cq(struct mthca_dev *dev,
struct mthca_cq *cq)
{
- void *mailbox;
+ struct mthca_mailbox *mailbox;
int err;
u8 status;
might_sleep();
- mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox) {
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
mthca_warn(dev, "No memory for mailbox to free CQ.\n");
return;
}
- err = mthca_HW2SW_CQ(dev, MAILBOX_ALIGN(mailbox), cq->cqn, &status);
+ err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status);
if (err)
mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err);
else if (status)
- mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n",
- status);
+ mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status);
if (0) {
- u32 *ctx = MAILBOX_ALIGN(mailbox);
+ u32 *ctx = mailbox->buf;
int j;
printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
- cq->cqn, cq->cons_index, !!next_cqe_sw(cq));
+ cq->cqn, cq->cons_index,
+ cq->is_kernel ? !!next_cqe_sw(cq) : 0);
for (j = 0; j < 16; ++j)
printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j]));
}
@@ -913,17 +923,18 @@ void mthca_free_cq(struct mthca_dev *dev,
atomic_dec(&cq->refcount);
wait_event(cq->wait, !atomic_read(&cq->refcount));
- mthca_free_mr(dev, &cq->mr);
- mthca_free_cq_buf(dev, cq);
-
- if (mthca_is_memfree(dev)) {
- mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
- mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
- mthca_table_put(dev, dev->cq_table.table, cq->cqn);
+ if (cq->is_kernel) {
+ mthca_free_mr(dev, &cq->mr);
+ mthca_free_cq_buf(dev, cq);
+ if (mthca_is_memfree(dev)) {
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
+ }
}
+ mthca_table_put(dev, dev->cq_table.table, cq->cqn);
mthca_free(&dev->cq_table.alloc, cq->cqn);
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
}
int __devinit mthca_init_cq_table(struct mthca_dev *dev)
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index e3d79e267dc..5ecdd2eeeb0 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -1,5 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -46,8 +48,8 @@
#define DRV_NAME "ib_mthca"
#define PFX DRV_NAME ": "
-#define DRV_VERSION "0.06-pre"
-#define DRV_RELDATE "November 8, 2004"
+#define DRV_VERSION "0.06"
+#define DRV_RELDATE "June 23, 2005"
enum {
MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
@@ -98,6 +100,7 @@ enum {
};
struct mthca_cmd {
+ struct pci_pool *pool;
int use_events;
struct semaphore hcr_sem;
struct semaphore poll_sem;
@@ -376,9 +379,15 @@ void mthca_unregister_device(struct mthca_dev *dev);
int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
-int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd);
+int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
+struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
+void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+ int start_index, u64 *buffer_list, int list_len);
+int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
+ u64 iova, u64 total_size, u32 access, struct mthca_mr *mr);
int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_mr *mr);
int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
@@ -405,6 +414,7 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq);
void mthca_free_cq(struct mthca_dev *dev,
struct mthca_cq *cq);
@@ -430,12 +440,14 @@ int mthca_alloc_qp(struct mthca_dev *dev,
struct mthca_cq *recv_cq,
enum ib_qp_type type,
enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
struct mthca_qp *qp);
int mthca_alloc_sqp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
int qpn,
int port,
struct mthca_sqp *sqp);
diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h
index 821039a4904..535fad7710f 100644
--- a/drivers/infiniband/hw/mthca/mthca_doorbell.h
+++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index f46d615d396..cbcf2b4722e 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -469,7 +469,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
PAGE_SIZE;
u64 *dma_list = NULL;
dma_addr_t t;
- void *mailbox = NULL;
+ struct mthca_mailbox *mailbox;
struct mthca_eq_context *eq_context;
int err = -ENOMEM;
int i;
@@ -494,17 +494,16 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
if (!dma_list)
goto err_out_free;
- mailbox = kmalloc(sizeof *eq_context + MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox)
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
goto err_out_free;
- eq_context = MAILBOX_ALIGN(mailbox);
+ eq_context = mailbox->buf;
for (i = 0; i < npages; ++i) {
- eq->page_list[i].buf = pci_alloc_consistent(dev->pdev,
- PAGE_SIZE, &t);
+ eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
+ PAGE_SIZE, &t, GFP_KERNEL);
if (!eq->page_list[i].buf)
- goto err_out_free;
+ goto err_out_free_pages;
dma_list[i] = t;
pci_unmap_addr_set(&eq->page_list[i], mapping, t);
@@ -517,7 +516,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
eq->eqn = mthca_alloc(&dev->eq_table.alloc);
if (eq->eqn == -1)
- goto err_out_free;
+ goto err_out_free_pages;
err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
dma_list, PAGE_SHIFT, npages,
@@ -548,7 +547,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
eq_context->intr = intr;
eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey);
- err = mthca_SW2HW_EQ(dev, eq_context, eq->eqn, &status);
+ err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status);
if (err) {
mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
goto err_out_free_mr;
@@ -561,7 +560,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
}
kfree(dma_list);
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
eq->eqn_mask = swab32(1 << eq->eqn);
eq->cons_index = 0;
@@ -579,17 +578,19 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
err_out_free_eq:
mthca_free(&dev->eq_table.alloc, eq->eqn);
- err_out_free:
+ err_out_free_pages:
for (i = 0; i < npages; ++i)
if (eq->page_list[i].buf)
- pci_free_consistent(dev->pdev, PAGE_SIZE,
- eq->page_list[i].buf,
- pci_unmap_addr(&eq->page_list[i],
- mapping));
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ eq->page_list[i].buf,
+ pci_unmap_addr(&eq->page_list[i],
+ mapping));
+
+ mthca_free_mailbox(dev, mailbox);
+ err_out_free:
kfree(eq->page_list);
kfree(dma_list);
- kfree(mailbox);
err_out:
return err;
@@ -598,25 +599,22 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
static void mthca_free_eq(struct mthca_dev *dev,
struct mthca_eq *eq)
{
- void *mailbox = NULL;
+ struct mthca_mailbox *mailbox;
int err;
u8 status;
int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
PAGE_SIZE;
int i;
- mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox)
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
return;
- err = mthca_HW2SW_EQ(dev, MAILBOX_ALIGN(mailbox),
- eq->eqn, &status);
+ err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status);
if (err)
mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
if (status)
- mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n",
- status);
+ mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status);
dev->eq_table.arm_mask &= ~eq->eqn_mask;
@@ -625,7 +623,7 @@ static void mthca_free_eq(struct mthca_dev *dev,
for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
if (i % 4 == 0)
printk("[%02x] ", i * 4);
- printk(" %08x", be32_to_cpup(MAILBOX_ALIGN(mailbox) + i * 4));
+ printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
if ((i + 1) % 4 == 0)
printk("\n");
}
@@ -638,7 +636,7 @@ static void mthca_free_eq(struct mthca_dev *dev,
pci_unmap_addr(&eq->page_list[i], mapping));
kfree(eq->page_list);
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
}
static void mthca_free_irqs(struct mthca_dev *dev)
@@ -709,8 +707,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
dev->fw.arbel.eq_arm_base) + 4, 4,
&dev->eq_regs.arbel.eq_arm)) {
- mthca_err(dev, "Couldn't map interrupt clear register, "
- "aborting.\n");
+ mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
dev->clr_base);
@@ -721,8 +718,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
dev->fw.arbel.eq_set_ci_base,
MTHCA_EQ_SET_CI_SIZE,
&dev->eq_regs.arbel.eq_set_ci_base)) {
- mthca_err(dev, "Couldn't map interrupt clear register, "
- "aborting.\n");
+ mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
dev->fw.arbel.eq_arm_base) + 4, 4,
dev->eq_regs.arbel.eq_arm);
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index d40590356df..2ef916859e1 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -69,7 +70,7 @@ MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero");
#endif /* CONFIG_PCI_MSI */
static const char mthca_version[] __devinitdata =
- "ib_mthca: Mellanox InfiniBand HCA driver v"
+ DRV_NAME ": Mellanox InfiniBand HCA driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
static struct mthca_profile default_profile = {
@@ -664,7 +665,7 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev)
goto err_pd_table_free;
}
- err = mthca_pd_alloc(dev, &dev->driver_pd);
+ err = mthca_pd_alloc(dev, 1, &dev->driver_pd);
if (err) {
mthca_err(dev, "Failed to create driver PD, "
"aborting.\n");
@@ -927,13 +928,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
*/
if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
pci_resource_len(pdev, 0) != 1 << 20) {
- dev_err(&pdev->dev, "Missing DCS, aborting.");
+ dev_err(&pdev->dev, "Missing DCS, aborting.\n");
err = -ENODEV;
goto err_disable_pdev;
}
if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) ||
pci_resource_len(pdev, 2) != 1 << 23) {
- dev_err(&pdev->dev, "Missing UAR, aborting.");
+ dev_err(&pdev->dev, "Missing UAR, aborting.\n");
err = -ENODEV;
goto err_disable_pdev;
}
@@ -1004,25 +1005,18 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
!pci_enable_msi(pdev))
mdev->mthca_flags |= MTHCA_FLAG_MSI;
- sema_init(&mdev->cmd.hcr_sem, 1);
- sema_init(&mdev->cmd.poll_sem, 1);
- mdev->cmd.use_events = 0;
-
- mdev->hcr = ioremap(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, MTHCA_HCR_SIZE);
- if (!mdev->hcr) {
- mthca_err(mdev, "Couldn't map command register, "
- "aborting.\n");
- err = -ENOMEM;
+ if (mthca_cmd_init(mdev)) {
+ mthca_err(mdev, "Failed to init command interface, aborting.\n");
goto err_free_dev;
}
err = mthca_tune_pci(mdev);
if (err)
- goto err_iounmap;
+ goto err_cmd;
err = mthca_init_hca(mdev);
if (err)
- goto err_iounmap;
+ goto err_cmd;
if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n",
@@ -1070,8 +1064,8 @@ err_cleanup:
err_close:
mthca_close_hca(mdev);
-err_iounmap:
- iounmap(mdev->hcr);
+err_cmd:
+ mthca_cmd_cleanup(mdev);
err_free_dev:
if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
@@ -1118,10 +1112,8 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev)
iounmap(mdev->kar);
mthca_uar_free(mdev, &mdev->driver_uar);
mthca_cleanup_uar_table(mdev);
-
mthca_close_hca(mdev);
-
- iounmap(mdev->hcr);
+ mthca_cmd_cleanup(mdev);
if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
pci_disable_msix(pdev);
@@ -1163,7 +1155,7 @@ static struct pci_device_id mthca_pci_table[] = {
MODULE_DEVICE_TABLE(pci, mthca_pci_table);
static struct pci_driver mthca_driver = {
- .name = "ib_mthca",
+ .name = DRV_NAME,
.id_table = mthca_pci_table,
.probe = mthca_init_one,
.remove = __devexit_p(mthca_remove_one)
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index 70a6553a588..5be7d949dbf 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -66,22 +66,23 @@ static const u8 zero_gid[16]; /* automatically initialized to 0 */
* entry in hash chain and *mgm holds end of hash chain.
*/
static int find_mgm(struct mthca_dev *dev,
- u8 *gid, struct mthca_mgm *mgm,
+ u8 *gid, struct mthca_mailbox *mgm_mailbox,
u16 *hash, int *prev, int *index)
{
- void *mailbox;
+ struct mthca_mailbox *mailbox;
+ struct mthca_mgm *mgm = mgm_mailbox->buf;
u8 *mgid;
int err;
u8 status;
- mailbox = kmalloc(16 + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
- if (!mailbox)
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
return -ENOMEM;
- mgid = MAILBOX_ALIGN(mailbox);
+ mgid = mailbox->buf;
memcpy(mgid, gid, 16);
- err = mthca_MGID_HASH(dev, mgid, hash, &status);
+ err = mthca_MGID_HASH(dev, mailbox, hash, &status);
if (err)
goto out;
if (status) {
@@ -103,7 +104,7 @@ static int find_mgm(struct mthca_dev *dev,
*prev = -1;
do {
- err = mthca_READ_MGM(dev, *index, mgm, &status);
+ err = mthca_READ_MGM(dev, *index, mgm_mailbox, &status);
if (err)
goto out;
if (status) {
@@ -129,14 +130,14 @@ static int find_mgm(struct mthca_dev *dev,
*index = -1;
out:
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
- void *mailbox;
+ struct mthca_mailbox *mailbox;
struct mthca_mgm *mgm;
u16 hash;
int index, prev;
@@ -145,15 +146,15 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
int err;
u8 status;
- mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
- if (!mailbox)
- return -ENOMEM;
- mgm = MAILBOX_ALIGN(mailbox);
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ mgm = mailbox->buf;
if (down_interruptible(&dev->mcg_table.sem))
return -EINTR;
- err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
+ err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
if (err)
goto out;
@@ -170,7 +171,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
goto out;
}
- err = mthca_READ_MGM(dev, index, mgm, &status);
+ err = mthca_READ_MGM(dev, index, mailbox, &status);
if (err)
goto out;
if (status) {
@@ -195,7 +196,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
goto out;
}
- err = mthca_WRITE_MGM(dev, index, mgm, &status);
+ err = mthca_WRITE_MGM(dev, index, mailbox, &status);
if (err)
goto out;
if (status) {
@@ -206,7 +207,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (!link)
goto out;
- err = mthca_READ_MGM(dev, prev, mgm, &status);
+ err = mthca_READ_MGM(dev, prev, mailbox, &status);
if (err)
goto out;
if (status) {
@@ -217,7 +218,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
mgm->next_gid_index = cpu_to_be32(index << 5);
- err = mthca_WRITE_MGM(dev, prev, mgm, &status);
+ err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
if (err)
goto out;
if (status) {
@@ -227,14 +228,14 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
out:
up(&dev->mcg_table.sem);
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
- void *mailbox;
+ struct mthca_mailbox *mailbox;
struct mthca_mgm *mgm;
u16 hash;
int prev, index;
@@ -242,15 +243,15 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
int err;
u8 status;
- mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
- if (!mailbox)
- return -ENOMEM;
- mgm = MAILBOX_ALIGN(mailbox);
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ mgm = mailbox->buf;
if (down_interruptible(&dev->mcg_table.sem))
return -EINTR;
- err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
+ err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
if (err)
goto out;
@@ -285,7 +286,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
mgm->qp[loc] = mgm->qp[i - 1];
mgm->qp[i - 1] = 0;
- err = mthca_WRITE_MGM(dev, index, mgm, &status);
+ err = mthca_WRITE_MGM(dev, index, mailbox, &status);
if (err)
goto out;
if (status) {
@@ -304,7 +305,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (be32_to_cpu(mgm->next_gid_index) >> 5) {
err = mthca_READ_MGM(dev,
be32_to_cpu(mgm->next_gid_index) >> 5,
- mgm, &status);
+ mailbox, &status);
if (err)
goto out;
if (status) {
@@ -316,7 +317,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
} else
memset(mgm->gid, 0, 16);
- err = mthca_WRITE_MGM(dev, index, mgm, &status);
+ err = mthca_WRITE_MGM(dev, index, mailbox, &status);
if (err)
goto out;
if (status) {
@@ -327,7 +328,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
} else {
/* Remove entry from AMGM */
index = be32_to_cpu(mgm->next_gid_index) >> 5;
- err = mthca_READ_MGM(dev, prev, mgm, &status);
+ err = mthca_READ_MGM(dev, prev, mailbox, &status);
if (err)
goto out;
if (status) {
@@ -338,7 +339,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
mgm->next_gid_index = cpu_to_be32(index << 5);
- err = mthca_WRITE_MGM(dev, prev, mgm, &status);
+ err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
if (err)
goto out;
if (status) {
@@ -350,7 +351,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
out:
up(&dev->mcg_table.sem);
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
return err;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 637b30e3559..2a864615035 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -47,6 +48,15 @@ enum {
MTHCA_TABLE_CHUNK_SIZE = 1 << 18
};
+struct mthca_user_db_table {
+ struct semaphore mutex;
+ struct {
+ u64 uvirt;
+ struct scatterlist mem;
+ int refcount;
+ } page[0];
+};
+
void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
{
struct mthca_icm_chunk *chunk, *tmp;
@@ -179,9 +189,14 @@ out:
void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
{
- int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+ int i;
u8 status;
+ if (!mthca_is_memfree(dev))
+ return;
+
+ i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+
down(&table->mutex);
if (--table->icm[i]->refcount == 0) {
@@ -256,6 +271,9 @@ void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
{
int i;
+ if (!mthca_is_memfree(dev))
+ return;
+
for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size)
mthca_table_put(dev, table, i);
}
@@ -336,13 +354,133 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
kfree(table);
}
-static u64 mthca_uarc_virt(struct mthca_dev *dev, int page)
+static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page)
{
return dev->uar_table.uarc_base +
- dev->driver_uar.index * dev->uar_table.uarc_size +
+ uar->index * dev->uar_table.uarc_size +
page * 4096;
}
+int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index, u64 uaddr)
+{
+ int ret = 0;
+ u8 status;
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return 0;
+
+ if (index < 0 || index > dev->uar_table.uarc_size / 8)
+ return -EINVAL;
+
+ down(&db_tab->mutex);
+
+ i = index / MTHCA_DB_REC_PER_PAGE;
+
+ if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) ||
+ (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
+ (uaddr & 4095)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (db_tab->page[i].refcount) {
+ ++db_tab->page[i].refcount;
+ goto out;
+ }
+
+ ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
+ &db_tab->page[i].mem.page, NULL);
+ if (ret < 0)
+ goto out;
+
+ db_tab->page[i].mem.length = 4096;
+ db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;
+
+ ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ if (ret < 0) {
+ put_page(db_tab->page[i].mem.page);
+ goto out;
+ }
+
+ ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
+ mthca_uarc_virt(dev, uar, i), &status);
+ if (!ret && status)
+ ret = -EINVAL;
+ if (ret) {
+ pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ put_page(db_tab->page[i].mem.page);
+ goto out;
+ }
+
+ db_tab->page[i].uvirt = uaddr;
+ db_tab->page[i].refcount = 1;
+
+out:
+ up(&db_tab->mutex);
+ return ret;
+}
+
+void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index)
+{
+ if (!mthca_is_memfree(dev))
+ return;
+
+ /*
+ * To make our bookkeeping simpler, we don't unmap DB
+ * pages until we clean up the whole db table.
+ */
+
+ down(&db_tab->mutex);
+
+ --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;
+
+ up(&db_tab->mutex);
+}
+
+struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
+{
+ struct mthca_user_db_table *db_tab;
+ int npages;
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return NULL;
+
+ npages = dev->uar_table.uarc_size / 4096;
+ db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
+ if (!db_tab)
+ return ERR_PTR(-ENOMEM);
+
+ init_MUTEX(&db_tab->mutex);
+ for (i = 0; i < npages; ++i) {
+ db_tab->page[i].refcount = 0;
+ db_tab->page[i].uvirt = 0;
+ }
+
+ return db_tab;
+}
+
+void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab)
+{
+ int i;
+ u8 status;
+
+ if (!mthca_is_memfree(dev))
+ return;
+
+ for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) {
+ if (db_tab->page[i].uvirt) {
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
+ pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ put_page(db_tab->page[i].mem.page);
+ }
+ }
+}
+
int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
{
int group;
@@ -399,7 +537,8 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
}
memset(page->db_rec, 0, 4096);
- ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status);
+ ret = mthca_MAP_ICM_page(dev, page->mapping,
+ mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
if (!ret && status)
ret = -EINVAL;
if (ret) {
@@ -453,7 +592,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
i >= dev->db_tab->max_group1 - 1) {
- mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
dma_free_coherent(&dev->pdev->dev, 4096,
page->db_rec, page->mapping);
@@ -522,7 +661,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev)
if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
- mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
dma_free_coherent(&dev->pdev->dev, 4096,
dev->db_tab->page[i].db_rec,
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index fe7be2a6bc4..4761d844cb5 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -148,7 +149,7 @@ struct mthca_db_table {
struct semaphore mutex;
};
-enum {
+enum mthca_db_type {
MTHCA_DB_TYPE_INVALID = 0x0,
MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
MTHCA_DB_TYPE_CQ_ARM = 0x2,
@@ -158,6 +159,17 @@ enum {
MTHCA_DB_TYPE_GROUP_SEP = 0x7
};
+struct mthca_user_db_table;
+struct mthca_uar;
+
+int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index, u64 uaddr);
+void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index);
+struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev);
+void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab);
+
int mthca_init_db_tab(struct mthca_dev *dev);
void mthca_cleanup_db_tab(struct mthca_dev *dev);
int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db);
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 8960fc2306b..cbe50feaf68 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -40,6 +40,12 @@
#include "mthca_cmd.h"
#include "mthca_memfree.h"
+struct mthca_mtt {
+ struct mthca_buddy *buddy;
+ int order;
+ u32 first_seg;
+};
+
/*
* Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
*/
@@ -173,8 +179,8 @@ static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy)
kfree(buddy->bits);
}
-static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order,
- struct mthca_buddy *buddy)
+static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
+ struct mthca_buddy *buddy)
{
u32 seg = mthca_buddy_alloc(buddy, order);
@@ -191,14 +197,102 @@ static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order,
return seg;
}
-static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order,
- struct mthca_buddy* buddy)
+static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
+ struct mthca_buddy *buddy)
{
- mthca_buddy_free(buddy, seg, order);
+ struct mthca_mtt *mtt;
+ int i;
- if (mthca_is_memfree(dev))
- mthca_table_put_range(dev, dev->mr_table.mtt_table, seg,
- seg + (1 << order) - 1);
+ if (size <= 0)
+ return ERR_PTR(-EINVAL);
+
+ mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
+ if (!mtt)
+ return ERR_PTR(-ENOMEM);
+
+ mtt->buddy = buddy;
+ mtt->order = 0;
+ for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1)
+ ++mtt->order;
+
+ mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
+ if (mtt->first_seg == -1) {
+ kfree(mtt);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return mtt;
+}
+
+struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
+{
+ return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy);
+}
+
+void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
+{
+ if (!mtt)
+ return;
+
+ mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);
+
+ mthca_table_put_range(dev, dev->mr_table.mtt_table,
+ mtt->first_seg,
+ mtt->first_seg + (1 << mtt->order) - 1);
+
+ kfree(mtt);
+}
+
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+ int start_index, u64 *buffer_list, int list_len)
+{
+ struct mthca_mailbox *mailbox;
+ u64 *mtt_entry;
+ int err = 0;
+ u8 status;
+ int i;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ mtt_entry = mailbox->buf;
+
+ while (list_len > 0) {
+ mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
+ mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+ start_index * 8);
+ mtt_entry[1] = 0;
+ for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
+ mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
+ MTHCA_MTT_FLAG_PRESENT);
+
+ /*
+ * If we have an odd number of entries to write, add
+ * one more dummy entry for firmware efficiency.
+ */
+ if (i & 1)
+ mtt_entry[i + 2] = 0;
+
+ err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status);
+ if (err) {
+ mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
+ goto out;
+ }
+ if (status) {
+ mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ list_len -= i;
+ start_index += i;
+ buffer_list += i;
+ }
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+ return err;
}
static inline u32 tavor_hw_index_to_key(u32 ind)
@@ -237,91 +331,18 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
return tavor_key_to_hw_index(key);
}
-int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
- u32 access, struct mthca_mr *mr)
+int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
+ u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
{
- void *mailbox = NULL;
+ struct mthca_mailbox *mailbox;
struct mthca_mpt_entry *mpt_entry;
u32 key;
+ int i;
int err;
u8 status;
might_sleep();
- mr->order = -1;
- key = mthca_alloc(&dev->mr_table.mpt_alloc);
- if (key == -1)
- return -ENOMEM;
- mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
-
- if (mthca_is_memfree(dev)) {
- err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
- if (err)
- goto err_out_mpt_free;
- }
-
- mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox) {
- err = -ENOMEM;
- goto err_out_table;
- }
- mpt_entry = MAILBOX_ALIGN(mailbox);
-
- mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
- MTHCA_MPT_FLAG_MIO |
- MTHCA_MPT_FLAG_PHYSICAL |
- MTHCA_MPT_FLAG_REGION |
- access);
- mpt_entry->page_size = 0;
- mpt_entry->key = cpu_to_be32(key);
- mpt_entry->pd = cpu_to_be32(pd);
- mpt_entry->start = 0;
- mpt_entry->length = ~0ULL;
-
- memset(&mpt_entry->lkey, 0,
- sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
-
- err = mthca_SW2HW_MPT(dev, mpt_entry,
- key & (dev->limits.num_mpts - 1),
- &status);
- if (err) {
- mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
- goto err_out_table;
- } else if (status) {
- mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
- status);
- err = -EINVAL;
- goto err_out_table;
- }
-
- kfree(mailbox);
- return err;
-
-err_out_table:
- if (mthca_is_memfree(dev))
- mthca_table_put(dev, dev->mr_table.mpt_table, key);
-
-err_out_mpt_free:
- mthca_free(&dev->mr_table.mpt_alloc, key);
- kfree(mailbox);
- return err;
-}
-
-int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
- u64 *buffer_list, int buffer_size_shift,
- int list_len, u64 iova, u64 total_size,
- u32 access, struct mthca_mr *mr)
-{
- void *mailbox;
- u64 *mtt_entry;
- struct mthca_mpt_entry *mpt_entry;
- u32 key;
- int err = -ENOMEM;
- u8 status;
- int i;
-
- might_sleep();
WARN_ON(buffer_size_shift >= 32);
key = mthca_alloc(&dev->mr_table.mpt_alloc);
@@ -335,75 +356,33 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
goto err_out_mpt_free;
}
- for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
- i < list_len;
- i <<= 1, ++mr->order)
- ; /* nothing */
-
- mr->first_seg = mthca_alloc_mtt(dev, mr->order,
- &dev->mr_table.mtt_buddy);
- if (mr->first_seg == -1)
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
goto err_out_table;
-
- /*
- * If list_len is odd, we add one more dummy entry for
- * firmware efficiency.
- */
- mailbox = kmalloc(max(sizeof *mpt_entry,
- (size_t) 8 * (list_len + (list_len & 1) + 2)) +
- MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox)
- goto err_out_free_mtt;
-
- mtt_entry = MAILBOX_ALIGN(mailbox);
-
- mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
- mr->first_seg * MTHCA_MTT_SEG_SIZE);
- mtt_entry[1] = 0;
- for (i = 0; i < list_len; ++i)
- mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
- MTHCA_MTT_FLAG_PRESENT);
- if (list_len & 1) {
- mtt_entry[i + 2] = 0;
- ++list_len;
- }
-
- if (0) {
- mthca_dbg(dev, "Dumping MPT entry\n");
- for (i = 0; i < list_len + 2; ++i)
- printk(KERN_ERR "[%2d] %016llx\n",
- i, (unsigned long long) be64_to_cpu(mtt_entry[i]));
- }
-
- err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status);
- if (err) {
- mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
- goto err_out_mailbox_free;
- }
- if (status) {
- mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
- status);
- err = -EINVAL;
- goto err_out_mailbox_free;
}
-
- mpt_entry = MAILBOX_ALIGN(mailbox);
+ mpt_entry = mailbox->buf;
mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
MTHCA_MPT_FLAG_MIO |
MTHCA_MPT_FLAG_REGION |
access);
+ if (!mr->mtt)
+ mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);
mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
mpt_entry->key = cpu_to_be32(key);
mpt_entry->pd = cpu_to_be32(pd);
mpt_entry->start = cpu_to_be64(iova);
mpt_entry->length = cpu_to_be64(total_size);
+
memset(&mpt_entry->lkey, 0,
sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
- mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base +
- mr->first_seg * MTHCA_MTT_SEG_SIZE);
+
+ if (mr->mtt)
+ mpt_entry->mtt_seg =
+ cpu_to_be64(dev->mr_table.mtt_base +
+ mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE);
if (0) {
mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
@@ -416,45 +395,70 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
}
}
- err = mthca_SW2HW_MPT(dev, mpt_entry,
+ err = mthca_SW2HW_MPT(dev, mailbox,
key & (dev->limits.num_mpts - 1),
&status);
- if (err)
+ if (err) {
mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
- else if (status) {
+ goto err_out_mailbox;
+ } else if (status) {
mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
status);
err = -EINVAL;
+ goto err_out_mailbox;
}
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
return err;
-err_out_mailbox_free:
- kfree(mailbox);
-
-err_out_free_mtt:
- mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy);
+err_out_mailbox:
+ mthca_free_mailbox(dev, mailbox);
err_out_table:
- if (mthca_is_memfree(dev))
- mthca_table_put(dev, dev->mr_table.mpt_table, key);
+ mthca_table_put(dev, dev->mr_table.mpt_table, key);
err_out_mpt_free:
mthca_free(&dev->mr_table.mpt_alloc, key);
return err;
}
-/* Free mr or fmr */
-static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order,
- u32 first_seg, struct mthca_buddy *buddy)
+int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
+ u32 access, struct mthca_mr *mr)
{
- if (order >= 0)
- mthca_free_mtt(dev, first_seg, order, buddy);
+ mr->mtt = NULL;
+ return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr);
+}
- if (mthca_is_memfree(dev))
- mthca_table_put(dev, dev->mr_table.mpt_table,
- arbel_key_to_hw_index(lkey));
+int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
+ u64 *buffer_list, int buffer_size_shift,
+ int list_len, u64 iova, u64 total_size,
+ u32 access, struct mthca_mr *mr)
+{
+ int err;
+
+ mr->mtt = mthca_alloc_mtt(dev, list_len);
+ if (IS_ERR(mr->mtt))
+ return PTR_ERR(mr->mtt);
+
+ err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
+ if (err) {
+ mthca_free_mtt(dev, mr->mtt);
+ return err;
+ }
+
+ err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
+ total_size, access, mr);
+ if (err)
+ mthca_free_mtt(dev, mr->mtt);
+
+ return err;
+}
+
+/* Free mr or fmr */
+static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
+{
+ mthca_table_put(dev, dev->mr_table.mpt_table,
+ arbel_key_to_hw_index(lkey));
mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
}
@@ -476,15 +480,15 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
status);
- mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg,
- &dev->mr_table.mtt_buddy);
+ mthca_free_region(dev, mr->ibmr.lkey);
+ mthca_free_mtt(dev, mr->mtt);
}
int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_fmr *mr)
{
struct mthca_mpt_entry *mpt_entry;
- void *mailbox;
+ struct mthca_mailbox *mailbox;
u64 mtt_seg;
u32 key, idx;
u8 status;
@@ -522,31 +526,24 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
sizeof *(mr->mem.tavor.mpt) * idx;
- for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
- i < list_len;
- i <<= 1, ++mr->order)
- ; /* nothing */
-
- mr->first_seg = mthca_alloc_mtt(dev, mr->order,
- dev->mr_table.fmr_mtt_buddy);
- if (mr->first_seg == -1)
+ mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
+ if (IS_ERR(mr->mtt))
goto err_out_table;
- mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE;
+ mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;
if (mthca_is_memfree(dev)) {
mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
- mr->first_seg);
+ mr->mtt->first_seg);
BUG_ON(!mr->mem.arbel.mtts);
} else
mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
- mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
- GFP_KERNEL);
- if (!mailbox)
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
goto err_out_free_mtt;
- mpt_entry = MAILBOX_ALIGN(mailbox);
+ mpt_entry = mailbox->buf;
mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
MTHCA_MPT_FLAG_MIO |
@@ -571,7 +568,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
}
}
- err = mthca_SW2HW_MPT(dev, mpt_entry,
+ err = mthca_SW2HW_MPT(dev, mailbox,
key & (dev->limits.num_mpts - 1),
&status);
if (err) {
@@ -585,19 +582,17 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
goto err_out_mailbox_free;
}
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
return 0;
err_out_mailbox_free:
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
err_out_free_mtt:
- mthca_free_mtt(dev, mr->first_seg, mr->order,
- dev->mr_table.fmr_mtt_buddy);
+ mthca_free_mtt(dev, mr->mtt);
err_out_table:
- if (mthca_is_memfree(dev))
- mthca_table_put(dev, dev->mr_table.mpt_table, key);
+ mthca_table_put(dev, dev->mr_table.mpt_table, key);
err_out_mpt_free:
mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
@@ -609,8 +604,9 @@ int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
if (fmr->maps)
return -EBUSY;
- mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg,
- dev->mr_table.fmr_mtt_buddy);
+ mthca_free_region(dev, fmr->ibmr.lkey);
+ mthca_free_mtt(dev, fmr->mtt);
+
return 0;
}
@@ -826,7 +822,8 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
if (dev->limits.reserved_mtts) {
i = fls(dev->limits.reserved_mtts - 1);
- if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) {
+ if (mthca_alloc_mtt_range(dev, i,
+ dev->mr_table.fmr_mtt_buddy) == -1) {
mthca_warn(dev, "MTT table of order %d is too small.\n",
dev->mr_table.fmr_mtt_buddy->max_order);
err = -ENOMEM;
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
index ea66847e4ea..c2c899844e9 100644
--- a/drivers/infiniband/hw/mthca/mthca_pd.c
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -37,23 +38,27 @@
#include "mthca_dev.h"
-int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd)
+int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
{
- int err;
+ int err = 0;
might_sleep();
+ pd->privileged = privileged;
+
atomic_set(&pd->sqp_count, 0);
pd->pd_num = mthca_alloc(&dev->pd_table.alloc);
if (pd->pd_num == -1)
return -ENOMEM;
- err = mthca_mr_alloc_notrans(dev, pd->pd_num,
- MTHCA_MPT_FLAG_LOCAL_READ |
- MTHCA_MPT_FLAG_LOCAL_WRITE,
- &pd->ntmr);
- if (err)
- mthca_free(&dev->pd_table.alloc, pd->pd_num);
+ if (privileged) {
+ err = mthca_mr_alloc_notrans(dev, pd->pd_num,
+ MTHCA_MPT_FLAG_LOCAL_READ |
+ MTHCA_MPT_FLAG_LOCAL_WRITE,
+ &pd->ntmr);
+ if (err)
+ mthca_free(&dev->pd_table.alloc, pd->pd_num);
+ }
return err;
}
@@ -61,7 +66,8 @@ int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd)
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
{
might_sleep();
- mthca_free_mr(dev, &pd->ntmr);
+ if (pd->privileged)
+ mthca_free_mr(dev, &pd->ntmr);
mthca_free(&dev->pd_table.alloc, pd->pd_num);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 159f4e6c312..7a58ce90e17 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1,5 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -33,9 +35,12 @@
*/
#include <ib_smi.h>
+#include <linux/mm.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
+#include "mthca_user.h"
+#include "mthca_memfree.h"
static int mthca_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
@@ -52,7 +57,7 @@ static int mthca_query_device(struct ib_device *ibdev,
if (!in_mad || !out_mad)
goto out;
- memset(props, 0, sizeof props);
+ memset(props, 0, sizeof *props);
props->fw_ver = mdev->fw_ver;
@@ -283,7 +288,78 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
return err;
}
-static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
+static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct mthca_alloc_ucontext_resp uresp;
+ struct mthca_ucontext *context;
+ int err;
+
+ memset(&uresp, 0, sizeof uresp);
+
+ uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
+ if (mthca_is_memfree(to_mdev(ibdev)))
+ uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
+ else
+ uresp.uarc_size = 0;
+
+ context = kmalloc(sizeof *context, GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
+ if (err) {
+ kfree(context);
+ return ERR_PTR(err);
+ }
+
+ context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
+ if (IS_ERR(context->db_tab)) {
+ err = PTR_ERR(context->db_tab);
+ mthca_uar_free(to_mdev(ibdev), &context->uar);
+ kfree(context);
+ return ERR_PTR(err);
+ }
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
+ mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
+ mthca_uar_free(to_mdev(ibdev), &context->uar);
+ kfree(context);
+ return ERR_PTR(-EFAULT);
+ }
+
+ return &context->ibucontext;
+}
+
+static int mthca_dealloc_ucontext(struct ib_ucontext *context)
+{
+ mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab);
+ mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
+ kfree(to_mucontext(context));
+
+ return 0;
+}
+
+static int mthca_mmap_uar(struct ib_ucontext *context,
+ struct vm_area_struct *vma)
+{
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (remap_pfn_range(vma, vma->vm_start,
+ to_mucontext(context)->uar.pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
{
struct mthca_pd *pd;
int err;
@@ -292,12 +368,20 @@ static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
if (!pd)
return ERR_PTR(-ENOMEM);
- err = mthca_pd_alloc(to_mdev(ibdev), pd);
+ err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
if (err) {
kfree(pd);
return ERR_PTR(err);
}
+ if (context) {
+ if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
+ mthca_pd_free(to_mdev(ibdev), pd);
+ kfree(pd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
return &pd->ibpd;
}
@@ -337,8 +421,10 @@ static int mthca_ah_destroy(struct ib_ah *ah)
}
static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr)
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
+ struct mthca_create_qp ucmd;
struct mthca_qp *qp;
int err;
@@ -347,41 +433,82 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
case IB_QPT_UC:
case IB_QPT_UD:
{
+ struct mthca_ucontext *context;
+
qp = kmalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
- qp->sq.max = init_attr->cap.max_send_wr;
- qp->rq.max = init_attr->cap.max_recv_wr;
- qp->sq.max_gs = init_attr->cap.max_send_sge;
- qp->rq.max_gs = init_attr->cap.max_recv_sge;
+ if (pd->uobject) {
+ context = to_mucontext(pd->uobject->context);
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
+ err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index, ucmd.sq_db_page);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab,
+ ucmd.rq_db_index, ucmd.rq_db_page);
+ if (err) {
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index);
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->mr.ibmr.lkey = ucmd.lkey;
+ qp->sq.db_index = ucmd.sq_db_index;
+ qp->rq.db_index = ucmd.rq_db_index;
+ }
err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
init_attr->qp_type, init_attr->sq_sig_type,
- qp);
+ &init_attr->cap, qp);
+
+ if (err && pd->uobject) {
+ context = to_mucontext(pd->uobject->context);
+
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index);
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.rq_db_index);
+ }
+
qp->ibqp.qp_num = qp->qpn;
break;
}
case IB_QPT_SMI:
case IB_QPT_GSI:
{
+ /* Don't allow userspace to create special QPs */
+ if (pd->uobject)
+ return ERR_PTR(-EINVAL);
+
qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
- qp->sq.max = init_attr->cap.max_send_wr;
- qp->rq.max = init_attr->cap.max_recv_wr;
- qp->sq.max_gs = init_attr->cap.max_send_sge;
- qp->rq.max_gs = init_attr->cap.max_recv_sge;
-
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
- init_attr->sq_sig_type,
+ init_attr->sq_sig_type, &init_attr->cap,
qp->ibqp.qp_num, init_attr->port_num,
to_msqp(qp));
break;
@@ -396,42 +523,115 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
return ERR_PTR(err);
}
- init_attr->cap.max_inline_data = 0;
+ init_attr->cap.max_inline_data = 0;
+ init_attr->cap.max_send_wr = qp->sq.max;
+ init_attr->cap.max_recv_wr = qp->rq.max;
+ init_attr->cap.max_send_sge = qp->sq.max_gs;
+ init_attr->cap.max_recv_sge = qp->rq.max_gs;
return &qp->ibqp;
}
static int mthca_destroy_qp(struct ib_qp *qp)
{
+ if (qp->uobject) {
+ mthca_unmap_user_db(to_mdev(qp->device),
+ &to_mucontext(qp->uobject->context)->uar,
+ to_mucontext(qp->uobject->context)->db_tab,
+ to_mqp(qp)->sq.db_index);
+ mthca_unmap_user_db(to_mdev(qp->device),
+ &to_mucontext(qp->uobject->context)->uar,
+ to_mucontext(qp->uobject->context)->db_tab,
+ to_mqp(qp)->rq.db_index);
+ }
mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
kfree(qp);
return 0;
}
-static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries)
+static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
{
+ struct mthca_create_cq ucmd;
struct mthca_cq *cq;
int nent;
int err;
+ if (context) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
+ err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab,
+ ucmd.set_db_index, ucmd.set_db_page);
+ if (err)
+ return ERR_PTR(err);
+
+ err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab,
+ ucmd.arm_db_index, ucmd.arm_db_page);
+ if (err)
+ goto err_unmap_set;
+ }
+
cq = kmalloc(sizeof *cq, GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ if (!cq) {
+ err = -ENOMEM;
+ goto err_unmap_arm;
+ }
+
+ if (context) {
+ cq->mr.ibmr.lkey = ucmd.lkey;
+ cq->set_ci_db_index = ucmd.set_db_index;
+ cq->arm_db_index = ucmd.arm_db_index;
+ }
for (nent = 1; nent <= entries; nent <<= 1)
; /* nothing */
- err = mthca_init_cq(to_mdev(ibdev), nent, cq);
- if (err) {
- kfree(cq);
- cq = ERR_PTR(err);
+ err = mthca_init_cq(to_mdev(ibdev), nent,
+ context ? to_mucontext(context) : NULL,
+ context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
+ cq);
+ if (err)
+ goto err_free;
+
+ if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
+ mthca_free_cq(to_mdev(ibdev), cq);
+ goto err_free;
}
return &cq->ibcq;
+
+err_free:
+ kfree(cq);
+
+err_unmap_arm:
+ if (context)
+ mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab, ucmd.arm_db_index);
+
+err_unmap_set:
+ if (context)
+ mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab, ucmd.set_db_index);
+
+ return ERR_PTR(err);
}
static int mthca_destroy_cq(struct ib_cq *cq)
{
+ if (cq->uobject) {
+ mthca_unmap_user_db(to_mdev(cq->device),
+ &to_mucontext(cq->uobject->context)->uar,
+ to_mucontext(cq->uobject->context)->db_tab,
+ to_mcq(cq)->arm_db_index);
+ mthca_unmap_user_db(to_mdev(cq->device),
+ &to_mucontext(cq->uobject->context)->uar,
+ to_mucontext(cq->uobject->context)->db_tab,
+ to_mcq(cq)->set_ci_db_index);
+ }
mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
kfree(cq);
@@ -558,6 +758,7 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
convert_access(acc), mr);
if (err) {
+ kfree(page_list);
kfree(mr);
return ERR_PTR(err);
}
@@ -566,6 +767,87 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
return &mr->ibmr;
}
+static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
+ int acc, struct ib_udata *udata)
+{
+ struct mthca_dev *dev = to_mdev(pd->device);
+ struct ib_umem_chunk *chunk;
+ struct mthca_mr *mr;
+ u64 *pages;
+ int shift, n, len;
+ int i, j, k;
+ int err = 0;
+
+ shift = ffs(region->page_size) - 1;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ n = 0;
+ list_for_each_entry(chunk, &region->chunk_list, list)
+ n += chunk->nents;
+
+ mr->mtt = mthca_alloc_mtt(dev, n);
+ if (IS_ERR(mr->mtt)) {
+ err = PTR_ERR(mr->mtt);
+ goto err;
+ }
+
+ pages = (u64 *) __get_free_page(GFP_KERNEL);
+ if (!pages) {
+ err = -ENOMEM;
+ goto err_mtt;
+ }
+
+ i = n = 0;
+
+ list_for_each_entry(chunk, &region->chunk_list, list)
+ for (j = 0; j < chunk->nmap; ++j) {
+ len = sg_dma_len(&chunk->page_list[j]) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(&chunk->page_list[j]) +
+ region->page_size * k;
+ /*
+ * Be friendly to WRITE_MTT command
+ * and leave two empty slots for the
+ * index and reserved fields of the
+ * mailbox.
+ */
+ if (i == PAGE_SIZE / sizeof (u64) - 2) {
+ err = mthca_write_mtt(dev, mr->mtt,
+ n, pages, i);
+ if (err)
+ goto mtt_done;
+ n += i;
+ i = 0;
+ }
+ }
+ }
+
+ if (i)
+ err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
+mtt_done:
+ free_page((unsigned long) pages);
+ if (err)
+ goto err_mtt;
+
+ err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
+ region->length, convert_access(acc), mr);
+
+ if (err)
+ goto err_mtt;
+
+ return &mr->ibmr;
+
+err_mtt:
+ mthca_free_mtt(dev, mr->mtt);
+
+err:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
static int mthca_dereg_mr(struct ib_mr *mr)
{
struct mthca_mr *mmr = to_mmr(mr);
@@ -690,6 +972,8 @@ int mthca_register_device(struct mthca_dev *dev)
int i;
strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
+ dev->ib_dev.owner = THIS_MODULE;
+
dev->ib_dev.node_type = IB_NODE_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.dma_device = &dev->pdev->dev;
@@ -699,6 +983,9 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.modify_port = mthca_modify_port;
dev->ib_dev.query_pkey = mthca_query_pkey;
dev->ib_dev.query_gid = mthca_query_gid;
+ dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext;
+ dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext;
+ dev->ib_dev.mmap = mthca_mmap_uar;
dev->ib_dev.alloc_pd = mthca_alloc_pd;
dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
dev->ib_dev.create_ah = mthca_ah_create;
@@ -711,6 +998,7 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.poll_cq = mthca_poll_cq;
dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
+ dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;
if (dev->mthca_flags & MTHCA_FLAG_FMR) {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 619710f95a8..1d032791cc8 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -54,18 +55,26 @@ struct mthca_uar {
int index;
};
+struct mthca_user_db_table;
+
+struct mthca_ucontext {
+ struct ib_ucontext ibucontext;
+ struct mthca_uar uar;
+ struct mthca_user_db_table *db_tab;
+};
+
+struct mthca_mtt;
+
struct mthca_mr {
- struct ib_mr ibmr;
- int order;
- u32 first_seg;
+ struct ib_mr ibmr;
+ struct mthca_mtt *mtt;
};
struct mthca_fmr {
- struct ib_fmr ibmr;
+ struct ib_fmr ibmr;
struct ib_fmr_attr attr;
- int order;
- u32 first_seg;
- int maps;
+ struct mthca_mtt *mtt;
+ int maps;
union {
struct {
struct mthca_mpt_entry __iomem *mpt;
@@ -83,6 +92,7 @@ struct mthca_pd {
u32 pd_num;
atomic_t sqp_count;
struct mthca_mr ntmr;
+ int privileged;
};
struct mthca_eq {
@@ -167,6 +177,7 @@ struct mthca_cq {
int cqn;
u32 cons_index;
int is_direct;
+ int is_kernel;
/* Next fields are Arbel only */
int set_ci_db_index;
@@ -236,6 +247,11 @@ struct mthca_sqp {
dma_addr_t header_dma;
};
+static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct mthca_ucontext, ibucontext);
+}
+
static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
{
return container_of(ibmr, struct mthca_fmr, ibmr);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index ca73bab11a0..f7126b14d5a 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -46,7 +47,9 @@ enum {
MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
MTHCA_ACK_REQ_FREQ = 10,
MTHCA_FLIGHT_LIMIT = 9,
- MTHCA_UD_HEADER_SIZE = 72 /* largest UD header possible */
+ MTHCA_UD_HEADER_SIZE = 72, /* largest UD header possible */
+ MTHCA_INLINE_HEADER_SIZE = 4, /* data segment overhead for inline */
+ MTHCA_INLINE_CHUNK_SIZE = 16 /* inline data segment chunk */
};
enum {
@@ -357,6 +360,9 @@ static const struct {
[UD] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_QKEY),
+ [UC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
[RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
@@ -378,6 +384,9 @@ static const struct {
[UD] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_QKEY),
+ [UC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
[RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
@@ -388,6 +397,11 @@ static const struct {
[IB_QPS_RTR] = {
.trans = MTHCA_TRANS_INIT2RTR,
.req_param = {
+ [UC] = (IB_QP_AV |
+ IB_QP_PATH_MTU |
+ IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN |
+ IB_QP_MAX_DEST_RD_ATOMIC),
[RC] = (IB_QP_AV |
IB_QP_PATH_MTU |
IB_QP_DEST_QPN |
@@ -398,6 +412,9 @@ static const struct {
.opt_param = {
[UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
+ [UC] = (IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX),
[RC] = (IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX),
@@ -413,6 +430,8 @@ static const struct {
.trans = MTHCA_TRANS_RTR2RTS,
.req_param = {
[UD] = IB_QP_SQ_PSN,
+ [UC] = (IB_QP_SQ_PSN |
+ IB_QP_MAX_QP_RD_ATOMIC),
[RC] = (IB_QP_TIMEOUT |
IB_QP_RETRY_CNT |
IB_QP_RNR_RETRY |
@@ -423,6 +442,11 @@ static const struct {
.opt_param = {
[UD] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
+ [UC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX |
+ IB_QP_PATH_MIG_STATE),
[RC] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
@@ -442,6 +466,9 @@ static const struct {
.opt_param = {
[UD] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
+ [UC] = (IB_QP_ACCESS_FLAGS |
+ IB_QP_ALT_PATH |
+ IB_QP_PATH_MIG_STATE),
[RC] = (IB_QP_ACCESS_FLAGS |
IB_QP_ALT_PATH |
IB_QP_PATH_MIG_STATE |
@@ -462,6 +489,10 @@ static const struct {
.opt_param = {
[UD] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
+ [UC] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PATH_MIG_STATE),
[RC] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
@@ -476,6 +507,14 @@ static const struct {
.opt_param = {
[UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
+ [UC] = (IB_QP_AV |
+ IB_QP_MAX_QP_RD_ATOMIC |
+ IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX |
+ IB_QP_PATH_MIG_STATE),
[RC] = (IB_QP_AV |
IB_QP_TIMEOUT |
IB_QP_RETRY_CNT |
@@ -501,6 +540,7 @@ static const struct {
.opt_param = {
[UD] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
+ [UC] = (IB_QP_CUR_STATE),
[RC] = (IB_QP_CUR_STATE |
IB_QP_MIN_RNR_TIMER),
[MLX] = (IB_QP_CUR_STATE |
@@ -552,7 +592,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
- void *mailbox = NULL;
+ struct mthca_mailbox *mailbox;
struct mthca_qp_param *qp_param;
struct mthca_qp_context *qp_context;
u32 req_param, opt_param;
@@ -609,10 +649,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
return -EINVAL;
}
- mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
- if (!mailbox)
- return -ENOMEM;
- qp_param = MAILBOX_ALIGN(mailbox);
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ qp_param = mailbox->buf;
qp_context = &qp_param->context;
memset(qp_param, 0, sizeof *qp_param);
@@ -652,7 +692,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
/* leave arbel_sched_queue as 0 */
- qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
+ if (qp->ibqp.uobject)
+ qp_context->usr_page =
+ cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
+ else
+ qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
qp_context->local_qpn = cpu_to_be32(qp->qpn);
if (attr_mask & IB_QP_DEST_QPN) {
qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
@@ -683,7 +727,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
if (attr_mask & IB_QP_AV) {
qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f;
qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid);
- qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3;
+ qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate;
if (attr->ah_attr.ah_flags & IB_AH_GRH) {
qp_context->pri_path.g_mylmc |= 1 << 7;
qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
@@ -724,9 +768,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
}
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
- qp_context->params1 |= cpu_to_be32(min(attr->max_dest_rd_atomic ?
- ffs(attr->max_dest_rd_atomic) - 1 : 0,
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ?
+ ffs(attr->max_rd_atomic) - 1 : 0,
7) << 21);
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
}
@@ -764,10 +808,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
qp->atomic_rd_en = attr->qp_access_flags;
}
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
u8 rra_max;
- if (qp->resp_depth && !attr->max_rd_atomic) {
+ if (qp->resp_depth && !attr->max_dest_rd_atomic) {
/*
* Lowering our responder resources to zero.
* Turn off RDMA/atomics as responder.
@@ -778,7 +822,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
MTHCA_QP_OPTPAR_RAE);
}
- if (!qp->resp_depth && attr->max_rd_atomic) {
+ if (!qp->resp_depth && attr->max_dest_rd_atomic) {
/*
* Increasing our responder resources from
* zero. Turn on RDMA/atomics as appropriate.
@@ -799,7 +843,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
}
for (rra_max = 0;
- 1 << rra_max < attr->max_rd_atomic &&
+ 1 << rra_max < attr->max_dest_rd_atomic &&
rra_max < dev->qp_table.rdb_shift;
++rra_max)
; /* nothing */
@@ -807,7 +851,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
qp_context->params2 |= cpu_to_be32(rra_max << 21);
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
- qp->resp_depth = attr->max_rd_atomic;
+ qp->resp_depth = attr->max_dest_rd_atomic;
}
qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
@@ -835,7 +879,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
}
err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
- qp->qpn, 0, qp_param, 0, &status);
+ qp->qpn, 0, mailbox, 0, &status);
if (status) {
mthca_warn(dev, "modify QP %d returned status %02x.\n",
state_table[cur_state][new_state].trans, status);
@@ -845,7 +889,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
if (!err)
qp->state = new_state;
- kfree(mailbox);
+ mthca_free_mailbox(dev, mailbox);
if (is_sqp(dev, qp))
store_attrs(to_msqp(qp), attr, attr_mask);
@@ -917,6 +961,15 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
1 << qp->sq.wqe_shift);
+
+ /*
+ * If this is a userspace QP, we don't actually have to
+ * allocate anything. All we need is to calculate the WQE
+ * sizes and the send_wqe_offset, so we're done now.
+ */
+ if (pd->ibpd.uobject)
+ return 0;
+
size = PAGE_ALIGN(qp->send_wqe_offset +
(qp->sq.max << qp->sq.wqe_shift));
@@ -934,7 +987,8 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n",
size, shift);
- qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t);
+ qp->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev, size,
+ &t, GFP_KERNEL);
if (!qp->queue.direct.buf)
goto err_out;
@@ -973,7 +1027,8 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
for (i = 0; i < npages; ++i) {
qp->queue.page_list[i].buf =
- pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
+ dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+ &t, GFP_KERNEL);
if (!qp->queue.page_list[i].buf)
goto err_out_free;
@@ -996,16 +1051,15 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
err_out_free:
if (qp->is_direct) {
- pci_free_consistent(dev->pdev, size,
- qp->queue.direct.buf,
- pci_unmap_addr(&qp->queue.direct, mapping));
+ dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf,
+ pci_unmap_addr(&qp->queue.direct, mapping));
} else
for (i = 0; i < npages; ++i) {
if (qp->queue.page_list[i].buf)
- pci_free_consistent(dev->pdev, PAGE_SIZE,
- qp->queue.page_list[i].buf,
- pci_unmap_addr(&qp->queue.page_list[i],
- mapping));
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ qp->queue.page_list[i].buf,
+ pci_unmap_addr(&qp->queue.page_list[i],
+ mapping));
}
@@ -1015,10 +1069,32 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
return err;
}
-static int mthca_alloc_memfree(struct mthca_dev *dev,
+static void mthca_free_wqe_buf(struct mthca_dev *dev,
struct mthca_qp *qp)
{
- int ret = 0;
+ int i;
+ int size = PAGE_ALIGN(qp->send_wqe_offset +
+ (qp->sq.max << qp->sq.wqe_shift));
+
+ if (qp->is_direct) {
+ dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf,
+ pci_unmap_addr(&qp->queue.direct, mapping));
+ } else {
+ for (i = 0; i < size / PAGE_SIZE; ++i) {
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ qp->queue.page_list[i].buf,
+ pci_unmap_addr(&qp->queue.page_list[i],
+ mapping));
+ }
+ }
+
+ kfree(qp->wrid);
+}
+
+static int mthca_map_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ int ret;
if (mthca_is_memfree(dev)) {
ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
@@ -1029,35 +1105,15 @@ static int mthca_alloc_memfree(struct mthca_dev *dev,
if (ret)
goto err_qpc;
- ret = mthca_table_get(dev, dev->qp_table.rdb_table,
- qp->qpn << dev->qp_table.rdb_shift);
- if (ret)
- goto err_eqpc;
-
- qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
- qp->qpn, &qp->rq.db);
- if (qp->rq.db_index < 0) {
- ret = -ENOMEM;
- goto err_rdb;
- }
+ ret = mthca_table_get(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
+ if (ret)
+ goto err_eqpc;
- qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
- qp->qpn, &qp->sq.db);
- if (qp->sq.db_index < 0) {
- ret = -ENOMEM;
- goto err_rq_db;
- }
}
return 0;
-err_rq_db:
- mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
-
-err_rdb:
- mthca_table_put(dev, dev->qp_table.rdb_table,
- qp->qpn << dev->qp_table.rdb_shift);
-
err_eqpc:
mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
@@ -1067,16 +1123,41 @@ err_qpc:
return ret;
}
+static void mthca_unmap_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ mthca_table_put(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
+ mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+ mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
+}
+
+static int mthca_alloc_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ int ret = 0;
+
+ if (mthca_is_memfree(dev)) {
+ qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
+ qp->qpn, &qp->rq.db);
+ if (qp->rq.db_index < 0)
+ return ret;
+
+ qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
+ qp->qpn, &qp->sq.db);
+ if (qp->sq.db_index < 0)
+ mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+ }
+
+ return ret;
+}
+
static void mthca_free_memfree(struct mthca_dev *dev,
struct mthca_qp *qp)
{
if (mthca_is_memfree(dev)) {
mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
- mthca_table_put(dev, dev->qp_table.rdb_table,
- qp->qpn << dev->qp_table.rdb_shift);
- mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
- mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
}
}
@@ -1108,13 +1189,28 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
mthca_wq_init(&qp->sq);
mthca_wq_init(&qp->rq);
- ret = mthca_alloc_memfree(dev, qp);
+ ret = mthca_map_memfree(dev, qp);
if (ret)
return ret;
ret = mthca_alloc_wqe_buf(dev, pd, qp);
if (ret) {
- mthca_free_memfree(dev, qp);
+ mthca_unmap_memfree(dev, qp);
+ return ret;
+ }
+
+ /*
+ * If this is a userspace QP, we're done now. The doorbells
+ * will be allocated and buffers will be initialized in
+ * userspace.
+ */
+ if (pd->ibpd.uobject)
+ return 0;
+
+ ret = mthca_alloc_memfree(dev, qp);
+ if (ret) {
+ mthca_free_wqe_buf(dev, qp);
+ mthca_unmap_memfree(dev, qp);
return ret;
}
@@ -1147,22 +1243,39 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
return 0;
}
-static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp)
+static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
+ struct mthca_qp *qp)
{
- int i;
-
- if (!mthca_is_memfree(dev))
- return;
+ /* Sanity check QP size before proceeding */
+ if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 ||
+ cap->max_send_sge > 64 || cap->max_recv_sge > 64)
+ return -EINVAL;
- for (i = 0; 1 << i < qp->rq.max; ++i)
- ; /* nothing */
+ if (mthca_is_memfree(dev)) {
+ qp->rq.max = cap->max_recv_wr ?
+ roundup_pow_of_two(cap->max_recv_wr) : 0;
+ qp->sq.max = cap->max_send_wr ?
+ roundup_pow_of_two(cap->max_send_wr) : 0;
+ } else {
+ qp->rq.max = cap->max_recv_wr;
+ qp->sq.max = cap->max_send_wr;
+ }
- qp->rq.max = 1 << i;
+ qp->rq.max_gs = cap->max_recv_sge;
+ qp->sq.max_gs = max_t(int, cap->max_send_sge,
+ ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE,
+ MTHCA_INLINE_CHUNK_SIZE) /
+ sizeof (struct mthca_data_seg));
- for (i = 0; 1 << i < qp->sq.max; ++i)
- ; /* nothing */
+ /*
+ * For MLX transport we need 2 extra S/G entries:
+ * one for the header and one for the checksum at the end
+ */
+ if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) ||
+ qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg)
+ return -EINVAL;
- qp->sq.max = 1 << i;
+ return 0;
}
int mthca_alloc_qp(struct mthca_dev *dev,
@@ -1171,11 +1284,14 @@ int mthca_alloc_qp(struct mthca_dev *dev,
struct mthca_cq *recv_cq,
enum ib_qp_type type,
enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
struct mthca_qp *qp)
{
int err;
- mthca_align_qp_size(dev, qp);
+ err = mthca_set_qp_size(dev, cap, qp);
+ if (err)
+ return err;
switch (type) {
case IB_QPT_RC: qp->transport = RC; break;
@@ -1208,14 +1324,17 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
int qpn,
int port,
struct mthca_sqp *sqp)
{
- int err = 0;
u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
+ int err;
- mthca_align_qp_size(dev, &sqp->qp);
+ err = mthca_set_qp_size(dev, cap, &sqp->qp);
+ if (err)
+ return err;
sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
@@ -1274,8 +1393,6 @@ void mthca_free_qp(struct mthca_dev *dev,
struct mthca_qp *qp)
{
u8 status;
- int size;
- int i;
struct mthca_cq *send_cq;
struct mthca_cq *recv_cq;
@@ -1305,31 +1422,22 @@ void mthca_free_qp(struct mthca_dev *dev,
if (qp->state != IB_QPS_RESET)
mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);
- mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
- if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
- mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
-
- mthca_free_mr(dev, &qp->mr);
-
- size = PAGE_ALIGN(qp->send_wqe_offset +
- (qp->sq.max << qp->sq.wqe_shift));
+ /*
+ * If this is a userspace QP, the buffers, MR, CQs and so on
+ * will be cleaned up in userspace, so all we have to do is
+ * unref the mem-free tables and free the QPN in our table.
+ */
+ if (!qp->ibqp.uobject) {
+ mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
+ if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
+ mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
- if (qp->is_direct) {
- pci_free_consistent(dev->pdev, size,
- qp->queue.direct.buf,
- pci_unmap_addr(&qp->queue.direct, mapping));
- } else {
- for (i = 0; i < size / PAGE_SIZE; ++i) {
- pci_free_consistent(dev->pdev, PAGE_SIZE,
- qp->queue.page_list[i].buf,
- pci_unmap_addr(&qp->queue.page_list[i],
- mapping));
- }
+ mthca_free_mr(dev, &qp->mr);
+ mthca_free_memfree(dev, qp);
+ mthca_free_wqe_buf(dev, qp);
}
- kfree(qp->wrid);
-
- mthca_free_memfree(dev, qp);
+ mthca_unmap_memfree(dev, qp);
if (is_sqp(dev, qp)) {
atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
@@ -1529,6 +1637,26 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
+ case UC:
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ ((struct mthca_raddr_seg *) wqe)->raddr =
+ cpu_to_be64(wr->wr.rdma.remote_addr);
+ ((struct mthca_raddr_seg *) wqe)->rkey =
+ cpu_to_be32(wr->wr.rdma.rkey);
+ ((struct mthca_raddr_seg *) wqe)->reserved = 0;
+ wqe += sizeof (struct mthca_raddr_seg);
+ size += sizeof (struct mthca_raddr_seg) / 16;
+ break;
+
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+
+ break;
+
case UD:
((struct mthca_tavor_ud_seg *) wqe)->lkey =
cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
@@ -1814,9 +1942,29 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
sizeof (struct mthca_atomic_seg);
break;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ ((struct mthca_raddr_seg *) wqe)->raddr =
+ cpu_to_be64(wr->wr.rdma.remote_addr);
+ ((struct mthca_raddr_seg *) wqe)->rkey =
+ cpu_to_be32(wr->wr.rdma.rkey);
+ ((struct mthca_raddr_seg *) wqe)->reserved = 0;
+ wqe += sizeof (struct mthca_raddr_seg);
+ size += sizeof (struct mthca_raddr_seg) / 16;
+ break;
+
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+
+ break;
+
+ case UC:
+ switch (wr->opcode) {
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- case IB_WR_RDMA_READ:
((struct mthca_raddr_seg *) wqe)->raddr =
cpu_to_be64(wr->wr.rdma.remote_addr);
((struct mthca_raddr_seg *) wqe)->rkey =
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
new file mode 100644
index 00000000000..3024c1b4547
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef MTHCA_USER_H
+#define MTHCA_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct mthca_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+ __u32 uarc_size;
+};
+
+struct mthca_alloc_pd_resp {
+ __u32 pdn;
+ __u32 reserved;
+};
+
+struct mthca_create_cq {
+ __u32 lkey;
+ __u32 pdn;
+ __u64 arm_db_page;
+ __u64 set_db_page;
+ __u32 arm_db_index;
+ __u32 set_db_index;
+};
+
+struct mthca_create_cq_resp {
+ __u32 cqn;
+ __u32 reserved;
+};
+
+struct mthca_create_qp {
+ __u32 lkey;
+ __u32 reserved;
+ __u64 sq_db_page;
+ __u64 rq_db_page;
+ __u32 sq_db_index;
+ __u32 rq_db_index;
+};
+
+#endif /* MTHCA_USER_H */
diff --git a/drivers/infiniband/include/ib_user_verbs.h b/drivers/infiniband/include/ib_user_verbs.h
new file mode 100644
index 00000000000..7c613706af7
--- /dev/null
+++ b/drivers/infiniband/include/ib_user_verbs.h
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: ib_user_verbs.h 2708 2005-06-24 17:27:21Z roland $
+ */
+
+#ifndef IB_USER_VERBS_H
+#define IB_USER_VERBS_H
+
+#include <linux/types.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define IB_USER_VERBS_ABI_VERSION 1
+
+enum {
+ IB_USER_VERBS_CMD_QUERY_PARAMS,
+ IB_USER_VERBS_CMD_GET_CONTEXT,
+ IB_USER_VERBS_CMD_QUERY_DEVICE,
+ IB_USER_VERBS_CMD_QUERY_PORT,
+ IB_USER_VERBS_CMD_QUERY_GID,
+ IB_USER_VERBS_CMD_QUERY_PKEY,
+ IB_USER_VERBS_CMD_ALLOC_PD,
+ IB_USER_VERBS_CMD_DEALLOC_PD,
+ IB_USER_VERBS_CMD_CREATE_AH,
+ IB_USER_VERBS_CMD_MODIFY_AH,
+ IB_USER_VERBS_CMD_QUERY_AH,
+ IB_USER_VERBS_CMD_DESTROY_AH,
+ IB_USER_VERBS_CMD_REG_MR,
+ IB_USER_VERBS_CMD_REG_SMR,
+ IB_USER_VERBS_CMD_REREG_MR,
+ IB_USER_VERBS_CMD_QUERY_MR,
+ IB_USER_VERBS_CMD_DEREG_MR,
+ IB_USER_VERBS_CMD_ALLOC_MW,
+ IB_USER_VERBS_CMD_BIND_MW,
+ IB_USER_VERBS_CMD_DEALLOC_MW,
+ IB_USER_VERBS_CMD_CREATE_CQ,
+ IB_USER_VERBS_CMD_RESIZE_CQ,
+ IB_USER_VERBS_CMD_DESTROY_CQ,
+ IB_USER_VERBS_CMD_POLL_CQ,
+ IB_USER_VERBS_CMD_PEEK_CQ,
+ IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
+ IB_USER_VERBS_CMD_CREATE_QP,
+ IB_USER_VERBS_CMD_QUERY_QP,
+ IB_USER_VERBS_CMD_MODIFY_QP,
+ IB_USER_VERBS_CMD_DESTROY_QP,
+ IB_USER_VERBS_CMD_POST_SEND,
+ IB_USER_VERBS_CMD_POST_RECV,
+ IB_USER_VERBS_CMD_ATTACH_MCAST,
+ IB_USER_VERBS_CMD_DETACH_MCAST
+};
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct ib_uverbs_async_event_desc {
+ __u64 element;
+ __u32 event_type; /* enum ib_event_type */
+ __u32 reserved;
+};
+
+struct ib_uverbs_comp_event_desc {
+ __u64 cq_handle;
+};
+
+/*
+ * All commands from userspace should start with a __u32 command field
+ * followed by __u16 in_words and out_words fields (which give the
+ * length of the command block and response buffer if any in 32-bit
+ * words). The kernel driver will read these fields first and read
+ * the rest of the command struct based on these value.
+ */
+
+struct ib_uverbs_cmd_hdr {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+};
+
+/*
+ * No driver_data for "query params" command, since this is intended
+ * to be a core function with no possible device dependence.
+ */
+struct ib_uverbs_query_params {
+ __u64 response;
+};
+
+struct ib_uverbs_query_params_resp {
+ __u32 num_cq_events;
+};
+
+struct ib_uverbs_get_context {
+ __u64 response;
+ __u64 cq_fd_tab;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_get_context_resp {
+ __u32 async_fd;
+ __u32 reserved;
+};
+
+struct ib_uverbs_query_device {
+ __u64 response;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_device_resp {
+ __u64 fw_ver;
+ __u64 node_guid;
+ __u64 sys_image_guid;
+ __u64 max_mr_size;
+ __u64 page_size_cap;
+ __u32 vendor_id;
+ __u32 vendor_part_id;
+ __u32 hw_ver;
+ __u32 max_qp;
+ __u32 max_qp_wr;
+ __u32 device_cap_flags;
+ __u32 max_sge;
+ __u32 max_sge_rd;
+ __u32 max_cq;
+ __u32 max_cqe;
+ __u32 max_mr;
+ __u32 max_pd;
+ __u32 max_qp_rd_atom;
+ __u32 max_ee_rd_atom;
+ __u32 max_res_rd_atom;
+ __u32 max_qp_init_rd_atom;
+ __u32 max_ee_init_rd_atom;
+ __u32 atomic_cap;
+ __u32 max_ee;
+ __u32 max_rdd;
+ __u32 max_mw;
+ __u32 max_raw_ipv6_qp;
+ __u32 max_raw_ethy_qp;
+ __u32 max_mcast_grp;
+ __u32 max_mcast_qp_attach;
+ __u32 max_total_mcast_qp_attach;
+ __u32 max_ah;
+ __u32 max_fmr;
+ __u32 max_map_per_fmr;
+ __u32 max_srq;
+ __u32 max_srq_wr;
+ __u32 max_srq_sge;
+ __u16 max_pkeys;
+ __u8 local_ca_ack_delay;
+ __u8 phys_port_cnt;
+ __u8 reserved[4];
+};
+
+struct ib_uverbs_query_port {
+ __u64 response;
+ __u8 port_num;
+ __u8 reserved[7];
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_port_resp {
+ __u32 port_cap_flags;
+ __u32 max_msg_sz;
+ __u32 bad_pkey_cntr;
+ __u32 qkey_viol_cntr;
+ __u32 gid_tbl_len;
+ __u16 pkey_tbl_len;
+ __u16 lid;
+ __u16 sm_lid;
+ __u8 state;
+ __u8 max_mtu;
+ __u8 active_mtu;
+ __u8 lmc;
+ __u8 max_vl_num;
+ __u8 sm_sl;
+ __u8 subnet_timeout;
+ __u8 init_type_reply;
+ __u8 active_width;
+ __u8 active_speed;
+ __u8 phys_state;
+ __u8 reserved[3];
+};
+
+struct ib_uverbs_query_gid {
+ __u64 response;
+ __u8 port_num;
+ __u8 index;
+ __u8 reserved[6];
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_gid_resp {
+ __u8 gid[16];
+};
+
+struct ib_uverbs_query_pkey {
+ __u64 response;
+ __u8 port_num;
+ __u8 index;
+ __u8 reserved[6];
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_pkey_resp {
+ __u16 pkey;
+ __u16 reserved;
+};
+
+struct ib_uverbs_alloc_pd {
+ __u64 response;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_alloc_pd_resp {
+ __u32 pd_handle;
+};
+
+struct ib_uverbs_dealloc_pd {
+ __u32 pd_handle;
+};
+
+struct ib_uverbs_reg_mr {
+ __u64 response;
+ __u64 start;
+ __u64 length;
+ __u64 hca_va;
+ __u32 pd_handle;
+ __u32 access_flags;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_reg_mr_resp {
+ __u32 mr_handle;
+ __u32 lkey;
+ __u32 rkey;
+};
+
+struct ib_uverbs_dereg_mr {
+ __u32 mr_handle;
+};
+
+struct ib_uverbs_create_cq {
+ __u64 response;
+ __u64 user_handle;
+ __u32 cqe;
+ __u32 event_handler;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_create_cq_resp {
+ __u32 cq_handle;
+ __u32 cqe;
+};
+
+struct ib_uverbs_destroy_cq {
+ __u32 cq_handle;
+};
+
+struct ib_uverbs_create_qp {
+ __u64 response;
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 send_cq_handle;
+ __u32 recv_cq_handle;
+ __u32 srq_handle;
+ __u32 max_send_wr;
+ __u32 max_recv_wr;
+ __u32 max_send_sge;
+ __u32 max_recv_sge;
+ __u32 max_inline_data;
+ __u8 sq_sig_all;
+ __u8 qp_type;
+ __u8 is_srq;
+ __u8 reserved;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_create_qp_resp {
+ __u32 qp_handle;
+ __u32 qpn;
+};
+
+/*
+ * This struct needs to remain a multiple of 8 bytes to keep the
+ * alignment of the modify QP parameters.
+ */
+struct ib_uverbs_qp_dest {
+ __u8 dgid[16];
+ __u32 flow_label;
+ __u16 dlid;
+ __u16 reserved;
+ __u8 sgid_index;
+ __u8 hop_limit;
+ __u8 traffic_class;
+ __u8 sl;
+ __u8 src_path_bits;
+ __u8 static_rate;
+ __u8 is_global;
+ __u8 port_num;
+};
+
+struct ib_uverbs_modify_qp {
+ struct ib_uverbs_qp_dest dest;
+ struct ib_uverbs_qp_dest alt_dest;
+ __u32 qp_handle;
+ __u32 attr_mask;
+ __u32 qkey;
+ __u32 rq_psn;
+ __u32 sq_psn;
+ __u32 dest_qp_num;
+ __u32 qp_access_flags;
+ __u16 pkey_index;
+ __u16 alt_pkey_index;
+ __u8 qp_state;
+ __u8 cur_qp_state;
+ __u8 path_mtu;
+ __u8 path_mig_state;
+ __u8 en_sqd_async_notify;
+ __u8 max_rd_atomic;
+ __u8 max_dest_rd_atomic;
+ __u8 min_rnr_timer;
+ __u8 port_num;
+ __u8 timeout;
+ __u8 retry_cnt;
+ __u8 rnr_retry;
+ __u8 alt_port_num;
+ __u8 alt_timeout;
+ __u8 reserved[2];
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_modify_qp_resp {
+};
+
+struct ib_uverbs_destroy_qp {
+ __u32 qp_handle;
+};
+
+struct ib_uverbs_attach_mcast {
+ __u8 gid[16];
+ __u32 qp_handle;
+ __u16 mlid;
+ __u16 reserved;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_detach_mcast {
+ __u8 gid[16];
+ __u32 qp_handle;
+ __u16 mlid;
+ __u16 reserved;
+ __u64 driver_data[0];
+};
+
+#endif /* IB_USER_VERBS_H */
diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h
index cf01f044a22..e5bd9a10c20 100644
--- a/drivers/infiniband/include/ib_verbs.h
+++ b/drivers/infiniband/include/ib_verbs.h
@@ -4,6 +4,7 @@
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -41,7 +42,10 @@
#include <linux/types.h>
#include <linux/device.h>
+
#include <asm/atomic.h>
+#include <asm/scatterlist.h>
+#include <asm/uaccess.h>
union ib_gid {
u8 raw[16];
@@ -544,7 +548,7 @@ struct ib_send_wr {
int num_sge;
enum ib_wr_opcode opcode;
int send_flags;
- u32 imm_data;
+ __be32 imm_data;
union {
struct {
u64 remote_addr;
@@ -618,29 +622,86 @@ struct ib_fmr_attr {
u8 page_size;
};
+struct ib_ucontext {
+ struct ib_device *device;
+ struct list_head pd_list;
+ struct list_head mr_list;
+ struct list_head mw_list;
+ struct list_head cq_list;
+ struct list_head qp_list;
+ struct list_head srq_list;
+ struct list_head ah_list;
+ spinlock_t lock;
+};
+
+struct ib_uobject {
+ u64 user_handle; /* handle given to us by userspace */
+ struct ib_ucontext *context; /* associated user context */
+ struct list_head list; /* link to context's list */
+ u32 id; /* index into kernel idr */
+};
+
+struct ib_umem {
+ unsigned long user_base;
+ unsigned long virt_base;
+ size_t length;
+ int offset;
+ int page_size;
+ int writable;
+ struct list_head chunk_list;
+};
+
+struct ib_umem_chunk {
+ struct list_head list;
+ int nents;
+ int nmap;
+ struct scatterlist page_list[0];
+};
+
+struct ib_udata {
+ void __user *inbuf;
+ void __user *outbuf;
+ size_t inlen;
+ size_t outlen;
+};
+
+#define IB_UMEM_MAX_PAGE_CHUNK \
+ ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
+ ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
+ (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
+
+struct ib_umem_object {
+ struct ib_uobject uobject;
+ struct ib_umem umem;
+};
+
struct ib_pd {
- struct ib_device *device;
- atomic_t usecnt; /* count all resources */
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ atomic_t usecnt; /* count all resources */
};
struct ib_ah {
struct ib_device *device;
struct ib_pd *pd;
+ struct ib_uobject *uobject;
};
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
struct ib_cq {
- struct ib_device *device;
- ib_comp_handler comp_handler;
- void (*event_handler)(struct ib_event *, void *);
- void * cq_context;
- int cqe;
- atomic_t usecnt; /* count number of work queues */
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ ib_comp_handler comp_handler;
+ void (*event_handler)(struct ib_event *, void *);
+ void * cq_context;
+ int cqe;
+ atomic_t usecnt; /* count number of work queues */
};
struct ib_srq {
struct ib_device *device;
+ struct ib_uobject *uobject;
struct ib_pd *pd;
void *srq_context;
atomic_t usecnt;
@@ -652,6 +713,7 @@ struct ib_qp {
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
struct ib_srq *srq;
+ struct ib_uobject *uobject;
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
u32 qp_num;
@@ -659,16 +721,18 @@ struct ib_qp {
};
struct ib_mr {
- struct ib_device *device;
- struct ib_pd *pd;
- u32 lkey;
- u32 rkey;
- atomic_t usecnt; /* count number of MWs */
+ struct ib_device *device;
+ struct ib_pd *pd;
+ struct ib_uobject *uobject;
+ u32 lkey;
+ u32 rkey;
+ atomic_t usecnt; /* count number of MWs */
};
struct ib_mw {
struct ib_device *device;
struct ib_pd *pd;
+ struct ib_uobject *uobject;
u32 rkey;
};
@@ -737,7 +801,14 @@ struct ib_device {
int (*modify_port)(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify);
- struct ib_pd * (*alloc_pd)(struct ib_device *device);
+ struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device,
+ struct ib_udata *udata);
+ int (*dealloc_ucontext)(struct ib_ucontext *context);
+ int (*mmap)(struct ib_ucontext *context,
+ struct vm_area_struct *vma);
+ struct ib_pd * (*alloc_pd)(struct ib_device *device,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
int (*dealloc_pd)(struct ib_pd *pd);
struct ib_ah * (*create_ah)(struct ib_pd *pd,
struct ib_ah_attr *ah_attr);
@@ -747,7 +818,8 @@ struct ib_device {
struct ib_ah_attr *ah_attr);
int (*destroy_ah)(struct ib_ah *ah);
struct ib_qp * (*create_qp)(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr);
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
int (*modify_qp)(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask);
@@ -762,8 +834,9 @@ struct ib_device {
int (*post_recv)(struct ib_qp *qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr);
- struct ib_cq * (*create_cq)(struct ib_device *device,
- int cqe);
+ struct ib_cq * (*create_cq)(struct ib_device *device, int cqe,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
int (*destroy_cq)(struct ib_cq *cq);
int (*resize_cq)(struct ib_cq *cq, int *cqe);
int (*poll_cq)(struct ib_cq *cq, int num_entries,
@@ -780,6 +853,10 @@ struct ib_device {
int num_phys_buf,
int mr_access_flags,
u64 *iova_start);
+ struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
+ struct ib_umem *region,
+ int mr_access_flags,
+ struct ib_udata *udata);
int (*query_mr)(struct ib_mr *mr,
struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
@@ -817,6 +894,7 @@ struct ib_device {
struct ib_mad *in_mad,
struct ib_mad *out_mad);
+ struct module *owner;
struct class_device class_dev;
struct kobject ports_parent;
struct list_head port_list;
@@ -852,6 +930,16 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data);
+static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
+{
+ return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
+}
+
+static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
+{
+ return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+}
+
int ib_register_event_handler (struct ib_event_handler *event_handler);
int ib_unregister_event_handler(struct ib_event_handler *event_handler);
void ib_dispatch_event(struct ib_event *event);