about summary refs log tree commit diff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@g5.osdl.org> 2006-09-29 15:18:22 -0700
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-09-29 15:18:22 -0700
commit 9a69d1aeccf169d9a1e442c07d3a6e87f06a7b49 (patch)
tree 5597011c3595867bf0e073b8f4bdffefe9238a10
parent c0341b0f47722fbe5ab45f436fc6ddc1c58c0a6f (diff)
parent 3d27b00457167103fb9f7e23fc2454c801a6b8f0 (diff)
Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband: (33 commits)
  IB/ipath: Fix lockdep error upon "ifconfig ibN down"
  IB/ipath: Fix races with ib_resize_cq()
  IB/ipath: Support new PCIE device, QLE7142
  IB/ipath: Set CPU affinity early
  IB/ipath: Fix EEPROM read when driver is compiled with -Os
  IB/ipath: Fix and recover TXE piobuf and PBC parity errors
  IB/ipath: Change HT CRC message to indicate how to resolve problem
  IB/ipath: Clean up module exit code
  IB/ipath: Call mtrr_del with correct arguments
  IB/ipath: Flush RWQEs if access error or invalid error seen
  IB/ipath: Improved support for PowerPC
  IB/ipath: Drop unnecessary "(void *)" casts
  IB/ipath: Support multiple simultaneous devices of different types
  IB/ipath: Fix mismatch in shifts and masks for printing debug info
  IB/ipath: Fix compiler warnings and errors on non-x86_64 systems
  IB/ipath: Print more informative parity error messages
  IB/ipath: Ensure that PD of MR matches PD of QP checking the Rkey
  IB/ipath: RC and UC should validate SLID and DLID
  IB/ipath: Only allow complete writes to flash
  IB/ipath: Count SRQs properly
  ...
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_ae.c 2
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_alloc.c 2
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_cm.c 15
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_provider.c 8
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_rnic.c 4
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_common.h 54
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_cq.c 48
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_driver.c 359
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_eeprom.c 17
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_file_ops.c 974
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_fs.c 9
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_iba6110.c 132
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_iba6120.c 263
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_init_chip.c 56
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_intr.c 280
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_kernel.h 116
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_keys.c 12
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_mad.c 16
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_mr.c 3
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_qp.c 16
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_rc.c 77
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_registers.h 40
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_ruc.c 14
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_srq.c 23
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_sysfs.c 21
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_uc.c 6
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_ud.c 6
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_user_pages.c 56
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.c 43
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.h 18
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_wc_ppc64.c 20
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_wc_x86_64.c 13
-rw-r--r-- drivers/infiniband/ulp/iser/Kconfig 13
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.c 2
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.h 9
-rw-r--r-- drivers/infiniband/ulp/iser/iser_initiator.c 60
-rw-r--r-- drivers/infiniband/ulp/iser/iser_memory.c 42
-rw-r--r-- drivers/infiniband/ulp/iser/iser_verbs.c 8
38 files changed, 1973 insertions, 884 deletions
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
index 08f46c83a3a..3aae4978e1c 100644
--- a/drivers/infiniband/hw/amso1100/c2_ae.c
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -197,7 +197,7 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
"resource=%x, qp_state=%s\n",
__FUNCTION__,
to_event_str(event_id),
- be64_to_cpu(wr->ae.ae_generic.user_context),
+ (unsigned long long) be64_to_cpu(wr->ae.ae_generic.user_context),
be32_to_cpu(wr->ae.ae_generic.resource_type),
be32_to_cpu(wr->ae.ae_generic.resource),
to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c
index 1d2529992c0..028a60bbfca 100644
--- a/drivers/infiniband/hw/amso1100/c2_alloc.c
+++ b/drivers/infiniband/hw/amso1100/c2_alloc.c
@@ -115,7 +115,7 @@ u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
((unsigned long) &(head->shared_ptr[mqsp]) -
(unsigned long) head);
pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__,
- &(head->shared_ptr[mqsp]), (u64)*dma_addr);
+ &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
return &(head->shared_ptr[mqsp]);
}
return NULL;
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c
index 485254efdd1..75b93e9b881 100644
--- a/drivers/infiniband/hw/amso1100/c2_cm.c
+++ b/drivers/infiniband/hw/amso1100/c2_cm.c
@@ -302,7 +302,7 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
vq_req = vq_req_alloc(c2dev);
if (!vq_req) {
err = -ENOMEM;
- goto bail1;
+ goto bail0;
}
vq_req->qp = qp;
vq_req->cm_id = cm_id;
@@ -311,7 +311,7 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
if (!wr) {
err = -ENOMEM;
- goto bail2;
+ goto bail1;
}
/* Build the WR */
@@ -331,7 +331,7 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
/* Validate private_data length */
if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
err = -EINVAL;
- goto bail2;
+ goto bail1;
}
if (iw_param->private_data) {
@@ -348,19 +348,19 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
err = vq_send_wr(c2dev, (union c2wr *) wr);
if (err) {
vq_req_put(c2dev, vq_req);
- goto bail2;
+ goto bail1;
}
/* Wait for reply from adapter */
err = vq_wait_for_reply(c2dev, vq_req);
if (err)
- goto bail2;
+ goto bail1;
/* Check that reply is present */
reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
if (!reply) {
err = -ENOMEM;
- goto bail2;
+ goto bail1;
}
err = c2_errno(reply);
@@ -368,9 +368,8 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
if (!err)
c2_set_qp_state(qp, C2_QP_STATE_RTS);
- bail2:
- kfree(wr);
bail1:
+ kfree(wr);
vq_req_free(c2dev, vq_req);
bail0:
if (err) {
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index dd6af551108..da98d9f7142 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -390,14 +390,18 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
}
mr = kmalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr)
+ if (!mr) {
+ vfree(page_list);
return ERR_PTR(-ENOMEM);
+ }
mr->pd = to_c2pd(ib_pd);
pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
"*iova_start %llx, first pa %llx, last pa %llx\n",
__FUNCTION__, page_shift, pbl_depth, total_len,
- *iova_start, page_list[0], page_list[pbl_depth-1]);
+ (unsigned long long) *iova_start,
+ (unsigned long long) page_list[0],
+ (unsigned long long) page_list[pbl_depth-1]);
err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
(1 << page_shift), pbl_depth,
total_len, 0, iova_start,
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
index f49a32b7a8f..e37c5688c21 100644
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -527,7 +527,7 @@ int c2_rnic_init(struct c2_dev *c2dev)
DMA_FROM_DEVICE);
pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages,
- (u64)c2dev->rep_vq.host_dma);
+ (unsigned long long) c2dev->rep_vq.host_dma);
c2_mq_rep_init(&c2dev->rep_vq,
1,
qsize,
@@ -550,7 +550,7 @@ int c2_rnic_init(struct c2_dev *c2dev)
DMA_FROM_DEVICE);
pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q1_pages,
- (u64)c2dev->rep_vq.host_dma);
+ (unsigned long long) c2dev->rep_vq.host_dma);
c2_mq_rep_init(&c2dev->aeq,
2,
qsize,
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index f577905e3ac..54139d39818 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -141,8 +141,9 @@ struct infinipath_stats {
* packets if ipath not configured, etc.)
*/
__u64 sps_krdrops;
+ __u64 sps_txeparity; /* PIO buffer parity error, recovered */
/* pad for future growth */
- __u64 __sps_pad[46];
+ __u64 __sps_pad[45];
};
/*
@@ -185,6 +186,9 @@ typedef enum _ipath_ureg {
#define IPATH_RUNTIME_PCIE 0x2
#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4
#define IPATH_RUNTIME_RCVHDR_COPY 0x8
+#define IPATH_RUNTIME_MASTER 0x10
+#define IPATH_RUNTIME_PBC_REWRITE 0x20
+#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40
/*
* This structure is returned by ipath_userinit() immediately after
@@ -202,7 +206,8 @@ struct ipath_base_info {
/* version of software, for feature checking. */
__u32 spi_sw_version;
/* InfiniPath port assigned, goes into sent packets */
- __u32 spi_port;
+ __u16 spi_port;
+ __u16 spi_subport;
/*
* IB MTU, packets IB data must be less than this.
* The MTU is in bytes, and will be a multiple of 4 bytes.
@@ -218,7 +223,7 @@ struct ipath_base_info {
__u32 spi_tidcnt;
/* size of the TID Eager list in infinipath, in entries */
__u32 spi_tidegrcnt;
- /* size of a single receive header queue entry. */
+ /* size of a single receive header queue entry in words. */
__u32 spi_rcvhdrent_size;
/*
* Count of receive header queue entries allocated.
@@ -310,6 +315,12 @@ struct ipath_base_info {
__u32 spi_filler_for_align;
/* address of readonly memory copy of the rcvhdrq tail register. */
__u64 spi_rcvhdr_tailaddr;
+
+ /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */
+ __u64 spi_subport_uregbase;
+ __u64 spi_subport_rcvegrbuf;
+ __u64 spi_subport_rcvhdr_base;
+
} __attribute__ ((aligned(8)));
@@ -328,12 +339,12 @@ struct ipath_base_info {
/*
* Minor version differences are always compatible
- * a within a major version, however if if user software is larger
+ * a within a major version, however if user software is larger
* than driver software, some new features and/or structure fields
* may not be implemented; the user code must deal with this if it
- * cares, or it must abort after initialization reports the difference
+ * cares, or it must abort after initialization reports the difference.
*/
-#define IPATH_USER_SWMINOR 2
+#define IPATH_USER_SWMINOR 3
#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
@@ -379,7 +390,16 @@ struct ipath_user_info {
*/
__u32 spu_rcvhdrsize;
- __u64 spu_unused; /* kept for compatible layout */
+ /*
+ * If two or more processes wish to share a port, each process
+ * must set the spu_subport_cnt and spu_subport_id to the same
+ * values. The only restriction on the spu_subport_id is that
+ * it be unique for a given node.
+ */
+ __u16 spu_subport_cnt;
+ __u16 spu_subport_id;
+
+ __u32 spu_unused; /* kept for compatible layout */
/*
* address of struct base_info to write to
@@ -392,19 +412,25 @@ struct ipath_user_info {
#define IPATH_CMD_MIN 16
-#define IPATH_CMD_USER_INIT 16 /* set up userspace */
+#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */
#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */
#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */
#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
+#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */
+#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
+#define IPATH_CMD_USER_INIT 24 /* set up userspace */
-#define IPATH_CMD_MAX 21
+#define IPATH_CMD_MAX 24
struct ipath_port_info {
__u32 num_active; /* number of active units */
__u32 unit; /* unit (chip) assigned to caller */
- __u32 port; /* port on unit assigned to caller */
+ __u16 port; /* port on unit assigned to caller */
+ __u16 subport; /* subport on unit assigned to caller */
+ __u16 num_ports; /* number of ports available on unit */
+ __u16 num_subports; /* number of subport slaves opened on port */
};
struct ipath_tid_info {
@@ -435,6 +461,8 @@ struct ipath_cmd {
__u32 recv_ctrl;
/* partition key to set */
__u16 part_key;
+ /* user address of __u32 bitmask of active slaves */
+ __u64 slave_mask_addr;
} cmd;
};
@@ -596,6 +624,10 @@ struct infinipath_counters {
/* K_PktFlags bits */
#define INFINIPATH_KPF_INTR 0x1
+#define INFINIPATH_KPF_SUBPORT_MASK 0x3
+#define INFINIPATH_KPF_SUBPORT_SHIFT 1
+
+#define INFINIPATH_MAX_SUBPORT 4
/* SendPIO per-buffer control */
#define INFINIPATH_SP_TEST 0x40
@@ -610,7 +642,7 @@ struct ipath_header {
/*
* Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
* 14 bits before ECO change ~28 Dec 03. After that, Vers 4,
- * Port 3, TID 11, offset 14.
+ * Port 4, TID 11, offset 13.
*/
__le32 ver_port_tid_offset;
__le16 chksum;
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 049221bc590..87462e0cb4d 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -46,7 +46,7 @@
*/
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
- struct ipath_cq_wc *wc = cq->queue;
+ struct ipath_cq_wc *wc;
unsigned long flags;
u32 head;
u32 next;
@@ -57,6 +57,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
* Note that the head pointer might be writable by user processes.
* Take care to verify it is a sane value.
*/
+ wc = cq->queue;
head = wc->head;
if (head >= (unsigned) cq->ibcq.cqe) {
head = cq->ibcq.cqe;
@@ -109,21 +110,27 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
struct ipath_cq *cq = to_icq(ibcq);
- struct ipath_cq_wc *wc = cq->queue;
+ struct ipath_cq_wc *wc;
unsigned long flags;
int npolled;
+ u32 tail;
spin_lock_irqsave(&cq->lock, flags);
+ wc = cq->queue;
+ tail = wc->tail;
+ if (tail > (u32) cq->ibcq.cqe)
+ tail = (u32) cq->ibcq.cqe;
for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
- if (wc->tail == wc->head)
+ if (tail == wc->head)
break;
- *entry = wc->queue[wc->tail];
- if (wc->tail >= cq->ibcq.cqe)
- wc->tail = 0;
+ *entry = wc->queue[tail];
+ if (tail >= cq->ibcq.cqe)
+ tail = 0;
else
- wc->tail++;
+ tail++;
}
+ wc->tail = tail;
spin_unlock_irqrestore(&cq->lock, flags);
@@ -177,11 +184,6 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
goto done;
}
- if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
- ret = ERR_PTR(-ENOMEM);
- goto done;
- }
-
/* Allocate the completion queue structure. */
cq = kmalloc(sizeof(*cq), GFP_KERNEL);
if (!cq) {
@@ -237,6 +239,16 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
} else
cq->ip = NULL;
+ spin_lock(&dev->n_cqs_lock);
+ if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
+ spin_unlock(&dev->n_cqs_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wc;
+ }
+
+ dev->n_cqs_allocated++;
+ spin_unlock(&dev->n_cqs_lock);
+
/*
* ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
* The number of entries should be >= the number requested or return
@@ -253,7 +265,6 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
ret = &cq->ibcq;
- dev->n_cqs_allocated++;
goto done;
bail_wc:
@@ -280,7 +291,9 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
struct ipath_cq *cq = to_icq(ibcq);
tasklet_kill(&cq->comptask);
+ spin_lock(&dev->n_cqs_lock);
dev->n_cqs_allocated--;
+ spin_unlock(&dev->n_cqs_lock);
if (cq->ip)
kref_put(&cq->ip->ref, ipath_release_mmap_info);
else
@@ -316,10 +329,16 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
return 0;
}
+/**
+ * ipath_resize_cq - change the size of the CQ
+ * @ibcq: the completion queue
+ *
+ * Returns 0 for success.
+ */
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
struct ipath_cq *cq = to_icq(ibcq);
- struct ipath_cq_wc *old_wc = cq->queue;
+ struct ipath_cq_wc *old_wc;
struct ipath_cq_wc *wc;
u32 head, tail, n;
int ret;
@@ -355,6 +374,7 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
* Make sure head and tail are sane since they
* might be user writable.
*/
+ old_wc = cq->queue;
head = old_wc->head;
if (head > (u32) cq->ibcq.cqe)
head = (u32) cq->ibcq.cqe;
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 2108466c7e3..12cefa658f3 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -95,16 +95,6 @@ const char *ipath_ibcstatus_str[] = {
"RecovIdle",
};
-/*
- * These variables are initialized in the chip-specific files
- * but are defined here.
- */
-u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
-u64 ipath_gpio_sda, ipath_gpio_scl;
-u64 infinipath_i_bitsextant;
-ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
-u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
-
static void __devexit ipath_remove_one(struct pci_dev *);
static int __devinit ipath_init_one(struct pci_dev *,
const struct pci_device_id *);
@@ -527,28 +517,146 @@ bail:
return ret;
}
+static void __devexit cleanup_device(struct ipath_devdata *dd)
+{
+ int port;
+
+ ipath_shutdown_device(dd);
+
+ if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
+ /* can't do anything more with chip; needs re-init */
+ *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
+ if (dd->ipath_kregbase) {
+ /*
+ * if we haven't already cleaned up before these are
+ * to ensure any register reads/writes "fail" until
+ * re-init
+ */
+ dd->ipath_kregbase = NULL;
+ dd->ipath_uregbase = 0;
+ dd->ipath_sregbase = 0;
+ dd->ipath_cregbase = 0;
+ dd->ipath_kregsize = 0;
+ }
+ ipath_disable_wc(dd);
+ }
+
+ if (dd->ipath_pioavailregs_dma) {
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ (void *) dd->ipath_pioavailregs_dma,
+ dd->ipath_pioavailregs_phys);
+ dd->ipath_pioavailregs_dma = NULL;
+ }
+ if (dd->ipath_dummy_hdrq) {
+ dma_free_coherent(&dd->pcidev->dev,
+ dd->ipath_pd[0]->port_rcvhdrq_size,
+ dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
+ dd->ipath_dummy_hdrq = NULL;
+ }
+
+ if (dd->ipath_pageshadow) {
+ struct page **tmpp = dd->ipath_pageshadow;
+ dma_addr_t *tmpd = dd->ipath_physshadow;
+ int i, cnt = 0;
+
+ ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
+ "locked\n");
+ for (port = 0; port < dd->ipath_cfgports; port++) {
+ int port_tidbase = port * dd->ipath_rcvtidcnt;
+ int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
+ for (i = port_tidbase; i < maxtid; i++) {
+ if (!tmpp[i])
+ continue;
+ pci_unmap_page(dd->pcidev, tmpd[i],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ ipath_release_user_pages(&tmpp[i], 1);
+ tmpp[i] = NULL;
+ cnt++;
+ }
+ }
+ if (cnt) {
+ ipath_stats.sps_pageunlocks += cnt;
+ ipath_cdbg(VERBOSE, "There were still %u expTID "
+ "entries locked\n", cnt);
+ }
+ if (ipath_stats.sps_pagelocks ||
+ ipath_stats.sps_pageunlocks)
+ ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
+ "unlocked via ipath_m{un}lock\n",
+ (unsigned long long)
+ ipath_stats.sps_pagelocks,
+ (unsigned long long)
+ ipath_stats.sps_pageunlocks);
+
+ ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
+ dd->ipath_pageshadow);
+ vfree(dd->ipath_pageshadow);
+ dd->ipath_pageshadow = NULL;
+ }
+
+ /*
+ * free any resources still in use (usually just kernel ports)
+ * at unload; we do for portcnt, not cfgports, because cfgports
+ * could have changed while we were loaded.
+ */
+ for (port = 0; port < dd->ipath_portcnt; port++) {
+ struct ipath_portdata *pd = dd->ipath_pd[port];
+ dd->ipath_pd[port] = NULL;
+ ipath_free_pddata(dd, pd);
+ }
+ kfree(dd->ipath_pd);
+ /*
+ * debuggability, in case some cleanup path tries to use it
+ * after this
+ */
+ dd->ipath_pd = NULL;
+}
+
static void __devexit ipath_remove_one(struct pci_dev *pdev)
{
- struct ipath_devdata *dd;
+ struct ipath_devdata *dd = pci_get_drvdata(pdev);
- ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev);
- if (!pdev)
- return;
+ ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
+
+ if (dd->verbs_dev)
+ ipath_unregister_ib_device(dd->verbs_dev);
- dd = pci_get_drvdata(pdev);
- ipath_unregister_ib_device(dd->verbs_dev);
ipath_diag_remove(dd);
ipath_user_remove(dd);
ipathfs_remove_device(dd);
ipath_device_remove_group(&pdev->dev, dd);
+
ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
"unit %u\n", dd, (u32) dd->ipath_unit);
- if (dd->ipath_kregbase) {
- ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n",
- dd->ipath_kregbase);
- iounmap((volatile void __iomem *) dd->ipath_kregbase);
- dd->ipath_kregbase = NULL;
- }
+
+ cleanup_device(dd);
+
+ /*
+ * turn off rcv, send, and interrupts for all ports, all drivers
+ * should also hard reset the chip here?
+ * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
+ * for all versions of the driver, if they were allocated
+ */
+ if (pdev->irq) {
+ ipath_cdbg(VERBOSE,
+ "unit %u free_irq of irq %x\n",
+ dd->ipath_unit, pdev->irq);
+ free_irq(pdev->irq, dd);
+ } else
+ ipath_dbg("irq is 0, not doing free_irq "
+ "for unit %u\n", dd->ipath_unit);
+ /*
+ * we check for NULL here, because it's outside
+ * the kregbase check, and we need to call it
+ * after the free_irq. Thus it's possible that
+ * the function pointers were never initialized.
+ */
+ if (dd->ipath_f_cleanup)
+ /* clean up chip-specific stuff */
+ dd->ipath_f_cleanup(dd);
+
+ ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
+ iounmap((volatile void __iomem *) dd->ipath_kregbase);
pci_release_regions(pdev);
ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
pci_disable_device(pdev);
@@ -760,8 +868,8 @@ static void get_rhf_errstring(u32 err, char *msg, size_t len)
static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum,
int err)
{
- return dd->ipath_port0_skbs ?
- (void *)dd->ipath_port0_skbs[bufnum]->data : NULL;
+ return dd->ipath_port0_skbinfo ?
+ (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
}
/**
@@ -783,31 +891,34 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
*/
/*
- * We need 4 extra bytes for unaligned transfer copying
+ * We need 2 extra bytes for ipath_ether data sent in the
+ * key header. In order to keep everything dword aligned,
+ * we'll reserve 4 bytes.
*/
+ len = dd->ipath_ibmaxlen + 4;
+
if (dd->ipath_flags & IPATH_4BYTE_TID) {
- /* we need a 4KB multiple alignment, and there is no way
+ /* We need a 2KB multiple alignment, and there is no way
* to do it except to allocate extra and then skb_reserve
* enough to bring it up to the right alignment.
*/
- len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1;
+ len += 2047;
}
- else
- len = dd->ipath_ibmaxlen + 4;
+
skb = __dev_alloc_skb(len, gfp_mask);
if (!skb) {
ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
len);
goto bail;
}
+
+ skb_reserve(skb, 4);
+
if (dd->ipath_flags & IPATH_4BYTE_TID) {
- u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4);
+ u32 una = (unsigned long)skb->data & 2047;
if (una)
- skb_reserve(skb, 4 + (1 << 11) - una);
- else
- skb_reserve(skb, 4);
- } else
- skb_reserve(skb, 4);
+ skb_reserve(skb, 2048 - una);
+ }
bail:
return skb;
@@ -1326,6 +1437,9 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
"for port %u rcvhdrqtailaddr failed\n",
pd->port_port);
ret = -ENOMEM;
+ dma_free_coherent(&dd->pcidev->dev, amt,
+ pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
+ pd->port_rcvhdrq = NULL;
goto bail;
}
pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
@@ -1347,12 +1461,13 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
"hdrtailaddr@%p %llx physical\n",
pd->port_port, pd->port_rcvhdrq,
- pd->port_rcvhdrq_phys, pd->port_rcvhdrtail_kvaddr,
- (unsigned long long)pd->port_rcvhdrqtailaddr_phys);
+ (unsigned long long) pd->port_rcvhdrq_phys,
+ pd->port_rcvhdrtail_kvaddr, (unsigned long long)
+ pd->port_rcvhdrqtailaddr_phys);
/* clear for security and sanity on each use */
memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
- memset((void *)pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
+ memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
/*
* tell chip each time we init it, even if we are re-using previous
@@ -1805,7 +1920,7 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
pd->port_rcvhdrq = NULL;
if (pd->port_rcvhdrtail_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *)pd->port_rcvhdrtail_kvaddr,
+ pd->port_rcvhdrtail_kvaddr,
pd->port_rcvhdrqtailaddr_phys);
pd->port_rcvhdrtail_kvaddr = NULL;
}
@@ -1824,24 +1939,32 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
dma_free_coherent(&dd->pcidev->dev, size,
base, pd->port_rcvegrbuf_phys[e]);
}
- vfree(pd->port_rcvegrbuf);
+ kfree(pd->port_rcvegrbuf);
pd->port_rcvegrbuf = NULL;
- vfree(pd->port_rcvegrbuf_phys);
+ kfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL;
pd->port_rcvegrbuf_chunks = 0;
- } else if (pd->port_port == 0 && dd->ipath_port0_skbs) {
+ } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
unsigned e;
- struct sk_buff **skbs = dd->ipath_port0_skbs;
+ struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
- dd->ipath_port0_skbs = NULL;
- ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs "
- "@ %p\n", pd->port_port, skbs);
+ dd->ipath_port0_skbinfo = NULL;
+ ipath_cdbg(VERBOSE, "free closed port %d "
+ "ipath_port0_skbinfo @ %p\n", pd->port_port,
+ skbinfo);
for (e = 0; e < dd->ipath_rcvegrcnt; e++)
- if (skbs[e])
- dev_kfree_skb(skbs[e]);
- vfree(skbs);
+ if (skbinfo[e].skb) {
+ pci_unmap_single(dd->pcidev, skbinfo[e].phys,
+ dd->ipath_ibmaxlen,
+ PCI_DMA_FROMDEVICE);
+ dev_kfree_skb(skbinfo[e].skb);
+ }
+ vfree(skbinfo);
}
kfree(pd->port_tid_pg_list);
+ vfree(pd->subport_uregbase);
+ vfree(pd->subport_rcvegrbuf);
+ vfree(pd->subport_rcvhdr_base);
kfree(pd);
}
@@ -1907,150 +2030,12 @@ bail:
return ret;
}
-static void cleanup_device(struct ipath_devdata *dd)
-{
- int port;
-
- ipath_shutdown_device(dd);
-
- if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
- /* can't do anything more with chip; needs re-init */
- *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
- if (dd->ipath_kregbase) {
- /*
- * if we haven't already cleaned up before these are
- * to ensure any register reads/writes "fail" until
- * re-init
- */
- dd->ipath_kregbase = NULL;
- dd->ipath_uregbase = 0;
- dd->ipath_sregbase = 0;
- dd->ipath_cregbase = 0;
- dd->ipath_kregsize = 0;
- }
- ipath_disable_wc(dd);
- }
-
- if (dd->ipath_pioavailregs_dma) {
- dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *) dd->ipath_pioavailregs_dma,
- dd->ipath_pioavailregs_phys);
- dd->ipath_pioavailregs_dma = NULL;
- }
- if (dd->ipath_dummy_hdrq) {
- dma_free_coherent(&dd->pcidev->dev,
- dd->ipath_pd[0]->port_rcvhdrq_size,
- dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
- dd->ipath_dummy_hdrq = NULL;
- }
-
- if (dd->ipath_pageshadow) {
- struct page **tmpp = dd->ipath_pageshadow;
- int i, cnt = 0;
-
- ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
- "locked\n");
- for (port = 0; port < dd->ipath_cfgports; port++) {
- int port_tidbase = port * dd->ipath_rcvtidcnt;
- int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
- for (i = port_tidbase; i < maxtid; i++) {
- if (!tmpp[i])
- continue;
- ipath_release_user_pages(&tmpp[i], 1);
- tmpp[i] = NULL;
- cnt++;
- }
- }
- if (cnt) {
- ipath_stats.sps_pageunlocks += cnt;
- ipath_cdbg(VERBOSE, "There were still %u expTID "
- "entries locked\n", cnt);
- }
- if (ipath_stats.sps_pagelocks ||
- ipath_stats.sps_pageunlocks)
- ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
- "unlocked via ipath_m{un}lock\n",
- (unsigned long long)
- ipath_stats.sps_pagelocks,
- (unsigned long long)
- ipath_stats.sps_pageunlocks);
-
- ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
- dd->ipath_pageshadow);
- vfree(dd->ipath_pageshadow);
- dd->ipath_pageshadow = NULL;
- }
-
- /*
- * free any resources still in use (usually just kernel ports)
- * at unload; we do for portcnt, not cfgports, because cfgports
- * could have changed while we were loaded.
- */
- for (port = 0; port < dd->ipath_portcnt; port++) {
- struct ipath_portdata *pd = dd->ipath_pd[port];
- dd->ipath_pd[port] = NULL;
- ipath_free_pddata(dd, pd);
- }
- kfree(dd->ipath_pd);
- /*
- * debuggability, in case some cleanup path tries to use it
- * after this
- */
- dd->ipath_pd = NULL;
-}
-
static void __exit infinipath_cleanup(void)
{
- struct ipath_devdata *dd, *tmp;
- unsigned long flags;
-
- ipath_diagpkt_remove();
-
ipath_exit_ipathfs();
ipath_driver_remove_group(&ipath_driver.driver);
- spin_lock_irqsave(&ipath_devs_lock, flags);
-
- /*
- * turn off rcv, send, and interrupts for all ports, all drivers
- * should also hard reset the chip here?
- * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
- * for all versions of the driver, if they were allocated
- */
- list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
- if (dd->ipath_kregbase)
- cleanup_device(dd);
-
- if (dd->pcidev) {
- if (dd->pcidev->irq) {
- ipath_cdbg(VERBOSE,
- "unit %u free_irq of irq %x\n",
- dd->ipath_unit, dd->pcidev->irq);
- free_irq(dd->pcidev->irq, dd);
- } else
- ipath_dbg("irq is 0, not doing free_irq "
- "for unit %u\n", dd->ipath_unit);
-
- /*
- * we check for NULL here, because it's outside
- * the kregbase check, and we need to call it
- * after the free_irq. Thus it's possible that
- * the function pointers were never initialized.
- */
- if (dd->ipath_f_cleanup)
- /* clean up chip-specific stuff */
- dd->ipath_f_cleanup(dd);
-
- dd->pcidev = NULL;
- }
- spin_lock_irqsave(&ipath_devs_lock, flags);
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
pci_unregister_driver(&ipath_driver);
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index 3313356ab93..a4019a6b756 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -100,9 +100,9 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
gpioval = &dd->ipath_gpio_out;
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
if (line == i2c_line_scl)
- mask = ipath_gpio_scl;
+ mask = dd->ipath_gpio_scl;
else
- mask = ipath_gpio_sda;
+ mask = dd->ipath_gpio_sda;
if (new_line_state == i2c_line_high)
/* tri-state the output rather than force high */
@@ -119,12 +119,12 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
write_val = 0x0UL;
if (line == i2c_line_scl) {
- write_val <<= ipath_gpio_scl_num;
- *gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num);
+ write_val <<= dd->ipath_gpio_scl_num;
+ *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num);
*gpioval |= write_val;
} else {
- write_val <<= ipath_gpio_sda_num;
- *gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num);
+ write_val <<= dd->ipath_gpio_sda_num;
+ *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num);
*gpioval |= write_val;
}
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
@@ -157,9 +157,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
/* config line to be an input */
if (line == i2c_line_scl)
- mask = ipath_gpio_scl;
+ mask = dd->ipath_gpio_scl;
else
- mask = ipath_gpio_sda;
+ mask = dd->ipath_gpio_sda;
write_val = read_val & ~mask;
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
@@ -187,6 +187,7 @@ bail:
static void i2c_wait_for_writes(struct ipath_devdata *dd)
{
(void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ rmb();
}
static void scl_out(struct ipath_devdata *dd, u8 bit)
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 29930e22318..a9ddc6911f6 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -41,6 +41,12 @@
#include "ipath_kernel.h"
#include "ipath_common.h"
+/*
+ * mmap64 doesn't allow all 64 bits for 32-bit applications
+ * so only use the low 43 bits.
+ */
+#define MMAP64_MASK 0x7FFFFFFFFFFUL
+
static int ipath_open(struct inode *, struct file *);
static int ipath_close(struct inode *, struct file *);
static ssize_t ipath_write(struct file *, const char __user *, size_t,
@@ -57,18 +63,35 @@ static struct file_operations ipath_file_ops = {
.mmap = ipath_mmap
};
-static int ipath_get_base_info(struct ipath_portdata *pd,
+static int ipath_get_base_info(struct file *fp,
void __user *ubase, size_t ubase_size)
{
+ struct ipath_portdata *pd = port_fp(fp);
int ret = 0;
struct ipath_base_info *kinfo = NULL;
struct ipath_devdata *dd = pd->port_dd;
+ unsigned subport_cnt;
+ int shared, master;
+ size_t sz;
+
+ subport_cnt = pd->port_subport_cnt;
+ if (!subport_cnt) {
+ shared = 0;
+ master = 0;
+ subport_cnt = 1;
+ } else {
+ shared = 1;
+ master = !subport_fp(fp);
+ }
- if (ubase_size < sizeof(*kinfo)) {
+ sz = sizeof(*kinfo);
+ /* If port sharing is not requested, allow the old size structure */
+ if (!shared)
+ sz -= 3 * sizeof(u64);
+ if (ubase_size < sz) {
ipath_cdbg(PROC,
- "Base size %lu, need %lu (version mismatch?)\n",
- (unsigned long) ubase_size,
- (unsigned long) sizeof(*kinfo));
+ "Base size %zu, need %zu (version mismatch?)\n",
+ ubase_size, sz);
ret = -EINVAL;
goto bail;
}
@@ -95,7 +118,9 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
pd->port_rcvegrbuf_chunks;
- kinfo->spi_tidcnt = dd->ipath_rcvtidcnt;
+ kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
+ if (master)
+ kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
/*
* for this use, may be ipath_cfgports summed over all chips that
* are are configured and present
@@ -118,31 +143,75 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
* page_address() macro worked, but in 2.6.11, even that returns the
* full 64 bit address (upper bits all 1's). So far, using the
* physical addresses (or chip offsets, for chip mapping) works, but
- * no doubt some future kernel release will chang that, and we'll be
- * on to yet another method of dealing with this
+ * no doubt some future kernel release will change that, and we'll be
+ * on to yet another method of dealing with this.
*/
kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
- kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys;
+ kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
(void *) dd->ipath_statusp -
(void *) dd->ipath_pioavailregs_dma;
- kinfo->spi_piobufbase = (u64) pd->port_piobufs;
- kinfo->__spi_uregbase =
- dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+ if (!shared) {
+ kinfo->spi_piocnt = dd->ipath_pbufsport;
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs;
+ kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
+ dd->ipath_palign * pd->port_port;
+ } else if (master) {
+ kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
+ (dd->ipath_pbufsport % subport_cnt);
+ /* Master's PIO buffers are after all the slave's */
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+ dd->ipath_palign *
+ (dd->ipath_pbufsport - kinfo->spi_piocnt);
+ kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
+ dd->ipath_palign * pd->port_port;
+ } else {
+ unsigned slave = subport_fp(fp) - 1;
+
+ kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+ dd->ipath_palign * kinfo->spi_piocnt * slave;
+ kinfo->__spi_uregbase = ((u64) pd->subport_uregbase +
+ PAGE_SIZE * slave) & MMAP64_MASK;
- kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1);
- kinfo->spi_piocnt = dd->ipath_pbufsport;
+ kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * slave) & MMAP64_MASK;
+ kinfo->spi_rcvhdr_tailaddr =
+ (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK;
+ kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf +
+ dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) &
+ MMAP64_MASK;
+ }
+
+ kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
+ dd->ipath_palign;
kinfo->spi_pioalign = dd->ipath_palign;
kinfo->spi_qpair = IPATH_KD_QP;
kinfo->spi_piosize = dd->ipath_ibmaxlen;
kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */
kinfo->spi_port = pd->port_port;
+ kinfo->spi_subport = subport_fp(fp);
kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
kinfo->spi_hw_version = dd->ipath_revision;
+ if (master) {
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
+ kinfo->spi_subport_uregbase =
+ (u64) pd->subport_uregbase & MMAP64_MASK;
+ kinfo->spi_subport_rcvegrbuf =
+ (u64) pd->subport_rcvegrbuf & MMAP64_MASK;
+ kinfo->spi_subport_rcvhdr_base =
+ (u64) pd->subport_rcvhdr_base & MMAP64_MASK;
+ ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
+ kinfo->spi_port, kinfo->spi_runtime_flags,
+ (unsigned long long) kinfo->spi_subport_uregbase,
+ (unsigned long long) kinfo->spi_subport_rcvegrbuf,
+ (unsigned long long) kinfo->spi_subport_rcvhdr_base);
+ }
+
if (copy_to_user(ubase, kinfo, sizeof(*kinfo)))
ret = -EFAULT;
@@ -154,6 +223,7 @@ bail:
/**
* ipath_tid_update - update a port TID
* @pd: the port
+ * @fp: the ipath device file
* @ti: the TID information
*
* The new implementation as of Oct 2004 is that the driver assigns
@@ -176,11 +246,11 @@ bail:
* virtually contiguous pages, that should change to improve
* performance.
*/
-static int ipath_tid_update(struct ipath_portdata *pd,
+static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
const struct ipath_tid_info *ti)
{
int ret = 0, ntids;
- u32 tid, porttid, cnt, i, tidcnt;
+ u32 tid, porttid, cnt, i, tidcnt, tidoff;
u16 *tidlist;
struct ipath_devdata *dd = pd->port_dd;
u64 physaddr;
@@ -188,6 +258,7 @@ static int ipath_tid_update(struct ipath_portdata *pd,
u64 __iomem *tidbase;
unsigned long tidmap[8];
struct page **pagep = NULL;
+ unsigned subport = subport_fp(fp);
if (!dd->ipath_pageshadow) {
ret = -ENOMEM;
@@ -204,20 +275,34 @@ static int ipath_tid_update(struct ipath_portdata *pd,
ret = -EFAULT;
goto done;
}
- tidcnt = dd->ipath_rcvtidcnt;
- if (cnt >= tidcnt) {
+ porttid = pd->port_port * dd->ipath_rcvtidcnt;
+ if (!pd->port_subport_cnt) {
+ tidcnt = dd->ipath_rcvtidcnt;
+ tid = pd->port_tidcursor;
+ tidoff = 0;
+ } else if (!subport) {
+ tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+ (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+ tidoff = dd->ipath_rcvtidcnt - tidcnt;
+ porttid += tidoff;
+ tid = tidcursor_fp(fp);
+ } else {
+ tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+ tidoff = tidcnt * (subport - 1);
+ porttid += tidoff;
+ tid = tidcursor_fp(fp);
+ }
+ if (cnt > tidcnt) {
/* make sure it all fits in port_tid_pg_list */
dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
"TIDs, only trying max (%u)\n", cnt, tidcnt);
cnt = tidcnt;
}
- pagep = (struct page **)pd->port_tid_pg_list;
- tidlist = (u16 *) (&pagep[cnt]);
+ pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
+ tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
memset(tidmap, 0, sizeof(tidmap));
- tid = pd->port_tidcursor;
/* before decrement; chip actual # */
- porttid = pd->port_port * tidcnt;
ntids = tidcnt;
tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
dd->ipath_rcvtidbase +
@@ -274,16 +359,19 @@ static int ipath_tid_update(struct ipath_portdata *pd,
ret = -ENOMEM;
break;
}
- tidlist[i] = tid;
+ tidlist[i] = tid + tidoff;
ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
- "vaddr %lx\n", i, tid, vaddr);
+ "vaddr %lx\n", i, tid + tidoff, vaddr);
/* we "know" system pages and TID pages are same size */
dd->ipath_pageshadow[porttid + tid] = pagep[i];
+ dd->ipath_physshadow[porttid + tid] = ipath_map_page(
+ dd->pcidev, pagep[i], 0, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
/*
* don't need atomic or it's overhead
*/
__set_bit(tid, tidmap);
- physaddr = page_to_phys(pagep[i]);
+ physaddr = dd->ipath_physshadow[porttid + tid];
ipath_stats.sps_pagelocks++;
ipath_cdbg(VERBOSE,
"TID %u, vaddr %lx, physaddr %llx pgp %p\n",
@@ -317,6 +405,9 @@ static int ipath_tid_update(struct ipath_portdata *pd,
tid);
dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
dd->ipath_tidinvalid);
+ pci_unmap_page(dd->pcidev,
+ dd->ipath_physshadow[porttid + tid],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
dd->ipath_pageshadow[porttid + tid] = NULL;
ipath_stats.sps_pageunlocks++;
}
@@ -341,7 +432,10 @@ static int ipath_tid_update(struct ipath_portdata *pd,
}
if (tid == tidcnt)
tid = 0;
- pd->port_tidcursor = tid;
+ if (!pd->port_subport_cnt)
+ pd->port_tidcursor = tid;
+ else
+ tidcursor_fp(fp) = tid;
}
done:
@@ -354,6 +448,7 @@ done:
/**
* ipath_tid_free - free a port TID
* @pd: the port
+ * @subport: the subport
* @ti: the TID info
*
* right now we are unlocking one page at a time, but since
@@ -367,7 +462,7 @@ done:
* they pass in to us.
*/
-static int ipath_tid_free(struct ipath_portdata *pd,
+static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
const struct ipath_tid_info *ti)
{
int ret = 0;
@@ -388,11 +483,20 @@ static int ipath_tid_free(struct ipath_portdata *pd,
}
porttid = pd->port_port * dd->ipath_rcvtidcnt;
+ if (!pd->port_subport_cnt)
+ tidcnt = dd->ipath_rcvtidcnt;
+ else if (!subport) {
+ tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+ (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+ porttid += dd->ipath_rcvtidcnt - tidcnt;
+ } else {
+ tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+ porttid += tidcnt * (subport - 1);
+ }
tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
dd->ipath_rcvtidbase +
porttid * sizeof(*tidbase));
- tidcnt = dd->ipath_rcvtidcnt;
limit = sizeof(tidmap) * BITS_PER_BYTE;
if (limit > tidcnt)
/* just in case size changes in future */
@@ -417,6 +521,9 @@ static int ipath_tid_free(struct ipath_portdata *pd,
pd->port_pid, tid);
dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
dd->ipath_tidinvalid);
+ pci_unmap_page(dd->pcidev,
+ dd->ipath_physshadow[porttid + tid],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
ipath_release_user_pages(
&dd->ipath_pageshadow[porttid + tid], 1);
dd->ipath_pageshadow[porttid + tid] = NULL;
@@ -581,20 +688,24 @@ bail:
/**
* ipath_manage_rcvq - manage a port's receive queue
* @pd: the port
+ * @subport: the subport
* @start_stop: action to carry out
*
* start_stop == 0 disables receive on the port, for use in queue
* overflow conditions. start_stop==1 re-enables, to be used to
* re-init the software copy of the head register
*/
-static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
+static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
+ int start_stop)
{
struct ipath_devdata *dd = pd->port_dd;
u64 tval;
- ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n",
+ ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
start_stop ? "en" : "dis", dd->ipath_unit,
- pd->port_port);
+ pd->port_port, subport);
+ if (subport)
+ goto bail;
/* atomically clear receive enable port. */
if (start_stop) {
/*
@@ -609,7 +720,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
* updated and correct itself, even in the face of software
* bugs.
*/
- *pd->port_rcvhdrtail_kvaddr = 0;
+ *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0;
set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
&dd->ipath_rcvctrl);
} else
@@ -630,6 +741,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
}
/* always; new head should be equal to new tail; see above */
+bail:
return 0;
}
@@ -687,6 +799,36 @@ static void ipath_clean_part_key(struct ipath_portdata *pd,
}
}
+/*
+ * Fill in the eager receive buffer layout for a port: bytes per chunk,
+ * buffers per chunk, and number of chunks.  Doing this up front lets
+ * the values be set while the master port is locked; otherwise a slave
+ * opening the port could race and see these fields uninitialized.
+ */
+static void init_user_egr_sizes(struct ipath_portdata *pd)
+{
+	struct ipath_devdata *dd = pd->port_dd;
+	/*
+	 * Chunks are 32KB of physically contiguous memory, consumed one
+	 * eager buffer at a time before another chunk is needed.  256KB
+	 * chunks hit allocation failures under heavy memory pressure
+	 * (creating large files and then copying them over NFS while
+	 * doing lots of MPI jobs, 2.6.10), and so did 64KB.  32KB is the
+	 * lowest we can go without wasting additional memory.
+	 */
+	const unsigned chunk_bytes = 0x8000;
+	const unsigned bufs_per_chunk = chunk_bytes / dd->ipath_rcvegrbufsize;
+	const unsigned total_bufs = dd->ipath_rcvegrcnt;
+
+	pd->port_rcvegrbuf_size = chunk_bytes;
+	pd->port_rcvegrbufs_perchunk = bufs_per_chunk;
+	/* round up so a partially filled last chunk is still counted */
+	pd->port_rcvegrbuf_chunks =
+		(total_bufs + bufs_per_chunk - 1) / bufs_per_chunk;
+}
+
/**
* ipath_create_user_egr - allocate eager TID buffers
* @pd: the port to allocate TID buffers for
@@ -702,7 +844,7 @@ static void ipath_clean_part_key(struct ipath_portdata *pd,
static int ipath_create_user_egr(struct ipath_portdata *pd)
{
struct ipath_devdata *dd = pd->port_dd;
- unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff;
+ unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
size_t size;
int ret;
gfp_t gfp_flags;
@@ -722,31 +864,18 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
"offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
- /*
- * to avoid wasting a lot of memory, we allocate 32KB chunks of
- * physically contiguous memory, advance through it until used up
- * and then allocate more. Of course, we need memory to store those
- * extra pointers, now. Started out with 256KB, but under heavy
- * memory pressure (creating large files and then copying them over
- * NFS while doing lots of MPI jobs), we hit some allocation
- * failures, even though we can sleep... (2.6.10) Still get
- * failures at 64K. 32K is the lowest we can go without wasting
- * additional memory.
- */
- size = 0x8000;
- alloced = ALIGN(egrsize * egrcnt, size);
- egrperchunk = size / egrsize;
- chunk = (egrcnt + egrperchunk - 1) / egrperchunk;
- pd->port_rcvegrbuf_chunks = chunk;
- pd->port_rcvegrbufs_perchunk = egrperchunk;
- pd->port_rcvegrbuf_size = size;
- pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]));
+ chunk = pd->port_rcvegrbuf_chunks;
+ egrperchunk = pd->port_rcvegrbufs_perchunk;
+ size = pd->port_rcvegrbuf_size;
+ pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]),
+ GFP_KERNEL);
if (!pd->port_rcvegrbuf) {
ret = -ENOMEM;
goto bail;
}
pd->port_rcvegrbuf_phys =
- vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]));
+ kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]),
+ GFP_KERNEL);
if (!pd->port_rcvegrbuf_phys) {
ret = -ENOMEM;
goto bail_rcvegrbuf;
@@ -791,105 +920,23 @@ bail_rcvegrbuf_phys:
pd->port_rcvegrbuf_phys[e]);
}
- vfree(pd->port_rcvegrbuf_phys);
+ kfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
- vfree(pd->port_rcvegrbuf);
+ kfree(pd->port_rcvegrbuf);
pd->port_rcvegrbuf = NULL;
bail:
return ret;
}
-static int ipath_do_user_init(struct ipath_portdata *pd,
- const struct ipath_user_info *uinfo)
-{
- int ret = 0;
- struct ipath_devdata *dd = pd->port_dd;
- u32 head32;
-
- /* for now, if major version is different, bail */
- if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
- dev_info(&dd->pcidev->dev,
- "User major version %d not same as driver "
- "major %d\n", uinfo->spu_userversion >> 16,
- IPATH_USER_SWMAJOR);
- ret = -ENODEV;
- goto done;
- }
-
- if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
- ipath_dbg("User minor version %d not same as driver "
- "minor %d\n", uinfo->spu_userversion & 0xffff,
- IPATH_USER_SWMINOR);
-
- if (uinfo->spu_rcvhdrsize) {
- ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
- if (ret)
- goto done;
- }
-
- /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
-
- /* for right now, kernel piobufs are at end, so port 1 is at 0 */
- pd->port_piobufs = dd->ipath_piobufbase +
- dd->ipath_pbufsport * (pd->port_port -
- 1) * dd->ipath_palign;
- ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
- pd->port_port, pd->port_piobufs);
-
- /*
- * Now allocate the rcvhdr Q and eager TIDs; skip the TID
- * array for time being. If pd->port_port > chip-supported,
- * we need to do extra stuff here to handle by handling overflow
- * through port 0, someday
- */
- ret = ipath_create_rcvhdrq(dd, pd);
- if (!ret)
- ret = ipath_create_user_egr(pd);
- if (ret)
- goto done;
-
- /*
- * set the eager head register for this port to the current values
- * of the tail pointers, since we don't know if they were
- * updated on last use of the port.
- */
- head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
- ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
- dd->ipath_lastegrheads[pd->port_port] = -1;
- dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
- ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
- pd->port_port, head32);
- pd->port_tidcursor = 0; /* start at beginning after open */
- /*
- * now enable the port; the tail registers will be written to memory
- * by the chip as soon as it sees the write to
- * dd->ipath_kregs->kr_rcvctrl. The update only happens on
- * transition from 0 to 1, so clear it first, then set it as part of
- * enabling the port. This will (very briefly) affect any other
- * open ports, but it shouldn't be long enough to be an issue.
- * We explictly set the in-memory copy to 0 beforehand, so we don't
- * have to wait to be sure the DMA update has happened.
- */
- *pd->port_rcvhdrtail_kvaddr = 0ULL;
- set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
- &dd->ipath_rcvctrl);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl);
-done:
- return ret;
-}
-
/* common code for the mappings on dma_alloc_coherent mem */
static int ipath_mmap_mem(struct vm_area_struct *vma,
- struct ipath_portdata *pd, unsigned len,
- int write_ok, dma_addr_t addr, char *what)
+ struct ipath_portdata *pd, unsigned len, int write_ok,
+ void *kvaddr, char *what)
{
struct ipath_devdata *dd = pd->port_dd;
- unsigned pfn = (unsigned long)addr >> PAGE_SHIFT;
+ unsigned long pfn;
int ret;
if ((vma->vm_end - vma->vm_start) > len) {
@@ -912,17 +959,17 @@ static int ipath_mmap_mem(struct vm_area_struct *vma,
vma->vm_flags &= ~VM_MAYWRITE;
}
+ pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
ret = remap_pfn_range(vma, vma->vm_start, pfn,
len, vma->vm_page_prot);
if (ret)
- dev_info(&dd->pcidev->dev,
- "%s port%u mmap of %lx, %x bytes r%c failed: %d\n",
- what, pd->port_port, (unsigned long)addr, len,
- write_ok?'w':'o', ret);
+ dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
+ "bytes r%c failed: %d\n", what, pd->port_port,
+ pfn, len, write_ok?'w':'o', ret);
else
- ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes r%c\n",
- what, pd->port_port, (unsigned long)addr, len,
- write_ok?'w':'o');
+ ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
+ "r%c\n", what, pd->port_port, pfn, len,
+ write_ok?'w':'o');
bail:
return ret;
}
@@ -957,7 +1004,8 @@ static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
static int mmap_piobufs(struct vm_area_struct *vma,
struct ipath_devdata *dd,
- struct ipath_portdata *pd)
+ struct ipath_portdata *pd,
+ unsigned piobufs, unsigned piocnt)
{
unsigned long phys;
int ret;
@@ -968,16 +1016,15 @@ static int mmap_piobufs(struct vm_area_struct *vma,
* process data, and catches users who might try to read the i/o
* space due to a bug.
*/
- if ((vma->vm_end - vma->vm_start) >
- (dd->ipath_pbufsport * dd->ipath_palign)) {
+ if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
"reqlen %lx > PAGE\n",
vma->vm_end - vma->vm_start);
- ret = -EFAULT;
+ ret = -EINVAL;
goto bail;
}
- phys = dd->ipath_physaddr + pd->port_piobufs;
+ phys = dd->ipath_physaddr + piobufs;
/*
* Don't mark this as non-cached, or we don't get the
@@ -1011,7 +1058,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
struct ipath_devdata *dd = pd->port_dd;
unsigned long start, size;
size_t total_size, i;
- dma_addr_t *phys;
+ unsigned long pfn;
int ret;
size = pd->port_rcvegrbuf_size;
@@ -1021,7 +1068,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
"reqlen %lx > actual %lx\n",
vma->vm_end - vma->vm_start,
(unsigned long) total_size);
- ret = -EFAULT;
+ ret = -EINVAL;
goto bail;
}
@@ -1035,11 +1082,11 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
vma->vm_flags &= ~VM_MAYWRITE;
start = vma->vm_start;
- phys = pd->port_rcvegrbuf_phys;
for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
- ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT,
- size, vma->vm_page_prot);
+ pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
+ ret = remap_pfn_range(vma, start, pfn, size,
+ vma->vm_page_prot);
if (ret < 0)
goto bail;
}
@@ -1049,6 +1096,122 @@ bail:
return ret;
}
+/*
+ * ipath_file_vma_nopage - handle a VMA page fault.
+ * @vma: the VMA in which the fault occurred
+ * @address: the faulting user virtual address
+ * @type: if non-NULL, set to VM_FAULT_MINOR when a page is returned
+ *
+ * mmap_kvaddr() stores the page number of the backing kernel vmalloc
+ * address in vma->vm_pgoff, so the kernel address for a fault is that
+ * base plus the fault's offset into the VMA.
+ *
+ * Returns the backing page with its reference count incremented, or
+ * NOPAGE_SIGBUS if vmalloc_to_page() finds no page for the address.
+ */
+static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma,
+					  unsigned long address, int *type)
+{
+	unsigned long offset = address - vma->vm_start;
+	struct page *page;
+	void *pageptr;
+
+	/*
+	 * Convert the vmalloc address into a struct page.
+	 */
+	pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT));
+	page = vmalloc_to_page(pageptr);
+	if (!page)
+		/* report the failure with the sentinel, not a raw lookup result */
+		return NOPAGE_SIGBUS;
+
+	/* Increment the reference count. */
+	get_page(page);
+	if (type)
+		*type = VM_FAULT_MINOR;
+
+	return page;
+}
+/* VM operations that mmap_kvaddr() installs on the mappings it sets up. */
+static struct vm_operations_struct ipath_file_vm_ops = {
+ .nopage = ipath_file_vma_nopage,
+};
+/*
+ * mmap_kvaddr - set up a mapping of one of a shared port's subport areas
+ * @vma: the VMA being mapped
+ * @pgaddr: the byte offset requested by the caller (ipath_mmap() passes
+ *          vma->vm_pgoff << PAGE_SHIFT)
+ * @pd: the port
+ * @subport: the caller's subport; 0 is the master
+ *
+ * pgaddr is compared against the kernel addresses of subport_uregbase,
+ * subport_rcvhdr_base and subport_rcvegrbuf, masked with MMAP64_MASK.
+ * The master may map each area for all slaves at once; a slave matches
+ * only its own slice of each area, and its eager buffers only read-only.
+ *
+ * No pages are mapped here: vm_pgoff is set to the page number of the
+ * matching kernel address and ipath_file_vm_ops is installed on the VMA.
+ *
+ * Returns 0 on success, -EINVAL if the port is not shared, pgaddr
+ * matches no area, or the requested length exceeds the area, and
+ * -EPERM if a slave requests a writable mapping of its eager buffers.
+ */
+static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
+ struct ipath_portdata *pd, unsigned subport)
+{
+ unsigned long len;
+ struct ipath_devdata *dd;
+ void *addr;
+ size_t size;
+ int ret;
+
+ /* If the port is not shared, all addresses should be physical */
+ if (!pd->port_subport_cnt) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ dd = pd->port_dd;
+ size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; /* eager buffer bytes for one subport */
+
+ /*
+ * Master has all the slave uregbase, rcvhdrq, and
+ * rcvegrbufs mmapped.
+ */
+ if (subport == 0) {
+ unsigned num_slaves = pd->port_subport_cnt - 1;
+
+ if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) {
+ addr = pd->subport_uregbase;
+ size = PAGE_SIZE * num_slaves; /* one page per slave */
+ } else if (pgaddr == ((u64) pd->subport_rcvhdr_base &
+ MMAP64_MASK)) {
+ addr = pd->subport_rcvhdr_base;
+ size = pd->port_rcvhdrq_size * num_slaves;
+ } else if (pgaddr == ((u64) pd->subport_rcvegrbuf &
+ MMAP64_MASK)) {
+ addr = pd->subport_rcvegrbuf;
+ size *= num_slaves;
+ } else {
+ ret = -EINVAL;
+ goto bail;
+ }
+ } else if (pgaddr == (((u64) pd->subport_uregbase +
+ PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) {
+ /* slave: slices are indexed by (subport - 1) since subport 0 is the master */
+ addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1);
+ size = PAGE_SIZE;
+ } else if (pgaddr == (((u64) pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * (subport - 1)) &
+ MMAP64_MASK)) {
+ addr = pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * (subport - 1);
+ size = pd->port_rcvhdrq_size;
+ } else if (pgaddr == (((u64) pd->subport_rcvegrbuf +
+ size * (subport - 1)) & MMAP64_MASK)) {
+ addr = pd->subport_rcvegrbuf + size * (subport - 1);
+ /* rcvegrbufs are read-only on the slave */
+ if (vma->vm_flags & VM_WRITE) {
+ dev_info(&dd->pcidev->dev,
+ "Can't map eager buffers as "
+ "writable (flags=%lx)\n", vma->vm_flags);
+ ret = -EPERM;
+ goto bail;
+ }
+ /*
+ * Don't allow permission to later change to writeable
+ * with mprotect.
+ */
+ vma->vm_flags &= ~VM_MAYWRITE;
+ } else {
+ ret = -EINVAL;
+ goto bail;
+ }
+ len = vma->vm_end - vma->vm_start;
+ if (len > size) { /* caller asked for more than the matched area holds */
+ ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; /* read back by ipath_file_vma_nopage() */
+ vma->vm_ops = &ipath_file_vm_ops;
+ vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+ ret = 0;
+
+bail:
+ return ret;
+}
+
/**
* ipath_mmap - mmap various structures into user space
* @fp: the file pointer
@@ -1064,73 +1227,99 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
struct ipath_portdata *pd;
struct ipath_devdata *dd;
u64 pgaddr, ureg;
+ unsigned piobufs, piocnt;
int ret;
pd = port_fp(fp);
+ if (!pd) {
+ ret = -EINVAL;
+ goto bail;
+ }
dd = pd->port_dd;
/*
* This is the ipath_do_user_init() code, mapping the shared buffers
* into the user process. The address referred to by vm_pgoff is the
- * virtual, not physical, address; we only do one mmap for each
- * space mapped.
+ * file offset passed via mmap(). For shared ports, this is the
+ * kernel vmalloc() address of the pages to share with the master.
+ * For non-shared or master ports, this is a physical address.
+ * We only do one mmap for each space mapped.
*/
pgaddr = vma->vm_pgoff << PAGE_SHIFT;
/*
- * Must fit in 40 bits for our hardware; some checked elsewhere,
- * but we'll be paranoid. Check for 0 is mostly in case one of the
- * allocations failed, but user called mmap anyway. We want to catch
- * that before it can match.
+ * Check for 0 in case one of the allocations failed, but user
+ * called mmap anyway.
*/
- if (!pgaddr || pgaddr >= (1ULL<<40)) {
- ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n",
- (unsigned long long)pgaddr, vma->vm_start, vma->vm_end);
- return -EINVAL;
+ if (!pgaddr) {
+ ret = -EINVAL;
+ goto bail;
}
- /* just the offset of the port user registers, not physical addr */
- ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
-
- ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n",
+ ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
(unsigned long long) pgaddr, vma->vm_start,
- vma->vm_end - vma->vm_start);
+ vma->vm_end - vma->vm_start, dd->ipath_unit,
+ pd->port_port, subport_fp(fp));
- if (vma->vm_start & (PAGE_SIZE-1)) {
- ipath_dev_err(dd,
- "vm_start not aligned: %lx, end=%lx phys %lx\n",
- vma->vm_start, vma->vm_end, (unsigned long)pgaddr);
- ret = -EINVAL;
+ /*
+ * Physical addresses must fit in 40 bits for our hardware.
+ * Check for kernel virtual addresses first, anything else must
+ * match a HW or memory address.
+ */
+ if (pgaddr >= (1ULL<<40)) {
+ ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+ goto bail;
}
- else if (pgaddr == ureg)
+
+ if (!pd->port_subport_cnt) {
+ /* port is not shared */
+ ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+ piocnt = dd->ipath_pbufsport;
+ piobufs = pd->port_piobufs;
+ } else if (!subport_fp(fp)) {
+ /* caller is the master */
+ ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+ piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
+ (dd->ipath_pbufsport % pd->port_subport_cnt);
+ piobufs = pd->port_piobufs +
+ dd->ipath_palign * (dd->ipath_pbufsport - piocnt);
+ } else {
+ unsigned slave = subport_fp(fp) - 1;
+
+ /* caller is a slave */
+ ureg = 0;
+ piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
+ piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
+ }
+
+ if (pgaddr == ureg)
ret = mmap_ureg(vma, dd, ureg);
- else if (pgaddr == pd->port_piobufs)
- ret = mmap_piobufs(vma, dd, pd);
- else if (pgaddr == (u64) pd->port_rcvegr_phys)
+ else if (pgaddr == piobufs)
+ ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
+ else if (pgaddr == dd->ipath_pioavailregs_phys)
+ /* in-memory copy of pioavail registers */
+ ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
+ (void *) dd->ipath_pioavailregs_dma,
+ "pioavail registers");
+ else if (subport_fp(fp))
+ /* Subports don't mmap the physical receive buffers */
+ ret = -EINVAL;
+ else if (pgaddr == pd->port_rcvegr_phys)
ret = mmap_rcvegrbufs(vma, pd);
- else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
+ else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
/*
* The rcvhdrq itself; readonly except on HT (so have
* to allow writable mapping), multiple pages, contiguous
* from an i/o perspective.
*/
- unsigned total_size =
- ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize
- * sizeof(u32), PAGE_SIZE);
- ret = ipath_mmap_mem(vma, pd, total_size, 1,
- pd->port_rcvhdrq_phys,
+ ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
+ pd->port_rcvhdrq,
"rcvhdrq");
- }
- else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys)
+ else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
/* in-memory copy of rcvhdrq tail register */
ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
- pd->port_rcvhdrqtailaddr_phys,
+ pd->port_rcvhdrtail_kvaddr,
"rcvhdrq tail");
- else if (pgaddr == dd->ipath_pioavailregs_phys)
- /* in-memory copy of pioavail registers */
- ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
- dd->ipath_pioavailregs_phys,
- "pioavail registers");
else
ret = -EINVAL;
@@ -1138,9 +1327,10 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
if (ret < 0)
dev_info(&dd->pcidev->dev,
- "Failure %d on addr %lx, off %lx\n",
- -ret, vma->vm_start, vma->vm_pgoff);
-
+ "Failure %d on off %llx len %lx\n",
+ -ret, (unsigned long long)pgaddr,
+ vma->vm_end - vma->vm_start);
+bail:
return ret;
}
@@ -1154,6 +1344,8 @@ static unsigned int ipath_poll(struct file *fp,
struct ipath_devdata *dd;
pd = port_fp(fp);
+ if (!pd)
+ goto bail;
dd = pd->port_dd;
bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT;
@@ -1176,7 +1368,7 @@ static unsigned int ipath_poll(struct file *fp,
if (tail == head) {
set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
- if(dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
+ if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
(void)ipath_write_ureg(dd, ur_rcvhdrhead,
dd->ipath_rhdrhead_intr_off
| head, pd->port_port);
@@ -1200,18 +1392,80 @@ static unsigned int ipath_poll(struct file *fp,
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
+bail:
return pollflag;
}
+/*
+ * init_subports - allocate the areas shared between a master and its slaves
+ * @dd: the device
+ * @pd: the port being opened
+ * @uinfo: the user's open request
+ *
+ * Allocates the vmalloc'ed per-slave user register pages, receive header
+ * queues and eager buffers, then records the subport count and id in @pd.
+ * The eager buffer area is sized from pd->port_rcvegrbuf_chunks and
+ * pd->port_rcvegrbuf_size, so those must already be set
+ * (try_alloc_port() calls init_user_egr_sizes() first).
+ *
+ * Returns 0 on success, and also when nothing is allocated because the
+ * user's minor version differs from IPATH_USER_SWMINOR or at most one
+ * subport is requested.  Returns -EINVAL if more than 4 subports are
+ * requested and -ENOMEM if an allocation fails.
+ */
+static int init_subports(struct ipath_devdata *dd,
+			 struct ipath_portdata *pd,
+			 const struct ipath_user_info *uinfo)
+{
+	int ret = 0;
+	unsigned num_slaves;
+	size_t size;
+
+	/* Old user binaries don't know about subports */
+	if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
+		goto bail;
+	/*
+	 * If the user is requesting zero or one port,
+	 * skip the subport allocation.
+	 */
+	if (uinfo->spu_subport_cnt <= 1)
+		goto bail;
+	if (uinfo->spu_subport_cnt > 4) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	num_slaves = uinfo->spu_subport_cnt - 1;
+	pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves);
+	if (!pd->subport_uregbase) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+	/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
+	size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
+		     sizeof(u32), PAGE_SIZE) * num_slaves;
+	pd->subport_rcvhdr_base = vmalloc(size);
+	if (!pd->subport_rcvhdr_base) {
+		ret = -ENOMEM;
+		goto bail_ureg;
+	}
+
+	pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
+					pd->port_rcvegrbuf_size *
+					num_slaves);
+	if (!pd->subport_rcvegrbuf) {
+		ret = -ENOMEM;
+		goto bail_rhdr;
+	}
+
+	pd->port_subport_cnt = uinfo->spu_subport_cnt;
+	pd->port_subport_id = uinfo->spu_subport_id;
+	pd->active_slaves = 1;
+	goto bail;
+
+	/*
+	 * pd stays in dd->ipath_pd[] after a failed open, so clear every
+	 * pointer that is freed here rather than leaving it dangling.
+	 */
+bail_rhdr:
+	vfree(pd->subport_rcvhdr_base);
+	pd->subport_rcvhdr_base = NULL;
+bail_ureg:
+	vfree(pd->subport_uregbase);
+	pd->subport_uregbase = NULL;
+bail:
+	return ret;
+}
+
static int try_alloc_port(struct ipath_devdata *dd, int port,
- struct file *fp)
+ struct file *fp,
+ const struct ipath_user_info *uinfo)
{
+ struct ipath_portdata *pd;
int ret;
- if (!dd->ipath_pd[port]) {
- void *p, *ptmp;
+ if (!(pd = dd->ipath_pd[port])) {
+ void *ptmp;
- p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
+ pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
/*
* Allocate memory for use in ipath_tid_update() just once
@@ -1221,34 +1475,36 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
dd->ipath_rcvtidcnt * sizeof(struct page **),
GFP_KERNEL);
- if (!p || !ptmp) {
+ if (!pd || !ptmp) {
ipath_dev_err(dd, "Unable to allocate portdata "
"memory, failing open\n");
ret = -ENOMEM;
- kfree(p);
+ kfree(pd);
kfree(ptmp);
goto bail;
}
- dd->ipath_pd[port] = p;
+ dd->ipath_pd[port] = pd;
dd->ipath_pd[port]->port_port = port;
dd->ipath_pd[port]->port_dd = dd;
dd->ipath_pd[port]->port_tid_pg_list = ptmp;
init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
}
- if (!dd->ipath_pd[port]->port_cnt) {
- dd->ipath_pd[port]->port_cnt = 1;
- fp->private_data = (void *) dd->ipath_pd[port];
+ if (!pd->port_cnt) {
+ pd->userversion = uinfo->spu_userversion;
+ init_user_egr_sizes(pd);
+ if ((ret = init_subports(dd, pd, uinfo)) != 0)
+ goto bail;
ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
current->comm, current->pid, dd->ipath_unit,
port);
- dd->ipath_pd[port]->port_pid = current->pid;
- strncpy(dd->ipath_pd[port]->port_comm, current->comm,
- sizeof(dd->ipath_pd[port]->port_comm));
+ pd->port_cnt = 1;
+ port_fp(fp) = pd;
+ pd->port_pid = current->pid;
+ strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
ipath_stats.sps_ports++;
ret = 0;
- goto bail;
- }
- ret = -EBUSY;
+ } else
+ ret = -EBUSY;
bail:
return ret;
@@ -1264,7 +1520,8 @@ static inline int usable(struct ipath_devdata *dd)
| IPATH_LINKUNK));
}
-static int find_free_port(int unit, struct file *fp)
+static int find_free_port(int unit, struct file *fp,
+ const struct ipath_user_info *uinfo)
{
struct ipath_devdata *dd = ipath_lookup(unit);
int ret, i;
@@ -1279,8 +1536,8 @@ static int find_free_port(int unit, struct file *fp)
goto bail;
}
- for (i = 0; i < dd->ipath_cfgports; i++) {
- ret = try_alloc_port(dd, i, fp);
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ ret = try_alloc_port(dd, i, fp, uinfo);
if (ret != -EBUSY)
goto bail;
}
@@ -1290,13 +1547,14 @@ bail:
return ret;
}
-static int find_best_unit(struct file *fp)
+static int find_best_unit(struct file *fp,
+ const struct ipath_user_info *uinfo)
{
int ret = 0, i, prefunit = -1, devmax;
int maxofallports, npresent, nup;
int ndev;
- (void) ipath_count_units(&npresent, &nup, &maxofallports);
+ devmax = ipath_count_units(&npresent, &nup, &maxofallports);
/*
* This code is present to allow a knowledgeable person to
@@ -1343,8 +1601,6 @@ static int find_best_unit(struct file *fp)
if (prefunit != -1)
devmax = prefunit + 1;
- else
- devmax = ipath_count_units(NULL, NULL, NULL);
recheck:
for (i = 1; i < maxofallports; i++) {
for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
@@ -1359,7 +1615,7 @@ recheck:
* next.
*/
continue;
- ret = try_alloc_port(dd, i, fp);
+ ret = try_alloc_port(dd, i, fp, uinfo);
if (!ret)
goto done;
}
@@ -1395,22 +1651,183 @@ done:
return ret;
}
+/*
+ * Try to attach this open file to an already-open port as one more
+ * subport.  Every user port (1 .. ipath_cfgports - 1) of every unit is
+ * examined for an open port whose subport id equals the requested one.
+ *
+ * Returns 1 when the file was attached to a port, 0 when no candidate
+ * port exists, and -EINVAL when a candidate was found but its subport
+ * count or user version differs from the request, or all of its
+ * subports are already in use.
+ */
+static int find_shared_port(struct file *fp,
+			    const struct ipath_user_info *uinfo)
+{
+	int nunits = ipath_count_units(NULL, NULL, NULL);
+	int unit, port;
+
+	for (unit = 0; unit < nunits; unit++) {
+		struct ipath_devdata *dd = ipath_lookup(unit);
+
+		if (!dd)
+			continue;
+		for (port = 1; port < dd->ipath_cfgports; port++) {
+			struct ipath_portdata *pd = dd->ipath_pd[port];
+
+			/* Only open ports carrying the requested id qualify */
+			if (!pd || !pd->port_cnt ||
+			    pd->port_subport_id != uinfo->spu_subport_id)
+				continue;
+			/* The sharing process must match the master */
+			if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
+			    pd->userversion != uinfo->spu_userversion ||
+			    pd->port_cnt >= pd->port_subport_cnt)
+				return -EINVAL;
+
+			port_fp(fp) = pd;
+			subport_fp(fp) = pd->port_cnt++;
+			tidcursor_fp(fp) = 0;
+			pd->active_slaves |= 1 << subport_fp(fp);
+			ipath_cdbg(PROC,
+				   "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
+				   current->comm, current->pid,
+				   subport_fp(fp),
+				   pd->port_comm, pd->port_pid,
+				   dd->ipath_unit, pd->port_port);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
static int ipath_open(struct inode *in, struct file *fp)
{
- int ret, user_minor;
+ /* The real work is performed later in ipath_assign_port() */
+ fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
+ return fp->private_data ? 0 : -ENOMEM;
+}
+
+
+/* Get port early, so can set affinity prior to memory allocation */
+static int ipath_assign_port(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int ret;
+ int i_minor;
+ unsigned swminor;
+
+ /* Check to be sure we haven't already initialized this file */
+ if (port_fp(fp)) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* for now, if major version is different, bail */
+ if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
+ ipath_dbg("User major version %d not same as driver "
+ "major %d\n", uinfo->spu_userversion >> 16,
+ IPATH_USER_SWMAJOR);
+ ret = -ENODEV;
+ goto done;
+ }
+
+ swminor = uinfo->spu_userversion & 0xffff;
+ if (swminor != IPATH_USER_SWMINOR)
+ ipath_dbg("User minor version %d not same as driver "
+ "minor %d\n", swminor, IPATH_USER_SWMINOR);
mutex_lock(&ipath_mutex);
- user_minor = iminor(in) - IPATH_USER_MINOR_BASE;
+ if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt &&
+ (ret = find_shared_port(fp, uinfo))) {
+ mutex_unlock(&ipath_mutex);
+ if (ret > 0)
+ ret = 0;
+ goto done;
+ }
+
+ i_minor = iminor(fp->f_dentry->d_inode) - IPATH_USER_MINOR_BASE;
ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
- (long)in->i_rdev, user_minor);
+ (long)fp->f_dentry->d_inode->i_rdev, i_minor);
- if (user_minor)
- ret = find_free_port(user_minor - 1, fp);
+ if (i_minor)
+ ret = find_free_port(i_minor - 1, fp, uinfo);
else
- ret = find_best_unit(fp);
+ ret = find_best_unit(fp, uinfo);
mutex_unlock(&ipath_mutex);
+
+done:
+ return ret;
+}
+
+
+/*
+ * Finish setting up the port already attached to this file (port_fp(fp)):
+ * optionally set the receive header size requested in uinfo, compute the
+ * port's PIO buffer base, create the receive header queue and the user
+ * eager buffers, sync the eager head register to the current tail, and
+ * finally enable the port in kr_rcvctrl.
+ *
+ * Returns 0 on success, or the non-zero value returned by
+ * ipath_setrcvhdrsize(), ipath_create_rcvhdrq() or
+ * ipath_create_user_egr() when one of them fails.
+ */
+static int ipath_do_user_init(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int ret;
+ struct ipath_portdata *pd;
+ struct ipath_devdata *dd;
+ u32 head32;
+
+ pd = port_fp(fp);
+ dd = pd->port_dd;
+
+ if (uinfo->spu_rcvhdrsize) {
+ ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
+ if (ret)
+ goto done;
+ }
+
+ /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
+
+ /* for right now, kernel piobufs are at end, so port 1 is at 0 */
+ pd->port_piobufs = dd->ipath_piobufbase +
+ dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign;
+ ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
+ pd->port_port, pd->port_piobufs);
+
+ /*
+ * Now allocate the rcvhdr Q and eager TIDs; skip the TID
+ * array for time being. If pd->port_port > chip-supported,
+ * we need to do extra stuff here to handle by handling overflow
+ * through port 0, someday
+ */
+ ret = ipath_create_rcvhdrq(dd, pd);
+ if (!ret)
+ ret = ipath_create_user_egr(pd);
+ if (ret)
+ goto done;
+
+ /*
+ * set the eager head register for this port to the current values
+ * of the tail pointers, since we don't know if they were
+ * updated on last use of the port.
+ */
+ head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
+ ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
+ dd->ipath_lastegrheads[pd->port_port] = -1;
+ dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
+ ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
+ pd->port_port, head32);
+ pd->port_tidcursor = 0; /* start at beginning after open */
+ /*
+ * now enable the port; the tail registers will be written to memory
+ * by the chip as soon as it sees the write to
+ * dd->ipath_kregs->kr_rcvctrl. The update only happens on
+ * transition from 0 to 1, so clear it first, then set it as part of
+ * enabling the port. This will (very briefly) affect any other
+ * open ports, but it shouldn't be long enough to be an issue.
+ * We explicitly set the in-memory copy to 0 beforehand, so we don't
+ * have to wait to be sure the DMA update has happened.
+ */
+ *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL;
+ set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
+ &dd->ipath_rcvctrl);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+done:
return ret;
}
@@ -1433,6 +1850,8 @@ static void unlock_expected_tids(struct ipath_portdata *pd)
if (!dd->ipath_pageshadow[i])
continue;
+ pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i],
1);
dd->ipath_pageshadow[i] = NULL;
@@ -1453,6 +1872,7 @@ static void unlock_expected_tids(struct ipath_portdata *pd)
static int ipath_close(struct inode *in, struct file *fp)
{
int ret = 0;
+ struct ipath_filedata *fd;
struct ipath_portdata *pd;
struct ipath_devdata *dd;
unsigned port;
@@ -1462,9 +1882,24 @@ static int ipath_close(struct inode *in, struct file *fp)
mutex_lock(&ipath_mutex);
- pd = port_fp(fp);
- port = pd->port_port;
+ fd = (struct ipath_filedata *) fp->private_data;
fp->private_data = NULL;
+ pd = fd->pd;
+ if (!pd) {
+ mutex_unlock(&ipath_mutex);
+ goto bail;
+ }
+ if (--pd->port_cnt) {
+ /*
+ * XXX If the master closes the port before the slave(s),
+ * revoke the mmap for the eager receive queue so
+ * the slave(s) don't wait for receive data forever.
+ */
+ pd->active_slaves &= ~(1 << fd->subport);
+ mutex_unlock(&ipath_mutex);
+ goto bail;
+ }
+ port = pd->port_port;
dd = pd->port_dd;
if (pd->port_hdrqfull) {
@@ -1503,8 +1938,6 @@ static int ipath_close(struct inode *in, struct file *fp)
/* clean up the pkeys for this port user */
ipath_clean_part_key(pd, dd);
-
-
/*
* be paranoid, and never write 0's to these, just use an
* unused part of the port 0 tail page. Of course,
@@ -1523,39 +1956,49 @@ static int ipath_close(struct inode *in, struct file *fp)
i = dd->ipath_pbufsport * (port - 1);
ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
+ dd->ipath_f_clear_tids(dd, pd->port_port);
+
if (dd->ipath_pageshadow)
unlock_expected_tids(pd);
ipath_stats.sps_ports--;
ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
pd->port_comm, pd->port_pid,
dd->ipath_unit, port);
-
- dd->ipath_f_clear_tids(dd, pd->port_port);
}
- pd->port_cnt = 0;
pd->port_pid = 0;
-
dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
mutex_unlock(&ipath_mutex);
ipath_free_pddata(dd, pd); /* after releasing the mutex */
+bail:
+ kfree(fd);
return ret;
}
-static int ipath_port_info(struct ipath_portdata *pd,
+static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
struct ipath_port_info __user *uinfo)
{
struct ipath_port_info info;
int nup;
int ret;
+ size_t sz;
(void) ipath_count_units(NULL, &nup, NULL);
info.num_active = nup;
info.unit = pd->port_dd->ipath_unit;
info.port = pd->port_port;
+ info.subport = subport;
+ /* Don't return new fields if old library opened the port. */
+ if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) {
+ /* Number of user ports available for this device. */
+ info.num_ports = pd->port_dd->ipath_cfgports - 1;
+ info.num_subports = pd->port_subport_cnt;
+ sz = sizeof(info);
+ } else
+ sz = sizeof(info) - 2 * sizeof(u16);
- if (copy_to_user(uinfo, &info, sizeof(info))) {
+ if (copy_to_user(uinfo, &info, sz)) {
ret = -EFAULT;
goto bail;
}
@@ -1565,6 +2008,16 @@ bail:
return ret;
}
+/*
+ * Copy the port's active_slaves mask (sizeof(u32) bytes) out to the user
+ * address slave_mask_addr.  Returns 0, or -EFAULT if the copy fails.
+ */
+static int ipath_get_slave_info(struct ipath_portdata *pd,
+				void __user *slave_mask_addr)
+{
+	if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
static ssize_t ipath_write(struct file *fp, const char __user *data,
size_t count, loff_t *off)
{
@@ -1591,6 +2044,8 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
consumed = sizeof(cmd.type);
switch (cmd.type) {
+ case IPATH_CMD_ASSIGN_PORT:
+ case __IPATH_CMD_USER_INIT:
case IPATH_CMD_USER_INIT:
copy = sizeof(cmd.cmd.user_info);
dest = &cmd.cmd.user_info;
@@ -1617,6 +2072,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
dest = &cmd.cmd.part_key;
src = &ucmd->cmd.part_key;
break;
+ case IPATH_CMD_SLAVE_INFO:
+ copy = sizeof(cmd.cmd.slave_mask_addr);
+ dest = &cmd.cmd.slave_mask_addr;
+ src = &ucmd->cmd.slave_mask_addr;
+ break;
default:
ret = -EINVAL;
goto bail;
@@ -1634,34 +2094,55 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
consumed += copy;
pd = port_fp(fp);
+ if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
+ cmd.type != IPATH_CMD_ASSIGN_PORT) {
+ ret = -EINVAL;
+ goto bail;
+ }
switch (cmd.type) {
+ case IPATH_CMD_ASSIGN_PORT:
+ ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ break;
+ case __IPATH_CMD_USER_INIT:
+ /* backwards compatibility, get port first */
+ ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ /* and fall through to current version. */
case IPATH_CMD_USER_INIT:
- ret = ipath_do_user_init(pd, &cmd.cmd.user_info);
- if (ret < 0)
+ ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
+ if (ret)
goto bail;
ret = ipath_get_base_info(
- pd, (void __user *) (unsigned long)
+ fp, (void __user *) (unsigned long)
cmd.cmd.user_info.spu_base_info,
cmd.cmd.user_info.spu_base_info_size);
break;
case IPATH_CMD_RECV_CTRL:
- ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl);
+ ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
break;
case IPATH_CMD_PORT_INFO:
- ret = ipath_port_info(pd,
+ ret = ipath_port_info(pd, subport_fp(fp),
(struct ipath_port_info __user *)
(unsigned long) cmd.cmd.port_info);
break;
case IPATH_CMD_TID_UPDATE:
- ret = ipath_tid_update(pd, &cmd.cmd.tid_info);
+ ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
break;
case IPATH_CMD_TID_FREE:
- ret = ipath_tid_free(pd, &cmd.cmd.tid_info);
+ ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
break;
case IPATH_CMD_SET_PART_KEY:
ret = ipath_set_part_key(pd, cmd.cmd.part_key);
break;
+ case IPATH_CMD_SLAVE_INFO:
+ ret = ipath_get_slave_info(pd,
+ (void __user *) (unsigned long)
+ cmd.cmd.slave_mask_addr);
+ break;
}
if (ret >= 0)
@@ -1858,4 +2339,3 @@ void ipath_user_remove(struct ipath_devdata *dd)
bail:
return;
}
-
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index c8a8af0fe47..a507d0b5be6 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -356,19 +356,16 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
pos = *ppos;
- if ( pos < 0) {
+ if (pos != 0) {
ret = -EINVAL;
goto bail;
}
- if (pos >= sizeof(struct ipath_flash)) {
- ret = 0;
+ if (count != sizeof(struct ipath_flash)) {
+ ret = -EINVAL;
goto bail;
}
- if (count > sizeof(struct ipath_flash) - pos)
- count = sizeof(struct ipath_flash) - pos;
-
tmp = kmalloc(count, GFP_KERNEL);
if (!tmp) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 5c9b509e40e..9e4e8d4c6e2 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -252,8 +252,8 @@ static const struct ipath_cregs ipath_ht_cregs = {
};
/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1FF
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF
+#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
@@ -338,7 +338,7 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
if (crcbits) {
u16 ctrl0, ctrl1;
snprintf(bitsmsg, sizeof bitsmsg,
- "[HT%s lane %s CRC (%llx); ignore till reload]",
+ "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
!(crcbits & _IPATH_HTLINK1_CRCBITS) ?
"0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
? "1 (B)" : "0+1 (A+B)"),
@@ -389,17 +389,28 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
_IPATH_HTLINK1_CRCBITS)));
}
+/*
+ * 6110 specific hardware errors.  Each entry names an INFINIPATH_HWE_*
+ * error and the message text for it (the INFINIPATH_HWE_MSG macro is
+ * defined elsewhere).  ipath_ht_handle_hwerrors() passes this table to
+ * ipath_format_hwerrors() when building the error message.
+ */
+static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
+ INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
+ INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
+ INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
+ INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
+ INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
+ INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+ INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
/**
- * ipath_ht_handle_hwerrors - display hardware errors
+ * ipath_ht_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
* @msg: the output buffer
* @msgl: the size of the output buffer
*
- * Use same msg buffer as regular errors to avoid
- * excessive stack use. Most hardware errors are catastrophic, but for
- * right now, we'll print them and continue.
- * We reuse the same message buffer as ipath_handle_errors() to avoid
- * excessive stack usage.
+ * Use same msg buffer as regular errors to avoid excessive stack
+ * use. Most hardware errors are catastrophic, but for right now,
+ * we'll print them and continue. We reuse the same message buffer as
+ * ipath_handle_errors() to avoid excessive stack usage.
*/
static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
size_t msgl)
@@ -440,19 +451,49 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* make sure we get this much out, unless told to be quiet,
* or it's occurred within the last 5 seconds
*/
- if ((hwerrs & ~dd->ipath_lasthwerror) ||
+ if ((hwerrs & ~(dd->ipath_lasthwerror |
+ ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
(ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
dd->ipath_lasthwerror |= hwerrs;
- if (hwerrs & ~infinipath_hwe_bitsextant)
+ if (hwerrs & ~dd->ipath_hwe_bitsextant)
ipath_dev_err(dd, "hwerror interrupt with unknown errors "
"%llx set\n", (unsigned long long)
- (hwerrs & ~infinipath_hwe_bitsextant));
+ (hwerrs & ~dd->ipath_hwe_bitsextant));
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
if (ctrl & INFINIPATH_C_FREEZEMODE) {
+ /*
+ * parity errors in send memory are recoverable,
+ * just cancel the send (if indicated in sendbuffererror),
+ * count the occurrence, unfreeze (if no other handled
+ * hardware error bits are set), and continue. They can
+ * occur if a processor speculative read is done to the PIO
+ * buffer while we are sending a packet, for example.
+ */
+ if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+ ipath_stats.sps_txeparity++;
+ ipath_dbg("Recovering from TXE parity error (%llu), "
+ "hwerrstatus=%llx\n",
+ (unsigned long long) ipath_stats.sps_txeparity,
+ (unsigned long long) hwerrs);
+ ipath_disarm_senderrbufs(dd);
+ hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+ if (!hwerrs) { /* else leave in freeze mode */
+ ipath_write_kreg(dd,
+ dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+ return;
+ }
+ }
if (hwerrs) {
/*
* if any set that we aren't ignoring; only
@@ -499,44 +540,16 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
bits);
strlcat(msg, bitsmsg, msgl);
}
- if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
- strlcat(msg, "[IB2IPATH Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
- strlcat(msg, "[IPATH2IB Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR)
- strlcat(msg, "[HTC Ireq Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR)
- strlcat(msg, "[HTC Treq Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR)
- strlcat(msg, "[HTC Tresp Parity]", msgl);
+
+ ipath_format_hwerrors(hwerrs,
+ ipath_6110_hwerror_msgs,
+ sizeof(ipath_6110_hwerror_msgs) /
+ sizeof(ipath_6110_hwerror_msgs[0]),
+ msg, msgl);
if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
hwerr_crcbits(dd, hwerrs, msg, msgl);
- if (hwerrs & INFINIPATH_HWE_HTCMISCERR5)
- strlcat(msg, "[HT core Misc5]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCMISCERR6)
- strlcat(msg, "[HT core Misc6]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
- strlcat(msg, "[HT core Misc7]", msgl);
if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
msgl);
@@ -573,11 +586,6 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
- if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
- strlcat(msg, "[Rx Dsync]", msgl);
- if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
- strlcat(msg, "[SerDes PLL]", msgl);
-
ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
/*
@@ -1080,21 +1088,21 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
}
-static void ipath_init_ht_variables(void)
+static void ipath_init_ht_variables(struct ipath_devdata *dd)
{
- ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
- ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
- ipath_gpio_sda = IPATH_GPIO_SDA;
- ipath_gpio_scl = IPATH_GPIO_SCL;
+ dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+ dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+ dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+ dd->ipath_gpio_scl = IPATH_GPIO_SCL;
- infinipath_i_bitsextant =
+ dd->ipath_i_bitsextant =
(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
(INFINIPATH_I_RCVAVAIL_MASK <<
INFINIPATH_I_RCVAVAIL_SHIFT) |
INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
- infinipath_e_bitsextant =
+ dd->ipath_e_bitsextant =
INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
@@ -1112,7 +1120,7 @@ static void ipath_init_ht_variables(void)
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
INFINIPATH_E_HARDWARE;
- infinipath_hwe_bitsextant =
+ dd->ipath_hwe_bitsextant =
(INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
@@ -1141,8 +1149,8 @@ static void ipath_init_ht_variables(void)
INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
- infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
- infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+ dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+ dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
}
/**
@@ -1607,5 +1615,5 @@ void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
* do very early init that is needed before ipath_f_bus is
* called
*/
- ipath_init_ht_variables();
+ ipath_init_ht_variables(dd);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index d86516d23df..a72ab9de386 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -263,8 +263,8 @@ static const struct ipath_cregs ipath_pe_cregs = {
};
/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1F
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1F
+#define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1)
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1)
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL
@@ -294,6 +294,33 @@ static const struct ipath_cregs ipath_pe_cregs = {
#define IPATH_GPIO_SCL (1ULL << \
(_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+/*
+ * Rev2 silicon allows suppressing the check for ArmLaunch errors.
+ * This can speed up short packet sends on systems that do
+ * not guarantee write-order.
+ */
+#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63)
+
+/*
+ * 6120 specific hardware errors.  Each entry names an INFINIPATH_HWE_*
+ * error and the message text for it (the INFINIPATH_HWE_MSG macro is
+ * defined elsewhere).  ipath_pe_handle_hwerrors() passes this table to
+ * ipath_format_hwerrors() when building the error message.
+ */
+static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
+ INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"),
+ /*
+ * In practice, it's unlikely that we'll see PCIe PLL, or bus
+ * parity or memory parity error failures, because most likely we
+ * won't be able to talk to the core of the chip. Nonetheless, we
+ * might see them, if they are in parts of the PCIe core that aren't
+ * essential.
+ */
+ INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"),
+ INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"),
+ INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+ INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
/**
* ipath_pe_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
@@ -343,19 +370,49 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* make sure we get this much out, unless told to be quiet,
* or it's occurred within the last 5 seconds
*/
- if ((hwerrs & ~dd->ipath_lasthwerror) ||
+ if ((hwerrs & ~(dd->ipath_lasthwerror |
+ ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
(ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
dd->ipath_lasthwerror |= hwerrs;
- if (hwerrs & ~infinipath_hwe_bitsextant)
+ if (hwerrs & ~dd->ipath_hwe_bitsextant)
ipath_dev_err(dd, "hwerror interrupt with unknown errors "
"%llx set\n", (unsigned long long)
- (hwerrs & ~infinipath_hwe_bitsextant));
+ (hwerrs & ~dd->ipath_hwe_bitsextant));
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
if (ctrl & INFINIPATH_C_FREEZEMODE) {
+ /*
+ * parity errors in send memory are recoverable,
+ * just cancel the send (if indicated in sendbuffererror),
+ * count the occurrence, unfreeze (if no other handled
+ * hardware error bits are set), and continue. They can
+ * occur if a processor speculative read is done to the PIO
+ * buffer while we are sending a packet, for example.
+ */
+ if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+ ipath_stats.sps_txeparity++;
+ ipath_dbg("Recovering from TXE parity error (%llu), "
+ "hwerrstatus=%llx\n",
+ (unsigned long long) ipath_stats.sps_txeparity,
+ (unsigned long long) hwerrs);
+ ipath_disarm_senderrbufs(dd);
+ hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+ if (!hwerrs) { /* else leave in freeze mode */
+ ipath_write_kreg(dd,
+ dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+ return;
+ }
+ }
if (hwerrs) {
/*
* if any set that we aren't ignoring only make the
@@ -379,9 +436,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
} else {
ipath_dbg("Clearing freezemode on ignored hardware "
"error\n");
- ctrl &= ~INFINIPATH_C_FREEZEMODE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- ctrl);
+ dd->ipath_control);
}
}
@@ -396,24 +452,13 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
dd->ipath_hwerrmask);
}
- if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
+
+ ipath_format_hwerrors(hwerrs,
+ ipath_6120_hwerror_msgs,
+ sizeof(ipath_6120_hwerror_msgs)/
+ sizeof(ipath_6120_hwerror_msgs[0]),
+ msg, msgl);
+
if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
<< INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
bits = (u32) ((hwerrs >>
@@ -423,10 +468,6 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
"[PCIe Mem Parity Errs %x] ", bits);
strlcat(msg, bitsmsg, msgl);
}
- if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
- strlcat(msg, "[IB2IPATH Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
- strlcat(msg, "[IPATH2IB Parity]", msgl);
#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \
INFINIPATH_HWE_COREPLL_RFSLIP )
@@ -452,34 +493,6 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
- if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP)
- strlcat(msg, "[PCIe Poisoned TLP]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT)
- strlcat(msg, "[PCIe completion timeout]", msgl);
-
- /*
- * In practice, it's unlikely wthat we'll see PCIe PLL, or bus
- * parity or memory parity error failures, because most likely we
- * won't be able to talk to the core of the chip. Nonetheless, we
- * might see them, if they are in parts of the PCIe core that aren't
- * essential.
- */
- if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED)
- strlcat(msg, "[PCIePLL1]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED)
- strlcat(msg, "[PCIePLL0]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH)
- strlcat(msg, "[PCIe XTLH core parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM)
- strlcat(msg, "[PCIe ADM TX core parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM)
- strlcat(msg, "[PCIe ADM RX core parity]", msgl);
-
- if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
- strlcat(msg, "[Rx Dsync]", msgl);
- if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
- strlcat(msg, "[SerDes PLL]", msgl);
-
ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
/*
@@ -525,6 +538,9 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
case 5:
n = "InfiniPath_QMH7140";
break;
+ case 6:
+ n = "InfiniPath_QLE7142";
+ break;
default:
ipath_dev_err(dd,
"Don't yet know about board with ID %u\n",
@@ -571,9 +587,12 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
if (!dd->ipath_boardrev) // no PLL for Emulator
val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
- /* workaround bug 9460 in internal interface bus parity checking */
- val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
-
+ if (dd->ipath_minrev < 2) {
+ /* workaround bug 9460 in internal interface bus parity
+ * checking. Fixed (HW bug 9490) in Rev2.
+ */
+ val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
+ }
dd->ipath_hwerrmask = val;
}
@@ -583,8 +602,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
*/
static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
{
- u64 val, tmp, config1;
- int ret = 0, change = 0;
+ u64 val, tmp, config1, prev_val;
+ int ret = 0;
ipath_dbg("Trying to bringup serdes\n");
@@ -641,6 +660,7 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ prev_val = val;
if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
val &=
@@ -648,11 +668,9 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
INFINIPATH_XGXS_MDIOADDR_SHIFT);
/* MDIO address 3 */
val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
- change = 1;
}
if (val & INFINIPATH_XGXS_RESET) {
val &= ~INFINIPATH_XGXS_RESET;
- change = 1;
}
if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
@@ -661,9 +679,19 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
INFINIPATH_XGXS_RX_POL_SHIFT);
val |= dd->ipath_rx_pol_inv <<
INFINIPATH_XGXS_RX_POL_SHIFT;
- change = 1;
}
- if (change)
+ if (dd->ipath_minrev >= 2) {
+ /* Rev 2. can tolerate multiple writes to PBC, and
+ * allowing them can provide lower latency on some
+ * CPUs, but this feature is off by default, only
+ * turned on by setting D63 of XGXSconfig reg.
+ * May want to make this conditional more
+ * fine-grained in future. This is not exactly
+ * related to XGXS, but where the bit ended up.
+ */
+ val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR;
+ }
+ if (val != prev_val)
ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
@@ -717,9 +745,25 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
}
-/* this is not yet needed on this chip, so just return 0. */
static int ipath_pe_intconfig(struct ipath_devdata *dd)
{
+ u64 val;
+ u32 chiprev;
+
+ /*
+ * If the chip supports added error indication via GPIO pins,
+ * enable interrupts on those bits so the interrupt routine
+ * can count the events. Also set flag so interrupt routine
+ * can know they are expected.
+ */
+ chiprev = dd->ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT;
+ if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) {
+ /* Rev2+ reports extra errors via internal GPIO pins */
+ dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+ val |= IPATH_GPIO_ERRINTR_MASK;
+ ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+ }
return 0;
}
@@ -853,21 +897,23 @@ static int ipath_setup_pe_config(struct ipath_devdata *dd,
return 0;
}
-static void ipath_init_pe_variables(void)
+static void ipath_init_pe_variables(struct ipath_devdata *dd)
{
/*
* bits for selecting i2c direction and values,
* used for I2C serial flash
*/
- ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
- ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
- ipath_gpio_sda = IPATH_GPIO_SDA;
- ipath_gpio_scl = IPATH_GPIO_SCL;
+ dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+ dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+ dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+ dd->ipath_gpio_scl = IPATH_GPIO_SCL;
/* variables for sanity checking interrupt and errors */
- infinipath_hwe_bitsextant =
+ dd->ipath_hwe_bitsextant =
(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
+ (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
(INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
INFINIPATH_HWE_PCIE1PLLFAILED |
@@ -883,13 +929,13 @@ static void ipath_init_pe_variables(void)
INFINIPATH_HWE_SERDESPLLFAILED |
INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
- infinipath_i_bitsextant =
+ dd->ipath_i_bitsextant =
(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
(INFINIPATH_I_RCVAVAIL_MASK <<
INFINIPATH_I_RCVAVAIL_SHIFT) |
INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
- infinipath_e_bitsextant =
+ dd->ipath_e_bitsextant =
INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
@@ -907,8 +953,8 @@ static void ipath_init_pe_variables(void)
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
INFINIPATH_E_HARDWARE;
- infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
- infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+ dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+ dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
}
/* setup the MSI stuff again after a reset. I'd like to just call
@@ -1082,6 +1128,45 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
mmiowb();
spin_unlock_irqrestore(&dd->ipath_tid_lock, flags);
}
+/**
+ * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
+ * @dd: the infinipath device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @type: 0 for eager, 1 for expected
+ * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for selection of the
+ * appropriate "flavor". The static calls in cleanup just use the
+ * revision-agnostic form, as they are not performance critical.
+ */
+static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
+ u32 type, unsigned long pa)
+{
+ u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
+
+ if (pa != dd->ipath_tidinvalid) {
+ if (pa & ((1U << 11) - 1)) {
+ dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
+ "not 2KB aligned!\n", pa);
+ return;
+ }
+ pa >>= 11; /* low 11 bits were verified to be zero above */
+ /* paranoia check: report (but still write) if bits 31-29 are set */
+ if (pa & (7<<29))
+ ipath_dev_err(dd,
+ "BUG: Physical page address 0x%lx "
+ "has bits set in 31-29\n", pa);
+
+ if (type == 0)
+ pa |= dd->ipath_tidtemplate;
+ else /* for now, always full 4KB page */
+ pa |= 2 << 29;
+ }
+ if (dd->ipath_kregbase)
+ writel(pa, tidp32);
+ mmiowb();
+}
+
/**
* ipath_pe_clear_tid - clear all TID entries for a port, expected and eager
@@ -1203,7 +1288,7 @@ int __attribute__((weak)) ipath_unordered_wc(void)
/**
* ipath_init_pe_get_base_info - set chip-specific flags for user code
- * @dd: the infinipath device
+ * @pd: the infinipath port
* @kbase: ipath_base_info pointer
*
* We set the PCIE flag because the lower bandwidth on PCIe vs
@@ -1212,6 +1297,7 @@ int __attribute__((weak)) ipath_unordered_wc(void)
static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
{
struct ipath_base_info *kinfo = kbase;
+ struct ipath_devdata *dd;
if (ipath_unordered_wc()) {
kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER;
@@ -1220,8 +1306,20 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
else
ipath_cdbg(PROC, "Not Intel processor, WC ordered\n");
- kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
+ if (pd == NULL)
+ goto done;
+ dd = pd->port_dd;
+
+ if (dd != NULL && dd->ipath_minrev >= 2) {
+ ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n");
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE;
+ ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n");
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN;
+ }
+
+done:
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
return 0;
}
@@ -1244,7 +1342,10 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
dd->ipath_f_clear_tids = ipath_pe_clear_tids;
- dd->ipath_f_put_tid = ipath_pe_put_tid;
+ if (dd->ipath_minrev >= 2)
+ dd->ipath_f_put_tid = ipath_pe_put_tid_2;
+ else
+ dd->ipath_f_put_tid = ipath_pe_put_tid;
dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
dd->ipath_f_setextled = ipath_setup_pe_setextled;
dd->ipath_f_get_base_info = ipath_pe_get_base_info;
@@ -1259,6 +1360,6 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
dd->ipath_kregs = &ipath_pe_kregs;
dd->ipath_cregs = &ipath_pe_cregs;
- ipath_init_pe_variables();
+ ipath_init_pe_variables(dd);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 44669dc2e22..d819cca524c 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -88,13 +88,13 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
static int create_port0_egr(struct ipath_devdata *dd)
{
unsigned e, egrcnt;
- struct sk_buff **skbs;
+ struct ipath_skbinfo *skbinfo;
int ret;
egrcnt = dd->ipath_rcvegrcnt;
- skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt);
- if (skbs == NULL) {
+ skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
+ if (skbinfo == NULL) {
ipath_dev_err(dd, "allocation error for eager TID "
"skb array\n");
ret = -ENOMEM;
@@ -109,13 +109,13 @@ static int create_port0_egr(struct ipath_devdata *dd)
* 4 bytes so that the data buffer stays word aligned.
* See ipath_kreceive() for more details.
*/
- skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL);
- if (!skbs[e]) {
+ skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
+ if (!skbinfo[e].skb) {
ipath_dev_err(dd, "SKB allocation error for "
"eager TID %u\n", e);
while (e != 0)
- dev_kfree_skb(skbs[--e]);
- vfree(skbs);
+ dev_kfree_skb(skbinfo[--e].skb);
+ vfree(skbinfo);
ret = -ENOMEM;
goto bail;
}
@@ -124,14 +124,17 @@ static int create_port0_egr(struct ipath_devdata *dd)
* After loop above, so we can test non-NULL to see if ready
* to use at receive, etc.
*/
- dd->ipath_port0_skbs = skbs;
+ dd->ipath_port0_skbinfo = skbinfo;
for (e = 0; e < egrcnt; e++) {
- unsigned long phys =
- virt_to_phys(dd->ipath_port0_skbs[e]->data);
+ dd->ipath_port0_skbinfo[e].phys =
+ ipath_map_single(dd->pcidev,
+ dd->ipath_port0_skbinfo[e].skb->data,
+ dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
((char __iomem *) dd->ipath_kregbase +
- dd->ipath_rcvegrbase), 0, phys);
+ dd->ipath_rcvegrbase), 0,
+ dd->ipath_port0_skbinfo[e].phys);
}
ret = 0;
@@ -432,16 +435,33 @@ done:
*/
static void init_shadow_tids(struct ipath_devdata *dd)
{
- dd->ipath_pageshadow = (struct page **)
- vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ struct page **pages;
+ dma_addr_t *addrs;
+
+ pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
sizeof(struct page *));
- if (!dd->ipath_pageshadow)
+ if (!pages) {
ipath_dev_err(dd, "failed to allocate shadow page * "
"array, no expected sends!\n");
- else
- memset(dd->ipath_pageshadow, 0,
- dd->ipath_cfgports * dd->ipath_rcvtidcnt *
- sizeof(struct page *));
+ dd->ipath_pageshadow = NULL;
+ return;
+ }
+
+ addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ sizeof(dma_addr_t));
+ if (!addrs) {
+ ipath_dev_err(dd, "failed to allocate shadow dma handle "
+ "array, no expected sends!\n");
+ vfree(pages); /* array is still only in the local; not yet in dd */
+ dd->ipath_pageshadow = NULL;
+ return;
+ }
+
+ memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ sizeof(struct page *));
+
+ dd->ipath_pageshadow = pages;
+ dd->ipath_physshadow = addrs;
}
static void enable_chip(struct ipath_devdata *dd,
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 49bf7bb15b0..6bee53ce5f3 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -37,6 +37,50 @@
#include "ipath_verbs.h"
#include "ipath_common.h"
+/*
+ * Called when we might have an error that is specific to a particular
+ * PIO buffer: disarms every buffer flagged in the sendbuffererror registers.
+ */
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
+{
+ u32 piobcnt;
+ unsigned long sbuf[4];
+ /*
+ * it's possible that sendbuffererror could have bits set; might
+ * have already done this as a result of hardware error handling
+ */
+ piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ /* read these before writing errorclear */
+ sbuf[0] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror);
+ sbuf[1] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror + 1);
+ if (piobcnt > 128) { /* sbuf[2..3] are read, and later used, only in this case */
+ sbuf[2] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror + 2);
+ sbuf[3] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror + 3);
+ }
+
+ if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
+ int i;
+ if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) {
+ __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
+ "SendbufErrs %lx %lx", sbuf[0],
+ sbuf[1]);
+ if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
+ printk(" %lx %lx ", sbuf[2], sbuf[3]);
+ printk("\n");
+ }
+
+ for (i = 0; i < piobcnt; i++) /* one bit per PIO buffer */
+ if (test_bit(i, sbuf))
+ ipath_disarm_piobufs(dd, i, 1);
+ dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit; checked in handle_errors() */
+ }
+}
+
+
/* These are all rcv-related errors which we want to count for stats */
#define E_SUM_PKTERRS \
(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
@@ -68,53 +112,9 @@
static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
- unsigned long sbuf[4];
u64 ignore_this_time = 0;
- u32 piobcnt;
- /* if possible that sendbuffererror could be valid */
- piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
- /* read these before writing errorclear */
- sbuf[0] = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_sendbuffererror);
- sbuf[1] = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_sendbuffererror + 1);
- if (piobcnt > 128) {
- sbuf[2] = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_sendbuffererror + 2);
- sbuf[3] = ipath_read_kreg64(
- dd, dd->ipath_kregs->kr_sendbuffererror + 3);
- }
-
- if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
- int i;
-
- ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]);
- if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
- printk("%lx %lx ", sbuf[2], sbuf[3]);
- for (i = 0; i < piobcnt; i++) {
- if (test_bit(i, sbuf)) {
- u32 __iomem *piobuf;
- if (i < dd->ipath_piobcnt2k)
- piobuf = (u32 __iomem *)
- (dd->ipath_pio2kbase +
- i * dd->ipath_palign);
- else
- piobuf = (u32 __iomem *)
- (dd->ipath_pio4kbase +
- (i - dd->ipath_piobcnt2k) *
- dd->ipath_4kalign);
-
- ipath_cdbg(PKT,
- "PIObuf[%u] @%p pbc is %x; ",
- i, piobuf, readl(piobuf));
-
- ipath_disarm_piobufs(dd, i, 1);
- }
- }
- if (ipath_debug & __IPATH_PKTDBG)
- printk("\n");
- }
+ ipath_disarm_senderrbufs(dd);
if ((errs & E_SUM_LINK_PKTERRS) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*
@@ -132,6 +132,82 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
return ignore_this_time;
}
+/* generic hw error messages... */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
+ { \
+ .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \
+ .msg = "TXE " #a " Memory Parity" \
+ }
+#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
+ { \
+ .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \
+ .msg = "RXE " #a " Memory Parity" \
+ }
+
+static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
+ INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
+
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
+
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
+};
+
+/**
+ * ipath_format_hwmsg - append a single hwerror message, wrapped in "[" and "]"
+ * @msg: message buffer; the text is appended to it with strlcat()
+ * @msgl: length of message buffer
+ * @hwmsg: message to add to message buffer
+ */
+static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
+{
+ strlcat(msg, "[", msgl);
+ strlcat(msg, hwmsg, msgl);
+ strlcat(msg, "]", msgl);
+}
+
+/**
+ * ipath_format_hwerrors - format hardware error messages for display
+ * @hwerrs: hardware errors bit vector
+ * @hwerrmsgs: hardware error descriptions
+ * @nhwerrmsgs: number of hwerrmsgs
+ * @msg: message buffer
+ * @msgl: message buffer length
+ */
+void ipath_format_hwerrors(u64 hwerrs,
+ const struct ipath_hwerror_msgs *hwerrmsgs,
+ size_t nhwerrmsgs,
+ char *msg, size_t msgl)
+{
+ int i;
+ const int glen =
+ sizeof(ipath_generic_hwerror_msgs) /
+ sizeof(ipath_generic_hwerror_msgs[0]);
+
+ for (i=0; i<glen; i++) { /* first the entries of ipath_generic_hwerror_msgs */
+ if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
+ ipath_format_hwmsg(msg, msgl,
+ ipath_generic_hwerror_msgs[i].msg);
+ }
+ }
+
+ for (i=0; i<nhwerrmsgs; i++) { /* then the caller-supplied table */
+ if (hwerrs & hwerrmsgs[i].mask) {
+ ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
+ }
+ }
+}
+
/* return the strings for the most common link states */
static char *ib_linkstate(u32 linkstate)
{
@@ -404,10 +480,10 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
}
- if (!noprint && (errs & ~infinipath_e_bitsextant))
+ if (!noprint && (errs & ~dd->ipath_e_bitsextant))
ipath_dev_err(dd, "error interrupt with unknown errors "
"%llx set\n", (unsigned long long)
- (errs & ~infinipath_e_bitsextant));
+ (errs & ~dd->ipath_e_bitsextant));
if (errs & E_SUM_ERRS)
ignore_this_time = handle_e_sum_errs(dd, errs);
@@ -478,6 +554,14 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
~(INFINIPATH_E_HARDWARE |
INFINIPATH_E_IBSTATUSCHANGED);
}
+
+ /* likely due to cancel, so suppress */
+ if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
+ dd->ipath_lastcancel > jiffies) {
+ ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n");
+ errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
+ }
+
if (!errs)
return 0;
@@ -529,7 +613,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* don't report same point multiple times,
* except kernel
*/
- tl = (u32) * pd->port_rcvhdrtail_kvaddr;
+ tl = *(u64 *) pd->port_rcvhdrtail_kvaddr;
if (tl == dd->ipath_lastrcvhdrqtails[i])
continue;
hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
@@ -729,9 +813,9 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
int rcvdint = 0;
portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
- infinipath_i_rcvavail_mask)
+ dd->ipath_i_rcvavail_mask)
| ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
- infinipath_i_rcvurg_mask);
+ dd->ipath_i_rcvurg_mask);
for (i = 1; i < dd->ipath_cfgports; i++) {
struct ipath_portdata *pd = dd->ipath_pd[i];
if (portr & (1 << i) && pd && pd->port_cnt &&
@@ -808,7 +892,7 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
if (oldhead != curtail) {
if (dd->ipath_flags & IPATH_GPIO_INTR) {
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
- (u64) (1 << 2));
+ (u64) (1 << IPATH_GPIO_PORT0_BIT));
istat = port0rbits | INFINIPATH_I_GPIO;
}
else
@@ -838,10 +922,10 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
if (unexpected)
unexpected = 0;
- if (unlikely(istat & ~infinipath_i_bitsextant))
+ if (unlikely(istat & ~dd->ipath_i_bitsextant))
ipath_dev_err(dd,
"interrupt with unknown interrupts %x set\n",
- istat & (u32) ~ infinipath_i_bitsextant);
+ istat & (u32) ~ dd->ipath_i_bitsextant);
else
ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
@@ -867,26 +951,80 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
if (istat & INFINIPATH_I_GPIO) {
/*
- * Packets are available in the port 0 rcv queue.
- * Eventually this needs to be generalized to check
- * IPATH_GPIO_INTR, and the specific GPIO bit, if
- * GPIO interrupts are used for anything else.
+ * GPIO interrupts fall in two broad classes:
+ * GPIO_2 indicates (on some HT4xx boards) that a packet
+ * has arrived for Port 0. Checking for this
+ * is controlled by flag IPATH_GPIO_INTR.
+ * GPIO_3..5 on IBA6120 Rev2 chips indicate errors
+ * that we need to count. Checking for this
+ * is controlled by flag IPATH_GPIO_ERRINTRS.
*/
- if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) {
- u32 gpiostatus;
- gpiostatus = ipath_read_kreg32(
- dd, dd->ipath_kregs->kr_gpio_status);
- ipath_dbg("Unexpected GPIO interrupt bits %x\n",
- gpiostatus);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
- gpiostatus);
+ u32 gpiostatus;
+ u32 to_clear = 0;
+
+ gpiostatus = ipath_read_kreg32(
+ dd, dd->ipath_kregs->kr_gpio_status);
+ /* First the error-counter case.
+ */
+ if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
+ (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
+ /* want to clear the bits we see asserted. */
+ to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
+
+ /*
+ * Count appropriately, clear bits out of our copy,
+ * as they have been "handled".
+ */
+ if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
+ ipath_dbg("FlowCtl on UnsupVL\n");
+ dd->ipath_rxfc_unsupvl_errs++;
+ }
+ if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
+ ipath_dbg("Overrun Threshold exceeded\n");
+ dd->ipath_overrun_thresh_errs++;
+ }
+ if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
+ ipath_dbg("Local Link Integrity error\n");
+ dd->ipath_lli_errs++;
+ }
+ gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
}
- else {
- /* Clear GPIO status bit 2 */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
- (u64) (1 << 2));
+ /* Now the Port0 Receive case */
+ if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
+ (dd->ipath_flags & IPATH_GPIO_INTR)) {
+ /*
+ * GPIO status bit 2 is set, and we expected it.
+ * clear it and indicate in p0bits.
+ * This probably only happens if a Port0 pkt
+ * arrives at _just_ the wrong time, and we
+ * handle that by setting chk0rcv;
+ */
+ to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
+ gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
chk0rcv = 1;
}
+ if (unlikely(gpiostatus)) {
+ /*
+ * Some unexpected bits remain. If they could have
+ * caused the interrupt, complain and clear.
+ * MEA: this is almost certainly non-ideal.
+ * we should look into auto-disable of unexpected
+ * GPIO interrupts, possibly on a "three strikes"
+ * basis.
+ */
+ u32 mask;
+ mask = ipath_read_kreg32(
+ dd, dd->ipath_kregs->kr_gpio_mask);
+ if (mask & gpiostatus) {
+ ipath_dbg("Unexpected GPIO IRQ bits %x\n",
+ gpiostatus & mask);
+ to_clear |= (gpiostatus & mask);
+ }
+ }
+ if (to_clear) {
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
+ (u64) to_clear);
+ }
}
chk0rcv |= istat & port0rbits;
@@ -911,9 +1049,9 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
istat &= ~port0rbits;
}
- if (istat & ((infinipath_i_rcvavail_mask <<
+ if (istat & ((dd->ipath_i_rcvavail_mask <<
INFINIPATH_I_RCVAVAIL_SHIFT)
- | (infinipath_i_rcvurg_mask <<
+ | (dd->ipath_i_rcvurg_mask <<
INFINIPATH_I_RCVURG_SHIFT)))
handle_urcv(dd, istat);
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index a8a56276ff1..d7540b71b45 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -39,6 +39,8 @@
*/
#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
#include <asm/io.h>
#include "ipath_common.h"
@@ -62,7 +64,7 @@ struct ipath_portdata {
/* rcvhdrq base, needs mmap before useful */
void *port_rcvhdrq;
/* kernel virtual address where hdrqtail is updated */
- volatile __le64 *port_rcvhdrtail_kvaddr;
+ void *port_rcvhdrtail_kvaddr;
/*
* temp buffer for expected send setup, allocated at open, instead
* of each setup call
@@ -79,8 +81,8 @@ struct ipath_portdata {
dma_addr_t port_rcvhdrq_phys;
dma_addr_t port_rcvhdrqtailaddr_phys;
/*
- * number of opens on this instance (0 or 1; ignoring forks, dup,
- * etc. for now)
+ * number of opens (including slave subports) on this instance
+ * (ignoring forks, dup, etc. for now)
*/
int port_cnt;
/*
@@ -89,6 +91,10 @@ struct ipath_portdata {
*/
/* instead of calculating it */
unsigned port_port;
+ /* non-zero if port is being shared. */
+ u16 port_subport_cnt;
+ /* non-zero if port is being shared. */
+ u16 port_subport_id;
/* chip offset of PIO buffers for this port */
u32 port_piobufs;
/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
@@ -121,6 +127,16 @@ struct ipath_portdata {
u16 port_pkeys[4];
/* so file ops can get at unit */
struct ipath_devdata *port_dd;
+ /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
+ void *subport_uregbase;
+ /* An array of pages for the eager receive buffers * N */
+ void *subport_rcvegrbuf;
+ /* An array of pages for the eager header queue entries * N */
+ void *subport_rcvhdr_base;
+ /* The version of the library which opened this port */
+ u32 userversion;
+ /* Bitmask of active slaves */
+ u32 active_slaves;
};
struct sk_buff;
@@ -132,6 +148,11 @@ struct _ipath_layer {
void *l_arg;
};
+struct ipath_skbinfo {
+ struct sk_buff *skb;
+ dma_addr_t phys;
+};
+
struct ipath_devdata {
struct list_head ipath_list;
@@ -154,7 +175,7 @@ struct ipath_devdata {
/* ipath_cfgports pointers */
struct ipath_portdata **ipath_pd;
/* sk_buffs used by port 0 eager receive queue */
- struct sk_buff **ipath_port0_skbs;
+ struct ipath_skbinfo *ipath_port0_skbinfo;
/* kvirt address of 1st 2k pio buffer */
void __iomem *ipath_pio2kbase;
/* kvirt address of 1st 4k pio buffer */
@@ -315,12 +336,16 @@ struct ipath_devdata {
u8 ipath_ht_slave_off;
/* for write combining settings */
unsigned long ipath_wc_cookie;
+ unsigned long ipath_wc_base;
+ unsigned long ipath_wc_len;
/* ref count for each pkey */
atomic_t ipath_pkeyrefs[4];
/* shadow copy of all exptids physaddr; used only by funcsim */
u64 *ipath_tidsimshadow;
/* shadow copy of struct page *'s for exp tid pages */
struct page **ipath_pageshadow;
+ /* shadow copy of dma handles for exp tid pages */
+ dma_addr_t *ipath_physshadow;
/* lock to workaround chip bug 9437 */
spinlock_t ipath_tid_lock;
@@ -402,6 +427,9 @@ struct ipath_devdata {
unsigned long ipath_rcvctrl;
/* shadow kr_sendctrl */
unsigned long ipath_sendctrl;
+ /* ports waiting for PIOavail intr */
+ unsigned long ipath_portpiowait;
+ unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */
/* value we put in kr_rcvhdrcnt */
u32 ipath_rcvhdrcnt;
@@ -465,8 +493,6 @@ struct ipath_devdata {
u32 ipath_htwidth;
/* HT speed (200,400,800,1000) from HT config */
u32 ipath_htspeed;
- /* ports waiting for PIOavail intr */
- unsigned long ipath_portpiowait;
/*
* number of sequential ibcstatus change for polling active/quiet
* (i.e., link not coming up).
@@ -510,8 +536,47 @@ struct ipath_devdata {
u32 ipath_lli_counter;
/* local link integrity errors */
u32 ipath_lli_errors;
+ /*
+ * Above counts only cases where _successive_ LocalLinkIntegrity
+ * errors were seen in the receive headers of kern-packets.
+ * Below are the three (monotonically increasing) counters
+ * maintained via GPIO interrupts on iba6120-rev2.
+ */
+ u32 ipath_rxfc_unsupvl_errs;
+ u32 ipath_overrun_thresh_errs;
+ u32 ipath_lli_errs;
+
+ /*
+ * Not all devices managed by a driver instance are the same
+ * type, so these fields must be per-device.
+ */
+ u64 ipath_i_bitsextant;
+ ipath_err_t ipath_e_bitsextant;
+ ipath_err_t ipath_hwe_bitsextant;
+
+ /*
+ * Below should be computable from number of ports,
+ * since they are never modified.
+ */
+ u32 ipath_i_rcvavail_mask;
+ u32 ipath_i_rcvurg_mask;
+
+ /*
+ * Register bits for selecting i2c direction and values, used for
+ * I2C serial flash.
+ */
+ u16 ipath_gpio_sda_num;
+ u16 ipath_gpio_scl_num;
+ u64 ipath_gpio_sda;
+ u64 ipath_gpio_scl;
};
+/* Private data for file operations */
+struct ipath_filedata {
+ struct ipath_portdata *pd;
+ unsigned subport;
+ unsigned tidcursor;
+};
extern struct list_head ipath_dev_list;
extern spinlock_t ipath_devs_lock;
extern struct ipath_devdata *ipath_lookup(int unit);
@@ -521,6 +586,7 @@ int ipath_enable_wc(struct ipath_devdata *dd);
void ipath_disable_wc(struct ipath_devdata *dd);
int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
void ipath_shutdown_device(struct ipath_devdata *);
+void ipath_disarm_senderrbufs(struct ipath_devdata *);
struct file_operations;
int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
@@ -572,7 +638,11 @@ int ipath_set_lid(struct ipath_devdata *, u32, u8);
int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
/* for use in system calls, where we want to know device type, etc. */
-#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
+#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
+#define subport_fp(fp) \
+ ((struct ipath_filedata *)(fp)->private_data)->subport
+#define tidcursor_fp(fp) \
+ ((struct ipath_filedata *)(fp)->private_data)->tidcursor
/*
* values for ipath_flags
@@ -612,6 +682,15 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
/* can miss port0 rx interrupts */
#define IPATH_POLL_RX_INTR 0x40000
#define IPATH_DISABLED 0x80000 /* administratively disabled */
+ /* Use GPIO interrupts for new counters */
+#define IPATH_GPIO_ERRINTRS 0x100000
+
+/* Bits in GPIO for the added interrupts */
+#define IPATH_GPIO_PORT0_BIT 2
+#define IPATH_GPIO_RXUVL_BIT 3
+#define IPATH_GPIO_OVRUN_BIT 4
+#define IPATH_GPIO_LLI_BIT 5
+#define IPATH_GPIO_ERRINTR_MASK 0x38
/* portdata flag bit offsets */
/* waiting for a packet to arrive */
@@ -799,6 +878,13 @@ int ipathfs_add_device(struct ipath_devdata *);
int ipathfs_remove_device(struct ipath_devdata *);
/*
+ * dma_addr wrappers - all 0's invalid for hw
+ */
+dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
+ size_t, int);
+dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
+
+/*
* Flush write combining store buffers (if present) and perform a write
* barrier.
*/
@@ -855,4 +941,20 @@ extern struct mutex ipath_mutex;
#endif /* _IPATH_DEBUGGING */
+/*
+ * this is used for formatting hw error messages...
+ */
+struct ipath_hwerror_msgs {
+ u64 mask;
+ const char *msg;
+};
+
+#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
+
+/* in ipath_intr.c... */
+void ipath_format_hwerrors(u64 hwerrs,
+ const struct ipath_hwerror_msgs *hwerrmsgs,
+ size_t nhwerrmsgs,
+ char *msg, size_t lmsg);
+
#endif /* _IPATH_KERNEL_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index ba1b93226ca..9a6cbd05adc 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -118,9 +118,10 @@ void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
* Check the IB SGE for validity and initialize our internal version
* of it.
*/
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
struct ib_sge *sge, int acc)
{
+ struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
struct ipath_mregion *mr;
unsigned n, m;
size_t off;
@@ -140,7 +141,8 @@ int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
goto bail;
}
mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
- if (unlikely(mr == NULL || mr->lkey != sge->lkey)) {
+ if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
+ qp->ibqp.pd != mr->pd)) {
ret = 0;
goto bail;
}
@@ -188,9 +190,10 @@ bail:
*
* Return 1 if successful, otherwise 0.
*/
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc)
{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_lkey_table *rkt = &dev->lk_table;
struct ipath_sge *sge = &ss->sge;
struct ipath_mregion *mr;
@@ -214,7 +217,8 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
}
mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
- if (unlikely(mr == NULL || mr->lkey != rkey)) {
+ if (unlikely(mr == NULL || mr->lkey != rkey ||
+ qp->ibqp.pd != mr->pd)) {
ret = 0;
goto bail;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 72d1db89db8..25908b02fbe 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -87,7 +87,8 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
struct ipath_devdata *dd = to_idev(ibdev)->dd;
u32 vendor, majrev, minrev;
- if (smp->attr_mod)
+ /* GUID 0 is illegal */
+ if (smp->attr_mod || (dd->ipath_guid == 0))
smp->status |= IB_SMP_INVALID_FIELD;
nip->base_version = 1;
@@ -131,10 +132,15 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp,
* We only support one GUID for now. If this changes, the
* portinfo.guid_cap field needs to be updated too.
*/
- if (startgx == 0)
- /* The first is a copy of the read-only HW GUID. */
- *p = to_idev(ibdev)->dd->ipath_guid;
- else
+ if (startgx == 0) {
+ __be64 g = to_idev(ibdev)->dd->ipath_guid;
+ if (g == 0)
+ /* GUID 0 is illegal */
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else
+ /* The first is a copy of the read-only HW GUID. */
+ *p = g;
+ } else
smp->status |= IB_SMP_INVALID_FIELD;
return reply(smp);
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index b36f6fb3e37..a0673c1eef7 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -138,6 +138,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
goto bail;
}
+ mr->mr.pd = pd;
mr->mr.user_base = *iova_start;
mr->mr.iova = *iova_start;
mr->mr.length = 0;
@@ -197,6 +198,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
goto bail;
}
+ mr->mr.pd = pd;
mr->mr.user_base = region->user_base;
mr->mr.iova = region->virt_base;
mr->mr.length = region->length;
@@ -289,6 +291,7 @@ struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
* Resources are allocated but no valid mapping (RKEY can't be
* used).
*/
+ fmr->mr.pd = pd;
fmr->mr.user_base = 0;
fmr->mr.iova = 0;
fmr->mr.length = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 224b0f40767..46c1c89bf6a 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -335,6 +335,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_nak_state = 0;
+ qp->r_wrid_valid = 0;
qp->s_rnr_timeout = 0;
qp->s_head = 0;
qp->s_tail = 0;
@@ -342,6 +343,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->s_last = 0;
qp->s_ssn = 1;
qp->s_lsn = 0;
+ qp->s_wait_credit = 0;
if (qp->r_rq.wq) {
qp->r_rq.wq->head = 0;
qp->r_rq.wq->tail = 0;
@@ -352,12 +354,13 @@ static void ipath_reset_qp(struct ipath_qp *qp)
/**
* ipath_error_qp - put a QP into an error state
* @qp: the QP to put into an error state
+ * @err: the receive completion error to signal if a RWQE is active
*
* Flushes both send and receive work queues.
* QP s_lock should be held and interrupts disabled.
*/
-void ipath_error_qp(struct ipath_qp *qp)
+void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
@@ -373,7 +376,6 @@ void ipath_error_qp(struct ipath_qp *qp)
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
- wc.status = IB_WC_WR_FLUSH_ERR;
wc.vendor_err = 0;
wc.byte_len = 0;
wc.imm_data = 0;
@@ -385,6 +387,12 @@ void ipath_error_qp(struct ipath_qp *qp)
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
+ if (qp->r_wrid_valid) {
+ qp->r_wrid_valid = 0;
+ wc.status = err;
+ ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
+ }
+ wc.status = IB_WC_WR_FLUSH_ERR;
while (qp->s_last != qp->s_head) {
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
@@ -501,7 +509,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
break;
case IB_QPS_ERR:
- ipath_error_qp(qp);
+ ipath_error_qp(qp, IB_WC_GENERAL_ERR);
break;
default:
@@ -516,7 +524,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->remote_qpn = attr->dest_qp_num;
if (attr_mask & IB_QP_SQ_PSN) {
- qp->s_next_psn = attr->sq_psn;
+ qp->s_psn = qp->s_next_psn = attr->sq_psn;
qp->s_last_psn = qp->s_next_psn - 1;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index a08654042c0..a504cf67f27 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -201,6 +201,18 @@ int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_rnr_timeout)
goto done;
+ /* Limit the number of packets sent without an ACK. */
+ if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
+ qp->s_wait_credit = 1;
+ dev->n_rc_stalls++;
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->timerwait))
+ list_add_tail(&qp->timerwait,
+ &dev->pending[dev->pending_index]);
+ spin_unlock(&dev->pending_lock);
+ goto done;
+ }
+
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
bth0 = 0;
@@ -221,7 +233,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* Check if send work queue is empty. */
if (qp->s_tail == qp->s_head)
goto done;
- qp->s_psn = wqe->psn = qp->s_next_psn;
+ wqe->psn = qp->s_next_psn;
newreq = 1;
}
/*
@@ -393,12 +405,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
- /*
- * Request an ACK every 1/2 MB to avoid retransmit
- * timeouts.
- */
- if (((wqe->length - len) % (512 * 1024)) == 0)
- bth2 |= 1 << 31;
len = pmtu;
break;
}
@@ -435,12 +441,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
- /*
- * Request an ACK every 1/2 MB to avoid retransmit
- * timeouts.
- */
- if (((wqe->length - len) % (512 * 1024)) == 0)
- bth2 |= 1 << 31;
len = pmtu;
break;
}
@@ -498,6 +498,8 @@ int ipath_make_rc_req(struct ipath_qp *qp,
*/
goto done;
}
+ if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
+ bth2 |= 1 << 31; /* Request ACK. */
qp->s_len -= len;
qp->s_hdrwords = hwords;
qp->s_cur_sge = ss;
@@ -737,6 +739,15 @@ bail:
return;
}
+/*
+ * Store @psn in qp->s_last_psn.  If qp->s_wait_credit is set (the send
+ * path sets it when it stops because s_psn has run more than
+ * IPATH_PSN_CREDIT ahead of s_last_psn), clear the flag and schedule
+ * qp->s_task so the send side gets another chance to run.
+ * NOTE(review): callers appear to hold qp->s_lock -- confirm.
+ */
+static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
+{
+ if (qp->s_wait_credit) {
+ qp->s_wait_credit = 0;
+ tasklet_hi_schedule(&qp->s_task);
+ }
+ qp->s_last_psn = psn;
+}
+
/**
* do_rc_ack - process an incoming RC ACK
* @qp: the QP the ACK came in on
@@ -805,7 +816,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
* The last valid PSN seen is the previous
* request's.
*/
- qp->s_last_psn = wqe->psn - 1;
+ update_last_psn(qp, wqe->psn - 1);
/* Retry this request. */
ipath_restart_rc(qp, wqe->psn, &wc);
/*
@@ -864,7 +875,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
ipath_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
qp->s_retry = qp->s_retry_cnt;
- qp->s_last_psn = psn;
+ update_last_psn(qp, psn);
ret = 1;
goto bail;
@@ -883,7 +894,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
goto bail;
/* The last valid PSN is the previous PSN. */
- qp->s_last_psn = psn - 1;
+ update_last_psn(qp, psn - 1);
dev->n_rc_resends += (int)qp->s_psn - (int)psn;
@@ -898,7 +909,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
case 3: /* NAK */
/* The last valid PSN seen is the previous request's. */
if (qp->s_last != qp->s_tail)
- qp->s_last_psn = wqe->psn - 1;
+ update_last_psn(qp, wqe->psn - 1);
switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
IPATH_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
@@ -1071,7 +1082,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
* since we don't want s_sge modified.
*/
qp->s_len -= pmtu;
- qp->s_last_psn = psn;
+ update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
ipath_copy_sge(&qp->s_sge, data, pmtu);
goto bail;
@@ -1223,7 +1234,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
* Address range must be a subset of the original
* request and start on pmtu boundaries.
*/
- ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
+ ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
qp->s_rdma_len, vaddr, rkey,
IB_ACCESS_REMOTE_READ);
if (unlikely(!ok)) {
@@ -1282,6 +1293,14 @@ done:
return 1;
}
+/**
+ * ipath_rc_error - put an RC QP into the error state
+ * @qp: the QP to move to IB_QPS_ERR
+ * @err: the receive completion error handed to ipath_error_qp()
+ *
+ * Takes qp->s_lock around the state change and the work queue flush.
+ * The interrupt state is saved and restored instead of being
+ * unconditionally re-enabled on unlock, so the caller's interrupt
+ * state is left exactly as it was.
+ */
+static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+	qp->state = IB_QPS_ERR;
+	ipath_error_qp(qp, err);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
/**
* ipath_rc_rcv - process an incoming RC packet
* @dev: the device this packet came in on
@@ -1309,6 +1328,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
struct ib_reth *reth;
int header_in_data;
+ /* Validate the SLID. See Ch. 9.6.1.5 */
+ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+ goto done;
+
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
@@ -1370,8 +1393,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
*/
if (qp->r_ack_state >= OP(COMPARE_SWAP))
goto send_ack;
- /* XXX Flush WQEs */
- qp->state = IB_QPS_ERR;
+ ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
qp->r_ack_state = OP(SEND_ONLY);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
@@ -1477,9 +1499,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_inv;
ipath_copy_sge(&qp->r_sge, data, tlen);
qp->r_msn++;
- if (opcode == OP(RDMA_WRITE_LAST) ||
- opcode == OP(RDMA_WRITE_ONLY))
+ if (!qp->r_wrid_valid)
break;
+ qp->r_wrid_valid = 0;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
@@ -1517,7 +1539,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey & NAK */
- ok = ipath_rkey_ok(dev, &qp->r_sge,
+ ok = ipath_rkey_ok(qp, &qp->r_sge,
qp->r_len, vaddr, rkey,
IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok))
@@ -1559,7 +1581,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey & NAK */
- ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
+ ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
qp->s_rdma_len, vaddr, rkey,
IB_ACCESS_REMOTE_READ);
if (unlikely(!ok)) {
@@ -1618,7 +1640,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_inv;
rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */
- if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge,
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
sizeof(u64), vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto nack_acc;
@@ -1670,8 +1692,7 @@ nack_acc:
* is pending though.
*/
if (qp->r_ack_state < OP(COMPARE_SWAP)) {
- /* XXX Flush WQEs */
- qp->state = IB_QPS_ERR;
+ ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
qp->r_ack_state = OP(RDMA_WRITE_ONLY);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 6e23b3d632b..dffc76016d3 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -134,10 +134,24 @@
#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
-#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
+/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
+/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
+/* waldo specific -- find the rest in ipath_6110.c */
+#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
+/* monty specific -- find the rest in ipath_6120.c */
+#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
/* kr_hwdiagctrl bits */
#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
@@ -209,9 +223,9 @@
/* combination link status states that we use with some frequency */
#define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
- << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
+ << INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \
(INFINIPATH_IBCS_LINKSTATE_MASK \
- <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
+ <<INFINIPATH_IBCS_LINKSTATE_SHIFT))
#define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
(INFINIPATH_IBCS_LT_STATE_LINKUP \
@@ -302,6 +316,17 @@
typedef u64 ipath_err_t;
+/* The following change with the type of device, so
+ * need to be part of the ipath_devdata struct, or
+ * we could have problems plugging in devices of
+ * different types (e.g. one HT, one PCIE)
+ * in one system, to be managed by one driver.
+ * On the other hand, this file may also be included
+ * by other code, so leave the declarations here
+ * temporarily. Minor footprint issue if common-model
+ * linker used, none if C89+ linker used.
+ */
+
/* mask of defined bits for various registers */
extern u64 infinipath_i_bitsextant;
extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
@@ -310,13 +335,6 @@ extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
/*
- * register bits for selecting i2c direction and values, used for I2C serial
- * flash
- */
-extern u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
-extern u64 ipath_gpio_sda, ipath_gpio_scl;
-
-/*
* These are the infinipath general register numbers (not offsets).
* The kernel registers are used directly, those beyond the kernel
* registers are calculated from one of the base registers. The use of
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 5c1da2d25e0..f7530512045 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -108,7 +108,6 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
{
- struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
int user = to_ipd(qp->ibqp.pd)->user;
int i, j, ret;
struct ib_wc wc;
@@ -119,8 +118,7 @@ static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
continue;
/* Check LKEY */
if ((user && wqe->sg_list[i].lkey == 0) ||
- !ipath_lkey_ok(&dev->lk_table,
- &qp->r_sg_list[j], &wqe->sg_list[i],
+ !ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i],
IB_ACCESS_LOCAL_WRITE))
goto bad_lkey;
qp->r_len += wqe->sg_list[i].length;
@@ -231,6 +229,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
}
}
spin_unlock_irqrestore(&rq->lock, flags);
+ qp->r_wrid_valid = 1;
bail:
return ret;
@@ -326,7 +325,7 @@ again:
case IB_WR_RDMA_WRITE:
if (wqe->length == 0)
break;
- if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_WRITE))) {
@@ -350,7 +349,7 @@ again:
break;
case IB_WR_RDMA_READ:
- if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
+ if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_READ)))
@@ -365,7 +364,7 @@ again:
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
@@ -575,8 +574,7 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
}
if (wr->sg_list[i].length == 0)
continue;
- if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
- &wqe->sg_list[j], &wr->sg_list[i],
+ if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i],
acc)) {
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EINVAL;
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index 941e866d951..94033503400 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -104,11 +104,6 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
u32 sz;
struct ib_srq *ret;
- if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
- ret = ERR_PTR(-ENOMEM);
- goto done;
- }
-
if (srq_init_attr->attr.max_wr == 0) {
ret = ERR_PTR(-EINVAL);
goto done;
@@ -180,10 +175,17 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
spin_lock_init(&srq->rq.lock);
srq->rq.wq->head = 0;
srq->rq.wq->tail = 0;
- srq->rq.max_sge = srq_init_attr->attr.max_sge;
srq->limit = srq_init_attr->attr.srq_limit;
- dev->n_srqs_allocated++;
+ spin_lock(&dev->n_srqs_lock);
+ if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
+ spin_unlock(&dev->n_srqs_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wq;
+ }
+
+ dev->n_srqs_allocated++;
+ spin_unlock(&dev->n_srqs_lock);
ret = &srq->ibsrq;
goto done;
@@ -351,8 +353,13 @@ int ipath_destroy_srq(struct ib_srq *ibsrq)
struct ipath_srq *srq = to_isrq(ibsrq);
struct ipath_ibdev *dev = to_idev(ibsrq->device);
+ spin_lock(&dev->n_srqs_lock);
dev->n_srqs_allocated--;
- vfree(srq->rq.wq);
+ spin_unlock(&dev->n_srqs_lock);
+ if (srq->ip)
+ kref_put(&srq->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(srq->rq.wq);
kfree(srq);
return 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index e299148c4b6..182de34f9f4 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -257,7 +257,7 @@ static ssize_t store_guid(struct device *dev,
struct ipath_devdata *dd = dev_get_drvdata(dev);
ssize_t ret;
unsigned short guid[8];
- __be64 nguid;
+ __be64 new_guid;
u8 *ng;
int i;
@@ -266,7 +266,7 @@ static ssize_t store_guid(struct device *dev,
&guid[4], &guid[5], &guid[6], &guid[7]) != 8)
goto invalid;
- ng = (u8 *) &nguid;
+ ng = (u8 *) &new_guid;
for (i = 0; i < 8; i++) {
if (guid[i] > 0xff)
@@ -274,7 +274,10 @@ static ssize_t store_guid(struct device *dev,
ng[i] = guid[i];
}
- dd->ipath_guid = nguid;
+ if (new_guid == 0)
+ goto invalid;
+
+ dd->ipath_guid = new_guid;
dd->ipath_nguid = 1;
ret = strlen(buf);
@@ -297,6 +300,16 @@ static ssize_t show_nguid(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
}
+/*
+ * sysfs "nports" read handler: prints the number of user ports
+ * available (the configured port count minus one) as a decimal line.
+ */
+static ssize_t show_nports(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct ipath_devdata *devdata;
+
+	devdata = dev_get_drvdata(dev);
+	return scnprintf(buf, PAGE_SIZE, "%u\n",
+			 devdata->ipath_cfgports - 1);
+}
+
static ssize_t show_serial(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -608,6 +621,7 @@ static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
+static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
@@ -623,6 +637,7 @@ static struct attribute *dev_attributes[] = {
&dev_attr_mlid.attr,
&dev_attr_mtu.attr,
&dev_attr_nguid.attr,
+ &dev_attr_nports.attr,
&dev_attr_serial.attr,
&dev_attr_status.attr,
&dev_attr_status_str.attr,
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 0fd3cded16b..e636cfd67a8 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -246,6 +246,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
struct ib_reth *reth;
int header_in_data;
+ /* Validate the SLID. See Ch. 9.6.1.5 */
+ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+ goto done;
+
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
@@ -440,7 +444,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey */
- ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len,
+ ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
vaddr, rkey,
IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok)) {
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 6991d1d74e3..49f1102af8b 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -39,7 +39,6 @@
static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
u32 *lengthp, struct ipath_sge_state *ss)
{
- struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
int user = to_ipd(qp->ibqp.pd)->user;
int i, j, ret;
struct ib_wc wc;
@@ -50,8 +49,7 @@ static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
continue;
/* Check LKEY */
if ((user && wqe->sg_list[i].lkey == 0) ||
- !ipath_lkey_ok(&dev->lk_table,
- j ? &ss->sg_list[j - 1] : &ss->sge,
+ !ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
&wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
goto bad_lkey;
*lengthp += wqe->sg_list[i].length;
@@ -343,7 +341,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
if (wr->sg_list[i].length == 0)
continue;
- if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ?
+ if (!ipath_lkey_ok(qp, ss.num_sge ?
sg_list + ss.num_sge - 1 : &ss.sge,
&wr->sg_list[i], 0)) {
ret = -EINVAL;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index e32fca9faf8..413754b1d8a 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -90,6 +90,62 @@ bail:
}
/**
+ * ipath_map_page - pci_map_page() wrapper that avoids a zero dma_addr
+ *
+ * The chip interprets a dma_addr of all 0's as "disabled", yet 0 can
+ * also be a valid dma_addr returned on some architectures.
+ *
+ * If the first mapping comes back as 0 it is released and the page is
+ * mapped a second time.  The powerpc iommu assigns dma_addrs in
+ * ascending order, so no retry loop or dummy-page mapping is needed to
+ * ensure we don't just get the same mapping again.
+ *
+ * FIXME: other iommu's are unlikely to be so accommodating.
+ */
+dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
+			  unsigned long offset, size_t size, int direction)
+{
+	dma_addr_t dma = pci_map_page(hwdev, page, offset, size, direction);
+
+	if (dma != 0)
+		return dma;
+
+	/*
+	 * FIXME: If the retry yields 0 again, we should keep this page,
+	 * map another, then free the 0 page.
+	 */
+	pci_unmap_page(hwdev, dma, size, direction);
+	return pci_map_page(hwdev, page, offset, size, direction);
+}
+
+/**
+ * ipath_map_single - pci_map_single() wrapper that avoids a zero dma_addr
+ *
+ * Same idea as ipath_map_page(): a mapping that comes back as 0 is
+ * released and the buffer is mapped once more.
+ */
+dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
+			    int direction)
+{
+	dma_addr_t dma = pci_map_single(hwdev, ptr, size, direction);
+
+	if (dma != 0)
+		return dma;
+
+	/*
+	 * FIXME: If the retry yields 0 again, we should keep this page,
+	 * map another, then free the 0 page.
+	 */
+	pci_unmap_single(hwdev, dma, size, direction);
+	return pci_map_single(hwdev, ptr, size, direction);
+}
+
+/**
* ipath_get_user_pages - lock user pages into memory
* @start_page: the start page
* @num_pages: the number of pages
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index b8381c5e72b..42eaed88c28 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -898,7 +898,8 @@ int ipath_get_counters(struct ipath_devdata *dd,
ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt) +
+ dd->ipath_rxfc_unsupvl_errs;
cntrs->port_rcv_remphys_errors =
ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
cntrs->port_xmit_discards =
@@ -911,8 +912,10 @@ int ipath_get_counters(struct ipath_devdata *dd,
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
cntrs->port_rcv_packets =
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
- cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
- cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
+ cntrs->local_link_integrity_errors =
+ (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
+ dd->ipath_lli_errs : dd->ipath_lli_errors;
+ cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs;
ret = 0;
@@ -1199,6 +1202,7 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
struct ipath_ah *ah;
struct ib_ah *ret;
struct ipath_ibdev *dev = to_idev(pd->device);
+ unsigned long flags;
/* A multicast address requires a GRH (see ch. 8.4.1). */
if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
@@ -1225,16 +1229,16 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
goto bail;
}
- spin_lock(&dev->n_ahs_lock);
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
- spin_unlock(&dev->n_ahs_lock);
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
kfree(ah);
ret = ERR_PTR(-ENOMEM);
goto bail;
}
dev->n_ahs_allocated++;
- spin_unlock(&dev->n_ahs_lock);
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
/* ib_create_ah() will initialize ah->ibah. */
ah->attr = *ah_attr;
@@ -1255,10 +1259,11 @@ static int ipath_destroy_ah(struct ib_ah *ibah)
{
struct ipath_ibdev *dev = to_idev(ibah->device);
struct ipath_ah *ah = to_iah(ibah);
+ unsigned long flags;
- spin_lock(&dev->n_ahs_lock);
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
dev->n_ahs_allocated--;
- spin_unlock(&dev->n_ahs_lock);
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
kfree(ah);
@@ -1380,11 +1385,13 @@ static int enable_timer(struct ipath_devdata *dd)
* processing.
*/
if (dd->ipath_flags & IPATH_GPIO_INTR) {
+ u64 val;
ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
0x2074076542310ULL);
/* Enable GPIO bit 2 interrupt */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
- (u64) (1 << 2));
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+ val |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
+ ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
}
init_timer(&dd->verbs_timer);
@@ -1399,8 +1406,17 @@ static int enable_timer(struct ipath_devdata *dd)
static int disable_timer(struct ipath_devdata *dd)
{
/* Disable GPIO bit 2 interrupt */
- if (dd->ipath_flags & IPATH_GPIO_INTR)
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
+ if (dd->ipath_flags & IPATH_GPIO_INTR) {
+ u64 val;
+ /* Disable GPIO bit 2 interrupt */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+ val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
+ ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+ /*
+ * We might want to undo changes to debugportselect,
+ * but how?
+ */
+ }
del_timer_sync(&dd->verbs_timer);
@@ -1683,6 +1699,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
"RC OTH NAKs %d\n"
"RC timeouts %d\n"
"RC RDMA dup %d\n"
+ "RC stalls %d\n"
"piobuf wait %d\n"
"no piobuf %d\n"
"PKT drops %d\n"
@@ -1690,7 +1707,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
dev->n_other_naks, dev->n_timeouts,
- dev->n_rdma_dup_busy, dev->n_piowait,
+ dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait,
dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
const struct ipath_opcode_stats *si = &dev->opstats[i];
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 09bbb3f9a21..8039f6e5f0c 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -220,6 +220,7 @@ struct ipath_segarray {
};
struct ipath_mregion {
+ struct ib_pd *pd; /* shares refcnt of ibmr.pd */
u64 user_base; /* User's address for this region */
u64 iova; /* IB start address of this region */
size_t length;
@@ -364,12 +365,14 @@ struct ipath_qp {
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
u8 r_reuse_sge; /* for UC receive errors */
u8 r_sge_inx; /* current index into sg_list */
+ u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
u8 qp_access_flags;
u8 s_max_sge; /* size of s_wq->sg_list */
u8 s_retry_cnt; /* number of times to retry */
u8 s_rnr_retry_cnt;
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
+ u8 s_wait_credit; /* limit number of unacked packets sent */
u8 s_pkey_index; /* PKEY index to use */
u8 timeout; /* Timeout for this QP */
enum ib_mtu path_mtu;
@@ -393,6 +396,8 @@ struct ipath_qp {
#define IPATH_S_BUSY 0
#define IPATH_S_SIGNAL_REQ_WR 1
+#define IPATH_PSN_CREDIT 2048
+
/*
* Since struct ipath_swqe is not a fixed size, we can't simply index into
* struct ipath_qp.s_wq. This function does the array index computation.
@@ -521,6 +526,7 @@ struct ipath_ibdev {
u32 n_rnr_naks;
u32 n_other_naks;
u32 n_timeouts;
+ u32 n_rc_stalls;
u32 n_pkt_drops;
u32 n_vl15_dropped;
u32 n_wqe_errs;
@@ -634,6 +640,8 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
int ipath_destroy_qp(struct ib_qp *ibqp);
+void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
+
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
@@ -653,12 +661,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
- u32 len, u64 vaddr, u32 rkey, int acc);
-
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
- struct ib_sge *sge, int acc);
-
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
@@ -683,10 +685,10 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
struct ib_sge *sge, int acc);
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc);
int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
index 036fde662aa..0095bb70f34 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -38,13 +38,23 @@
#include "ipath_kernel.h"
/**
- * ipath_unordered_wc - indicate whether write combining is ordered
+ * ipath_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: infinipath device
*
- * PowerPC systems (at least those in the 970 processor family)
- * write partially filled store buffers in address order, but will write
- * completely filled store buffers in "random" order, and therefore must
- * have serialization for correctness with current InfiniPath chips.
+ * Nothing to do on PowerPC, so just return without error.
+ */
+int ipath_enable_wc(struct ipath_devdata *dd)
+{
+ /* @dd is unused: this variant performs no setup and always reports success. */
+ return 0;
+}
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is unordered
*
+ * Because our performance depends on our ability to do write
+ * combining mmio writes in the most efficient way, we need to
+ * know if we are on a processor that may reorder stores when
+ * write combining.
*/
int ipath_unordered_wc(void)
{
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
index f8f9e2e8cbd..04696e62da8 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -123,6 +123,8 @@ int ipath_enable_wc(struct ipath_devdata *dd)
ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
"cookie is %d\n", cookie);
dd->ipath_wc_cookie = cookie;
+ dd->ipath_wc_base = (unsigned long) pioaddr;
+ dd->ipath_wc_len = (unsigned long) piolen;
}
}
@@ -136,9 +138,16 @@ int ipath_enable_wc(struct ipath_devdata *dd)
void ipath_disable_wc(struct ipath_devdata *dd)
{
if (dd->ipath_wc_cookie) {
+ int r;
ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
- mtrr_del(dd->ipath_wc_cookie, 0, 0);
- dd->ipath_wc_cookie = 0;
+ r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base,
+ dd->ipath_wc_len);
+ if (r < 0)
+ dev_info(&dd->pcidev->dev,
+ "mtrr_del(%lx, %lx, %lx) failed: %d\n",
+ dd->ipath_wc_cookie, dd->ipath_wc_base,
+ dd->ipath_wc_len, r);
+ dd->ipath_wc_cookie = 0; /* even on failure */
}
}
diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig
index 365a1b5f19e..aecbb9083f0 100644
--- a/drivers/infiniband/ulp/iser/Kconfig
+++ b/drivers/infiniband/ulp/iser/Kconfig
@@ -1,11 +1,12 @@
config INFINIBAND_ISER
- tristate "ISCSI RDMA Protocol"
+ tristate "iSCSI Extensions for RDMA (iSER)"
depends on INFINIBAND && SCSI && INET
select SCSI_ISCSI_ATTRS
---help---
- Support for the ISCSI RDMA Protocol over InfiniBand. This
- allows you to access storage devices that speak ISER/ISCSI
- over InfiniBand.
+ Support for the iSCSI Extensions for RDMA (iSER) Protocol
+ over InfiniBand. This allows you to access storage devices
+ that speak iSCSI over iSER over InfiniBand.
- The ISER protocol is defined by IETF.
- See <http://www.ietf.org/>.
+ The iSER protocol is defined by IETF.
+ See <http://www.ietf.org/internet-drafts/draft-ietf-ips-iser-05.txt>
+ and <http://www.infinibandta.org/members/spec/iser_annex_060418.pdf>
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 2a14fe2e322..eb6f98d8228 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -317,6 +317,8 @@ iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn)
struct iscsi_iser_conn *iser_conn = conn->dd_data;
iscsi_conn_teardown(cls_conn);
+ if (iser_conn->ib_conn)
+ iser_conn->ib_conn->iser_conn = NULL;
kfree(iser_conn);
}
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 2cf9ae0def1..9c53916f28c 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -192,7 +192,7 @@ struct iser_regd_buf {
struct iser_dto {
struct iscsi_iser_cmd_task *ctask;
- struct iscsi_iser_conn *conn;
+ struct iser_conn *ib_conn;
int notify_enable;
/* vector of registered buffers */
@@ -355,4 +355,11 @@ int iser_post_send(struct iser_desc *tx_desc);
int iser_conn_state_comp(struct iser_conn *ib_conn,
enum iser_ib_conn_state comp);
+
+int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask,
+ struct iser_data_buf *data,
+ enum iser_data_dir iser_dir,
+ enum dma_data_direction dma_dir);
+
+void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask);
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index ccf56f6f723..9b3d79c796c 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -66,42 +66,6 @@ static void iser_dto_add_regd_buff(struct iser_dto *dto,
dto->regd_vector_len++;
}
-static int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask,
- struct iser_data_buf *data,
- enum iser_data_dir iser_dir,
- enum dma_data_direction dma_dir)
-{
- struct device *dma_device;
-
- iser_ctask->dir[iser_dir] = 1;
- dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
-
- data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir);
- if (data->dma_nents == 0) {
- iser_err("dma_map_sg failed!!!\n");
- return -EINVAL;
- }
- return 0;
-}
-
-static void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask)
-{
- struct device *dma_device;
- struct iser_data_buf *data;
-
- dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
-
- if (iser_ctask->dir[ISER_DIR_IN]) {
- data = &iser_ctask->data[ISER_DIR_IN];
- dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE);
- }
-
- if (iser_ctask->dir[ISER_DIR_OUT]) {
- data = &iser_ctask->data[ISER_DIR_OUT];
- dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE);
- }
-}
-
/* Register user buffer memory and initialize passive rdma
* dto descriptor. Total data size is stored in
* iser_ctask->data[ISER_DIR_IN].data_len
@@ -249,7 +213,7 @@ static int iser_post_receive_control(struct iscsi_conn *conn)
}
recv_dto = &rx_desc->dto;
- recv_dto->conn = iser_conn;
+ recv_dto->ib_conn = iser_conn->ib_conn;
recv_dto->regd_vector_len = 0;
regd_hdr = &rx_desc->hdr_regd_buf;
@@ -296,7 +260,7 @@ static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */
regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
- send_dto->conn = iser_conn;
+ send_dto->ib_conn = iser_conn->ib_conn;
send_dto->notify_enable = 1;
send_dto->regd_vector_len = 0;
@@ -588,7 +552,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc,
unsigned long dto_xfer_len)
{
struct iser_dto *dto = &rx_desc->dto;
- struct iscsi_iser_conn *conn = dto->conn;
+ struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn;
struct iscsi_session *session = conn->iscsi_conn->session;
struct iscsi_cmd_task *ctask;
struct iscsi_iser_cmd_task *iser_ctask;
@@ -641,7 +605,8 @@ void iser_rcv_completion(struct iser_desc *rx_desc,
void iser_snd_completion(struct iser_desc *tx_desc)
{
struct iser_dto *dto = &tx_desc->dto;
- struct iscsi_iser_conn *iser_conn = dto->conn;
+ struct iser_conn *ib_conn = dto->ib_conn;
+ struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn;
struct iscsi_conn *conn = iser_conn->iscsi_conn;
struct iscsi_mgmt_task *mtask;
@@ -652,7 +617,7 @@ void iser_snd_completion(struct iser_desc *tx_desc)
if (tx_desc->type == ISCSI_TX_DATAOUT)
kmem_cache_free(ig.desc_cache, tx_desc);
- atomic_dec(&iser_conn->ib_conn->post_send_buf_count);
+ atomic_dec(&ib_conn->post_send_buf_count);
write_lock(conn->recv_lock);
if (conn->suspend_tx) {
@@ -698,14 +663,19 @@ void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *iser_ctask)
void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask)
{
int deferred;
+ int is_rdma_aligned = 1;
/* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy
*/
- if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL)
+ if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) {
+ is_rdma_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN);
- if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL)
+ }
+ if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
+ is_rdma_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT);
+ }
if (iser_ctask->dir[ISER_DIR_IN]) {
deferred = iser_regd_buff_release
@@ -725,7 +695,9 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask)
}
}
- iser_dma_unmap_task_data(iser_ctask);
+ /* if the data was unaligned, it was already unmapped and then copied */
+ if (is_rdma_aligned)
+ iser_dma_unmap_task_data(iser_ctask);
}
void iser_dto_buffs_release(struct iser_dto *dto)
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index d0b03f42658..0606744c3f8 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -369,6 +369,44 @@ static void iser_page_vec_build(struct iser_data_buf *data,
}
}
+int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask,
+ struct iser_data_buf *data,
+ enum iser_data_dir iser_dir,
+ enum dma_data_direction dma_dir)
+{
+ struct device *dma_device;
+
+ iser_ctask->dir[iser_dir] = 1;
+ dma_device =
+ iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
+
+ data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir);
+ if (data->dma_nents == 0) {
+ iser_err("dma_map_sg failed!!!\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask)
+{
+ struct device *dma_device;
+ struct iser_data_buf *data;
+
+ dma_device =
+ iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
+
+ if (iser_ctask->dir[ISER_DIR_IN]) {
+ data = &iser_ctask->data[ISER_DIR_IN];
+ dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE);
+ }
+
+ if (iser_ctask->dir[ISER_DIR_OUT]) {
+ data = &iser_ctask->data[ISER_DIR_OUT];
+ dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE);
+ }
+}
+
/**
* iser_reg_rdma_mem - Registers memory intended for RDMA,
* obtaining rkey and va
@@ -394,6 +432,10 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
iser_err("rdma alignment violation %d/%d aligned\n",
aligned_len, mem->size);
iser_data_buf_dump(mem);
+
+ /* unmap the command data before accessing it */
+ iser_dma_unmap_task_data(iser_ctask);
+
/* allocate copy buf, if we are writing, copy the */
/* unaligned scatterlist, dma map the copy */
if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0)
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index ecdca7fc1e4..18a00003499 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -571,6 +571,8 @@ void iser_conn_release(struct iser_conn *ib_conn)
/* on EVENT_ADDR_ERROR there's no device yet for this conn */
if (device != NULL)
iser_device_try_release(device);
+ if (ib_conn->iser_conn)
+ ib_conn->iser_conn->ib_conn = NULL;
kfree(ib_conn);
}
@@ -694,7 +696,7 @@ int iser_post_recv(struct iser_desc *rx_desc)
struct iser_dto *recv_dto = &rx_desc->dto;
/* Retrieve conn */
- ib_conn = recv_dto->conn->ib_conn;
+ ib_conn = recv_dto->ib_conn;
iser_dto_to_iov(recv_dto, iov, 2);
@@ -727,7 +729,7 @@ int iser_post_send(struct iser_desc *tx_desc)
struct iser_conn *ib_conn;
struct iser_dto *dto = &tx_desc->dto;
- ib_conn = dto->conn->ib_conn;
+ ib_conn = dto->ib_conn;
iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN);
@@ -774,7 +776,7 @@ static void iser_comp_error_worker(void *data)
static void iser_handle_comp_error(struct iser_desc *desc)
{
struct iser_dto *dto = &desc->dto;
- struct iser_conn *ib_conn = dto->conn->ib_conn;
+ struct iser_conn *ib_conn = dto->ib_conn;
iser_dto_buffs_release(dto);