From 9b620d2a16814e5f2a063359c953c41f804e091a Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Wed, 18 Apr 2007 20:20:53 -0700
Subject: IB: Remove reference to obsolete CONFIG_IPATH_CORE

Since commit b1c1b6a3 ("IB/ipath: merge ipath_core and ib_ipath
drivers"), CONFIG_IPATH_CORE no longer exists, so there's no reason to
have a line for it in drivers/Makefile.

Pointed out by Robert P. J. Day <rpjday@mindspring.com>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/Makefile | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/Makefile b/drivers/Makefile
index 3a718f51350..920c975bb6d 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -72,7 +72,6 @@ obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_MMC)		+= mmc/
 obj-$(CONFIG_NEW_LEDS)		+= leds/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/
-obj-$(CONFIG_IPATH_CORE)	+= infiniband/
 obj-$(CONFIG_SGI_SN)		+= sn/
 obj-y				+= firmware/
 obj-$(CONFIG_CRYPTO)		+= crypto/
-- 
cgit v1.2.3


From 9a4b65e35714516980c863bfb7edc5f232b8b458 Mon Sep 17 00:00:00 2001
From: Hal Rosenstock <halr@voltaire.com>
Date: Mon, 2 Apr 2007 12:45:16 -0400
Subject: IB/umad: Fix declaration of dev_map[]

The current ib_umad code never accesses bits past IB_UMAD_MAX_PORTS in
dev_map[].  We shouldn't declare it to be twice as big.

Pointed-out-by: Roland Dreier <rolandd@cisco.com>

Signed-off-by: Hal Rosenstock <halr@voltaire.com>
---
 drivers/infiniband/core/user_mad.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index c069ebeba8e..2ce3eead2ba 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -135,7 +135,7 @@ static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
 
 static DEFINE_SPINLOCK(port_lock);
 static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
-static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2);
+static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
 
 static void ib_umad_add_one(struct ib_device *device);
 static void ib_umad_remove_one(struct ib_device *device);
-- 
cgit v1.2.3


From a89875fc7e23ec91561bc3742df3bd5d12b376b4 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Wed, 18 Apr 2007 20:20:53 -0700
Subject: IPoIB: Remove pointless opcode field from debugging output

There's no point in printing the opcode field in the completion
handling debugging output, since the type of completion is already
printed at the beginning of the line.  In fact the opcode field is not
even defined for completions with a status other than success.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8 ++++----
 drivers/infiniband/ulp/ipoib/ipoib_ib.c | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 2b242a4823f..7a4af7a3e04 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -351,8 +351,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	u64 mapping[IPOIB_CM_RX_SG];
 	int frags;
 
-	ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n",
-		       wr_id, wc->opcode, wc->status);
+	ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
+		       wr_id, wc->status);
 
 	if (unlikely(wr_id >= ipoib_recvq_size)) {
 		ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -504,8 +504,8 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
 	struct ipoib_tx_buf *tx_req;
 	unsigned long flags;
 
-	ipoib_dbg_data(priv, "cm send completion: id %d, op %d, status: %d\n",
-		       wr_id, wc->opcode, wc->status);
+	ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
+		       wr_id, wc->status);
 
 	if (unlikely(wr_id >= ipoib_sendq_size)) {
 		ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ba0ee5cf2ad..c17e777cdc4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -172,8 +172,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	struct sk_buff *skb;
 	u64 addr;
 
-	ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n",
-		       wr_id, wc->opcode, wc->status);
+	ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
+		       wr_id, wc->status);
 
 	if (unlikely(wr_id >= ipoib_recvq_size)) {
 		ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
@@ -245,8 +245,8 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 	struct ipoib_tx_buf *tx_req;
 	unsigned long flags;
 
-	ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n",
-		       wr_id, wc->opcode, wc->status);
+	ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
+		       wr_id, wc->status);
 
 	if (unlikely(wr_id >= ipoib_sendq_size)) {
 		ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
-- 
cgit v1.2.3


From 946db67fbf836af30835d610b914cdde0cf467f8 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:44:45 -0700
Subject: IB/ipath: Add ability to set and clear IB local loopback

This is a sticky state.  It is useful for diagnosing problems with
boards versus cable/switch problems.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_common.h |  2 ++
 drivers/infiniband/hw/ipath/ipath_driver.c | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 54139d39818..a4136780352 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -78,6 +78,8 @@
 #define IPATH_IB_LINKINIT		3
 #define IPATH_IB_LINKDOWN_SLEEP		4
 #define IPATH_IB_LINKDOWN_DISABLE	5
+#define IPATH_IB_LINK_LOOPBACK	6 /* enable local loopback */
+#define IPATH_IB_LINK_EXTERNAL	7 /* normal, disable local loopback */
 
 /*
  * stats maintained by the driver.  For now, at least, this is global
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index ae7f21a0cdc..c2fe5417ff4 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1662,6 +1662,22 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
 		lstate = IPATH_LINKACTIVE;
 		break;
 
+	case IPATH_IB_LINK_LOOPBACK:
+		dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
+		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+		ret = 0;
+		goto bail; // no state change to wait for
+
+	case IPATH_IB_LINK_EXTERNAL:
+		dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n");
+		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+		ret = 0;
+		goto bail; // no state change to wait for
+
 	default:
 		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
 		ret = -EINVAL;
-- 
cgit v1.2.3


From 165c552c35052284e8ec4f7e9c027dfd33490e2c Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:44:46 -0700
Subject: IB/ipath: Fix user memory region creation when IOMMU present

The loop which initializes the user memory region from an array of
pages was using the wrong limit for the array.  This worked OK when
dma_map_sg() returned the same number as the number of pages.  This
patch fixes the problem.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_mr.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 8cc8598d6c6..31e70732e36 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -210,9 +210,15 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 	m = 0;
 	n = 0;
 	list_for_each_entry(chunk, &region->chunk_list, list) {
-		for (i = 0; i < chunk->nmap; i++) {
-			mr->mr.map[m]->segs[n].vaddr =
-				page_address(chunk->page_list[i].page);
+		for (i = 0; i < chunk->nents; i++) {
+			void *vaddr;
+
+			vaddr = page_address(chunk->page_list[i].page);
+			if (!vaddr) {
+				ret = ERR_PTR(-EINVAL);
+				goto bail;
+			}
+			mr->mr.map[m]->segs[n].vaddr = vaddr;
 			mr->mr.map[m]->segs[n].length = region->page_size;
 			n++;
 			if (n == IPATH_SEGSZ) {
-- 
cgit v1.2.3


From 19085745598ec254fd814411b675b52380c3bac0 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:44:47 -0700
Subject: IB/ipath: Definitions of two RXE parity err bits were reversed

The chip documentation on the expected TID vs eager TID parity error
bits was reversed from what was implemented in the RTL, for both
chips.  This corrects the definitions.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_registers.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index dffc76016d3..e0b20529da8 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -128,7 +128,7 @@
 
 /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
 /* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
- * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID
+ * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2:  expTID, 3: eagerTID
  * 		bit 4: flag buffer, 5: datainfo, 6: header info */
 #define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
 #define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
@@ -143,8 +143,8 @@
 /* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
 #define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF   0x01ULL
 #define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ  0x02ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID   0x08ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID   0x04ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
 #define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF  0x10ULL
 #define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
 #define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO  0x40ULL
-- 
cgit v1.2.3


From 947d7617a1d876c2c93f73017a734e070c64d43b Mon Sep 17 00:00:00 2001
From: Ralph Campbell <ralph.campbell@qlogic.com>
Date: Thu, 15 Mar 2007 14:44:48 -0700
Subject: IB/ipath: Don't initialize port memory for subports

A recent change was made to allocate memory for a port after CPU
affinity is set. That change didn't account for subports and was
trying to allocate memory for the port twice.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_file_ops.c | 19 +++++++++++++++----
 drivers/infiniband/hw/ipath/ipath_kernel.h   |  2 ++
 2 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 5d64ff87529..5de1dd49722 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -178,8 +178,7 @@ static int ipath_get_base_info(struct file *fp,
 
 		kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base +
 			pd->port_rcvhdrq_size * slave) & MMAP64_MASK;
-		kinfo->spi_rcvhdr_tailaddr =
-			(u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK;
+		kinfo->spi_rcvhdr_tailaddr = 0;
 		kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf +
 			dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) &
 			MMAP64_MASK;
@@ -1443,6 +1442,7 @@ static int init_subports(struct ipath_devdata *dd,
 	pd->port_subport_cnt = uinfo->spu_subport_cnt;
 	pd->port_subport_id = uinfo->spu_subport_id;
 	pd->active_slaves = 1;
+	set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
 	goto bail;
 
 bail_rhdr:
@@ -1764,11 +1764,17 @@ static int ipath_do_user_init(struct file *fp,
 			      const struct ipath_user_info *uinfo)
 {
 	int ret;
-	struct ipath_portdata *pd;
+	struct ipath_portdata *pd = port_fp(fp);
 	struct ipath_devdata *dd;
 	u32 head32;
 
-	pd = port_fp(fp);
+	/* Subports don't need to initialize anything since master did it. */
+	if (subport_fp(fp)) {
+		ret = wait_event_interruptible(pd->port_wait,
+			!test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
+		goto done;
+	}
+
 	dd = pd->port_dd;
 
 	if (uinfo->spu_rcvhdrsize) {
@@ -1826,6 +1832,11 @@ static int ipath_do_user_init(struct file *fp,
 			 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
+	/* Notify any waiting slaves */
+	if (pd->port_subport_cnt) {
+		clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
+		wake_up(&pd->port_wait);
+	}
 done:
 	return ret;
 }
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 6d8d05fb599..c8df65a4d19 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -701,6 +701,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 #define IPATH_PORT_WAITING_RCV   2
 		/* waiting for a PIO buffer to be available */
 #define IPATH_PORT_WAITING_PIO   3
+		/* master has not finished initializing */
+#define IPATH_PORT_MASTER_UNINIT 4
 
 /* free up any allocated data at closes */
 void ipath_free_data(struct ipath_portdata *dd);
-- 
cgit v1.2.3


From 9f9630d5e12a51f38513de0d64320a55ab6f02d5 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <ralph.campbell@qlogic.com>
Date: Thu, 15 Mar 2007 14:44:49 -0700
Subject: IB/ipath: Fix SRQ limit event causing dropped CQ entry

A silly programming error causes a CQ entry to not be generated if a
SRQ limit event is generated.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_ruc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index e86cb171872..146db851699 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -202,6 +202,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 	wq->tail = tail;
 
 	ret = 1;
+	qp->r_wrid_valid = 1;
 	if (handler) {
 		u32 n;
 
@@ -229,7 +230,6 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 		}
 	}
 	spin_unlock_irqrestore(&rq->lock, flags);
-	qp->r_wrid_valid = 1;
 
 bail:
 	return ret;
-- 
cgit v1.2.3


From 7b21d26ddad6912bf345e8e88a51a5ce98a036ad Mon Sep 17 00:00:00 2001
From: Ralph Campbell <ralph.campbell@qlogic.com>
Date: Thu, 15 Mar 2007 14:44:50 -0700
Subject: IB/ipath: NMI cpu lockup if local loopback used

If a post send is done in loopback and there is no receive queue
entry, the sending QP is put on a timeout list for a while so the
receiver has a chance to post a receive buffer. If the another post
send is done, the code incorrectly tried to put the QP on the timeout
list again an corrupted the timeout list. This eventually leads to a
spin lock deadlock NMI due to the timer function looping forever with
the lock held.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_ruc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 146db851699..cda84933bb4 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -265,7 +265,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
 again:
 	spin_lock_irqsave(&sqp->s_lock, flags);
 
-	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) {
+	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
+	    qp->s_rnr_timeout) {
 		spin_unlock_irqrestore(&sqp->s_lock, flags);
 		goto done;
 	}
-- 
cgit v1.2.3


From 3859e39d75b72f35f7d38c618fbbacb39a440c22 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <ralph.campbell@qlogic.com>
Date: Thu, 15 Mar 2007 14:44:51 -0700
Subject: IB/ipath: Support larger IB_QP_MAX_DEST_RD_ATOMIC and
 IB_QP_MAX_QP_RD_ATOMIC

This patch adds support for multiple RDMA reads and atomics to be sent
before an ACK is required to be seen by the requester.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_qp.c    |  26 +-
 drivers/infiniband/hw/ipath/ipath_rc.c    | 800 +++++++++++++++++-------------
 drivers/infiniband/hw/ipath/ipath_ruc.c   |  58 +--
 drivers/infiniband/hw/ipath/ipath_uc.c    |   6 +-
 drivers/infiniband/hw/ipath/ipath_ud.c    |   2 +-
 drivers/infiniband/hw/ipath/ipath_verbs.c |   7 +-
 drivers/infiniband/hw/ipath/ipath_verbs.h |  52 +-
 7 files changed, 548 insertions(+), 403 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 64f07b19349..c122fea9145 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -320,7 +320,8 @@ static void ipath_reset_qp(struct ipath_qp *qp)
 	qp->remote_qpn = 0;
 	qp->qkey = 0;
 	qp->qp_access_flags = 0;
-	clear_bit(IPATH_S_BUSY, &qp->s_flags);
+	qp->s_busy = 0;
+	qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR;
 	qp->s_hdrwords = 0;
 	qp->s_psn = 0;
 	qp->r_psn = 0;
@@ -333,7 +334,6 @@ static void ipath_reset_qp(struct ipath_qp *qp)
 		qp->r_state = IB_OPCODE_UC_SEND_LAST;
 	}
 	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-	qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 	qp->r_nak_state = 0;
 	qp->r_wrid_valid = 0;
 	qp->s_rnr_timeout = 0;
@@ -344,6 +344,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
 	qp->s_wait_credit = 0;
+	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+	qp->r_head_ack_queue = 0;
+	qp->s_tail_ack_queue = 0;
+	qp->s_num_rd_atomic = 0;
 	if (qp->r_rq.wq) {
 		qp->r_rq.wq->head = 0;
 		qp->r_rq.wq->tail = 0;
@@ -503,6 +507,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		    attr->path_mig_state != IB_MIG_REARM)
 			goto inval;
 
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
+			goto inval;
+
 	switch (new_state) {
 	case IB_QPS_RESET:
 		ipath_reset_qp(qp);
@@ -559,6 +567,12 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	if (attr_mask & IB_QP_QKEY)
 		qp->qkey = attr->qkey;
 
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+		qp->s_max_rd_atomic = attr->max_rd_atomic;
+
 	qp->state = new_state;
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 
@@ -598,8 +612,8 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	attr->alt_pkey_index = 0;
 	attr->en_sqd_async_notify = 0;
 	attr->sq_draining = 0;
-	attr->max_rd_atomic = 1;
-	attr->max_dest_rd_atomic = 1;
+	attr->max_rd_atomic = qp->s_max_rd_atomic;
+	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
 	attr->min_rnr_timer = qp->r_min_rnr_timer;
 	attr->port_num = 1;
 	attr->timeout = qp->timeout;
@@ -614,7 +628,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	init_attr->recv_cq = qp->ibqp.recv_cq;
 	init_attr->srq = qp->ibqp.srq;
 	init_attr->cap = attr->cap;
-	if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+	if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
 		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
 	else
 		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
@@ -786,7 +800,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 		qp->s_size = init_attr->cap.max_send_wr + 1;
 		qp->s_max_sge = init_attr->cap.max_send_sge;
 		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-			qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+			qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
 		else
 			qp->s_flags = 0;
 		dev = to_idev(ibpd->device);
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 5ff20cb0449..c9c3d7cd292 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -37,6 +37,19 @@
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_RC_##x
 
+static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
+		       u32 psn, u32 pmtu)
+{
+	u32 len;
+
+	len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
+	ss->sge = wqe->sg_list[0];
+	ss->sg_list = wqe->sg_list + 1;
+	ss->num_sge = wqe->wr.num_sge;
+	ipath_skip_sge(ss, len);
+	return wqe->length - len;
+}
+
 /**
  * ipath_init_restart- initialize the qp->s_sge after a restart
  * @qp: the QP who's SGE we're restarting
@@ -47,15 +60,9 @@
 static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
 {
 	struct ipath_ibdev *dev;
-	u32 len;
 
-	len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) *
-		ib_mtu_enum_to_int(qp->path_mtu);
-	qp->s_sge.sge = wqe->sg_list[0];
-	qp->s_sge.sg_list = wqe->sg_list + 1;
-	qp->s_sge.num_sge = wqe->wr.num_sge;
-	ipath_skip_sge(&qp->s_sge, len);
-	qp->s_len = wqe->length - len;
+	qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
+				ib_mtu_enum_to_int(qp->path_mtu));
 	dev = to_idev(qp->ibqp.device);
 	spin_lock(&dev->pending_lock);
 	if (list_empty(&qp->timerwait))
@@ -70,107 +77,123 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
  * @ohdr: a pointer to the IB header being constructed
  * @pmtu: the path MTU
  *
- * Return bth0 if constructed; otherwise, return 0.
+ * Return 1 if constructed; otherwise, return 0.
+ * Note that we are in the responder's side of the QP context.
  * Note the QP s_lock must be held.
  */
-u32 ipath_make_rc_ack(struct ipath_qp *qp,
-		      struct ipath_other_headers *ohdr,
-		      u32 pmtu)
+static int ipath_make_rc_ack(struct ipath_qp *qp,
+			     struct ipath_other_headers *ohdr,
+			     u32 pmtu, u32 *bth0p, u32 *bth2p)
 {
+	struct ipath_ack_entry *e;
 	u32 hwords;
 	u32 len;
 	u32 bth0;
+	u32 bth2;
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	hwords = 5;
 
-	/*
-	 * Send a response.  Note that we are in the responder's
-	 * side of the QP context.
-	 */
 	switch (qp->s_ack_state) {
-	case OP(RDMA_READ_REQUEST):
-		qp->s_cur_sge = &qp->s_rdma_sge;
-		len = qp->s_rdma_len;
-		if (len > pmtu) {
-			len = pmtu;
-			qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
-		} else
-			qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
-		qp->s_rdma_len -= len;
+	case OP(RDMA_READ_RESPONSE_LAST):
+	case OP(RDMA_READ_RESPONSE_ONLY):
+	case OP(ATOMIC_ACKNOWLEDGE):
+		qp->s_ack_state = OP(ACKNOWLEDGE);
+		/* FALLTHROUGH */
+	case OP(ACKNOWLEDGE):
+		/* Check for no next entry in the queue. */
+		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
+			if (qp->s_flags & IPATH_S_ACK_PENDING)
+				goto normal;
+			goto bail;
+		}
+
+		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+		if (e->opcode == OP(RDMA_READ_REQUEST)) {
+			/* Copy SGE state in case we need to resend */
+			qp->s_ack_rdma_sge = e->rdma_sge;
+			qp->s_cur_sge = &qp->s_ack_rdma_sge;
+			len = e->rdma_sge.sge.sge_length;
+			if (len > pmtu) {
+				len = pmtu;
+				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
+			} else {
+				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+				if (++qp->s_tail_ack_queue >
+				    IPATH_MAX_RDMA_ATOMIC)
+					qp->s_tail_ack_queue = 0;
+			}
+			ohdr->u.aeth = ipath_compute_aeth(qp);
+			hwords++;
+			qp->s_ack_rdma_psn = e->psn;
+			bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
+		} else {
+			/* COMPARE_SWAP or FETCH_ADD */
+			qp->s_cur_sge = NULL;
+			len = 0;
+			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+			ohdr->u.at.aeth = ipath_compute_aeth(qp);
+			ohdr->u.at.atomic_ack_eth[0] =
+				cpu_to_be32(e->atomic_data >> 32);
+			ohdr->u.at.atomic_ack_eth[1] =
+				cpu_to_be32(e->atomic_data);
+			hwords += sizeof(ohdr->u.at) / sizeof(u32);
+			bth2 = e->psn;
+			if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+				qp->s_tail_ack_queue = 0;
+		}
 		bth0 = qp->s_ack_state << 24;
-		ohdr->u.aeth = ipath_compute_aeth(qp);
-		hwords++;
 		break;
 
 	case OP(RDMA_READ_RESPONSE_FIRST):
 		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
 		/* FALLTHROUGH */
 	case OP(RDMA_READ_RESPONSE_MIDDLE):
-		qp->s_cur_sge = &qp->s_rdma_sge;
-		len = qp->s_rdma_len;
+		len = qp->s_ack_rdma_sge.sge.sge_length;
 		if (len > pmtu)
 			len = pmtu;
 		else {
 			ohdr->u.aeth = ipath_compute_aeth(qp);
 			hwords++;
 			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+			if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+				qp->s_tail_ack_queue = 0;
 		}
-		qp->s_rdma_len -= len;
 		bth0 = qp->s_ack_state << 24;
-		break;
-
-	case OP(RDMA_READ_RESPONSE_LAST):
-	case OP(RDMA_READ_RESPONSE_ONLY):
-		/*
-		 * We have to prevent new requests from changing
-		 * the r_sge state while a ipath_verbs_send()
-		 * is in progress.
-		 */
-		qp->s_ack_state = OP(ACKNOWLEDGE);
-		bth0 = 0;
-		goto bail;
-
-	case OP(COMPARE_SWAP):
-	case OP(FETCH_ADD):
-		qp->s_cur_sge = NULL;
-		len = 0;
-		/*
-		 * Set the s_ack_state so the receive interrupt handler
-		 * won't try to send an ACK (out of order) until this one
-		 * is actually sent.
-		 */
-		qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
-		bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
-		ohdr->u.at.aeth = ipath_compute_aeth(qp);
-		ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
-		hwords += sizeof(ohdr->u.at) / 4;
+		bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
 		break;
 
 	default:
-		/* Send a regular ACK. */
-		qp->s_cur_sge = NULL;
-		len = 0;
+	normal:
 		/*
-		 * Set the s_ack_state so the receive interrupt handler
-		 * won't try to send an ACK (out of order) until this one
-		 * is actually sent.
+		 * Send a regular ACK.
+		 * Set the s_ack_state so we wait until after sending
+		 * the ACK before setting s_ack_state to ACKNOWLEDGE
+		 * (see above).
 		 */
-		qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
-		bth0 = OP(ACKNOWLEDGE) << 24;
+		qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+		qp->s_flags &= ~IPATH_S_ACK_PENDING;
+		qp->s_cur_sge = NULL;
 		if (qp->s_nak_state)
-			ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-						    (qp->s_nak_state <<
-						     IPATH_AETH_CREDIT_SHIFT));
+			ohdr->u.aeth =
+				cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
+					    (qp->s_nak_state <<
+					     IPATH_AETH_CREDIT_SHIFT));
 		else
 			ohdr->u.aeth = ipath_compute_aeth(qp);
 		hwords++;
+		len = 0;
+		bth0 = OP(ACKNOWLEDGE) << 24;
+		bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
 	}
 	qp->s_hdrwords = hwords;
 	qp->s_cur_size = len;
+	*bth0p = bth0;
+	*bth2p = bth2;
+	return 1;
 
 bail:
-	return bth0;
+	return 0;
 }
 
 /**
@@ -197,9 +220,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 	u32 bth2;
 	char newreq;
 
+	/* Sending responses has higher priority over sending requests. */
+	if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
+	     (qp->s_flags & IPATH_S_ACK_PENDING) ||
+	     qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) &&
+	    ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p))
+		goto done;
+
 	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
 	    qp->s_rnr_timeout)
-		goto done;
+		goto bail;
 
 	/* Limit the number of packets sent without an ACK. */
 	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
@@ -210,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 			list_add_tail(&qp->timerwait,
 				      &dev->pending[dev->pending_index]);
 		spin_unlock(&dev->pending_lock);
-		goto done;
+		goto bail;
 	}
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -232,7 +262,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 		if (qp->s_cur == qp->s_tail) {
 			/* Check if send work queue is empty. */
 			if (qp->s_tail == qp->s_head)
-				goto done;
+				goto bail;
+			/*
+			 * If a fence is requested, wait for previous
+			 * RDMA read and atomic operations to finish.
+			 */
+			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
+			    qp->s_num_rd_atomic) {
+				qp->s_flags |= IPATH_S_FENCE_PENDING;
+				goto bail;
+			}
 			wqe->psn = qp->s_next_psn;
 			newreq = 1;
 		}
@@ -250,7 +289,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 			/* If no credit, return. */
 			if (qp->s_lsn != (u32) -1 &&
 			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
-				goto done;
+				goto bail;
 			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
 				wqe->lpsn += (len - 1) / pmtu;
@@ -281,13 +320,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 			/* If no credit, return. */
 			if (qp->s_lsn != (u32) -1 &&
 			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
-				goto done;
+				goto bail;
 			ohdr->u.rc.reth.vaddr =
 				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
 			ohdr->u.rc.reth.rkey =
 				cpu_to_be32(wqe->wr.wr.rdma.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
-			hwords += sizeof(struct ib_reth) / 4;
+			hwords += sizeof(struct ib_reth) / sizeof(u32);
 			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
 				wqe->lpsn += (len - 1) / pmtu;
@@ -312,14 +351,17 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 			break;
 
 		case IB_WR_RDMA_READ:
-			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
-			ohdr->u.rc.reth.rkey =
-				cpu_to_be32(wqe->wr.wr.rdma.rkey);
-			ohdr->u.rc.reth.length = cpu_to_be32(len);
-			qp->s_state = OP(RDMA_READ_REQUEST);
-			hwords += sizeof(ohdr->u.rc.reth) / 4;
+			/*
+			 * Don't allow more operations to be started
+			 * than the QP limits allow.
+			 */
 			if (newreq) {
+				if (qp->s_num_rd_atomic >=
+				    qp->s_max_rd_atomic) {
+					qp->s_flags |= IPATH_S_RDMAR_PENDING;
+					goto bail;
+				}
+				qp->s_num_rd_atomic++;
 				if (qp->s_lsn != (u32) -1)
 					qp->s_lsn++;
 				/*
@@ -330,6 +372,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 					qp->s_next_psn += (len - 1) / pmtu;
 				wqe->lpsn = qp->s_next_psn++;
 			}
+			ohdr->u.rc.reth.vaddr =
+				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+			ohdr->u.rc.reth.rkey =
+				cpu_to_be32(wqe->wr.wr.rdma.rkey);
+			ohdr->u.rc.reth.length = cpu_to_be32(len);
+			qp->s_state = OP(RDMA_READ_REQUEST);
+			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
 			ss = NULL;
 			len = 0;
 			if (++qp->s_cur == qp->s_size)
@@ -338,32 +387,48 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 
 		case IB_WR_ATOMIC_CMP_AND_SWP:
 		case IB_WR_ATOMIC_FETCH_AND_ADD:
-			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP)
-				qp->s_state = OP(COMPARE_SWAP);
-			else
-				qp->s_state = OP(FETCH_ADD);
-			ohdr->u.atomic_eth.vaddr = cpu_to_be64(
-				wqe->wr.wr.atomic.remote_addr);
-			ohdr->u.atomic_eth.rkey = cpu_to_be32(
-				wqe->wr.wr.atomic.rkey);
-			ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-				wqe->wr.wr.atomic.swap);
-			ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-				wqe->wr.wr.atomic.compare_add);
-			hwords += sizeof(struct ib_atomic_eth) / 4;
+			/*
+			 * Don't allow more operations to be started
+			 * than the QP limits allow.
+			 */
 			if (newreq) {
+				if (qp->s_num_rd_atomic >=
+				    qp->s_max_rd_atomic) {
+					qp->s_flags |= IPATH_S_RDMAR_PENDING;
+					goto bail;
+				}
+				qp->s_num_rd_atomic++;
 				if (qp->s_lsn != (u32) -1)
 					qp->s_lsn++;
 				wqe->lpsn = wqe->psn;
 			}
-			if (++qp->s_cur == qp->s_size)
-				qp->s_cur = 0;
+			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+				qp->s_state = OP(COMPARE_SWAP);
+				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+					wqe->wr.wr.atomic.swap);
+				ohdr->u.atomic_eth.compare_data = cpu_to_be64(
+					wqe->wr.wr.atomic.compare_add);
+			} else {
+				qp->s_state = OP(FETCH_ADD);
+				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+					wqe->wr.wr.atomic.compare_add);
+				ohdr->u.atomic_eth.compare_data = 0;
+			}
+			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
+				wqe->wr.wr.atomic.remote_addr >> 32);
+			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
+				wqe->wr.wr.atomic.remote_addr);
+			ohdr->u.atomic_eth.rkey = cpu_to_be32(
+				wqe->wr.wr.atomic.rkey);
+			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
 			ss = NULL;
 			len = 0;
+			if (++qp->s_cur == qp->s_size)
+				qp->s_cur = 0;
 			break;
 
 		default:
-			goto done;
+			goto bail;
 		}
 		qp->s_sge.sge = wqe->sg_list[0];
 		qp->s_sge.sg_list = wqe->sg_list + 1;
@@ -479,7 +544,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 			cpu_to_be32(wqe->wr.wr.rdma.rkey);
 		ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
 		qp->s_state = OP(RDMA_READ_REQUEST);
-		hwords += sizeof(ohdr->u.rc.reth) / 4;
+		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
 		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
 		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
 			qp->s_next_psn = qp->s_psn;
@@ -489,20 +554,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 		if (qp->s_cur == qp->s_size)
 			qp->s_cur = 0;
 		break;
-
-	case OP(RDMA_READ_REQUEST):
-	case OP(COMPARE_SWAP):
-	case OP(FETCH_ADD):
-		/*
-		 * We shouldn't start anything new until this request is
-		 * finished.  The ACK will handle rescheduling us.  XXX The
-		 * number of outstanding ones is negotiated at connection
-		 * setup time (see pg. 258,289)?  XXX Also, if we support
-		 * multiple outstanding requests, we need to check the WQE
-		 * IB_SEND_FENCE flag and not send a new request if a RDMA
-		 * read or atomic is pending.
-		 */
-		goto done;
 	}
 	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
 		bth2 |= 1 << 31;	/* Request ACK. */
@@ -512,9 +563,10 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 	qp->s_cur_size = len;
 	*bth0p = bth0 | (qp->s_state << 24);
 	*bth2p = bth2;
+done:
 	return 1;
 
-done:
+bail:
 	return 0;
 }
 
@@ -524,7 +576,8 @@ done:
  *
  * This is called from ipath_rc_rcv() and only uses the receive
  * side QP state.
- * Note that RDMA reads are handled in the send side QP state and tasklet.
+ * Note that RDMA reads and atomics are handled in the
+ * send side QP state and tasklet.
  */
 static void send_rc_ack(struct ipath_qp *qp)
 {
@@ -535,6 +588,10 @@ static void send_rc_ack(struct ipath_qp *qp)
 	struct ipath_ib_header hdr;
 	struct ipath_other_headers *ohdr;
 
+	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
+	if (qp->r_head_ack_queue != qp->s_tail_ack_queue)
+		goto queue_ack;
+
 	/* Construct the header. */
 	ohdr = &hdr.u.oth;
 	lrh0 = IPATH_LRH_BTH;
@@ -548,19 +605,14 @@ static void send_rc_ack(struct ipath_qp *qp)
 		lrh0 = IPATH_LRH_GRH;
 	}
 	/* read pkey_index w/o lock (its atomic) */
-	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
+	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
+		OP(ACKNOWLEDGE) << 24;
 	if (qp->r_nak_state)
 		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
 					    (qp->r_nak_state <<
 					     IPATH_AETH_CREDIT_SHIFT));
 	else
 		ohdr->u.aeth = ipath_compute_aeth(qp);
-	if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
-		bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
-		ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
-		hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
-	} else
-		bth0 |= OP(ACKNOWLEDGE) << 24;
 	lrh0 |= qp->remote_ah_attr.sl << 4;
 	hdr.lrh[0] = cpu_to_be16(lrh0);
 	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
@@ -574,31 +626,31 @@ static void send_rc_ack(struct ipath_qp *qp)
 	 * If we can send the ACK, clear the ACK state.
 	 */
 	if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
-		qp->r_ack_state = OP(ACKNOWLEDGE);
 		dev->n_unicast_xmit++;
-	} else {
-		/*
-		 * We are out of PIO buffers at the moment.
-		 * Pass responsibility for sending the ACK to the
-		 * send tasklet so that when a PIO buffer becomes
-		 * available, the ACK is sent ahead of other outgoing
-		 * packets.
-		 */
-		dev->n_rc_qacks++;
-		spin_lock_irq(&qp->s_lock);
-		/* Don't coalesce if a RDMA read or atomic is pending. */
-		if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
-		    qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
-			qp->s_ack_state = qp->r_ack_state;
-			qp->s_nak_state = qp->r_nak_state;
-			qp->s_ack_psn = qp->r_ack_psn;
-			qp->r_ack_state = OP(ACKNOWLEDGE);
-		}
-		spin_unlock_irq(&qp->s_lock);
-
-		/* Call ipath_do_rc_send() in another thread. */
-		tasklet_hi_schedule(&qp->s_task);
+		goto done;
 	}
+
+	/*
+	 * We are out of PIO buffers at the moment.
+	 * Pass responsibility for sending the ACK to the
+	 * send tasklet so that when a PIO buffer becomes
+	 * available, the ACK is sent ahead of other outgoing
+	 * packets.
+	 */
+	dev->n_rc_qacks++;
+
+queue_ack:
+	spin_lock_irq(&qp->s_lock);
+	qp->s_flags |= IPATH_S_ACK_PENDING;
+	qp->s_nak_state = qp->r_nak_state;
+	qp->s_ack_psn = qp->r_ack_psn;
+	spin_unlock_irq(&qp->s_lock);
+
+	/* Call ipath_do_rc_send() in another thread. */
+	tasklet_hi_schedule(&qp->s_task);
+
+done:
+	return;
 }
 
 /**
@@ -775,10 +827,6 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 		list_del_init(&qp->timerwait);
 	spin_unlock(&dev->pending_lock);
 
-	/* Nothing is pending to ACK/NAK. */
-	if (unlikely(qp->s_last == qp->s_tail))
-		goto bail;
-
 	/*
 	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
 	 * requests and implicitly NAK RDMA read and atomic requests issued
@@ -806,7 +854,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 		 */
 		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
 		     (opcode != OP(RDMA_READ_RESPONSE_LAST) ||
-		       ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
+		      ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
 		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
 		     (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
@@ -824,12 +872,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 			 */
 			goto bail;
 		}
-		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
-		    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-			tasklet_hi_schedule(&qp->s_task);
+		if (qp->s_num_rd_atomic &&
+		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
+		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
+			qp->s_num_rd_atomic--;
+			/* Restart sending task if fence is complete */
+			if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
+			    !qp->s_num_rd_atomic) {
+				qp->s_flags &= ~IPATH_S_FENCE_PENDING;
+				tasklet_hi_schedule(&qp->s_task);
+			} else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
+				qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
+				tasklet_hi_schedule(&qp->s_task);
+			}
+		}
 		/* Post a send completion queue entry if requested. */
-		if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+		if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			wc.wr_id = wqe->wr.wr_id;
 			wc.status = IB_WC_SUCCESS;
@@ -1003,6 +1062,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 				     u32 psn, u32 hdrsize, u32 pmtu,
 				     int header_in_data)
 {
+	struct ipath_swqe *wqe;
 	unsigned long flags;
 	struct ib_wc wc;
 	int diff;
@@ -1032,6 +1092,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 		goto ack_done;
 	}
 
+	if (unlikely(qp->s_last == qp->s_tail))
+		goto ack_done;
+	wqe = get_swqe_ptr(qp, qp->s_last);
+
 	switch (opcode) {
 	case OP(ACKNOWLEDGE):
 	case OP(ATOMIC_ACKNOWLEDGE):
@@ -1042,38 +1106,49 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 			aeth = be32_to_cpu(((__be32 *) data)[0]);
 			data += sizeof(__be32);
 		}
-		if (opcode == OP(ATOMIC_ACKNOWLEDGE))
-			*(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data;
+		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
+			u64 val;
+
+			if (!header_in_data) {
+				__be32 *p = ohdr->u.at.atomic_ack_eth;
+
+				val = ((u64) be32_to_cpu(p[0]) << 32) |
+					be32_to_cpu(p[1]);
+			} else
+				val = be64_to_cpu(((__be64 *) data)[0]);
+			*(u64 *) wqe->sg_list[0].vaddr = val;
+		}
 		if (!do_rc_ack(qp, aeth, psn, opcode) ||
 		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
 			goto ack_done;
 		hdrsize += 4;
+		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+			goto ack_done;
 		/*
-		 * do_rc_ack() has already checked the PSN so skip
-		 * the sequence check.
+		 * If this is a response to a resent RDMA read, we
+		 * have to be careful to copy the data to the right
+		 * location.
 		 */
-		goto rdma_read;
+		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+						  wqe, psn, pmtu);
+		goto read_middle;
 
 	case OP(RDMA_READ_RESPONSE_MIDDLE):
 		/* no AETH, no ACK */
 		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
 			dev->n_rdma_seq++;
-			if (qp->s_last != qp->s_tail)
-				ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
 			goto ack_done;
 		}
-	rdma_read:
-		if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
+		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
 			goto ack_done;
+	read_middle:
 		if (unlikely(tlen != (hdrsize + pmtu + 4)))
 			goto ack_done;
-		if (unlikely(pmtu >= qp->s_len))
+		if (unlikely(pmtu >= qp->s_rdma_read_len))
 			goto ack_done;
+
 		/* We got a response so update the timeout. */
-		if (unlikely(qp->s_last == qp->s_tail ||
-			     get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
-			     IB_WR_RDMA_READ))
-			goto ack_done;
 		spin_lock(&dev->pending_lock);
 		if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
 			list_move_tail(&qp->timerwait,
@@ -1082,27 +1157,41 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 		/*
 		 * Update the RDMA receive state but do the copy w/o
 		 * holding the locks and blocking interrupts.
-		 * XXX Yet another place that affects relaxed RDMA order
-		 * since we don't want s_sge modified.
 		 */
-		qp->s_len -= pmtu;
+		qp->s_rdma_read_len -= pmtu;
 		update_last_psn(qp, psn);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
-		ipath_copy_sge(&qp->s_sge, data, pmtu);
+		ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
 		goto bail;
 
+	case OP(RDMA_READ_RESPONSE_ONLY):
+		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
+			dev->n_rdma_seq++;
+			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+			goto ack_done;
+		}
+		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+			goto ack_done;
+		/*
+		 * If this is a response to a resent RDMA read, we
+		 * have to be careful to copy the data to the right
+		 * location.
+		 * XXX should check PSN and wqe opcode first.
+		 */
+		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+						  wqe, psn, pmtu);
+		goto read_last;
+
 	case OP(RDMA_READ_RESPONSE_LAST):
 		/* ACKs READ req. */
 		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
 			dev->n_rdma_seq++;
-			if (qp->s_last != qp->s_tail)
-				ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
 			goto ack_done;
 		}
-		/* FALLTHROUGH */
-	case OP(RDMA_READ_RESPONSE_ONLY):
-		if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
+		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
 			goto ack_done;
+	read_last:
 		/*
 		 * Get the number of bytes the message was padded by.
 		 */
@@ -1117,7 +1206,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 			goto ack_done;
 		}
 		tlen -= hdrsize + pad + 8;
-		if (unlikely(tlen != qp->s_len)) {
+		if (unlikely(tlen != qp->s_rdma_read_len)) {
 			/* XXX Need to generate an error CQ entry. */
 			goto ack_done;
 		}
@@ -1127,17 +1216,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 			aeth = be32_to_cpu(((__be32 *) data)[0]);
 			data += sizeof(__be32);
 		}
-		ipath_copy_sge(&qp->s_sge, data, tlen);
-		if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) {
-			/*
-			 * Change the state so we contimue
-			 * processing new requests and wake up the
-			 * tasklet if there are posted sends.
-			 */
-			qp->s_state = OP(SEND_LAST);
-			if (qp->s_tail != qp->s_head)
-				tasklet_hi_schedule(&qp->s_task);
-		}
+		ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
+		(void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST));
 		goto ack_done;
 	}
 
@@ -1162,7 +1242,7 @@ bail:
  * incoming RC packet for the given QP.
  * Called at interrupt level.
  * Return 1 if no more processing is needed; otherwise return 0 to
- * schedule a response to be sent and the s_lock unlocked.
+ * schedule a response to be sent.
  */
 static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
 				     struct ipath_other_headers *ohdr,
@@ -1173,25 +1253,23 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
 				     int diff,
 				     int header_in_data)
 {
-	struct ib_reth *reth;
+	struct ipath_ack_entry *e;
+	u8 i, prev;
+	int old_req;
 
 	if (diff > 0) {
 		/*
 		 * Packet sequence error.
 		 * A NAK will ACK earlier sends and RDMA writes.
-		 * Don't queue the NAK if a RDMA read, atomic, or
-		 * NAK is pending though.
+		 * Don't queue the NAK if we already sent one.
 		 */
-		if (qp->s_ack_state != OP(ACKNOWLEDGE) ||
-		    qp->r_nak_state != 0)
-			goto done;
-		if (qp->r_ack_state < OP(COMPARE_SWAP)) {
-			qp->r_ack_state = OP(SEND_ONLY);
+		if (!qp->r_nak_state) {
 			qp->r_nak_state = IB_NAK_PSN_ERROR;
 			/* Use the expected PSN. */
 			qp->r_ack_psn = qp->r_psn;
+			goto send_ack;
 		}
-		goto send_ack;
+		goto done;
 	}
 
 	/*
@@ -1204,8 +1282,46 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
 	 * can coalesce an outstanding duplicate ACK.  We have to
 	 * send the earliest so that RDMA reads can be restarted at
 	 * the requester's expected PSN.
+	 *
+	 * First, find where this duplicate PSN falls within the
+	 * ACKs previously sent.
 	 */
-	if (opcode == OP(RDMA_READ_REQUEST)) {
+	psn &= IPATH_PSN_MASK;
+	e = NULL;
+	old_req = 1;
+	spin_lock_irq(&qp->s_lock);
+	for (i = qp->r_head_ack_queue; ; i = prev) {
+		if (i == qp->s_tail_ack_queue)
+			old_req = 0;
+		if (i)
+			prev = i - 1;
+		else
+			prev = IPATH_MAX_RDMA_ATOMIC;
+		if (prev == qp->r_head_ack_queue) {
+			e = NULL;
+			break;
+		}
+		e = &qp->s_ack_queue[prev];
+		if (!e->opcode) {
+			e = NULL;
+			break;
+		}
+		if (ipath_cmp24(psn, e->psn) >= 0)
+			break;
+	}
+	switch (opcode) {
+	case OP(RDMA_READ_REQUEST): {
+		struct ib_reth *reth;
+		u32 offset;
+		u32 len;
+
+		/*
+		 * If we didn't find the RDMA read request in the ack queue,
+		 * or the send tasklet is already backed up to send an
+		 * earlier entry, we can ignore this request.
+		 */
+		if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
+			goto unlock_done;
 		/* RETH comes after BTH */
 		if (!header_in_data)
 			reth = &ohdr->u.rc.reth;
@@ -1214,88 +1330,87 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
 			data += sizeof(*reth);
 		}
 		/*
-		 * If we receive a duplicate RDMA request, it means the
-		 * requester saw a sequence error and needs to restart
-		 * from an earlier point.  We can abort the current
-		 * RDMA read send in that case.
+		 * Address range must be a subset of the original
+		 * request and start on pmtu boundaries.
+		 * We reuse the old ack_queue slot since the requester
+		 * should not back up and request an earlier PSN for the
+		 * same request.
 		 */
-		spin_lock_irq(&qp->s_lock);
-		if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
-		    (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) {
-			/*
-			 * We are already sending earlier requested data.
-			 * Don't abort it to send later out of sequence data.
-			 */
-			spin_unlock_irq(&qp->s_lock);
-			goto done;
-		}
-		qp->s_rdma_len = be32_to_cpu(reth->length);
-		if (qp->s_rdma_len != 0) {
+		offset = ((psn - e->psn) & IPATH_PSN_MASK) *
+			ib_mtu_enum_to_int(qp->path_mtu);
+		len = be32_to_cpu(reth->length);
+		if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
+			goto unlock_done;
+		if (len != 0) {
 			u32 rkey = be32_to_cpu(reth->rkey);
 			u64 vaddr = be64_to_cpu(reth->vaddr);
 			int ok;
 
-			/*
-			 * Address range must be a subset of the original
-			 * request and start on pmtu boundaries.
-			 */
-			ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
-					   qp->s_rdma_len, vaddr, rkey,
+			ok = ipath_rkey_ok(qp, &e->rdma_sge,
+					   len, vaddr, rkey,
 					   IB_ACCESS_REMOTE_READ);
-			if (unlikely(!ok)) {
-				spin_unlock_irq(&qp->s_lock);
-				goto done;
-			}
+			if (unlikely(!ok))
+				goto unlock_done;
 		} else {
-			qp->s_rdma_sge.sg_list = NULL;
-			qp->s_rdma_sge.num_sge = 0;
-			qp->s_rdma_sge.sge.mr = NULL;
-			qp->s_rdma_sge.sge.vaddr = NULL;
-			qp->s_rdma_sge.sge.length = 0;
-			qp->s_rdma_sge.sge.sge_length = 0;
+			e->rdma_sge.sg_list = NULL;
+			e->rdma_sge.num_sge = 0;
+			e->rdma_sge.sge.mr = NULL;
+			e->rdma_sge.sge.vaddr = NULL;
+			e->rdma_sge.sge.length = 0;
+			e->rdma_sge.sge.sge_length = 0;
 		}
-		qp->s_ack_state = opcode;
-		qp->s_ack_psn = psn;
-		spin_unlock_irq(&qp->s_lock);
-		tasklet_hi_schedule(&qp->s_task);
-		goto send_ack;
+		e->psn = psn;
+		qp->s_ack_state = OP(ACKNOWLEDGE);
+		qp->s_tail_ack_queue = prev;
+		break;
 	}
 
-	/*
-	 * A pending RDMA read will ACK anything before it so
-	 * ignore earlier duplicate requests.
-	 */
-	if (qp->s_ack_state != OP(ACKNOWLEDGE))
-		goto done;
-
-	/*
-	 * If an ACK is pending, don't replace the pending ACK
-	 * with an earlier one since the later one will ACK the earlier.
-	 * Also, if we already have a pending atomic, send it.
-	 */
-	if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
-	    (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
-	     qp->r_ack_state >= OP(COMPARE_SWAP)))
-		goto send_ack;
-	switch (opcode) {
 	case OP(COMPARE_SWAP):
-	case OP(FETCH_ADD):
+	case OP(FETCH_ADD): {
 		/*
-		 * Check for the PSN of the last atomic operation
-		 * performed and resend the result if found.
+		 * If we didn't find the atomic request in the ack queue
+		 * or the send tasklet is already backed up to send an
+		 * earlier entry, we can ignore this request.
 		 */
-		if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn)
-			goto done;
+		if (!e || e->opcode != (u8) opcode || old_req)
+			goto unlock_done;
+		qp->s_ack_state = OP(ACKNOWLEDGE);
+		qp->s_tail_ack_queue = prev;
+		break;
+	}
+
+	default:
+		if (old_req)
+			goto unlock_done;
+		/*
+		 * Resend the most recent ACK if this request is
+		 * after all the previous RDMA reads and atomics.
+		 */
+		if (i == qp->r_head_ack_queue) {
+			spin_unlock_irq(&qp->s_lock);
+			qp->r_nak_state = 0;
+			qp->r_ack_psn = qp->r_psn - 1;
+			goto send_ack;
+		}
+		/*
+		 * Resend the RDMA read or atomic op which
+		 * ACKs this duplicate request.
+		 */
+		qp->s_ack_state = OP(ACKNOWLEDGE);
+		qp->s_tail_ack_queue = i;
 		break;
 	}
-	qp->r_ack_state = opcode;
 	qp->r_nak_state = 0;
-	qp->r_ack_psn = psn;
-send_ack:
-	return 0;
+	spin_unlock_irq(&qp->s_lock);
+	tasklet_hi_schedule(&qp->s_task);
 
+unlock_done:
+	spin_unlock_irq(&qp->s_lock);
 done:
 	return 1;
+
+send_ack:
+	return 0;
 }
 
 static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
@@ -1391,15 +1506,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
 			break;
 	nack_inv:
-		/*
-		 * A NAK will ACK earlier sends and RDMA writes.
-		 * Don't queue the NAK if a RDMA read, atomic, or NAK
-		 * is pending though.
-		 */
-		if (qp->r_ack_state >= OP(COMPARE_SWAP))
-			goto send_ack;
 		ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
-		qp->r_ack_state = OP(SEND_ONLY);
 		qp->r_nak_state = IB_NAK_INVALID_REQUEST;
 		qp->r_ack_psn = qp->r_psn;
 		goto send_ack;
@@ -1441,9 +1548,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 			 * Don't queue the NAK if a RDMA read or atomic
 			 * is pending though.
 			 */
-			if (qp->r_ack_state >= OP(COMPARE_SWAP))
-				goto send_ack;
-			qp->r_ack_state = OP(SEND_ONLY);
+			if (qp->r_nak_state)
+				goto done;
 			qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
 			qp->r_ack_psn = qp->r_psn;
 			goto send_ack;
@@ -1567,7 +1673,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 			goto rnr_nak;
 		goto send_last_imm;
 
-	case OP(RDMA_READ_REQUEST):
+	case OP(RDMA_READ_REQUEST): {
+		struct ipath_ack_entry *e;
+		u32 len;
+		u8 next;
+
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+			goto nack_acc;
+		next = qp->r_head_ack_queue + 1;
+		if (next > IPATH_MAX_RDMA_ATOMIC)
+			next = 0;
+		if (unlikely(next == qp->s_tail_ack_queue))
+			goto nack_inv;
+		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		/* RETH comes after BTH */
 		if (!header_in_data)
 			reth = &ohdr->u.rc.reth;
@@ -1575,72 +1693,75 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 			reth = (struct ib_reth *)data;
 			data += sizeof(*reth);
 		}
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_READ)))
-			goto nack_acc;
-		spin_lock_irq(&qp->s_lock);
-		qp->s_rdma_len = be32_to_cpu(reth->length);
-		if (qp->s_rdma_len != 0) {
+		len = be32_to_cpu(reth->length);
+		if (len) {
 			u32 rkey = be32_to_cpu(reth->rkey);
 			u64 vaddr = be64_to_cpu(reth->vaddr);
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
-					   qp->s_rdma_len, vaddr, rkey,
-					   IB_ACCESS_REMOTE_READ);
-			if (unlikely(!ok)) {
-				spin_unlock_irq(&qp->s_lock);
+			ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+					   rkey, IB_ACCESS_REMOTE_READ);
+			if (unlikely(!ok))
 				goto nack_acc;
-			}
 			/*
 			 * Update the next expected PSN.  We add 1 later
 			 * below, so only add the remainder here.
 			 */
-			if (qp->s_rdma_len > pmtu)
-				qp->r_psn += (qp->s_rdma_len - 1) / pmtu;
+			if (len > pmtu)
+				qp->r_psn += (len - 1) / pmtu;
 		} else {
-			qp->s_rdma_sge.sg_list = NULL;
-			qp->s_rdma_sge.num_sge = 0;
-			qp->s_rdma_sge.sge.mr = NULL;
-			qp->s_rdma_sge.sge.vaddr = NULL;
-			qp->s_rdma_sge.sge.length = 0;
-			qp->s_rdma_sge.sge.sge_length = 0;
+			e->rdma_sge.sg_list = NULL;
+			e->rdma_sge.num_sge = 0;
+			e->rdma_sge.sge.mr = NULL;
+			e->rdma_sge.sge.vaddr = NULL;
+			e->rdma_sge.sge.length = 0;
+			e->rdma_sge.sge.sge_length = 0;
 		}
+		e->opcode = opcode;
+		e->psn = psn;
 		/*
 		 * We need to increment the MSN here instead of when we
 		 * finish sending the result since a duplicate request would
 		 * increment it more than once.
 		 */
 		qp->r_msn++;
-
-		qp->s_ack_state = opcode;
-		qp->s_ack_psn = psn;
-		spin_unlock_irq(&qp->s_lock);
-
 		qp->r_psn++;
 		qp->r_state = opcode;
 		qp->r_nak_state = 0;
+		barrier();
+		qp->r_head_ack_queue = next;
 
 		/* Call ipath_do_rc_send() in another thread. */
 		tasklet_hi_schedule(&qp->s_task);
 
 		goto done;
+	}
 
 	case OP(COMPARE_SWAP):
 	case OP(FETCH_ADD): {
 		struct ib_atomic_eth *ateth;
+		struct ipath_ack_entry *e;
 		u64 vaddr;
+		atomic64_t *maddr;
 		u64 sdata;
 		u32 rkey;
+		u8 next;
 
+		if (unlikely(!(qp->qp_access_flags &
+			       IB_ACCESS_REMOTE_ATOMIC)))
+			goto nack_acc;
+		next = qp->r_head_ack_queue + 1;
+		if (next > IPATH_MAX_RDMA_ATOMIC)
+			next = 0;
+		if (unlikely(next == qp->s_tail_ack_queue))
+			goto nack_inv;
 		if (!header_in_data)
 			ateth = &ohdr->u.atomic_eth;
-		else {
+		else
 			ateth = (struct ib_atomic_eth *)data;
-			data += sizeof(*ateth);
-		}
-		vaddr = be64_to_cpu(ateth->vaddr);
+		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+			be32_to_cpu(ateth->vaddr[1]);
 		if (unlikely(vaddr & (sizeof(u64) - 1)))
 			goto nack_inv;
 		rkey = be32_to_cpu(ateth->rkey);
@@ -1649,63 +1770,50 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 					    sizeof(u64), vaddr, rkey,
 					    IB_ACCESS_REMOTE_ATOMIC)))
 			goto nack_acc;
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_ATOMIC)))
-			goto nack_acc;
 		/* Perform atomic OP and save result. */
+		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
 		sdata = be64_to_cpu(ateth->swap_data);
-		spin_lock_irq(&dev->pending_lock);
-		qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
-		if (opcode == OP(FETCH_ADD))
-			*(u64 *) qp->r_sge.sge.vaddr =
-				qp->r_atomic_data + sdata;
-		else if (qp->r_atomic_data ==
-			 be64_to_cpu(ateth->compare_data))
-			*(u64 *) qp->r_sge.sge.vaddr = sdata;
-		spin_unlock_irq(&dev->pending_lock);
+		e = &qp->s_ack_queue[qp->r_head_ack_queue];
+		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
+			(u64) atomic64_add_return(sdata, maddr) - sdata :
+			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+				      be64_to_cpu(ateth->compare_data),
+				      sdata);
+		e->opcode = opcode;
+		e->psn = psn & IPATH_PSN_MASK;
 		qp->r_msn++;
-		qp->r_atomic_psn = psn & IPATH_PSN_MASK;
-		psn |= 1 << 31;
-		break;
+		qp->r_psn++;
+		qp->r_state = opcode;
+		qp->r_nak_state = 0;
+		barrier();
+		qp->r_head_ack_queue = next;
+
+		/* Call ipath_do_rc_send() in another thread. */
+		tasklet_hi_schedule(&qp->s_task);
+
+		goto done;
 	}
 
 	default:
-		/* Drop packet for unknown opcodes. */
-		goto done;
+		/* NAK unknown opcodes. */
+		goto nack_inv;
 	}
 	qp->r_psn++;
 	qp->r_state = opcode;
+	qp->r_ack_psn = psn;
 	qp->r_nak_state = 0;
 	/* Send an ACK if requested or required. */
-	if (psn & (1 << 31)) {
-		/*
-		 * Coalesce ACKs unless there is a RDMA READ or
-		 * ATOMIC pending.
-		 */
-		if (qp->r_ack_state < OP(COMPARE_SWAP)) {
-			qp->r_ack_state = opcode;
-			qp->r_ack_psn = psn;
-		}
+	if (psn & (1 << 31))
 		goto send_ack;
-	}
 	goto done;
 
 nack_acc:
-	/*
-	 * A NAK will ACK earlier sends and RDMA writes.
-	 * Don't queue the NAK if a RDMA read, atomic, or NAK
-	 * is pending though.
-	 */
-	if (qp->r_ack_state < OP(COMPARE_SWAP)) {
-		ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
-		qp->r_ack_state = OP(RDMA_WRITE_ONLY);
-		qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
-		qp->r_ack_psn = qp->r_psn;
-	}
+	ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
+	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+	qp->r_ack_psn = qp->r_psn;
+
 send_ack:
-	/* Send ACK right away unless the send tasklet has a pending ACK. */
-	if (qp->s_ack_state == OP(ACKNOWLEDGE))
-		send_rc_ack(qp);
+	send_rc_ack(qp);
 
 done:
 	return;
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index cda84933bb4..d9c2a9b15d8 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -255,6 +255,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
 	unsigned long flags;
 	struct ib_wc wc;
 	u64 sdata;
+	atomic64_t *maddr;
 
 	qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
 	if (!qp) {
@@ -311,7 +312,7 @@ again:
 				sqp->s_rnr_retry--;
 			dev->n_rnr_naks++;
 			sqp->s_rnr_timeout =
-				ib_ipath_rnr_table[sqp->r_min_rnr_timer];
+				ib_ipath_rnr_table[qp->r_min_rnr_timer];
 			ipath_insert_rnr_queue(sqp);
 			goto done;
 		}
@@ -344,20 +345,22 @@ again:
 			wc.sl = sqp->remote_ah_attr.sl;
 			wc.dlid_path_bits = 0;
 			wc.port_num = 0;
+			spin_lock_irqsave(&sqp->s_lock, flags);
 			ipath_sqerror_qp(sqp, &wc);
+			spin_unlock_irqrestore(&sqp->s_lock, flags);
 			goto done;
 		}
 		break;
 
 	case IB_WR_RDMA_READ:
+		if (unlikely(!(qp->qp_access_flags &
+			       IB_ACCESS_REMOTE_READ)))
+			goto acc_err;
 		if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
 					    wqe->wr.wr.rdma.remote_addr,
 					    wqe->wr.wr.rdma.rkey,
 					    IB_ACCESS_REMOTE_READ)))
 			goto acc_err;
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_READ)))
-			goto acc_err;
 		qp->r_sge.sge = wqe->sg_list[0];
 		qp->r_sge.sg_list = wqe->sg_list + 1;
 		qp->r_sge.num_sge = wqe->wr.num_sge;
@@ -365,22 +368,22 @@ again:
 
 	case IB_WR_ATOMIC_CMP_AND_SWP:
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
+		if (unlikely(!(qp->qp_access_flags &
+			       IB_ACCESS_REMOTE_ATOMIC)))
+			goto acc_err;
 		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
-					    wqe->wr.wr.rdma.remote_addr,
-					    wqe->wr.wr.rdma.rkey,
+					    wqe->wr.wr.atomic.remote_addr,
+					    wqe->wr.wr.atomic.rkey,
 					    IB_ACCESS_REMOTE_ATOMIC)))
 			goto acc_err;
 		/* Perform atomic OP and save result. */
-		sdata = wqe->wr.wr.atomic.swap;
-		spin_lock_irqsave(&dev->pending_lock, flags);
-		qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
-		if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-			*(u64 *) qp->r_sge.sge.vaddr =
-				qp->r_atomic_data + sdata;
-		else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add)
-			*(u64 *) qp->r_sge.sge.vaddr = sdata;
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
-		*(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
+		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+		sdata = wqe->wr.wr.atomic.compare_add;
+		*(u64 *) sqp->s_sge.sge.vaddr =
+			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+			(u64) atomic64_add_return(sdata, maddr) - sdata :
+			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+				      sdata, wqe->wr.wr.atomic.swap);
 		goto send_comp;
 
 	default:
@@ -441,7 +444,7 @@ again:
 send_comp:
 	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
 
-	if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) ||
+	if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
 	    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 		wc.wr_id = wqe->wr.wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -503,7 +506,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
 	 * We clear the tasklet flag now since we are committing to return
 	 * from the tasklet function.
 	 */
-	clear_bit(IPATH_S_BUSY, &qp->s_flags);
+	clear_bit(IPATH_S_BUSY, &qp->s_busy);
 	tasklet_unlock(&qp->s_task);
 	want_buffer(dev->dd);
 	dev->n_piowait++;
@@ -542,6 +545,9 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 		    wr->sg_list[0].addr & (sizeof(u64) - 1))) {
 		ret = -EINVAL;
 		goto bail;
+	} else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
+		ret = -EINVAL;
+		goto bail;
 	}
 	/* IB spec says that num_sge == 0 is OK. */
 	if (wr->num_sge > qp->s_max_sge) {
@@ -648,7 +654,7 @@ void ipath_do_ruc_send(unsigned long data)
 	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
 	struct ipath_other_headers *ohdr;
 
-	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
+	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
 		goto bail;
 
 	if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
@@ -684,19 +690,15 @@ again:
 	 */
 	spin_lock_irqsave(&qp->s_lock, flags);
 
-	/* Sending responses has higher priority over sending requests. */
-	if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE &&
-	    (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0)
-		bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK;
-	else if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
-		   ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
-		   ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
+	if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
+	       ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
+	       ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
 		/*
 		 * Clear the busy bit before unlocking to avoid races with
 		 * adding new work queue items and then failing to process
 		 * them.
 		 */
-		clear_bit(IPATH_S_BUSY, &qp->s_flags);
+		clear_bit(IPATH_S_BUSY, &qp->s_busy);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		goto bail;
 	}
@@ -729,7 +731,7 @@ again:
 	goto again;
 
 clear:
-	clear_bit(IPATH_S_BUSY, &qp->s_flags);
+	clear_bit(IPATH_S_BUSY, &qp->s_busy);
 bail:
 	return;
 }
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 325d6634ff5..1c2b03c2ef5 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -42,7 +42,7 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
 {
 	if (++qp->s_last == qp->s_size)
 		qp->s_last = 0;
-	if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+	if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
 	    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 		wc->wr_id = wqe->wr.wr_id;
 		wc->status = IB_WC_SUCCESS;
@@ -344,13 +344,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	send_first:
 		if (qp->r_reuse_sge) {
 			qp->r_reuse_sge = 0;
-			qp->r_sge = qp->s_rdma_sge;
+			qp->r_sge = qp->s_rdma_read_sge;
 		} else if (!ipath_get_rwqe(qp, 0)) {
 			dev->n_pkt_drops++;
 			goto done;
 		}
 		/* Save the WQE so we can reuse it in case of an error. */
-		qp->s_rdma_sge = qp->r_sge;
+		qp->s_rdma_read_sge = qp->r_sge;
 		qp->r_rcv_len = 0;
 		if (opcode == OP(SEND_ONLY))
 			goto send_last;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 9a3e54664ee..a20261c0b4f 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -467,7 +467,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 
 done:
 	/* Queue the completion status entry. */
-	if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+	if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
 	    (wr->send_flags & IB_SEND_SIGNALED)) {
 		wc.wr_id = wr->wr_id;
 		wc.status = IB_WC_SUCCESS;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 2aaacdb7e52..9bec5a9b455 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -773,7 +773,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
 	/* +1 is for the qword padding of pbc */
 	plen = hdrwords + ((len + 3) >> 2) + 1;
 	if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
-		ipath_dbg("packet len 0x%x too long, failing\n", plen);
 		ret = -EINVAL;
 		goto bail;
 	}
@@ -980,14 +979,14 @@ static int ipath_query_device(struct ib_device *ibdev,
 	props->max_cqe = ib_ipath_max_cqes;
 	props->max_mr = dev->lk_table.max;
 	props->max_pd = ib_ipath_max_pds;
-	props->max_qp_rd_atom = 1;
-	props->max_qp_init_rd_atom = 1;
+	props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
+	props->max_qp_init_rd_atom = 255;
 	/* props->max_res_rd_atom */
 	props->max_srq = ib_ipath_max_srqs;
 	props->max_srq_wr = ib_ipath_max_srq_wrs;
 	props->max_srq_sge = ib_ipath_max_srq_sges;
 	/* props->local_ca_ack_delay */
-	props->atomic_cap = IB_ATOMIC_HCA;
+	props->atomic_cap = IB_ATOMIC_GLOB;
 	props->max_pkeys = ipath_get_npkeys(dev->dd);
 	props->max_mcast_grp = ib_ipath_max_mcast_grps;
 	props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index c0c8d5b24a7..b0b29d97d56 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -43,6 +43,8 @@
 
 #include "ipath_layer.h"
 
+#define IPATH_MAX_RDMA_ATOMIC	4
+
 #define QPN_MAX                 (1 << 24)
 #define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
 
@@ -89,7 +91,7 @@ struct ib_reth {
 } __attribute__ ((packed));
 
 struct ib_atomic_eth {
-	__be64 vaddr;
+	__be32 vaddr[2];	/* unaligned so access as 2 32-bit words */
 	__be32 rkey;
 	__be64 swap_data;
 	__be64 compare_data;
@@ -108,7 +110,7 @@ struct ipath_other_headers {
 		} rc;
 		struct {
 			__be32 aeth;
-			__be64 atomic_ack_eth;
+			__be32 atomic_ack_eth[2];
 		} at;
 		__be32 imm_data;
 		__be32 aeth;
@@ -311,6 +313,19 @@ struct ipath_sge_state {
 	u8 num_sge;
 };
 
+/*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct ipath_ack_entry {
+	u8 opcode;
+	u32 psn;
+	union {
+		struct ipath_sge_state rdma_sge;
+		u64 atomic_data;
+	};
+};
+
 /*
  * Variables prefixed with s_ are for the requester (sender).
  * Variables prefixed with r_ are for the responder (receiver).
@@ -333,24 +348,24 @@ struct ipath_qp {
 	struct ipath_mmap_info *ip;
 	struct ipath_sge_state *s_cur_sge;
 	struct ipath_sge_state s_sge;	/* current send request data */
-	/* current RDMA read send data */
-	struct ipath_sge_state s_rdma_sge;
+	struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
+	struct ipath_sge_state s_ack_rdma_sge;
+	struct ipath_sge_state s_rdma_read_sge;
 	struct ipath_sge_state r_sge;	/* current receive data */
 	spinlock_t s_lock;
-	unsigned long s_flags;
+	unsigned long s_busy;
 	u32 s_hdrwords;		/* size of s_hdr in 32 bit words */
 	u32 s_cur_size;		/* size of send packet in bytes */
 	u32 s_len;		/* total length of s_sge */
-	u32 s_rdma_len;		/* total length of s_rdma_sge */
+	u32 s_rdma_read_len;	/* total length of s_rdma_read_sge */
 	u32 s_next_psn;		/* PSN for next request */
 	u32 s_last_psn;		/* last response PSN processed */
 	u32 s_psn;		/* current packet sequence number */
-	u32 s_ack_psn;		/* PSN for RDMA_READ */
+	u32 s_ack_rdma_psn;	/* PSN for sending RDMA read responses */
+	u32 s_ack_psn;		/* PSN for acking sends and RDMA writes */
 	u32 s_rnr_timeout;	/* number of milliseconds for RNR timeout */
 	u32 r_ack_psn;		/* PSN for next ACK or atomic ACK */
 	u64 r_wr_id;		/* ID for current receive WQE */
-	u64 r_atomic_data;	/* data for last atomic op */
-	u32 r_atomic_psn;	/* PSN of last atomic op */
 	u32 r_len;		/* total length of r_sge */
 	u32 r_rcv_len;		/* receive data len processed */
 	u32 r_psn;		/* expected rcv packet sequence number */
@@ -360,12 +375,13 @@ struct ipath_qp {
 	u8 s_ack_state;		/* opcode of packet to ACK */
 	u8 s_nak_state;		/* non-zero if NAK is pending */
 	u8 r_state;		/* opcode of last packet received */
-	u8 r_ack_state;		/* opcode of packet to ACK */
 	u8 r_nak_state;		/* non-zero if NAK is pending */
 	u8 r_min_rnr_timer;	/* retry timeout value for RNR NAKs */
 	u8 r_reuse_sge;		/* for UC receive errors */
 	u8 r_sge_inx;		/* current index into sg_list */
 	u8 r_wrid_valid;	/* r_wrid set but CQ entry not yet made */
+	u8 r_max_rd_atomic;	/* max number of RDMA read/atomic to receive */
+	u8 r_head_ack_queue;	/* index into s_ack_queue[] */
 	u8 qp_access_flags;
 	u8 s_max_sge;		/* size of s_wq->sg_list */
 	u8 s_retry_cnt;		/* number of times to retry */
@@ -374,6 +390,10 @@ struct ipath_qp {
 	u8 s_rnr_retry;		/* requester RNR retry counter */
 	u8 s_wait_credit;	/* limit number of unacked packets sent */
 	u8 s_pkey_index;	/* PKEY index to use */
+	u8 s_max_rd_atomic;	/* max number of RDMA read/atomic to send */
+	u8 s_num_rd_atomic;	/* number of RDMA read/atomic pending */
+	u8 s_tail_ack_queue;	/* index into s_ack_queue[] */
+	u8 s_flags;
 	u8 timeout;		/* Timeout for this QP */
 	enum ib_mtu path_mtu;
 	u32 remote_qpn;
@@ -390,11 +410,16 @@ struct ipath_qp {
 	struct ipath_sge r_sg_list[0];	/* verified SGEs */
 };
 
+/* Bit definition for s_busy. */
+#define IPATH_S_BUSY		0
+
 /*
  * Bit definitions for s_flags.
  */
-#define IPATH_S_BUSY		0
-#define IPATH_S_SIGNAL_REQ_WR	1
+#define IPATH_S_SIGNAL_REQ_WR	0x01
+#define IPATH_S_FENCE_PENDING	0x02
+#define IPATH_S_RDMAR_PENDING	0x04
+#define IPATH_S_ACK_PENDING	0x08
 
 #define IPATH_PSN_CREDIT	2048
 
@@ -757,9 +782,6 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
 
 void ipath_do_ruc_send(unsigned long data);
 
-u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
-		      u32 pmtu);
-
 int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
 		      u32 pmtu, u32 *bth0p, u32 *bth2p);
 
-- 
cgit v1.2.3


From 39c0d0b919ae5080163bd2d41c0271cda250d382 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Date: Thu, 15 Mar 2007 14:44:52 -0700
Subject: IB/ipath: Fix up some debug messages

ipath_dbg doesn't need the same prefixes that printk does.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_driver.c | 3 ++-
 drivers/infiniband/hw/ipath/ipath_keys.c   | 2 +-
 drivers/infiniband/hw/ipath/ipath_qp.c     | 6 +++---
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index c2fe5417ff4..53eb4550bcd 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1989,7 +1989,8 @@ static int __init infinipath_init(void)
 {
 	int ret;
 
-	ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
+	if (ipath_debug & __IPATH_DBG)
+		printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
 
 	/*
 	 * These must be called before the driver is registered with
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index 851763d7d2d..c93fa2f7719 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -61,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
 		r = (r + 1) & (rkt->max - 1);
 		if (r == n) {
 			spin_unlock_irqrestore(&rkt->lock, flags);
-			ipath_dbg(KERN_INFO "LKEY table full\n");
+			ipath_dbg("LKEY table full\n");
 			ret = 0;
 			goto bail;
 		}
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index c122fea9145..f119f001781 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -274,7 +274,7 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
 				free_qpn(qpt, qp->ibqp.qp_num);
 			if (!atomic_dec_and_test(&qp->refcount) ||
 			    !ipath_destroy_qp(&qp->ibqp))
-				ipath_dbg(KERN_INFO "QP memory leak!\n");
+				ipath_dbg("QP memory leak!\n");
 			qp = nqp;
 		}
 	}
@@ -369,7 +369,7 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ib_wc wc;
 
-	ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
+	ipath_dbg("QP%d/%d in error state\n",
 		  qp->ibqp.qp_num, qp->remote_qpn);
 
 	spin_lock(&dev->pending_lock);
@@ -980,7 +980,7 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
 
-	ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
+	ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
 		  qp->ibqp.qp_num, qp->remote_qpn, wc->status);
 
 	spin_lock(&dev->pending_lock);
-- 
cgit v1.2.3


From 0434d271fddaabd65aaa4dbd0145112d6e8aa388 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <ralph.campbell@qlogic.com>
Date: Thu, 15 Mar 2007 14:44:53 -0700
Subject: IB/ipath: Fix QP error completion queue entries

When switching to the QP error state, the completion queue entries
(error or flush) were not being generated correctly.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_qp.c | 8 +++++---
 drivers/infiniband/hw/ipath/ipath_rc.c | 2 ++
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index f119f001781..64ea807703c 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -361,7 +361,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
  * @err: the receive completion error to signal if a RWQE is active
  *
  * Flushes both send and receive work queues.
- * QP s_lock should be held and interrupts disabled.
+ * The QP s_lock should be held and interrupts disabled.
  */
 
 void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
@@ -393,6 +393,8 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 	wc.port_num = 0;
 	if (qp->r_wrid_valid) {
 		qp->r_wrid_valid = 0;
+		wc.wr_id = qp->r_wr_id;
+		wc.opcode = IB_WC_RECV;
 		wc.status = err;
 		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
 	}
@@ -972,7 +974,7 @@ bail:
  * @wc: the WC responsible for putting the QP in this state
  *
  * Flushes the send work queue.
- * The QP s_lock should be held.
+ * The QP s_lock should be held and interrupts disabled.
  */
 
 void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
@@ -998,12 +1000,12 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
 	wc->status = IB_WC_WR_FLUSH_ERR;
 
 	while (qp->s_last != qp->s_head) {
+		wqe = get_swqe_ptr(qp, qp->s_last);
 		wc->wr_id = wqe->wr.wr_id;
 		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
 		ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
 		if (++qp->s_last >= qp->s_size)
 			qp->s_last = 0;
-		wqe = get_swqe_ptr(qp, qp->s_last);
 	}
 	qp->s_cur = qp->s_tail = qp->s_head;
 	qp->state = IB_QPS_SQE;
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index c9c3d7cd292..2e4d544957a 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -895,8 +895,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
 			wc.vendor_err = 0;
 			wc.byte_len = wqe->length;
+			wc.imm_data = 0;
 			wc.qp = &qp->ibqp;
 			wc.src_qp = qp->remote_qpn;
+			wc.wc_flags = 0;
 			wc.pkey_index = 0;
 			wc.slid = qp->remote_ah_attr.dlid;
 			wc.sl = qp->remote_ah_attr.sl;
-- 
cgit v1.2.3


From 6f5c407460bba332d6bee52e19f2305539395511 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <ralph.campbell@qlogic.com>
Date: Thu, 15 Mar 2007 14:44:54 -0700
Subject: IB/ipath: Fix PSN update for RC retries

This patch fixes a number of bugs with updating the PSN for retries of
RC requests.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_rc.c | 65 ++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 27 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 2e4d544957a..d6aa14afa26 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -444,7 +444,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 			qp->s_psn = wqe->lpsn + 1;
 		else {
 			qp->s_psn++;
-			if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+			if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
 				qp->s_next_psn = qp->s_psn;
 		}
 		/*
@@ -471,7 +471,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
 		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
 			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
@@ -507,7 +507,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 		/* FALLTHROUGH */
 	case OP(RDMA_WRITE_MIDDLE):
 		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
 			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
@@ -546,7 +546,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 		qp->s_state = OP(RDMA_READ_REQUEST);
 		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
 		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
 			qp->s_next_psn = qp->s_psn;
 		ss = NULL;
 		len = 0;
@@ -779,7 +779,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
 		dev->n_rc_resends++;
 	else
-		dev->n_rc_resends += (int)qp->s_psn - (int)psn;
+		dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
 
 	reset_psn(qp, psn);
 	tasklet_hi_schedule(&qp->s_task);
@@ -915,15 +915,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 		if (qp->s_last == qp->s_cur) {
 			if (++qp->s_cur >= qp->s_size)
 				qp->s_cur = 0;
+			qp->s_last = qp->s_cur;
+			if (qp->s_last == qp->s_tail)
+				break;
 			wqe = get_swqe_ptr(qp, qp->s_cur);
 			qp->s_state = OP(SEND_LAST);
 			qp->s_psn = wqe->psn;
+		} else {
+			if (++qp->s_last >= qp->s_size)
+				qp->s_last = 0;
+			if (qp->s_last == qp->s_tail)
+				break;
+			wqe = get_swqe_ptr(qp, qp->s_last);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		if (qp->s_last == qp->s_tail)
-			break;
 	}
 
 	switch (aeth >> 29) {
@@ -935,6 +939,18 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 			list_add_tail(&qp->timerwait,
 				      &dev->pending[dev->pending_index]);
 			spin_unlock(&dev->pending_lock);
+			/*
+			 * If we get a partial ACK for a resent operation,
+			 * we can stop resending the earlier packets and
+			 * continue with the next packet the receiver wants.
+			 */
+			if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+				reset_psn(qp, psn + 1);
+				tasklet_hi_schedule(&qp->s_task);
+			}
+		} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+			qp->s_state = OP(SEND_LAST);
+			qp->s_psn = psn + 1;
 		}
 		ipath_get_credit(qp, aeth);
 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
@@ -945,22 +961,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 
 	case 1:		/* RNR NAK */
 		dev->n_rnr_naks++;
+		if (qp->s_last == qp->s_tail)
+			goto bail;
 		if (qp->s_rnr_retry == 0) {
-			if (qp->s_last == qp->s_tail)
-				goto bail;
-
 			wc.status = IB_WC_RNR_RETRY_EXC_ERR;
 			goto class_b;
 		}
 		if (qp->s_rnr_retry_cnt < 7)
 			qp->s_rnr_retry--;
-		if (qp->s_last == qp->s_tail)
-			goto bail;
 
 		/* The last valid PSN is the previous PSN. */
 		update_last_psn(qp, psn - 1);
 
-		dev->n_rc_resends += (int)qp->s_psn - (int)psn;
+		if (wqe->wr.opcode == IB_WR_RDMA_READ)
+			dev->n_rc_resends++;
+		else
+			dev->n_rc_resends +=
+				(qp->s_psn - psn) & IPATH_PSN_MASK;
 
 		reset_psn(qp, psn);
 
@@ -971,26 +988,20 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 		goto bail;
 
 	case 3:		/* NAK */
-		/* The last valid PSN seen is the previous request's. */
-		if (qp->s_last != qp->s_tail)
-			update_last_psn(qp, wqe->psn - 1);
+		if (qp->s_last == qp->s_tail)
+			goto bail;
+		/* The last valid PSN is the previous PSN. */
+		update_last_psn(qp, psn - 1);
 		switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
 			IPATH_AETH_CREDIT_MASK) {
 		case 0:	/* PSN sequence error */
 			dev->n_seq_naks++;
 			/*
-			 * Back up to the responder's expected PSN.  XXX
+			 * Back up to the responder's expected PSN.
 			 * Note that we might get a NAK in the middle of an
 			 * RDMA READ response which terminates the RDMA
 			 * READ.
 			 */
-			if (qp->s_last == qp->s_tail)
-				break;
-
-			if (ipath_cmp24(psn, wqe->psn) < 0)
-				break;
-
-			/* Retry the request. */
 			ipath_restart_rc(qp, psn, &wc);
 			break;
 
-- 
cgit v1.2.3


From 8ec1077b35359c973f4b1de7c516be570a6df495 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:44:55 -0700
Subject: IB/ipath: Change packet problems vs chip errors handling and
 reporting

Some types of packet errors are moderately common with longer IB
cables and large clusters, and are not reported with prints by other
IB HCA drivers.  This suppresses those messages unless the new
__IPATH_ERRPKTDBG bit is set in ipath_debug.  Reporting of temporarily
disabled frequent error interrupts was also made clearer

We also distinguish between chip errors, and bad packets sent or
received in the wording of the messages.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_debug.h     |  1 +
 drivers/infiniband/hw/ipath/ipath_driver.c    | 53 ++++++++++++++++++-------
 drivers/infiniband/hw/ipath/ipath_intr.c      | 57 +++++++++++++++++++--------
 drivers/infiniband/hw/ipath/ipath_kernel.h    |  2 +-
 drivers/infiniband/hw/ipath/ipath_registers.h |  9 +++++
 drivers/infiniband/hw/ipath/ipath_stats.c     | 14 +++++--
 6 files changed, 99 insertions(+), 37 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index df69f0d80b8..42bfbdb0d3e 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -57,6 +57,7 @@
 #define __IPATH_PROCDBG     0x100
 /* print mmap/nopage stuff, not using VDBG any more */
 #define __IPATH_MMDBG       0x200
+#define __IPATH_ERRPKTDBG   0x400
 #define __IPATH_USER_SEND   0x1000	/* use user mode send */
 #define __IPATH_KERNEL_SEND 0x2000	/* use kernel mode send */
 #define __IPATH_EPKTDBG     0x4000	/* print ethernet packet data */
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 53eb4550bcd..cf40cf2d1fb 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -754,9 +754,42 @@ static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
 	return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
 }
 
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
+/*
+ * Decode the error status into strings, deciding whether to always
+ * print * it or not depending on "normal packet errors" vs everything
+ * else.   Return 1 if "real" errors, otherwise 0 if only packet
+ * errors, so caller can decide what to print with the string.
+ */
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
 {
+	int iserr = 1;
 	*buf = '\0';
+	if (err & INFINIPATH_E_PKTERRS) {
+		if (!(err & ~INFINIPATH_E_PKTERRS))
+			iserr = 0; // if only packet errors.
+		if (ipath_debug & __IPATH_ERRPKTDBG) {
+			if (err & INFINIPATH_E_REBP)
+				strlcat(buf, "EBP ", blen);
+			if (err & INFINIPATH_E_RVCRC)
+				strlcat(buf, "VCRC ", blen);
+			if (err & INFINIPATH_E_RICRC) {
+				strlcat(buf, "CRC ", blen);
+				// clear for check below, so only once
+				err &= INFINIPATH_E_RICRC;
+			}
+			if (err & INFINIPATH_E_RSHORTPKTLEN)
+				strlcat(buf, "rshortpktlen ", blen);
+			if (err & INFINIPATH_E_SDROPPEDDATAPKT)
+				strlcat(buf, "sdroppeddatapkt ", blen);
+			if (err & INFINIPATH_E_SPKTLEN)
+				strlcat(buf, "spktlen ", blen);
+		}
+		if ((err & INFINIPATH_E_RICRC) &&
+			!(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
+			strlcat(buf, "CRC ", blen);
+		if (!iserr)
+			goto done;
+	}
 	if (err & INFINIPATH_E_RHDRLEN)
 		strlcat(buf, "rhdrlen ", blen);
 	if (err & INFINIPATH_E_RBADTID)
@@ -767,12 +800,12 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
 		strlcat(buf, "rhdr ", blen);
 	if (err & INFINIPATH_E_RLONGPKTLEN)
 		strlcat(buf, "rlongpktlen ", blen);
-	if (err & INFINIPATH_E_RSHORTPKTLEN)
-		strlcat(buf, "rshortpktlen ", blen);
 	if (err & INFINIPATH_E_RMAXPKTLEN)
 		strlcat(buf, "rmaxpktlen ", blen);
 	if (err & INFINIPATH_E_RMINPKTLEN)
 		strlcat(buf, "rminpktlen ", blen);
+	if (err & INFINIPATH_E_SMINPKTLEN)
+		strlcat(buf, "sminpktlen ", blen);
 	if (err & INFINIPATH_E_RFORMATERR)
 		strlcat(buf, "rformaterr ", blen);
 	if (err & INFINIPATH_E_RUNSUPVL)
@@ -781,32 +814,20 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
 		strlcat(buf, "runexpchar ", blen);
 	if (err & INFINIPATH_E_RIBFLOW)
 		strlcat(buf, "ribflow ", blen);
-	if (err & INFINIPATH_E_REBP)
-		strlcat(buf, "EBP ", blen);
 	if (err & INFINIPATH_E_SUNDERRUN)
 		strlcat(buf, "sunderrun ", blen);
 	if (err & INFINIPATH_E_SPIOARMLAUNCH)
 		strlcat(buf, "spioarmlaunch ", blen);
 	if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
 		strlcat(buf, "sunexperrpktnum ", blen);
-	if (err & INFINIPATH_E_SDROPPEDDATAPKT)
-		strlcat(buf, "sdroppeddatapkt ", blen);
 	if (err & INFINIPATH_E_SDROPPEDSMPPKT)
 		strlcat(buf, "sdroppedsmppkt ", blen);
 	if (err & INFINIPATH_E_SMAXPKTLEN)
 		strlcat(buf, "smaxpktlen ", blen);
-	if (err & INFINIPATH_E_SMINPKTLEN)
-		strlcat(buf, "sminpktlen ", blen);
 	if (err & INFINIPATH_E_SUNSUPVL)
 		strlcat(buf, "sunsupVL ", blen);
-	if (err & INFINIPATH_E_SPKTLEN)
-		strlcat(buf, "spktlen ", blen);
 	if (err & INFINIPATH_E_INVALIDADDR)
 		strlcat(buf, "invalidaddr ", blen);
-	if (err & INFINIPATH_E_RICRC)
-		strlcat(buf, "CRC ", blen);
-	if (err & INFINIPATH_E_RVCRC)
-		strlcat(buf, "VCRC ", blen);
 	if (err & INFINIPATH_E_RRCVEGRFULL)
 		strlcat(buf, "rcvegrfull ", blen);
 	if (err & INFINIPATH_E_RRCVHDRFULL)
@@ -819,6 +840,8 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
 		strlcat(buf, "hardware ", blen);
 	if (err & INFINIPATH_E_RESET)
 		strlcat(buf, "reset ", blen);
+done:
+	return iserr;
 }
 
 /**
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 72b9e279d19..037b8e27642 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -403,10 +403,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
 	 * happens so often we never want to count it.
 	 */
 	if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
-		ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror &
-				 ~INFINIPATH_E_IBSTATUSCHANGED);
+		int iserr;
+		iserr = ipath_decode_err(msg, sizeof msg,
+				dd->ipath_lasterror &
+				~INFINIPATH_E_IBSTATUSCHANGED);
 		if (dd->ipath_lasterror &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+			~(INFINIPATH_E_RRCVEGRFULL |
+			INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
 			ipath_dev_err(dd, "Suppressed %u messages for "
 				      "fast-repeating errors (%s) (%llx)\n",
 				      supp_msgs, msg,
@@ -420,8 +423,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
 			 * them. So only complain about these at debug
 			 * level.
 			 */
-			ipath_dbg("Suppressed %u messages for %s\n",
-				  supp_msgs, msg);
+			if (iserr)
+				ipath_dbg("Suppressed %u messages for %s\n",
+					  supp_msgs, msg);
+			else
+				ipath_cdbg(ERRPKT,
+					"Suppressed %u messages for %s\n",
+					  supp_msgs, msg);
 		}
 	}
 }
@@ -462,7 +470,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 {
 	char msg[512];
 	u64 ignore_this_time = 0;
-	int i;
+	int i, iserr = 0;
 	int chkerrpkts = 0, noprint = 0;
 	unsigned supp_msgs;
 
@@ -502,6 +510,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 	}
 
 	if (supp_msgs == 250000) {
+		int s_iserr;
 		/*
 		 * It's not entirely reasonable assuming that the errors set
 		 * in the last clear period are all responsible for the
@@ -511,17 +520,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
 				 ~dd->ipath_maskederrs);
-		ipath_decode_err(msg, sizeof msg,
+		s_iserr = ipath_decode_err(msg, sizeof msg,
 				 (dd->ipath_maskederrs & ~dd->
 				  ipath_ignorederrs));
 
 		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
-			ipath_dev_err(dd, "Disabling error(s) %llx because "
-				      "occurring too frequently (%s)\n",
-				      (unsigned long long)
-				      (dd->ipath_maskederrs &
-				       ~dd->ipath_ignorederrs), msg);
+			~(INFINIPATH_E_RRCVEGRFULL |
+			INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
+			ipath_dev_err(dd, "Temporarily disabling "
+			    "error(s) %llx reporting; too frequent (%s)\n",
+				(unsigned long long) (dd->ipath_maskederrs &
+				~dd->ipath_ignorederrs), msg);
 		else {
 			/*
 			 * rcvegrfull and rcvhdrqfull are "normal",
@@ -530,8 +539,15 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 			 * processing them.  So only complain about
 			 * these at debug level.
 			 */
-			ipath_dbg("Disabling frequent queue full errors "
-				  "(%s)\n", msg);
+			if (s_iserr)
+				ipath_dbg("Temporarily disabling reporting "
+				    "too frequent queue full errors (%s)\n",
+				    msg);
+			else
+				ipath_cdbg(ERRPKT,
+				    "Temporarily disabling reporting too"
+				    " frequent packet errors (%s)\n",
+				    msg);
 		}
 
 		/*
@@ -589,6 +605,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 		ipath_stats.sps_crcerrs++;
 		chkerrpkts = 1;
 	}
+	iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
+
 
 	/*
 	 * We don't want to print these two as they happen, or we can make
@@ -677,8 +695,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 		*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
 	}
 
-	if (!noprint && *msg)
-		ipath_dev_err(dd, "%s error\n", msg);
+	if (!noprint && *msg) {
+		if (iserr)
+			ipath_dev_err(dd, "%s error\n", msg);
+		else
+			dev_info(&dd->pcidev->dev, "%s packet problems\n",
+				msg);
+	}
 	if (dd->ipath_state_wanted & dd->ipath_flags) {
 		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
 			   "waking\n", dd->ipath_state_wanted,
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index c8df65a4d19..a2162853f5b 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -611,7 +611,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
 extern int ipath_diag_inuse;
 
 irqreturn_t ipath_intr(int irq, void *devid);
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
 #if __IPATH_INFO || __IPATH_DBG
 extern const char *ipath_ibcstatus_str[];
 #endif
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index e0b20529da8..6e99eafdfd7 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -126,6 +126,15 @@
 #define INFINIPATH_E_RESET           0x0004000000000000ULL
 #define INFINIPATH_E_HARDWARE        0x0008000000000000ULL
 
+/*
+ * this is used to print "common" packet errors only when the
+ * __IPATH_ERRPKTDBG bit is set in ipath_debug.
+ */
+#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
+		| INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
+		| INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
+		| INFINIPATH_E_REBP )
+
 /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
 /* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
  * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2:  expTID, 3: eagerTID
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 30a825928fc..a627342a969 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -237,11 +237,13 @@ void ipath_get_faststats(unsigned long opaque)
 	if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
 	    && time_after(jiffies, dd->ipath_unmasktime)) {
 		char ebuf[256];
-		ipath_decode_err(ebuf, sizeof ebuf,
+		int iserr;
+		iserr = ipath_decode_err(ebuf, sizeof ebuf,
 				 (dd->ipath_maskederrs & ~dd->
 				  ipath_ignorederrs));
 		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+				~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+				INFINIPATH_E_PKTERRS ))
 			ipath_dev_err(dd, "Re-enabling masked errors "
 				      "(%s)\n", ebuf);
 		else {
@@ -252,8 +254,12 @@ void ipath_get_faststats(unsigned long opaque)
 			 * them.  So only complain about these at debug
 			 * level.
 			 */
-			ipath_dbg("Disabling frequent queue full errors "
-				  "(%s)\n", ebuf);
+			if (iserr)
+					ipath_dbg("Re-enabling queue full errors (%s)\n",
+							ebuf);
+			else
+				ipath_cdbg(ERRPKT, "Re-enabling packet"
+						" problem interrupt (%s)\n", ebuf);
 		}
 		dd->ipath_maskederrs = dd->ipath_ignorederrs;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-- 
cgit v1.2.3


From 614d49a21e96737f84b13f644ac813f8eb6d297a Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:44:56 -0700
Subject: IB/ipath: Fix bad argument to clear_bit()

Code was converted from a &= ~mask to clear_bit, but the bit was left
shifted instead of being used directly, so we were either trashing
memory several pages away, or sometimes taking a kernel page fault on
an invalid page.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_intr.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 037b8e27642..24853310df1 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -842,11 +842,10 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
 		struct ipath_portdata *pd = dd->ipath_pd[i];
 		if (portr & (1 << i) && pd && pd->port_cnt &&
 			test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
-			int rcbit;
 			clear_bit(IPATH_PORT_WAITING_RCV,
 				  &pd->port_flag);
-			rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
-			clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
+			clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
+				  &dd->ipath_rcvctrl);
 			wake_up_interruptible(&pd->port_wait);
 			rcvdint = 1;
 		}
-- 
cgit v1.2.3


From 041eab9136d8325c332429df71d05ba3e0ea8ebc Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:44:57 -0700
Subject: IB/ipath: Fix CQ flushing when QP is modified to error state

If a receive work request has been removed from the queue but has not
had a CQ entry generated for it and the QP is modified to the error
state, the completion entry generated is incorrect.  This patch fixes
the problem.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_qp.c | 2 +-
 drivers/infiniband/hw/ipath/ipath_ud.c | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 64ea807703c..f671fd07325 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -519,7 +519,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		break;
 
 	case IB_QPS_ERR:
-		ipath_error_qp(qp, IB_WC_GENERAL_ERR);
+		ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 		break;
 
 	default:
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index a20261c0b4f..57625b8c1b9 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -647,6 +647,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
 	ipath_copy_sge(&qp->r_sge, data,
 		       wc.byte_len - sizeof(struct ib_grh));
+	qp->r_wrid_valid = 0;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
 	wc.opcode = IB_WC_RECV;
-- 
cgit v1.2.3


From 0a5a83cffc03592c2102ad07b7532b596a16f8cd Mon Sep 17 00:00:00 2001
From: Ralph Campbell <ralph.campbell@qlogic.com>
Date: Thu, 15 Mar 2007 14:44:58 -0700
Subject: IB/ipath: Fix port sharing on powerpc

The port sharing feature mixed kernel virtual addresses as well as
physical addresses for the offset used to describe the mmap address to
map the InfiniPath hardware into user space.  This had a conflict on
powerpc.  The new scheme converts it to a physical address so it
doesn't conflict with chip addresses and yet still fits in 40/44 bits
so it isn't truncated by 32-bit applications calling mmap64().

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_file_ops.c | 88 +++++++++++++++-------------
 1 file changed, 46 insertions(+), 42 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 5de1dd49722..a1cfedf8fb1 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -41,12 +41,6 @@
 #include "ipath_kernel.h"
 #include "ipath_common.h"
 
-/*
- * mmap64 doesn't allow all 64 bits for 32-bit applications
- * so only use the low 43 bits.
- */
-#define MMAP64_MASK	0x7FFFFFFFFFFUL
-
 static int ipath_open(struct inode *, struct file *);
 static int ipath_close(struct inode *, struct file *);
 static ssize_t ipath_write(struct file *, const char __user *, size_t,
@@ -63,6 +57,24 @@ static const struct file_operations ipath_file_ops = {
 	.mmap = ipath_mmap
 };
 
+/*
+ * Convert kernel virtual addresses to physical addresses so they don't
+ * potentially conflict with the chip addresses used as mmap offsets.
+ * It doesn't really matter what mmap offset we use as long as we can
+ * interpret it correctly.
+ */
+static u64 cvt_kvaddr(void *p)
+{
+	struct page *page;
+	u64 paddr = 0;
+
+	page = vmalloc_to_page(p);
+	if (page)
+		paddr = page_to_pfn(page) << PAGE_SHIFT;
+
+	return paddr;
+}
+
 static int ipath_get_base_info(struct file *fp,
 			       void __user *ubase, size_t ubase_size)
 {
@@ -173,15 +185,14 @@ static int ipath_get_base_info(struct file *fp,
 		kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
 			dd->ipath_palign * kinfo->spi_piocnt * slave;
-		kinfo->__spi_uregbase = ((u64) pd->subport_uregbase +
-			PAGE_SIZE * slave) & MMAP64_MASK;
+		kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
+			PAGE_SIZE * slave);
 
-		kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base +
-			pd->port_rcvhdrq_size * slave) & MMAP64_MASK;
+		kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
+			pd->port_rcvhdrq_size * slave);
 		kinfo->spi_rcvhdr_tailaddr = 0;
-		kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf +
-			dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) &
-			MMAP64_MASK;
+		kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
+			dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave);
 	}
 
 	kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
@@ -199,11 +210,11 @@ static int ipath_get_base_info(struct file *fp,
 	if (master) {
 		kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
 		kinfo->spi_subport_uregbase =
-			(u64) pd->subport_uregbase & MMAP64_MASK;
+			cvt_kvaddr(pd->subport_uregbase);
 		kinfo->spi_subport_rcvegrbuf =
-			(u64) pd->subport_rcvegrbuf & MMAP64_MASK;
+			cvt_kvaddr(pd->subport_rcvegrbuf);
 		kinfo->spi_subport_rcvhdr_base =
-			(u64) pd->subport_rcvhdr_base & MMAP64_MASK;
+			cvt_kvaddr(pd->subport_rcvhdr_base);
 		ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
 			kinfo->spi_port, kinfo->spi_runtime_flags,
 			(unsigned long long) kinfo->spi_subport_uregbase,
@@ -1131,13 +1142,11 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
 	struct ipath_devdata *dd;
 	void *addr;
 	size_t size;
-	int ret;
+	int ret = 0;
 
 	/* If the port is not shared, all addresses should be physical */
-	if (!pd->port_subport_cnt) {
-		ret = -EINVAL;
+	if (!pd->port_subport_cnt)
 		goto bail;
-	}
 
 	dd = pd->port_dd;
 	size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
@@ -1149,33 +1158,28 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
 	if (subport == 0) {
 		unsigned num_slaves = pd->port_subport_cnt - 1;
 
-		if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) {
+		if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
 			addr = pd->subport_uregbase;
 			size = PAGE_SIZE * num_slaves;
-		} else if (pgaddr == ((u64) pd->subport_rcvhdr_base &
-				      MMAP64_MASK)) {
+		} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
 			addr = pd->subport_rcvhdr_base;
 			size = pd->port_rcvhdrq_size * num_slaves;
-		} else if (pgaddr == ((u64) pd->subport_rcvegrbuf &
-				      MMAP64_MASK)) {
+		} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
 			addr = pd->subport_rcvegrbuf;
 			size *= num_slaves;
-		} else {
-			ret = -EINVAL;
+		} else
 			goto bail;
-		}
-	} else if (pgaddr == (((u64) pd->subport_uregbase +
-			       PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) {
+	} else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
+					PAGE_SIZE * (subport - 1))) {
 		addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1);
 		size = PAGE_SIZE;
-	} else if (pgaddr == (((u64) pd->subport_rcvhdr_base +
-			       pd->port_rcvhdrq_size * (subport - 1)) &
-			      MMAP64_MASK)) {
+	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
+				pd->port_rcvhdrq_size * (subport - 1))) {
 		addr = pd->subport_rcvhdr_base +
 			pd->port_rcvhdrq_size * (subport - 1);
 		size = pd->port_rcvhdrq_size;
-	} else if (pgaddr == (((u64) pd->subport_rcvegrbuf +
-			       size * (subport - 1)) & MMAP64_MASK)) {
+	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
+			       size * (subport - 1))) {
 		addr = pd->subport_rcvegrbuf + size * (subport - 1);
 		/* rcvegrbufs are read-only on the slave */
 		if (vma->vm_flags & VM_WRITE) {
@@ -1190,10 +1194,8 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
 		 * with mprotect.
 		 */
 		vma->vm_flags &= ~VM_MAYWRITE;
-	} else {
-		ret = -EINVAL;
+	} else
 		goto bail;
-	}
 	len = vma->vm_end - vma->vm_start;
 	if (len > size) {
 		ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
@@ -1204,7 +1206,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
 	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
 	vma->vm_ops = &ipath_file_vm_ops;
 	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
-	ret = 0;
+	ret = 1;
 
 bail:
 	return ret;
@@ -1264,8 +1266,10 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
 	 * Check for kernel virtual addresses first, anything else must
 	 * match a HW or memory address.
 	 */
-	if (pgaddr >= (1ULL<<40)) {
-		ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+	ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+	if (ret) {
+		if (ret > 0)
+			ret = 0;
 		goto bail;
 	}
 
@@ -1411,7 +1415,7 @@ static int init_subports(struct ipath_devdata *dd,
 	 */
 	if (uinfo->spu_subport_cnt <= 1)
 		goto bail;
-	if (uinfo->spu_subport_cnt > 4) {
+	if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
 		ret = -EINVAL;
 		goto bail;
 	}
-- 
cgit v1.2.3


From c7e29ff11f23ec78b3caf691789c2b791bb596bf Mon Sep 17 00:00:00 2001
From: Mark Debbage <mark.debbage@qlogic.com>
Date: Thu, 15 Mar 2007 14:44:59 -0700
Subject: IB/ipath: Allow receive ports mapped into userspace to be shared

Improve port-sharing performance by allowing any process to receive
packets from the shared hardware port under a spin lock for mutual
exclusion. Previously, one process was nominated as the master and
that process was responsible for receiving all packets from the shared
hardware port and either consuming them or forwarding them to their
destination. This led to starvation problems for other processes when
the master process was busy in computation phases.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_common.h   |  16 ++-
 drivers/infiniband/hw/ipath/ipath_file_ops.c | 171 +++++++++++++++------------
 2 files changed, 104 insertions(+), 83 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index a4136780352..048b928bb4b 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -318,11 +318,17 @@ struct ipath_base_info {
 	/* address of readonly memory copy of the rcvhdrq tail register. */
 	__u64 spi_rcvhdr_tailaddr;
 
-	/* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */
+	/* shared memory pages for subports if port is shared */
 	__u64 spi_subport_uregbase;
 	__u64 spi_subport_rcvegrbuf;
 	__u64 spi_subport_rcvhdr_base;
 
+	/* shared memory page for hardware port if it is shared */
+	__u64 spi_port_uregbase;
+	__u64 spi_port_rcvegrbuf;
+	__u64 spi_port_rcvhdr_base;
+	__u64 spi_port_rcvhdr_tailaddr;
+
 } __attribute__ ((aligned(8)));
 
 
@@ -346,7 +352,7 @@ struct ipath_base_info {
  * may not be implemented; the user code must deal with this if it
  * cares, or it must abort after initialization reports the difference.
  */
-#define IPATH_USER_SWMINOR 3
+#define IPATH_USER_SWMINOR 4
 
 #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
 
@@ -420,7 +426,7 @@ struct ipath_user_info {
 #define IPATH_CMD_TID_UPDATE	19	/* update expected TID entries */
 #define IPATH_CMD_TID_FREE	20	/* free expected TID entries */
 #define IPATH_CMD_SET_PART_KEY	21	/* add partition key */
-#define IPATH_CMD_SLAVE_INFO	22	/* return info on slave processes */
+#define __IPATH_CMD_SLAVE_INFO	22	/* return info on slave processes (for old user code) */
 #define IPATH_CMD_ASSIGN_PORT	23	/* allocate HCA and port */
 #define IPATH_CMD_USER_INIT 	24	/* set up userspace */
 
@@ -432,7 +438,7 @@ struct ipath_port_info {
 	__u16 port;		/* port on unit assigned to caller */
 	__u16 subport;		/* subport on unit assigned to caller */
 	__u16 num_ports;	/* number of ports available on unit */
-	__u16 num_subports;	/* number of subport slaves opened on port */
+	__u16 num_subports;	/* number of subports opened on port */
 };
 
 struct ipath_tid_info {
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index a1cfedf8fb1..bb53bde80ee 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -99,7 +99,7 @@ static int ipath_get_base_info(struct file *fp,
 	sz = sizeof(*kinfo);
 	/* If port sharing is not requested, allow the old size structure */
 	if (!shared)
-		sz -= 3 * sizeof(u64);
+		sz -= 7 * sizeof(u64);
 	if (ubase_size < sz) {
 		ipath_cdbg(PROC,
 			   "Base size %zu, need %zu (version mismatch?)\n",
@@ -177,22 +177,41 @@ static int ipath_get_base_info(struct file *fp,
 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
 			dd->ipath_palign *
 			(dd->ipath_pbufsport - kinfo->spi_piocnt);
-		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
-			dd->ipath_palign * pd->port_port;
 	} else {
 		unsigned slave = subport_fp(fp) - 1;
 
 		kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
 			dd->ipath_palign * kinfo->spi_piocnt * slave;
+	}
+	if (shared) {
+		kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
+			dd->ipath_palign * pd->port_port;
+		kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
+		kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
+		kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
+
 		kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
-			PAGE_SIZE * slave);
+			PAGE_SIZE * subport_fp(fp));
 
 		kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
-			pd->port_rcvhdrq_size * slave);
+			pd->port_rcvhdrq_size * subport_fp(fp));
 		kinfo->spi_rcvhdr_tailaddr = 0;
 		kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
-			dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave);
+			pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
+			subport_fp(fp));
+
+		kinfo->spi_subport_uregbase =
+			cvt_kvaddr(pd->subport_uregbase);
+		kinfo->spi_subport_rcvegrbuf =
+			cvt_kvaddr(pd->subport_rcvegrbuf);
+		kinfo->spi_subport_rcvhdr_base =
+			cvt_kvaddr(pd->subport_rcvhdr_base);
+		ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
+			kinfo->spi_port, kinfo->spi_runtime_flags,
+			(unsigned long long) kinfo->spi_subport_uregbase,
+			(unsigned long long) kinfo->spi_subport_rcvegrbuf,
+			(unsigned long long) kinfo->spi_subport_rcvhdr_base);
 	}
 
 	kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
@@ -209,20 +228,10 @@ static int ipath_get_base_info(struct file *fp,
 
 	if (master) {
 		kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
-		kinfo->spi_subport_uregbase =
-			cvt_kvaddr(pd->subport_uregbase);
-		kinfo->spi_subport_rcvegrbuf =
-			cvt_kvaddr(pd->subport_rcvegrbuf);
-		kinfo->spi_subport_rcvhdr_base =
-			cvt_kvaddr(pd->subport_rcvhdr_base);
-		ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
-			kinfo->spi_port, kinfo->spi_runtime_flags,
-			(unsigned long long) kinfo->spi_subport_uregbase,
-			(unsigned long long) kinfo->spi_subport_rcvegrbuf,
-			(unsigned long long) kinfo->spi_subport_rcvhdr_base);
 	}
 
-	if (copy_to_user(ubase, kinfo, sizeof(*kinfo)))
+	sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
+	if (copy_to_user(ubase, kinfo, sz))
 		ret = -EFAULT;
 
 bail:
@@ -1152,50 +1161,47 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
 	size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
 
 	/*
-	 * Master has all the slave uregbase, rcvhdrq, and
-	 * rcvegrbufs mmapped.
+	 * Each process has all the subport uregbase, rcvhdrq, and
+	 * rcvegrbufs mmapped - as an array for all the processes,
+	 * and also separately for this process.
 	 */
-	if (subport == 0) {
-		unsigned num_slaves = pd->port_subport_cnt - 1;
-
-		if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
-			addr = pd->subport_uregbase;
-			size = PAGE_SIZE * num_slaves;
-		} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
-			addr = pd->subport_rcvhdr_base;
-			size = pd->port_rcvhdrq_size * num_slaves;
-		} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
-			addr = pd->subport_rcvegrbuf;
-			size *= num_slaves;
-		} else
-			goto bail;
-	} else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
-					PAGE_SIZE * (subport - 1))) {
-		addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1);
-		size = PAGE_SIZE;
-	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
-				pd->port_rcvhdrq_size * (subport - 1))) {
-		addr = pd->subport_rcvhdr_base +
-			pd->port_rcvhdrq_size * (subport - 1);
-		size = pd->port_rcvhdrq_size;
-	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
-			       size * (subport - 1))) {
-		addr = pd->subport_rcvegrbuf + size * (subport - 1);
-		/* rcvegrbufs are read-only on the slave */
-		if (vma->vm_flags & VM_WRITE) {
-			dev_info(&dd->pcidev->dev,
-				 "Can't map eager buffers as "
-				 "writable (flags=%lx)\n", vma->vm_flags);
-			ret = -EPERM;
-			goto bail;
-		}
-		/*
-		 * Don't allow permission to later change to writeable
-		 * with mprotect.
-		 */
-		vma->vm_flags &= ~VM_MAYWRITE;
-	} else
+	if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
+		addr = pd->subport_uregbase;
+		size = PAGE_SIZE * pd->port_subport_cnt;
+	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
+		addr = pd->subport_rcvhdr_base;
+		size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
+	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
+		addr = pd->subport_rcvegrbuf;
+		size *= pd->port_subport_cnt;
+        } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
+                                        PAGE_SIZE * subport)) {
+                addr = pd->subport_uregbase + PAGE_SIZE * subport;
+                size = PAGE_SIZE;
+        } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
+                                pd->port_rcvhdrq_size * subport)) {
+                addr = pd->subport_rcvhdr_base +
+                        pd->port_rcvhdrq_size * subport;
+                size = pd->port_rcvhdrq_size;
+        } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
+                               size * subport)) {
+                addr = pd->subport_rcvegrbuf + size * subport;
+                /* rcvegrbufs are read-only on the slave */
+                if (vma->vm_flags & VM_WRITE) {
+                        dev_info(&dd->pcidev->dev,
+                                 "Can't map eager buffers as "
+                                 "writable (flags=%lx)\n", vma->vm_flags);
+                        ret = -EPERM;
+                        goto bail;
+                }
+                /*
+                 * Don't allow permission to later change to writeable
+                 * with mprotect.
+                 */
+                vma->vm_flags &= ~VM_MAYWRITE;
+	} else {
 		goto bail;
+	}
 	len = vma->vm_end - vma->vm_start;
 	if (len > size) {
 		ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
@@ -1273,14 +1279,13 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
 		goto bail;
 	}
 
+	ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
 	if (!pd->port_subport_cnt) {
 		/* port is not shared */
-		ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
 		piocnt = dd->ipath_pbufsport;
 		piobufs = pd->port_piobufs;
 	} else if (!subport_fp(fp)) {
 		/* caller is the master */
-		ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
 		piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
 			 (dd->ipath_pbufsport % pd->port_subport_cnt);
 		piobufs = pd->port_piobufs +
@@ -1289,7 +1294,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
 		unsigned slave = subport_fp(fp) - 1;
 
 		/* caller is a slave */
-		ureg = 0;
 		piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
 		piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
 	}
@@ -1303,9 +1307,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
 		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
 			      	     (void *) dd->ipath_pioavailregs_dma,
 				     "pioavail registers");
-	else if (subport_fp(fp))
-		/* Subports don't mmap the physical receive buffers */
-		ret = -EINVAL;
 	else if (pgaddr == pd->port_rcvegr_phys)
 		ret = mmap_rcvegrbufs(vma, pd);
 	else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
@@ -1403,32 +1404,41 @@ static int init_subports(struct ipath_devdata *dd,
 			 const struct ipath_user_info *uinfo)
 {
 	int ret = 0;
-	unsigned num_slaves;
+	unsigned num_subports;
 	size_t size;
 
-	/* Old user binaries don't know about subports */
-	if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
-		goto bail;
 	/*
 	 * If the user is requesting zero or one port,
 	 * skip the subport allocation.
 	 */
 	if (uinfo->spu_subport_cnt <= 1)
 		goto bail;
+
+	/* Old user binaries don't know about new subport implementation */
+	if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) {
+		dev_info(&dd->pcidev->dev,
+			 "Mismatched user minor version (%d) and driver "
+                         "minor version (%d) while port sharing. Ensure "
+                         "that driver and library are from the same "
+                         "release.\n",
+                         (int) (uinfo->spu_userversion & 0xffff),
+	                 IPATH_USER_SWMINOR);
+		goto bail;
+	}
 	if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
 		ret = -EINVAL;
 		goto bail;
 	}
 
-	num_slaves = uinfo->spu_subport_cnt - 1;
-	pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves);
+	num_subports = uinfo->spu_subport_cnt;
+	pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports);
 	if (!pd->subport_uregbase) {
 		ret = -ENOMEM;
 		goto bail;
 	}
 	/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
 	size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
-		     sizeof(u32), PAGE_SIZE) * num_slaves;
+		     sizeof(u32), PAGE_SIZE) * num_subports;
 	pd->subport_rcvhdr_base = vmalloc(size);
 	if (!pd->subport_rcvhdr_base) {
 		ret = -ENOMEM;
@@ -1437,7 +1447,7 @@ static int init_subports(struct ipath_devdata *dd,
 
 	pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
 					pd->port_rcvegrbuf_size *
-					num_slaves);
+					num_subports);
 	if (!pd->subport_rcvegrbuf) {
 		ret = -ENOMEM;
 		goto bail_rhdr;
@@ -1447,6 +1457,11 @@ static int init_subports(struct ipath_devdata *dd,
 	pd->port_subport_id = uinfo->spu_subport_id;
 	pd->active_slaves = 1;
 	set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
+	memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports);
+	memset(pd->subport_rcvhdr_base, 0, size);
+	memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks *
+				         pd->port_rcvegrbuf_size *
+				         num_subports);
 	goto bail;
 
 bail_rhdr:
@@ -1588,7 +1603,7 @@ static int find_best_unit(struct file *fp,
 		if (curcpu != -1) {
 			if (npresent) {
 				prefunit = curcpu / (ncpus / npresent);
-				ipath_dbg("%s[%u] %d chips, %d cpus, "
+				ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
 					  "%d cpus/chip, select unit %d\n",
 					  current->comm, current->pid,
 					  npresent, ncpus, ncpus / npresent,
@@ -2086,7 +2101,7 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
 		dest = &cmd.cmd.part_key;
 		src = &ucmd->cmd.part_key;
 		break;
-	case IPATH_CMD_SLAVE_INFO:
+	case __IPATH_CMD_SLAVE_INFO:
 		copy = sizeof(cmd.cmd.slave_mask_addr);
 		dest = &cmd.cmd.slave_mask_addr;
 		src = &ucmd->cmd.slave_mask_addr;
@@ -2152,7 +2167,7 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
 	case IPATH_CMD_SET_PART_KEY:
 		ret = ipath_set_part_key(pd, cmd.cmd.part_key);
 		break;
-	case IPATH_CMD_SLAVE_INFO:
+	case __IPATH_CMD_SLAVE_INFO:
 		ret = ipath_get_slave_info(pd,
 					   (void __user *) (unsigned long)
 					   cmd.cmd.slave_mask_addr);
-- 
cgit v1.2.3


From dd5190b6be0f3e27b6a4933a6a6d2d59957fc748 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <ralph.campbell@qlogic.com>
Date: Thu, 15 Mar 2007 14:45:00 -0700
Subject: IB/ipath: Fix RDMA reads of length zero and error handling

Fix RDMA read response length checking for RDMA_READ_RESPONSE_ONLY to
allow a zero length response.  RDMA read responses which don't match
the expected length or occur in response to some other operation
should generate a completion queue error (see table 56, ch. 9.9.2.3 in
the IB spec).

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_rc.c | 63 ++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 19 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index d6aa14afa26..b4b88d0b53f 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -1136,7 +1136,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 			goto ack_done;
 		hdrsize += 4;
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-			goto ack_done;
+			goto ack_op_err;
 		/*
 		 * If this is a response to a resent RDMA read, we
 		 * have to be careful to copy the data to the right
@@ -1154,12 +1154,12 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 			goto ack_done;
 		}
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-			goto ack_done;
+			goto ack_op_err;
 	read_middle:
 		if (unlikely(tlen != (hdrsize + pmtu + 4)))
-			goto ack_done;
+			goto ack_len_err;
 		if (unlikely(pmtu >= qp->s_rdma_read_len))
-			goto ack_done;
+			goto ack_len_err;
 
 		/* We got a response so update the timeout. */
 		spin_lock(&dev->pending_lock);
@@ -1184,12 +1184,20 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 			goto ack_done;
 		}
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-			goto ack_done;
+			goto ack_op_err;
+		/* Get the number of bytes the message was padded by. */
+		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+		/*
+		 * Check that the data size is >= 0 && <= pmtu.
+		 * Remember to account for the AETH header (4) and
+		 * ICRC (4).
+		 */
+		if (unlikely(tlen < (hdrsize + pad + 8)))
+			goto ack_len_err;
 		/*
 		 * If this is a response to a resent RDMA read, we
 		 * have to be careful to copy the data to the right
 		 * location.
-		 * XXX should check PSN and wqe opcode first.
 		 */
 		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
 						  wqe, psn, pmtu);
@@ -1203,26 +1211,20 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 			goto ack_done;
 		}
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-			goto ack_done;
-	read_last:
-		/*
-		 * Get the number of bytes the message was padded by.
-		 */
+			goto ack_op_err;
+		/* Get the number of bytes the message was padded by. */
 		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
 		/*
 		 * Check that the data size is >= 1 && <= pmtu.
 		 * Remember to account for the AETH header (4) and
 		 * ICRC (4).
 		 */
-		if (unlikely(tlen <= (hdrsize + pad + 8))) {
-			/* XXX Need to generate an error CQ entry. */
-			goto ack_done;
-		}
+		if (unlikely(tlen <= (hdrsize + pad + 8)))
+			goto ack_len_err;
+	read_last:
 		tlen -= hdrsize + pad + 8;
-		if (unlikely(tlen != qp->s_rdma_read_len)) {
-			/* XXX Need to generate an error CQ entry. */
-			goto ack_done;
-		}
+		if (unlikely(tlen != qp->s_rdma_read_len))
+			goto ack_len_err;
 		if (!header_in_data)
 			aeth = be32_to_cpu(ohdr->u.aeth);
 		else {
@@ -1236,6 +1238,29 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 
 ack_done:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
+	goto bail;
+
+ack_op_err:
+	wc.status = IB_WC_LOC_QP_OP_ERR;
+	goto ack_err;
+
+ack_len_err:
+	wc.status = IB_WC_LOC_LEN_ERR;
+ack_err:
+	wc.wr_id = wqe->wr.wr_id;
+	wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
+	wc.vendor_err = 0;
+	wc.byte_len = 0;
+	wc.imm_data = 0;
+	wc.qp = &qp->ibqp;
+	wc.src_qp = qp->remote_qpn;
+	wc.wc_flags = 0;
+	wc.pkey_index = 0;
+	wc.slid = qp->remote_ah_attr.dlid;
+	wc.sl = qp->remote_ah_attr.sl;
+	wc.dlid_path_bits = 0;
+	wc.port_num = 0;
+	ipath_sqerror_qp(qp, &wc);
 bail:
 	return;
 }
-- 
cgit v1.2.3


From c8c6f5d496fe794cbb52fe5a08c2bd839eecaa07 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:45:01 -0700
Subject: IB/ipath: Remove unused ipath_read_kreg64_port()

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_driver.c  | 23 -----------------------
 drivers/infiniband/hw/ipath/ipath_iba6110.c |  4 ++--
 drivers/infiniband/hw/ipath/ipath_iba6120.c |  4 ++--
 drivers/infiniband/hw/ipath/ipath_kernel.h  |  2 --
 4 files changed, 4 insertions(+), 29 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index cf40cf2d1fb..291d4ea69a4 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1804,29 +1804,6 @@ int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
 	return 0;
 }
 
-/**
- * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
- * @dd: the infinipath device
- * @regno: the register number to read
- * @port: the port containing the register
- *
- * Registers that vary with the chip implementation constants (port)
- * use this routine.
- */
-u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno,
-			   unsigned port)
-{
-	u16 where;
-
-	if (port < dd->ipath_portcnt &&
-	    (regno == dd->ipath_kregs->kr_rcvhdraddr ||
-	     regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
-		where = regno + port;
-	else
-		where = -1;
-
-	return ipath_read_kreg64(dd, where);
-}
 
 /**
  * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 99348254502..b50436c5663 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -208,8 +208,8 @@ static const struct ipath_kregs ipath_ht_kregs = {
 	.kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
 	.kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
 	/*
-	 * These should not be used directly via ipath_read_kreg64(),
-	 * use them with ipath_read_kreg64_port(),
+	 * These should not be used directly via ipath_write_kreg64(),
+	 * use them with ipath_write_kreg64_port(),
 	 */
 	.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
 	.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 05918e1e7c3..5c50383880f 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -207,8 +207,8 @@ static const struct ipath_kregs ipath_pe_kregs = {
 	.kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),
 
 	/*
-	 * These should not be used directly via ipath_read_kreg64(),
-	 * use them with ipath_read_kreg64_port()
+	 * These should not be used directly via ipath_write_kreg64(),
+	 * use them with ipath_write_kreg64_port(),
 	 */
 	.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
 	.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index a2162853f5b..5428c2619ba 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -756,8 +756,6 @@ int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
 /* these are used for the registers that vary with port */
 void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
 			   unsigned, u64);
-u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
-			   unsigned);
 
 /*
  * We could have a single register get/put routine, that takes a group type,
-- 
cgit v1.2.3


From 0ed3c594e3878274787810422760dc7c51e0ee72 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:45:02 -0700
Subject: IB/ipath: Fix calculation for number of kernel PIO buffers

If the module parameter "kpiobufs" is set too high, the calculation to
reset it to a sane value was incorrect.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_init_chip.c | 37 ++++++++++++---------------
 1 file changed, 16 insertions(+), 21 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index d4f6b5239ef..1e77b55afe9 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -668,6 +668,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 {
 	int ret = 0, i;
 	u32 val32, kpiobufs;
+	u32 piobufs, uports;
 	u64 val;
 	struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
 	gfp_t gfp_flags = GFP_USER | __GFP_COMP;
@@ -702,16 +703,17 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	 * the in memory DMA'ed copies of the registers.  This has to
 	 * be done early, before we calculate lastport, etc.
 	 */
-	val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+	piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
 	/*
 	 * calc number of pioavail registers, and save it; we have 2
 	 * bits per buffer.
 	 */
-	dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
+	dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
 		/ (sizeof(u64) * BITS_PER_BYTE / 2);
+	uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
 	if (ipath_kpiobufs == 0) {
 		/* not set by user (this is default) */
-		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
+		if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32)
 			kpiobufs = 32;
 		else
 			kpiobufs = 16;
@@ -719,31 +721,25 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	else
 		kpiobufs = ipath_kpiobufs;
 
-	if (kpiobufs >
-	    (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
-	     (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) {
-		i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
-			(dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT);
+	if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
+		i = (int) piobufs -
+			(int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
 		if (i < 0)
 			i = 0;
-		dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for "
-			 "kernel leaves too few for %d user ports "
+		dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
+			 "%d for kernel leaves too few for %d user ports "
 			 "(%d each); using %u\n", kpiobufs,
-			 dd->ipath_cfgports - 1,
-			 IPATH_MIN_USER_PORT_BUFCNT, i);
+			 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
 		/*
 		 * shouldn't change ipath_kpiobufs, because could be
 		 * different for different devices...
 		 */
 		kpiobufs = i;
 	}
-	dd->ipath_lastport_piobuf =
-		dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs;
-	dd->ipath_pbufsport = dd->ipath_cfgports > 1
-		? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1)
-		: 0;
-	val32 = dd->ipath_lastport_piobuf -
-		(dd->ipath_pbufsport * (dd->ipath_cfgports - 1));
+	dd->ipath_lastport_piobuf = piobufs - kpiobufs;
+	dd->ipath_pbufsport =
+		uports ? dd->ipath_lastport_piobuf / uports : 0;
+	val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports);
 	if (val32 > 0) {
 		ipath_dbg("allocating %u pbufs/port leaves %u unused, "
 			  "add to kernel\n", dd->ipath_pbufsport, val32);
@@ -754,8 +750,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	dd->ipath_lastpioindex = dd->ipath_lastport_piobuf;
 	ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
 		   "each for %u user ports\n", kpiobufs,
-		   dd->ipath_piobcnt2k + dd->ipath_piobcnt4k,
-		   dd->ipath_pbufsport, dd->ipath_cfgports - 1);
+		   piobufs, dd->ipath_pbufsport, uports);
 
 	dd->ipath_f_early_init(dd);
 
-- 
cgit v1.2.3


From 5a7d4eea9185c20275307fcd1077d6f9dfdab48a Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:45:03 -0700
Subject: IB/ipath: Discard multicast packets without a GRH

This patch fixes a bug where multicast packets without a GRH were not
being dropped as per the IB spec.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_verbs.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 9bec5a9b455..f5604b87650 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -438,6 +438,10 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
 		struct ipath_mcast *mcast;
 		struct ipath_mcast_qp *p;
 
+		if (lnh != IPATH_LRH_GRH) {
+			dev->n_pkt_drops++;
+			goto bail;
+		}
 		mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
 		if (mcast == NULL) {
 			dev->n_pkt_drops++;
@@ -445,8 +449,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
 		}
 		dev->n_multicast_rcv++;
 		list_for_each_entry_rcu(p, &mcast->qp_list, list)
-			ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
-				     tlen, p->qp);
+			ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
 		/*
 		 * Notify ipath_multicast_detach() if it is waiting for us
 		 * to finish.
-- 
cgit v1.2.3


From 7b196e2ff3953063b656212ff517f6115a1477b2 Mon Sep 17 00:00:00 2001
From: Arthur Jones <arthur.jones@qlogic.com>
Date: Thu, 15 Mar 2007 14:45:04 -0700
Subject: IB/ipath: Call free_irq() on chip specific initialization failure

In initialization, if we bailed at chip specific initialization, we
forgot to clean up the irq we had requested.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_driver.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 291d4ea69a4..ab7458e904c 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -486,7 +486,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 
 	ret = ipath_init_chip(dd, 0);	/* do the chip-specific init */
 	if (ret)
-		goto bail_iounmap;
+		goto bail_irqsetup;
 
 	ret = ipath_enable_wc(dd);
 
@@ -505,6 +505,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 
 	goto bail;
 
+bail_irqsetup:
+	if (pdev->irq) free_irq(pdev->irq, dd);
+
 bail_iounmap:
 	iounmap((volatile void __iomem *) dd->ipath_kregbase);
 
-- 
cgit v1.2.3


From 569b87b47f906d65ee35d6ecc4767f20a6390b9b Mon Sep 17 00:00:00 2001
From: Arthur Jones <arthur.jones@qlogic.com>
Date: Thu, 15 Mar 2007 14:45:05 -0700
Subject: IB/ipath: Force PIOAvail update entry point

Due to a chip bug, the PIOAvail register is not always updated to
memory.  This patch allows userspace to force an update.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_common.h   |  7 +++--
 drivers/infiniband/hw/ipath/ipath_file_ops.c | 38 ++++++++++++++++++++++------
 2 files changed, 35 insertions(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 048b928bb4b..10c008f22ba 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -352,7 +352,7 @@ struct ipath_base_info {
  * may not be implemented; the user code must deal with this if it
  * cares, or it must abort after initialization reports the difference.
  */
-#define IPATH_USER_SWMINOR 4
+#define IPATH_USER_SWMINOR 5
 
 #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
 
@@ -429,8 +429,11 @@ struct ipath_user_info {
 #define __IPATH_CMD_SLAVE_INFO	22	/* return info on slave processes (for old user code) */
 #define IPATH_CMD_ASSIGN_PORT	23	/* allocate HCA and port */
 #define IPATH_CMD_USER_INIT 	24	/* set up userspace */
+#define IPATH_CMD_UNUSED_1	25
+#define IPATH_CMD_UNUSED_2	26
+#define IPATH_CMD_PIOAVAILUPD	27	/* force an update of PIOAvail reg */
 
-#define IPATH_CMD_MAX		24
+#define IPATH_CMD_MAX		27
 
 struct ipath_port_info {
 	__u32 num_active;	/* number of active units */
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index bb53bde80ee..9ca582b65fe 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -2047,6 +2047,17 @@ static int ipath_get_slave_info(struct ipath_portdata *pd,
 	return ret;
 }
 
+static int ipath_force_pio_avail_update(struct ipath_devdata *dd)
+{
+	u64 reg = dd->ipath_sendctrl;
+
+	clear_bit(IPATH_S_PIOBUFAVAILUPD, &reg);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, reg);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+
+	return 0;
+}
+
 static ssize_t ipath_write(struct file *fp, const char __user *data,
 			   size_t count, loff_t *off)
 {
@@ -2106,22 +2117,30 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
 		dest = &cmd.cmd.slave_mask_addr;
 		src = &ucmd->cmd.slave_mask_addr;
 		break;
+	case IPATH_CMD_PIOAVAILUPD:	// force an update of PIOAvail reg
+		copy = 0;
+		src = NULL;
+		dest = NULL;
+		break;
 	default:
 		ret = -EINVAL;
 		goto bail;
 	}
 
-	if ((count - consumed) < copy) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	if (copy) {
+		if ((count - consumed) < copy) {
+			ret = -EINVAL;
+			goto bail;
+		}
 
-	if (copy_from_user(dest, src, copy)) {
-		ret = -EFAULT;
-		goto bail;
+		if (copy_from_user(dest, src, copy)) {
+			ret = -EFAULT;
+			goto bail;
+		}
+
+		consumed += copy;
 	}
 
-	consumed += copy;
 	pd = port_fp(fp);
 	if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
 		cmd.type != IPATH_CMD_ASSIGN_PORT) {
@@ -2172,6 +2191,9 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
 					   (void __user *) (unsigned long)
 					   cmd.cmd.slave_mask_addr);
 		break;
+	case IPATH_CMD_PIOAVAILUPD:
+		ret = ipath_force_pio_avail_update(pd->port_dd);
+		break;
 	}
 
 	if (ret >= 0)
-- 
cgit v1.2.3


From 820054b7ca7a54ba94d89db4b3c53a24d2d66633 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Date: Thu, 15 Mar 2007 14:45:06 -0700
Subject: IB/ipath: Print better error messages if kernel is misconfigured

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_driver.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index ab7458e904c..056e1066328 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -390,15 +390,23 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 
 	/* setup the chip-specific functions, as early as possible. */
 	switch (ent->device) {
-#ifdef CONFIG_HT_IRQ
 	case PCI_DEVICE_ID_INFINIPATH_HT:
+#ifdef CONFIG_HT_IRQ
 		ipath_init_iba6110_funcs(dd);
 		break;
+#else
+		ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
+			      "CONFIG_HT_IRQ is not enabled\n", ent->device);
+		return -ENODEV;
 #endif
-#ifdef CONFIG_PCI_MSI
 	case PCI_DEVICE_ID_INFINIPATH_PE800:
+#ifdef CONFIG_PCI_MSI
 		ipath_init_iba6120_funcs(dd);
 		break;
+#else
+		ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
+			      "CONFIG_PCI_MSI is not enabled\n", ent->device);
+		return -ENODEV;
 #endif
 	default:
 		ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
-- 
cgit v1.2.3


From 9783ab405844202b452ac673677e6c8f8c9a6a99 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:45:07 -0700
Subject: IB/ipath: Improve handling and reporting of parity errors

Mostly cleanup.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_driver.c    |   3 +-
 drivers/infiniband/hw/ipath/ipath_eeprom.c    |   4 +
 drivers/infiniband/hw/ipath/ipath_iba6110.c   | 138 ++++++++++++++++----------
 drivers/infiniband/hw/ipath/ipath_iba6120.c   |  58 +++++++----
 drivers/infiniband/hw/ipath/ipath_init_chip.c |   4 +
 drivers/infiniband/hw/ipath/ipath_intr.c      |  38 ++++++-
 drivers/infiniband/hw/ipath/ipath_kernel.h    |   4 +-
 drivers/infiniband/hw/ipath/ipath_registers.h |   7 --
 8 files changed, 170 insertions(+), 86 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 056e1066328..13b9785e684 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -605,8 +605,9 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
 
 		ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
 			   dd->ipath_pageshadow);
-		vfree(dd->ipath_pageshadow);
+		tmpp = dd->ipath_pageshadow;
 		dd->ipath_pageshadow = NULL;
+		vfree(tmpp);
 	}
 
 	/*
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index a4019a6b756..030185f90ee 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -626,6 +626,10 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
 	} else
 		memcpy(dd->ipath_serial, ifp->if_serial,
 		       sizeof ifp->if_serial);
+	if (!strstr(ifp->if_comment, "Tested successfully"))
+		ipath_dev_err(dd, "Board SN %s did not pass functional "
+			"test: %s\n", dd->ipath_serial,
+			ifp->if_comment);
 
 	ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
 		   (unsigned long long) be64_to_cpu(dd->ipath_guid));
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index b50436c5663..8e0794d316f 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -284,6 +284,14 @@ static const struct ipath_cregs ipath_ht_cregs = {
 #define INFINIPATH_EXTS_MEMBIST_ENDTEST     0x0000000000004000
 #define INFINIPATH_EXTS_MEMBIST_CORRECT     0x0000000000008000
 
+
+/* TID entries (memory), HT-only */
+#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL	/* 40 bits valid */
+#define INFINIPATH_RT_VALID 0x8000000000000000ULL
+#define INFINIPATH_RT_ADDR_SHIFT 0
+#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
+#define INFINIPATH_RT_BUFSIZE_SHIFT 48
+
 /*
  * masks and bits that are different in different chips, or present only
  * in one
@@ -402,6 +410,14 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
 	INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
 };
 
+#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
+		        INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
+		        << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
+			  << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
+
+static int ipath_ht_txe_recover(struct ipath_devdata *);
+
 /**
  * ipath_ht_handle_hwerrors - display hardware errors.
  * @dd: the infinipath device
@@ -450,13 +466,12 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 
 	/*
 	 * make sure we get this much out, unless told to be quiet,
+	 * it's a parity error we may recover from,
 	 * or it's occurred within the last 5 seconds
 	 */
-	if ((hwerrs & ~(dd->ipath_lasthwerror |
-			((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
-			  INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
-			<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
-	    (ipath_debug & __IPATH_VERBDBG))
+	if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
+		RXE_EAGER_PARITY)) ||
+		(ipath_debug & __IPATH_VERBDBG))
 		dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
 			 "(cleared)\n", (unsigned long long) hwerrs);
 	dd->ipath_lasthwerror |= hwerrs;
@@ -467,7 +482,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			      (hwerrs & ~dd->ipath_hwe_bitsextant));
 
 	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-	if (ctrl & INFINIPATH_C_FREEZEMODE) {
+	if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
 		/*
 		 * parity errors in send memory are recoverable,
 		 * just cancel the send (if indicated in * sendbuffererror),
@@ -476,50 +491,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 		 * occur if a processor speculative read is done to the PIO
 		 * buffer while we are sending a packet, for example.
 		 */
-		if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
-			       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
-			      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
-			ipath_stats.sps_txeparity++;
-			ipath_dbg("Recovering from TXE parity error (%llu), "
-			    	  "hwerrstatus=%llx\n",
-				  (unsigned long long) ipath_stats.sps_txeparity,
-				  (unsigned long long) hwerrs);
-			ipath_disarm_senderrbufs(dd);
-			hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
-				     INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
-				    << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
-			if (!hwerrs) { /* else leave in freeze mode */
-				ipath_write_kreg(dd,
-						 dd->ipath_kregs->kr_control,
-						 dd->ipath_control);
-				return;
-			}
-		}
-		if (hwerrs) {
-			/*
-			 * if any set that we aren't ignoring; only
-			 * make the complaint once, in case it's stuck
-			 * or recurring, and we get here multiple
-			 * times.
-			 */
-			if (dd->ipath_flags & IPATH_INITTED) {
-				ipath_dev_err(dd, "Fatal Hardware Error (freeze "
-					      "mode), no longer usable, SN %.16s\n",
-						  dd->ipath_serial);
-				isfatal = 1;
-			}
-			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-			/* mark as having had error */
-			*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-			/*
-			 * mark as not usable, at a minimum until driver
-			 * is reloaded, probably until reboot, since no
-			 * other reset is possible.
-			 */
-			dd->ipath_flags &= ~IPATH_INITTED;
-		} else {
-			ipath_dbg("Clearing freezemode on ignored hardware "
-				  "error\n");
+		if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd))
+			hwerrs &= ~TXE_PIO_PARITY;
+		if (hwerrs & RXE_EAGER_PARITY)
+			ipath_dev_err(dd, "RXE parity, Eager TID error is not "
+				"recoverable\n");
+		if (!hwerrs) {
+			ipath_dbg("Clearing freezemode on ignored or "
+				  "recovered hardware error\n");
 			ctrl &= ~INFINIPATH_C_FREEZEMODE;
 			ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
 					 ctrl);
@@ -587,7 +566,32 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 				 dd->ipath_hwerrmask);
 	}
 
-	ipath_dev_err(dd, "%s hardware error\n", msg);
+	if (hwerrs) {
+		/*
+		 * if any set that we aren't ignoring; only
+		 * make the complaint once, in case it's stuck
+		 * or recurring, and we get here multiple
+		 * times.
+		 */
+		ipath_dev_err(dd, "%s hardware error\n", msg);
+		if (dd->ipath_flags & IPATH_INITTED) {
+			ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+					  "mode), no longer usable, SN %.16s\n",
+					  dd->ipath_serial);
+			isfatal = 1;
+		}
+		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+		/* mark as having had error */
+		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+		/*
+		 * mark as not usable, at a minimum until driver
+		 * is reloaded, probably until reboot, since no
+		 * other reset is possible.
+		 */
+		dd->ipath_flags &= ~IPATH_INITTED;
+	}
+	else
+		*msg = 0; /* recovered from all of them */
 	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
 		/*
 		 * for status file; if no trailing brace is copied,
@@ -658,7 +662,8 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 	if (n)
 		snprintf(name, namelen, "%s", n);
 
-	if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
+	if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
+		dd->ipath_minrev > 3)) {
 		/*
 		 * This version of the driver only supports Rev 3.2 and 3.3
 		 */
@@ -1163,6 +1168,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
 
 	if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
 		ipath_dev_err(dd, "MemBIST did not complete!\n");
+	if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
+		ipath_dbg("MemBIST corrected\n");
 
 	ipath_check_htlink(dd);
 
@@ -1366,6 +1373,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
 			     u64 __iomem *tidptr, u32 type,
 			     unsigned long pa)
 {
+	if (!dd->ipath_kregbase)
+		return;
+
 	if (pa != dd->ipath_tidinvalid) {
 		if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
 			dev_info(&dd->pcidev->dev,
@@ -1382,10 +1392,10 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
 			pa |= lenvalid | INFINIPATH_RT_VALID;
 		}
 	}
-	if (dd->ipath_kregbase)
-		writeq(pa, tidptr);
+	writeq(pa, tidptr);
 }
 
+
 /**
  * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
  * @dd: the infinipath device
@@ -1515,7 +1525,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
 			 INFINIPATH_S_ABORT);
 
 	ipath_get_eeprom_info(dd);
-	if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
+	if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
 		dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
 		/*
 		 * Later production QHT7040 has same changes as QHT7140, so
@@ -1528,6 +1538,24 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
 	return 0;
 }
 
+
+static int ipath_ht_txe_recover(struct ipath_devdata *dd)
+{
+	int cnt = ++ipath_stats.sps_txeparity;
+	if (cnt >= IPATH_MAX_PARITY_ATTEMPTS)  {
+		if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
+			ipath_dev_err(dd,
+				"Too many attempts to recover from "
+				"TXE parity, giving up\n");
+		return 0;
+	}
+	dev_info(&dd->pcidev->dev,
+		"Recovering from TXE PIO parity error\n");
+	ipath_disarm_senderrbufs(dd, 1);
+	return 1;
+}
+
+
 /**
  * ipath_init_ht_get_base_info - set chip-specific flags for user code
  * @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 5c50383880f..aa2b5194433 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -321,6 +321,12 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
 	INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
 };
 
+#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
+		        INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
+		        << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+
+static int ipath_pe_txe_recover(struct ipath_devdata *);
+
 /**
  * ipath_pe_handle_hwerrors - display hardware errors.
  * @dd: the infinipath device
@@ -394,25 +400,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 		 * occur if a processor speculative read is done to the PIO
 		 * buffer while we are sending a packet, for example.
 		 */
-		if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
-			       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
-			      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
-			ipath_stats.sps_txeparity++;
-			ipath_dbg("Recovering from TXE parity error (%llu), "
-			    	  "hwerrstatus=%llx\n",
-				  (unsigned long long) ipath_stats.sps_txeparity,
-				  (unsigned long long) hwerrs);
-			ipath_disarm_senderrbufs(dd);
-			hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
-				     INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
-				    << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
-			if (!hwerrs) { /* else leave in freeze mode */
-				ipath_write_kreg(dd,
-						 dd->ipath_kregs->kr_control,
-						 dd->ipath_control);
-			    return;
-			}
-		}
+		if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd))
+			hwerrs &= ~TXE_PIO_PARITY;
 		if (hwerrs) {
 			/*
 			 * if any set that we aren't ignoring only make the
@@ -581,6 +570,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
 
 	if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
 		ipath_dev_err(dd, "MemBIST did not complete!\n");
+	if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND)
+		ipath_dbg("MemBIST corrected\n");
 
 	val = ~0ULL;	/* barring bugs, all hwerrors become interrupts, */
 
@@ -1330,6 +1321,35 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd)
 	dd->ipath_irq = 0;
 }
 
+/*
+ * On platforms using this chip, and not having ordered WC stores, we
+ * can get TXE parity errors due to speculative reads to the PIO buffers,
+ * and this, due to a chip bug can result in (many) false parity error
+ * reports.  So it's a debug print on those, and an info print on systems
+ * where the speculative reads don't occur.
+ * Because we can get lots of false errors, we have no upper limit
+ * on recovery attempts on those platforms.
+ */
+static int ipath_pe_txe_recover(struct ipath_devdata *dd)
+{
+	if (ipath_unordered_wc())
+		ipath_dbg("Recovering from TXE PIO parity error\n");
+	else {
+		int cnt = ++ipath_stats.sps_txeparity;
+		if (cnt >= IPATH_MAX_PARITY_ATTEMPTS)  {
+			if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
+				ipath_dev_err(dd,
+					"Too many attempts to recover from "
+					"TXE parity, giving up\n");
+			return 0;
+		}
+		dev_info(&dd->pcidev->dev,
+			"Recovering from TXE PIO parity error\n");
+	}
+	ipath_disarm_senderrbufs(dd, 1);
+	return 1;
+}
+
 /**
  * ipath_init_iba6120_funcs - set up the chip-specific function pointers
  * @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 1e77b55afe9..72caa9f091f 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -590,6 +590,10 @@ static int init_housekeeping(struct ipath_devdata *dd,
 		goto done;
 	}
 
+
+	/* clear diagctrl register, in case diags were running and crashed */
+	ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
+
 	/* clear the initial reset flag, in case first driver load */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
 			 INFINIPATH_E_RESET);
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 24853310df1..45d033169c6 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -37,11 +37,40 @@
 #include "ipath_verbs.h"
 #include "ipath_common.h"
 
+/*
+ * clear (write) a pio buffer, to clear a parity error.   This routine
+ * should only be called when in freeze mode, and the buffer should be
+ * canceled afterwards.
+ */
+static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
+{
+	u32 __iomem *pbuf;
+	u32 dwcnt; /* dword count to write */
+	if (pnum < dd->ipath_piobcnt2k) {
+		pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
+			dd->ipath_palign);
+		dwcnt = dd->ipath_piosize2k >> 2;
+	}
+	else {
+		pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
+			(pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
+		dwcnt = dd->ipath_piosize4k >> 2;
+	}
+	dev_info(&dd->pcidev->dev,
+		"Rewrite PIO buffer %u, to recover from parity error\n",
+		pnum);
+	*pbuf = dwcnt+1; /* no flush required, since already in freeze */
+	while(--dwcnt)
+		*pbuf++ = 0;
+}
+
 /*
  * Called when we might have an error that is specific to a particular
  * PIO buffer, and may need to cancel that buffer, so it can be re-used.
+ * If rewrite is true, and bits are set in the sendbufferror registers,
+ * we'll write to the buffer, for error recovery on parity errors.
  */
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
 {
 	u32 piobcnt;
 	unsigned long sbuf[4];
@@ -74,8 +103,11 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
 		}
 
 		for (i = 0; i < piobcnt; i++)
-			if (test_bit(i, sbuf))
+			if (test_bit(i, sbuf)) {
+				if (rewrite)
+					ipath_clrpiobuf(dd, i);
 				ipath_disarm_piobufs(dd, i, 1);
+			}
 		dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
 	}
 }
@@ -114,7 +146,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
 {
 	u64 ignore_this_time = 0;
 
-	ipath_disarm_senderrbufs(dd);
+	ipath_disarm_senderrbufs(dd, 0);
 	if ((errs & E_SUM_LINK_PKTERRS) &&
 	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
 		/*
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 5428c2619ba..e900c2593f4 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -590,7 +590,6 @@ int ipath_enable_wc(struct ipath_devdata *dd);
 void ipath_disable_wc(struct ipath_devdata *dd);
 int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
 void ipath_shutdown_device(struct ipath_devdata *);
-void ipath_disarm_senderrbufs(struct ipath_devdata *);
 
 struct file_operations;
 int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
@@ -713,6 +712,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *);
 void ipath_init_iba6110_funcs(struct ipath_devdata *);
 void ipath_get_eeprom_info(struct ipath_devdata *);
 u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
+void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
 
 /*
  * number of words used for protocol header if not set by ipath_userinit();
@@ -897,6 +897,8 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
 
 extern unsigned ipath_debug; /* debugging bit mask */
 
+#define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */
+
 const char *ipath_get_unit_name(int unit);
 
 extern struct mutex ipath_mutex;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 6e99eafdfd7..c182bcd6209 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -308,13 +308,6 @@
 #define INFINIPATH_XGXS_RX_POL_SHIFT 19
 #define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
 
-#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL	/* 40 bits valid */
-
-/* TID entries (memory), HT-only */
-#define INFINIPATH_RT_VALID 0x8000000000000000ULL
-#define INFINIPATH_RT_ADDR_SHIFT 0
-#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
-#define INFINIPATH_RT_BUFSIZE_SHIFT 48
 
 /*
  * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
-- 
cgit v1.2.3


From 27b044a815df7d4530bc68560796680ed588070c Mon Sep 17 00:00:00 2001
From: Michael Albaugh <Michael.Albaugh@QLogic.com>
Date: Thu, 15 Mar 2007 14:45:08 -0700
Subject: IB/ipath: Fix driver crash (in interrupt or during unload) after chip
 reset

Re-init of the kernel structures after a chip reset was leaving the
portdata structure for port zero in an inconsistent state, and a
pointer to it either stale (in re-init code) or NULL (in devdata)
Fixing the order of operations on this struct, and the condition for
interrupt access, prevents the crashes.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_init_chip.c | 45 ++++++++++++++++++++-------
 drivers/infiniband/hw/ipath/ipath_stats.c     |  2 +-
 2 files changed, 35 insertions(+), 12 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 72caa9f091f..7045ba68949 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -216,6 +216,20 @@ static int bringup_link(struct ipath_devdata *dd)
 	return ret;
 }
 
+static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
+{
+	struct ipath_portdata *pd = NULL;
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (pd) {
+		pd->port_dd = dd;
+		pd->port_cnt = 1;
+		/* The port 0 pkey table is used by the layer interface. */
+		pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
+	}
+	return pd;
+}
+
 static int init_chip_first(struct ipath_devdata *dd,
 			   struct ipath_portdata **pdp)
 {
@@ -271,20 +285,16 @@ static int init_chip_first(struct ipath_devdata *dd,
 		goto done;
 	}
 
-	dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL);
+	pd = create_portdata0(dd);
 
-	if (!dd->ipath_pd[0]) {
+	if (!pd) {
 		ipath_dev_err(dd, "Unable to allocate portdata for port "
 			      "0, failing\n");
 		ret = -ENOMEM;
 		goto done;
 	}
-	pd = dd->ipath_pd[0];
-	pd->port_dd = dd;
-	pd->port_port = 0;
-	pd->port_cnt = 1;
-	/* The port 0 pkey table is used by the layer interface. */
-	pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
+	dd->ipath_pd[0] = pd;
+
 	dd->ipath_rcvtidcnt =
 		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
 	dd->ipath_rcvtidbase =
@@ -838,11 +848,24 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	 * Set up the port 0 (kernel) rcvhdr q and egr TIDs.  If doing
 	 * re-init, the simplest way to handle this is to free
 	 * existing, and re-allocate.
+	 * Need to re-create rest of port 0 portdata as well.
 	 */
 	if (reinit) {
-		struct ipath_portdata *pd = dd->ipath_pd[0];
-		dd->ipath_pd[0] = NULL;
-		ipath_free_pddata(dd, pd);
+		/* Alloc and init new ipath_portdata for port0,
+		 * Then free old pd. Could lead to fragmentation, but also
+		 * makes later support for hot-swap easier.
+		 */
+		struct ipath_portdata *npd;
+		npd = create_portdata0(dd);
+		if (npd) {
+			ipath_free_pddata(dd, pd);
+			dd->ipath_pd[0] = pd = npd;
+		} else {
+			ipath_dev_err(dd, "Unable to allocate portdata for"
+				      "  port 0, failing\n");
+			ret = -ENOMEM;
+			goto done;
+		}
 	}
 	dd->ipath_f_tidtemplate(dd);
 	ret = ipath_create_rcvhdrq(dd, pd);
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index a627342a969..9307f7187ca 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -207,7 +207,7 @@ void ipath_get_faststats(unsigned long opaque)
 	 * don't access the chip while running diags, or memory diags can
 	 * fail
 	 */
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) ||
+	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
 	    ipath_diag_inuse)
 		/* but re-arm the timer, for diags case; won't hurt other */
 		goto done;
-- 
cgit v1.2.3


From f5408ac7ccec0a7edd2b6add0da82735375a37a0 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:45:09 -0700
Subject: IB/ipath: On unrecoverable errors, force link down, LEDs off

If the chip is no longer usable, LEDs should be turned off so system
can be found easily in the cluster.

Also some minor reorganizing so both chips print hardware error
message at same point and only if there were unrecovered errors

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_iba6110.c | 12 +++++++++++-
 drivers/infiniband/hw/ipath/ipath_iba6120.c | 11 ++++++++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 8e0794d316f..4171198fc20 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -43,6 +43,9 @@
 #include "ipath_kernel.h"
 #include "ipath_registers.h"
 
+static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
+
+
 /*
  * This lists the InfiniPath registers, in the actual chip layout.
  * This structure should never be directly accessed.
@@ -572,9 +575,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 		 * make the complaint once, in case it's stuck
 		 * or recurring, and we get here multiple
 		 * times.
+		 * force link down, so switch knows, and
+		 * LEDs are turned off
 		 */
-		ipath_dev_err(dd, "%s hardware error\n", msg);
 		if (dd->ipath_flags & IPATH_INITTED) {
+			ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+			ipath_setup_ht_setextled(dd,
+				INFINIPATH_IBCS_L_STATE_DOWN,
+				INFINIPATH_IBCS_LT_STATE_DISABLED);
 			ipath_dev_err(dd, "Fatal Hardware Error (freeze "
 					  "mode), no longer usable, SN %.16s\n",
 					  dd->ipath_serial);
@@ -592,6 +600,8 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 	}
 	else
 		*msg = 0; /* recovered from all of them */
+	if (*msg)
+		ipath_dev_err(dd, "%s hardware error\n", msg);
 	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
 		/*
 		 * for status file; if no trailing brace is copied,
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index aa2b5194433..1b9c3085775 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -43,6 +43,8 @@
 #include "ipath_kernel.h"
 #include "ipath_registers.h"
 
+static void ipath_setup_pe_setextled(struct ipath_devdata *, u64, u64);
+
 /*
  * This file contains all the chip-specific register information and
  * access functions for the QLogic InfiniPath PCI-Express chip.
@@ -407,8 +409,14 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			 * if any set that we aren't ignoring only make the
 			 * complaint once, in case it's stuck or recurring,
 			 * and we get here multiple times
+			 * Force link down, so switch knows, and
+			 * LEDs are turned off
 			 */
 			if (dd->ipath_flags & IPATH_INITTED) {
+				ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+				ipath_setup_pe_setextled(dd,
+					INFINIPATH_IBCS_L_STATE_DOWN,
+					INFINIPATH_IBCS_LT_STATE_DISABLED);
 				ipath_dev_err(dd, "Fatal Hardware Error (freeze "
 					      "mode), no longer usable, SN %.16s\n",
 						  dd->ipath_serial);
@@ -482,7 +490,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 				 dd->ipath_hwerrmask);
 	}
 
-	ipath_dev_err(dd, "%s hardware error\n", msg);
+	if (*msg)
+		ipath_dev_err(dd, "%s hardware error\n", msg);
 	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
 		/*
 		 * for /sys status file ; if no trailing } is copied, we'll
-- 
cgit v1.2.3


From 490462c2686df6e35c21d1efe935e0b4a3bddb39 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:45:10 -0700
Subject: IB/ipath: Prevent random program use of diags interface

To prevent random utility reads and writes of the diag interface to the
chip, we first require a handshake of reading from offset 0 and writing
to offset 0 before any other reads or writes can be done through the
diags device.   Otherwise chip errors can be triggered.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_diag.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 0f13a2182cc..63e8368b0e9 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -296,7 +296,7 @@ static int ipath_diag_open(struct inode *in, struct file *fp)
 	}
 
 	fp->private_data = dd;
-	ipath_diag_inuse = 1;
+	ipath_diag_inuse = -2;
 	diag_set_link = 0;
 	ret = 0;
 
@@ -461,6 +461,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
 	else if ((count % 4) || (*off % 4))
 		/* address or length is not 32-bit aligned, hence invalid */
 		ret = -EINVAL;
+	else if (ipath_diag_inuse < 1 && (*off || count != 8))
+		ret = -EINVAL;  /* prevent cat /dev/ipath_diag* */
 	else if ((count % 8) || (*off % 8))
 		/* address or length not 64-bit aligned; do 32-bit reads */
 		ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
@@ -470,6 +472,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
 	if (ret >= 0) {
 		*off += count;
 		ret = count;
+		if (ipath_diag_inuse == -2)
+			ipath_diag_inuse++;
 	}
 
 	return ret;
@@ -489,6 +493,9 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
 	else if ((count % 4) || (*off % 4))
 		/* address or length is not 32-bit aligned, hence invalid */
 		ret = -EINVAL;
+	else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
+		 ipath_diag_inuse == -2)  /* read qw off 0, write qw off 0 */
+		ret = -EINVAL;  /* before any other write allowed */
 	else if ((count % 8) || (*off % 8))
 		/* address or length not 64-bit aligned; do 32-bit writes */
 		ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
@@ -498,6 +505,8 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
 	if (ret >= 0) {
 		*off += count;
 		ret = count;
+		if (ipath_diag_inuse == -1)
+			ipath_diag_inuse = 1; /* all read/write OK now */
 	}
 
 	return ret;
-- 
cgit v1.2.3


From 53c1d2c943a67fb129ed2797182305a4633531fb Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:45:11 -0700
Subject: IB/ipath: Disable IB link earlier in shutdown sequence

Move the code that shuts down the IB link earlier in the unload
process, to be sure no new packets can arrive while we are unloading.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_driver.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 13b9785e684..e3a22320971 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -536,8 +536,6 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
 {
 	int port;
 
-	ipath_shutdown_device(dd);
-
 	if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
 		/* can't do anything more with chip; needs re-init */
 		*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
@@ -634,6 +632,12 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
 
 	ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
 
+	/*
+	 * disable the IB link early, to be sure no new packets arrive, which
+	 * complicates the shutdown process
+	 */
+	ipath_shutdown_device(dd);
+
 	if (dd->verbs_dev)
 		ipath_unregister_ib_device(dd->verbs_dev);
 
-- 
cgit v1.2.3


From 662af5813be9aadf95ca310b7b6d1d37070c9922 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:45:12 -0700
Subject: IB/ipath: Don't allow QPs 0 and 1 to be opened multiple times

Signed-off-by: Robert Walsh <robert.walsh@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_qp.c | 91 ++++++++++++++++++++--------------
 1 file changed, 53 insertions(+), 38 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index f671fd07325..16db9ac0b40 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -81,11 +81,51 @@ static u32 credit_table[31] = {
 	32768			/* 1E */
 };
 
-static u32 alloc_qpn(struct ipath_qp_table *qpt)
+
+static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
+{
+	unsigned long page = get_zeroed_page(GFP_KERNEL);
+	unsigned long flags;
+
+	/*
+	 * Free the page if someone raced with us installing it.
+	 */
+
+	spin_lock_irqsave(&qpt->lock, flags);
+	if (map->page)
+		free_page(page);
+	else
+		map->page = (void *)page;
+	spin_unlock_irqrestore(&qpt->lock, flags);
+}
+
+
+static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
 {
 	u32 i, offset, max_scan, qpn;
 	struct qpn_map *map;
-	u32 ret;
+	u32 ret = -1;
+
+	if (type == IB_QPT_SMI)
+		ret = 0;
+	else if (type == IB_QPT_GSI)
+		ret = 1;
+
+	if (ret != -1) {
+		map = &qpt->map[0];
+		if (unlikely(!map->page)) {
+			get_map_page(qpt, map);
+			if (unlikely(!map->page)) {
+				ret = -ENOMEM;
+				goto bail;
+			}
+		}
+		if (!test_and_set_bit(ret, map->page))
+			atomic_dec(&map->n_free);
+		else
+			ret = -EBUSY;
+		goto bail;
+	}
 
 	qpn = qpt->last + 1;
 	if (qpn >= QPN_MAX)
@@ -95,19 +135,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
 	max_scan = qpt->nmaps - !offset;
 	for (i = 0;;) {
 		if (unlikely(!map->page)) {
-			unsigned long page = get_zeroed_page(GFP_KERNEL);
-			unsigned long flags;
-
-			/*
-			 * Free the page if someone raced with us
-			 * installing it:
-			 */
-			spin_lock_irqsave(&qpt->lock, flags);
-			if (map->page)
-				free_page(page);
-			else
-				map->page = (void *)page;
-			spin_unlock_irqrestore(&qpt->lock, flags);
+			get_map_page(qpt, map);
 			if (unlikely(!map->page))
 				break;
 		}
@@ -151,7 +179,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
 		qpn = mk_qpn(qpt, map, offset);
 	}
 
-	ret = 0;
+	ret = -ENOMEM;
 
 bail:
 	return ret;
@@ -180,29 +208,19 @@ static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
 			   enum ib_qp_type type)
 {
 	unsigned long flags;
-	u32 qpn;
 	int ret;
 
-	if (type == IB_QPT_SMI)
-		qpn = 0;
-	else if (type == IB_QPT_GSI)
-		qpn = 1;
-	else {
-		/* Allocate the next available QPN */
-		qpn = alloc_qpn(qpt);
-		if (qpn == 0) {
-			ret = -ENOMEM;
-			goto bail;
-		}
-	}
-	qp->ibqp.qp_num = qpn;
+	ret = alloc_qpn(qpt, type);
+	if (ret < 0)
+		goto bail;
+	qp->ibqp.qp_num = ret;
 
 	/* Add the QP to the hash table. */
 	spin_lock_irqsave(&qpt->lock, flags);
 
-	qpn %= qpt->max;
-	qp->next = qpt->table[qpn];
-	qpt->table[qpn] = qp;
+	ret %= qpt->max;
+	qp->next = qpt->table[ret];
+	qpt->table[ret] = qp;
 	atomic_inc(&qp->refcount);
 
 	spin_unlock_irqrestore(&qpt->lock, flags);
@@ -245,9 +263,7 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
 	if (!fnd)
 		return;
 
-	/* If QPN is not reserved, mark QPN free in the bitmap. */
-	if (qp->ibqp.qp_num > 1)
-		free_qpn(qpt, qp->ibqp.qp_num);
+	free_qpn(qpt, qp->ibqp.qp_num);
 
 	wait_event(qp->wait, !atomic_read(&qp->refcount));
 }
@@ -270,8 +286,7 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
 
 		while (qp) {
 			nqp = qp->next;
-			if (qp->ibqp.qp_num > 1)
-				free_qpn(qpt, qp->ibqp.qp_num);
+			free_qpn(qpt, qp->ibqp.qp_num);
 			if (!atomic_dec_and_test(&qp->refcount) ||
 			    !ipath_destroy_qp(&qp->ibqp))
 				ipath_dbg("QP memory leak!\n");
-- 
cgit v1.2.3


From f0810daf74c564a3615eba5002cc11c21a0949ba Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Thu, 15 Mar 2007 14:45:13 -0700
Subject: IB/ipath: Fix unit selection when all CPU affinity bits set

At some point things changed so that all the affinity bits can be set,
but cpus_full() macro is not true.  This caused problems with the unit
selection logic on multi-unit (board) configurations.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_file_ops.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 9ca582b65fe..1272aaf2a78 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1592,15 +1592,16 @@ static int find_best_unit(struct file *fp,
 	 */
 	if (!cpus_empty(current->cpus_allowed) &&
 	    !cpus_full(current->cpus_allowed)) {
-		int ncpus = num_online_cpus(), curcpu = -1;
+		int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
 		for (i = 0; i < ncpus; i++)
 			if (cpu_isset(i, current->cpus_allowed)) {
 				ipath_cdbg(PROC, "%s[%u] affinity set for "
-					   "cpu %d\n", current->comm,
-					   current->pid, i);
+					   "cpu %d/%d\n", current->comm,
+					   current->pid, i, ncpus);
 				curcpu = i;
+				nset++;
 			}
-		if (curcpu != -1) {
+		if (curcpu != -1 && nset != ncpus) {
 			if (npresent) {
 				prefunit = curcpu / (ncpus / npresent);
 				ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
-- 
cgit v1.2.3


From 253fb3902008353831525ab711909abdd5ee191f Mon Sep 17 00:00:00 2001
From: Robert Walsh <robert.walsh@qlogic.com>
Date: Thu, 15 Mar 2007 14:45:14 -0700
Subject: IB/ipath: Check reserved memory keys

Don't let userspace use the direct-physical-map L_key or R_key.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_keys.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index c93fa2f7719..dd487c100f5 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -133,6 +133,12 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
 	 * being reversible by calling bus_to_virt().
 	 */
 	if (sge->lkey == 0) {
+		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+		if (pd->user) {
+			ret = 0;
+			goto bail;
+		}
 		isge->mr = NULL;
 		isge->vaddr = (void *) sge->addr;
 		isge->length = sge->length;
@@ -206,6 +212,12 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
 	 * (see ipath_get_dma_mr and ipath_dma.c).
 	 */
 	if (rkey == 0) {
+		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+		if (pd->user) {
+			ret = 0;
+			goto bail;
+		}
 		sge->mr = NULL;
 		sge->vaddr = (void *) vaddr;
 		sge->length = len;
-- 
cgit v1.2.3


From 0d6172a4284b21e4762e8638a4d693ef52f63bfe Mon Sep 17 00:00:00 2001
From: Robert Walsh <robert.walsh@qlogic.com>
Date: Thu, 15 Mar 2007 14:45:15 -0700
Subject: IB/ipath: Remove duplicate stuff from ipath_verbs.h

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_verbs.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index b0b29d97d56..37e742b2a52 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -731,8 +731,6 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
 
 int ipath_destroy_srq(struct ib_srq *ibsrq);
 
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-
 int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
 
 struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
-- 
cgit v1.2.3


From 6ce73b07db7aa05d4a30716d6a99c832b6d9db4a Mon Sep 17 00:00:00 2001
From: Robert Walsh <robert.walsh@qlogic.com>
Date: Thu, 15 Mar 2007 14:45:16 -0700
Subject: IB/ipath: Check that a UD work request's address handle is valid

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_ud.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 57625b8c1b9..a518f7c8fa8 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -308,6 +308,11 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 		goto bail;
 	}
 
+	if (wr->wr.ud.ah->pd != qp->ibqp.pd) {
+		ret = -EPERM;
+		goto bail;
+	}
+
 	/* IB spec says that num_sge == 0 is OK. */
 	if (wr->num_sge > qp->s_max_sge) {
 		ret = -EINVAL;
-- 
cgit v1.2.3


From 40b90430ecac40cc9adb26b808cc12a3d569da5d Mon Sep 17 00:00:00 2001
From: Robert Walsh <robert.walsh@qlogic.com>
Date: Thu, 15 Mar 2007 14:45:17 -0700
Subject: IB/ipath: Fix WC format drift between user and kernel space

The kernel ib_wc structure now uses a QP pointer, but the user space
equivalent uses a QP number instead.  This means we can no longer use
a simple structure copy to copy stuff into user space.

Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ipath/ipath_cq.c    | 38 +++++++++++++++++++++++++++++--
 drivers/infiniband/hw/ipath/ipath_verbs.h |  3 ++-
 2 files changed, 38 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 87462e0cb4d..ea78e6dddc9 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -76,7 +76,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 		}
 		return;
 	}
-	wc->queue[head] = *entry;
+	wc->queue[head].wr_id = entry->wr_id;
+	wc->queue[head].status = entry->status;
+	wc->queue[head].opcode = entry->opcode;
+	wc->queue[head].vendor_err = entry->vendor_err;
+	wc->queue[head].byte_len = entry->byte_len;
+	wc->queue[head].imm_data = (__u32 __force)entry->imm_data;
+	wc->queue[head].qp_num = entry->qp->qp_num;
+	wc->queue[head].src_qp = entry->src_qp;
+	wc->queue[head].wc_flags = entry->wc_flags;
+	wc->queue[head].pkey_index = entry->pkey_index;
+	wc->queue[head].slid = entry->slid;
+	wc->queue[head].sl = entry->sl;
+	wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
+	wc->queue[head].port_num = entry->port_num;
 	wc->head = next;
 
 	if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -122,9 +135,30 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 	if (tail > (u32) cq->ibcq.cqe)
 		tail = (u32) cq->ibcq.cqe;
 	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
+		struct ipath_qp *qp;
+
 		if (tail == wc->head)
 			break;
-		*entry = wc->queue[tail];
+
+		qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
+				      wc->queue[tail].qp_num);
+		entry->qp = &qp->ibqp;
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+
+		entry->wr_id = wc->queue[tail].wr_id;
+		entry->status = wc->queue[tail].status;
+		entry->opcode = wc->queue[tail].opcode;
+		entry->vendor_err = wc->queue[tail].vendor_err;
+		entry->byte_len = wc->queue[tail].byte_len;
+		entry->imm_data = wc->queue[tail].imm_data;
+		entry->src_qp = wc->queue[tail].src_qp;
+		entry->wc_flags = wc->queue[tail].wc_flags;
+		entry->pkey_index = wc->queue[tail].pkey_index;
+		entry->slid = wc->queue[tail].slid;
+		entry->sl = wc->queue[tail].sl;
+		entry->dlid_path_bits = wc->queue[tail].dlid_path_bits;
+		entry->port_num = wc->queue[tail].port_num;
 		if (tail >= cq->ibcq.cqe)
 			tail = 0;
 		else
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 37e742b2a52..7c4929f1cb5 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -40,6 +40,7 @@
 #include <linux/interrupt.h>
 #include <linux/kref.h>
 #include <rdma/ib_pack.h>
+#include <rdma/ib_user_verbs.h>
 
 #include "ipath_layer.h"
 
@@ -188,7 +189,7 @@ struct ipath_mmap_info {
 struct ipath_cq_wc {
 	u32 head;		/* index of next entry to fill */
 	u32 tail;		/* index of next ib_poll_cq() entry */
-	struct ib_wc queue[1];	/* this is actually size ibcq.cqe + 1 */
+	struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
 };
 
 /*
-- 
cgit v1.2.3


From 3f114853d4f7c1746389f26e1d500887294da8fd Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Wed, 18 Apr 2007 20:21:02 -0700
Subject: IB/mthca: Update HCA firmware revisions

Update the driver's list of current firmware versions with Mellanox's
latest releases.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_main.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 0d9b7d06bbc..773145e2994 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1013,14 +1013,14 @@ static struct {
 	u64 latest_fw;
 	u32 flags;
 } mthca_hca_table[] = {
-	[TAVOR]        = { .latest_fw = MTHCA_FW_VER(3, 4, 0),
+	[TAVOR]        = { .latest_fw = MTHCA_FW_VER(3, 5, 0),
 			   .flags     = 0 },
-	[ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 600),
+	[ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200),
 			   .flags     = MTHCA_FLAG_PCIE },
-	[ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 400),
+	[ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 2, 0),
 			   .flags     = MTHCA_FLAG_MEMFREE |
 					MTHCA_FLAG_PCIE },
-	[SINAI]        = { .latest_fw = MTHCA_FW_VER(1, 1, 0),
+	[SINAI]        = { .latest_fw = MTHCA_FW_VER(1, 2, 0),
 			   .flags     = MTHCA_FLAG_MEMFREE |
 					MTHCA_FLAG_PCIE    |
 					MTHCA_FLAG_SINAI_OPT }
@@ -1135,7 +1135,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
 		goto err_cmd;
 
 	if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
-		mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n",
+		mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n",
 			   (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
 			   (int) (mdev->fw_ver & 0xffff),
 			   (int) (mthca_hca_table[hca_type].latest_fw >> 32),
-- 
cgit v1.2.3


From 532c3b581725e2c6480a20c845fff920690286f1 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Tue, 24 Apr 2007 16:31:04 -0700
Subject: IB/mthca: Fix mthca_write_mtt() on HCAs with hidden memory

Commit b2875d4c ("IB/mthca: Always fill MTTs from CPU") causes a crash
in mthca_write_mtt() with non-memfree HCAs that have their memory
hidden (that is, have only two PCI BARs instead of having a third BAR
that allows access to the RAM attached to the HCA) on 64-bit
architectures.  This is because the commit just before, c20e20ab
("IB/mthca: Merge MR and FMR space on 64-bit systems") makes
dev->mr_table.fmr_mtt_buddy equal to &dev->mr_table.mtt_buddy and
hence mthca_write_mtt() tries to write directly into the HCA's MTT
table.  However, since that table is in the HCA's memory, this is
impossible without the PCI BAR that gives access to that memory.

This causes a crash because mthca_tavor_write_mtt_seg() basically
tries to dereference some offset of a NULL pointer.  Fix this by
adding a test of MTHCA_FLAG_FMR in mthca_write_mtt() so that we always
use the WRITE_MTT firmware command rather than writing directly if
FMRs are not enabled.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_mr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index ee561c569d5..aa6c70a6a36 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -297,7 +297,8 @@ out:
 
 int mthca_write_mtt_size(struct mthca_dev *dev)
 {
-	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
+	    !(dev->mthca_flags & MTHCA_FLAG_FMR))
 		/*
 		 * Be friendly to WRITE_MTT command
 		 * and leave two empty slots for the
@@ -355,7 +356,8 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
 	int size = mthca_write_mtt_size(dev);
 	int chunk;
 
-	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
+	    !(dev->mthca_flags & MTHCA_FLAG_FMR))
 		return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
 
 	while (list_len > 0) {
-- 
cgit v1.2.3


From 30c00986f3a610cdcee2602b8254c3ffa6cddc04 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Tue, 24 Apr 2007 16:31:11 -0700
Subject: IB/mthca: Simplify CQ cleaning in mthca_free_qp()

mthca_free_qp() already has local variables to hold the QP's send_cq
and recv_cq, so we can slightly clean up the calls to mthca_cq_clean()
by using those local variables instead of expressions like
to_mcq(qp->ibqp.send_cq).

Also, by cleaning the recv_cq first, we can avoid worrying about
whether the QP is attached to an SRQ for the second call, because we
would only clean send_cq if send_cq is not equal to recv_cq, and that
means send_cq cannot have any receive completions from the QP being
destroyed.

All this work even improves the generated code a bit:

add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-5 (-5)
function                                     old     new   delta
mthca_free_qp                                510     505      -5

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mthca/mthca_qp.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 1c6b63aca26..8fe6fee7a97 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1419,11 +1419,10 @@ void mthca_free_qp(struct mthca_dev *dev,
 	 * unref the mem-free tables and free the QPN in our table.
 	 */
 	if (!qp->ibqp.uobject) {
-		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
+		mthca_cq_clean(dev, recv_cq, qp->qpn,
 			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
-		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
-				       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+		if (send_cq != recv_cq)
+			mthca_cq_clean(dev, send_cq, qp->qpn, NULL);
 
 		mthca_free_memfree(dev, qp);
 		mthca_free_wqe_buf(dev, qp);
-- 
cgit v1.2.3


From d92f76448c1a3e40ff3df96a653ecd83aeac6ee7 Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Thu, 5 Apr 2007 10:49:51 -0700
Subject: RDMA/ucma: Simplify ucma_get_event()

Use wait_event_interruptible() instead of a more complicated
open-coded equivalent.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
 drivers/infiniband/core/ucma.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index c859134c1da..53b4c94a7eb 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -306,26 +306,18 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
 
 	mutex_lock(&file->mut);
 	while (list_empty(&file->event_list)) {
-		if (file->filp->f_flags & O_NONBLOCK) {
-			ret = -EAGAIN;
-			break;
-		}
+		mutex_unlock(&file->mut);
 
-		if (signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
+		if (file->filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		if (wait_event_interruptible(file->poll_wait,
+					     !list_empty(&file->event_list)))
+			return -ERESTARTSYS;
 
-		prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
-		mutex_unlock(&file->mut);
-		schedule();
 		mutex_lock(&file->mut);
-		finish_wait(&file->poll_wait, &wait);
 	}
 
-	if (ret)
-		goto done;
-
 	uevent = list_entry(file->event_list.next, struct ucma_event, list);
 
 	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
-- 
cgit v1.2.3


From 9d41b7fdeadb76bd4d06c16803daffd9fcf8dc7f Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Thu, 5 Apr 2007 10:51:05 -0700
Subject: IB/ucm: Simplify ib_ucm_event()

Use wait_event_interruptible() instead of a more complicated
open-coded equivalent.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
 drivers/infiniband/core/ucm.c | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index ee51d79a7ad..2586a3ee8eb 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -407,29 +407,18 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
 
 	mutex_lock(&file->file_mutex);
 	while (list_empty(&file->events)) {
+		mutex_unlock(&file->file_mutex);
 
-		if (file->filp->f_flags & O_NONBLOCK) {
-			result = -EAGAIN;
-			break;
-		}
+		if (file->filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
 
-		if (signal_pending(current)) {
-			result = -ERESTARTSYS;
-			break;
-		}
+		if (wait_event_interruptible(file->poll_wait,
+					     !list_empty(&file->events)))
+			return -ERESTARTSYS;
 
-		prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
-
-		mutex_unlock(&file->file_mutex);
-		schedule();
 		mutex_lock(&file->file_mutex);
-
-		finish_wait(&file->poll_wait, &wait);
 	}
 
-	if (result)
-		goto done;
-
 	uevent = list_entry(file->events.next, struct ib_ucm_event, file_list);
 
 	if (ib_ucm_new_cm_id(uevent->resp.event)) {
-- 
cgit v1.2.3


From d0e7bb141837db620f24406ca8b4667424138d42 Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Thu, 5 Apr 2007 10:51:10 -0700
Subject: IB/sa: Set src_path_bits correctly in ib_init_ah_from_path()

src_path_bits needs to mask off the base LID value.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
 drivers/infiniband/core/sa_query.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 68db633711c..9a7eaadb168 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -57,6 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 struct ib_sa_sm_ah {
 	struct ib_ah        *ah;
 	struct kref          ref;
+	u8		     src_path_mask;
 };
 
 struct ib_sa_port {
@@ -380,6 +381,7 @@ static void update_sm_ah(struct work_struct *work)
 	}
 
 	kref_init(&new_ah->ref);
+	new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
 
 	memset(&ah_attr, 0, sizeof ah_attr);
 	ah_attr.dlid     = port_attr.sm_lid;
@@ -460,6 +462,25 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
 }
 EXPORT_SYMBOL(ib_sa_cancel_query);
 
+static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
+{
+	struct ib_sa_device *sa_dev;
+	struct ib_sa_port   *port;
+	unsigned long flags;
+	u8 src_path_mask;
+
+	sa_dev = ib_get_client_data(device, &sa_client);
+	if (!sa_dev)
+		return 0x7f;
+
+	port  = &sa_dev->port[port_num - sa_dev->start_port];
+	spin_lock_irqsave(&port->ah_lock, flags);
+	src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
+	spin_unlock_irqrestore(&port->ah_lock, flags);
+
+	return src_path_mask;
+}
+
 int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 			 struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
 {
@@ -469,7 +490,8 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 	memset(ah_attr, 0, sizeof *ah_attr);
 	ah_attr->dlid = be16_to_cpu(rec->dlid);
 	ah_attr->sl = rec->sl;
-	ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
+	ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
+				 get_src_path_mask(device, port_num);
 	ah_attr->port_num = port_num;
 	ah_attr->static_rate = rec->rate;
 
-- 
cgit v1.2.3


From 46f1b3d7aff99ef4c1e729e023b9c8ee51de5973 Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Thu, 5 Apr 2007 11:50:11 -0700
Subject: IB/ipoib: Use ib_init_ah_from_path to initialize ah_attr

To support destinations that are not on the local IB subnet, IPoIB
should include the GRH information when constructing an address
handle.  Using the existing ib_init_ah_from_path() call will do this
for us.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index f2a40ae8e7d..b4c380c5a3b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -395,14 +395,10 @@ static void path_rec_completion(int status,
 	skb_queue_head_init(&skqueue);
 
 	if (!status) {
-		struct ib_ah_attr av = {
-			.dlid 	       = be16_to_cpu(pathrec->dlid),
-			.sl 	       = pathrec->sl,
-			.port_num      = priv->port,
-			.static_rate   = pathrec->rate
-		};
-
-		ah = ipoib_create_ah(dev, priv->pd, &av);
+		struct ib_ah_attr av;
+
+		if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
+			ah = ipoib_create_ah(dev, priv->pd, &av);
 	}
 
 	spin_lock_irqsave(&priv->lock, flags);
-- 
cgit v1.2.3


From aeba84a9251968a51fc6faae846518aac4e77565 Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Thu, 5 Apr 2007 11:49:21 -0700
Subject: IB/umad: Implement GRH handling for sent/received MADs

We need to set the SGID index for routed MADs and pass received
GRH information to userspace when a MAD is received.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
---
 drivers/infiniband/core/user_mad.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 2ce3eead2ba..8199b83052a 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -231,12 +231,17 @@ static void recv_handler(struct ib_mad_agent *agent,
 	packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
 	packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
 	if (packet->mad.hdr.grh_present) {
-		/* XXX parse GRH */
-		packet->mad.hdr.gid_index 	= 0;
-		packet->mad.hdr.hop_limit 	= 0;
-		packet->mad.hdr.traffic_class	= 0;
-		memset(packet->mad.hdr.gid, 0, 16);
-		packet->mad.hdr.flow_label	= 0;
+		struct ib_ah_attr ah_attr;
+
+		ib_init_ah_from_wc(agent->device, agent->port_num,
+				   mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
+				   &ah_attr);
+
+		packet->mad.hdr.gid_index = ah_attr.grh.sgid_index;
+		packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit;
+		packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class;
+		memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16);
+		packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label);
 	}
 
 	if (queue_packet(file, agent, packet))
@@ -473,6 +478,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 	if (packet->mad.hdr.grh_present) {
 		ah_attr.ah_flags = IB_AH_GRH;
 		memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
+		ah_attr.grh.sgid_index	   = packet->mad.hdr.gid_index;
 		ah_attr.grh.flow_label 	   = be32_to_cpu(packet->mad.hdr.flow_label);
 		ah_attr.grh.hop_limit  	   = packet->mad.hdr.hop_limit;
 		ah_attr.grh.traffic_class  = packet->mad.hdr.traffic_class;
-- 
cgit v1.2.3


From de493d47d8b4738827d8914a4dc94058c58f4249 Mon Sep 17 00:00:00 2001
From: Hal Rosenstock <halr@voltaire.com>
Date: Mon, 2 Apr 2007 11:24:07 -0400
Subject: IB/mad: Change SMI to use enums rather than magic return codes

Clarify code by changing return values from SMI functions to named
enum values instead of magic 0/1 values.

Signed-off-by: Hal Rosenstock <halr@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/mad.c | 34 +++++++++--------
 drivers/infiniband/core/smi.c | 86 +++++++++++++++++++++----------------------
 drivers/infiniband/core/smi.h | 34 ++++++++++-------
 3 files changed, 82 insertions(+), 72 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 13efd417034..6edfecf1be7 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
  *
@@ -31,7 +31,6 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
  */
 #include <linux/dma-mapping.h>
 #include <rdma/ib_cache.h>
@@ -668,7 +667,7 @@ static void build_smp_wc(struct ib_qp *qp,
 static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 				  struct ib_mad_send_wr_private *mad_send_wr)
 {
-	int ret;
+	int ret = 0;
 	struct ib_smp *smp = mad_send_wr->send_buf.mad;
 	unsigned long flags;
 	struct ib_mad_local_private *local;
@@ -688,14 +687,15 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 	 */
 	if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
 	     IB_LID_PERMISSIVE &&
-	    !smi_handle_dr_smp_send(smp, device->node_type, port_num)) {
+	     smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
+	     IB_SMI_DISCARD) {
 		ret = -EINVAL;
 		printk(KERN_ERR PFX "Invalid directed route\n");
 		goto out;
 	}
+
 	/* Check to post send on QP or process locally */
-	ret = smi_check_local_smp(smp, device);
-	if (!ret)
+	if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD)
 		goto out;
 
 	local = kmalloc(sizeof *local, GFP_ATOMIC);
@@ -1874,18 +1874,22 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 
 	if (recv->mad.mad.mad_hdr.mgmt_class ==
 	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-		if (!smi_handle_dr_smp_recv(&recv->mad.smp,
-					    port_priv->device->node_type,
-					    port_priv->port_num,
-					    port_priv->device->phys_port_cnt))
+		if (smi_handle_dr_smp_recv(&recv->mad.smp,
+					   port_priv->device->node_type,
+					   port_priv->port_num,
+					   port_priv->device->phys_port_cnt) ==
+					   IB_SMI_DISCARD)
 			goto out;
-		if (!smi_check_forward_dr_smp(&recv->mad.smp))
+
+		if (smi_check_forward_dr_smp(&recv->mad.smp) == IB_SMI_LOCAL)
 			goto local;
-		if (!smi_handle_dr_smp_send(&recv->mad.smp,
-					    port_priv->device->node_type,
-					    port_priv->port_num))
+
+		if (smi_handle_dr_smp_send(&recv->mad.smp,
+					   port_priv->device->node_type,
+					   port_priv->port_num) == IB_SMI_DISCARD)
 			goto out;
-		if (!smi_check_local_smp(&recv->mad.smp, port_priv->device))
+
+		if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
 			goto out;
 	}
 
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index 54b81e17ad5..2bca753eb62 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -3,7 +3,7 @@
  * Copyright (c) 2004, 2005 Infinicon Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Topspin Corporation.  All rights reserved.
- * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -34,7 +34,6 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: smi.c 1389 2004-12-27 22:56:47Z roland $
  */
 
 #include <rdma/ib_smi.h>
@@ -44,9 +43,8 @@
  * Fixup a directed route SMP for sending
  * Return 0 if the SMP should be discarded
  */
-int smi_handle_dr_smp_send(struct ib_smp *smp,
-			   u8 node_type,
-			   int port_num)
+enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
+				       u8 node_type, int port_num)
 {
 	u8 hop_ptr, hop_cnt;
 
@@ -59,18 +57,18 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 		if (hop_cnt && hop_ptr == 0) {
 			smp->hop_ptr++;
 			return (smp->initial_path[smp->hop_ptr] ==
-				port_num);
+				port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
 		}
 
 		/* C14-9:2 */
 		if (hop_ptr && hop_ptr < hop_cnt) {
 			if (node_type != RDMA_NODE_IB_SWITCH)
-				return 0;
+				return IB_SMI_DISCARD;
 
 			/* smp->return_path set when received */
 			smp->hop_ptr++;
 			return (smp->initial_path[smp->hop_ptr] ==
-				port_num);
+				port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
 		}
 
 		/* C14-9:3 -- We're at the end of the DR segment of path */
@@ -78,29 +76,30 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 			/* smp->return_path set when received */
 			smp->hop_ptr++;
 			return (node_type == RDMA_NODE_IB_SWITCH ||
-				smp->dr_dlid == IB_LID_PERMISSIVE);
+				smp->dr_dlid == IB_LID_PERMISSIVE ?
+				IB_SMI_HANDLE : IB_SMI_DISCARD);
 		}
 
 		/* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
 		/* C14-9:5 -- Fail unreasonable hop pointer */
-		return (hop_ptr == hop_cnt + 1);
+		return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
 
 	} else {
 		/* C14-13:1 */
 		if (hop_cnt && hop_ptr == hop_cnt + 1) {
 			smp->hop_ptr--;
 			return (smp->return_path[smp->hop_ptr] ==
-				port_num);
+				port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
 		}
 
 		/* C14-13:2 */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
 			if (node_type != RDMA_NODE_IB_SWITCH)
-				return 0;
+				return IB_SMI_DISCARD;
 
 			smp->hop_ptr--;
 			return (smp->return_path[smp->hop_ptr] ==
-				port_num);
+				port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
 		}
 
 		/* C14-13:3 -- at the end of the DR segment of path */
@@ -108,15 +107,16 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 			smp->hop_ptr--;
 			/* C14-13:3 -- SMPs destined for SM shouldn't be here */
 			return (node_type == RDMA_NODE_IB_SWITCH ||
-				smp->dr_slid == IB_LID_PERMISSIVE);
+				smp->dr_slid == IB_LID_PERMISSIVE ?
+				IB_SMI_HANDLE : IB_SMI_DISCARD);
 		}
 
 		/* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */
 		if (hop_ptr == 0)
-			return 1;
+			return IB_SMI_HANDLE;
 
 		/* C14-13:5 -- Check for unreasonable hop pointer */
-		return 0;
+		return IB_SMI_DISCARD;
 	}
 }
 
@@ -124,10 +124,8 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
  * Adjust information for a received SMP
  * Return 0 if the SMP should be dropped
  */
-int smi_handle_dr_smp_recv(struct ib_smp *smp,
-			   u8 node_type,
-			   int port_num,
-			   int phys_port_cnt)
+enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
+				       int port_num, int phys_port_cnt)
 {
 	u8 hop_ptr, hop_cnt;
 
@@ -138,16 +136,17 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 	if (!ib_get_smp_direction(smp)) {
 		/* C14-9:1 -- sender should have incremented hop_ptr */
 		if (hop_cnt && hop_ptr == 0)
-			return 0;
+			return IB_SMI_DISCARD;
 
 		/* C14-9:2 -- intermediate hop */
 		if (hop_ptr && hop_ptr < hop_cnt) {
 			if (node_type != RDMA_NODE_IB_SWITCH)
-				return 0;
+				return IB_SMI_DISCARD;
 
 			smp->return_path[hop_ptr] = port_num;
 			/* smp->hop_ptr updated when sending */
-			return (smp->initial_path[hop_ptr+1] <= phys_port_cnt);
+			return (smp->initial_path[hop_ptr+1] <= phys_port_cnt ?
+				IB_SMI_HANDLE : IB_SMI_DISCARD);
 		}
 
 		/* C14-9:3 -- We're at the end of the DR segment of path */
@@ -157,12 +156,13 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 			/* smp->hop_ptr updated when sending */
 
 			return (node_type == RDMA_NODE_IB_SWITCH ||
-				smp->dr_dlid == IB_LID_PERMISSIVE);
+				smp->dr_dlid == IB_LID_PERMISSIVE ?
+				IB_SMI_HANDLE : IB_SMI_DISCARD);
 		}
 
 		/* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
 		/* C14-9:5 -- fail unreasonable hop pointer */
-		return (hop_ptr == hop_cnt + 1);
+		return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
 
 	} else {
 
@@ -170,16 +170,17 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 		if (hop_cnt && hop_ptr == hop_cnt + 1) {
 			smp->hop_ptr--;
 			return (smp->return_path[smp->hop_ptr] ==
-				port_num);
+				port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
 		}
 
 		/* C14-13:2 */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
 			if (node_type != RDMA_NODE_IB_SWITCH)
-				return 0;
+				return IB_SMI_DISCARD;
 
 			/* smp->hop_ptr updated when sending */
-			return (smp->return_path[hop_ptr-1] <= phys_port_cnt);
+			return (smp->return_path[hop_ptr-1] <= phys_port_cnt ?
+				IB_SMI_HANDLE : IB_SMI_DISCARD);
 		}
 
 		/* C14-13:3 -- We're at the end of the DR segment of path */
@@ -187,23 +188,20 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 			if (smp->dr_slid == IB_LID_PERMISSIVE) {
 				/* giving SMP to SM - update hop_ptr */
 				smp->hop_ptr--;
-				return 1;
+				return IB_SMI_HANDLE;
 			}
 			/* smp->hop_ptr updated when sending */
-			return (node_type == RDMA_NODE_IB_SWITCH);
+			return (node_type == RDMA_NODE_IB_SWITCH ?
+				IB_SMI_HANDLE: IB_SMI_DISCARD);
 		}
 
 		/* C14-13:4 -- hop_ptr = 0 -> give to SM */
 		/* C14-13:5 -- Check for unreasonable hop pointer */
-		return (hop_ptr == 0);
+		return (hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
 	}
 }
 
-/*
- * Return 1 if the received DR SMP should be forwarded to the send queue
- * Return 0 if the SMP should be completed up the stack
- */
-int smi_check_forward_dr_smp(struct ib_smp *smp)
+enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
 {
 	u8 hop_ptr, hop_cnt;
 
@@ -213,23 +211,25 @@ int smi_check_forward_dr_smp(struct ib_smp *smp)
 	if (!ib_get_smp_direction(smp)) {
 		/* C14-9:2 -- intermediate hop */
 		if (hop_ptr && hop_ptr < hop_cnt)
-			return 1;
+			return IB_SMI_SEND;
 
 		/* C14-9:3 -- at the end of the DR segment of path */
 		if (hop_ptr == hop_cnt)
-			return (smp->dr_dlid == IB_LID_PERMISSIVE);
+			return (smp->dr_dlid == IB_LID_PERMISSIVE ?
+				IB_SMI_SEND : IB_SMI_LOCAL);
 
 		/* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
 		if (hop_ptr == hop_cnt + 1)
-			return 1;
+			return IB_SMI_SEND;
 	} else {
-		/* C14-13:2 */
+		/* C14-13:2  -- intermediate hop */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt)
-			return 1;
+			return IB_SMI_SEND;
 
 		/* C14-13:3 -- at the end of the DR segment of path */
 		if (hop_ptr == 1)
-			return (smp->dr_slid != IB_LID_PERMISSIVE);
+			return (smp->dr_slid != IB_LID_PERMISSIVE ?
+				IB_SMI_SEND : IB_SMI_LOCAL);
 	}
-	return 0;
+	return IB_SMI_LOCAL;
 }
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 3011bfd86dc..9a4b349efc3 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -3,7 +3,7 @@
  * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
  * Copyright (c) 2004 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
- * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -33,7 +33,6 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: smi.h 1389 2004-12-27 22:56:47Z roland $
  */
 
 #ifndef __SMI_H_
@@ -41,26 +40,33 @@
 
 #include <rdma/ib_smi.h>
 
-int smi_handle_dr_smp_recv(struct ib_smp *smp,
-			   u8 node_type,
-			   int port_num,
-			   int phys_port_cnt);
-extern int smi_check_forward_dr_smp(struct ib_smp *smp);
-extern int smi_handle_dr_smp_send(struct ib_smp *smp,
-				  u8 node_type,
-				  int port_num);
+enum smi_action {
+	IB_SMI_DISCARD,
+	IB_SMI_HANDLE
+};
+
+enum smi_forward_action {
+	IB_SMI_LOCAL,	/* SMP should be completed up the stack */
+	IB_SMI_SEND,	/* received DR SMP should be forwarded to the send queue */
+};
+
+enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
+				       int port_num, int phys_port_cnt);
+extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp);
+extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
+					      u8 node_type, int port_num);
 
 /*
  * Return 1 if the SMP should be handled by the local SMA/SM via process_mad
  */
-static inline int smi_check_local_smp(struct ib_smp *smp,
-				      struct ib_device *device)
+static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
+						  struct ib_device *device)
 {
 	/* C14-9:3 -- We're at the end of the DR segment of path */
 	/* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
 	return ((device->process_mad &&
 		!ib_get_smp_direction(smp) &&
-		(smp->hop_ptr == smp->hop_cnt + 1)));
+		(smp->hop_ptr == smp->hop_cnt + 1)) ?
+		IB_SMI_HANDLE : IB_SMI_DISCARD);
 }
-
 #endif	/* __SMI_H_ */
-- 
cgit v1.2.3


From 37aebbde7023d75bf09fbadb6796276d0a65a068 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Tue, 24 Apr 2007 21:30:37 -0700
Subject: IPoIB/cm: spin_lock_irqsave() -> spin_lock_irq() replacements

There are quite a few places in ipoib_cm.c where we know IRQs are
enabled because we do something that sleeps in the same function, so
we can convert several occurrences of spin_lock_irqsave() to a plain
spin_lock_irq().  This cleans up the source a little and makes the
code smaller too:

add/remove: 0/0 grow/shrink: 1/5 up/down: 3/-51 (-48)
function                                     old     new   delta
ipoib_cm_tx_reap                             403     406      +3
ipoib_cm_stale_task                          146     145      -1
ipoib_cm_dev_stop                            173     172      -1
ipoib_cm_tx_handler                          964     956      -8
ipoib_cm_rx_handler                          956     937     -19
ipoib_cm_skb_reap                            212     190     -22

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 56 ++++++++++++++-------------------
 1 file changed, 24 insertions(+), 32 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 7a4af7a3e04..da7e10230cf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -228,7 +228,6 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 	struct net_device *dev = cm_id->context;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_cm_rx *p;
-	unsigned long flags;
 	unsigned psn;
 	int ret;
 
@@ -257,9 +256,9 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 
 	cm_id->context = p;
 	p->jiffies = jiffies;
-	spin_lock_irqsave(&priv->lock, flags);
+	spin_lock_irq(&priv->lock);
 	list_add(&p->list, &priv->cm.passive_ids);
-	spin_unlock_irqrestore(&priv->lock, flags);
+	spin_unlock_irq(&priv->lock);
 	queue_delayed_work(ipoib_workqueue,
 			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
 	return 0;
@@ -277,7 +276,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
 {
 	struct ipoib_cm_rx *p;
 	struct ipoib_dev_priv *priv;
-	unsigned long flags;
 	int ret;
 
 	switch (event->event) {
@@ -290,14 +288,14 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
 	case IB_CM_REJ_RECEIVED:
 		p = cm_id->context;
 		priv = netdev_priv(p->dev);
-		spin_lock_irqsave(&priv->lock, flags);
+		spin_lock_irq(&priv->lock);
 		if (list_empty(&p->list))
 			ret = 0; /* Connection is going away already. */
 		else {
 			list_del_init(&p->list);
 			ret = -ECONNRESET;
 		}
-		spin_unlock_irqrestore(&priv->lock, flags);
+		spin_unlock_irq(&priv->lock);
 		if (ret) {
 			ib_destroy_qp(p->qp);
 			kfree(p);
@@ -612,23 +610,22 @@ void ipoib_cm_dev_stop(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_cm_rx *p;
-	unsigned long flags;
 
 	if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
 		return;
 
 	ib_destroy_cm_id(priv->cm.id);
-	spin_lock_irqsave(&priv->lock, flags);
+	spin_lock_irq(&priv->lock);
 	while (!list_empty(&priv->cm.passive_ids)) {
 		p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
 		list_del_init(&p->list);
-		spin_unlock_irqrestore(&priv->lock, flags);
+		spin_unlock_irq(&priv->lock);
 		ib_destroy_cm_id(p->id);
 		ib_destroy_qp(p->qp);
 		kfree(p);
-		spin_lock_irqsave(&priv->lock, flags);
+		spin_lock_irq(&priv->lock);
 	}
-	spin_unlock_irqrestore(&priv->lock, flags);
+	spin_unlock_irq(&priv->lock);
 
 	cancel_delayed_work(&priv->cm.stale_task);
 }
@@ -642,7 +639,6 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
 	struct sk_buff *skb;
-	unsigned long flags;
 
 	p->mtu = be32_to_cpu(data->mtu);
 
@@ -680,12 +676,12 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 
 	skb_queue_head_init(&skqueue);
 
-	spin_lock_irqsave(&priv->lock, flags);
+	spin_lock_irq(&priv->lock);
 	set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
 	if (p->neigh)
 		while ((skb = __skb_dequeue(&p->neigh->queue)))
 			__skb_queue_tail(&skqueue, skb);
-	spin_unlock_irqrestore(&priv->lock, flags);
+	spin_unlock_irq(&priv->lock);
 
 	while ((skb = __skb_dequeue(&skqueue))) {
 		skb->dev = p->dev;
@@ -895,7 +891,6 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 	struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
 	struct net_device *dev = priv->dev;
 	struct ipoib_neigh *neigh;
-	unsigned long flags;
 	int ret;
 
 	switch (event->event) {
@@ -914,7 +909,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 	case IB_CM_REJ_RECEIVED:
 	case IB_CM_TIMEWAIT_EXIT:
 		ipoib_dbg(priv, "CM error %d.\n", event->event);
-		spin_lock_irqsave(&priv->tx_lock, flags);
+		spin_lock_irq(&priv->tx_lock);
 		spin_lock(&priv->lock);
 		neigh = tx->neigh;
 
@@ -934,7 +929,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 		}
 
 		spin_unlock(&priv->lock);
-		spin_unlock_irqrestore(&priv->tx_lock, flags);
+		spin_unlock_irq(&priv->tx_lock);
 		break;
 	default:
 		break;
@@ -1023,21 +1018,20 @@ static void ipoib_cm_tx_reap(struct work_struct *work)
 	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
 						   cm.reap_task);
 	struct ipoib_cm_tx *p;
-	unsigned long flags;
 
-	spin_lock_irqsave(&priv->tx_lock, flags);
+	spin_lock_irq(&priv->tx_lock);
 	spin_lock(&priv->lock);
 	while (!list_empty(&priv->cm.reap_list)) {
 		p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
 		list_del(&p->list);
 		spin_unlock(&priv->lock);
-		spin_unlock_irqrestore(&priv->tx_lock, flags);
+		spin_unlock_irq(&priv->tx_lock);
 		ipoib_cm_tx_destroy(p);
-		spin_lock_irqsave(&priv->tx_lock, flags);
+		spin_lock_irq(&priv->tx_lock);
 		spin_lock(&priv->lock);
 	}
 	spin_unlock(&priv->lock);
-	spin_unlock_irqrestore(&priv->tx_lock, flags);
+	spin_unlock_irq(&priv->tx_lock);
 }
 
 static void ipoib_cm_skb_reap(struct work_struct *work)
@@ -1046,15 +1040,14 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
 						   cm.skb_task);
 	struct net_device *dev = priv->dev;
 	struct sk_buff *skb;
-	unsigned long flags;
 
 	unsigned mtu = priv->mcast_mtu;
 
-	spin_lock_irqsave(&priv->tx_lock, flags);
+	spin_lock_irq(&priv->tx_lock);
 	spin_lock(&priv->lock);
 	while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
 		spin_unlock(&priv->lock);
-		spin_unlock_irqrestore(&priv->tx_lock, flags);
+		spin_unlock_irq(&priv->tx_lock);
 		if (skb->protocol == htons(ETH_P_IP))
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -1062,11 +1055,11 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
 #endif
 		dev_kfree_skb_any(skb);
-		spin_lock_irqsave(&priv->tx_lock, flags);
+		spin_lock_irq(&priv->tx_lock);
 		spin_lock(&priv->lock);
 	}
 	spin_unlock(&priv->lock);
-	spin_unlock_irqrestore(&priv->tx_lock, flags);
+	spin_unlock_irq(&priv->tx_lock);
 }
 
 void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
@@ -1088,9 +1081,8 @@ static void ipoib_cm_stale_task(struct work_struct *work)
 	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
 						   cm.stale_task.work);
 	struct ipoib_cm_rx *p;
-	unsigned long flags;
 
-	spin_lock_irqsave(&priv->lock, flags);
+	spin_lock_irq(&priv->lock);
 	while (!list_empty(&priv->cm.passive_ids)) {
 		/* List if sorted by LRU, start from tail,
 		 * stop when we see a recently used entry */
@@ -1098,13 +1090,13 @@ static void ipoib_cm_stale_task(struct work_struct *work)
 		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
 			break;
 		list_del_init(&p->list);
-		spin_unlock_irqrestore(&priv->lock, flags);
+		spin_unlock_irq(&priv->lock);
 		ib_destroy_cm_id(p->id);
 		ib_destroy_qp(p->qp);
 		kfree(p);
-		spin_lock_irqsave(&priv->lock, flags);
+		spin_lock_irq(&priv->lock);
 	}
-	spin_unlock_irqrestore(&priv->lock, flags);
+	spin_unlock_irq(&priv->lock);
 }
 
 
-- 
cgit v1.2.3


From c4ed790dfd4b2182c76e0fcd79d4aa85ab02eccf Mon Sep 17 00:00:00 2001
From: Joachim Fenkes <fenkes@de.ibm.com>
Date: Tue, 24 Apr 2007 17:44:31 +0200
Subject: IB/ehca: Implement modify_port

Add "Modify Port" verb support to eHCA driver.  The IB communication
manager needs this to set the IsCM port capability bit when
initializing.

Signed-off-by: Joachim Fenkes <fenkes@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/ehca/ehca_classes.h |  1 +
 drivers/infiniband/hw/ehca/ehca_hca.c     | 55 +++++++++++++++++++++++++++++--
 drivers/infiniband/hw/ehca/ehca_main.c    |  1 +
 drivers/infiniband/hw/ehca/hcp_if.c       | 24 ++++++++++++++
 drivers/infiniband/hw/ehca/hcp_if.h       |  4 +++
 5 files changed, 83 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 82ded44c6ce..10fb8fbafa0 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -106,6 +106,7 @@ struct ehca_shca {
 	struct ehca_mr *maxmr;
 	struct ehca_pd *pd;
 	struct h_galpas galpas;
+	struct mutex modify_mutex;
 };
 
 struct ehca_pd {
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 30eb45df9f0..32b55a4f0e5 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -147,6 +147,7 @@ int ehca_query_port(struct ib_device *ibdev,
 		break;
 	}
 
+	props->port_cap_flags  = rblock->capability_mask;
 	props->gid_tbl_len     = rblock->gid_tbl_len;
 	props->max_msg_sz      = rblock->max_msg_sz;
 	props->bad_pkey_cntr   = rblock->bad_pkey_cntr;
@@ -236,10 +237,60 @@ query_gid1:
 	return ret;
 }
 
+const u32 allowed_port_caps = (
+	IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
+	IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
+	IB_PORT_VENDOR_CLASS_SUP);
+
 int ehca_modify_port(struct ib_device *ibdev,
 		     u8 port, int port_modify_mask,
 		     struct ib_port_modify *props)
 {
-	/* Not implemented yet */
-	return -EFAULT;
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
+	struct hipz_query_port *rblock;
+	u32 cap;
+	u64 hret;
+
+	if ((props->set_port_cap_mask | props->clr_port_cap_mask)
+	    & ~allowed_port_caps) {
+		ehca_err(&shca->ib_device, "Non-changeable bits set in masks  "
+			 "set=%x  clr=%x  allowed=%x", props->set_port_cap_mask,
+			 props->clr_port_cap_mask, allowed_port_caps);
+		return -EINVAL;
+	}
+
+	if (mutex_lock_interruptible(&shca->modify_mutex))
+                return -ERESTARTSYS;
+
+	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
+		ret = -ENOMEM;
+		goto modify_port1;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		ret = -EINVAL;
+		goto modify_port2;
+	}
+
+	cap = (rblock->capability_mask | props->set_port_cap_mask)
+		& ~props->clr_port_cap_mask;
+
+	hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
+				  cap, props->init_type, port_modify_mask);
+	if (hret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Modify port failed  hret=%lx", hret);
+		ret = -EINVAL;
+	}
+
+modify_port2:
+	ehca_free_fw_ctrlblock(rblock);
+
+modify_port1:
+        mutex_unlock(&shca->modify_mutex);
+
+	return ret;
 }
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 059da9628bb..3b23d677cb8 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -587,6 +587,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
 		ehca_gen_err("Cannot allocate shca memory.");
 		return -ENOMEM;
 	}
+	mutex_init(&shca->modify_mutex);
 
 	shca->ibmebus_dev = dev;
 	shca->ipz_hca_handle.handle = *handle;
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 3fb46e67df8..b564fcd3b28 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -70,6 +70,10 @@
 #define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
 #define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
 
+#define H_MP_INIT_TYPE                  EHCA_BMASK_IBM(44, 47)
+#define H_MP_SHUTDOWN                   EHCA_BMASK_IBM(48, 48)
+#define H_MP_RESET_QKEY_CTR             EHCA_BMASK_IBM(49, 49)
+
 /* direct access qp controls */
 #define DAQP_CTRL_ENABLE    0x01
 #define DAQP_CTRL_SEND_COMP 0x20
@@ -364,6 +368,26 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
 	return ret;
 }
 
+u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
+		       const u8 port_id, const u32 port_cap,
+		       const u8 init_type, const int modify_mask)
+{
+	u64 port_attributes = port_cap;
+
+	if (modify_mask & IB_PORT_SHUTDOWN)
+		port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
+	if (modify_mask & IB_PORT_INIT_TYPE)
+		port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
+	if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
+		port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
+
+	return ehca_plpar_hcall_norets(H_MODIFY_PORT,
+				       adapter_handle.handle, /* r4 */
+				       port_id,               /* r5 */
+				       port_attributes,       /* r6 */
+				       0, 0, 0, 0);
+}
+
 u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
 		     struct hipz_query_hca *query_hca_rblock)
 {
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 587ebd47095..2869f7dd619 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -85,6 +85,10 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
 		      const u8 port_id,
 		      struct hipz_query_port *query_port_response_block);
 
+u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
+		       const u8 port_id, const u32 port_cap,
+		       const u8 init_type, const int modify_mask);
+
 u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
 		     struct hipz_query_hca *query_hca_rblock);
 
-- 
cgit v1.2.3


From 1912ffbb88efe872eb8fa8113dfb3cb0b7238764 Mon Sep 17 00:00:00 2001
From: Joachim Fenkes <fenkes@de.ibm.com>
Date: Mon, 23 Apr 2007 18:20:27 +0200
Subject: IB: Set class_dev->dev in core for nice device symlink

All RDMA drivers except ehca set class_dev->dev to their dma_device
value (ehca leaves this unset).  dma_device is the only value that
makes any sense, so move this assignment to core/sysfs.c.  This reduce
the duplicated code in the rest of the drivers and gives ehca a nice
/sys/class/infiniband/ehcaX/device symlink.

Signed-off-by: Joachim Fenkes <fenkes@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/sysfs.c              | 1 +
 drivers/infiniband/hw/amso1100/c2_provider.c | 1 -
 drivers/infiniband/hw/cxgb3/iwch_provider.c  | 1 -
 drivers/infiniband/hw/ipath/ipath_verbs.c    | 1 -
 drivers/infiniband/hw/mthca/mthca_provider.c | 1 -
 5 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 000c086bf2e..08c299ebf4a 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -683,6 +683,7 @@ int ib_device_register_sysfs(struct ib_device *device)
 
 	class_dev->class      = &ib_class;
 	class_dev->class_data = device;
+	class_dev->dev	      = device->dma_device;
 	strlcpy(class_dev->class_id, device->name, BUS_ID_SIZE);
 
 	INIT_LIST_HEAD(&device->port_list);
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index fef97275291..607c09bf764 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -796,7 +796,6 @@ int c2_register_device(struct c2_dev *dev)
 	memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
 	dev->ibdev.phys_port_cnt = 1;
 	dev->ibdev.dma_device = &dev->pcidev->dev;
-	dev->ibdev.class_dev.dev = &dev->pcidev->dev;
 	dev->ibdev.query_device = c2_query_device;
 	dev->ibdev.query_port = c2_query_port;
 	dev->ibdev.modify_port = c2_modify_port;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 24e0df04f7d..af28a317016 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1108,7 +1108,6 @@ int iwch_register_device(struct iwch_dev *dev)
 	memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
 	dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
 	dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
-	dev->ibdev.class_dev.dev = &(dev->rdev.rnic_info.pdev->dev);
 	dev->ibdev.query_device = iwch_query_device;
 	dev->ibdev.query_port = iwch_query_port;
 	dev->ibdev.modify_port = iwch_modify_port;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index f5604b87650..18c6df2052c 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1559,7 +1559,6 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 	dev->node_type = RDMA_NODE_IB_CA;
 	dev->phys_port_cnt = 1;
 	dev->dma_device = &dd->pcidev->dev;
-	dev->class_dev.dev = dev->dma_device;
 	dev->query_device = ipath_query_device;
 	dev->modify_device = ipath_modify_device;
 	dev->query_port = ipath_query_port;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0725ad7ad9b..47e6fd46d9c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1293,7 +1293,6 @@ int mthca_register_device(struct mthca_dev *dev)
 	dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
 	dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
 	dev->ib_dev.dma_device           = &dev->pdev->dev;
-	dev->ib_dev.class_dev.dev        = &dev->pdev->dev;
 	dev->ib_dev.query_device         = mthca_query_device;
 	dev->ib_dev.query_port           = mthca_query_port;
 	dev->ib_dev.modify_device        = mthca_modify_device;
-- 
cgit v1.2.3


From 8570419fb7be0af84085ac8f13307392a748482c Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 6 Mar 2007 20:19:26 -0800
Subject: [ATM] ENI: Convert to struct timeval to ktime_t.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/eni.c | 4 ++--
 drivers/atm/eni.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 8fccf018f16..0d3a38b1cb0 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -536,7 +536,7 @@ static int rx_aal0(struct atm_vcc *vcc)
 		return 0;
 	}
 	skb_put(skb,length);
-	skb_set_timestamp(skb, &eni_vcc->timestamp);
+	skb->tstamp = eni_vcc->timestamp;
 	DPRINTK("got len %ld\n",length);
 	if (do_rx_dma(vcc,skb,1,length >> 2,length >> 2)) return 1;
 	eni_vcc->rxing++;
@@ -701,7 +701,7 @@ static void get_service(struct atm_dev *dev)
 			DPRINTK("Grr, servicing VCC %ld twice\n",vci);
 			continue;
 		}
-		do_gettimeofday(&ENI_VCC(vcc)->timestamp);
+		ENI_VCC(vcc)->timestamp = ktime_get_real();
 		ENI_VCC(vcc)->next = NULL;
 		if (vcc->qos.rxtp.traffic_class == ATM_CBR) {
 			if (eni_dev->fast)
diff --git a/drivers/atm/eni.h b/drivers/atm/eni.h
index 385090c2a58..d04fefb0841 100644
--- a/drivers/atm/eni.h
+++ b/drivers/atm/eni.h
@@ -59,7 +59,7 @@ struct eni_vcc {
 	int rxing;			/* number of pending PDUs */
 	int servicing;			/* number of waiting VCs (0 or 1) */
 	int txing;			/* number of pending TX bytes */
-	struct timeval timestamp;	/* for RX timing */
+	ktime_t timestamp;		/* for RX timing */
 	struct atm_vcc *next;		/* next pending RX */
 	struct sk_buff *last;		/* last PDU being DMAed (used to carry
 					   discard information) */
-- 
cgit v1.2.3


From cb69cc52364690d7789940c480b3a9490784b680 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 7 Mar 2007 19:33:52 -0800
Subject: [TCP/DCCP/RANDOM]: Remove unused exports.

This patch removes the following not or no longer used exports:
- drivers/char/random.c: secure_tcp_sequence_number
- net/dccp/options.c: sysctl_dccp_feat_sequence_window
- net/netlink/af_netlink.c: netlink_set_err

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/char/random.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index b9dc7aa1dfb..03af50f900d 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1556,8 +1556,6 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 	return seq;
 }
 
-EXPORT_SYMBOL(secure_tcp_sequence_number);
-
 /* Generate secure starting point for ephemeral IPV4 transport port search */
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
-- 
cgit v1.2.3


From c1a4b86e396b6870b420d23e4d49c7b685aef0a4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 15:27:07 -0700
Subject: [TR]: Use tr_hdr() were appropriate

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tokenring/lanstreamer.c | 6 +++---
 drivers/net/tokenring/olympic.c     | 9 +++++----
 drivers/s390/net/qeth_main.c        | 2 +-
 3 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c
index e999feb8c0b..5e1b884e10c 100644
--- a/drivers/net/tokenring/lanstreamer.c
+++ b/drivers/net/tokenring/lanstreamer.c
@@ -1607,10 +1607,12 @@ static void streamer_arb_cmd(struct net_device *dev)
 				      frame_data, buffer_len);
 		} while (next_ptr && (buff_off = next_ptr));
 
+		mac_frame->dev = dev;
+		mac_frame->protocol = tr_type_trans(mac_frame, dev);
 #if STREAMER_NETWORK_MONITOR
 		printk(KERN_WARNING "%s: Received MAC Frame, details: \n",
 		       dev->name);
-		mac_hdr = (struct trh_hdr *) mac_frame->data;
+		mac_hdr = tr_hdr(mac_frame);
 		printk(KERN_WARNING
 		       "%s: MAC Frame Dest. Addr: %02x:%02x:%02x:%02x:%02x:%02x \n",
 		       dev->name, mac_hdr->daddr[0], mac_hdr->daddr[1],
@@ -1622,8 +1624,6 @@ static void streamer_arb_cmd(struct net_device *dev)
 		       mac_hdr->saddr[2], mac_hdr->saddr[3],
 		       mac_hdr->saddr[4], mac_hdr->saddr[5]);
 #endif
-		mac_frame->dev = dev;
-		mac_frame->protocol = tr_type_trans(mac_frame, dev);
 		netif_rx(mac_frame);
 
 		/* Now tell the card we have dealt with the received frame */
diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c
index 8f4ecc1109c..683186afcab 100644
--- a/drivers/net/tokenring/olympic.c
+++ b/drivers/net/tokenring/olympic.c
@@ -1440,16 +1440,17 @@ static void olympic_arb_cmd(struct net_device *dev)
 			next_ptr=readw(buf_ptr+offsetof(struct mac_receive_buffer,next)); 
 		} while (next_ptr && (buf_ptr=olympic_priv->olympic_lap + ntohs(next_ptr)));
 
+		mac_frame->dev = dev;
+		mac_frame->protocol = tr_type_trans(mac_frame, dev);
+
 		if (olympic_priv->olympic_network_monitor) { 
 			struct trh_hdr *mac_hdr ; 
 			printk(KERN_WARNING "%s: Received MAC Frame, details: \n",dev->name) ;
-			mac_hdr = (struct trh_hdr *)mac_frame->data ; 
+			mac_hdr = tr_hdr(mac_frame);
 			printk(KERN_WARNING "%s: MAC Frame Dest. Addr: %02x:%02x:%02x:%02x:%02x:%02x \n", dev->name , mac_hdr->daddr[0], mac_hdr->daddr[1], mac_hdr->daddr[2], mac_hdr->daddr[3], mac_hdr->daddr[4], mac_hdr->daddr[5]) ; 
 			printk(KERN_WARNING "%s: MAC Frame Srce. Addr: %02x:%02x:%02x:%02x:%02x:%02x \n", dev->name , mac_hdr->saddr[0], mac_hdr->saddr[1], mac_hdr->saddr[2], mac_hdr->saddr[3], mac_hdr->saddr[4], mac_hdr->saddr[5]) ; 
 		}
-		mac_frame->dev = dev ; 
-		mac_frame->protocol = tr_type_trans(mac_frame,dev);
-		netif_rx(mac_frame) ; 	
+		netif_rx(mac_frame);
 		dev->last_rx = jiffies;
 
 drop_frame:
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index d8a86f5af37..f2b9b1b1ec1 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -2308,7 +2308,7 @@ qeth_rebuild_skb_fake_ll_tr(struct qeth_card *card, struct sk_buff *skb,
 	QETH_DBF_TEXT(trace,5,"skbfktr");
 	skb->mac.raw = skb->data - QETH_FAKE_LL_LEN_TR;
 	/* this is a fake ethernet header */
-	fake_hdr = (struct trh_hdr *) skb->mac.raw;
+	fake_hdr = tr_hdr(skb);
 
 	/* the destination MAC address */
 	switch (skb->pkt_type){
-- 
cgit v1.2.3


From c8fb7948dc1aeff0515b2912b564d4236f6c0ebd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 15:29:16 -0700
Subject: [TR]: Make tr_type_trans set skb->dev

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tokenring/3c359.c       | 4 ----
 drivers/net/tokenring/ibmtr.c       | 1 -
 drivers/net/tokenring/lanstreamer.c | 3 ---
 drivers/net/tokenring/olympic.c     | 3 ---
 drivers/net/tokenring/smctr.c       | 2 --
 drivers/net/tokenring/tms380tr.c    | 1 -
 6 files changed, 14 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c
index 7580bdeacad..d293423ee8e 100644
--- a/drivers/net/tokenring/3c359.c
+++ b/drivers/net/tokenring/3c359.c
@@ -933,8 +933,6 @@ static void xl_rx(struct net_device *dev)
 				return ; 				
 			}
 	
-			skb->dev = dev ; 
-
 			while (xl_priv->rx_ring_tail != temp_ring_loc) { 
 				copy_len = xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfraglen & 0x7FFF ; 
 				frame_length -= copy_len ;  
@@ -967,8 +965,6 @@ static void xl_rx(struct net_device *dev)
 				return ; 
 			}
 
-			skb->dev = dev ; 
-
 			skb2 = xl_priv->rx_ring_skb[xl_priv->rx_ring_tail] ; 
 			pci_unmap_single(xl_priv->pdev, xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr, xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; 
 			skb_put(skb2, frame_length) ; 
diff --git a/drivers/net/tokenring/ibmtr.c b/drivers/net/tokenring/ibmtr.c
index 01d55315ee8..1e8958ee2d0 100644
--- a/drivers/net/tokenring/ibmtr.c
+++ b/drivers/net/tokenring/ibmtr.c
@@ -1771,7 +1771,6 @@ static void tr_rx(struct net_device *dev)
 	/*BMS again, if she comes in with few but leaves with many */
 	skb_reserve(skb, sizeof(struct trh_hdr) - lan_hdr_len);
 	skb_put(skb, length);
-	skb->dev = dev;
 	data = skb->data;
 	rbuffer_len = ntohs(readw(rbuf + offsetof(struct rec_buf, buf_len)));
 	rbufdata = rbuf + offsetof(struct rec_buf, data);
diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c
index 5e1b884e10c..5d849c089a3 100644
--- a/drivers/net/tokenring/lanstreamer.c
+++ b/drivers/net/tokenring/lanstreamer.c
@@ -944,8 +944,6 @@ static void streamer_rx(struct net_device *dev)
 				printk(KERN_WARNING "%s: Not enough memory to copy packet to upper layers. \n",	dev->name);
 				streamer_priv->streamer_stats.rx_dropped++;
 			} else {	/* we allocated an skb OK */
-				skb->dev = dev;
-
 				if (buffer_cnt == 1) {
 					/* release the DMA mapping */
 					pci_unmap_single(streamer_priv->pci_dev, 
@@ -1607,7 +1605,6 @@ static void streamer_arb_cmd(struct net_device *dev)
 				      frame_data, buffer_len);
 		} while (next_ptr && (buff_off = next_ptr));
 
-		mac_frame->dev = dev;
 		mac_frame->protocol = tr_type_trans(mac_frame, dev);
 #if STREAMER_NETWORK_MONITOR
 		printk(KERN_WARNING "%s: Received MAC Frame, details: \n",
diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c
index 683186afcab..a6206580888 100644
--- a/drivers/net/tokenring/olympic.c
+++ b/drivers/net/tokenring/olympic.c
@@ -814,8 +814,6 @@ static void olympic_rx(struct net_device *dev)
 					olympic_priv->rx_ring_last_received += i ; 
 					olympic_priv->rx_ring_last_received &= (OLYMPIC_RX_RING_SIZE -1) ;  
 				} else  {
-					skb->dev = dev ; 
-
 					/* Optimise based upon number of buffers used. 
 			   	   	   If only one buffer is used we can simply swap the buffers around.
 			   	   	   If more than one then we must use the new buffer and copy the information
@@ -1440,7 +1438,6 @@ static void olympic_arb_cmd(struct net_device *dev)
 			next_ptr=readw(buf_ptr+offsetof(struct mac_receive_buffer,next)); 
 		} while (next_ptr && (buf_ptr=olympic_priv->olympic_lap + ntohs(next_ptr)));
 
-		mac_frame->dev = dev;
 		mac_frame->protocol = tr_type_trans(mac_frame, dev);
 
 		if (olympic_priv->olympic_network_monitor) { 
diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c
index cec282a6f62..b0296d80e46 100644
--- a/drivers/net/tokenring/smctr.c
+++ b/drivers/net/tokenring/smctr.c
@@ -3896,7 +3896,6 @@ static int smctr_process_rx_packet(MAC_HEADER *rmf, __u16 size,
                 tp->MacStat.rx_bytes += skb->len;
 
                 /* Kick the packet on up. */
-                skb->dev = dev;
                 skb->protocol = tr_type_trans(skb, dev);
                 netif_rx(skb);
 		dev->last_rx = jiffies;
@@ -4483,7 +4482,6 @@ static int smctr_rx_frame(struct net_device *dev)
                                 	tp->MacStat.rx_bytes += skb->len;
 
                                 	/* Kick the packet on up. */
-                                	skb->dev = dev;
                                 	skb->protocol = tr_type_trans(skb, dev);
                                 	netif_rx(skb);
 					dev->last_rx = jiffies;
diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c
index ea797ca2b98..de6f72775ec 100644
--- a/drivers/net/tokenring/tms380tr.c
+++ b/drivers/net/tokenring/tms380tr.c
@@ -2168,7 +2168,6 @@ static void tms380tr_rcv_status_irq(struct net_device *dev)
 				}
 				else
 				{
-					skb->dev	= dev;
 					skb_put(skb, tp->MaxPacketSize);
 					rpl->SkbStat 	= SKB_DATA_COPY;
 					ReceiveDataPtr 	= rpl->MData;
-- 
cgit v1.2.3


From 0a4f23fbbff70c268b0f2f5e0b87301c132fb305 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 10:57:13 -0300
Subject: [HIPPI/FDDI]: Make {hippi,fddi}_type_trans set skb->dev

Now all the _type_trans routines are consistent in this regard.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/defxx.c       | 2 --
 drivers/net/rrunner.c     | 1 -
 drivers/net/skfp/skfddi.c | 1 -
 drivers/s390/net/lcs.c    | 1 -
 4 files changed, 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c
index 07d2731c1aa..8d29fae1c71 100644
--- a/drivers/net/defxx.c
+++ b/drivers/net/defxx.c
@@ -3096,8 +3096,6 @@ static void dfx_rcv_queue_process(
 
 					skb_reserve(skb,3);		/* adjust data field so that it points to FC byte */
 					skb_put(skb, pkt_len);		/* pass up packet length, NOT including CRC */
-					skb->dev = bp->dev;		/* pass up device pointer */
-
 					skb->protocol = fddi_type_trans(skb, bp->dev);
 					bp->rcv_total_bytes += skb->len;
 					netif_rx(skb);
diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c
index d81536f90df..3a4fce38450 100644
--- a/drivers/net/rrunner.c
+++ b/drivers/net/rrunner.c
@@ -1029,7 +1029,6 @@ static void rx_int(struct net_device *dev, u32 rxlimit, u32 index)
 					goto defer;
 				}
 			}
-			skb->dev = dev;
 			skb->protocol = hippi_type_trans(skb, dev);
 
 			netif_rx(skb);		/* send it up */
diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c
index 9733a11c614..064e7c21c01 100644
--- a/drivers/net/skfp/skfddi.c
+++ b/drivers/net/skfp/skfddi.c
@@ -1680,7 +1680,6 @@ void mac_drv_rx_complete(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
 	rxd->rxd_os.skb = NULL;
 	skb_trim(skb, len);
 	skb->protocol = fddi_type_trans(skb, bp->dev);
-	skb->dev = bp->dev;	/* pass up device pointer */
 
 	netif_rx(skb);
 	bp->dev->last_rx = jiffies;
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index ecca1046714..1c23e187a3b 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -1784,7 +1784,6 @@ lcs_get_skb(struct lcs_card *card, char *skb_data, unsigned int skb_len)
 		card->stats.rx_dropped++;
 		return;
 	}
-	skb->dev = card->dev;
 	memcpy(skb_put(skb, skb_len), skb_data, skb_len);
 	skb->protocol =	card->lan_type_trans(skb, card->dev);
 	card->stats.rx_bytes += skb_len;
-- 
cgit v1.2.3


From 4839fccea04b5f4d2b3ce01585d6bdbcbc24002c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 11:13:59 -0300
Subject: [QETH]: Use eth_hdr()

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index f2b9b1b1ec1..d502b77adf6 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -2361,7 +2361,7 @@ qeth_rebuild_skb_fake_ll_eth(struct qeth_card *card, struct sk_buff *skb,
 	QETH_DBF_TEXT(trace,5,"skbfketh");
 	skb->mac.raw = skb->data - QETH_FAKE_LL_LEN_ETH;
 	/* this is a fake ethernet header */
-	fake_hdr = (struct ethhdr *) skb->mac.raw;
+	fake_hdr = eth_hdr(skb);
 
 	/* the destination MAC address */
 	switch (skb->pkt_type){
-- 
cgit v1.2.3


From 029720f15dcd3c6c16824177cfc486083b229411 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 11:20:07 -0300
Subject: [AOE]: Introduce aoe_hdr()

For consistency with other skb->mac.raw users.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/aoe/aoe.h    |  9 +++++++++
 drivers/block/aoe/aoecmd.c | 14 +++++++-------
 drivers/block/aoe/aoenet.c |  2 +-
 3 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 2308e83e5f3..4c34f8d31cc 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -48,6 +48,15 @@ struct aoe_hdr {
 	__be32 tag;
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct aoe_hdr *aoe_hdr(const struct sk_buff *skb)
+{
+	return (struct aoe_hdr *)skb->mac.raw;
+}
+#endif
+
 struct aoe_atahdr {
 	unsigned char aflags;
 	unsigned char errfeat;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 8d17d8df366..4ab7b40e8c5 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -118,7 +118,7 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f)
 
 	/* initialize the headers & frame */
 	skb = f->skb;
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	ah = (struct aoe_atahdr *) (h+1);
 	skb_put(skb, sizeof *h + sizeof *ah);
 	memset(h, 0, skb->len);
@@ -207,7 +207,7 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
 		skb->dev = ifp;
 		if (sl_tail == NULL)
 			sl_tail = skb;
-		h = (struct aoe_hdr *) skb->mac.raw;
+		h = aoe_hdr(skb);
 		memset(h, 0, sizeof *h + sizeof *ch);
 
 		memset(h->dst, 0xff, sizeof h->dst);
@@ -300,7 +300,7 @@ rexmit(struct aoedev *d, struct frame *f)
 	aoechr_error(buf);
 
 	skb = f->skb;
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	ah = (struct aoe_atahdr *) (h+1);
 	f->tag = n;
 	h->tag = cpu_to_be32(n);
@@ -529,7 +529,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	char ebuf[128];
 	u16 aoemajor;
 
-	hin = (struct aoe_hdr *) skb->mac.raw;
+	hin = aoe_hdr(skb);
 	aoemajor = be16_to_cpu(get_unaligned(&hin->major));
 	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
 	if (d == NULL) {
@@ -561,7 +561,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	calc_rttavg(d, tsince(f->tag));
 
 	ahin = (struct aoe_atahdr *) (hin+1);
-	hout = (struct aoe_hdr *) f->skb->mac.raw;
+	hout = aoe_hdr(f->skb);
 	ahout = (struct aoe_atahdr *) (hout+1);
 	buf = f->buf;
 
@@ -695,7 +695,7 @@ aoecmd_ata_id(struct aoedev *d)
 
 	/* initialize the headers & frame */
 	skb = f->skb;
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	ah = (struct aoe_atahdr *) (h+1);
 	skb_put(skb, sizeof *h + sizeof *ah);
 	memset(h, 0, skb->len);
@@ -726,7 +726,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
 	enum { MAXFRAMES = 16 };
 	u16 n;
 
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	ch = (struct aoe_cfghdr *) (h+1);
 
 	/*
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index aab6d91a2c2..f9ddfda4d9c 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -123,7 +123,7 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt,
 		goto exit;
 	skb_push(skb, ETH_HLEN);	/* (1) */
 
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	n = be32_to_cpu(get_unaligned(&h->tag));
 	if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31))
 		goto exit;
-- 
cgit v1.2.3


From 4c13eb6657fe9ef7b4dc8f1a405c902e9e5234e0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 17:40:23 -0700
Subject: [ETH]: Make eth_type_trans set skb->dev like the other *_type_trans

One less thing for drivers writers to worry about.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/amso1100/c2.c           | 1 -
 drivers/isdn/hysdn/hysdn_net.c                | 2 --
 drivers/net/3c501.c                           | 1 -
 drivers/net/3c505.c                           | 1 -
 drivers/net/3c507.c                           | 1 -
 drivers/net/3c509.c                           | 1 -
 drivers/net/3c515.c                           | 2 --
 drivers/net/3c523.c                           | 1 -
 drivers/net/3c527.c                           | 1 -
 drivers/net/3c59x.c                           | 2 --
 drivers/net/7990.c                            | 1 -
 drivers/net/8139cp.c                          | 2 --
 drivers/net/8139too.c                         | 1 -
 drivers/net/82596.c                           | 1 -
 drivers/net/a2065.c                           | 1 -
 drivers/net/acenic.c                          | 1 -
 drivers/net/amd8111e.c                        | 4 ----
 drivers/net/ariadne.c                         | 1 -
 drivers/net/arm/am79c961a.c                   | 1 -
 drivers/net/arm/at91_ether.c                  | 1 -
 drivers/net/arm/ep93xx_eth.c                  | 1 -
 drivers/net/arm/ether1.c                      | 1 -
 drivers/net/arm/ether3.c                      | 1 -
 drivers/net/at1700.c                          | 1 -
 drivers/net/atari_bionet.c                    | 1 -
 drivers/net/atarilance.c                      | 1 -
 drivers/net/atl1/atl1_main.c                  | 2 --
 drivers/net/atp.c                             | 1 -
 drivers/net/au1000_eth.c                      | 1 -
 drivers/net/b44.c                             | 1 -
 drivers/net/bmac.c                            | 1 -
 drivers/net/cassini.c                         | 1 -
 drivers/net/chelsio/sge.c                     | 3 +--
 drivers/net/cris/eth_v10.c                    | 1 -
 drivers/net/cs89x0.c                          | 2 --
 drivers/net/cxgb3/sge.c                       | 3 +--
 drivers/net/de600.c                           | 1 -
 drivers/net/de620.c                           | 1 -
 drivers/net/declance.c                        | 1 -
 drivers/net/depca.c                           | 1 -
 drivers/net/dgrs.c                            | 1 -
 drivers/net/dl2k.c                            | 4 ----
 drivers/net/dm9000.c                          | 1 -
 drivers/net/eepro.c                           | 1 -
 drivers/net/eepro100.c                        | 1 -
 drivers/net/eexpress.c                        | 1 -
 drivers/net/epic100.c                         | 3 ---
 drivers/net/eth16i.c                          | 1 -
 drivers/net/ewrk3.c                           | 1 -
 drivers/net/fealnx.c                          | 1 -
 drivers/net/fec.c                             | 1 -
 drivers/net/fec_8xx/fec_main.c                | 1 -
 drivers/net/forcedeth.c                       | 2 --
 drivers/net/fs_enet/fs_enet-main.c            | 3 ---
 drivers/net/gianfar.c                         | 2 --
 drivers/net/hamachi.c                         | 1 -
 drivers/net/hp100.c                           | 1 -
 drivers/net/ibm_emac/ibm_emac_core.c          | 1 -
 drivers/net/ibmlana.c                         | 1 -
 drivers/net/ibmveth.c                         | 1 -
 drivers/net/ioc3-eth.c                        | 3 ---
 drivers/net/iseries_veth.c                    | 1 -
 drivers/net/ixp2000/ixpdev.c                  | 3 +--
 drivers/net/lance.c                           | 1 -
 drivers/net/lasi_82596.c                      | 1 -
 drivers/net/lib8390.c                         | 1 -
 drivers/net/loopback.c                        | 1 -
 drivers/net/lp486e.c                          | 1 -
 drivers/net/mac89x0.c                         | 1 -
 drivers/net/macb.c                            | 1 -
 drivers/net/mace.c                            | 1 -
 drivers/net/macmace.c                         | 1 -
 drivers/net/meth.c                            | 1 -
 drivers/net/mipsnet.c                         | 1 -
 drivers/net/mv643xx_eth.c                     | 1 -
 drivers/net/myri10ge/myri10ge.c               | 1 -
 drivers/net/natsemi.c                         | 1 -
 drivers/net/netx-eth.c                        | 1 -
 drivers/net/netxen/netxen_nic_init.c          | 1 -
 drivers/net/ni5010.c                          | 1 -
 drivers/net/ni52.c                            | 1 -
 drivers/net/ni65.c                            | 2 --
 drivers/net/ns83820.c                         | 1 -
 drivers/net/pasemi_mac.c                      | 2 --
 drivers/net/pci-skeleton.c                    | 1 -
 drivers/net/pcmcia/3c574_cs.c                 | 1 -
 drivers/net/pcmcia/3c589_cs.c                 | 1 -
 drivers/net/pcmcia/axnet_cs.c                 | 1 -
 drivers/net/pcmcia/fmvj18x_cs.c               | 1 -
 drivers/net/pcmcia/nmclan_cs.c                | 2 --
 drivers/net/pcmcia/smc91c92_cs.c              | 1 -
 drivers/net/pcmcia/xirc2ps_cs.c               | 1 -
 drivers/net/pcnet32.c                         | 1 -
 drivers/net/qla3xxx.c                         | 2 --
 drivers/net/r8169.c                           | 1 -
 drivers/net/rionet.c                          | 1 -
 drivers/net/saa9730.c                         | 1 -
 drivers/net/sb1250-mac.c                      | 3 ---
 drivers/net/sc92031.c                         | 1 -
 drivers/net/seeq8005.c                        | 1 -
 drivers/net/sgiseeq.c                         | 1 -
 drivers/net/sis190.c                          | 1 -
 drivers/net/sis900.c                          | 3 ---
 drivers/net/sk98lin/skge.c                    | 2 --
 drivers/net/smc911x.c                         | 2 --
 drivers/net/smc9194.c                         | 1 -
 drivers/net/smc91x.c                          | 1 -
 drivers/net/sonic.c                           | 2 --
 drivers/net/spider_net.c                      | 1 -
 drivers/net/starfire.c                        | 1 -
 drivers/net/sun3_82586.c                      | 1 -
 drivers/net/sun3lance.c                       | 1 -
 drivers/net/sunbmac.c                         | 1 -
 drivers/net/sundance.c                        | 1 -
 drivers/net/sungem.c                          | 1 -
 drivers/net/sunhme.c                          | 1 -
 drivers/net/sunlance.c                        | 2 --
 drivers/net/sunqe.c                           | 1 -
 drivers/net/tc35815.c                         | 1 -
 drivers/net/tlan.c                            | 2 --
 drivers/net/tsi108_eth.c                      | 1 -
 drivers/net/tulip/de2104x.c                   | 1 -
 drivers/net/tulip/de4x5.c                     | 2 --
 drivers/net/tulip/dmfe.c                      | 6 ++----
 drivers/net/tulip/interrupt.c                 | 2 --
 drivers/net/tulip/uli526x.c                   | 6 ++----
 drivers/net/tulip/winbond-840.c               | 3 ---
 drivers/net/tulip/xircom_cb.c                 | 1 -
 drivers/net/tulip/xircom_tulip_cb.c           | 1 -
 drivers/net/tun.c                             | 2 +-
 drivers/net/typhoon.c                         | 1 -
 drivers/net/via-rhine.c                       | 1 -
 drivers/net/via-velocity.c                    | 3 +--
 drivers/net/wan/hdlc_fr.c                     | 1 -
 drivers/net/wan/sbni.c                        | 5 -----
 drivers/net/wireless/airo.c                   | 5 +----
 drivers/net/wireless/arlan-main.c             | 1 -
 drivers/net/wireless/atmel.c                  | 2 --
 drivers/net/wireless/hostap/hostap_80211_rx.c | 1 -
 drivers/net/wireless/netwave_cs.c             | 1 -
 drivers/net/wireless/orinoco.c                | 1 -
 drivers/net/wireless/prism54/islpci_eth.c     | 4 ----
 drivers/net/wireless/ray_cs.c                 | 1 -
 drivers/net/wireless/wavelan.c                | 2 --
 drivers/net/wireless/wavelan_cs.c             | 2 --
 drivers/net/wireless/zd1201.c                 | 2 --
 drivers/net/yellowfin.c                       | 1 -
 drivers/net/znet.c                            | 1 -
 drivers/usb/gadget/ether.c                    | 1 -
 drivers/usb/net/catc.c                        | 1 -
 drivers/usb/net/kaweth.c                      | 2 --
 drivers/usb/net/pegasus.c                     | 5 +++--
 drivers/usb/net/rtl8150.c                     | 1 -
 drivers/usb/net/usbnet.c                      | 1 -
 154 files changed, 13 insertions(+), 217 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 59243d9aedd..7698feafa6a 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -523,7 +523,6 @@ static void c2_rx_interrupt(struct net_device *netdev)
 		skb->data += sizeof(*rxp_hdr);
 		skb->tail = skb->data + buflen;
 		skb->len = buflen;
-		skb->dev = netdev;
 		skb->protocol = eth_type_trans(skb, netdev);
 
 		netif_rx(skb);
diff --git a/drivers/isdn/hysdn/hysdn_net.c b/drivers/isdn/hysdn/hysdn_net.c
index 557d96c78a6..cfa8fa5e44a 100644
--- a/drivers/isdn/hysdn/hysdn_net.c
+++ b/drivers/isdn/hysdn/hysdn_net.c
@@ -214,8 +214,6 @@ hysdn_rx_netpkt(hysdn_card * card, unsigned char *buf, unsigned short len)
 		lp->stats.rx_dropped++;
 		return;
 	}
-	skb->dev = &lp->netdev;
-
 	/* copy the data */
 	memcpy(skb_put(skb, len), buf, len);
 
diff --git a/drivers/net/3c501.c b/drivers/net/3c501.c
index 06e33786078..4bee99ba7db 100644
--- a/drivers/net/3c501.c
+++ b/drivers/net/3c501.c
@@ -735,7 +735,6 @@ static void el_receive(struct net_device *dev)
 	else
 	{
     		skb_reserve(skb,2);	/* Force 16 byte alignment */
-		skb->dev = dev;
 		/*
 		 *	The read increments through the bytes. The interrupt
 		 *	handler will fix the pointer when it returns to
diff --git a/drivers/net/3c505.c b/drivers/net/3c505.c
index 702bfb2a5e9..c693b5a7950 100644
--- a/drivers/net/3c505.c
+++ b/drivers/net/3c505.c
@@ -615,7 +615,6 @@ static void receive_packet(struct net_device *dev, int len)
 	if (test_and_set_bit(0, (void *) &adapter->dmaing))
 		printk(KERN_ERR "%s: rx blocked, DMA in progress, dir %d\n", dev->name, adapter->current_dma.direction);
 
-	skb->dev = dev;
 	adapter->current_dma.direction = 0;
 	adapter->current_dma.length = rlen;
 	adapter->current_dma.skb = skb;
diff --git a/drivers/net/3c507.c b/drivers/net/3c507.c
index 54e1d5aebed..eed4299dc42 100644
--- a/drivers/net/3c507.c
+++ b/drivers/net/3c507.c
@@ -873,7 +873,6 @@ static void el16_rx(struct net_device *dev)
 			}
 
 			skb_reserve(skb,2);
-			skb->dev = dev;
 
 			/* 'skb->data' points to the start of sk_buff data area. */
 			memcpy_fromio(skb_put(skb,pkt_len), data_frame + 10, pkt_len);
diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c
index f791bf026e5..c7511c4d3b6 100644
--- a/drivers/net/3c509.c
+++ b/drivers/net/3c509.c
@@ -1091,7 +1091,6 @@ el3_rx(struct net_device *dev)
 				printk("Receiving packet size %d status %4.4x.\n",
 					   pkt_len, rx_status);
 			if (skb != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);     /* Align IP on 16 byte */
 
 				/* 'skb->data' points to the start of sk_buff data area. */
diff --git a/drivers/net/3c515.c b/drivers/net/3c515.c
index c307ce66145..290166d5e7d 100644
--- a/drivers/net/3c515.c
+++ b/drivers/net/3c515.c
@@ -1292,7 +1292,6 @@ static int corkscrew_rx(struct net_device *dev)
 				printk("Receiving packet size %d status %4.4x.\n",
 				     pkt_len, rx_status);
 			if (skb != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				/* 'skb_put()' points to the start of sk_buff data area. */
 				insl(ioaddr + RX_FIFO,
@@ -1363,7 +1362,6 @@ static int boomerang_rx(struct net_device *dev)
 			   copying to a properly sized skbuff. */
 			if (pkt_len < rx_copybreak
 			    && (skb = dev_alloc_skb(pkt_len + 4)) != 0) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				/* 'skb_put()' points to the start of sk_buff data area. */
 				memcpy(skb_put(skb, pkt_len),
diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c
index 17d61eb0a7e..6b2036df685 100644
--- a/drivers/net/3c523.c
+++ b/drivers/net/3c523.c
@@ -988,7 +988,6 @@ static void elmc_rcv_int(struct net_device *dev)
 				rbd->status = 0;
 				skb = (struct sk_buff *) dev_alloc_skb(totlen + 2);
 				if (skb != NULL) {
-					skb->dev = dev;
 					skb_reserve(skb, 2);	/* 16 byte alignment */
 					skb_put(skb,totlen);
 					eth_copy_and_sum(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen,0);
diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c
index 6c7437e60bd..c7b571be20e 100644
--- a/drivers/net/3c527.c
+++ b/drivers/net/3c527.c
@@ -1189,7 +1189,6 @@ static void mc32_rx_ring(struct net_device *dev)
 			}
 
 			skb->protocol=eth_type_trans(skb,dev);
-			skb->dev=dev;
 			dev->last_rx = jiffies;
  			lp->net_stats.rx_packets++;
  			lp->net_stats.rx_bytes += length;
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index b406ecfa726..80924f76dee 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -2414,7 +2414,6 @@ static int vortex_rx(struct net_device *dev)
 				printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n",
 					   pkt_len, rx_status);
 			if (skb != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				/* 'skb_put()' points to the start of sk_buff data area. */
 				if (vp->bus_master &&
@@ -2491,7 +2490,6 @@ boomerang_rx(struct net_device *dev)
 			/* Check if the packet is long enough to just accept without
 			   copying to a properly sized skbuff. */
 			if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != 0) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
 				/* 'skb_put()' points to the start of sk_buff data area. */
diff --git a/drivers/net/7990.c b/drivers/net/7990.c
index 1b3d11ed6cf..c50264aea16 100644
--- a/drivers/net/7990.c
+++ b/drivers/net/7990.c
@@ -331,7 +331,6 @@ static int lance_rx (struct net_device *dev)
                                 return 0;
                         }
 
-                        skb->dev = dev;
                         skb_reserve (skb, 2);           /* 16 byte align */
                         skb_put (skb, len);             /* make room */
                         eth_copy_and_sum(skb,
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 12c8453f44b..2f704cb06e7 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -573,7 +573,6 @@ rx_status_loop:
 		}
 
 		skb_reserve(new_skb, RX_OFFSET);
-		new_skb->dev = dev;
 
 		pci_unmap_single(cp->pdev, mapping,
 				 buflen, PCI_DMA_FROMDEVICE);
@@ -1082,7 +1081,6 @@ static int cp_refill_rx (struct cp_private *cp)
 		if (!skb)
 			goto err_out;
 
-		skb->dev = cp->dev;
 		skb_reserve(skb, RX_OFFSET);
 
 		mapping = pci_map_single(cp->pdev, skb->data, cp->rx_buf_sz,
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index 99304b2aa86..2101334a8ac 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -2013,7 +2013,6 @@ no_early_rx:
 
 		skb = dev_alloc_skb (pkt_size + 2);
 		if (likely(skb)) {
-			skb->dev = dev;
 			skb_reserve (skb, 2);	/* 16 byte align the IP fields. */
 #if RX_BUF_IDX == 3
 			wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);
diff --git a/drivers/net/82596.c b/drivers/net/82596.c
index 640d7ca2ebc..3ff1155459a 100644
--- a/drivers/net/82596.c
+++ b/drivers/net/82596.c
@@ -830,7 +830,6 @@ memory_squeeze:
 				lp->stats.rx_dropped++;
 			}
 			else {
-				skb->dev = dev;
 				if (!rx_in_place) {
 					/* 16 byte align the data fields */
 					skb_reserve(skb, 2);
diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c
index d76548e7535..b38fc65005e 100644
--- a/drivers/net/a2065.c
+++ b/drivers/net/a2065.c
@@ -320,7 +320,6 @@ static int lance_rx (struct net_device *dev)
 				return 0;
 			}
 
-			skb->dev = dev;
 			skb_reserve (skb, 2);		/* 16 byte align */
 			skb_put (skb, len);		/* make room */
 			eth_copy_and_sum(skb,
diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c
index 7138e0e025b..7122b7ba8d6 100644
--- a/drivers/net/acenic.c
+++ b/drivers/net/acenic.c
@@ -2027,7 +2027,6 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm)
 		 */
 		csum = retdesc->tcp_udp_csum;
 
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 
 		/*
diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c
index 962c954c2d5..675fe918421 100644
--- a/drivers/net/amd8111e.c
+++ b/drivers/net/amd8111e.c
@@ -798,9 +798,7 @@ static int amd8111e_rx_poll(struct net_device *dev, int * budget)
 			pci_unmap_single(lp->pci_dev,lp->rx_dma_addr[rx_index],
 					 lp->rx_buff_len-2, PCI_DMA_FROMDEVICE);
 			skb_put(skb, pkt_len);
-			skb->dev = dev;
 			lp->rx_skbuff[rx_index] = new_skb;
-			new_skb->dev = dev;
 			lp->rx_dma_addr[rx_index] = pci_map_single(lp->pci_dev,
 								   new_skb->data,
 								   lp->rx_buff_len-2,
@@ -926,9 +924,7 @@ static int amd8111e_rx(struct net_device *dev)
 		pci_unmap_single(lp->pci_dev,lp->rx_dma_addr[rx_index],
 			lp->rx_buff_len-2, PCI_DMA_FROMDEVICE);
 		skb_put(skb, pkt_len);
-		skb->dev = dev;
 		lp->rx_skbuff[rx_index] = new_skb;
-		new_skb->dev = dev;
 		lp->rx_dma_addr[rx_index] = pci_map_single(lp->pci_dev,
 			new_skb->data, lp->rx_buff_len-2,PCI_DMA_FROMDEVICE);
 
diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c
index 9dfc09b181c..a0e68e71853 100644
--- a/drivers/net/ariadne.c
+++ b/drivers/net/ariadne.c
@@ -743,7 +743,6 @@ static int ariadne_rx(struct net_device *dev)
 	    }
 
 
-	    skb->dev = dev;
 	    skb_reserve(skb,2);		/* 16 byte align */
 	    skb_put(skb,pkt_len);	/* Make room */
 	    eth_copy_and_sum(skb, (char *)priv->rx_buff[entry], pkt_len,0);
diff --git a/drivers/net/arm/am79c961a.c b/drivers/net/arm/am79c961a.c
index ddd12d44ff2..8f0d7ce503c 100644
--- a/drivers/net/arm/am79c961a.c
+++ b/drivers/net/arm/am79c961a.c
@@ -526,7 +526,6 @@ am79c961_rx(struct net_device *dev, struct dev_priv *priv)
 		skb = dev_alloc_skb(len + 2);
 
 		if (skb) {
-			skb->dev = dev;
 			skb_reserve(skb, 2);
 
 			am_readbuffer(dev, pktaddr, skb_put(skb, len), len);
diff --git a/drivers/net/arm/at91_ether.c b/drivers/net/arm/at91_ether.c
index 1621b8fe35c..152fa7a042b 100644
--- a/drivers/net/arm/at91_ether.c
+++ b/drivers/net/arm/at91_ether.c
@@ -858,7 +858,6 @@ static void at91ether_rx(struct net_device *dev)
 			skb_reserve(skb, 2);
 			memcpy(skb_put(skb, pktlen), p_recv, pktlen);
 
-			skb->dev = dev;
 			skb->protocol = eth_type_trans(skb, dev);
 			dev->last_rx = jiffies;
 			lp->stats.rx_bytes += pktlen;
diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c
index dd698b033a6..2438c5bff23 100644
--- a/drivers/net/arm/ep93xx_eth.c
+++ b/drivers/net/arm/ep93xx_eth.c
@@ -255,7 +255,6 @@ static int ep93xx_rx(struct net_device *dev, int *budget)
 
 		skb = dev_alloc_skb(length + 2);
 		if (likely(skb != NULL)) {
-			skb->dev = dev;
 			skb_reserve(skb, 2);
 			dma_sync_single(NULL, ep->descs->rdesc[entry].buf_addr,
 						length, DMA_FROM_DEVICE);
diff --git a/drivers/net/arm/ether1.c b/drivers/net/arm/ether1.c
index a2921882eba..f075cebe84a 100644
--- a/drivers/net/arm/ether1.c
+++ b/drivers/net/arm/ether1.c
@@ -875,7 +875,6 @@ ether1_recv_done (struct net_device *dev)
 			skb = dev_alloc_skb (length + 2);
 
 			if (skb) {
-				skb->dev = dev;
 				skb_reserve (skb, 2);
 
 				ether1_readbuffer (dev, skb_put (skb, length), rbd.rbd_bufl, length);
diff --git a/drivers/net/arm/ether3.c b/drivers/net/arm/ether3.c
index 841178343a0..32da2eb9bce 100644
--- a/drivers/net/arm/ether3.c
+++ b/drivers/net/arm/ether3.c
@@ -661,7 +661,6 @@ if (next_ptr < RX_START || next_ptr >= RX_END) {
 			if (skb) {
 				unsigned char *buf;
 
-				skb->dev = dev;
 				skb_reserve(skb, 2);
 				buf = skb_put(skb, length);
 				ether3_readbuffer(dev, buf + 12, length - 12);
diff --git a/drivers/net/at1700.c b/drivers/net/at1700.c
index 56ae8babd91..bed8e0ebaf1 100644
--- a/drivers/net/at1700.c
+++ b/drivers/net/at1700.c
@@ -768,7 +768,6 @@ net_rx(struct net_device *dev)
 				lp->stats.rx_dropped++;
 				break;
 			}
-			skb->dev = dev;
 			skb_reserve(skb,2);
 
 			insw(ioaddr + DATAPORT, skb_put(skb,pkt_len), (pkt_len + 1) >> 1);
diff --git a/drivers/net/atari_bionet.c b/drivers/net/atari_bionet.c
index 4e3bf6a1f22..f52e7f22f63 100644
--- a/drivers/net/atari_bionet.c
+++ b/drivers/net/atari_bionet.c
@@ -544,7 +544,6 @@ bionet_poll_rx(struct net_device *dev) {
 				break;
 			}
 
-			skb->dev = dev;
 			skb_reserve( skb, 2 );		/* 16 Byte align  */
 			skb_put( skb, pkt_len );	/* make room */
 
diff --git a/drivers/net/atarilance.c b/drivers/net/atarilance.c
index 7e37ac86a69..dfa8b9ba4c8 100644
--- a/drivers/net/atarilance.c
+++ b/drivers/net/atarilance.c
@@ -1047,7 +1047,6 @@ static int lance_rx( struct net_device *dev )
 						   pkt_len );
 				}
 
-				skb->dev = dev;
 				skb_reserve( skb, 2 );	/* 16 byte align */
 				skb_put( skb, pkt_len );	/* Make room */
 				lp->memcpy_f( skb->data, PKTBUF_ADDR(head), pkt_len );
diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index 8606eac5bec..e3f181602e4 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -408,7 +408,6 @@ static void atl1_rx_checksum(struct atl1_adapter *adapter,
 static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter)
 {
 	struct atl1_rfd_ring *rfd_ring = &adapter->rfd_ring;
-	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
 	struct page *page;
 	unsigned long offset;
@@ -444,7 +443,6 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter)
 		 * the 14 byte MAC header is removed
 		 */
 		skb_reserve(skb, NET_IP_ALIGN);
-		skb->dev = netdev;
 
 		buffer_info->alloced = 1;
 		buffer_info->skb = skb;
diff --git a/drivers/net/atp.c b/drivers/net/atp.c
index 2d306fcb7f3..18aba838c1f 100644
--- a/drivers/net/atp.c
+++ b/drivers/net/atp.c
@@ -793,7 +793,6 @@ static void net_rx(struct net_device *dev)
 			lp->stats.rx_dropped++;
 			goto done;
 		}
-		skb->dev = dev;
 
 		skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 		read_block(ioaddr, pkt_len, skb_put(skb,pkt_len), dev->if_port);
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index 69ae229b680..97b55f2546c 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -1205,7 +1205,6 @@ static int au1000_rx(struct net_device *dev)
 				aup->stats.rx_dropped++;
 				continue;
 			}
-			skb->dev = dev;
 			skb_reserve(skb, 2);	/* 16 byte IP header align */
 			eth_copy_and_sum(skb,
 				(unsigned char *)pDB->vaddr, frmlen, 0);
diff --git a/drivers/net/b44.c b/drivers/net/b44.c
index d742bfe2447..f67d97de97f 100644
--- a/drivers/net/b44.c
+++ b/drivers/net/b44.c
@@ -825,7 +825,6 @@ static int b44_rx(struct b44 *bp, int budget)
 			if (copy_skb == NULL)
 				goto drop_it_no_recycle;
 
-			copy_skb->dev = bp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			/* DMA sync done above, copy just the actual packet */
diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c
index c143304dcff..4612725965d 100644
--- a/drivers/net/bmac.c
+++ b/drivers/net/bmac.c
@@ -715,7 +715,6 @@ static irqreturn_t bmac_rxdma_intr(int irq, void *dev_id)
 		if (skb != NULL) {
 			nb -= ETHERCRC;
 			skb_put(skb, nb);
-			skb->dev = dev;
 			skb->protocol = eth_type_trans(skb, dev);
 			netif_rx(skb);
 			dev->last_rx = jiffies;
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index c8126484c2b..68e37a655fe 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -1995,7 +1995,6 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 		return -1;
 
 	*skbref = skb;
-	skb->dev = cp->dev;
 	skb_reserve(skb, swivel);
 
 	p = skb->data;
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 326d4a66512..47fa8dcf752 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1379,12 +1379,11 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
 	}
 	__skb_pull(skb, sizeof(*p));
 
-	skb->dev = adapter->port[p->iff].dev;
 	skb->dev->last_rx = jiffies;
 	st = per_cpu_ptr(sge->port_stats[p->iff], smp_processor_id());
 	st->rx_packets++;
 
-	skb->protocol = eth_type_trans(skb, skb->dev);
+	skb->protocol = eth_type_trans(skb, adapter->port[p->iff].dev);
 	if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff &&
 	    skb->protocol == htons(ETH_P_IP) &&
 	    (skb->data[9] == IPPROTO_TCP || skb->data[9] == IPPROTO_UDP)) {
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index 8eb57127600..98643801a3b 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -1375,7 +1375,6 @@ e100_rx(struct net_device *dev)
 		myNextRxDesc->descr.buf = L1_CACHE_ALIGN(virt_to_phys(myNextRxDesc->skb->data));
 	}
 
-	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 
 	/* Send the packet to the upper layers */
diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c
index 4612f71a710..9774bb1b3e8 100644
--- a/drivers/net/cs89x0.c
+++ b/drivers/net/cs89x0.c
@@ -1004,7 +1004,6 @@ skip_this_frame:
 		return;
 	}
 	skb_reserve(skb, 2);	/* longword align L3 header */
-	skb->dev = dev;
 
 	if (bp + length > lp->end_dma_buff) {
 		int semi_cnt = lp->end_dma_buff - bp;
@@ -1702,7 +1701,6 @@ net_rx(struct net_device *dev)
 		return;
 	}
 	skb_reserve(skb, 2);	/* longword align L3 header */
-	skb->dev = dev;
 
 	readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1);
 	if (length & 1)
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 027ab2c3825..8946f7aa97c 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1684,9 +1684,8 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
 	struct port_info *pi;
 
 	skb_pull(skb, sizeof(*p) + pad);
-	skb->dev = adap->port[p->iff];
 	skb->dev->last_rx = jiffies;
-	skb->protocol = eth_type_trans(skb, skb->dev);
+	skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
 	pi = netdev_priv(skb->dev);
 	if (pi->rx_csum_offload && p->csum_valid && p->csum == 0xffff &&
 	    !p->fragment) {
diff --git a/drivers/net/de600.c b/drivers/net/de600.c
index e547ce14eef..dae97b860da 100644
--- a/drivers/net/de600.c
+++ b/drivers/net/de600.c
@@ -359,7 +359,6 @@ static void de600_rx_intr(struct net_device *dev)
 	}
 	/* else */
 
-	skb->dev = dev;
 	skb_reserve(skb,2);	/* Align */
 
 	/* 'skb->data' points to the start of sk_buff data area. */
diff --git a/drivers/net/de620.c b/drivers/net/de620.c
index b6ad0cb5055..dc489242617 100644
--- a/drivers/net/de620.c
+++ b/drivers/net/de620.c
@@ -697,7 +697,6 @@ static int de620_rx_intr(struct net_device *dev)
 		}
 		else { /* Yep! Go get it! */
 			skb_reserve(skb,2);	/* Align */
-			skb->dev = dev;
 			/* skb->data points to the start of sk_buff data area */
 			buffer = skb_put(skb,size);
 			/* copy the packet into the buffer */
diff --git a/drivers/net/declance.c b/drivers/net/declance.c
index 9f7e1db8ce6..95d854e2295 100644
--- a/drivers/net/declance.c
+++ b/drivers/net/declance.c
@@ -616,7 +616,6 @@ static int lance_rx(struct net_device *dev)
 			}
 			lp->stats.rx_bytes += len;
 
-			skb->dev = dev;
 			skb_reserve(skb, 2);	/* 16 byte align */
 			skb_put(skb, len);	/* make room */
 
diff --git a/drivers/net/depca.c b/drivers/net/depca.c
index f3807aaf10a..183497020bf 100644
--- a/drivers/net/depca.c
+++ b/drivers/net/depca.c
@@ -1044,7 +1044,6 @@ static int depca_rx(struct net_device *dev)
 					unsigned char *buf;
 					skb_reserve(skb, 2);	/* 16 byte align the IP header */
 					buf = skb_put(skb, pkt_len);
-					skb->dev = dev;
 					if (entry < lp->rx_old) {	/* Wrapped buffer */
 						len = (lp->rxRingMask - lp->rx_old + 1) * RX_BUFF_SZ;
 						memcpy_fromio(buf, lp->rx_buff[lp->rx_old], len);
diff --git a/drivers/net/dgrs.c b/drivers/net/dgrs.c
index a79520295fd..d223c38966f 100644
--- a/drivers/net/dgrs.c
+++ b/drivers/net/dgrs.c
@@ -503,7 +503,6 @@ dgrs_rcv_frame(
 		/* discarding the frame */
 		goto out;
 	}
-	skb->dev = devN;
 	skb_reserve(skb, 2);	/* Align IP header */
 
 again:
diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c
index 9d446a0fe0b..74ec64a1625 100644
--- a/drivers/net/dl2k.c
+++ b/drivers/net/dl2k.c
@@ -504,7 +504,6 @@ rio_timer (unsigned long data)
 					break;
 				}
 				np->rx_skbuff[entry] = skb;
-				skb->dev = dev;
 				/* 16 byte align the IP header */
 				skb_reserve (skb, 2);
 				np->rx_ring[entry].fraginfo =
@@ -575,7 +574,6 @@ alloc_list (struct net_device *dev)
 				dev->name);
 			break;
 		}
-		skb->dev = dev;	/* Mark as being used by this device. */
 		skb_reserve (skb, 2);	/* 16 byte align the IP header. */
 		/* Rubicon now supports 40 bits of addressing space. */
 		np->rx_ring[i].fraginfo =
@@ -866,7 +864,6 @@ receive_packet (struct net_device *dev)
 							    	DMA_48BIT_MASK,
 							    np->rx_buf_sz,
 							    PCI_DMA_FROMDEVICE);
-				skb->dev = dev;
 				/* 16 byte align the IP header */
 				skb_reserve (skb, 2);
 				eth_copy_and_sum (skb,
@@ -910,7 +907,6 @@ receive_packet (struct net_device *dev)
 				break;
 			}
 			np->rx_skbuff[entry] = skb;
-			skb->dev = dev;
 			/* 16 byte align the IP header */
 			skb_reserve (skb, 2);
 			np->rx_ring[entry].fraginfo =
diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index 615d2b14efa..8cc1174e7f6 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -954,7 +954,6 @@ dm9000_rx(struct net_device *dev)
 		/* Move data from DM9000 */
 		if (GoodPacket
 		    && ((skb = dev_alloc_skb(RxLen + 4)) != NULL)) {
-			skb->dev = dev;
 			skb_reserve(skb, 2);
 			rdptr = (u8 *) skb_put(skb, RxLen - 4);
 
diff --git a/drivers/net/eepro.c b/drivers/net/eepro.c
index b4463094c93..39654e1e2be 100644
--- a/drivers/net/eepro.c
+++ b/drivers/net/eepro.c
@@ -1591,7 +1591,6 @@ eepro_rx(struct net_device *dev)
 
 				break;
 			}
-			skb->dev = dev;
 			skb_reserve(skb,2);
 
 			if (lp->version == LAN595)
diff --git a/drivers/net/eepro100.c b/drivers/net/eepro100.c
index e28bb1e38f8..db658bc491a 100644
--- a/drivers/net/eepro100.c
+++ b/drivers/net/eepro100.c
@@ -1793,7 +1793,6 @@ speedo_rx(struct net_device *dev)
 			   copying to a properly sized skbuff. */
 			if (pkt_len < rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != 0) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				/* 'skb_put()' points to the start of sk_buff data area. */
 				pci_dma_sync_single_for_cpu(sp->pdev, sp->rx_ring_dma[entry],
diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c
index 3868b803126..8aaf5ec0c36 100644
--- a/drivers/net/eexpress.c
+++ b/drivers/net/eexpress.c
@@ -976,7 +976,6 @@ static void eexp_hw_rx_pio(struct net_device *dev)
 					lp->stats.rx_dropped++;
 					break;
 				}
-				skb->dev = dev;
 				skb_reserve(skb, 2);
 				outw(pbuf+10, ioaddr+READ_PTR);
 			        insw(ioaddr+DATAPORT, skb_put(skb,pkt_len),(pkt_len+1)>>1);
diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c
index 3a6a83d3ee1..4e3f14c9c71 100644
--- a/drivers/net/epic100.c
+++ b/drivers/net/epic100.c
@@ -934,7 +934,6 @@ static void epic_init_ring(struct net_device *dev)
 		ep->rx_skbuff[i] = skb;
 		if (skb == NULL)
 			break;
-		skb->dev = dev;			/* Mark as being used by this device. */
 		skb_reserve(skb, 2);	/* 16 byte align the IP header. */
 		ep->rx_ring[i].bufaddr = pci_map_single(ep->pci_dev,
 			skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE);
@@ -1199,7 +1198,6 @@ static int epic_rx(struct net_device *dev, int budget)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(ep->pci_dev,
 							    ep->rx_ring[entry].bufaddr,
@@ -1236,7 +1234,6 @@ static int epic_rx(struct net_device *dev, int budget)
 			skb = ep->rx_skbuff[entry] = dev_alloc_skb(ep->rx_buf_sz);
 			if (skb == NULL)
 				break;
-			skb->dev = dev;			/* Mark as being used by this device. */
 			skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 			ep->rx_ring[entry].bufaddr = pci_map_single(ep->pci_dev,
 				skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c
index 93283e386f3..04abf59e500 100644
--- a/drivers/net/eth16i.c
+++ b/drivers/net/eth16i.c
@@ -1175,7 +1175,6 @@ static void eth16i_rx(struct net_device *dev)
 				break;
 			}
 
-			skb->dev = dev;
 			skb_reserve(skb,2);
 
 			/*
diff --git a/drivers/net/ewrk3.c b/drivers/net/ewrk3.c
index 714ea1176ec..cb0792c187b 100644
--- a/drivers/net/ewrk3.c
+++ b/drivers/net/ewrk3.c
@@ -993,7 +993,6 @@ static int ewrk3_rx(struct net_device *dev)
 
 					if ((skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
 						unsigned char *p;
-						skb->dev = dev;
 						skb_reserve(skb, 2);	/* Align to 16 bytes */
 						p = skb_put(skb, pkt_len);
 
diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c
index 38a13f44053..abe9b089c61 100644
--- a/drivers/net/fealnx.c
+++ b/drivers/net/fealnx.c
@@ -1719,7 +1719,6 @@ static int netdev_rx(struct net_device *dev)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak &&
 			    (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(np->pci_dev,
 							    np->cur_rx->buffer,
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 6764281b453..255b09124e1 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -647,7 +647,6 @@ while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
 		printk("%s: Memory squeeze, dropping packet.\n", dev->name);
 		fep->stats.rx_dropped++;
 	} else {
-		skb->dev = dev;
 		skb_put(skb,pkt_len-4);	/* Make room */
 		eth_copy_and_sum(skb, data, pkt_len-4, 0);
 		skb->protocol=eth_type_trans(skb,dev);
diff --git a/drivers/net/fec_8xx/fec_main.c b/drivers/net/fec_8xx/fec_main.c
index 77f747a5afa..698dba8f2aa 100644
--- a/drivers/net/fec_8xx/fec_main.c
+++ b/drivers/net/fec_8xx/fec_main.c
@@ -561,7 +561,6 @@ static int fec_enet_rx_common(struct net_device *dev, int *budget)
 				skbn = dev_alloc_skb(ENET_RX_FRSIZE);
 
 			if (skbn != NULL) {
-				skb->dev = dev;
 				skb_put(skb, pkt_len);	/* Make room */
 				skb->protocol = eth_type_trans(skb, dev);
 				received++;
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index d04214e4e58..d5d458c3421 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1385,7 +1385,6 @@ static int nv_alloc_rx(struct net_device *dev)
 	while (np->put_rx.orig != less_rx) {
 		struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
 		if (skb) {
-			skb->dev = dev;
 			np->put_rx_ctx->skb = skb;
 			np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
 							     skb->end-skb->data, PCI_DMA_FROMDEVICE);
@@ -1416,7 +1415,6 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 	while (np->put_rx.ex != less_rx) {
 		struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
 		if (skb) {
-			skb->dev = dev;
 			np->put_rx_ctx->skb = skb;
 			np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
 							     skb->end-skb->data, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index 4a05c14bf7e..9f6ef315ce5 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -170,7 +170,6 @@ static int fs_enet_rx_napi(struct net_device *dev, int *budget)
 				skbn = dev_alloc_skb(ENET_RX_FRSIZE);
 
 			if (skbn != NULL) {
-				skb->dev = dev;
 				skb_put(skb, pkt_len);	/* Make room */
 				skb->protocol = eth_type_trans(skb, dev);
 				received++;
@@ -304,7 +303,6 @@ static int fs_enet_rx_non_napi(struct net_device *dev)
 				skbn = dev_alloc_skb(ENET_RX_FRSIZE);
 
 			if (skbn != NULL) {
-				skb->dev = dev;
 				skb_put(skb, pkt_len);	/* Make room */
 				skb->protocol = eth_type_trans(skb, dev);
 				received++;
@@ -516,7 +514,6 @@ void fs_init_bds(struct net_device *dev)
 			break;
 		}
 		fep->rx_skbuff[i] = skb;
-		skb->dev = dev;
 		CBDW_BUFADDR(bdp,
 			dma_map_single(fep->dev, skb->data,
 				L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index d981d4c41dd..1d019195a39 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -1295,8 +1295,6 @@ struct sk_buff * gfar_new_skb(struct net_device *dev, struct rxbd8 *bdp)
 	 */
 	skb_reserve(skb, alignamount);
 
-	skb->dev = dev;
-
 	bdp->bufPtr = dma_map_single(NULL, skb->data,
 			priv->rx_buffer_size, DMA_FROM_DEVICE);
 
diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c
index c3c0d67fc38..2521b111b3a 100644
--- a/drivers/net/hamachi.c
+++ b/drivers/net/hamachi.c
@@ -1568,7 +1568,6 @@ static int hamachi_rx(struct net_device *dev)
 				printk(KERN_ERR "%s: rx_copybreak non-zero "
 				  "not good with RX_CHECKSUM\n", dev->name);
 #endif
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(hmp->pci_dev,
 							    hmp->rx_ring[entry].addr,
diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c
index 7dc5185aa2c..8118a6750b6 100644
--- a/drivers/net/hp100.c
+++ b/drivers/net/hp100.c
@@ -1816,7 +1816,6 @@ static void hp100_rx(struct net_device *dev)
 			u_char *ptr;
 
 			skb_reserve(skb,2);
-			skb->dev = dev;
 
 			/* ptr to start of the sk_buff data area */
 			skb_put(skb, pkt_len);
diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c
index dd8ad874682..b1ad62d89eb 100644
--- a/drivers/net/ibm_emac/ibm_emac_core.c
+++ b/drivers/net/ibm_emac/ibm_emac_core.c
@@ -1398,7 +1398,6 @@ static int emac_poll_rx(void *param, int budget)
 
 		skb_put(skb, len);
 	      push_packet:
-		skb->dev = dev->ndev;
 		skb->protocol = eth_type_trans(skb, dev->ndev);
 		emac_rx_csum(dev, skb, ctrl);
 
diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c
index 3f946c81151..fe85d6fcba3 100644
--- a/drivers/net/ibmlana.c
+++ b/drivers/net/ibmlana.c
@@ -601,7 +601,6 @@ static void irqrx_handler(struct net_device *dev)
 
 				/* set up skb fields */
 
-				skb->dev = dev;
 				skb->protocol = eth_type_trans(skb, dev);
 				skb->ip_summed = CHECKSUM_NONE;
 
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 458db0538a9..0573fcfcb2c 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -798,7 +798,6 @@ static int ibmveth_poll(struct net_device *netdev, int *budget)
 
 				skb_reserve(skb, offset);
 				skb_put(skb, length);
-				skb->dev = netdev;
 				skb->protocol = eth_type_trans(skb, netdev);
 
 				netif_receive_skb(skb);	/* send it up */
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index 4ad780719a8..ea07aa5ba51 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -633,8 +633,6 @@ static inline void ioc3_rx(struct ioc3_private *ip)
 
 			ip->rx_skbs[rx_entry] = NULL;	/* Poison  */
 
-			new_skb->dev = priv_netdev(ip);
-
 			/* Because we reserve afterwards. */
 			skb_put(new_skb, (1664 + RX_OFFSET));
 			rxb = (struct ioc3_erxbuf *) new_skb->data;
@@ -940,7 +938,6 @@ static void ioc3_alloc_rings(struct net_device *dev)
 			}
 
 			ip->rx_skbs[i] = skb;
-			skb->dev = dev;
 
 			/* Because we reserve afterwards. */
 			skb_put(skb, (1664 + RX_OFFSET));
diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c
index 0e9ba3c3faf..347d50cd77d 100644
--- a/drivers/net/iseries_veth.c
+++ b/drivers/net/iseries_veth.c
@@ -1540,7 +1540,6 @@ static void veth_receive(struct veth_lpar_connection *cnx,
 		}
 
 		skb_put(skb, length);
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		skb->ip_summed = CHECKSUM_NONE;
 		netif_rx(skb);	/* send it up */
diff --git a/drivers/net/ixp2000/ixpdev.c b/drivers/net/ixp2000/ixpdev.c
index a4eccb11d67..6683afc02aa 100644
--- a/drivers/net/ixp2000/ixpdev.c
+++ b/drivers/net/ixp2000/ixpdev.c
@@ -110,11 +110,10 @@ static int ixpdev_rx(struct net_device *dev, int *budget)
 
 		skb = dev_alloc_skb(desc->pkt_length + 2);
 		if (likely(skb != NULL)) {
-			skb->dev = nds[desc->channel];
 			skb_reserve(skb, 2);
 			eth_copy_and_sum(skb, buf, desc->pkt_length, 0);
 			skb_put(skb, desc->pkt_length);
-			skb->protocol = eth_type_trans(skb, skb->dev);
+			skb->protocol = eth_type_trans(skb, nds[desc->channel]);
 
 			skb->dev->last_rx = jiffies;
 
diff --git a/drivers/net/lance.c b/drivers/net/lance.c
index a3843320dbe..11cbcb946db 100644
--- a/drivers/net/lance.c
+++ b/drivers/net/lance.c
@@ -1184,7 +1184,6 @@ lance_rx(struct net_device *dev)
 					}
 					break;
 				}
-				skb->dev = dev;
 				skb_reserve(skb,2);	/* 16 byte align */
 				skb_put(skb,pkt_len);	/* Make room */
 				eth_copy_and_sum(skb,
diff --git a/drivers/net/lasi_82596.c b/drivers/net/lasi_82596.c
index 452863d5d49..0edcd125fd6 100644
--- a/drivers/net/lasi_82596.c
+++ b/drivers/net/lasi_82596.c
@@ -801,7 +801,6 @@ memory_squeeze:
 				lp->stats.rx_dropped++;
 			}
 			else {
-				skb->dev = dev;
 				if (!rx_in_place) {
 					/* 16 byte align the data fields */
 					dma_sync_single_for_cpu(lp->dev, (dma_addr_t)WSWAPchar(rbd->b_data), PKT_BUF_SZ, DMA_FROM_DEVICE);
diff --git a/drivers/net/lib8390.c b/drivers/net/lib8390.c
index e726c06b8dc..5c86e737f95 100644
--- a/drivers/net/lib8390.c
+++ b/drivers/net/lib8390.c
@@ -722,7 +722,6 @@ static void ei_receive(struct net_device *dev)
 			else
 			{
 				skb_reserve(skb,2);	/* IP headers on 16 byte boundaries */
-				skb->dev = dev;
 				skb_put(skb, pkt_len);	/* Make room */
 				ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame));
 				skb->protocol=eth_type_trans(skb,dev);
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 2b739fd584f..4380e5e89dc 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -137,7 +137,6 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_orphan(skb);
 
 	skb->protocol = eth_type_trans(skb,dev);
-	skb->dev = dev;
 #ifndef LOOPBACK_MUST_CHECKSUM
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 #endif
diff --git a/drivers/net/lp486e.c b/drivers/net/lp486e.c
index 177c502f738..5fc18da1873 100644
--- a/drivers/net/lp486e.c
+++ b/drivers/net/lp486e.c
@@ -676,7 +676,6 @@ i596_rx_one(struct net_device *dev, struct i596_private *lp,
 			return 1;
 		}
 
-		skb->dev = dev;
 		memcpy(skb_put(skb,pkt_len), rfd->data, pkt_len);
 
 		skb->protocol = eth_type_trans(skb,dev);
diff --git a/drivers/net/mac89x0.c b/drivers/net/mac89x0.c
index e960138011c..90e695d5326 100644
--- a/drivers/net/mac89x0.c
+++ b/drivers/net/mac89x0.c
@@ -530,7 +530,6 @@ net_rx(struct net_device *dev)
 		return;
 	}
 	skb_put(skb, length);
-	skb->dev = dev;
 
 	memcpy_fromio(skb->data, dev->mem_start + PP_RxFrame, length);
 
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 2e9571bf073..0c3649be0d0 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -357,7 +357,6 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 	}
 
 	skb_reserve(skb, RX_OFFSET);
-	skb->dev = bp->dev;
 	skb->ip_summed = CHECKSUM_NONE;
 	skb_put(skb, len);
 
diff --git a/drivers/net/mace.c b/drivers/net/mace.c
index 9ec24f0d5d6..b3bd6239495 100644
--- a/drivers/net/mace.c
+++ b/drivers/net/mace.c
@@ -939,7 +939,6 @@ static irqreturn_t mace_rxdma_intr(int irq, void *dev_id)
 		else	/* Ethernet header; mace includes FCS */
 		    nb -= 8;
 		skb_put(skb, nb);
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		mp->stats.rx_bytes += skb->len;
 		netif_rx(skb);
diff --git a/drivers/net/macmace.c b/drivers/net/macmace.c
index 5d541e87304..8c07ffc9c24 100644
--- a/drivers/net/macmace.c
+++ b/drivers/net/macmace.c
@@ -621,7 +621,6 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
 	skb_reserve(skb,2);
 	memcpy(skb_put(skb, mf->len), mf->data, mf->len);
 
-	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 	netif_rx(skb);
 	dev->last_rx = jiffies;
diff --git a/drivers/net/meth.c b/drivers/net/meth.c
index 7e69ca6edd9..fafe6783523 100644
--- a/drivers/net/meth.c
+++ b/drivers/net/meth.c
@@ -421,7 +421,6 @@ static void meth_rx(struct net_device* dev, unsigned long int_status)
 					/* Write metadata, and then pass to the receive level */
 					skb_put(skb_c, len);
 					priv->rx_skbs[priv->rx_write] = skb;
-					skb_c->dev = dev;
 					skb_c->protocol = eth_type_trans(skb_c, dev);
 					dev->last_rx = jiffies;
 					priv->stats.rx_packets++;
diff --git a/drivers/net/mipsnet.c b/drivers/net/mipsnet.c
index f42b9e20193..403f63afd20 100644
--- a/drivers/net/mipsnet.c
+++ b/drivers/net/mipsnet.c
@@ -101,7 +101,6 @@ static inline ssize_t mipsnet_get_fromdev(struct net_device *dev, size_t count)
 	if (ioiocpy_frommipsnet(dev, skb_put(skb, len), len))
 		return -EFAULT;
 
-	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 8015a7c5b0c..cd9369a285e 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -434,7 +434,6 @@ static int mv643xx_eth_receive_queue(struct net_device *dev, int budget)
 			 * received packet
 			 */
 			skb_put(skb, pkt_info.byte_cnt - 4);
-			skb->dev = dev;
 
 			if (pkt_info.cmd_sts & ETH_LAYER_4_CHECKSUM_OK) {
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index f8efe0e70a6..7c04179c7b1 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -1020,7 +1020,6 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
 		skb_shinfo(skb)->nr_frags = 0;
 	}
 	skb->protocol = eth_type_trans(skb, dev);
-	skb->dev = dev;
 
 	if (mgp->csum_flag) {
 		if ((skb->protocol == htons(ETH_P_IP)) ||
diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
index 349b96a3ec4..a8d7ff2c96a 100644
--- a/drivers/net/natsemi.c
+++ b/drivers/net/natsemi.c
@@ -2289,7 +2289,6 @@ static void netdev_rx(struct net_device *dev, int *work_done, int work_to_do)
 			 * without copying to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak
 			    && (skb = dev_alloc_skb(pkt_len + RX_OFFSET)) != NULL) {
-				skb->dev = dev;
 				/* 16 byte align the IP header */
 				skb_reserve(skb, RX_OFFSET);
 				pci_dma_sync_single_for_cpu(np->pci_dev,
diff --git a/drivers/net/netx-eth.c b/drivers/net/netx-eth.c
index a53644f6a29..2b8da0a5499 100644
--- a/drivers/net/netx-eth.c
+++ b/drivers/net/netx-eth.c
@@ -168,7 +168,6 @@ static void netx_eth_receive(struct net_device *ndev)
 		FIFO_PTR_SEGMENT(seg) | FIFO_PTR_FRAMENO(frameno));
 
 	ndev->last_rx = jiffies;
-	skb->dev = ndev;
 	skb->protocol = eth_type_trans(skb, ndev);
 	netif_rx(skb);
 	priv->stats.rx_packets++;
diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c
index eff965dc5ff..5cd40562da7 100644
--- a/drivers/net/netxen/netxen_nic_init.c
+++ b/drivers/net/netxen/netxen_nic_init.c
@@ -1129,7 +1129,6 @@ netxen_process_rcv(struct netxen_adapter *adapter, int ctxid,
 		port->stats.csummed++;
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
-	skb->dev = netdev;
 	if (desc_ctx == RCV_DESC_LRO_CTXID) {
 		/* True length was only available on the last pkt */
 		skb_put(skb, buffer->lro_length);
diff --git a/drivers/net/ni5010.c b/drivers/net/ni5010.c
index 8be0d030d6f..3d5b4232f65 100644
--- a/drivers/net/ni5010.c
+++ b/drivers/net/ni5010.c
@@ -562,7 +562,6 @@ static void ni5010_rx(struct net_device *dev)
 		return;
 	}
 
-	skb->dev = dev;
 	skb_reserve(skb, 2);
 
 	/* Read packet into buffer */
diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c
index a6f4b24b017..70b6812a8a7 100644
--- a/drivers/net/ni52.c
+++ b/drivers/net/ni52.c
@@ -934,7 +934,6 @@ static void ni52_rcv_int(struct net_device *dev)
 					skb = (struct sk_buff *) dev_alloc_skb(totlen+2);
 					if(skb != NULL)
 					{
-						skb->dev = dev;
 						skb_reserve(skb,2);
 						skb_put(skb,totlen);
 						eth_copy_and_sum(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen,0);
diff --git a/drivers/net/ni65.c b/drivers/net/ni65.c
index 1578f4d9849..782201d12c2 100644
--- a/drivers/net/ni65.c
+++ b/drivers/net/ni65.c
@@ -610,7 +610,6 @@ static void *ni65_alloc_mem(struct net_device *dev,char *what,int size,int type)
 			printk(KERN_WARNING "%s: unable to allocate %s memory.\n",dev->name,what);
 			return NULL;
 		}
-		skb->dev = dev;
 		skb_reserve(skb,2+16);
 		skb_put(skb,R_BUF_SIZE);	 /* grab the whole space .. (not necessary) */
 		ptr = skb->data;
@@ -1094,7 +1093,6 @@ static void ni65_recv_intr(struct net_device *dev,int csr0)
 			if(skb)
 			{
 				skb_reserve(skb,2);
-	skb->dev = dev;
 #ifdef RCV_VIA_SKB
 				if( (unsigned long) (skb->data + R_BUF_SIZE) > 0x1000000) {
 					skb_put(skb,len);
diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c
index 9ec6e9e54f4..747988b12ec 100644
--- a/drivers/net/ns83820.c
+++ b/drivers/net/ns83820.c
@@ -607,7 +607,6 @@ static inline int rx_refill(struct net_device *ndev, gfp_t gfp)
 		res &= 0xf;
 		skb_reserve(skb, res);
 
-		skb->dev = ndev;
 		if (gfp != GFP_ATOMIC)
 			spin_lock_irqsave(&dev->rx_info.lock, flags);
 		res = ns83820_add_rx_skb(dev, skb);
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index d670ac74824..3f4213f3d5d 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -334,8 +334,6 @@ static void pasemi_mac_replenish_rx_ring(struct net_device *dev)
 			break;
 		}
 
-		skb->dev = dev;
-
 		dma = pci_map_single(mac->dma_pdev, skb->data, skb->len,
 				     PCI_DMA_FROMDEVICE);
 
diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c
index 6ca4e4fa6b8..099972c977e 100644
--- a/drivers/net/pci-skeleton.c
+++ b/drivers/net/pci-skeleton.c
@@ -1565,7 +1565,6 @@ static void netdrv_rx_interrupt (struct net_device *dev,
 
 		skb = dev_alloc_skb (pkt_size + 2);
 		if (skb) {
-			skb->dev = dev;
 			skb_reserve (skb, 2);	/* 16 byte align the IP fields. */
 
 			eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c
index c7bd9c1c7f3..2b395ee21f7 100644
--- a/drivers/net/pcmcia/3c574_cs.c
+++ b/drivers/net/pcmcia/3c574_cs.c
@@ -1056,7 +1056,6 @@ static int el3_rx(struct net_device *dev, int worklimit)
 			DEBUG(3, "  Receiving packet size %d status %4.4x.\n",
 				  pkt_len, rx_status);
 			if (skb != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);
 				insl(ioaddr+RX_FIFO, skb_put(skb, pkt_len),
 						((pkt_len+3)>>2));
diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c
index 461e8274ef6..143ae2ff309 100644
--- a/drivers/net/pcmcia/3c589_cs.c
+++ b/drivers/net/pcmcia/3c589_cs.c
@@ -883,7 +883,6 @@ static int el3_rx(struct net_device *dev)
 	    DEBUG(3, "    Receiving packet size %d status %4.4x.\n",
 		  pkt_len, rx_status);
 	    if (skb != NULL) {
-		skb->dev = dev;
 		skb_reserve(skb, 2);
 		insl(ioaddr+RX_FIFO, skb_put(skb, pkt_len),
 			(pkt_len+3)>>2);
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index 6139048f811..fabbe95c7ef 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -1496,7 +1496,6 @@ static void ei_receive(struct net_device *dev)
 			else
 			{
 				skb_reserve(skb,2);	/* IP headers on 16 byte boundaries */
-				skb->dev = dev;
 				skb_put(skb, pkt_len);	/* Make room */
 				ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame));
 				skb->protocol=eth_type_trans(skb,dev);
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 0d7de617e53..3f93d493323 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -999,7 +999,6 @@ static void fjn_rx(struct net_device *dev)
 		lp->stats.rx_dropped++;
 		break;
 	    }
-	    skb->dev = dev;
 
 	    skb_reserve(skb, 2);
 	    insw(ioaddr + DATAPORT, skb_put(skb, pkt_len),
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index 3b707747a81..ec0af65cd5d 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -1182,8 +1182,6 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt)
       skb = dev_alloc_skb(pkt_len+2);
 
       if (skb != NULL) {
-	skb->dev = dev;
-
 	skb_reserve(skb, 2);
 	insw(ioaddr + AM2150_RCV, skb_put(skb, pkt_len), pkt_len>>1);
 	if (pkt_len & 1)
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index 2561f76033e..7912dbd1425 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -1669,7 +1669,6 @@ static void smc_rx(struct net_device *dev)
 	     (packet_length+1)>>1);
 	skb->protocol = eth_type_trans(skb, dev);
 	
-	skb->dev = dev;
 	netif_rx(skb);
 	dev->last_rx = jiffies;
 	smc->stats.rx_packets++;
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index 5879e7c3698..809ec440b8e 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -1226,7 +1226,6 @@ xirc2ps_interrupt(int irq, void *dev_id)
 			    (pktlen+1)>>1);
 		}
 		skb->protocol = eth_type_trans(skb, dev);
-		skb->dev = dev;
 		netif_rx(skb);
 		dev->last_rx = jiffies;
 		lp->stats.rx_packets++;
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
index 4d94ba7899b..0791360a6a6 100644
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -1206,7 +1206,6 @@ static void pcnet32_rx_entry(struct net_device *dev,
 					 PCI_DMA_FROMDEVICE);
 			skb_put(skb, pkt_len);
 			lp->rx_skbuff[entry] = newskb;
-			newskb->dev = dev;
 			lp->rx_dma_addr[entry] =
 					    pci_map_single(lp->pci_dev,
 							   newskb->data,
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index a8246eb2f8d..40d2639eedc 100755
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c
@@ -1873,7 +1873,6 @@ static void ql_process_mac_rx_intr(struct ql3_adapter *qdev,
 			 pci_unmap_len(lrg_buf_cb2, maplen),
 			 PCI_DMA_FROMDEVICE);
 	prefetch(skb->data);
-	skb->dev = qdev->ndev;
 	skb->ip_summed = CHECKSUM_NONE;
 	skb->protocol = eth_type_trans(skb, qdev->ndev);
 
@@ -1946,7 +1945,6 @@ static void ql_process_macip_rx_intr(struct ql3_adapter *qdev,
 			skb2->ip_summed = CHECKSUM_UNNECESSARY;
 		}
 	}
-	skb2->dev = qdev->ndev;
 	skb2->protocol = eth_type_trans(skb2, qdev->ndev);
 
 	netif_receive_skb(skb2);
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 6a77b8a9224..34280f94e9f 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2586,7 +2586,6 @@ rtl8169_rx_interrupt(struct net_device *dev, struct rtl8169_private *tp,
 			pci_action(tp->pci_dev, le64_to_cpu(desc->addr),
 				   tp->rx_buf_sz, PCI_DMA_FROMDEVICE);
 
-			skb->dev = dev;
 			skb_put(skb, pkt_size);
 			skb->protocol = eth_type_trans(skb, dev);
 
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index b7ff484af3e..df6b73872fd 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -115,7 +115,6 @@ static int rionet_rx_clean(struct net_device *ndev)
 
 		rnet->rx_skb[i]->data = data;
 		skb_put(rnet->rx_skb[i], RIO_MAX_MSG_SIZE);
-		rnet->rx_skb[i]->dev = ndev;
 		rnet->rx_skb[i]->protocol =
 		    eth_type_trans(rnet->rx_skb[i], ndev);
 		error = netif_rx(rnet->rx_skb[i]);
diff --git a/drivers/net/saa9730.c b/drivers/net/saa9730.c
index 143958f1ef0..ad94358ece8 100644
--- a/drivers/net/saa9730.c
+++ b/drivers/net/saa9730.c
@@ -688,7 +688,6 @@ static int lan_saa9730_rx(struct net_device *dev)
 			} else {
 				lp->stats.rx_bytes += len;
 				lp->stats.rx_packets++;
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align */
 				skb_put(skb, len);	/* make room */
 				eth_copy_and_sum(skb,
diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c
index 103c3174ab5..0a3a379b634 100644
--- a/drivers/net/sb1250-mac.c
+++ b/drivers/net/sb1250-mac.c
@@ -933,9 +933,6 @@ static int sbdma_add_rcvbuffer(sbmacdma_t *d,struct sk_buff *sb)
 		}
 
 		sbdma_align_skb(sb_new, SMP_CACHE_BYTES, ETHER_ALIGN);
-
-		/* mark skbuff owned by our device */
-		sb_new->dev = d->sbdma_eth->sbm_dev;
 	}
 	else {
 		sb_new = sb;
diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c
index c32c21af3fd..5b7284c955d 100644
--- a/drivers/net/sc92031.c
+++ b/drivers/net/sc92031.c
@@ -814,7 +814,6 @@ static void _sc92031_rx_tasklet(struct net_device *dev)
 			memcpy(skb_put(skb, pkt_size), rx_ring + rx_ring_offset, pkt_size);
 		}
 
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		dev->last_rx = jiffies;
 		netif_rx(skb);
diff --git a/drivers/net/seeq8005.c b/drivers/net/seeq8005.c
index 0d6c95c7aed..4bce7c4f373 100644
--- a/drivers/net/seeq8005.c
+++ b/drivers/net/seeq8005.c
@@ -550,7 +550,6 @@ static void seeq8005_rx(struct net_device *dev)
 				lp->stats.rx_dropped++;
 				break;
 			}
-			skb->dev = dev;
 			skb_reserve(skb, 2);	/* align data on 16 byte */
 			buf = skb_put(skb,pkt_len);
 
diff --git a/drivers/net/sgiseeq.c b/drivers/net/sgiseeq.c
index 52ed522a234..5a891913218 100644
--- a/drivers/net/sgiseeq.c
+++ b/drivers/net/sgiseeq.c
@@ -318,7 +318,6 @@ static inline void sgiseeq_rx(struct net_device *dev, struct sgiseeq_private *sp
 			skb = dev_alloc_skb(len + 2);
 
 			if (skb) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);
 				skb_put(skb, len);
 
diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c
index 34463ce6f13..bc8de48da31 100644
--- a/drivers/net/sis190.c
+++ b/drivers/net/sis190.c
@@ -632,7 +632,6 @@ static int sis190_rx_interrupt(struct net_device *dev,
 			pci_action(tp->pci_dev, le32_to_cpu(desc->addr),
 				   tp->rx_buf_sz, PCI_DMA_FROMDEVICE);
 
-			skb->dev = dev;
 			skb_put(skb, pkt_size);
 			skb->protocol = eth_type_trans(skb, dev);
 
diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c
index b2a3b19d773..dea0126723d 100644
--- a/drivers/net/sis900.c
+++ b/drivers/net/sis900.c
@@ -1160,7 +1160,6 @@ sis900_init_rx_ring(struct net_device *net_dev)
 			   buffer */
 			break;
 		}
-		skb->dev = net_dev;
 		sis_priv->rx_skbuff[i] = skb;
 		sis_priv->rx_ring[i].cmdsts = RX_BUF_SIZE;
                 sis_priv->rx_ring[i].bufptr = pci_map_single(sis_priv->pci_dev,
@@ -1800,7 +1799,6 @@ static int sis900_rx(struct net_device *net_dev)
 			sis_priv->stats.rx_packets++;
 			sis_priv->dirty_rx++;
 refill_rx_ring:
-			skb->dev = net_dev;
 			sis_priv->rx_skbuff[entry] = skb;
 			sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE;
                 	sis_priv->rx_ring[entry].bufptr =
@@ -1832,7 +1830,6 @@ refill_rx_ring:
 				sis_priv->stats.rx_dropped++;
 				break;
 			}
-			skb->dev = net_dev;
 			sis_priv->rx_skbuff[entry] = skb;
 			sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE;
                 	sis_priv->rx_ring[entry].bufptr =
diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c
index e94ab256b54..9ac1fe659dc 100644
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -2193,7 +2193,6 @@ rx_start:
 				SK_PNMI_CNT_RX_OCTETS_DELIVERED(pAC,
 					FrameLength, pRxPort->PortIndex);
 
-				pMsg->dev = pAC->dev[pRxPort->PortIndex];
 				pMsg->protocol = eth_type_trans(pMsg,
 					pAC->dev[pRxPort->PortIndex]);
 				netif_rx(pMsg);
@@ -2246,7 +2245,6 @@ rx_start:
 				(IFF_PROMISC | IFF_ALLMULTI)) != 0 ||
 				(ForRlmt & SK_RLMT_RX_PROTOCOL) ==
 				SK_RLMT_RX_PROTOCOL) {
-				pMsg->dev = pAC->dev[pRxPort->PortIndex];
 				pMsg->protocol = eth_type_trans(pMsg,
 					pAC->dev[pRxPort->PortIndex]);
 				netif_rx(pMsg);
diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c
index c9561413198..8a2109a913b 100644
--- a/drivers/net/smc911x.c
+++ b/drivers/net/smc911x.c
@@ -502,7 +502,6 @@ static inline void	 smc911x_rcv(struct net_device *dev)
 		DBG(SMC_DEBUG_PKTS, "%s: Received packet\n", dev->name,);
 		PRINT_PKT(data, ((pkt_len - 4) <= 64) ? pkt_len - 4 : 64);
 		dev->last_rx = jiffies;
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		netif_rx(skb);
 		lp->stats.rx_packets++;
@@ -1307,7 +1306,6 @@ smc911x_rx_dma_irq(int dma, void *data)
 	lp->current_rx_skb = NULL;
 	PRINT_PKT(skb->data, skb->len);
 	dev->last_rx = jiffies;
-	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 	netif_rx(skb);
 	lp->stats.rx_packets++;
diff --git a/drivers/net/smc9194.c b/drivers/net/smc9194.c
index bd6e84506c2..36c1ebadbf2 100644
--- a/drivers/net/smc9194.c
+++ b/drivers/net/smc9194.c
@@ -1262,7 +1262,6 @@ static void smc_rcv(struct net_device *dev)
 
 		skb_reserve( skb, 2 );   /* 16 bit alignment */
 
-		skb->dev = dev;
 		data = skb_put( skb, packet_length);
 
 #ifdef USE_32_BIT
diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c
index 49f4b7712eb..01cc3c742c3 100644
--- a/drivers/net/smc91x.c
+++ b/drivers/net/smc91x.c
@@ -568,7 +568,6 @@ static inline void  smc_rcv(struct net_device *dev)
 		PRINT_PKT(data, packet_len - 4);
 
 		dev->last_rx = jiffies;
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		netif_rx(skb);
 		lp->stats.rx_packets++;
diff --git a/drivers/net/sonic.c b/drivers/net/sonic.c
index ed7aa0a5acc..c6320c71993 100644
--- a/drivers/net/sonic.c
+++ b/drivers/net/sonic.c
@@ -85,7 +85,6 @@ static int sonic_open(struct net_device *dev)
 			       dev->name);
 			return -ENOMEM;
 		}
-		skb->dev = dev;
 		/* align IP header unless DMA requires otherwise */
 		if (SONIC_BUS_SCALE(lp->dma_bitmode) == 2)
 			skb_reserve(skb, 2);
@@ -451,7 +450,6 @@ static void sonic_rx(struct net_device *dev)
 				lp->stats.rx_dropped++;
 				break;
 			}
-			new_skb->dev = dev;
 			/* provide 16 byte IP header alignment unless DMA requires otherwise */
 			if(SONIC_BUS_SCALE(lp->dma_bitmode) == 2)
 				skb_reserve(new_skb, 2);
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index e3019d52c30..f7e0ac7f789 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -990,7 +990,6 @@ spider_net_pass_skb_up(struct spider_net_descr *descr,
 	netdev = card->netdev;
 
 	skb = descr->skb;
-	skb->dev = netdev;
 	skb_put(skb, hwdescr->valid_size);
 
 	/* the card seems to add 2 bytes of junk in front
diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index 8bba2e3da7e..9d6e454a8f9 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -1452,7 +1452,6 @@ static int __netdev_rx(struct net_device *dev, int *quota)
 		   to a minimally-sized skbuff. */
 		if (pkt_len < rx_copybreak
 		    && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-			skb->dev = dev;
 			skb_reserve(skb, 2);	/* 16 byte align the IP header */
 			pci_dma_sync_single_for_cpu(np->pci_dev,
 						    np->rx_info[entry].mapping,
diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c
index 4757aa647c7..5bcc749bef1 100644
--- a/drivers/net/sun3_82586.c
+++ b/drivers/net/sun3_82586.c
@@ -775,7 +775,6 @@ static void sun3_82586_rcv_int(struct net_device *dev)
 					skb = (struct sk_buff *) dev_alloc_skb(totlen+2);
 					if(skb != NULL)
 					{
-						skb->dev = dev;
 						skb_reserve(skb,2);
 						skb_put(skb,totlen);
 						eth_copy_and_sum(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen,0);
diff --git a/drivers/net/sun3lance.c b/drivers/net/sun3lance.c
index 7bee45b42a2..0454827c8c2 100644
--- a/drivers/net/sun3lance.c
+++ b/drivers/net/sun3lance.c
@@ -851,7 +851,6 @@ static int lance_rx( struct net_device *dev )
 				}
 
 
-				skb->dev = dev;
 				skb_reserve( skb, 2 );	/* 16 byte align */
 				skb_put( skb, pkt_len );	/* Make room */
 //			        memcpy( skb->data, PKTBUF_ADDR(head), pkt_len );
diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c
index 18f88853e1e..2ad8d58dee3 100644
--- a/drivers/net/sunbmac.c
+++ b/drivers/net/sunbmac.c
@@ -855,7 +855,6 @@ static void bigmac_rx(struct bigmac *bp)
 				drops++;
 				goto drop_it;
 			}
-			copy_skb->dev = bp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			sbus_dma_sync_single_for_cpu(bp->bigmac_sdev,
diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c
index c06ecc8002b..f51ba31970a 100644
--- a/drivers/net/sundance.c
+++ b/drivers/net/sundance.c
@@ -1308,7 +1308,6 @@ static void rx_poll(unsigned long data)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(np->pci_dev,
 							    desc->frag[0].addr,
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 08ea61db46f..db2e1a6b723 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -845,7 +845,6 @@ static int gem_rx(struct gem *gp, int work_to_do)
 				goto drop_it;
 			}
 
-			copy_skb->dev = gp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			pci_dma_sync_single_for_cpu(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index 192bbc91c73..aca592bc032 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -2058,7 +2058,6 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev)
 				goto drop_it;
 			}
 
-			copy_skb->dev = dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			hme_dma_sync_for_cpu(hp, dma_addr, len, DMA_FROMDEVICE);
diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c
index b0929a457b6..8f53a1ef608 100644
--- a/drivers/net/sunlance.c
+++ b/drivers/net/sunlance.c
@@ -547,7 +547,6 @@ static void lance_rx_dvma(struct net_device *dev)
 
 			lp->stats.rx_bytes += len;
 
-			skb->dev = dev;
 			skb_reserve(skb, 2);		/* 16 byte align */
 			skb_put(skb, len);		/* make room */
 			eth_copy_and_sum(skb,
@@ -721,7 +720,6 @@ static void lance_rx_pio(struct net_device *dev)
 
 			lp->stats.rx_bytes += len;
 
-			skb->dev = dev;
 			skb_reserve (skb, 2);		/* 16 byte align */
 			skb_put(skb, len);		/* make room */
 			lance_piocopy_to_skb(skb, &(ib->rx_buf[entry][0]), len);
diff --git a/drivers/net/sunqe.c b/drivers/net/sunqe.c
index f3bad56d476..fbfb98284fd 100644
--- a/drivers/net/sunqe.c
+++ b/drivers/net/sunqe.c
@@ -437,7 +437,6 @@ static void qe_rx(struct sunqe *qep)
 				drops++;
 				qep->net_stats.rx_dropped++;
 			} else {
-				skb->dev = qep->dev;
 				skb_reserve(skb, 2);
 				skb_put(skb, len);
 				eth_copy_and_sum(skb, (unsigned char *) this_qbuf,
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index e3a7e3ceab7..d7741e23f8d 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -1145,7 +1145,6 @@ tc35815_rx(struct net_device *dev)
 				break;
 			}
 			skb_reserve(skb, 2);   /* 16 bit alignment */
-			skb->dev = dev;
 
 			data = skb_put(skb, pkt_len);
 
diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c
index f85f0025112..2ede3f58cf9 100644
--- a/drivers/net/tlan.c
+++ b/drivers/net/tlan.c
@@ -1577,7 +1577,6 @@ u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int )
 				printk(KERN_INFO "TLAN: Couldn't allocate memory for received data.\n");
 			else {
 				head_buffer = priv->rxBuffer + (priv->rxHead * TLAN_MAX_FRAME_SIZE);
-				skb->dev = dev;
 				skb_reserve(skb, 2);
 				t = (void *) skb_put(skb, frameSize);
 
@@ -1608,7 +1607,6 @@ u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int )
 				skb->protocol = eth_type_trans( skb, dev );
 				netif_rx( skb );
 
-				new_skb->dev = dev;
 				skb_reserve( new_skb, 2 );
 				t = (void *) skb_put( new_skb, TLAN_MAX_FRAME_SIZE );
 				head_list->buffer[0].address = pci_map_single(priv->pciDev, new_skb->data, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/tsi108_eth.c b/drivers/net/tsi108_eth.c
index d92c5c597e1..0bfc2c9c1c0 100644
--- a/drivers/net/tsi108_eth.c
+++ b/drivers/net/tsi108_eth.c
@@ -788,7 +788,6 @@ static int tsi108_complete_rx(struct net_device *dev, int budget)
 			printk(".\n");
 		}
 
-		skb->dev = dev;
 		skb_put(skb, data->rxring[rx].len);
 		skb->protocol = eth_type_trans(skb, dev);
 		netif_receive_skb(skb);
diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
index c82befa209a..8a7effa7090 100644
--- a/drivers/net/tulip/de2104x.c
+++ b/drivers/net/tulip/de2104x.c
@@ -435,7 +435,6 @@ static void de_rx (struct de_private *de)
 			rx_work = 100;
 			goto rx_next;
 		}
-		copy_skb->dev = de->dev;
 
 		if (!copying_skb) {
 			pci_unmap_single(de->pdev, mapping,
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c
index 4b3cd3d8b62..e40ddb86958 100644
--- a/drivers/net/tulip/de4x5.c
+++ b/drivers/net/tulip/de4x5.c
@@ -3634,7 +3634,6 @@ de4x5_alloc_rx_buff(struct net_device *dev, int index, int len)
     p = dev_alloc_skb(IEEE802_3_SZ + DE4X5_ALIGN + 2);
     if (!p) return NULL;
 
-    p->dev = dev;
     tmp = virt_to_bus(p->data);
     i = ((tmp + DE4X5_ALIGN) & ~DE4X5_ALIGN) - tmp;
     skb_reserve(p, i);
@@ -3655,7 +3654,6 @@ de4x5_alloc_rx_buff(struct net_device *dev, int index, int len)
     p = dev_alloc_skb(len + 2);
     if (!p) return NULL;
 
-    p->dev = dev;
     skb_reserve(p, 2);	                               /* Align */
     if (index < lp->rx_old) {                          /* Wrapped buffer */
 	short tlen = (lp->rxRingSize - lp->rx_old) * RX_BUFF_SZ;
diff --git a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c
index 9aeac76184f..a5e0237a653 100644
--- a/drivers/net/tulip/dmfe.c
+++ b/drivers/net/tulip/dmfe.c
@@ -988,14 +988,12 @@ static void dmfe_rx_packet(struct DEVICE *dev, struct dmfe_board_info * db)
 
 						skb = newskb;
 						/* size less than COPY_SIZE, allocate a rxlen SKB */
-						skb->dev = dev;
 						skb_reserve(skb, 2); /* 16byte align */
 						memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->data, rxlen);
 						dmfe_reuse_skb(db, rxptr->rx_skb_ptr);
-					} else {
-						skb->dev = dev;
+					} else
 						skb_put(skb, rxlen);
-					}
+
 					skb->protocol = eth_type_trans(skb, dev);
 					netif_rx(skb);
 					dev->last_rx = jiffies;
diff --git a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c
index e3488d7b8ed..e86df07769a 100644
--- a/drivers/net/tulip/interrupt.c
+++ b/drivers/net/tulip/interrupt.c
@@ -192,7 +192,6 @@ int tulip_poll(struct net_device *dev, int *budget)
                                   to a minimally-sized skbuff. */
                                if (pkt_len < tulip_rx_copybreak
                                    && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-                                       skb->dev = dev;
                                        skb_reserve(skb, 2);    /* 16 byte align the IP header */
                                        pci_dma_sync_single_for_cpu(tp->pdev,
 								   tp->rx_buffers[entry].mapping,
@@ -416,7 +415,6 @@ static int tulip_rx(struct net_device *dev)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < tulip_rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(tp->pdev,
 							    tp->rx_buffers[entry].mapping,
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index 229158e8e4b..9a5850fa644 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -828,14 +828,12 @@ static void uli526x_rx_packet(struct net_device *dev, struct uli526x_board_info
 					( (skb = dev_alloc_skb(rxlen + 2) )
 					!= NULL) ) {
 					/* size less than COPY_SIZE, allocate a rxlen SKB */
-					skb->dev = dev;
 					skb_reserve(skb, 2); /* 16byte align */
 					memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->tail, rxlen);
 					uli526x_reuse_skb(db, rxptr->rx_skb_ptr);
-				} else {
-					skb->dev = dev;
+				} else
 					skb_put(skb, rxlen);
-				}
+
 				skb->protocol = eth_type_trans(skb, dev);
 				netif_rx(skb);
 				dev->last_rx = jiffies;
diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c
index 002a05e0722..d74fa871de1 100644
--- a/drivers/net/tulip/winbond-840.c
+++ b/drivers/net/tulip/winbond-840.c
@@ -813,7 +813,6 @@ static void init_rxtx_rings(struct net_device *dev)
 		np->rx_skbuff[i] = skb;
 		if (skb == NULL)
 			break;
-		skb->dev = dev;			/* Mark as being used by this device. */
 		np->rx_addr[i] = pci_map_single(np->pci_dev,skb->data,
 					np->rx_buf_sz,PCI_DMA_FROMDEVICE);
 
@@ -1229,7 +1228,6 @@ static int netdev_rx(struct net_device *dev)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(np->pci_dev,np->rx_addr[entry],
 							    np->rx_skbuff[entry]->len,
@@ -1278,7 +1276,6 @@ static int netdev_rx(struct net_device *dev)
 			np->rx_skbuff[entry] = skb;
 			if (skb == NULL)
 				break;			/* Better luck next round. */
-			skb->dev = dev;			/* Mark as being used by this device. */
 			np->rx_addr[entry] = pci_map_single(np->pci_dev,
 							skb->data,
 							np->rx_buf_sz, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/tulip/xircom_cb.c b/drivers/net/tulip/xircom_cb.c
index 61d313049dd..1fe3734e155 100644
--- a/drivers/net/tulip/xircom_cb.c
+++ b/drivers/net/tulip/xircom_cb.c
@@ -1207,7 +1207,6 @@ static void investigate_read_descriptor(struct net_device *dev,struct xircom_pri
 				card->stats.rx_dropped++;
 				goto out;
 			}
-			skb->dev = dev;
 			skb_reserve(skb, 2);
 			eth_copy_and_sum(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len, 0);
 			skb_put(skb, pkt_len);
diff --git a/drivers/net/tulip/xircom_tulip_cb.c b/drivers/net/tulip/xircom_tulip_cb.c
index a998c5d0ae9..3f24c82755f 100644
--- a/drivers/net/tulip/xircom_tulip_cb.c
+++ b/drivers/net/tulip/xircom_tulip_cb.c
@@ -1238,7 +1238,6 @@ xircom_rx(struct net_device *dev)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 #if ! defined(__alpha__)
 				eth_copy_and_sum(skb, bus_to_virt(tp->rx_ring[entry].buffer1),
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 5643d1e84ed..a57aa010cb2 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -254,11 +254,11 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
 		return -EFAULT;
 	}
 
-	skb->dev = tun->dev;
 	switch (tun->flags & TUN_TYPE_MASK) {
 	case TUN_TUN_DEV:
 		skb->mac.raw = skb->data;
 		skb->protocol = pi.proto;
+		skb->dev = tun->dev;
 		break;
 	case TUN_TAP_DEV:
 		skb->protocol = eth_type_trans(skb, tun->dev);
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index 0d91d094edd..f2dd7763cd0 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -1708,7 +1708,6 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile u32 * ready,
 
 		if(pkt_len < rx_copybreak &&
 		   (new_skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-			new_skb->dev = tp->dev;
 			skb_reserve(new_skb, 2);
 			pci_dma_sync_single_for_cpu(tp->pdev, dma_addr,
 						    PKT_BUF_SZ,
diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index f3a972e74e9..adea290a9d5 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -1486,7 +1486,6 @@ static int rhine_rx(struct net_device *dev, int limit)
 			   copying to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak &&
 				(skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(rp->pdev,
 							    rp->rx_skbuff_dma[entry],
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 8e5d82051bd..9f6cc1569b3 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1398,7 +1398,6 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
 		vptr->stats.multicast++;
 
 	skb = rd_info->skb;
-	skb->dev = vptr->dev;
 
 	pci_dma_sync_single_for_cpu(vptr->pdev, rd_info->skb_dma,
 				    vptr->rx_buf_sz, PCI_DMA_FROMDEVICE);
@@ -1428,7 +1427,7 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
 		   PCI_DMA_FROMDEVICE);
 
 	skb_put(skb, pkt_len - 4);
-	skb->protocol = eth_type_trans(skb, skb->dev);
+	skb->protocol = eth_type_trans(skb, vptr->dev);
 
 	stats->rx_bytes += pkt_len;
 	netif_rx(skb);
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index c6c3c757d6f..3240d10fc86 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1011,7 +1011,6 @@ static int fr_rx(struct sk_buff *skb)
 		stats->rx_bytes += skb->len;
 		if (pvc->state.becn)
 			stats->rx_compressed++;
-		skb->dev = dev;
 		netif_rx(skb);
 		return NET_RX_SUCCESS;
 	} else {
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index fc5c0c611ff..35eded7ffb2 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -999,11 +999,6 @@ get_rx_buf( struct net_device  *dev )
 	if( !skb )
 		return  NULL;
 
-#ifdef CONFIG_SBNI_MULTILINE
-	skb->dev = ((struct net_local *) dev->priv)->master;
-#else
-	skb->dev = dev;
-#endif
 	skb_reserve( skb, 2 );		/* Align IP on longword boundaries */
 	return  skb;
 }
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index 2ada76a93cb..e50b1482d79 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -3415,10 +3415,8 @@ badrx:
 				skb->pkt_type = PACKET_OTHERHOST;
 				skb->dev = apriv->wifidev;
 				skb->protocol = htons(ETH_P_802_2);
-			} else {
-				skb->dev = dev;
+			} else
 				skb->protocol = eth_type_trans(skb,dev);
-			}
 			skb->dev->last_rx = jiffies;
 			skb->ip_summed = CHECKSUM_NONE;
 
@@ -3641,7 +3639,6 @@ badmic:
 		}
 #endif /* WIRELESS_SPY */
 
-		skb->dev = ai->dev;
 		skb->ip_summed = CHECKSUM_NONE;
 		skb->protocol = eth_type_trans(skb, ai->dev);
 		skb->dev->last_rx = jiffies;
diff --git a/drivers/net/wireless/arlan-main.c b/drivers/net/wireless/arlan-main.c
index 4688e56b69c..498e8486d12 100644
--- a/drivers/net/wireless/arlan-main.c
+++ b/drivers/net/wireless/arlan-main.c
@@ -1500,7 +1500,6 @@ static void arlan_rx_interrupt(struct net_device *dev, u_char rxStatus, u_short
 				break;
 			}
 			skb_reserve(skb, 2);
-			skb->dev = dev;
 			skbtmp = skb_put(skb, pkt_len);
 
 			memcpy_fromio(skbtmp + ARLAN_FAKE_HDR_LEN, ((char __iomem *) arlan) + rxOffset, pkt_len - ARLAN_FAKE_HDR_LEN);
diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c
index 23eba698aec..1c17cbe007b 100644
--- a/drivers/net/wireless/atmel.c
+++ b/drivers/net/wireless/atmel.c
@@ -920,7 +920,6 @@ static void fast_rx_path(struct atmel_private *priv,
 		memcpy(&skbp[6], header->addr2, 6); /* source address */
 
 	priv->dev->last_rx = jiffies;
-	skb->dev = priv->dev;
 	skb->protocol = eth_type_trans(skb, priv->dev);
 	skb->ip_summed = CHECKSUM_NONE;
 	netif_rx(skb);
@@ -1028,7 +1027,6 @@ static void frag_rx_path(struct atmel_private *priv,
 				       priv->rx_buf,
 				       priv->frag_len + 12);
 				priv->dev->last_rx = jiffies;
-				skb->dev = priv->dev;
 				skb->protocol = eth_type_trans(skb, priv->dev);
 				skb->ip_summed = CHECKSUM_NONE;
 				netif_rx(skb);
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index 7e04dc94b3b..f78ee26d787 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -1083,7 +1083,6 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 	if (skb) {
 		skb->protocol = eth_type_trans(skb, dev);
 		memset(skb->cb, 0, sizeof(skb->cb));
-		skb->dev = dev;
 		netif_rx(skb);
 	}
 
diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c
index a009ab51771..45b00e13ab2 100644
--- a/drivers/net/wireless/netwave_cs.c
+++ b/drivers/net/wireless/netwave_cs.c
@@ -1283,7 +1283,6 @@ static int netwave_rx(struct net_device *dev)
 
 	skb_reserve( skb, 2);  /* Align IP on 16 byte */
 	skb_put( skb, rcvLen);
-	skb->dev = dev;
 
 	/* Copy packet fragments to the skb data area */
 	ptr = (u_char*) skb->data;
diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c
index 4e7f6cf5143..3f9d78d059b 100644
--- a/drivers/net/wireless/orinoco.c
+++ b/drivers/net/wireless/orinoco.c
@@ -915,7 +915,6 @@ static void __orinoco_ev_rx(struct net_device *dev, hermes_t *hw)
 		memcpy(hdr->h_source, desc.addr2, ETH_ALEN);
 
 	dev->last_rx = jiffies;
-	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 	skb->ip_summed = CHECKSUM_NONE;
 	if (fc & IEEE80211_FCTL_TODS)
diff --git a/drivers/net/wireless/prism54/islpci_eth.c b/drivers/net/wireless/prism54/islpci_eth.c
index b1122912ee2..fc2e0f3a896 100644
--- a/drivers/net/wireless/prism54/islpci_eth.c
+++ b/drivers/net/wireless/prism54/islpci_eth.c
@@ -374,10 +374,6 @@ islpci_eth_receive(islpci_private *priv)
 	DEBUG(SHOW_BUFFER_CONTENTS, "\nrx %p ", skb->data);
 	display_buffer((char *) skb->data, skb->len);
 #endif
-
-	/* do some additional sk_buff and network layer parameters */
-	skb->dev = ndev;
-
 	/* take care of monitor mode and spy monitoring. */
 	if (unlikely(priv->iw_mode == IW_MODE_MONITOR))
 		discard = islpci_monitor_rx(priv, &skb);
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 47b2ccb6a63..9633b0457f8 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -2232,7 +2232,6 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, unsigned i
         return;
     }
     skb_reserve( skb, 2);   /* Align IP on 16 byte (TBD check this)*/
-    skb->dev = dev;
 
     DEBUG(4,"ray_cs rx_data total_len = %x, rx_len = %x\n",total_len,rx_len);
 
diff --git a/drivers/net/wireless/wavelan.c b/drivers/net/wireless/wavelan.c
index 2aa3c761dd8..69cb1471096 100644
--- a/drivers/net/wireless/wavelan.c
+++ b/drivers/net/wireless/wavelan.c
@@ -2512,8 +2512,6 @@ wv_packet_read(struct net_device * dev, u16 buf_off, int sksize)
 		return;
 	}
 
-	skb->dev = dev;
-
 	/* Copy the packet to the buffer. */
 	obram_read(ioaddr, buf_off, skb_put(skb, sksize), sksize);
 	skb->protocol = eth_type_trans(skb, dev);
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index b04239792f6..9351ee77331 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -2884,8 +2884,6 @@ wv_packet_read(struct net_device *		dev,
       return;
     }
 
-  skb->dev = dev;
-
   skb_reserve(skb, 2);
   fd_p = read_ringbuf(dev, fd_p, (char *) skb_put(skb, sksize), sksize);
   skb->protocol = eth_type_trans(skb, dev);
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index 6cb66a356c9..1fe013a7297 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -327,7 +327,6 @@ static void zd1201_usbrx(struct urb *urb)
 			memcpy(skb_put(skb, 6), &data[datalen-8], 6);
 			memcpy(skb_put(skb, 2), &data[datalen-24], 2);
 			memcpy(skb_put(skb, len), data, len);
-			skb->dev = zd->dev;
 			skb->dev->last_rx = jiffies;
 			skb->protocol = eth_type_trans(skb, zd->dev);
 			zd->stats.rx_packets++;
@@ -385,7 +384,6 @@ static void zd1201_usbrx(struct urb *urb)
 			memcpy(skb_put(skb, 2), &data[6], 2);
 			memcpy(skb_put(skb, len), data+8, len);
 		}
-		skb->dev = zd->dev;
 		skb->dev->last_rx = jiffies;
 		skb->protocol = eth_type_trans(skb, zd->dev);
 		zd->stats.rx_packets++;
diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c
index 2412ce4917f..3f4a7cf9efe 100644
--- a/drivers/net/yellowfin.c
+++ b/drivers/net/yellowfin.c
@@ -1137,7 +1137,6 @@ static int yellowfin_rx(struct net_device *dev)
 				skb = dev_alloc_skb(pkt_len + 2);
 				if (skb == NULL)
 					break;
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				eth_copy_and_sum(skb, rx_skb->data, pkt_len, 0);
 				skb_put(skb, pkt_len);
diff --git a/drivers/net/znet.c b/drivers/net/znet.c
index b24b0727108..4032e9f6f9b 100644
--- a/drivers/net/znet.c
+++ b/drivers/net/znet.c
@@ -774,7 +774,6 @@ static void znet_rx(struct net_device *dev)
 				znet->stats.rx_dropped++;
 				break;
 			}
-			skb->dev = dev;
 
 			if (&znet->rx_cur[(pkt_len+1)>>1] > znet->rx_end) {
 				int semi_cnt = (znet->rx_end - znet->rx_cur)<<1;
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index 04e6b8508fb..8f9f217e0a6 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -1766,7 +1766,6 @@ static void rx_complete (struct usb_ep *ep, struct usb_request *req)
 			break;
 		}
 
-		skb->dev = dev->net;
 		skb->protocol = eth_type_trans (skb, dev->net);
 		dev->stats.rx_packets++;
 		dev->stats.rx_bytes += skb->len;
diff --git a/drivers/usb/net/catc.c b/drivers/usb/net/catc.c
index 4852012735f..d82022dd7f2 100644
--- a/drivers/usb/net/catc.c
+++ b/drivers/usb/net/catc.c
@@ -255,7 +255,6 @@ static void catc_rx_done(struct urb *urb)
 		if (!(skb = dev_alloc_skb(pkt_len)))
 			return;
 
-		skb->dev = catc->netdev;
 		eth_copy_and_sum(skb, pkt_start + pkt_offset, pkt_len, 0);
 		skb_put(skb, pkt_len);
 
diff --git a/drivers/usb/net/kaweth.c b/drivers/usb/net/kaweth.c
index de95268ae4b..a0cc05d21a6 100644
--- a/drivers/usb/net/kaweth.c
+++ b/drivers/usb/net/kaweth.c
@@ -636,8 +636,6 @@ static void kaweth_usb_receive(struct urb *urb)
 
 		skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
 
-		skb->dev = net;
-
 		eth_copy_and_sum(skb, kaweth->rx_buf + 2, pkt_len, 0);
 
 		skb_put(skb, pkt_len);
diff --git a/drivers/usb/net/pegasus.c b/drivers/usb/net/pegasus.c
index 6d12961cf9f..13f70e09ea4 100644
--- a/drivers/usb/net/pegasus.c
+++ b/drivers/usb/net/pegasus.c
@@ -575,7 +575,6 @@ static void fill_skb_pool(pegasus_t * pegasus)
 		 */
 		if (pegasus->rx_pool[i] == NULL)
 			return;
-		pegasus->rx_pool[i]->dev = pegasus->net;
 		skb_reserve(pegasus->rx_pool[i], 2);
 	}
 }
@@ -1415,8 +1414,10 @@ static void pegasus_disconnect(struct usb_interface *intf)
 	unlink_all_urbs(pegasus);
 	free_all_urbs(pegasus);
 	free_skb_pool(pegasus);
-	if (pegasus->rx_skb)
+	if (pegasus->rx_skb != NULL) {
 		dev_kfree_skb(pegasus->rx_skb);
+		pegasus->rx_skb = NULL;
+	}
 	free_netdev(pegasus->net);
 }
 
diff --git a/drivers/usb/net/rtl8150.c b/drivers/usb/net/rtl8150.c
index ea153dc9b0a..fa598f0340c 100644
--- a/drivers/usb/net/rtl8150.c
+++ b/drivers/usb/net/rtl8150.c
@@ -646,7 +646,6 @@ static void fill_skb_pool(rtl8150_t *dev)
 		if (!skb) {
 			return;
 		}
-		skb->dev = dev->netdev;
 		skb_reserve(skb, 2);
 		dev->rx_skb_pool[i] = skb;
 	}
diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c
index de69b183bd2..0c5465a7909 100644
--- a/drivers/usb/net/usbnet.c
+++ b/drivers/usb/net/usbnet.c
@@ -203,7 +203,6 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb)
 {
 	int	status;
 
-	skb->dev = dev->net;
 	skb->protocol = eth_type_trans (skb, dev->net);
 	dev->stats.rx_packets++;
 	dev->stats.rx_bytes += skb->len;
-- 
cgit v1.2.3


From 459a98ed881802dee55897441bc7f77af614368e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 15:30:44 -0700
Subject: [SK_BUFF]: Introduce skb_reset_mac_header(skb)

For the common, open coded 'skb->mac.raw = skb->data' operation, so that we can
later turn skb->mac.raw into a offset, reducing the size of struct sk_buff in
64bit land while possibly keeping it as a pointer on 32bit.

This one touches just the most simple case, next will handle the slightly more
"complex" cases.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/aoe/aoecmd.c                    |  3 ++-
 drivers/ieee1394/eth1394.c                    |  2 +-
 drivers/infiniband/ulp/ipoib/ipoib_cm.c       |  2 +-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c       |  2 +-
 drivers/isdn/i4l/isdn_net.c                   |  4 ++--
 drivers/isdn/i4l/isdn_ppp.c                   |  2 +-
 drivers/message/fusion/mptlan.c               |  4 ++--
 drivers/net/appletalk/cops.c                  |  2 +-
 drivers/net/appletalk/ltpc.c                  |  2 +-
 drivers/net/arcnet/arc-rawmode.c              |  2 +-
 drivers/net/arcnet/capmode.c                  | 11 ++++-------
 drivers/net/arcnet/rfc1051.c                  |  2 +-
 drivers/net/arcnet/rfc1201.c                  |  2 +-
 drivers/net/bonding/bond_3ad.c                |  4 ++--
 drivers/net/bonding/bond_alb.c                |  4 ++--
 drivers/net/cxgb3/cxgb3_offload.c             |  2 +-
 drivers/net/cxgb3/sge.c                       |  3 ++-
 drivers/net/irda/ali-ircc.c                   |  2 +-
 drivers/net/irda/au1k_ir.c                    |  2 +-
 drivers/net/irda/donauboe.c                   |  2 +-
 drivers/net/irda/irda-usb.c                   |  2 +-
 drivers/net/irda/mcs7780.c                    |  4 ++--
 drivers/net/irda/nsc-ircc.c                   |  2 +-
 drivers/net/irda/pxaficp_ir.c                 |  2 +-
 drivers/net/irda/sa1100_ir.c                  |  2 +-
 drivers/net/irda/smsc-ircc2.c                 |  2 +-
 drivers/net/irda/stir4200.c                   |  2 +-
 drivers/net/irda/via-ircc.c                   |  8 ++++----
 drivers/net/irda/vlsi_ir.c                    |  2 +-
 drivers/net/irda/w83977af_ir.c                |  2 +-
 drivers/net/myri_sbus.c                       |  2 +-
 drivers/net/ppp_generic.c                     |  2 +-
 drivers/net/sb1000.c                          |  2 +-
 drivers/net/tun.c                             |  2 +-
 drivers/net/wan/cosa.c                        |  2 +-
 drivers/net/wan/cycx_x25.c                    |  2 +-
 drivers/net/wan/dlci.c                        |  2 +-
 drivers/net/wan/farsync.c                     |  2 +-
 drivers/net/wan/lmc/lmc_main.c                |  4 ++--
 drivers/net/wan/pc300_drv.c                   |  2 +-
 drivers/net/wan/pc300_tty.c                   |  2 +-
 drivers/net/wireless/airo.c                   |  4 ++--
 drivers/net/wireless/hostap/hostap_80211_rx.c |  7 ++++---
 drivers/net/wireless/hostap/hostap_80211_tx.c |  2 +-
 drivers/net/wireless/hostap/hostap_ap.c       |  3 ++-
 drivers/net/wireless/hostap/hostap_hw.c       |  2 +-
 drivers/net/wireless/hostap/hostap_main.c     |  3 ++-
 drivers/net/wireless/ipw2200.c                |  2 +-
 drivers/net/wireless/orinoco.c                |  2 +-
 drivers/net/wireless/prism54/islpci_eth.c     |  2 +-
 drivers/net/wireless/strip.c                  |  2 +-
 drivers/s390/net/ctcmain.c                    |  4 ++--
 drivers/s390/net/netiucv.c                    |  4 ++--
 drivers/s390/net/qeth_eddp.c                  |  2 +-
 drivers/s390/net/qeth_main.c                  |  4 ++--
 55 files changed, 78 insertions(+), 76 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 4ab7b40e8c5..74062dc4e90 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -27,7 +27,8 @@ new_skb(ulong len)
 
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (skb) {
-		skb->nh.raw = skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
+		skb->nh.raw = skb->data;
 		skb->protocol = __constant_htons(ETH_P_AOE);
 		skb->priority = 0;
 		skb->next = skb->prev = NULL;
diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c
index 03e44b337eb..db2346f4d20 100644
--- a/drivers/ieee1394/eth1394.c
+++ b/drivers/ieee1394/eth1394.c
@@ -834,7 +834,7 @@ static inline u16 ether1394_type_trans(struct sk_buff *skb,
 	struct eth1394hdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull (skb, ETH1394_HLEN);
 	eth = eth1394_hdr(skb);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 2b242a4823f..c722e5c141b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -408,7 +408,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
 
 	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, IPOIB_ENCAP_LEN);
 
 	dev->last_rx = jiffies;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ba0ee5cf2ad..93f74567897 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -216,7 +216,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	if (wc->slid != priv->local_lid ||
 	    wc->src_qp != priv->qp->qp_num) {
 		skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb_pull(skb, IPOIB_ENCAP_LEN);
 
 		dev->last_rx = jiffies;
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 838b3734e2b..fadb9291bc1 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -1366,7 +1366,7 @@ isdn_net_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
 	eth = eth_hdr(skb);
 
@@ -1786,7 +1786,7 @@ isdn_net_receive(struct net_device *ndev, struct sk_buff *skb)
 	}
 	skb->dev = ndev;
 	skb->pkt_type = PACKET_HOST;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 #ifdef ISDN_DEBUG_NET_DUMP
 	isdn_dumppkt("R:", skb->data, skb->len, 40);
 #endif
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 1b2df80c3bc..be915051cb2 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1167,7 +1167,7 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
 		mlp->huptimer = 0;
 #endif /* CONFIG_IPPP_FILTER */
 	skb->dev = dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	netif_rx(skb);
 	/* net_dev->local->stats.rx_packets++; done in isdn_net.c */
 	return;
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index b691292ff59..d5b878d5628 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -753,7 +753,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 	/* Set the mac.raw pointer, since this apparently isn't getting
 	 * done before we get the skb. Pull the data pointer past the mac data.
 	 */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, 12);
 
         dma = pci_map_single(mpt_dev->pcidev, skb->data, skb->len,
@@ -1549,7 +1549,7 @@ mpt_lan_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct mpt_lan_ohdr *fch = (struct mpt_lan_ohdr *)skb->data;
 	struct fcllc *fcllc;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, sizeof(struct mpt_lan_ohdr));
 
 	if (fch->dtype == htons(0xffff)) {
diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index dba5e516545..28cb79cee91 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -853,7 +853,7 @@ static void cops_rx(struct net_device *dev)
                 return;
         }
 
-        skb->mac.raw    = skb->data;    /* Point to entire packet. */
+        skb_reset_mac_header(skb);    /* Point to entire packet. */
         skb_pull(skb,3);
         skb->h.raw      = skb->data;    /* Point to data (Skip header). */
 
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index 2ea44ce4981..12682439f8b 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -770,7 +770,7 @@ static int sendup_buffer (struct net_device *dev)
 	skb->data[0] = dnode;
 	skb->data[1] = snode;
 	skb->data[2] = llaptype;
-	skb->mac.raw = skb->data;	/* save pointer to llap header */
+	skb_reset_mac_header(skb);	/* save pointer to llap header */
 	skb_pull(skb,3);
 
 	/* copy ddp(s,e)hdr + contents */
diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c
index 6318814a11a..e0a18e7c73c 100644
--- a/drivers/net/arcnet/arc-rawmode.c
+++ b/drivers/net/arcnet/arc-rawmode.c
@@ -110,7 +110,7 @@ static void rx(struct net_device *dev, int bufnum,
 
 	pkt = (struct archdr *) skb->data;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ARC_HDR_SIZE);
 
 	/* up to sizeof(pkt->soft) has already been copied from the card */
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index 66485585ab3..6c764b66e9c 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -122,10 +122,8 @@ static void rx(struct net_device *dev, int bufnum,
 	}
 	skb_put(skb, length + ARC_HDR_SIZE + sizeof(int));
 	skb->dev = dev;
-
-	pkt = (struct archdr *) skb->data;
-
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
+	pkt = (struct archdr *)skb->mac.raw;
 	skb_pull(skb, ARC_HDR_SIZE);
 
 	/* up to sizeof(pkt->soft) has already been copied from the card */
@@ -270,9 +268,8 @@ static int ack_tx(struct net_device *dev, int acked)
   skb_put(ackskb, length + ARC_HDR_SIZE );
   ackskb->dev = dev;
 
-  ackpkt = (struct archdr *) ackskb->data;
-
-  ackskb->mac.raw = ackskb->data;
+  skb_reset_mac_header(ackskb);
+  ackpkt = (struct archdr *)ackskb->mac.raw;
   /* skb_pull(ackskb, ARC_HDR_SIZE); */
 
 
diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c
index 6d6c69f036e..2de8877ece2 100644
--- a/drivers/net/arcnet/rfc1051.c
+++ b/drivers/net/arcnet/rfc1051.c
@@ -94,7 +94,7 @@ static unsigned short type_trans(struct sk_buff *skb, struct net_device *dev)
 	int hdr_size = ARC_HDR_SIZE + RFC1051_HDR_SIZE;
 
 	/* Pull off the arcnet header. */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, hdr_size);
 
 	if (pkt->hard.dest == 0)
diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c
index bee34226abf..460a095000c 100644
--- a/drivers/net/arcnet/rfc1201.c
+++ b/drivers/net/arcnet/rfc1201.c
@@ -96,7 +96,7 @@ static unsigned short type_trans(struct sk_buff *skb, struct net_device *dev)
 	int hdr_size = ARC_HDR_SIZE + RFC1201_HDR_SIZE;
 
 	/* Pull off the arcnet header. */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, hdr_size);
 
 	if (pkt->hard.dest == 0)
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 3fb354d9c51..e3c9e2e56d1 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -884,7 +884,7 @@ static int ad_lacpdu_send(struct port *port)
 	}
 
 	skb->dev = slave->dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->nh.raw = skb->data + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 	skb->priority = TC_PRIO_CONTROL;
@@ -928,7 +928,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
 	skb_reserve(skb, 16);
 
 	skb->dev = slave->dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->nh.raw = skb->data + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 217a2eedee0..916162ca0c9 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -890,7 +890,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
 		data = skb_put(skb, size);
 		memcpy(data, &pkt, size);
 
-		skb->mac.raw = data;
+		skb_reset_mac_header(skb);
 		skb->nh.raw = data + ETH_HLEN;
 		skb->protocol = pkt.type;
 		skb->priority = TC_PRIO_CONTROL;
@@ -1266,7 +1266,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	u8 *hash_start = NULL;
 	int res = 1;
 
-	skb->mac.raw = (unsigned char *)skb->data;
+	skb_reset_mac_header(skb);
 	eth_data = eth_hdr(skb);
 
 	/* make sure that the curr_active_slave and the slaves list do
diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
index 199e5066acf..ebcf35e4cf5 100644
--- a/drivers/net/cxgb3/cxgb3_offload.c
+++ b/drivers/net/cxgb3/cxgb3_offload.c
@@ -783,7 +783,7 @@ static int do_trace(struct t3cdev *dev, struct sk_buff *skb)
 	skb->protocol = htons(0xffff);
 	skb->dev = dev->lldev;
 	skb_pull(skb, sizeof(*p));
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	netif_receive_skb(skb);
 	return 0;
 }
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 8946f7aa97c..b5cf2a60834 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1620,7 +1620,8 @@ static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
 			     unsigned int gather_idx)
 {
 	rq->offload_pkts++;
-	skb->mac.raw = skb->nh.raw = skb->h.raw = skb->data;
+	skb_reset_mac_header(skb);
+	skb->nh.raw = skb->h.raw = skb->data;
 
 	if (rq->polling) {
 		rx_gather[gather_idx++] = skb;
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index cebf8c374bc..0f10758226f 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -1932,7 +1932,7 @@ static int  ali_ircc_dma_receive_complete(struct ali_ircc_cb *self)
 			self->stats.rx_packets++;
 
 			skb->dev = self->netdev;
-			skb->mac.raw  = skb->data;
+			skb_reset_mac_header(skb);
 			skb->protocol = htons(ETH_P_IRDA);
 			netif_rx(skb);
 			self->netdev->last_rx = jiffies;
diff --git a/drivers/net/irda/au1k_ir.c b/drivers/net/irda/au1k_ir.c
index 37914dc5b90..27afd0f367d 100644
--- a/drivers/net/irda/au1k_ir.c
+++ b/drivers/net/irda/au1k_ir.c
@@ -606,7 +606,7 @@ static int au1k_irda_rx(struct net_device *dev)
 				skb_put(skb, count-2);
 			memcpy(skb->data, (void *)pDB->vaddr, count-2);
 			skb->dev = dev;
-			skb->mac.raw = skb->data;
+			skb_reset_mac_header(skb);
 			skb->protocol = htons(ETH_P_IRDA);
 			netif_rx(skb);
 			prxd->count_0 = 0;
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 11af0ae7510..ddfa6c38a16 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1286,7 +1286,7 @@ dumpbufs(self->rx_bufs[self->rxs],len,'<');
 
                       self->stats.rx_packets++;
                       skb->dev = self->netdev;
-                      skb->mac.raw = skb->data;
+                      skb_reset_mac_header(skb);
                       skb->protocol = htons (ETH_P_IRDA);
                     }
                   else
diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c
index 1d510bdc9b8..6ef375a095f 100644
--- a/drivers/net/irda/irda-usb.c
+++ b/drivers/net/irda/irda-usb.c
@@ -921,7 +921,7 @@ static void irda_usb_receive(struct urb *urb)
 
 	/* Ask the networking layer to queue the packet for the IrDA stack */
 	dataskb->dev = self->netdev;
-	dataskb->mac.raw  = dataskb->data;
+	skb_reset_mac_header(dataskb);
 	dataskb->protocol = htons(ETH_P_IRDA);
 	len = dataskb->len;
 	netif_rx(dataskb);
diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index f0c61f3b2a8..3ff1f4b33c0 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -428,7 +428,7 @@ static void mcs_unwrap_mir(struct mcs_cb *mcs, __u8 *buf, int len)
 	skb_reserve(skb, 1);
 	memcpy(skb->data, buf, new_len);
 	skb_put(skb, new_len);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
 	skb->dev = mcs->netdev;
 
@@ -481,7 +481,7 @@ static void mcs_unwrap_fir(struct mcs_cb *mcs, __u8 *buf, int len)
 	skb_reserve(skb, 1);
 	memcpy(skb->data, buf, new_len);
 	skb_put(skb, new_len);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
 	skb->dev = mcs->netdev;
 
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index 29b5ccd29d0..8ce7dad582f 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -1881,7 +1881,7 @@ static int nsc_ircc_dma_receive_complete(struct nsc_ircc_cb *self, int iobase)
 			self->stats.rx_packets++;
 
 			skb->dev = self->netdev;
-			skb->mac.raw  = skb->data;
+			skb_reset_mac_header(skb);
 			skb->protocol = htons(ETH_P_IRDA);
 			netif_rx(skb);
 			self->netdev->last_rx = jiffies;
diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c
index 2272156af31..f35d7d42624 100644
--- a/drivers/net/irda/pxaficp_ir.c
+++ b/drivers/net/irda/pxaficp_ir.c
@@ -391,7 +391,7 @@ static void pxa_irda_fir_irq_eif(struct pxa_irda *si, struct net_device *dev, in
 
 		/* Feed it to IrLAP  */
 		skb->dev = dev;
-		skb->mac.raw  = skb->data;
+		skb_reset_mac_header(skb);
 		skb->protocol = htons(ETH_P_IRDA);
 		netif_rx(skb);
 
diff --git a/drivers/net/irda/sa1100_ir.c b/drivers/net/irda/sa1100_ir.c
index 937372d0039..056639f72be 100644
--- a/drivers/net/irda/sa1100_ir.c
+++ b/drivers/net/irda/sa1100_ir.c
@@ -504,7 +504,7 @@ static void sa1100_irda_fir_error(struct sa1100_irda *si, struct net_device *dev
 
 		skb_put(skb, len);
 		skb->dev = dev;
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->protocol = htons(ETH_P_IRDA);
 		si->stats.rx_packets++;
 		si->stats.rx_bytes += len;
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index 31c623381ea..103a2d18ed2 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -1412,7 +1412,7 @@ static void smsc_ircc_dma_receive_complete(struct smsc_ircc_cb *self)
 	self->stats.rx_bytes += len;
 
 	skb->dev = self->netdev;
-	skb->mac.raw  = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
 	netif_rx(skb);
 }
diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c
index 20d306fea4c..a22175f4ea8 100644
--- a/drivers/net/irda/stir4200.c
+++ b/drivers/net/irda/stir4200.c
@@ -364,7 +364,7 @@ static void fir_eof(struct stir_cb *stir)
 
 	skb_put(skb, len);
 
-	skb->mac.raw  = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
 	skb->dev = stir->netdev;
 
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index c3ed9b3067e..5ff41631460 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -1125,7 +1125,7 @@ static int via_ircc_dma_receive_complete(struct via_ircc_cb *self,
 		self->stats.rx_bytes += len;
 		self->stats.rx_packets++;
 		skb->dev = self->netdev;
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->protocol = htons(ETH_P_IRDA);
 		netif_rx(skb);
 		return TRUE;
@@ -1198,7 +1198,7 @@ F01_E */
 		self->stats.rx_bytes += len;
 		self->stats.rx_packets++;
 		skb->dev = self->netdev;
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->protocol = htons(ETH_P_IRDA);
 		netif_rx(skb);
 
@@ -1244,7 +1244,7 @@ static int upload_rxdata(struct via_ircc_cb *self, int iobase)
 	self->stats.rx_bytes += len;
 	self->stats.rx_packets++;
 	skb->dev = self->netdev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
 	netif_rx(skb);
 	if (st_fifo->len < (MAX_RX_WINDOW + 2)) {
@@ -1313,7 +1313,7 @@ static int RxTimerHandler(struct via_ircc_cb *self, int iobase)
 			self->stats.rx_bytes += len;
 			self->stats.rx_packets++;
 			skb->dev = self->netdev;
-			skb->mac.raw = skb->data;
+			skb_reset_mac_header(skb);
 			skb->protocol = htons(ETH_P_IRDA);
 			netif_rx(skb);
 		}		//while
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index 3457e9d8b66..79b407f3a49 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -595,7 +595,7 @@ static int vlsi_process_rx(struct vlsi_ring *r, struct ring_descr *rd)
 	rd->skb = NULL;
 	skb->dev = ndev;
 	memcpy(skb_put(skb,len), rd->buf, len);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	if (in_interrupt())
 		netif_rx(skb);
 	else
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index 4212657fa4f..bee44513095 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -919,7 +919,7 @@ int w83977af_dma_receive_complete(struct w83977af_ir *self)
 			self->stats.rx_packets++;
 			
 			skb->dev = self->netdev;
-			skb->mac.raw  = skb->data;
+			skb_reset_mac_header(skb);
 			skb->protocol = htons(ETH_P_IRDA);
 			netif_rx(skb);
 			self->netdev->last_rx = jiffies;
diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c
index ee26ef52289..de092658db6 100644
--- a/drivers/net/myri_sbus.c
+++ b/drivers/net/myri_sbus.c
@@ -368,7 +368,7 @@ static __be16 myri_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw = (((unsigned char *)skb->data) + MYRI_PAD_LEN);
+	skb->mac.raw = skb->data + MYRI_PAD_LEN;
 	skb_pull(skb, dev->hard_header_len);
 	eth = eth_hdr(skb);
 
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index ef58e412878..18f1790aab9 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1685,7 +1685,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 			skb_pull_rcsum(skb, 2);
 			skb->dev = ppp->dev;
 			skb->protocol = htons(npindex_to_ethertype[npi]);
-			skb->mac.raw = skb->data;
+			skb_reset_mac_header(skb);
 			netif_rx(skb);
 			ppp->dev->last_rx = jiffies;
 		}
diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c
index b9fa4fbb139..1de3eec1a79 100644
--- a/drivers/net/sb1000.c
+++ b/drivers/net/sb1000.c
@@ -834,7 +834,7 @@ printk("cm0: IP identification: %02x%02x  fragment offset: %02x%02x\n", buffer[3
 			goto dropped_frame;
 		}
 		skb->dev = dev;
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->protocol = (unsigned short) buffer[NewDatagramHeaderSkip + 16];
 		insw(ioaddr, skb_put(skb, NewDatagramDataSize),
 			NewDatagramDataSize / 2);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a57aa010cb2..288d8559f8c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -256,7 +256,7 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
 
 	switch (tun->flags & TUN_TYPE_MASK) {
 	case TUN_TUN_DEV:
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->protocol = pi.proto;
 		skb->dev = tun->dev;
 		break;
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 5b82e4fd0d7..c198511ec3f 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -773,7 +773,7 @@ static int sppp_rx_done(struct channel_data *chan)
 	}
 	chan->rx_skb->protocol = htons(ETH_P_WAN_PPP);
 	chan->rx_skb->dev = chan->pppdev.dev;
-	chan->rx_skb->mac.raw = chan->rx_skb->data;
+	skb_reset_mac_header(chan->rx_skb)
 	chan->stats.rx_packets++;
 	chan->stats.rx_bytes += chan->cosa->rxsize;
 	netif_rx(chan->rx_skb);
diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c
index a631d1c2fa1..016b3ff3ea5 100644
--- a/drivers/net/wan/cycx_x25.c
+++ b/drivers/net/wan/cycx_x25.c
@@ -834,7 +834,7 @@ static void cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd)
 	++chan->ifstats.rx_packets;
 	chan->ifstats.rx_bytes += pktlen;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	netif_rx(skb);
 	dev->last_rx = jiffies;		/* timestamp */
 }
diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
index 73698755943..66be20c292b 100644
--- a/drivers/net/wan/dlci.c
+++ b/drivers/net/wan/dlci.c
@@ -176,7 +176,7 @@ static void dlci_receive(struct sk_buff *skb, struct net_device *dev)
 	if (process)
 	{
 		/* we've set up the protocol, so discard the header */
-		skb->mac.raw = skb->data; 
+		skb_reset_mac_header(skb);
 		skb_pull(skb, header);
 		dlp->stats.rx_bytes += skb->len;
 		netif_rx(skb);
diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index c45d6a83339..58a53b6d9b4 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -864,7 +864,7 @@ fst_tx_dma_complete(struct fst_card_info *card, struct fst_port_info *port,
 static __be16 farsync_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	skb->dev = dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->pkt_type = PACKET_HOST;
 	return htons(ETH_P_CUST);
 }
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 2b54f1bc3a0..6d288839dda 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1667,7 +1667,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             skb_put (skb, len);
             skb->protocol = lmc_proto_type(sc, skb);
             skb->protocol = htons(ETH_P_WAN_PPP);
-            skb->mac.raw = skb->data;
+            skb_reset_mac_header(skb);
 //            skb->nh.raw = skb->data;
             skb->dev = dev;
             lmc_proto_netif(sc, skb);
@@ -1705,7 +1705,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             memcpy(skb_put(nsb, len), skb->data, len);
             
             nsb->protocol = lmc_proto_type(sc, skb);
-            nsb->mac.raw = nsb->data;
+            skb_reset_mac_header(nsb);
 //            nsb->nh.raw = nsb->data;
             nsb->dev = dev;
             lmc_proto_netif(sc, nsb);
diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c
index 62184dee377..edbc55528be 100644
--- a/drivers/net/wan/pc300_drv.c
+++ b/drivers/net/wan/pc300_drv.c
@@ -1755,7 +1755,7 @@ cpc_trace(struct net_device *dev, struct sk_buff *skb_main, char rx_tx)
 
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_CUST);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->pkt_type = PACKET_HOST;
 	skb->len = 10 + skb_main->len;
 
diff --git a/drivers/net/wan/pc300_tty.c b/drivers/net/wan/pc300_tty.c
index 5873c346e7e..de02a07259c 100644
--- a/drivers/net/wan/pc300_tty.c
+++ b/drivers/net/wan/pc300_tty.c
@@ -1003,7 +1003,7 @@ static void cpc_tty_trace(pc300dev_t *dev, char* buf, int len, char rxtx)
 	skb_put (skb, 10 + len); 
 	skb->dev = dev->dev; 
 	skb->protocol = htons(ETH_P_CUST); 
-	skb->mac.raw = skb->data; 
+	skb_reset_mac_header(skb);
 	skb->pkt_type = PACKET_HOST; 
 	skb->len = 10 + len; 
 
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index e50b1482d79..692a23f9834 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -3411,7 +3411,7 @@ badrx:
 			OUT4500( apriv, EVACK, EV_RX);
 
 			if (test_bit(FLAG_802_11, &apriv->flags)) {
-				skb->mac.raw = skb->data;
+				skb_reset_mac_header(skb);
 				skb->pkt_type = PACKET_OTHERHOST;
 				skb->dev = apriv->wifidev;
 				skb->protocol = htons(ETH_P_802_2);
@@ -3746,7 +3746,7 @@ void mpi_receive_802_11 (struct airo_info *ai)
 		wireless_spy_update(ai->dev, sa, &wstats);
 	}
 #endif /* IW_WIRELESS_SPY */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->pkt_type = PACKET_OTHERHOST;
 	skb->dev = ai->wifidev;
 	skb->protocol = htons(ETH_P_802_2);
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index f78ee26d787..e4082f9d766 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -167,7 +167,7 @@ hdr->f.status = s; hdr->f.len = l; hdr->f.data = d
 
 	ret = skb->len - phdrlen;
 	skb->dev = dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, hdrlen);
 	if (prism_header)
 		skb_pull(skb, phdrlen);
@@ -1073,10 +1073,11 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 
 	if (skb2 != NULL) {
 		/* send to wireless media */
+		skb2->dev = dev;
 		skb2->protocol = __constant_htons(ETH_P_802_3);
-		skb2->mac.raw = skb2->nh.raw = skb2->data;
+		skb_reset_mac_header(skb2);
+		skb2->nh.raw = skb2->data;
 		/* skb2->nh.raw = skb2->data + ETH_HLEN; */
-		skb2->dev = dev;
 		dev_queue_xmit(skb2);
 	}
 
diff --git a/drivers/net/wireless/hostap/hostap_80211_tx.c b/drivers/net/wireless/hostap/hostap_80211_tx.c
index 4a5be70c041..159baef18e4 100644
--- a/drivers/net/wireless/hostap/hostap_80211_tx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_tx.c
@@ -237,7 +237,7 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	iface->stats.tx_packets++;
 	iface->stats.tx_bytes += skb->len;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	meta = (struct hostap_skb_tx_data *) skb->cb;
 	memset(meta, 0, sizeof(*meta));
 	meta->magic = HOSTAP_SKB_TX_DATA_MAGIC;
diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c
index efb8cf3bd8a..cc18f9686d2 100644
--- a/drivers/net/wireless/hostap/hostap_ap.c
+++ b/drivers/net/wireless/hostap/hostap_ap.c
@@ -982,7 +982,8 @@ static void prism2_send_mgmt(struct net_device *dev,
 	meta->tx_cb_idx = tx_cb_idx;
 
 	skb->dev = dev;
-	skb->mac.raw = skb->nh.raw = skb->data;
+	skb_reset_mac_header(skb);
+	skb->nh.raw = skb->data;
 	dev_queue_xmit(skb);
 }
 #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index 3079378fb8c..9003ff7d151 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -2217,7 +2217,7 @@ static void hostap_tx_callback(local_info_t *local,
 		memcpy(skb_put(skb, len), payload, len);
 
 	skb->dev = local->dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 
 	cb->func(skb, ok, cb->data);
 }
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 9077e6edde3..0e29ff76287 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -1063,7 +1063,8 @@ int prism2_sta_send_mgmt(local_info_t *local, u8 *dst, u16 stype,
 	meta->iface = netdev_priv(dev);
 
 	skb->dev = dev;
-	skb->mac.raw = skb->nh.raw = skb->data;
+	skb_reset_mac_header(skb);
+	skb->nh.raw = skb->data;
 	dev_queue_xmit(skb);
 
 	return 0;
diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c
index c878a2f3239..b04c56a25cc 100644
--- a/drivers/net/wireless/ipw2200.c
+++ b/drivers/net/wireless/ipw2200.c
@@ -8133,7 +8133,7 @@ static void ipw_handle_mgmt_packet(struct ipw_priv *priv,
 		skb->dev = priv->ieee->dev;
 
 		/* Point raw at the ieee80211_stats */
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 
 		skb->pkt_type = PACKET_OTHERHOST;
 		skb->protocol = __constant_htons(ETH_P_80211_STATS);
diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c
index 3f9d78d059b..f1415bff527 100644
--- a/drivers/net/wireless/orinoco.c
+++ b/drivers/net/wireless/orinoco.c
@@ -770,7 +770,7 @@ static void orinoco_rx_monitor(struct net_device *dev, u16 rxfid,
 
 	/* Copy the 802.11 header to the skb */
 	memcpy(skb_put(skb, hdrlen), &(desc->frame_ctl), hdrlen);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 
 	/* If any, copy the data from the card to the skb */
 	if (datalen > 0) {
diff --git a/drivers/net/wireless/prism54/islpci_eth.c b/drivers/net/wireless/prism54/islpci_eth.c
index fc2e0f3a896..6ebfff03424 100644
--- a/drivers/net/wireless/prism54/islpci_eth.c
+++ b/drivers/net/wireless/prism54/islpci_eth.c
@@ -303,7 +303,7 @@ islpci_monitor_rx(islpci_private *priv, struct sk_buff **skb)
 		skb_pull(*skb, sizeof (struct rfmon_header));
 
 	(*skb)->protocol = htons(ETH_P_802_2);
-	(*skb)->mac.raw = (*skb)->data;
+	skb_reset_mac_header(*skb);
 	(*skb)->pkt_type = PACKET_OTHERHOST;
 
 	return 0;
diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c
index f5ce1c6063d..2a299a0676a 100644
--- a/drivers/net/wireless/strip.c
+++ b/drivers/net/wireless/strip.c
@@ -2009,7 +2009,7 @@ static void deliver_packet(struct strip *strip_info, STRIP_Header * header,
 		       packetlen);
 		skb->dev = get_strip_dev(strip_info);
 		skb->protocol = header->protocol;
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 
 		/* Having put a fake header on the front of the sk_buff for the */
 		/* benefit of tools like tcpdump, skb_pull now 'consumes' that  */
diff --git a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c
index 0d6d5fcc128..787c0131704 100644
--- a/drivers/s390/net/ctcmain.c
+++ b/drivers/s390/net/ctcmain.c
@@ -455,7 +455,7 @@ ctc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
 			return;
 		}
 		skb_put(pskb, header->length);
-		pskb->mac.raw = pskb->data;
+		skb_reset_mac_header(pskb);
 		len -= header->length;
 		skb = dev_alloc_skb(pskb->len);
 		if (!skb) {
@@ -473,7 +473,7 @@ ctc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
 			return;
 		}
 		memcpy(skb_put(skb, pskb->len), pskb->data, pskb->len);
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->dev = pskb->dev;
 		skb->protocol = pskb->protocol;
 		pskb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 594320ca1b7..82edf201440 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -635,7 +635,7 @@ static void netiucv_unpack_skb(struct iucv_connection *conn,
 			return;
 		}
 		skb_put(pskb, header->next);
-		pskb->mac.raw = pskb->data;
+		skb_reset_mac_header(pskb);
 		skb = dev_alloc_skb(pskb->len);
 		if (!skb) {
 			PRINT_WARN("%s Out of memory in netiucv_unpack_skb\n",
@@ -646,7 +646,7 @@ static void netiucv_unpack_skb(struct iucv_connection *conn,
 			return;
 		}
 		memcpy(skb_put(skb, pskb->len), pskb->data, pskb->len);
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->dev = pskb->dev;
 		skb->protocol = pskb->protocol;
 		pskb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 7c735e1fe06..910a8ab66b0 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -486,7 +486,7 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 		return -ENOMEM;
 	}
 	if (qhdr->hdr.l2.id == QETH_HEADER_TYPE_LAYER2) {
-		skb->mac.raw = (skb->data) + sizeof(struct qeth_hdr);
+		skb->mac.raw = skb->data + sizeof(struct qeth_hdr);
 		memcpy(&eddp->mac, eth_hdr(skb), ETH_HLEN);
 #ifdef CONFIG_QETH_VLAN
 		if (eddp->mac.h_proto == __constant_htons(ETH_P_8021Q)) {
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index d502b77adf6..28822025b79 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -2278,7 +2278,7 @@ qeth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	    (card->info.link_type == QETH_LINK_TYPE_LANE_TR))
 	 	return tr_type_trans(skb,dev);
 #endif /* CONFIG_TR */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN );
 	eth = eth_hdr(skb);
 
@@ -2461,7 +2461,7 @@ qeth_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
 	if (card->options.fake_ll)
 		qeth_rebuild_skb_fake_ll(card, skb, hdr);
 	else
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 	skb->ip_summed = card->options.checksum_type;
 	if (card->options.checksum_type == HW_CHECKSUMMING){
 		if ( (hdr->hdr.l3.ext_flags &
-- 
cgit v1.2.3


From 48d49d0ccdaa9caff4636ef9c3410973d28131b5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 12:30:58 -0300
Subject: [SK_BUFF]: Introduce skb_set_mac_header()

For the cases where we want to set skb->mac.raw to an offset from skb->data.

Simple cases first, the memmove ones and specially pktgen will be left for later.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/loopback.c       | 2 +-
 drivers/net/myri_sbus.c      | 2 +-
 drivers/s390/net/qeth_eddp.c | 2 +-
 drivers/s390/net/qeth_main.c | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 4380e5e89dc..a71d8e0a9b5 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -90,7 +90,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 		if (!nskb)
 			break;
 		skb_reserve(nskb, 32);
-		nskb->mac.raw = nskb->data - 14;
+		skb_set_mac_header(nskb, -ETH_HLEN);
 		nskb->nh.raw = nskb->data;
 		iph = nskb->nh.iph;
 		memcpy(nskb->data, skb->nh.raw, doffset);
diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c
index de092658db6..e1f16fb0584 100644
--- a/drivers/net/myri_sbus.c
+++ b/drivers/net/myri_sbus.c
@@ -368,7 +368,7 @@ static __be16 myri_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw = skb->data + MYRI_PAD_LEN;
+	skb_set_mac_header(skb, MYRI_PAD_LEN);
 	skb_pull(skb, dev->hard_header_len);
 	eth = eth_hdr(skb);
 
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 910a8ab66b0..893125403c6 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -486,7 +486,7 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 		return -ENOMEM;
 	}
 	if (qhdr->hdr.l2.id == QETH_HEADER_TYPE_LAYER2) {
-		skb->mac.raw = skb->data + sizeof(struct qeth_hdr);
+		skb_set_mac_header(skb, sizeof(struct qeth_hdr));
 		memcpy(&eddp->mac, eth_hdr(skb), ETH_HLEN);
 #ifdef CONFIG_QETH_VLAN
 		if (eddp->mac.h_proto == __constant_htons(ETH_P_8021Q)) {
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 28822025b79..c0ee6d94ea3 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -2306,7 +2306,7 @@ qeth_rebuild_skb_fake_ll_tr(struct qeth_card *card, struct sk_buff *skb,
 	struct iphdr *ip_hdr;
 
 	QETH_DBF_TEXT(trace,5,"skbfktr");
-	skb->mac.raw = skb->data - QETH_FAKE_LL_LEN_TR;
+	skb_set_mac_header(skb, -QETH_FAKE_LL_LEN_TR);
 	/* this is a fake ethernet header */
 	fake_hdr = tr_hdr(skb);
 
@@ -2359,7 +2359,7 @@ qeth_rebuild_skb_fake_ll_eth(struct qeth_card *card, struct sk_buff *skb,
 	struct iphdr *ip_hdr;
 
 	QETH_DBF_TEXT(trace,5,"skbfketh");
-	skb->mac.raw = skb->data - QETH_FAKE_LL_LEN_ETH;
+	skb_set_mac_header(skb, -QETH_FAKE_LL_LEN_ETH);
 	/* this is a fake ethernet header */
 	fake_hdr = eth_hdr(skb);
 
-- 
cgit v1.2.3


From 98e399f82ab3a6d863d1d4a7ea48925cc91c830e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 15:33:04 -0700
Subject: [SK_BUFF]: Introduce skb_mac_header()

For the places where we need a pointer to the mac header, it is still legal to
touch skb->mac.raw directly if just adding to, subtracting from or setting it
to another layer header.

This one also converts some more cases to skb_reset_mac_header() that my
regex missed as it had no spaces before nor after '=', ugh.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/aoe/aoe.h                   |  2 +-
 drivers/ieee1394/eth1394.h                |  2 +-
 drivers/media/dvb/dvb-core/dvb_net.c      |  2 +-
 drivers/message/fusion/mptlan.c           | 26 ++++++++++++++------------
 drivers/net/arcnet/capmode.c              |  4 ++--
 drivers/net/plip.c                        |  2 +-
 drivers/net/slip.c                        |  2 +-
 drivers/net/wan/hostess_sv11.c            |  2 +-
 drivers/net/wan/sealevel.c                |  2 +-
 drivers/net/wan/syncppp.c                 |  2 +-
 drivers/net/wireless/airo.c               |  2 +-
 drivers/net/wireless/hostap/hostap_main.c | 14 +++++++-------
 drivers/net/wireless/orinoco.c            |  2 +-
 drivers/net/wireless/wavelan.c            |  5 +++--
 drivers/net/wireless/wavelan_cs.c         |  4 ++--
 drivers/s390/net/claw.c                   |  2 +-
 16 files changed, 39 insertions(+), 36 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 4c34f8d31cc..1d846681794 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -53,7 +53,7 @@ struct aoe_hdr {
 
 static inline struct aoe_hdr *aoe_hdr(const struct sk_buff *skb)
 {
-	return (struct aoe_hdr *)skb->mac.raw;
+	return (struct aoe_hdr *)skb_mac_header(skb);
 }
 #endif
 
diff --git a/drivers/ieee1394/eth1394.h b/drivers/ieee1394/eth1394.h
index c45cbff9138..1e835653514 100644
--- a/drivers/ieee1394/eth1394.h
+++ b/drivers/ieee1394/eth1394.h
@@ -90,7 +90,7 @@ struct eth1394hdr {
 
 static inline struct eth1394hdr *eth1394_hdr(const struct sk_buff *skb)
 {
-	return (struct eth1394hdr *)skb->mac.raw;
+	return (struct eth1394hdr *)skb_mac_header(skb);
 }
 #endif
 
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index 76e9c36597e..c6b004182d9 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -174,7 +174,7 @@ static unsigned short dvb_net_eth_type_trans(struct sk_buff *skb,
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb,dev->hard_header_len);
 	eth = eth_hdr(skb);
 
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index d5b878d5628..21fe1b66808 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -714,6 +714,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 	LANSendRequest_t *pSendReq;
 	SGETransaction32_t *pTrans;
 	SGESimple64_t *pSimple;
+	const unsigned char *mac;
 	dma_addr_t dma;
 	unsigned long flags;
 	int ctx;
@@ -784,6 +785,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 //			IOC_AND_NETDEV_NAMES_s_s(dev),
 //			ctx, skb, skb->data));
 
+	mac = skb_mac_header(skb);
 #ifdef QLOGIC_NAA_WORKAROUND
 {
 	struct NAA_Hosed *nh;
@@ -793,12 +795,12 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 	   drops. */
 	read_lock_irq(&bad_naa_lock);
 	for (nh = mpt_bad_naa; nh != NULL; nh=nh->next) {
-		if ((nh->ieee[0] == skb->mac.raw[0]) &&
-		    (nh->ieee[1] == skb->mac.raw[1]) &&
-		    (nh->ieee[2] == skb->mac.raw[2]) &&
-		    (nh->ieee[3] == skb->mac.raw[3]) &&
-		    (nh->ieee[4] == skb->mac.raw[4]) &&
-		    (nh->ieee[5] == skb->mac.raw[5])) {
+		if ((nh->ieee[0] == mac[0]) &&
+		    (nh->ieee[1] == mac[1]) &&
+		    (nh->ieee[2] == mac[2]) &&
+		    (nh->ieee[3] == mac[3]) &&
+		    (nh->ieee[4] == mac[4]) &&
+		    (nh->ieee[5] == mac[5])) {
 			cur_naa = nh->NAA;
 			dlprintk ((KERN_INFO "mptlan/sdu_send: using NAA value "
 				  "= %04x.\n", cur_naa));
@@ -810,12 +812,12 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 #endif
 
 	pTrans->TransactionDetails[0] = cpu_to_le32((cur_naa         << 16) |
-						    (skb->mac.raw[0] <<  8) |
-						    (skb->mac.raw[1] <<  0));
-	pTrans->TransactionDetails[1] = cpu_to_le32((skb->mac.raw[2] << 24) |
-						    (skb->mac.raw[3] << 16) |
-						    (skb->mac.raw[4] <<  8) |
-						    (skb->mac.raw[5] <<  0));
+						    (mac[0] <<  8) |
+						    (mac[1] <<  0));
+	pTrans->TransactionDetails[1] = cpu_to_le32((mac[2] << 24) |
+						    (mac[3] << 16) |
+						    (mac[4] <<  8) |
+						    (mac[5] <<  0));
 
 	pSimple = (SGESimple64_t *) &pTrans->TransactionDetails[2];
 
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index 6c764b66e9c..f6a87bd20ff 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -123,7 +123,7 @@ static void rx(struct net_device *dev, int bufnum,
 	skb_put(skb, length + ARC_HDR_SIZE + sizeof(int));
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
-	pkt = (struct archdr *)skb->mac.raw;
+	pkt = (struct archdr *)skb_mac_header(skb);
 	skb_pull(skb, ARC_HDR_SIZE);
 
 	/* up to sizeof(pkt->soft) has already been copied from the card */
@@ -269,7 +269,7 @@ static int ack_tx(struct net_device *dev, int acked)
   ackskb->dev = dev;
 
   skb_reset_mac_header(ackskb);
-  ackpkt = (struct archdr *)ackskb->mac.raw;
+  ackpkt = (struct archdr *)skb_mac_header(ackskb);
   /* skb_pull(ackskb, ARC_HDR_SIZE); */
 
 
diff --git a/drivers/net/plip.c b/drivers/net/plip.c
index 6bb085f5443..8754cf3356b 100644
--- a/drivers/net/plip.c
+++ b/drivers/net/plip.c
@@ -546,7 +546,7 @@ static __be16 plip_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb,dev->hard_header_len);
 	eth = eth_hdr(skb);
 
diff --git a/drivers/net/slip.c b/drivers/net/slip.c
index 2f4b1de7a2b..65bd20fac82 100644
--- a/drivers/net/slip.c
+++ b/drivers/net/slip.c
@@ -363,7 +363,7 @@ sl_bump(struct slip *sl)
 	}
 	skb->dev = sl->dev;
 	memcpy(skb_put(skb,count), sl->rbuff, count);
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol=htons(ETH_P_IP);
 	netif_rx(skb);
 	sl->dev->last_rx = jiffies;
diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index a02c5fb4056..9ba3e4ee6ec 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -59,7 +59,7 @@ static void hostess_input(struct z8530_channel *c, struct sk_buff *skb)
 	/* Drop the CRC - it's not a good idea to try and negotiate it ;) */
 	skb_trim(skb, skb->len-2);
 	skb->protocol=__constant_htons(ETH_P_WAN_PPP);
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb->dev=c->netdevice;
 	/*
 	 *	Send it to the PPP layer. We don't have time to process
diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 70fb1b98b1d..131358108c5 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -61,7 +61,7 @@ static void sealevel_input(struct z8530_channel *c, struct sk_buff *skb)
 	/* Drop the CRC - it's not a good idea to try and negotiate it ;) */
 	skb_trim(skb, skb->len-2);
 	skb->protocol=htons(ETH_P_WAN_PPP);
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb->dev=c->netdevice;
 	/*
 	 *	Send it to the PPP layer. We don't have time to process
diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c
index 218f7b574ab..67fc67cfd45 100644
--- a/drivers/net/wan/syncppp.c
+++ b/drivers/net/wan/syncppp.c
@@ -227,7 +227,7 @@ static void sppp_input (struct net_device *dev, struct sk_buff *skb)
 	unsigned long flags;
 
 	skb->dev=dev;
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 
 	if (dev->flags & IFF_RUNNING)
 	{
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index 692a23f9834..7fe0a61091a 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -2444,7 +2444,7 @@ static int add_airo_dev( struct net_device *dev );
 
 static int wll_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
-	memcpy(haddr, skb->mac.raw + 10, ETH_ALEN);
+	memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN);
 	return ETH_ALEN;
 }
 
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 0e29ff76287..c2616e7b005 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -590,20 +590,20 @@ void hostap_dump_tx_header(const char *name, const struct hfa384x_tx_frame *tx)
 
 int hostap_80211_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
-	memcpy(haddr, skb->mac.raw + 10, ETH_ALEN); /* addr2 */
+	memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */
 	return ETH_ALEN;
 }
 
 
 int hostap_80211_prism_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
-	if (*(u32 *)skb->mac.raw == LWNG_CAP_DID_BASE) {
-		memcpy(haddr, skb->mac.raw +
-		       sizeof(struct linux_wlan_ng_prism_hdr) + 10,
+	const unsigned char *mac = skb_mac_header(skb);
+
+	if (*(u32 *)mac == LWNG_CAP_DID_BASE) {
+		memcpy(haddr, mac + sizeof(struct linux_wlan_ng_prism_hdr) + 10,
 		       ETH_ALEN); /* addr2 */
-	} else { /* (*(u32 *)skb->mac.raw == htonl(LWNG_CAPHDR_VERSION)) */
-		memcpy(haddr, skb->mac.raw +
-		       sizeof(struct linux_wlan_ng_cap_hdr) + 10,
+	} else { /* (*(u32 *)mac == htonl(LWNG_CAPHDR_VERSION)) */
+		memcpy(haddr, mac + sizeof(struct linux_wlan_ng_cap_hdr) + 10,
 		       ETH_ALEN); /* addr2 */
 	}
 	return ETH_ALEN;
diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c
index f1415bff527..062286dc8e1 100644
--- a/drivers/net/wireless/orinoco.c
+++ b/drivers/net/wireless/orinoco.c
@@ -689,7 +689,7 @@ static void orinoco_stat_gather(struct net_device *dev,
 	/* Note : gcc will optimise the whole section away if
 	 * WIRELESS_SPY is not defined... - Jean II */
 	if (SPY_NUMBER(priv)) {
-		orinoco_spy_gather(dev, skb->mac.raw + ETH_ALEN,
+		orinoco_spy_gather(dev, skb_mac_header(skb) + ETH_ALEN,
 				   desc->signal, desc->silence);
 	}
 }
diff --git a/drivers/net/wireless/wavelan.c b/drivers/net/wireless/wavelan.c
index 69cb1471096..2bf77b1ee53 100644
--- a/drivers/net/wireless/wavelan.c
+++ b/drivers/net/wireless/wavelan.c
@@ -2517,7 +2517,8 @@ wv_packet_read(struct net_device * dev, u16 buf_off, int sksize)
 	skb->protocol = eth_type_trans(skb, dev);
 
 #ifdef DEBUG_RX_INFO
-	wv_packet_info(skb->mac.raw, sksize, dev->name, "wv_packet_read");
+	wv_packet_info(skb_mac_header(skb), sksize, dev->name,
+		       "wv_packet_read");
 #endif				/* DEBUG_RX_INFO */
 
 	/* Statistics-gathering and associated stuff.
@@ -2553,7 +2554,7 @@ wv_packet_read(struct net_device * dev, u16 buf_off, int sksize)
 
 		/* Spying stuff */
 #ifdef IW_WIRELESS_SPY
-		wl_spy_gather(dev, skb->mac.raw + WAVELAN_ADDR_SIZE,
+		wl_spy_gather(dev, skb_mac_header(skb) + WAVELAN_ADDR_SIZE,
 			      stats);
 #endif /* IW_WIRELESS_SPY */
 #ifdef HISTOGRAM
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index 9351ee77331..67b867f837c 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -2889,7 +2889,7 @@ wv_packet_read(struct net_device *		dev,
   skb->protocol = eth_type_trans(skb, dev);
 
 #ifdef DEBUG_RX_INFO
-  wv_packet_info(skb->mac.raw, sksize, dev->name, "wv_packet_read");
+  wv_packet_info(skb_mac_header(skb), sksize, dev->name, "wv_packet_read");
 #endif	/* DEBUG_RX_INFO */
      
   /* Statistics gathering & stuff associated.
@@ -2923,7 +2923,7 @@ wv_packet_read(struct net_device *		dev,
 #endif	/* WAVELAN_ROAMING */
 	  
 #ifdef WIRELESS_SPY
-      wl_spy_gather(dev, skb->mac.raw + WAVELAN_ADDR_SIZE, stats);
+      wl_spy_gather(dev, skb_mac_header(skb) + WAVELAN_ADDR_SIZE, stats);
 #endif	/* WIRELESS_SPY */
 #ifdef HISTOGRAM
       wl_his_gather(dev, stats);
diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index 7809a79feec..6dd64d0c8d4 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c
@@ -3525,8 +3525,8 @@ unpack_next:
                                 memcpy(skb_put(skb,len_of_data),
 					privptr->p_mtc_envelope,
 					len_of_data);
-                                skb->mac.raw=skb->data;
                                 skb->dev=dev;
+				skb_reset_mac_header(skb);
                                 skb->protocol=htons(ETH_P_IP);
                                 skb->ip_summed=CHECKSUM_UNNECESSARY;
                                 privptr->stats.rx_packets++;
-- 
cgit v1.2.3


From 797659fb4a4a511649cd71028141c32ad1698a12 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 15:56:08 -0300
Subject: [PPPOE]: Introduce pppoe_hdr()

For consistency with all the other skb->nh.raw accessors.

Also do some really obvious simplifications in pppoe_recvmsg, well the
kfree_skb one is not so obvious, but free() and kfree() have the same behaviour
(hint :-) ).

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pppoe.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index ebfa2967cd6..3080a44b23a 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -347,7 +347,7 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
 	struct pppox_sock *relay_po = NULL;
 
 	if (sk->sk_state & PPPOX_BOUND) {
-		struct pppoe_hdr *ph = (struct pppoe_hdr *) skb->nh.raw;
+		struct pppoe_hdr *ph = pppoe_hdr(skb);
 		int len = ntohs(ph->length);
 		skb_pull_rcsum(skb, sizeof(struct pppoe_hdr));
 		if (pskb_trim_rcsum(skb, len))
@@ -401,7 +401,7 @@ static int pppoe_rcv(struct sk_buff *skb,
 	if (!(skb = skb_share_check(skb, GFP_ATOMIC)))
 		goto out;
 
-	ph = (struct pppoe_hdr *) skb->nh.raw;
+	ph = pppoe_hdr(skb);
 
 	po = get_item((unsigned long) ph->sid, eth_hdr(skb)->h_source, dev->ifindex);
 	if (po != NULL)
@@ -433,7 +433,7 @@ static int pppoe_disc_rcv(struct sk_buff *skb,
 	if (!(skb = skb_share_check(skb, GFP_ATOMIC)))
 		goto out;
 
-	ph = (struct pppoe_hdr *) skb->nh.raw;
+	ph = pppoe_hdr(skb);
 	if (ph->code != PADT_CODE)
 		goto abort;
 
@@ -931,8 +931,6 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb = NULL;
 	int error = 0;
-	int len;
-	struct pppoe_hdr *ph = NULL;
 
 	if (sk->sk_state & PPPOX_BOUND) {
 		error = -EIO;
@@ -949,19 +947,15 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
 	m->msg_namelen = 0;
 
 	if (skb) {
-		error = 0;
-		ph = (struct pppoe_hdr *) skb->nh.raw;
-		len = ntohs(ph->length);
+		struct pppoe_hdr *ph = pppoe_hdr(skb);
+		const int len = ntohs(ph->length);
 
 		error = memcpy_toiovec(m->msg_iov, (unsigned char *) &ph->tag[0], len);
-		if (error < 0)
-			goto do_skb_free;
-		error = len;
+		if (error == 0)
+			error = len;
 	}
 
-do_skb_free:
-	if (skb)
-		kfree_skb(skb);
+	kfree_skb(skb);
 end:
 	return error;
 }
-- 
cgit v1.2.3


From a16aeb36239ce612699ed64a75a03c88cbc657e8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 16:07:19 -0300
Subject: [BONDING]: Introduce arp_pkt()

For consistency with all the other skb->nh.raw accessors.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 916162ca0c9..36b8e860107 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -104,6 +104,11 @@ struct arp_pkt {
 };
 #pragma pack()
 
+static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
+{
+	return (struct arp_pkt *)skb->nh.raw;
+}
+
 /* Forward declaration */
 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
 
@@ -613,7 +618,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, u32 src_ip)
 static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
-	struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw;
+	struct arp_pkt *arp = arp_pkt(skb);
 	struct slave *assigned_slave;
 	struct rlb_client_info *client_info;
 	u32 hash_index = 0;
@@ -701,7 +706,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
  */
 static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
 {
-	struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw;
+	struct arp_pkt *arp = arp_pkt(skb);
 	struct slave *tx_slave = NULL;
 
 	if (arp->op_code == __constant_htons(ARPOP_REPLY)) {
-- 
cgit v1.2.3


From c1d2bbe1cd6c7bbdc6d532cefebb66c7efb789ce Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 20:45:18 -0700
Subject: [SK_BUFF]: Introduce skb_reset_network_header(skb)

For the common, open coded 'skb->nh.raw = skb->data' operation, so that we can
later turn skb->nh.raw into a offset, reducing the size of struct sk_buff in
64bit land while possibly keeping it as a pointer on 32bit.

This one touches just the most simple case, next will handle the slightly more
"complex" cases.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/aoe/aoecmd.c                    | 2 +-
 drivers/net/cxgb3/sge.c                       | 3 ++-
 drivers/net/hamradio/bpqether.c               | 2 +-
 drivers/net/loopback.c                        | 2 +-
 drivers/net/pppoe.c                           | 4 ++--
 drivers/net/wan/hdlc_cisco.c                  | 2 +-
 drivers/net/wan/hdlc_fr.c                     | 2 +-
 drivers/net/wan/lmc/lmc_main.c                | 4 ++--
 drivers/net/wireless/hostap/hostap_80211_rx.c | 4 ++--
 drivers/net/wireless/hostap/hostap_ap.c       | 2 +-
 drivers/net/wireless/hostap/hostap_main.c     | 2 +-
 11 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 74062dc4e90..1a6aeac5a1c 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -28,7 +28,7 @@ new_skb(ulong len)
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (skb) {
 		skb_reset_mac_header(skb);
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 		skb->protocol = __constant_htons(ETH_P_AOE);
 		skb->priority = 0;
 		skb->next = skb->prev = NULL;
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index b5cf2a60834..4dd712088bc 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1621,7 +1621,8 @@ static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
 {
 	rq->offload_pkts++;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->h.raw = skb->data;
+	skb_reset_network_header(skb);
+	skb->h.raw = skb->data;
 
 	if (rq->polling) {
 		rx_gather[gather_idx++] = skb;
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index d2542697e29..656f2789c9b 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -282,7 +282,7 @@ static int bpq_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	skb->protocol = ax25_type_trans(skb, dev);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	dev->hard_header(skb, dev, ETH_P_BPQ, bpq->dest_addr, NULL, 0);
 	bpq->stats.tx_packets++;
 	bpq->stats.tx_bytes+=skb->len;
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index a71d8e0a9b5..af476d2a513 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -91,7 +91,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 			break;
 		skb_reserve(nskb, 32);
 		skb_set_mac_header(nskb, -ETH_HLEN);
-		nskb->nh.raw = nskb->data;
+		skb_reset_network_header(nskb);
 		iph = nskb->nh.iph;
 		memcpy(nskb->data, skb->nh.raw, doffset);
 		if (skb_copy_bits(skb,
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index 3080a44b23a..e94790632d5 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -799,7 +799,7 @@ static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	/* Reserve space for headers. */
 	skb_reserve(skb, dev->hard_header_len);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	skb->dev = dev;
 
@@ -884,7 +884,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
 	memcpy(ph, &hdr, sizeof(struct pppoe_hdr));
 	skb2->protocol = __constant_htons(ETH_P_PPP_SES);
 
-	skb2->nh.raw = skb2->data;
+	skb_reset_network_header(skb2);
 
 	skb2->dev = dev;
 
diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index c9664fd8a91..00e0aaadabc 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -124,7 +124,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
 	skb_put(skb, sizeof(struct cisco_packet));
 	skb->priority = TC_PRIO_CONTROL;
 	skb->dev = dev;
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	dev_queue_xmit(skb);
 }
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 3240d10fc86..b747228c719 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -590,7 +590,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
 	skb_put(skb, i);
 	skb->priority = TC_PRIO_CONTROL;
 	skb->dev = dev;
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	dev_queue_xmit(skb);
 }
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 6d288839dda..d4851465c83 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1668,7 +1668,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             skb->protocol = lmc_proto_type(sc, skb);
             skb->protocol = htons(ETH_P_WAN_PPP);
             skb_reset_mac_header(skb);
-//            skb->nh.raw = skb->data;
+            /* skb_reset_network_header(skb); */
             skb->dev = dev;
             lmc_proto_netif(sc, skb);
 
@@ -1706,7 +1706,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             
             nsb->protocol = lmc_proto_type(sc, skb);
             skb_reset_mac_header(nsb);
-//            nsb->nh.raw = nsb->data;
+            /* skb_reset_network_header(nsb); */
             nsb->dev = dev;
             lmc_proto_netif(sc, nsb);
         }
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index e4082f9d766..7b7c1ca8f1f 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -1076,8 +1076,8 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 		skb2->dev = dev;
 		skb2->protocol = __constant_htons(ETH_P_802_3);
 		skb_reset_mac_header(skb2);
-		skb2->nh.raw = skb2->data;
-		/* skb2->nh.raw = skb2->data + ETH_HLEN; */
+		skb_reset_network_header(skb2);
+		/* skb2->nh.raw += ETH_HLEN; */
 		dev_queue_xmit(skb2);
 	}
 
diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c
index cc18f9686d2..797d950d5d6 100644
--- a/drivers/net/wireless/hostap/hostap_ap.c
+++ b/drivers/net/wireless/hostap/hostap_ap.c
@@ -983,7 +983,7 @@ static void prism2_send_mgmt(struct net_device *dev,
 
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	dev_queue_xmit(skb);
 }
 #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index c2616e7b005..1f9edd91565 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -1064,7 +1064,7 @@ int prism2_sta_send_mgmt(local_info_t *local, u8 *dst, u16 stype,
 
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	dev_queue_xmit(skb);
 
 	return 0;
-- 
cgit v1.2.3


From e7dd65dafda5737a983c04d652a69ab8da78ee3f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 20:09:45 -0300
Subject: [SK_BUFF] bonding: Set skb->nh.raw relative to skb->mac.raw

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c | 4 ++--
 drivers/net/bonding/bond_alb.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index e3c9e2e56d1..05c870d6f6c 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -885,7 +885,7 @@ static int ad_lacpdu_send(struct port *port)
 
 	skb->dev = slave->dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->data + ETH_HLEN;
+	skb->nh.raw = skb->mac.raw + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 	skb->priority = TC_PRIO_CONTROL;
 
@@ -929,7 +929,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
 
 	skb->dev = slave->dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->data + ETH_HLEN;
+	skb->nh.raw = skb->mac.raw + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 
 	marker_header = (struct marker_header *)skb_put(skb, length);
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 36b8e860107..5c2a12c2b99 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -896,7 +896,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
 		memcpy(data, &pkt, size);
 
 		skb_reset_mac_header(skb);
-		skb->nh.raw = data + ETH_HLEN;
+		skb->nh.raw = skb->mac.raw + ETH_HLEN;
 		skb->protocol = pkt.type;
 		skb->priority = TC_PRIO_CONTROL;
 		skb->dev = slave->dev;
-- 
cgit v1.2.3


From bbe735e4247dba32568a305553b010081c8dea99 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 22:16:10 -0300
Subject: [SK_BUFF]: Introduce skb_network_offset()

For the quite common 'skb->nh.raw - skb->data' sequence.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_net.c        | 2 +-
 drivers/net/atl1/atl1_main.c       | 2 +-
 drivers/net/chelsio/sge.c          | 2 +-
 drivers/net/cxgb3/sge.c            | 2 +-
 drivers/net/e1000/e1000_main.c     | 2 +-
 drivers/net/gianfar.c              | 2 +-
 drivers/net/ixgb/ixgb_main.c       | 2 +-
 drivers/net/netxen/netxen_nic_hw.c | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index fadb9291bc1..0c2b3752e46 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -1121,7 +1121,7 @@ isdn_net_adjust_hdr(struct sk_buff *skb, struct net_device *dev)
 	if (!skb)
 		return;
 	if (lp->p_encap == ISDN_NET_ENCAP_ETHER) {
-		int pullsize = (ulong)skb->nh.raw - (ulong)skb->data - ETH_HLEN;
+		const int pullsize = skb_network_offset(skb) - ETH_HLEN;
 		if (pullsize > 0) {
 			printk(KERN_DEBUG "isdn_net: Pull junk %d\n", pullsize);
 			skb_pull(skb, pullsize);
diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index e3f181602e4..793a61b2140 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1300,7 +1300,7 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
 			    ~csum_tcpudp_magic(skb->nh.iph->saddr,
 					       skb->nh.iph->daddr, 0,
 					       IPPROTO_TCP, 0);
-			ipofst = skb->nh.raw - skb->data;
+			ipofst = skb_network_offset(skb);
 			if (ipofst != ENET_HEADER_SIZE) /* 802.3 frame */
 				tso->tsopl |= 1 << TSO_PARAM_ETHTYPE_SHIFT;
 
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 47fa8dcf752..8cdee67d582 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1865,7 +1865,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		++st->tx_tso;
 
-		eth_type = skb->nh.raw - skb->data == ETH_HLEN ?
+		eth_type = skb_network_offset(skb) == ETH_HLEN ?
 			CPL_ETH_II : CPL_ETH_II_VLAN;
 
 		hdr = (struct cpl_tx_pkt_lso *)skb_push(skb, sizeof(*hdr));
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 4dd712088bc..7e9e9db4fb9 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -897,7 +897,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 		d->flit[2] = 0;
 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 		hdr->cntrl = htonl(cntrl);
-		eth_type = skb->nh.raw - skb->data == ETH_HLEN ?
+		eth_type = skb_network_offset(skb) == ETH_HLEN ?
 		    CPL_ETH_II : CPL_ETH_II_VLAN;
 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
 		    V_LSO_IPHDR_WORDS(skb->nh.iph->ihl) |
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index b28a915bd98..86161011b53 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2910,7 +2910,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 						 0);
 			ipcse = 0;
 		}
-		ipcss = skb->nh.raw - skb->data;
+		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data;
 		tucss = skb->h.raw - skb->data;
 		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 1d019195a39..c7a70933c75 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -952,7 +952,7 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb)
 	 * frame (skb->data) and the start of the IP hdr.
 	 * l4os is the distance between the start of the
 	 * l3 hdr and the l4 hdr */
-	fcb->l3os = (u16)(skb->nh.raw - skb->data - GMAC_FCB_LEN);
+	fcb->l3os = (u16)(skb_network_offset(skb) - GMAC_FCB_LEN);
 	fcb->l4os = (u16)(skb->h.raw - skb->nh.raw);
 
 	fcb->flags = flags;
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index afc2ec72529..cfb791bb45e 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1195,7 +1195,7 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 		skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr,
 						      skb->nh.iph->daddr,
 						      0, IPPROTO_TCP, 0);
-		ipcss = skb->nh.raw - skb->data;
+		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data;
 		ipcse = skb->h.raw - skb->data - 1;
 		tucss = skb->h.raw - skb->data;
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index 6537574a9cd..625e11ed6aa 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -386,7 +386,7 @@ void netxen_tso_check(struct netxen_adapter *adapter,
 	}
 	adapter->stats.xmitcsummed++;
 	desc->tcp_hdr_offset = skb->h.raw - skb->data;
-	desc->ip_hdr_offset = skb->nh.raw - skb->data;
+	desc->ip_hdr_offset = skb_network_offset(skb);
 }
 
 int netxen_is_flash_supported(struct netxen_adapter *adapter)
-- 
cgit v1.2.3


From d56f90a7c96da5187f0cdf07ee7434fe6aa78bbc Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 20:50:43 -0700
Subject: [SK_BUFF]: Introduce skb_network_header()

For the places where we need a pointer to the network header, it is still legal
to touch skb->nh.raw directly if just adding to, subtracting from or setting it
to another layer header.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_net.c    | 5 +++--
 drivers/net/bonding/bond_alb.c | 2 +-
 drivers/net/loopback.c         | 7 ++++---
 drivers/net/pasemi_mac.c       | 6 ++++--
 drivers/s390/net/qeth_main.c   | 6 ++++--
 5 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 0c2b3752e46..cd3b1fa4a41 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -872,7 +872,8 @@ typedef struct {
 static void
 isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp)
 {
-	u_char *p = skb->nh.raw; /* hopefully, this was set correctly */
+	/* hopefully, this was set correctly */
+	const u_char *p = skb_network_header(skb);
 	unsigned short proto = ntohs(skb->protocol);
 	int data_ofs;
 	ip_ports *ipp;
@@ -880,7 +881,7 @@ isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp)
 
 	addinfo[0] = '\0';
 	/* This check stolen from 2.1.72 dev_queue_xmit_nit() */
-	if (skb->nh.raw < skb->data || skb->nh.raw >= skb->tail) {
+	if (p < skb->data || p >= skb->tail) {
 		/* fall back to old isdn_net_log_packet method() */
 		char * buf = skb->data;
 
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 5c2a12c2b99..86cfcb3f813 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -106,7 +106,7 @@ struct arp_pkt {
 
 static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
 {
-	return (struct arp_pkt *)skb->nh.raw;
+	return (struct arp_pkt *)skb_network_header(skb);
 }
 
 /* Forward declaration */
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index af476d2a513..9265c27b13b 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -76,7 +76,8 @@ static DEFINE_PER_CPU(struct pcpu_lstats, pcpu_lstats);
 static void emulate_large_send_offload(struct sk_buff *skb)
 {
 	struct iphdr *iph = skb->nh.iph;
-	struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4));
+	struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) +
+					      (iph->ihl * 4));
 	unsigned int doffset = (iph->ihl + th->doff) * 4;
 	unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
 	unsigned int offset = 0;
@@ -93,7 +94,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 		skb_set_mac_header(nskb, -ETH_HLEN);
 		skb_reset_network_header(nskb);
 		iph = nskb->nh.iph;
-		memcpy(nskb->data, skb->nh.raw, doffset);
+		memcpy(nskb->data, skb_network_header(skb), doffset);
 		if (skb_copy_bits(skb,
 				  doffset + offset,
 				  nskb->data + doffset,
@@ -108,7 +109,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		nskb->pkt_type = skb->pkt_type;
 
-		th = (struct tcphdr*)(nskb->nh.raw + iph->ihl*4);
+		th = (struct tcphdr *)(skb_network_header(nskb) + iph->ihl * 4);
 		iph->tot_len = htons(frag_size + doffset);
 		iph->id = htons(id);
 		iph->check = 0;
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index 3f4213f3d5d..82218720bc3 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -729,16 +729,18 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
 	dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_SS | XCT_MACTX_CRC_PAD;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		const unsigned char *nh = skb_network_header(skb);
+
 		switch (skb->nh.iph->protocol) {
 		case IPPROTO_TCP:
 			dflags |= XCT_MACTX_CSUM_TCP;
 			dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2);
-			dflags |= XCT_MACTX_IPO(skb->nh.raw - skb->data);
+			dflags |= XCT_MACTX_IPO(nh - skb->data);
 			break;
 		case IPPROTO_UDP:
 			dflags |= XCT_MACTX_CSUM_UDP;
 			dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2);
-			dflags |= XCT_MACTX_IPO(skb->nh.raw - skb->data);
+			dflags |= XCT_MACTX_IPO(nh - skb->data);
 			break;
 		}
 	}
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index c0ee6d94ea3..0ff29e0628b 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -3778,9 +3778,11 @@ qeth_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
 	}
 	/* try something else */
 	if (skb->protocol == ETH_P_IPV6)
-		return (skb->nh.raw[24] == 0xff) ? RTN_MULTICAST : 0;
+		return (skb_network_header(skb)[24] == 0xff) ?
+				RTN_MULTICAST : 0;
 	else if (skb->protocol == ETH_P_IP)
-		return ((skb->nh.raw[16] & 0xf0) == 0xe0) ? RTN_MULTICAST : 0;
+		return ((skb_network_header(skb)[16] & 0xf0) == 0xe0) ?
+				RTN_MULTICAST : 0;
 	/* ... */
 	if (!memcmp(skb->data, skb->dev->broadcast, 6))
 		return RTN_BROADCAST;
-- 
cgit v1.2.3


From c9bdd4b5257406b0608385d19c40b5511decf4f6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 12 Mar 2007 20:09:15 -0300
Subject: [IP]: Introduce ip_hdrlen()

For the common sequence "skb->nh.iph->ihl * 4", removing a good number of open
coded skb->nh.iph uses, now to go after the rest...

Just out of curiosity, here are the idioms found to get the same result:

skb->nh.iph->ihl << 2
skb->nh.iph->ihl<<2
skb->nh.iph->ihl * 4
skb->nh.iph->ihl*4
(skb->nh.iph)->ihl * sizeof(u32)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c                   |  2 +-
 drivers/net/ehea/ehea_main.c         |  4 ++--
 drivers/net/netxen/netxen_nic_hw.c   |  8 +++++---
 drivers/net/netxen/netxen_nic_main.c |  6 +++---
 drivers/net/sky2.c                   |  3 ++-
 drivers/net/tg3.c                    |  6 +++---
 drivers/s390/net/qeth_eddp.c         | 13 ++++++++-----
 7 files changed, 24 insertions(+), 18 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index e85f5ec48f9..b8091c55d44 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -4527,7 +4527,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (skb->h.th->doff > 5) {
 			tcp_opt_len = (skb->h.th->doff - 5) << 2;
 		}
-		ip_tcp_len = (skb->nh.iph->ihl << 2) + sizeof(struct tcphdr);
+		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 		skb->nh.iph->check = 0;
 		skb->nh.iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 0e4042bc0a4..b1c90a4fe31 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -1263,7 +1263,7 @@ static inline void write_ip_start_end(struct ehea_swqe *swqe,
 				      const struct sk_buff *skb)
 {
 	swqe->ip_start = (u8)(((u64)skb->nh.iph) - ((u64)skb->data));
-	swqe->ip_end = (u8)(swqe->ip_start + skb->nh.iph->ihl * 4 - 1);
+	swqe->ip_end = (u8)(swqe->ip_start + ip_hdrlen(skb) - 1);
 }
 
 static inline void write_tcp_offset_end(struct ehea_swqe *swqe,
@@ -1300,7 +1300,7 @@ static void write_swqe2_TSO(struct sk_buff *skb,
 	/* copy only eth/ip/tcp headers to immediate data and
 	 * the rest of skb->data to sg1entry
 	 */
-	headersize = ETH_HLEN + (skb->nh.iph->ihl * 4) + (skb->h.th->doff * 4);
+	headersize = ETH_HLEN + ip_hdrlen(skb) + (skb->h.th->doff * 4);
 
 	skb_data_size = skb->len - skb->data_len;
 
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index 625e11ed6aa..b2f5032937e 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -35,6 +35,8 @@
 #include "netxen_nic_hw.h"
 #include "netxen_nic_phan_reg.h"
 
+#include <net/ip.h>
+
 /*  PCI Windowing for DDR regions.  */
 
 #define ADDR_IN_RANGE(addr, low, high)	\
@@ -371,9 +373,9 @@ void netxen_tso_check(struct netxen_adapter *adapter,
 		      struct cmd_desc_type0 *desc, struct sk_buff *skb)
 {
 	if (desc->mss) {
-		desc->total_hdr_length = sizeof(struct ethhdr) +
-		    ((skb->nh.iph)->ihl * sizeof(u32)) +
-		    ((skb->h.th)->doff * sizeof(u32));
+		desc->total_hdr_length = (sizeof(struct ethhdr) +
+					  ip_hdrlen(skb) +
+					  skb->h.th->doff * 4);
 		netxen_set_cmd_desc_opcode(desc, TX_TCP_LSO);
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		if (skb->nh.iph->protocol == IPPROTO_TCP) {
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index 7d2525e76ab..b548a30e5c8 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -41,6 +41,7 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/vmalloc.h>
+#include <net/ip.h>
 
 MODULE_DESCRIPTION("NetXen Multi port (1/10) Gigabit Network Driver");
 MODULE_LICENSE("GPL");
@@ -778,9 +779,8 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		if (skb_shinfo(skb)->gso_size > 0) {
 
 			no_of_desc++;
-			if (((skb->nh.iph)->ihl * sizeof(u32)) +
-			    ((skb->h.th)->doff * sizeof(u32)) +
-			    sizeof(struct ethhdr) >
+			if ((ip_hdrlen(skb) + skb->h.th->doff * 4 +
+			     sizeof(struct ethhdr)) >
 			    (sizeof(struct cmd_desc_type0) - 2)) {
 				no_of_desc++;
 			}
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index ac36152c68b..51e994f26a8 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -32,6 +32,7 @@
 #include <linux/ethtool.h>
 #include <linux/pci.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/tcp.h>
 #include <linux/in.h>
 #include <linux/delay.h>
@@ -1392,7 +1393,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	mss = skb_shinfo(skb)->gso_size;
 	if (mss != 0) {
 		mss += ((skb->h.th->doff - 5) * 4);	/* TCP options */
-		mss += (skb->nh.iph->ihl * 4) + sizeof(struct tcphdr);
+		mss += ip_hdrlen(skb) + sizeof(struct tcphdr);
 		mss += ETH_HLEN;
 
 		if (mss != sky2->tx_last_mss) {
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 256969e1300..62a3bba0097 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -40,6 +40,7 @@
 #include <linux/dma-mapping.h>
 
 #include <net/checksum.h>
+#include <net/ip.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
@@ -3909,8 +3910,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			mss |= (skb_headlen(skb) - ETH_HLEN) << 9;
 		else {
 			tcp_opt_len = ((skb->h.th->doff - 5) * 4);
-			ip_tcp_len = (skb->nh.iph->ihl * 4) +
-				     sizeof(struct tcphdr);
+			ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 			skb->nh.iph->check = 0;
 			skb->nh.iph->tot_len = htons(mss + ip_tcp_len +
@@ -4064,7 +4064,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 		}
 
 		tcp_opt_len = ((skb->h.th->doff - 5) * 4);
-		ip_tcp_len = (skb->nh.iph->ihl * 4) + sizeof(struct tcphdr);
+		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 		hdr_len = ip_tcp_len + tcp_opt_len;
 		if (unlikely((ETH_HLEN + hdr_len) > 80) &&
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 893125403c6..1574247abaa 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -473,9 +473,11 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 	QETH_DBF_TEXT(trace, 5, "eddpficx");
 	/* create our segmentation headers and copy original headers */
 	if (skb->protocol == htons(ETH_P_IP))
-		eddp = qeth_eddp_create_eddp_data(qhdr, (u8 *)skb->nh.iph,
-				skb->nh.iph->ihl*4,
-				(u8 *)skb->h.th, skb->h.th->doff*4);
+		eddp = qeth_eddp_create_eddp_data(qhdr,
+						  skb_network_header(skb),
+						  ip_hdrlen(skb),
+						  skb->h.raw,
+						  skb->h.th->doff * 4);
 	else
 		eddp = qeth_eddp_create_eddp_data(qhdr, (u8 *)skb->nh.ipv6h,
 				sizeof(struct ipv6hdr),
@@ -590,8 +592,9 @@ qeth_eddp_create_context_tcp(struct qeth_card *card, struct sk_buff *skb,
 	QETH_DBF_TEXT(trace, 5, "creddpct");
 	if (skb->protocol == htons(ETH_P_IP))
 		ctx = qeth_eddp_create_context_generic(card, skb,
-			sizeof(struct qeth_hdr) + skb->nh.iph->ihl*4 +
-			skb->h.th->doff*4);
+						       (sizeof(struct qeth_hdr) +
+						        ip_hdrlen(skb) +
+							skb->h.th->doff * 4));
 	else if (skb->protocol == htons(ETH_P_IPV6))
 		ctx = qeth_eddp_create_context_generic(card, skb,
 			sizeof(struct qeth_hdr) + sizeof(struct ipv6hdr) +
-- 
cgit v1.2.3


From eddc9ec53be2ecdbf4efe0efd4a83052594f0ac0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 20 Apr 2007 22:47:35 -0700
Subject: [SK_BUFF]: Introduce ip_hdr(), remove skb->nh.iph

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ieee1394/eth1394.c         |  2 +-
 drivers/net/8139cp.c               |  4 ++--
 drivers/net/atl1/atl1_main.c       | 15 ++++++++-------
 drivers/net/bnx2.c                 | 18 +++++++++---------
 drivers/net/bonding/bond_alb.c     | 17 ++++++++++-------
 drivers/net/bonding/bond_main.c    |  2 +-
 drivers/net/chelsio/sge.c          |  4 ++--
 drivers/net/cxgb3/sge.c            |  2 +-
 drivers/net/e1000/e1000_main.c     | 16 +++++++---------
 drivers/net/ehea/ehea_main.c       | 20 +++++++++++---------
 drivers/net/gianfar.c              |  2 +-
 drivers/net/ioc3-eth.c             |  4 ++--
 drivers/net/ixgb/ixgb_main.c       | 12 +++++++-----
 drivers/net/loopback.c             |  6 +++---
 drivers/net/mv643xx_eth.c          |  4 ++--
 drivers/net/netxen/netxen_nic_hw.c |  4 ++--
 drivers/net/ns83820.c              |  4 ++--
 drivers/net/pasemi_mac.c           |  2 +-
 drivers/net/r8169.c                |  2 +-
 drivers/net/sky2.c                 |  2 +-
 drivers/net/spider_net.c           |  2 +-
 drivers/net/tg3.c                  | 30 +++++++++++++++---------------
 drivers/net/via-velocity.c         |  2 +-
 drivers/s390/net/qeth_main.c       | 15 +++++++++------
 drivers/s390/net/qeth_tso.h        | 12 ++++--------
 25 files changed, 104 insertions(+), 99 deletions(-)

(limited to 'drivers')

diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c
index db2346f4d20..a364003ba47 100644
--- a/drivers/ieee1394/eth1394.c
+++ b/drivers/ieee1394/eth1394.c
@@ -1668,7 +1668,7 @@ static int ether1394_tx (struct sk_buff *skb, struct net_device *dev)
 	if (memcmp(eth->h_dest, dev->broadcast, ETH1394_ALEN) == 0 ||
 	    proto == htons(ETH_P_ARP) ||
 	    (proto == htons(ETH_P_IP) &&
-	     IN_MULTICAST(ntohl(skb->nh.iph->daddr)))) {
+	     IN_MULTICAST(ntohl(ip_hdr(skb)->daddr)))) {
 		tx_type = ETH1394_GASP;
 		dest_node = LOCAL_BUS | ALL_NODES;
 		max_payload = priv->bc_maxpayload - ETHER1394_GASP_OVERHEAD;
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 2f704cb06e7..e8c9f27817b 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -806,7 +806,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
 		if (mss)
 			flags |= LargeSend | ((mss & MSSMask) << MSSShift);
 		else if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			const struct iphdr *ip = skb->nh.iph;
+			const struct iphdr *ip = ip_hdr(skb);
 			if (ip->protocol == IPPROTO_TCP)
 				flags |= IPCS | TCPCS;
 			else if (ip->protocol == IPPROTO_UDP)
@@ -825,7 +825,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
 		u32 first_len, first_eor;
 		dma_addr_t first_mapping;
 		int frag, first_entry = entry;
-		const struct iphdr *ip = skb->nh.iph;
+		const struct iphdr *ip = ip_hdr(skb);
 
 		/* We must give this initial chunk to the device last.
 		 * Otherwise we could race with the device.
diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index 793a61b2140..d2be79a30f8 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1294,17 +1294,18 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
 		}
 
 		if (skb->protocol == ntohs(ETH_P_IP)) {
-			skb->nh.iph->tot_len = 0;
-			skb->nh.iph->check = 0;
-			skb->h.th->check =
-			    ~csum_tcpudp_magic(skb->nh.iph->saddr,
-					       skb->nh.iph->daddr, 0,
-					       IPPROTO_TCP, 0);
+			struct iphdr *iph = ip_hdr(skb);
+
+			iph->tot_len = 0;
+			iph->check = 0;
+			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
+							      iph->daddr, 0,
+							      IPPROTO_TCP, 0);
 			ipofst = skb_network_offset(skb);
 			if (ipofst != ENET_HEADER_SIZE) /* 802.3 frame */
 				tso->tsopl |= 1 << TSO_PARAM_ETHTYPE_SHIFT;
 
-			tso->tsopl |= (skb->nh.iph->ihl &
+			tso->tsopl |= (iph->ihl &
 				CSUM_PARAM_IPHL_MASK) << CSUM_PARAM_IPHL_SHIFT;
 			tso->tsopl |= ((skb->h.th->doff << 2) &
 				TSO_PARAM_TCPHDRLEN_MASK) << TSO_PARAM_TCPHDRLEN_SHIFT;
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index b8091c55d44..eb0c4f1d448 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -4513,6 +4513,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if ((mss = skb_shinfo(skb)->gso_size) &&
 		(skb->len > (bp->dev->mtu + ETH_HLEN))) {
 		u32 tcp_opt_len, ip_tcp_len;
+		struct iphdr *iph;
 
 		if (skb_header_cloned(skb) &&
 		    pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
@@ -4529,16 +4530,15 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
-		skb->nh.iph->check = 0;
-		skb->nh.iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
-		skb->h.th->check =
-			~csum_tcpudp_magic(skb->nh.iph->saddr,
-					    skb->nh.iph->daddr,
-					    0, IPPROTO_TCP, 0);
+		iph = ip_hdr(skb);
+		iph->check = 0;
+		iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
+		skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+						      0, IPPROTO_TCP, 0);
 
-		if (tcp_opt_len || (skb->nh.iph->ihl > 5)) {
-			vlan_tag_flags |= ((skb->nh.iph->ihl - 5) +
-				(tcp_opt_len >> 2)) << 8;
+		if (tcp_opt_len || (iph->ihl > 5)) {
+			vlan_tag_flags |= ((iph->ihl - 5) +
+					   (tcp_opt_len >> 2)) << 8;
 		}
 	}
 	else
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 86cfcb3f813..8555afa574a 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -112,7 +112,7 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
 /* Forward declaration */
 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
 
-static inline u8 _simple_hash(u8 *hash_start, int hash_size)
+static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
 {
 	int i;
 	u8 hash = 0;
@@ -1268,7 +1268,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	int hash_size = 0;
 	int do_tx_balance = 1;
 	u32 hash_index = 0;
-	u8 *hash_start = NULL;
+	const u8 *hash_start = NULL;
 	int res = 1;
 
 	skb_reset_mac_header(skb);
@@ -1285,15 +1285,18 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	}
 
 	switch (ntohs(skb->protocol)) {
-	case ETH_P_IP:
+	case ETH_P_IP: {
+		const struct iphdr *iph = ip_hdr(skb);
+
 		if ((memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) ||
-		    (skb->nh.iph->daddr == ip_bcast) ||
-		    (skb->nh.iph->protocol == IPPROTO_IGMP)) {
+		    (iph->daddr == ip_bcast) ||
+		    (iph->protocol == IPPROTO_IGMP)) {
 			do_tx_balance = 0;
 			break;
 		}
-		hash_start = (char*)&(skb->nh.iph->daddr);
-		hash_size = sizeof(skb->nh.iph->daddr);
+		hash_start = (char *)&(iph->daddr);
+		hash_size = sizeof(iph->daddr);
+	}
 		break;
 	case ETH_P_IPV6:
 		if (memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) {
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e4724d874e7..7f11388893f 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3476,7 +3476,7 @@ static int bond_xmit_hash_policy_l34(struct sk_buff *skb,
 				    struct net_device *bond_dev, int count)
 {
 	struct ethhdr *data = (struct ethhdr *)skb->data;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	u16 *layer4hdr = (u16 *)((u32 *)iph + iph->ihl);
 	int layer4_xor = 0;
 
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 8cdee67d582..c357f45a16c 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1871,7 +1871,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		hdr = (struct cpl_tx_pkt_lso *)skb_push(skb, sizeof(*hdr));
 		hdr->opcode = CPL_TX_PKT_LSO;
 		hdr->ip_csum_dis = hdr->l4_csum_dis = 0;
-		hdr->ip_hdr_words = skb->nh.iph->ihl;
+		hdr->ip_hdr_words = ip_hdr(skb)->ihl;
 		hdr->tcp_hdr_words = skb->h.th->doff;
 		hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type,
 							  skb_shinfo(skb)->gso_size));
@@ -1912,7 +1912,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		if (!(adapter->flags & UDP_CSUM_CAPABLE) &&
 		    skb->ip_summed == CHECKSUM_PARTIAL &&
-		    skb->nh.iph->protocol == IPPROTO_UDP) {
+		    ip_hdr(skb)->protocol == IPPROTO_UDP) {
 			if (unlikely(skb_checksum_help(skb))) {
 				pr_debug("%s: unable to do udp checksum\n", dev->name);
 				dev_kfree_skb_any(skb);
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 7e9e9db4fb9..892e5dcafa0 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -900,7 +900,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 		eth_type = skb_network_offset(skb) == ETH_HLEN ?
 		    CPL_ETH_II : CPL_ETH_II_VLAN;
 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
-		    V_LSO_IPHDR_WORDS(skb->nh.iph->ihl) |
+		    V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
 		    V_LSO_TCPHDR_WORDS(skb->h.th->doff);
 		hdr->lso_info = htonl(tso_info);
 		flits = 3;
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 86161011b53..c324866c978 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2890,14 +2890,12 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 		hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
 		mss = skb_shinfo(skb)->gso_size;
 		if (skb->protocol == htons(ETH_P_IP)) {
-			skb->nh.iph->tot_len = 0;
-			skb->nh.iph->check = 0;
-			skb->h.th->check =
-				~csum_tcpudp_magic(skb->nh.iph->saddr,
-						   skb->nh.iph->daddr,
-						   0,
-						   IPPROTO_TCP,
-						   0);
+			struct iphdr *iph = ip_hdr(skb);
+			iph->tot_len = 0;
+			iph->check = 0;
+			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
+							      iph->daddr, 0,
+							      IPPROTO_TCP, 0);
 			cmd_length = E1000_TXD_CMD_IP;
 			ipcse = skb->h.raw - skb->data - 1;
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
@@ -2911,7 +2909,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 			ipcse = 0;
 		}
 		ipcss = skb_network_offset(skb);
-		ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data;
+		ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
 		tucss = skb->h.raw - skb->data;
 		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
 		tucse = 0;
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index b1c90a4fe31..0dc701e611e 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -1262,7 +1262,7 @@ static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr)
 static inline void write_ip_start_end(struct ehea_swqe *swqe,
 				      const struct sk_buff *skb)
 {
-	swqe->ip_start = (u8)(((u64)skb->nh.iph) - ((u64)skb->data));
+	swqe->ip_start = skb_network_offset(skb);
 	swqe->ip_end = (u8)(swqe->ip_start + ip_hdrlen(skb) - 1);
 }
 
@@ -1688,6 +1688,7 @@ static void ehea_xmit2(struct sk_buff *skb, struct net_device *dev,
 		       struct ehea_swqe *swqe, u32 lkey)
 {
 	if (skb->protocol == htons(ETH_P_IP)) {
+		const struct iphdr *iph = ip_hdr(skb);
 		/* IPv4 */
 		swqe->tx_control |= EHEA_SWQE_CRC
 				 | EHEA_SWQE_IP_CHECKSUM
@@ -1697,15 +1698,15 @@ static void ehea_xmit2(struct sk_buff *skb, struct net_device *dev,
 
 		write_ip_start_end(swqe, skb);
 
-		if (skb->nh.iph->protocol == IPPROTO_UDP) {
-			if ((skb->nh.iph->frag_off & IP_MF) ||
-			    (skb->nh.iph->frag_off & IP_OFFSET))
+		if (iph->protocol == IPPROTO_UDP) {
+			if ((iph->frag_off & IP_MF) ||
+			    (iph->frag_off & IP_OFFSET))
 				/* IP fragment, so don't change cs */
 				swqe->tx_control &= ~EHEA_SWQE_TCP_CHECKSUM;
 			else
 				write_udp_offset_end(swqe, skb);
 
-		} else if (skb->nh.iph->protocol == IPPROTO_TCP) {
+		} else if (iph->protocol == IPPROTO_TCP) {
 			write_tcp_offset_end(swqe, skb);
 		}
 
@@ -1731,10 +1732,11 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev,
 	int i;
 
 	if (skb->protocol == htons(ETH_P_IP)) {
+		const struct iphdr *iph = ip_hdr(skb);
 		/* IPv4 */
 		write_ip_start_end(swqe, skb);
 
-		if (skb->nh.iph->protocol == IPPROTO_TCP) {
+		if (iph->protocol == IPPROTO_TCP) {
 			swqe->tx_control |= EHEA_SWQE_CRC
 					 | EHEA_SWQE_IP_CHECKSUM
 					 | EHEA_SWQE_TCP_CHECKSUM
@@ -1742,9 +1744,9 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev,
 
 			write_tcp_offset_end(swqe, skb);
 
-		} else if (skb->nh.iph->protocol == IPPROTO_UDP) {
-			if ((skb->nh.iph->frag_off & IP_MF) ||
-			    (skb->nh.iph->frag_off & IP_OFFSET))
+		} else if (iph->protocol == IPPROTO_UDP) {
+			if ((iph->frag_off & IP_MF) ||
+			    (iph->frag_off & IP_OFFSET))
 				/* IP fragment, so don't change cs */
 				swqe->tx_control |= EHEA_SWQE_CRC
 						 | EHEA_SWQE_IMM_DATA_PRESENT;
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index c7a70933c75..c9abc96a091 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -942,7 +942,7 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb)
 
 	/* Tell the controller what the protocol is */
 	/* And provide the already calculated phcs */
-	if (skb->nh.iph->protocol == IPPROTO_UDP) {
+	if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
 		flags |= TXFCB_UDP;
 		fcb->phcs = skb->h.uh->check;
 	} else
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index ea07aa5ba51..d375e786b4b 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1393,9 +1393,9 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * manually.
 	 */
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		int proto = ntohs(skb->nh.iph->protocol);
+		const struct iphdr *ih = ip_hdr(skb);
+		const int proto = ntohs(ih->protocol);
 		unsigned int csoff;
-		struct iphdr *ih = skb->nh.iph;
 		uint32_t csum, ehsum;
 		uint16_t *eh;
 
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index cfb791bb45e..bba4dcaf92e 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1182,6 +1182,8 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 
 	if (likely(skb_is_gso(skb))) {
 		struct ixgb_buffer *buffer_info;
+		struct iphdr *iph;
+
 		if (skb_header_cloned(skb)) {
 			err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 			if (err)
@@ -1190,13 +1192,13 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 
 		hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
 		mss = skb_shinfo(skb)->gso_size;
-		skb->nh.iph->tot_len = 0;
-		skb->nh.iph->check = 0;
-		skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr,
-						      skb->nh.iph->daddr,
+		iph = ip_hdr(skb);
+		iph->tot_len = 0;
+		iph->check = 0;
+		skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 						      0, IPPROTO_TCP, 0);
 		ipcss = skb_network_offset(skb);
-		ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data;
+		ipcso = (void *)&(iph->check) - (void *)skb->data;
 		ipcse = skb->h.raw - skb->data - 1;
 		tucss = skb->h.raw - skb->data;
 		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 9265c27b13b..20b5cb10136 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -75,7 +75,7 @@ static DEFINE_PER_CPU(struct pcpu_lstats, pcpu_lstats);
 #ifdef LOOPBACK_TSO
 static void emulate_large_send_offload(struct sk_buff *skb)
 {
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) +
 					      (iph->ihl * 4));
 	unsigned int doffset = (iph->ihl + th->doff) * 4;
@@ -93,7 +93,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 		skb_reserve(nskb, 32);
 		skb_set_mac_header(nskb, -ETH_HLEN);
 		skb_reset_network_header(nskb);
-		iph = nskb->nh.iph;
+		iph = ip_hdr(nskb);
 		memcpy(nskb->data, skb_network_header(skb), doffset);
 		if (skb_copy_bits(skb,
 				  doffset + offset,
@@ -145,7 +145,7 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 #ifdef LOOPBACK_TSO
 	if (skb_is_gso(skb)) {
 		BUG_ON(skb->protocol != htons(ETH_P_IP));
-		BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP);
+		BUG_ON(ip_hdr(skb)->protocol != IPPROTO_TCP);
 
 		emulate_large_send_offload(skb);
 		return 0;
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index cd9369a285e..6b39a268ec2 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1161,9 +1161,9 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp,
 
 		cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM |
 			   ETH_GEN_IP_V_4_CHECKSUM  |
-			   skb->nh.iph->ihl << ETH_TX_IHL_SHIFT;
+			   ip_hdr(skb)->ihl << ETH_TX_IHL_SHIFT;
 
-		switch (skb->nh.iph->protocol) {
+		switch (ip_hdr(skb)->protocol) {
 		case IPPROTO_UDP:
 			cmd_sts |= ETH_UDP_FRAME;
 			desc->l4i_chk = skb->h.uh->check;
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index b2f5032937e..28d68c3550e 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -378,9 +378,9 @@ void netxen_tso_check(struct netxen_adapter *adapter,
 					  skb->h.th->doff * 4);
 		netxen_set_cmd_desc_opcode(desc, TX_TCP_LSO);
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		if (skb->nh.iph->protocol == IPPROTO_TCP) {
+		if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
 			netxen_set_cmd_desc_opcode(desc, TX_TCP_PKT);
-		} else if (skb->nh.iph->protocol == IPPROTO_UDP) {
+		} else if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
 			netxen_set_cmd_desc_opcode(desc, TX_UDP_PKT);
 		} else {
 			return;
diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c
index 747988b12ec..6a32338623f 100644
--- a/drivers/net/ns83820.c
+++ b/drivers/net/ns83820.c
@@ -1156,9 +1156,9 @@ again:
 	extsts = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		extsts |= EXTSTS_IPPKT;
-		if (IPPROTO_TCP == skb->nh.iph->protocol)
+		if (IPPROTO_TCP == ip_hdr(skb)->protocol)
 			extsts |= EXTSTS_TCPPKT;
-		else if (IPPROTO_UDP == skb->nh.iph->protocol)
+		else if (IPPROTO_UDP == ip_hdr(skb)->protocol)
 			extsts |= EXTSTS_UDPPKT;
 	}
 
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index 82218720bc3..1d8129986cc 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -731,7 +731,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		const unsigned char *nh = skb_network_header(skb);
 
-		switch (skb->nh.iph->protocol) {
+		switch (ip_hdr(skb)->protocol) {
 		case IPPROTO_TCP:
 			dflags |= XCT_MACTX_CSUM_TCP;
 			dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2);
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 34280f94e9f..45876a854f0 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2284,7 +2284,7 @@ static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev)
 			return LargeSend | ((mss & MSSMask) << MSSShift);
 	}
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		const struct iphdr *ip = skb->nh.iph;
+		const struct iphdr *ip = ip_hdr(skb);
 
 		if (ip->protocol == IPPROTO_TCP)
 			return IPCS | TCPCS;
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 51e994f26a8..a37bb205f3d 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1428,7 +1428,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 		tcpsum |= offset + skb->csum_offset;	/* sum write */
 
 		ctrl = CALSUM | WR_SUM | INIT_SUM | LOCK_SUM;
-		if (skb->nh.iph->protocol == IPPROTO_UDP)
+		if (ip_hdr(skb)->protocol == IPPROTO_UDP)
 			ctrl |= UDPTCP;
 
 		if (tcpsum != sky2->tx_tcpsum) {
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index f7e0ac7f789..230da14b1b6 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -720,7 +720,7 @@ spider_net_prepare_tx_descr(struct spider_net_card *card,
 	spin_unlock_irqrestore(&chain->lock, flags);
 
 	if (skb->protocol == htons(ETH_P_IP) && skb->ip_summed == CHECKSUM_PARTIAL)
-		switch (skb->nh.iph->protocol) {
+		switch (ip_hdr(skb)->protocol) {
 		case IPPROTO_TCP:
 			hwdescr->dmac_cmd_status |= SPIDER_NET_DMAC_TCP;
 			break;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 62a3bba0097..76a31afe20d 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3909,12 +3909,13 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
 			mss |= (skb_headlen(skb) - ETH_HLEN) << 9;
 		else {
+			struct iphdr *iph = ip_hdr(skb);
+
 			tcp_opt_len = ((skb->h.th->doff - 5) * 4);
 			ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
-			skb->nh.iph->check = 0;
-			skb->nh.iph->tot_len = htons(mss + ip_tcp_len +
-						     tcp_opt_len);
+			iph->check = 0;
+			iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
 			mss |= (ip_tcp_len + tcp_opt_len) << 9;
 		}
 
@@ -4055,6 +4056,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 	mss = 0;
 	if (skb->len > (tp->dev->mtu + ETH_HLEN) &&
 	    (mss = skb_shinfo(skb)->gso_size) != 0) {
+		struct iphdr *iph;
 		int tcp_opt_len, ip_tcp_len, hdr_len;
 
 		if (skb_header_cloned(skb) &&
@@ -4074,34 +4076,32 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 		base_flags |= (TXD_FLAG_CPU_PRE_DMA |
 			       TXD_FLAG_CPU_POST_DMA);
 
-		skb->nh.iph->check = 0;
-		skb->nh.iph->tot_len = htons(mss + hdr_len);
+		iph = ip_hdr(skb);
+		iph->check = 0;
+		iph->tot_len = htons(mss + hdr_len);
 		if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
 			skb->h.th->check = 0;
 			base_flags &= ~TXD_FLAG_TCPUDP_CSUM;
 		}
 		else {
-			skb->h.th->check =
-				~csum_tcpudp_magic(skb->nh.iph->saddr,
-						   skb->nh.iph->daddr,
-						   0, IPPROTO_TCP, 0);
+			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
+							      iph->daddr, 0,
+							      IPPROTO_TCP, 0);
 		}
 
 		if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO) ||
 		    (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)) {
-			if (tcp_opt_len || skb->nh.iph->ihl > 5) {
+			if (tcp_opt_len || iph->ihl > 5) {
 				int tsflags;
 
-				tsflags = ((skb->nh.iph->ihl - 5) +
-					   (tcp_opt_len >> 2));
+				tsflags = (iph->ihl - 5) + (tcp_opt_len >> 2);
 				mss |= (tsflags << 11);
 			}
 		} else {
-			if (tcp_opt_len || skb->nh.iph->ihl > 5) {
+			if (tcp_opt_len || iph->ihl > 5) {
 				int tsflags;
 
-				tsflags = ((skb->nh.iph->ihl - 5) +
-					   (tcp_opt_len >> 2));
+				tsflags = (iph->ihl - 5) + (tcp_opt_len >> 2);
 				base_flags |= tsflags << 12;
 			}
 		}
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 9f6cc1569b3..422eaf8ea12 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -2006,7 +2006,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
 	 */
 	if ((vptr->flags & VELOCITY_FLAGS_TX_CSUM)
 				 && (skb->ip_summed == CHECKSUM_PARTIAL)) {
-		struct iphdr *ip = skb->nh.iph;
+		const struct iphdr *ip = ip_hdr(skb);
 		if (ip->protocol == IPPROTO_TCP)
 			td_ptr->tdesc1.TCR |= TCR0_TCPCK;
 		else if (ip->protocol == IPPROTO_UDP)
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 0ff29e0628b..8a07d548a05 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -3820,18 +3820,20 @@ qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb,
 			return card->info.is_multicast_different &
 				(card->qdio.no_out_queues - 1);
 		if (card->qdio.do_prio_queueing && (ipv == 4)) {
+			const u8 tos = ip_hdr(skb)->tos;
+
 			if (card->qdio.do_prio_queueing==QETH_PRIO_Q_ING_TOS){
-				if (skb->nh.iph->tos & IP_TOS_NOTIMPORTANT)
+				if (tos & IP_TOS_NOTIMPORTANT)
 					return 3;
-				if (skb->nh.iph->tos & IP_TOS_HIGHRELIABILITY)
+				if (tos & IP_TOS_HIGHRELIABILITY)
 					return 2;
-				if (skb->nh.iph->tos & IP_TOS_HIGHTHROUGHPUT)
+				if (tos & IP_TOS_HIGHTHROUGHPUT)
 					return 1;
-				if (skb->nh.iph->tos & IP_TOS_LOWDELAY)
+				if (tos & IP_TOS_LOWDELAY)
 					return 0;
 			}
 			if (card->qdio.do_prio_queueing==QETH_PRIO_Q_ING_PREC)
-				return 3 - (skb->nh.iph->tos >> 6);
+				return 3 - (tos >> 6);
 		} else if (card->qdio.do_prio_queueing && (ipv == 6)) {
 			/* TODO: IPv6!!! */
 		}
@@ -4041,7 +4043,8 @@ qeth_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
 			    *((u32 *) skb->dst->neighbour->primary_key);
 		} else {
 			/* fill in destination address used in ip header */
-			*((u32 *) (&hdr->hdr.l3.dest_addr[12])) = skb->nh.iph->daddr;
+			*((u32 *)(&hdr->hdr.l3.dest_addr[12])) =
+							   ip_hdr(skb)->daddr;
 		}
 	} else if (ipv == 6) { /* IPv6 or passthru */
 		hdr->hdr.l3.flags = qeth_get_qeth_hdr_flags6(cast_type);
diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h
index 14504afb044..255cb2e9c79 100644
--- a/drivers/s390/net/qeth_tso.h
+++ b/drivers/s390/net/qeth_tso.h
@@ -40,7 +40,7 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb)
 	QETH_DBF_TEXT(trace, 5, "tsofhdr");
 
 	hdr  = (struct qeth_hdr_tso *) skb->data;
-	iph  = skb->nh.iph;
+	iph  = ip_hdr(skb);
 	tcph = skb->h.th;
 	/*fix header to TSO values ...*/
 	hdr->hdr.hdr.l3.id = QETH_HEADER_TYPE_TSO;
@@ -63,13 +63,9 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb)
 static inline void
 qeth_tso_set_tcpip_header(struct qeth_card *card, struct sk_buff *skb)
 {
-	struct iphdr *iph;
-	struct ipv6hdr *ip6h;
-	struct tcphdr *tcph;
-
-	iph  = skb->nh.iph;
-	ip6h = skb->nh.ipv6h;
-	tcph = skb->h.th;
+	struct iphdr *iph    = ip_hdr(skb);
+	struct ipv6hdr *ip6h = skb->nh.ipv6h;
+	struct tcphdr *tcph  = skb->h.th;
 
 	tcph->check = 0;
 	if (skb->protocol == ETH_P_IPV6) {
-- 
cgit v1.2.3


From d0a92be05ed4aea7d35c2b257e3f9173565fe4eb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 12 Mar 2007 20:56:31 -0300
Subject: [SK_BUFF]: Introduce arp_hdr(), remove skb->nh.arph

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 2 +-
 drivers/net/chelsio/sge.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 7f11388893f..76d3504505b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2524,7 +2524,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
 				 (2 * sizeof(u32)))))
 		goto out_unlock;
 
-	arp = skb->nh.arph;
+	arp = arp_hdr(skb);
 	if (arp->ar_hln != dev->addr_len ||
 	    skb->pkt_type == PACKET_OTHERHOST ||
 	    skb->pkt_type == PACKET_LOOPBACK ||
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index c357f45a16c..a4204dff363 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1925,7 +1925,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		 */
 		if ((unlikely(!adapter->sge->espibug_skb[dev->if_port]))) {
 			if (skb->protocol == htons(ETH_P_ARP) &&
-			    skb->nh.arph->ar_op == htons(ARPOP_REQUEST)) {
+			    arp_hdr(skb)->ar_op == htons(ARPOP_REQUEST)) {
 				adapter->sge->espibug_skb[dev->if_port] = skb;
 				/* We want to re-use this skb later. We
 				 * simply bump the reference count and it
-- 
cgit v1.2.3


From 0660e03f6b18f19b6bbafe7583265a51b90daf36 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 17:54:47 -0700
Subject: [SK_BUFF]: Introduce ipv6_hdr(), remove skb->nh.ipv6h

Now the skb->nh union has just one member, .raw, i.e. it is just like the
skb->mac union, strange, no? I'm just leaving it like that till the transport
layer is done with, when we'll rename skb->mac.raw to skb->mac_header (or
->mac_header_offset?), ditto for ->{h,nh}.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c |  4 ++--
 drivers/net/e1000/e1000_main.c | 10 ++++------
 drivers/s390/net/qeth_eddp.c   |  8 +++++---
 drivers/s390/net/qeth_main.c   |  3 ++-
 drivers/s390/net/qeth_tso.h    |  2 +-
 5 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 8555afa574a..b8cf777542f 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1304,8 +1304,8 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 			break;
 		}
 
-		hash_start = (char*)&(skb->nh.ipv6h->daddr);
-		hash_size = sizeof(skb->nh.ipv6h->daddr);
+		hash_start = (char *)&(ipv6_hdr(skb)->daddr);
+		hash_size = sizeof(ipv6_hdr(skb)->daddr);
 		break;
 	case ETH_P_IPX:
 		if (ipx_hdr(skb)->ipx_checksum !=
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index c324866c978..a3d9986b417 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2899,13 +2899,11 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 			cmd_length = E1000_TXD_CMD_IP;
 			ipcse = skb->h.raw - skb->data - 1;
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
-			skb->nh.ipv6h->payload_len = 0;
+			ipv6_hdr(skb)->payload_len = 0;
 			skb->h.th->check =
-				~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-						 &skb->nh.ipv6h->daddr,
-						 0,
-						 IPPROTO_TCP,
-						 0);
+				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						 &ipv6_hdr(skb)->daddr,
+						 0, IPPROTO_TCP, 0);
 			ipcse = 0;
 		}
 		ipcss = skb_network_offset(skb);
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 1574247abaa..90da58b4e53 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -479,9 +479,11 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 						  skb->h.raw,
 						  skb->h.th->doff * 4);
 	else
-		eddp = qeth_eddp_create_eddp_data(qhdr, (u8 *)skb->nh.ipv6h,
-				sizeof(struct ipv6hdr),
-				(u8 *)skb->h.th, skb->h.th->doff*4);
+		eddp = qeth_eddp_create_eddp_data(qhdr,
+						  skb_network_header(skb),
+						  sizeof(struct ipv6hdr),
+						  skb->h.raw,
+						  skb->h.th->doff * 4);
 
 	if (eddp == NULL) {
 		QETH_DBF_TEXT(trace, 2, "eddpfcnm");
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 8a07d548a05..df7f279ec40 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -4053,7 +4053,8 @@ qeth_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
 			       skb->dst->neighbour->primary_key, 16);
 		} else {
 			/* fill in destination address used in ip header */
-			memcpy(hdr->hdr.l3.dest_addr, &skb->nh.ipv6h->daddr, 16);
+			memcpy(hdr->hdr.l3.dest_addr,
+			       &ipv6_hdr(skb)->daddr, 16);
 		}
 	} else { /* passthrough */
                 if((skb->dev->type == ARPHRD_IEEE802_TR) &&
diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h
index 255cb2e9c79..4040bdd8c32 100644
--- a/drivers/s390/net/qeth_tso.h
+++ b/drivers/s390/net/qeth_tso.h
@@ -64,7 +64,7 @@ static inline void
 qeth_tso_set_tcpip_header(struct qeth_card *card, struct sk_buff *skb)
 {
 	struct iphdr *iph    = ip_hdr(skb);
-	struct ipv6hdr *ip6h = skb->nh.ipv6h;
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct tcphdr *tcph  = skb->h.th;
 
 	tcph->check = 0;
-- 
cgit v1.2.3


From badff6d01a8589a1c828b0bf118903ca38627f4e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 13 Mar 2007 13:06:52 -0300
Subject: [SK_BUFF]: Introduce skb_reset_transport_header(skb)

For the common, open coded 'skb->h.raw = skb->data' operation, so that we can
later turn skb->h.raw into a offset, reducing the size of struct sk_buff in
64bit land while possibly keeping it as a pointer on 32bit.

This one touches just the most simple cases:

skb->h.raw = skb->data;
skb->h.raw = {skb_push|[__]skb_pull}()

The next ones will handle the slightly more "complex" cases.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/cxgb3/iwch_cm.c | 6 +++---
 drivers/net/appletalk/cops.c          | 2 +-
 drivers/net/appletalk/ltpc.c          | 4 ++--
 drivers/net/cxgb3/sge.c               | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 2d2de9b8b72..66ad4d40ba1 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -507,7 +507,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
 	 */
 	skb_get(skb);
 	set_arp_failure_handler(skb, arp_failure_discard);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	len = skb->len;
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
@@ -559,7 +559,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
 	skb_get(skb);
 	skb->priority = CPL_PRIORITY_DATA;
 	set_arp_failure_handler(skb, arp_failure_discard);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
 	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
@@ -610,7 +610,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
 	 */
 	skb_get(skb);
 	set_arp_failure_handler(skb, arp_failure_discard);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	len = skb->len;
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index 28cb79cee91..da6ffa8cd81 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -855,7 +855,7 @@ static void cops_rx(struct net_device *dev)
 
         skb_reset_mac_header(skb);    /* Point to entire packet. */
         skb_pull(skb,3);
-        skb->h.raw      = skb->data;    /* Point to data (Skip header). */
+        skb_reset_transport_header(skb);    /* Point to data (Skip header). */
 
         /* Update the counters. */
         lp->stats.rx_packets++;
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index 12682439f8b..dc3bce992dc 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -776,7 +776,7 @@ static int sendup_buffer (struct net_device *dev)
 	/* copy ddp(s,e)hdr + contents */
 	memcpy(skb->data,(void*)ltdmabuf,len);
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	stats->rx_packets++;
 	stats->rx_bytes+=skb->len;
@@ -923,7 +923,7 @@ static int ltpc_xmit(struct sk_buff *skb, struct net_device *dev)
 	cbuf.laptype = skb->data[2];
 	skb_pull(skb,3);	/* skip past LLAP header */
 	cbuf.length = skb->len;	/* this is host order */
-	skb->h.raw=skb->data;
+	skb_reset_transport_header(skb);
 
 	if(debug & DEBUG_UPPER) {
 		printk("command ");
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 892e5dcafa0..a891f6f8152 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1622,7 +1622,7 @@ static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
 	rq->offload_pkts++;
 	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	if (rq->polling) {
 		rx_gather[gather_idx++] = skb;
-- 
cgit v1.2.3


From ea2ae17d6443abddc79480dc9f7af8feacabddc4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 17:55:53 -0700
Subject: [SK_BUFF]: Introduce skb_transport_offset()

For the quite common 'skb->h.raw - skb->data' sequence.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/atl1/atl1_main.c       | 10 +++++-----
 drivers/net/cassini.c              |  6 ++----
 drivers/net/cxgb3/sge.c            |  7 ++++---
 drivers/net/e1000/e1000_main.c     | 10 +++++-----
 drivers/net/ixgb/ixgb_main.c       |  8 ++++----
 drivers/net/myri10ge/myri10ge.c    |  5 +++--
 drivers/net/netxen/netxen_nic_hw.c |  2 +-
 drivers/net/sk98lin/skge.c         |  4 ++--
 drivers/net/skge.c                 |  2 +-
 drivers/net/sky2.c                 |  2 +-
 drivers/net/sungem.c               |  6 ++----
 drivers/net/sunhme.c               |  6 ++----
 12 files changed, 32 insertions(+), 36 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index d2be79a30f8..c26f8ce320e 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1326,8 +1326,8 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb,
 	u8 css, cso;
 
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
-		cso = skb->h.raw - skb->data;
-		css = (skb->h.raw + skb->csum_offset) - skb->data;
+		cso = skb_transport_offset(skb);
+		css = cso + skb->csum;
 		if (unlikely(cso & 0x1)) {
 			printk(KERN_DEBUG "%s: payload offset != even number\n",
 				atl1_driver_name);
@@ -1369,8 +1369,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter,
 
 	if (tcp_seg) {
 		/* TSO/GSO */
-		proto_hdr_len =
-		    ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		proto_hdr_len = (skb_transport_offset(skb) +
+				 (skb->h.th->doff << 2));
 		buffer_info->length = proto_hdr_len;
 		page = virt_to_page(skb->data);
 		offset = (unsigned long)skb->data & ~PAGE_MASK;
@@ -1562,7 +1562,7 @@ static int atl1_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	mss = skb_shinfo(skb)->gso_size;
 	if (mss) {
 		if (skb->protocol == htons(ETH_P_IP)) {
-			proto_hdr_len = ((skb->h.raw - skb->data) +
+			proto_hdr_len = (skb_transport_offset(skb) +
 					 (skb->h.th->doff << 2));
 			if (unlikely(proto_hdr_len > len)) {
 				dev_kfree_skb_any(skb);
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index 68e37a655fe..bd3ab6493e3 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2821,10 +2821,8 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring,
 
 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		u64 csum_start_off, csum_stuff_off;
-
-		csum_start_off = (u64) (skb->h.raw - skb->data);
-		csum_stuff_off = csum_start_off + skb->csum_offset;
+		const u64 csum_start_off = skb_transport_offset(skb);
+		const u64 csum_stuff_off = csum_start_off + skb->csum_offset;
 
 		ctrl =  TX_DESC_CSUM_EN |
 			CAS_BASE(TX_DESC_CSUM_START, csum_start_off) |
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index a891f6f8152..d38b1bcd138 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1319,9 +1319,10 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
 	/* Only TX_DATA builds SGLs */
 
 	from = (struct work_request_hdr *)skb->data;
-	memcpy(&d->flit[1], &from[1], skb->h.raw - skb->data - sizeof(*from));
+	memcpy(&d->flit[1], &from[1],
+	       skb_transport_offset(skb) - sizeof(*from));
 
-	flits = (skb->h.raw - skb->data) / 8;
+	flits = skb_transport_offset(skb) / 8;
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
 	sgl_flits = make_sgl(skb, sgp, skb->h.raw, skb->tail - skb->h.raw,
 			     adap->pdev);
@@ -1349,7 +1350,7 @@ static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
 	if (skb->len <= WR_LEN && cnt == 0)
 		return 1;	/* packet fits as immediate data */
 
-	flits = (skb->h.raw - skb->data) / 8;	/* headers */
+	flits = skb_transport_offset(skb) / 8;	/* headers */
 	if (skb->tail != skb->h.raw)
 		cnt++;
 	return flits_to_desc(flits + sgl_len(cnt));
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index a3d9986b417..78cf417cf23 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2887,7 +2887,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 				return err;
 		}
 
-		hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		hdr_len = (skb_transport_offset(skb) + (skb->h.th->doff << 2));
 		mss = skb_shinfo(skb)->gso_size;
 		if (skb->protocol == htons(ETH_P_IP)) {
 			struct iphdr *iph = ip_hdr(skb);
@@ -2897,7 +2897,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 							      iph->daddr, 0,
 							      IPPROTO_TCP, 0);
 			cmd_length = E1000_TXD_CMD_IP;
-			ipcse = skb->h.raw - skb->data - 1;
+			ipcse = skb_transport_offset(skb) - 1;
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 			ipv6_hdr(skb)->payload_len = 0;
 			skb->h.th->check =
@@ -2908,7 +2908,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 		}
 		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
-		tucss = skb->h.raw - skb->data;
+		tucss = skb_transport_offset(skb);
 		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
 		tucse = 0;
 
@@ -2950,7 +2950,7 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 	uint8_t css;
 
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
-		css = skb->h.raw - skb->data;
+		css = skb_transport_offset(skb);
 
 		i = tx_ring->next_to_use;
 		buffer_info = &tx_ring->buffer_info[i];
@@ -3292,7 +3292,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		/* TSO Workaround for 82571/2/3 Controllers -- if skb->data
 		* points to just header, pull a few bytes of payload from
 		* frags into skb->data */
-		hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		hdr_len = (skb_transport_offset(skb) + (skb->h.th->doff << 2));
 		if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) {
 			switch (adapter->hw.mac_type) {
 				unsigned int pull_size;
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index bba4dcaf92e..ceea6e45792 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1190,7 +1190,7 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 				return err;
 		}
 
-		hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		hdr_len = (skb_transport_offset(skb) + (skb->h.th->doff << 2));
 		mss = skb_shinfo(skb)->gso_size;
 		iph = ip_hdr(skb);
 		iph->tot_len = 0;
@@ -1199,8 +1199,8 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 						      0, IPPROTO_TCP, 0);
 		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(iph->check) - (void *)skb->data;
-		ipcse = skb->h.raw - skb->data - 1;
-		tucss = skb->h.raw - skb->data;
+		ipcse = skb_transport_offset(skb) - 1;
+		tucss = skb_transport_offset(skb);
 		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
 		tucse = 0;
 
@@ -1245,7 +1245,7 @@ ixgb_tx_csum(struct ixgb_adapter *adapter, struct sk_buff *skb)
 
 	if(likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		struct ixgb_buffer *buffer_info;
-		css = skb->h.raw - skb->data;
+		css = skb_transport_offset(skb);
 		cso = css + skb->csum_offset;
 
 		i = adapter->tx_ring.next_to_use;
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 7c04179c7b1..e04228c7b14 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -2029,7 +2029,7 @@ again:
 	odd_flag = 0;
 	flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
-		cksum_offset = (skb->h.raw - skb->data);
+		cksum_offset = skb_transport_offset(skb);
 		pseudo_hdr_offset = cksum_offset + skb->csum_offset;
 		/* If the headers are excessively large, then we must
 		 * fall back to a software checksum */
@@ -2054,7 +2054,8 @@ again:
 		 * send loop that we are still in the
 		 * header portion of the TSO packet.
 		 * TSO header must be at most 134 bytes long */
-		cum_len = -((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		cum_len = -(skb_transport_offset(skb) +
+			    (skb->h.th->doff << 2));
 
 		/* for TSO, pseudo_hdr_offset holds mss.
 		 * The firmware figures out where to put
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index 28d68c3550e..09ca2192cbf 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -387,7 +387,7 @@ void netxen_tso_check(struct netxen_adapter *adapter,
 		}
 	}
 	adapter->stats.xmitcsummed++;
-	desc->tcp_hdr_offset = skb->h.raw - skb->data;
+	desc->tcp_hdr_offset = skb_transport_offset(skb);
 	desc->ip_hdr_offset = skb_network_offset(skb);
 }
 
diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c
index 9ac1fe659dc..e4ab7a8acc1 100644
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -1562,7 +1562,7 @@ struct sk_buff	*pMessage)	/* pointer to send-message              */
 	pTxd->pMBuf     = pMessage;
 
 	if (pMessage->ip_summed == CHECKSUM_PARTIAL) {
-		u16 hdrlen = pMessage->h.raw - pMessage->data;
+		u16 hdrlen = skb_transport_offset(pMessage);
 		u16 offset = hdrlen + pMessage->csum_offset;
 
 		if ((pMessage->h.ipiph->protocol == IPPROTO_UDP ) &&
@@ -1681,7 +1681,7 @@ struct sk_buff	*pMessage)	/* pointer to send-message              */
 	** Does the HW need to evaluate checksum for TCP or UDP packets? 
 	*/
 	if (pMessage->ip_summed == CHECKSUM_PARTIAL) {
-		u16 hdrlen = pMessage->h.raw - pMessage->data;
+		u16 hdrlen = skb_transport_offset(pMessage);
 		u16 offset = hdrlen + pMessage->csum_offset;
 
 		Control = BMU_STFWD;
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index d476a3cc2e9..ca7a0e03984 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2654,7 +2654,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	td->dma_hi = map >> 32;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		int offset = skb->h.raw - skb->data;
+		const int offset = skb_transport_offset(skb);
 
 		/* This seems backwards, but it is what the sk98lin
 		 * does.  Looks like hardware is wrong?
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index a37bb205f3d..a35f2f2784a 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1421,7 +1421,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 
 	/* Handle TCP checksum offload */
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		unsigned offset = skb->h.raw - skb->data;
+		const unsigned offset = skb_transport_offset(skb);
 		u32 tcpsum;
 
 		tcpsum = offset << 16;		/* sum start */
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index db2e1a6b723..4bb89dec565 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -1028,10 +1028,8 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		u64 csum_start_off, csum_stuff_off;
-
-		csum_start_off = (u64) (skb->h.raw - skb->data);
-		csum_stuff_off = csum_start_off + skb->csum_offset;
+		const u64 csum_start_off = skb_transport_offset(skb);
+		const u64 csum_stuff_off = csum_start_off + skb->csum_offset;
 
 		ctrl = (TXDCTRL_CENAB |
 			(csum_start_off << 15) |
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index aca592bc032..4b69c1deb9f 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -2269,10 +2269,8 @@ static int happy_meal_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_flags = TXFLAG_OWN;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		u32 csum_start_off, csum_stuff_off;
-
-		csum_start_off = (u32) (skb->h.raw - skb->data);
-		csum_stuff_off = csum_start_off + skb->csum_offset;
+		const u32 csum_start_off = skb_transport_offset(skb);
+		const u32 csum_stuff_off = csum_start_off + skb->csum_offset;
 
 		tx_flags = (TXFLAG_OWN | TXFLAG_CSENABLE |
 			    ((csum_start_off << 14) & TXFLAG_CSBUFBEGIN) |
-- 
cgit v1.2.3


From 4bedb45203eab92a87b4c863fe2d0cded633427f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 13 Mar 2007 14:28:48 -0300
Subject: [SK_BUFF]: Introduce udp_hdr(), remove skb->h.uh

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gianfar.c     | 4 ++--
 drivers/net/ioc3-eth.c    | 2 +-
 drivers/net/mv643xx_eth.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index c9abc96a091..b9f44602c5e 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -944,9 +944,9 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb)
 	/* And provide the already calculated phcs */
 	if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
 		flags |= TXFCB_UDP;
-		fcb->phcs = skb->h.uh->check;
+		fcb->phcs = udp_hdr(skb)->check;
 	} else
-		fcb->phcs = skb->h.th->check;
+		fcb->phcs = udp_hdr(skb)->check;
 
 	/* l3os is the distance between the start of the
 	 * frame (skb->data) and the start of the IP hdr.
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index d375e786b4b..ba012e10d79 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1422,7 +1422,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		csoff = ETH_HLEN + (ih->ihl << 2);
 		if (proto == IPPROTO_UDP) {
 			csoff += offsetof(struct udphdr, check);
-			skb->h.uh->check = csum;
+			udp_hdr(skb)->check = csum;
 		}
 		if (proto == IPPROTO_TCP) {
 			csoff += offsetof(struct tcphdr, check);
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 6b39a268ec2..43723839e93 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1166,7 +1166,7 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp,
 		switch (ip_hdr(skb)->protocol) {
 		case IPPROTO_UDP:
 			cmd_sts |= ETH_UDP_FRAME;
-			desc->l4i_chk = skb->h.uh->check;
+			desc->l4i_chk = udp_hdr(skb)->check;
 			break;
 		case IPPROTO_TCP:
 			desc->l4i_chk = skb->h.th->check;
-- 
cgit v1.2.3


From ab6a5bb6b28a970104a34f0f6959b73cf61bdc72 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 18 Mar 2007 17:43:48 -0700
Subject: [TCP]: Introduce tcp_hdrlen() and tcp_optlen()

The ip_hdrlen() buddy, created to reduce the number of skb->h.th-> uses and to
avoid the longer, open coded equivalent.

Ditched a no-op in bnx2 in the process.

I wonder if we should have a BUG_ON(skb->h.th->doff < 5) in tcp_optlen()...

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/atl1/atl1_main.c         | 7 +++----
 drivers/net/bnx2.c                   | 7 +++----
 drivers/net/e1000/e1000_main.c       | 4 ++--
 drivers/net/ehea/ehea_main.c         | 2 +-
 drivers/net/ixgb/ixgb_main.c         | 2 +-
 drivers/net/myri10ge/myri10ge.c      | 3 +--
 drivers/net/netxen/netxen_nic_hw.c   | 3 +--
 drivers/net/netxen/netxen_nic_main.c | 2 +-
 drivers/net/sky2.c                   | 2 +-
 drivers/net/tg3.c                    | 4 ++--
 drivers/s390/net/qeth_eddp.c         | 8 ++++----
 11 files changed, 20 insertions(+), 24 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index c26f8ce320e..8d5994751e2 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1307,7 +1307,7 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
 
 			tso->tsopl |= (iph->ihl &
 				CSUM_PARAM_IPHL_MASK) << CSUM_PARAM_IPHL_SHIFT;
-			tso->tsopl |= ((skb->h.th->doff << 2) &
+			tso->tsopl |= (tcp_hdrlen(skb) &
 				TSO_PARAM_TCPHDRLEN_MASK) << TSO_PARAM_TCPHDRLEN_SHIFT;
 			tso->tsopl |= (skb_shinfo(skb)->gso_size &
 				TSO_PARAM_MSS_MASK) << TSO_PARAM_MSS_SHIFT;
@@ -1369,8 +1369,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter,
 
 	if (tcp_seg) {
 		/* TSO/GSO */
-		proto_hdr_len = (skb_transport_offset(skb) +
-				 (skb->h.th->doff << 2));
+		proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		buffer_info->length = proto_hdr_len;
 		page = virt_to_page(skb->data);
 		offset = (unsigned long)skb->data & ~PAGE_MASK;
@@ -1563,7 +1562,7 @@ static int atl1_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	if (mss) {
 		if (skb->protocol == htons(ETH_P_IP)) {
 			proto_hdr_len = (skb_transport_offset(skb) +
-					 (skb->h.th->doff << 2));
+					 tcp_hdrlen(skb));
 			if (unlikely(proto_hdr_len > len)) {
 				dev_kfree_skb_any(skb);
 				return NETDEV_TX_OK;
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index eb0c4f1d448..73512fb1645 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -4521,13 +4521,12 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			return NETDEV_TX_OK;
 		}
 
-		tcp_opt_len = ((skb->h.th->doff - 5) * 4);
 		vlan_tag_flags |= TX_BD_FLAGS_SW_LSO;
 
 		tcp_opt_len = 0;
-		if (skb->h.th->doff > 5) {
-			tcp_opt_len = (skb->h.th->doff - 5) << 2;
-		}
+		if (skb->h.th->doff > 5)
+			tcp_opt_len = tcp_optlen(skb);
+
 		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 		iph = ip_hdr(skb);
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 78cf417cf23..4572fbba50f 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2887,7 +2887,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 				return err;
 		}
 
-		hdr_len = (skb_transport_offset(skb) + (skb->h.th->doff << 2));
+		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		mss = skb_shinfo(skb)->gso_size;
 		if (skb->protocol == htons(ETH_P_IP)) {
 			struct iphdr *iph = ip_hdr(skb);
@@ -3292,7 +3292,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		/* TSO Workaround for 82571/2/3 Controllers -- if skb->data
 		* points to just header, pull a few bytes of payload from
 		* frags into skb->data */
-		hdr_len = (skb_transport_offset(skb) + (skb->h.th->doff << 2));
+		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) {
 			switch (adapter->hw.mac_type) {
 				unsigned int pull_size;
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 0dc701e611e..63732d2305b 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -1300,7 +1300,7 @@ static void write_swqe2_TSO(struct sk_buff *skb,
 	/* copy only eth/ip/tcp headers to immediate data and
 	 * the rest of skb->data to sg1entry
 	 */
-	headersize = ETH_HLEN + ip_hdrlen(skb) + (skb->h.th->doff * 4);
+	headersize = ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb);
 
 	skb_data_size = skb->len - skb->data_len;
 
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index ceea6e45792..96550d68162 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1190,7 +1190,7 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 				return err;
 		}
 
-		hdr_len = (skb_transport_offset(skb) + (skb->h.th->doff << 2));
+		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		mss = skb_shinfo(skb)->gso_size;
 		iph = ip_hdr(skb);
 		iph->tot_len = 0;
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index e04228c7b14..e4b69a0485b 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -2054,8 +2054,7 @@ again:
 		 * send loop that we are still in the
 		 * header portion of the TSO packet.
 		 * TSO header must be at most 134 bytes long */
-		cum_len = -(skb_transport_offset(skb) +
-			    (skb->h.th->doff << 2));
+		cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb));
 
 		/* for TSO, pseudo_hdr_offset holds mss.
 		 * The firmware figures out where to put
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index 09ca2192cbf..0fba8f19076 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -374,8 +374,7 @@ void netxen_tso_check(struct netxen_adapter *adapter,
 {
 	if (desc->mss) {
 		desc->total_hdr_length = (sizeof(struct ethhdr) +
-					  ip_hdrlen(skb) +
-					  skb->h.th->doff * 4);
+					  ip_hdrlen(skb) + tcp_hdrlen(skb));
 		netxen_set_cmd_desc_opcode(desc, TX_TCP_LSO);
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index b548a30e5c8..b488e94bc4c 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -779,7 +779,7 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		if (skb_shinfo(skb)->gso_size > 0) {
 
 			no_of_desc++;
-			if ((ip_hdrlen(skb) + skb->h.th->doff * 4 +
+			if ((ip_hdrlen(skb) + tcp_hdrlen(skb) +
 			     sizeof(struct ethhdr)) >
 			    (sizeof(struct cmd_desc_type0) - 2)) {
 				no_of_desc++;
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index a35f2f2784a..fd291fc9316 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1392,7 +1392,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	/* Check for TCP Segmentation Offload */
 	mss = skb_shinfo(skb)->gso_size;
 	if (mss != 0) {
-		mss += ((skb->h.th->doff - 5) * 4);	/* TCP options */
+		mss += tcp_optlen(skb); /* TCP options */
 		mss += ip_hdrlen(skb) + sizeof(struct tcphdr);
 		mss += ETH_HLEN;
 
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 76a31afe20d..7ca30d76bf6 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3911,7 +3911,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		else {
 			struct iphdr *iph = ip_hdr(skb);
 
-			tcp_opt_len = ((skb->h.th->doff - 5) * 4);
+			tcp_opt_len = tcp_optlen(skb);
 			ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 			iph->check = 0;
@@ -4065,7 +4065,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 			goto out_unlock;
 		}
 
-		tcp_opt_len = ((skb->h.th->doff - 5) * 4);
+		tcp_opt_len = tcp_optlen(skb);
 		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 		hdr_len = ip_tcp_len + tcp_opt_len;
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 90da58b4e53..273f1745a00 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -477,13 +477,13 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 						  skb_network_header(skb),
 						  ip_hdrlen(skb),
 						  skb->h.raw,
-						  skb->h.th->doff * 4);
+						  tcp_hdrlen(skb));
 	else
 		eddp = qeth_eddp_create_eddp_data(qhdr,
 						  skb_network_header(skb),
 						  sizeof(struct ipv6hdr),
 						  skb->h.raw,
-						  skb->h.th->doff * 4);
+						  tcp_hdrlen(skb));
 
 	if (eddp == NULL) {
 		QETH_DBF_TEXT(trace, 2, "eddpfcnm");
@@ -596,11 +596,11 @@ qeth_eddp_create_context_tcp(struct qeth_card *card, struct sk_buff *skb,
 		ctx = qeth_eddp_create_context_generic(card, skb,
 						       (sizeof(struct qeth_hdr) +
 						        ip_hdrlen(skb) +
-							skb->h.th->doff * 4));
+							tcp_hdrlen(skb)));
 	else if (skb->protocol == htons(ETH_P_IPV6))
 		ctx = qeth_eddp_create_context_generic(card, skb,
 			sizeof(struct qeth_hdr) + sizeof(struct ipv6hdr) +
-			skb->h.th->doff*4);
+			tcp_hdrlen(skb));
 	else
 		QETH_DBF_TEXT(trace, 2, "cetcpinv");
 
-- 
cgit v1.2.3


From aa8223c7bb0b05183e1737881ed21827aa5b9e73 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 21:04:22 -0700
Subject: [SK_BUFF]: Introduce tcp_hdr(), remove skb->h.th

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/atl1/atl1_main.c   |  7 ++++---
 drivers/net/bnx2.c             |  8 ++++----
 drivers/net/chelsio/sge.c      |  2 +-
 drivers/net/cxgb3/sge.c        |  2 +-
 drivers/net/e1000/e1000_main.c | 11 ++++++-----
 drivers/net/ioc3-eth.c         |  2 +-
 drivers/net/ixgb/ixgb_main.c   |  7 ++++---
 drivers/net/mv643xx_eth.c      |  2 +-
 drivers/net/tg3.c              | 15 +++++++--------
 drivers/s390/net/qeth_eddp.c   |  2 +-
 drivers/s390/net/qeth_tso.h    |  4 ++--
 11 files changed, 32 insertions(+), 30 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index 8d5994751e2..d60c2217332 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1298,9 +1298,10 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
 
 			iph->tot_len = 0;
 			iph->check = 0;
-			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
-							      iph->daddr, 0,
-							      IPPROTO_TCP, 0);
+			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+								 iph->daddr, 0,
+								 IPPROTO_TCP,
+								 0);
 			ipofst = skb_network_offset(skb);
 			if (ipofst != ENET_HEADER_SIZE) /* 802.3 frame */
 				tso->tsopl |= 1 << TSO_PARAM_ETHTYPE_SHIFT;
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 73512fb1645..7e7b5f34403 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -4524,7 +4524,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		vlan_tag_flags |= TX_BD_FLAGS_SW_LSO;
 
 		tcp_opt_len = 0;
-		if (skb->h.th->doff > 5)
+		if (tcp_hdr(skb)->doff > 5)
 			tcp_opt_len = tcp_optlen(skb);
 
 		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
@@ -4532,9 +4532,9 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		iph = ip_hdr(skb);
 		iph->check = 0;
 		iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
-		skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-						      0, IPPROTO_TCP, 0);
-
+		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+							 iph->daddr, 0,
+							 IPPROTO_TCP, 0);
 		if (tcp_opt_len || (iph->ihl > 5)) {
 			vlan_tag_flags |= ((iph->ihl - 5) +
 					   (tcp_opt_len >> 2)) << 8;
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index a4204dff363..43e92f9f0bc 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1872,7 +1872,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		hdr->opcode = CPL_TX_PKT_LSO;
 		hdr->ip_csum_dis = hdr->l4_csum_dis = 0;
 		hdr->ip_hdr_words = ip_hdr(skb)->ihl;
-		hdr->tcp_hdr_words = skb->h.th->doff;
+		hdr->tcp_hdr_words = tcp_hdr(skb)->doff;
 		hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type,
 							  skb_shinfo(skb)->gso_size));
 		hdr->len = htonl(skb->len - sizeof(*hdr));
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index d38b1bcd138..a70fe9145a2 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -901,7 +901,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 		    CPL_ETH_II : CPL_ETH_II_VLAN;
 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
 		    V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
-		    V_LSO_TCPHDR_WORDS(skb->h.th->doff);
+		    V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
 		hdr->lso_info = htonl(tso_info);
 		flits = 3;
 	} else {
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 4572fbba50f..e86deb2ef82 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2893,14 +2893,15 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 			struct iphdr *iph = ip_hdr(skb);
 			iph->tot_len = 0;
 			iph->check = 0;
-			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
-							      iph->daddr, 0,
-							      IPPROTO_TCP, 0);
+			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+								 iph->daddr, 0,
+								 IPPROTO_TCP,
+								 0);
 			cmd_length = E1000_TXD_CMD_IP;
 			ipcse = skb_transport_offset(skb) - 1;
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 			ipv6_hdr(skb)->payload_len = 0;
-			skb->h.th->check =
+			tcp_hdr(skb)->check =
 				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 						 &ipv6_hdr(skb)->daddr,
 						 0, IPPROTO_TCP, 0);
@@ -2909,7 +2910,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
 		tucss = skb_transport_offset(skb);
-		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
+		tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
 		tucse = 0;
 
 		cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index ba012e10d79..bc62e770a25 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1426,7 +1426,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 		if (proto == IPPROTO_TCP) {
 			csoff += offsetof(struct tcphdr, check);
-			skb->h.th->check = csum;
+			tcp_hdr(skb)->check = csum;
 		}
 
 		w0 = ETXD_DOCHECKSUM | (csoff << ETXD_CHKOFF_SHIFT);
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 96550d68162..e729ced52dc 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1195,13 +1195,14 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 		iph = ip_hdr(skb);
 		iph->tot_len = 0;
 		iph->check = 0;
-		skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-						      0, IPPROTO_TCP, 0);
+		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+							 iph->daddr, 0,
+							 IPPROTO_TCP, 0);
 		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(iph->check) - (void *)skb->data;
 		ipcse = skb_transport_offset(skb) - 1;
 		tucss = skb_transport_offset(skb);
-		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
+		tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
 		tucse = 0;
 
 		i = adapter->tx_ring.next_to_use;
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 43723839e93..ab15ecd4b3d 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1169,7 +1169,7 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp,
 			desc->l4i_chk = udp_hdr(skb)->check;
 			break;
 		case IPPROTO_TCP:
-			desc->l4i_chk = skb->h.th->check;
+			desc->l4i_chk = tcp_hdr(skb)->check;
 			break;
 		default:
 			BUG();
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 7ca30d76bf6..414365c3198 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3922,7 +3922,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		base_flags |= (TXD_FLAG_CPU_PRE_DMA |
 			       TXD_FLAG_CPU_POST_DMA);
 
-		skb->h.th->check = 0;
+		tcp_hdr(skb)->check = 0;
 
 	}
 	else if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -4080,14 +4080,13 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 		iph->check = 0;
 		iph->tot_len = htons(mss + hdr_len);
 		if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
-			skb->h.th->check = 0;
+			tcp_hdr(skb)->check = 0;
 			base_flags &= ~TXD_FLAG_TCPUDP_CSUM;
-		}
-		else {
-			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
-							      iph->daddr, 0,
-							      IPPROTO_TCP, 0);
-		}
+		} else
+			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+								 iph->daddr, 0,
+								 IPPROTO_TCP,
+								 0);
 
 		if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO) ||
 		    (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)) {
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 273f1745a00..b8e84674e17 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -416,7 +416,7 @@ __qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
                        eddp->skb_offset += VLAN_HLEN;
 #endif /* CONFIG_QETH_VLAN */
        }
-	tcph = eddp->skb->h.th;
+	tcph = tcp_hdr(eddp->skb);
 	while (eddp->skb_offset < eddp->skb->len) {
 		data_len = min((int)skb_shinfo(eddp->skb)->gso_size,
 			       (int)(eddp->skb->len - eddp->skb_offset));
diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h
index 4040bdd8c32..c20e923cf9a 100644
--- a/drivers/s390/net/qeth_tso.h
+++ b/drivers/s390/net/qeth_tso.h
@@ -41,7 +41,7 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb)
 
 	hdr  = (struct qeth_hdr_tso *) skb->data;
 	iph  = ip_hdr(skb);
-	tcph = skb->h.th;
+	tcph = tcp_hdr(skb);
 	/*fix header to TSO values ...*/
 	hdr->hdr.hdr.l3.id = QETH_HEADER_TYPE_TSO;
 	/*set values which are fix for the first approach ...*/
@@ -65,7 +65,7 @@ qeth_tso_set_tcpip_header(struct qeth_card *card, struct sk_buff *skb)
 {
 	struct iphdr *iph    = ip_hdr(skb);
 	struct ipv6hdr *ip6h = ipv6_hdr(skb);
-	struct tcphdr *tcph  = skb->h.th;
+	struct tcphdr *tcph  = tcp_hdr(skb);
 
 	tcph->check = 0;
 	if (skb->protocol == ETH_P_IPV6) {
-- 
cgit v1.2.3


From b0061ce49c83657563b64ffcf1ec137110230d93 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 18:02:22 -0700
Subject: [SK_BUFF]: Introduce ipip_hdr(), remove skb->h.ipiph

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sk98lin/skge.c | 4 ++--
 drivers/net/skge.c         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c
index e4ab7a8acc1..b987a5c3f42 100644
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -1565,7 +1565,7 @@ struct sk_buff	*pMessage)	/* pointer to send-message              */
 		u16 hdrlen = skb_transport_offset(pMessage);
 		u16 offset = hdrlen + pMessage->csum_offset;
 
-		if ((pMessage->h.ipiph->protocol == IPPROTO_UDP ) &&
+		if ((ipip_hdr(pMessage)->protocol == IPPROTO_UDP) &&
 			(pAC->GIni.GIChipRev == 0) &&
 			(pAC->GIni.GIChipId == CHIP_ID_YUKON)) {
 			pTxd->TBControl = BMU_TCP_CHECK;
@@ -1691,7 +1691,7 @@ struct sk_buff	*pMessage)	/* pointer to send-message              */
 		** opcode for udp is not working in the hardware yet 
 		** (Revision 2.0)
 		*/
-		if ((pMessage->h.ipiph->protocol == IPPROTO_UDP ) &&
+		if ((ipip_hdr(pMessage)->protocol == IPPROTO_UDP) &&
 			(pAC->GIni.GIChipRev == 0) &&
 			(pAC->GIni.GIChipId == CHIP_ID_YUKON)) {
 			Control |= BMU_TCP_CHECK;
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index ca7a0e03984..99b61cfb7ce 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2659,7 +2659,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 		/* This seems backwards, but it is what the sk98lin
 		 * does.  Looks like hardware is wrong?
 		 */
-		if (skb->h.ipiph->protocol == IPPROTO_UDP
+		if (ipip_hdr(skb)->protocol == IPPROTO_UDP
 	            && hw->chip_rev == 0 && hw->chip_id == CHIP_ID_YUKON)
 			control = BMU_TCP_CHECK;
 		else
-- 
cgit v1.2.3


From 9c70220b73908f64792422a2c39c593c4792f2c5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 18:04:18 -0700
Subject: [SK_BUFF]: Introduce skb_transport_header(skb)

For the places where we need a pointer to the transport header, it is
still legal to touch skb->h.raw directly if just adding to,
subtracting from or setting it to another layer header.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/appletalk/ltpc.c | 7 +++++--
 drivers/net/cxgb3/sge.c      | 8 +++++---
 drivers/s390/net/qeth_eddp.c | 4 ++--
 3 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index dc3bce992dc..43c17c85c97 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -917,6 +917,7 @@ static int ltpc_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	int i;
 	struct lt_sendlap cbuf;
+	unsigned char *hdr;
 
 	cbuf.command = LT_SENDLAP;
 	cbuf.dnode = skb->data[0];
@@ -932,11 +933,13 @@ static int ltpc_xmit(struct sk_buff *skb, struct net_device *dev)
 		printk("\n");
 	}
 
-	do_write(dev,&cbuf,sizeof(cbuf),skb->h.raw,skb->len);
+	hdr = skb_transport_header(skb);
+	do_write(dev, &cbuf, sizeof(cbuf), hdr, skb->len);
 
 	if(debug & DEBUG_UPPER) {
 		printk("sent %d ddp bytes\n",skb->len);
-		for(i=0;i<skb->len;i++) printk("%02x ",skb->h.raw[i]);
+		for (i = 0; i < skb->len; i++)
+			printk("%02x ", hdr[i]);
 		printk("\n");
 	}
 
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index a70fe9145a2..610e4769efa 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1324,12 +1324,14 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
 
 	flits = skb_transport_offset(skb) / 8;
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
-	sgl_flits = make_sgl(skb, sgp, skb->h.raw, skb->tail - skb->h.raw,
+	sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
+			     skb->tail - skb_transport_header(skb),
 			     adap->pdev);
 	if (need_skb_unmap()) {
 		setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
 		skb->destructor = deferred_unmap_destructor;
-		((struct unmap_info *)skb->cb)->len = skb->tail - skb->h.raw;
+		((struct unmap_info *)skb->cb)->len = (skb->tail -
+						       skb_transport_header(skb));
 	}
 
 	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
@@ -1351,7 +1353,7 @@ static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
 		return 1;	/* packet fits as immediate data */
 
 	flits = skb_transport_offset(skb) / 8;	/* headers */
-	if (skb->tail != skb->h.raw)
+	if (skb->tail != skb_transport_header(skb))
 		cnt++;
 	return flits_to_desc(flits + sgl_len(cnt));
 }
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index b8e84674e17..5890bb5ad23 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -476,13 +476,13 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 		eddp = qeth_eddp_create_eddp_data(qhdr,
 						  skb_network_header(skb),
 						  ip_hdrlen(skb),
-						  skb->h.raw,
+						  skb_transport_header(skb),
 						  tcp_hdrlen(skb));
 	else
 		eddp = qeth_eddp_create_eddp_data(qhdr,
 						  skb_network_header(skb),
 						  sizeof(struct ipv6hdr),
-						  skb->h.raw,
+						  skb_transport_header(skb),
 						  tcp_hdrlen(skb));
 
 	if (eddp == NULL) {
-- 
cgit v1.2.3


From cfe1fc7759fdacb0c650b575daed1692bf3eaece Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 16 Mar 2007 17:26:39 -0300
Subject: [SK_BUFF]: Introduce skb_network_header_len

For the common sequence "skb->h.raw - skb->nh.raw", similar to skb->mac_len,
that is precalculated tho, don't think we need to bloat skb with one more
member, so just use this new helper, reducing the number of non-skbuff.h
references to the layer headers even more.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gianfar.c    | 2 +-
 drivers/net/pasemi_mac.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index b9f44602c5e..b666a0cc064 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -953,7 +953,7 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb)
 	 * l4os is the distance between the start of the
 	 * l3 hdr and the l4 hdr */
 	fcb->l3os = (u16)(skb_network_offset(skb) - GMAC_FCB_LEN);
-	fcb->l4os = (u16)(skb->h.raw - skb->nh.raw);
+	fcb->l4os = skb_network_header_len(skb);
 
 	fcb->flags = flags;
 }
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index 1d8129986cc..76fe9dd8e84 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -734,12 +734,12 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
 		switch (ip_hdr(skb)->protocol) {
 		case IPPROTO_TCP:
 			dflags |= XCT_MACTX_CSUM_TCP;
-			dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2);
+			dflags |= XCT_MACTX_IPH(skb_network_header_len(skb) >> 2);
 			dflags |= XCT_MACTX_IPO(nh - skb->data);
 			break;
 		case IPPROTO_UDP:
 			dflags |= XCT_MACTX_CSUM_UDP;
-			dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2);
+			dflags |= XCT_MACTX_IPH(skb_network_header_len(skb) >> 2);
 			dflags |= XCT_MACTX_IPO(nh - skb->data);
 			break;
 		}
-- 
cgit v1.2.3


From b0e380b1d8a8e0aca215df97702f99815f05c094 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 21:21:55 -0700
Subject: [SK_BUFF]: unions of just one member don't get anything done, kill
 them

Renaming skb->h to skb->transport_header, skb->nh to skb->network_header and
skb->mac to skb->mac_header, to match the names of the associated helpers
(skb[_[re]set]_{transport,network,mac}_header).

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/arcnet/arcnet.c                   | 17 +++++++++++------
 drivers/net/bonding/bond_3ad.c                |  4 ++--
 drivers/net/bonding/bond_alb.c                |  2 +-
 drivers/net/wireless/hostap/hostap_80211_rx.c |  2 +-
 4 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 83004fdab0a..681e20b8466 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -519,9 +519,12 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev,
 		 * real header when we do rebuild_header.
 		 */
 		*(uint16_t *) skb_push(skb, 2) = type;
-		if (skb->nh.raw - skb->mac.raw != 2)
+		/*
+		 * XXX: Why not use skb->mac_len?
+		 */
+		if (skb->network_header - skb->mac_header != 2)
 			BUGMSG(D_NORMAL, "arcnet_header: Yikes!  diff (%d) is not 2!\n",
-			       (int)(skb->nh.raw - skb->mac.raw));
+			       (int)(skb->network_header - skb->mac_header));
 		return -2;	/* return error -- can't transmit yet! */
 	}
 	else {
@@ -554,11 +557,13 @@ static int arcnet_rebuild_header(struct sk_buff *skb)
 	unsigned short type;
 	uint8_t daddr=0;
 	struct ArcProto *proto;
-
-	if (skb->nh.raw - skb->mac.raw != 2) {
+	/*
+	 * XXX: Why not use skb->mac_len?
+	 */
+	if (skb->network_header - skb->mac_header != 2) {
 		BUGMSG(D_NORMAL,
-		     "rebuild_header: shouldn't be here! (hdrsize=%d)\n",
-		     (int)(skb->nh.raw - skb->mac.raw));
+		       "rebuild_header: shouldn't be here! (hdrsize=%d)\n",
+		       (int)(skb->network_header - skb->mac_header));
 		return 0;
 	}
 	type = *(uint16_t *) skb_pull(skb, 2);
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 05c870d6f6c..7e03f41ae2c 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -885,7 +885,7 @@ static int ad_lacpdu_send(struct port *port)
 
 	skb->dev = slave->dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->mac.raw + ETH_HLEN;
+	skb->network_header = skb->mac_header + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 	skb->priority = TC_PRIO_CONTROL;
 
@@ -929,7 +929,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
 
 	skb->dev = slave->dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->mac.raw + ETH_HLEN;
+	skb->network_header = skb->mac_header + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 
 	marker_header = (struct marker_header *)skb_put(skb, length);
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index b8cf777542f..92c3b6f6a8e 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -896,7 +896,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
 		memcpy(data, &pkt, size);
 
 		skb_reset_mac_header(skb);
-		skb->nh.raw = skb->mac.raw + ETH_HLEN;
+		skb->network_header = skb->mac_header + ETH_HLEN;
 		skb->protocol = pkt.type;
 		skb->priority = TC_PRIO_CONTROL;
 		skb->dev = slave->dev;
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index 7b7c1ca8f1f..35a3a50724f 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -1077,7 +1077,7 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 		skb2->protocol = __constant_htons(ETH_P_802_3);
 		skb_reset_mac_header(skb2);
 		skb_reset_network_header(skb2);
-		/* skb2->nh.raw += ETH_HLEN; */
+		/* skb2->network_header += ETH_HLEN; */
 		dev_queue_xmit(skb2);
 	}
 
-- 
cgit v1.2.3


From 5c81cd75fa63eaf2df0b8904508e53e953f316cf Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@sortiz.org>
Date: Fri, 16 Mar 2007 20:35:25 -0700
Subject: [IrDA]: removing stir4200 useless include

stir4200 doesn't need to include irlap.h

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/irda/stir4200.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c
index a22175f4ea8..aec86a21434 100644
--- a/drivers/net/irda/stir4200.c
+++ b/drivers/net/irda/stir4200.c
@@ -52,7 +52,6 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <net/irda/irda.h>
-#include <net/irda/irlap.h>
 #include <net/irda/irda_device.h>
 #include <net/irda/wrapper.h>
 #include <net/irda/crc.h>
-- 
cgit v1.2.3


From afdf27c95629634ea40703197b6788e454d31609 Mon Sep 17 00:00:00 2001
From: Peter Kovar <peter.kovar@gmail.com>
Date: Fri, 16 Mar 2007 20:39:25 -0700
Subject: [IrDA]: SMC SuperIO Chip LPC47N227 not identified properly

SMC SuperIO Chip LPC47N227 used for IrDA is not detected because its device
identification byte can be 0x7A instead of 0x5A.

Patch from Peter Kovar <peter.kovar@gmail.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/irda/smsc-ircc2.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index 103a2d18ed2..e8453868d74 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -315,6 +315,7 @@ static struct smsc_chip __initdata lpc_chips_flat[] =
 {
 	/* Base address 0x2E or 0x4E */
 	{ "47N227",	KEY55_1|FIR|SERx4,	0x5a, 0x00 },
+	{ "47N227",	KEY55_1|FIR|SERx4,	0x7a, 0x00 },
 	{ "47N267",	KEY55_1|FIR|SERx4,	0x5e, 0x00 },
 	{ NULL }
 };
-- 
cgit v1.2.3


From 27a884dc3cb63b93c2b3b643f5b31eed5f8a4d26 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 19 Apr 2007 20:29:13 -0700
Subject: [SK_BUFF]: Convert skb->tail to sk_buff_data_t

So that it is also an offset from skb->head, reduces its size from 8 to 4 bytes
on 64bit architectures, allowing us to combine the 4 bytes hole left by the
layer headers conversion, reducing struct sk_buff size to 256 bytes, i.e. 4
64byte cachelines, and since the sk_buff slab cache is SLAB_HWCACHE_ALIGN...
:-)

Many calculations that previously required that skb->{transport,network,
mac}_header be first converted to a pointer now can be done directly, being
meaningful as offsets or pointers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/he.c                              |  4 ++--
 drivers/atm/idt77252.c                        |  3 ++-
 drivers/atm/nicstar.c                         | 10 ++++++----
 drivers/infiniband/hw/amso1100/c2.c           |  5 +++--
 drivers/isdn/i4l/isdn_net.c                   |  2 +-
 drivers/media/dvb/dvb-core/dvb_net.c          | 10 ++++++----
 drivers/net/cris/eth_v10.c                    |  2 +-
 drivers/net/cxgb3/sge.c                       |  6 +++---
 drivers/net/e1000/e1000_main.c                |  4 ++--
 drivers/net/ibm_emac/ibm_emac_core.c          |  2 +-
 drivers/net/macb.c                            |  3 ++-
 drivers/net/pcmcia/nmclan_cs.c                |  2 +-
 drivers/net/s2io.c                            |  4 ++--
 drivers/net/tulip/uli526x.c                   | 14 +++++++++++---
 drivers/net/wan/hdlc_fr.c                     |  2 +-
 drivers/net/wan/lmc/lmc_main.c                |  4 ++--
 drivers/net/wireless/hostap/hostap_80211_rx.c |  2 +-
 drivers/s390/net/ctcmain.c                    | 11 +++++++----
 drivers/s390/net/netiucv.c                    | 10 ++++++----
 drivers/usb/atm/usbatm.c                      | 10 +++++-----
 drivers/usb/net/asix.c                        |  6 +++---
 drivers/usb/net/gl620a.c                      |  2 +-
 drivers/usb/net/net1080.c                     |  2 +-
 drivers/usb/net/rndis_host.c                  |  2 +-
 24 files changed, 71 insertions(+), 51 deletions(-)

(limited to 'drivers')

diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 8510026b690..d33aba6864c 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -1901,13 +1901,13 @@ he_service_rbrq(struct he_dev *he_dev, int group)
 			case ATM_AAL0:
 				/* 2.10.1.5 raw cell receive */
 				skb->len = ATM_AAL0_SDU;
-				skb->tail = skb->data + skb->len;
+				skb_set_tail_pointer(skb, skb->len);
 				break;
 			case ATM_AAL5:
 				/* 2.10.1.2 aal5 receive */
 
 				skb->len = AAL5_LEN(skb->data, he_vcc->pdu_len);
-				skb->tail = skb->data + skb->len;
+				skb_set_tail_pointer(skb, skb->len);
 #ifdef USE_CHECKSUM_HW
 				if (vcc->vpi == 0 && vcc->vci >= ATM_NOT_RSV_VCI) {
 					skb->ip_summed = CHECKSUM_COMPLETE;
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index b4b80140c39..1e49799cd6c 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -1816,7 +1816,8 @@ push_rx_skb(struct idt77252_dev *card, struct sk_buff *skb, int queue)
 	u32 handle;
 	u32 addr;
 
-	skb->data = skb->tail = skb->head;
+	skb->data = skb->head;
+	skb_reset_tail_pointer(skb);
 	skb->len = 0;
 
 	skb_reserve(skb, 16);
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index aab9b3733d5..26f4b703349 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -2208,7 +2208,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
          if (i == 1 && ns_rsqe_eopdu(rsqe))
             *((u32 *) sb->data) |= 0x00000002;
          skb_put(sb, NS_AAL0_HEADER);
-         memcpy(sb->tail, cell, ATM_CELL_PAYLOAD);
+         memcpy(skb_tail_pointer(sb), cell, ATM_CELL_PAYLOAD);
          skb_put(sb, ATM_CELL_PAYLOAD);
          ATM_SKB(sb)->vcc = vcc;
 	 __net_timestamp(sb);
@@ -2252,7 +2252,8 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
       vc->rx_iov = iovb;
       NS_SKB(iovb)->iovcnt = 0;
       iovb->len = 0;
-      iovb->tail = iovb->data = iovb->head;
+      iovb->data = iovb->head;
+      skb_reset_tail_pointer(iovb);
       NS_SKB(iovb)->vcc = vcc;
       /* IMPORTANT: a pointer to the sk_buff containing the small or large
                     buffer is stored as iovec base, NOT a pointer to the 
@@ -2265,7 +2266,8 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
       recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, NS_MAX_IOVECS);
       NS_SKB(iovb)->iovcnt = 0;
       iovb->len = 0;
-      iovb->tail = iovb->data = iovb->head;
+      iovb->data = iovb->head;
+      skb_reset_tail_pointer(iovb);
       NS_SKB(iovb)->vcc = vcc;
    }
    iov = &((struct iovec *) iovb->data)[NS_SKB(iovb)->iovcnt++];
@@ -2489,7 +2491,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
             {
                lb = (struct sk_buff *) iov->iov_base;
                tocopy = min_t(int, remaining, iov->iov_len);
-               memcpy(hb->tail, lb->data, tocopy);
+               memcpy(skb_tail_pointer(hb), lb->data, tocopy);
                skb_put(hb, tocopy);
                iov++;
                remaining -= tocopy;
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 7698feafa6a..58bc272bd40 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -439,7 +439,8 @@ static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
 	}
 
 	/* Setup the skb for reuse since we're dropping this pkt */
-	elem->skb->tail = elem->skb->data = elem->skb->head;
+	elem->skb->data = elem->skb->head;
+	skb_reset_tail_pointer(elem->skb);
 
 	/* Zero out the rxp hdr in the sk_buff */
 	memset(elem->skb->data, 0, sizeof(*rxp_hdr));
@@ -521,7 +522,7 @@ static void c2_rx_interrupt(struct net_device *netdev)
 		 * "sizeof(struct c2_rxp_hdr)".
 		 */
 		skb->data += sizeof(*rxp_hdr);
-		skb->tail = skb->data + buflen;
+		skb_set_tail_pointer(skb, buflen);
 		skb->len = buflen;
 		skb->protocol = eth_type_trans(skb, netdev);
 
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index cd3b1fa4a41..aa83277aba7 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -881,7 +881,7 @@ isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp)
 
 	addinfo[0] = '\0';
 	/* This check stolen from 2.1.72 dev_queue_xmit_nit() */
-	if (p < skb->data || p >= skb->tail) {
+	if (p < skb->data || skb->network_header >= skb->tail) {
 		/* fall back to old isdn_net_log_packet method() */
 		char * buf = skb->data;
 
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index c6b004182d9..9de177a5b9f 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -600,6 +600,7 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len )
 			/* Check CRC32, we've got it in our skb already. */
 			unsigned short ulen = htons(priv->ule_sndu_len);
 			unsigned short utype = htons(priv->ule_sndu_type);
+			const u8 *tail;
 			struct kvec iov[3] = {
 				{ &ulen, sizeof ulen },
 				{ &utype, sizeof utype },
@@ -613,10 +614,11 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len )
 			}
 
 			ule_crc = iov_crc32(ule_crc, iov, 3);
-			expected_crc = *((u8 *)priv->ule_skb->tail - 4) << 24 |
-				       *((u8 *)priv->ule_skb->tail - 3) << 16 |
-				       *((u8 *)priv->ule_skb->tail - 2) << 8 |
-				       *((u8 *)priv->ule_skb->tail - 1);
+			tail = skb_tail_pointer(priv->ule_skb);
+			expected_crc = *(tail - 4) << 24 |
+				       *(tail - 3) << 16 |
+				       *(tail - 2) << 8 |
+				       *(tail - 1);
 			if (ule_crc != expected_crc) {
 				printk(KERN_WARNING "%lu: CRC32 check FAILED: %08x / %08x, SNDU len %d type %#x, ts_remain %d, next 2: %x.\n",
 				       priv->ts_count, ule_crc, expected_crc, priv->ule_sndu_len, priv->ule_sndu_type, ts_remain, ts_remain > 2 ? *(unsigned short *)from_where : 0);
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index 98643801a3b..7feb9c56114 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -1348,7 +1348,7 @@ e100_rx(struct net_device *dev)
 
 #ifdef ETHDEBUG
 		printk("head = 0x%x, data = 0x%x, tail = 0x%x, end = 0x%x\n",
-		  skb->head, skb->data, skb->tail, skb->end);
+		  skb->head, skb->data, skb_tail_pointer(skb), skb->end);
 		printk("copying packet to 0x%x.\n", skb_data_ptr);
 #endif
 
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 610e4769efa..c5faf1380e1 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1325,13 +1325,13 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
 	flits = skb_transport_offset(skb) / 8;
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
 	sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
-			     skb->tail - skb_transport_header(skb),
+			     skb->tail - skb->transport_header,
 			     adap->pdev);
 	if (need_skb_unmap()) {
 		setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
 		skb->destructor = deferred_unmap_destructor;
 		((struct unmap_info *)skb->cb)->len = (skb->tail -
-						       skb_transport_header(skb));
+						       skb->transport_header);
 	}
 
 	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
@@ -1353,7 +1353,7 @@ static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
 		return 1;	/* packet fits as immediate data */
 
 	flits = skb_transport_offset(skb) / 8;	/* headers */
-	if (skb->tail != skb_transport_header(skb))
+	if (skb->tail != skb->transport_header)
 		cnt++;
 	return flits_to_desc(flits + sgl_len(cnt));
 }
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index e86deb2ef82..e7c93f44f81 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -3304,7 +3304,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 				 * NOTE: this is a TSO only workaround
 				 * if end byte alignment not correct move us
 				 * into the next dword */
-				if ((unsigned long)(skb->tail - 1) & 4)
+				if ((unsigned long)(skb_tail_pointer(skb) - 1) & 4)
 					break;
 				/* fall through */
 			case e1000_82571:
@@ -4388,7 +4388,7 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter,
 				PCI_DMA_FROMDEVICE);
 			vaddr = kmap_atomic(ps_page->ps_page[0],
 			                    KM_SKB_DATA_SOFTIRQ);
-			memcpy(skb->tail, vaddr, l1);
+			memcpy(skb_tail_pointer(skb), vaddr, l1);
 			kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
 			pci_dma_sync_single_for_device(pdev,
 				ps_page_dma->ps_page_dma[0],
diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c
index b1ad62d89eb..3d82d46f499 100644
--- a/drivers/net/ibm_emac/ibm_emac_core.c
+++ b/drivers/net/ibm_emac/ibm_emac_core.c
@@ -1338,7 +1338,7 @@ static inline int emac_rx_sg_append(struct ocp_enet_private *dev, int slot)
 			dev_kfree_skb(dev->rx_sg_skb);
 			dev->rx_sg_skb = NULL;
 		} else {
-			cacheable_memcpy(dev->rx_sg_skb->tail,
+			cacheable_memcpy(skb_tail_pointer(dev->rx_sg_skb),
 					 dev->rx_skb[slot]->data, len);
 			skb_put(dev->rx_sg_skb, len);
 			emac_recycle_rx_skb(dev, slot, len);
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 0c3649be0d0..98bf51afcee 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -575,7 +575,8 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	int i;
 	dev_dbg(&bp->pdev->dev,
 		"start_xmit: len %u head %p data %p tail %p end %p\n",
-		skb->len, skb->head, skb->data, skb->tail, skb->end);
+		skb->len, skb->head, skb->data,
+		skb_tail_pointer(skb), skb->end);
 	dev_dbg(&bp->pdev->dev,
 		"data:");
 	for (i = 0; i < 16; i++)
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index ec0af65cd5d..73da611fd53 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -1185,7 +1185,7 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt)
 	skb_reserve(skb, 2);
 	insw(ioaddr + AM2150_RCV, skb_put(skb, pkt_len), pkt_len>>1);
 	if (pkt_len & 1)
-	    *(skb->tail-1) = inb(ioaddr + AM2150_RCV);
+	    *(skb_tail_pointer(skb) - 1) = inb(ioaddr + AM2150_RCV);
 	skb->protocol = eth_type_trans(skb, dev);
 	
 	netif_rx(skb); /* Send the packet to the upper (protocol) layers. */
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 46ebf141ee5..600d3ff347f 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -2195,7 +2195,7 @@ static int fill_rxd_3buf(struct s2io_nic *nic, struct RxD_t *rxdp, struct \
 	frag_list->next = NULL;
 	tmp = (void *)ALIGN((long)frag_list->data, ALIGN_SIZE + 1);
 	frag_list->data = tmp;
-	frag_list->tail = tmp;
+	skb_reset_tail_pointer(frag_list);
 
 	/* Buffer-2 receives L4 data payload */
 	((struct RxD3*)rxdp)->Buffer2_ptr = pci_map_single(nic->pdev,
@@ -2349,7 +2349,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
 			tmp += ALIGN_SIZE;
 			tmp &= ~ALIGN_SIZE;
 			skb->data = (void *) (unsigned long)tmp;
-			skb->tail = (void *) (unsigned long)tmp;
+			skb_reset_tail_pointer(skb);
 
 			if (!(((struct RxD3*)rxdp)->Buffer0_ptr))
 				((struct RxD3*)rxdp)->Buffer0_ptr =
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index 9a5850fa644..e46f4cb02c1 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -829,7 +829,9 @@ static void uli526x_rx_packet(struct net_device *dev, struct uli526x_board_info
 					!= NULL) ) {
 					/* size less than COPY_SIZE, allocate a rxlen SKB */
 					skb_reserve(skb, 2); /* 16byte align */
-					memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->tail, rxlen);
+					memcpy(skb_put(skb, rxlen),
+					       skb_tail_pointer(rxptr->rx_skb_ptr),
+					       rxlen);
 					uli526x_reuse_skb(db, rxptr->rx_skb_ptr);
 				} else
 					skb_put(skb, rxlen);
@@ -1175,7 +1177,10 @@ static void uli526x_reuse_skb(struct uli526x_board_info *db, struct sk_buff * sk
 
 	if (!(rxptr->rdes0 & cpu_to_le32(0x80000000))) {
 		rxptr->rx_skb_ptr = skb;
-		rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) );
+		rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev,
+							  skb_tail_pointer(skb),
+							  RX_ALLOC_SIZE,
+							  PCI_DMA_FROMDEVICE));
 		wmb();
 		rxptr->rdes0 = cpu_to_le32(0x80000000);
 		db->rx_avail_cnt++;
@@ -1339,7 +1344,10 @@ static void allocate_rx_buffer(struct uli526x_board_info *db)
 		if ( ( skb = dev_alloc_skb(RX_ALLOC_SIZE) ) == NULL )
 			break;
 		rxptr->rx_skb_ptr = skb; /* FIXME (?) */
-		rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) );
+		rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev,
+							  skb_tail_pointer(skb),
+							  RX_ALLOC_SIZE,
+							  PCI_DMA_FROMDEVICE));
 		wmb();
 		rxptr->rdes0 = cpu_to_le32(0x80000000);
 		rxptr = rxptr->next_rx_desc;
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index b747228c719..aeb2789adf2 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -533,7 +533,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
 		skb->protocol = __constant_htons(NLPID_CCITT_ANSI_LMI);
 		fr_hard_header(&skb, LMI_CCITT_ANSI_DLCI);
 	}
-	data = skb->tail;
+	data = skb_tail_pointer(skb);
 	data[i++] = LMI_CALLREF;
 	data[i++] = dce ? LMI_STATUS : LMI_STATUS_ENQUIRY;
 	if (lmi == LMI_ANSI)
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index d4851465c83..b731f3aae0d 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1636,7 +1636,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             if (nsb) {
                 sc->lmc_rxq[i] = nsb;
                 nsb->dev = dev;
-                sc->lmc_rxring[i].buffer1 = virt_to_bus (nsb->tail);
+                sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb));
             }
             sc->failed_recv_alloc = 1;
             goto skip_packet;
@@ -1679,7 +1679,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             if (nsb) {
                 sc->lmc_rxq[i] = nsb;
                 nsb->dev = dev;
-                sc->lmc_rxring[i].buffer1 = virt_to_bus (nsb->tail);
+                sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb));
                 /* Transferred to 21140 below */
             }
             else {
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index 35a3a50724f..5e3e9e26270 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -922,7 +922,7 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 		if (frag != 0)
 			flen -= hdrlen;
 
-		if (frag_skb->tail + flen > frag_skb->end) {
+		if (skb_tail_pointer(frag_skb) + flen > frag_skb->end) {
 			printk(KERN_WARNING "%s: host decrypted and "
 			       "reassembled frame did not fit skb\n",
 			       dev->name);
diff --git a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c
index 787c0131704..54e3f806cd5 100644
--- a/drivers/s390/net/ctcmain.c
+++ b/drivers/s390/net/ctcmain.c
@@ -706,7 +706,8 @@ ch_action_txdone(fsm_instance * fi, int event, void *arg)
 			spin_unlock(&ch->collect_lock);
 			return;
 		}
-		ch->trans_skb->tail = ch->trans_skb->data = ch->trans_skb_data;
+		ch->trans_skb->data = ch->trans_skb_data;
+		skb_reset_tail_pointer(ch->trans_skb);
 		ch->trans_skb->len = 0;
 		if (ch->prof.maxmulti < (ch->collect_len + 2))
 			ch->prof.maxmulti = ch->collect_len + 2;
@@ -831,7 +832,8 @@ ch_action_rx(fsm_instance * fi, int event, void *arg)
 		ctc_unpack_skb(ch, skb);
 	}
  again:
-	skb->data = skb->tail = ch->trans_skb_data;
+	skb->data = ch->trans_skb_data;
+	skb_reset_tail_pointer(skb);
 	skb->len = 0;
 	if (ctc_checkalloc_buffer(ch, 1))
 		return;
@@ -2226,7 +2228,8 @@ transmit_skb(struct channel *ch, struct sk_buff *skb)
 		 * IDAL support in CTC is broken, so we have to
 		 * care about skb's above 2G ourselves.
 		 */
-		hi = ((unsigned long) skb->tail + LL_HEADER_LENGTH) >> 31;
+		hi = ((unsigned long)skb_tail_pointer(skb) +
+		      LL_HEADER_LENGTH) >> 31;
 		if (hi) {
 			nskb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);
 			if (!nskb) {
@@ -2262,7 +2265,7 @@ transmit_skb(struct channel *ch, struct sk_buff *skb)
 				return -EBUSY;
 			}
 
-			ch->trans_skb->tail = ch->trans_skb->data;
+			skb_reset_tail_pointer(ch->trans_skb);
 			ch->trans_skb->len = 0;
 			ch->ccw[1].count = skb->len;
 			memcpy(skb_put(ch->trans_skb, skb->len), skb->data,
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 82edf201440..cd42bd54988 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -689,7 +689,8 @@ static void conn_action_rx(fsm_instance *fi, int event, void *arg)
 			       msg->length, conn->max_buffsize);
 		return;
 	}
-	conn->rx_buff->data = conn->rx_buff->tail = conn->rx_buff->head;
+	conn->rx_buff->data = conn->rx_buff->head;
+	skb_reset_tail_pointer(conn->rx_buff);
 	conn->rx_buff->len = 0;
 	rc = iucv_message_receive(conn->path, msg, 0, conn->rx_buff->data,
 				  msg->length, NULL);
@@ -735,7 +736,8 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
 			}
 		}
 	}
-	conn->tx_buff->data = conn->tx_buff->tail = conn->tx_buff->head;
+	conn->tx_buff->data = conn->tx_buff->head;
+	skb_reset_tail_pointer(conn->tx_buff);
 	conn->tx_buff->len = 0;
 	spin_lock_irqsave(&conn->collect_lock, saveflags);
 	while ((skb = skb_dequeue(&conn->collect_queue))) {
@@ -1164,8 +1166,8 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
 		 * Copy the skb to a new allocated skb in lowmem only if the
 		 * data is located above 2G in memory or tailroom is < 2.
 		 */
-		unsigned long hi =
-			((unsigned long)(skb->tail + NETIUCV_HDRLEN)) >> 31;
+		unsigned long hi = ((unsigned long)(skb_tail_pointer(skb) +
+				    NETIUCV_HDRLEN)) >> 31;
 		int copied = 0;
 		if (hi || (skb_tailroom(skb) < 2)) {
 			nskb = alloc_skb(skb->len + NETIUCV_HDRLEN +
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index ec63b0ee074..4d8f282b23d 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -335,15 +335,15 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
 
 	sarb = instance->cached_vcc->sarb;
 
-	if (sarb->tail + ATM_CELL_PAYLOAD > sarb->end) {
+	if (skb_tail_pointer(sarb) + ATM_CELL_PAYLOAD > sarb->end) {
 		atm_rldbg(instance, "%s: buffer overrun (sarb->len %u, vcc: 0x%p)!\n",
 				__func__, sarb->len, vcc);
 		/* discard cells already received */
 		skb_trim(sarb, 0);
-		UDSL_ASSERT(sarb->tail + ATM_CELL_PAYLOAD <= sarb->end);
+		UDSL_ASSERT(skb_tail_pointer(sarb) + ATM_CELL_PAYLOAD <= sarb->end);
 	}
 
-	memcpy(sarb->tail, source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD);
+	memcpy(skb_tail_pointer(sarb), source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD);
 	__skb_put(sarb, ATM_CELL_PAYLOAD);
 
 	if (pti & 1) {
@@ -370,7 +370,7 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
 			goto out;
 		}
 
-		if (crc32_be(~0, sarb->tail - pdu_length, pdu_length) != 0xc704dd7b) {
+		if (crc32_be(~0, skb_tail_pointer(sarb) - pdu_length, pdu_length) != 0xc704dd7b) {
 			atm_rldbg(instance, "%s: packet failed crc check (vcc: 0x%p)!\n",
 				  __func__, vcc);
 			atomic_inc(&vcc->stats->rx_err);
@@ -396,7 +396,7 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
 			goto out;	/* atm_charge increments rx_drop */
 		}
 
-		memcpy(skb->data, sarb->tail - pdu_length, length);
+		memcpy(skb->data, skb_tail_pointer(sarb) - pdu_length, length);
 		__skb_put(skb, length);
 
 		vdbg("%s: sending skb 0x%p, skb->len %u, skb->truesize %u",
diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c
index 5808ea08245..f56e2dab371 100644
--- a/drivers/usb/net/asix.c
+++ b/drivers/usb/net/asix.c
@@ -298,7 +298,7 @@ static int asix_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 		if (ax_skb) {
 			ax_skb->len = size;
 			ax_skb->data = packet;
-			ax_skb->tail = packet + size;
+			skb_set_tail_pointer(ax_skb, size);
 			usbnet_skb_return(dev, ax_skb);
 		} else {
 			return 0;
@@ -338,7 +338,7 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 	    && ((headroom + tailroom) >= (4 + padlen))) {
 		if ((headroom < 4) || (tailroom < padlen)) {
 			skb->data = memmove(skb->head + 4, skb->data, skb->len);
-			skb->tail = skb->data + skb->len;
+			skb_set_tail_pointer(skb, skb->len);
 		}
 	} else {
 		struct sk_buff *skb2;
@@ -356,7 +356,7 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 
 	if ((skb->len % 512) == 0) {
 		cpu_to_le32s(&padbytes);
-		memcpy( skb->tail, &padbytes, sizeof(padbytes));
+		memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes));
 		skb_put(skb, sizeof(padbytes));
 	}
 	return skb;
diff --git a/drivers/usb/net/gl620a.c b/drivers/usb/net/gl620a.c
index d257a8e026d..031cf5ca4db 100644
--- a/drivers/usb/net/gl620a.c
+++ b/drivers/usb/net/gl620a.c
@@ -157,7 +157,7 @@ genelink_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 		if ((headroom < (4 + 4*1)) || (tailroom < padlen)) {
 			skb->data = memmove(skb->head + (4 + 4*1),
 					     skb->data, skb->len);
-			skb->tail = skb->data + skb->len;
+			skb_set_tail_pointer(skb, skb->len);
 		}
 	} else {
 		struct sk_buff	*skb2;
diff --git a/drivers/usb/net/net1080.c b/drivers/usb/net/net1080.c
index ccebfdef475..19bf8dae70c 100644
--- a/drivers/usb/net/net1080.c
+++ b/drivers/usb/net/net1080.c
@@ -520,7 +520,7 @@ net1080_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 			skb->data = memmove(skb->head
 						+ sizeof (struct nc_header),
 					    skb->data, skb->len);
-			skb->tail = skb->data + len;
+			skb_set_tail_pointer(skb, len);
 			goto encapsulate;
 		}
 	}
diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c
index 39a21c74fdf..1d36772ba6e 100644
--- a/drivers/usb/net/rndis_host.c
+++ b/drivers/usb/net/rndis_host.c
@@ -588,7 +588,7 @@ rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 		if (likely((sizeof *hdr) <= room)) {
 			skb->data = memmove(skb->head + sizeof *hdr,
 					    skb->data, len);
-			skb->tail = skb->data + len;
+			skb_set_tail_pointer(skb, len);
 			goto fill;
 		}
 	}
-- 
cgit v1.2.3


From 4305b541357ddbd205aa145dc378926b7cb12283 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 19 Apr 2007 20:43:29 -0700
Subject: [SK_BUFF]: Convert skb->end to sk_buff_data_t

Now to convert the last one, skb->data, that will allow many simplifications
and removal of some of the offset helpers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/ambassador.c                      |  2 +-
 drivers/atm/idt77252.c                        | 24 +++++++++++++--------
 drivers/infiniband/hw/cxgb3/iwch_cm.c         |  2 +-
 drivers/net/cris/eth_v10.c                    |  3 ++-
 drivers/net/forcedeth.c                       | 30 ++++++++++++++++++---------
 drivers/net/macb.c                            |  2 +-
 drivers/net/wan/lmc/lmc_main.c                |  2 +-
 drivers/net/wireless/hostap/hostap_80211_rx.c |  2 +-
 drivers/usb/atm/usbatm.c                      |  4 ++--
 9 files changed, 44 insertions(+), 27 deletions(-)

(limited to 'drivers')

diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c
index 3c372e08f77..59651abfa4f 100644
--- a/drivers/atm/ambassador.c
+++ b/drivers/atm/ambassador.c
@@ -821,7 +821,7 @@ static inline void fill_rx_pool (amb_dev * dev, unsigned char pool,
     }
     // cast needed as there is no %? for pointer differences
     PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
-	    skb, skb->head, (long) (skb->end - skb->head));
+	    skb, skb->head, (long) (skb_end_pointer(skb) - skb->head));
     rx.handle = virt_to_bus (skb);
     rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
     if (rx_give (dev, &rx, pool))
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 1e49799cd6c..20f2a3a8265 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -1065,7 +1065,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
 	vcc = vc->rx_vcc;
 
 	pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(skb),
-				    skb->end - skb->data, PCI_DMA_FROMDEVICE);
+				    skb_end_pointer(skb) - skb->data,
+				    PCI_DMA_FROMDEVICE);
 
 	if ((vcc->qos.aal == ATM_AAL0) ||
 	    (vcc->qos.aal == ATM_AAL34)) {
@@ -1194,7 +1195,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
 		}
 
 		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-				 skb->end - skb->data, PCI_DMA_FROMDEVICE);
+				 skb_end_pointer(skb) - skb->data,
+				 PCI_DMA_FROMDEVICE);
 		sb_pool_remove(card, skb);
 
 		skb_trim(skb, len);
@@ -1267,7 +1269,7 @@ idt77252_rx_raw(struct idt77252_dev *card)
 	tail = readl(SAR_REG_RAWCT);
 
 	pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(queue),
-				    queue->end - queue->head - 16,
+				    skb_end_pointer(queue) - queue->head - 16,
 				    PCI_DMA_FROMDEVICE);
 
 	while (head != tail) {
@@ -1363,7 +1365,8 @@ drop:
 				queue = card->raw_cell_head;
 				pci_dma_sync_single_for_cpu(card->pcidev,
 							    IDT77252_PRV_PADDR(queue),
-							    queue->end - queue->data,
+							    (skb_end_pointer(queue) -
+							     queue->data),
 							    PCI_DMA_FROMDEVICE);
 			} else {
 				card->raw_cell_head = NULL;
@@ -1875,7 +1878,7 @@ add_rx_skb(struct idt77252_dev *card, int queue,
 		}
 
 		paddr = pci_map_single(card->pcidev, skb->data,
-				       skb->end - skb->data,
+				       skb_end_pointer(skb) - skb->data,
 				       PCI_DMA_FROMDEVICE);
 		IDT77252_PRV_PADDR(skb) = paddr;
 
@@ -1889,7 +1892,7 @@ add_rx_skb(struct idt77252_dev *card, int queue,
 
 outunmap:
 	pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-			 skb->end - skb->data, PCI_DMA_FROMDEVICE);
+			 skb_end_pointer(skb) - skb->data, PCI_DMA_FROMDEVICE);
 
 	handle = IDT77252_PRV_POOL(skb);
 	card->sbpool[POOL_QUEUE(handle)].skb[POOL_INDEX(handle)] = NULL;
@@ -1906,12 +1909,14 @@ recycle_rx_skb(struct idt77252_dev *card, struct sk_buff *skb)
 	int err;
 
 	pci_dma_sync_single_for_device(card->pcidev, IDT77252_PRV_PADDR(skb),
-				       skb->end - skb->data, PCI_DMA_FROMDEVICE);
+				       skb_end_pointer(skb) - skb->data,
+				       PCI_DMA_FROMDEVICE);
 
 	err = push_rx_skb(card, skb, POOL_QUEUE(handle));
 	if (err) {
 		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-				 skb->end - skb->data, PCI_DMA_FROMDEVICE);
+				 skb_end_pointer(skb) - skb->data,
+				 PCI_DMA_FROMDEVICE);
 		sb_pool_remove(card, skb);
 		dev_kfree_skb(skb);
 	}
@@ -3123,7 +3128,8 @@ deinit_card(struct idt77252_dev *card)
 			if (skb) {
 				pci_unmap_single(card->pcidev,
 						 IDT77252_PRV_PADDR(skb),
-						 skb->end - skb->data,
+						 (skb_end_pointer(skb) -
+						  skb->data),
 						 PCI_DMA_FROMDEVICE);
 				card->sbpool[i].skb[j] = NULL;
 				dev_kfree_skb(skb);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 66ad4d40ba1..e842c65a3f4 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -477,7 +477,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
 	BUG_ON(skb_cloned(skb));
 
 	mpalen = sizeof(*mpa) + ep->plen;
-	if (skb->data + mpalen + sizeof(*req) > skb->end) {
+	if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
 		kfree_skb(skb);
 		skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
 		if (!skb) {
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index 7feb9c56114..5bdf5ca85a6 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -1348,7 +1348,8 @@ e100_rx(struct net_device *dev)
 
 #ifdef ETHDEBUG
 		printk("head = 0x%x, data = 0x%x, tail = 0x%x, end = 0x%x\n",
-		  skb->head, skb->data, skb_tail_pointer(skb), skb->end);
+		       skb->head, skb->data, skb_tail_pointer(skb),
+		       skb_end_pointer(skb));
 		printk("copying packet to 0x%x.\n", skb_data_ptr);
 #endif
 
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index d5d458c3421..d3f4bcaa969 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1386,9 +1386,13 @@ static int nv_alloc_rx(struct net_device *dev)
 		struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
 		if (skb) {
 			np->put_rx_ctx->skb = skb;
-			np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
-							     skb->end-skb->data, PCI_DMA_FROMDEVICE);
-			np->put_rx_ctx->dma_len = skb->end-skb->data;
+			np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
+							     skb->data,
+							(skb_end_pointer(skb) -
+							 skb->data),
+							     PCI_DMA_FROMDEVICE);
+			np->put_rx_ctx->dma_len = (skb_end_pointer(skb) -
+						   skb->data);
 			np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
 			wmb();
 			np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
@@ -1416,9 +1420,13 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 		struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
 		if (skb) {
 			np->put_rx_ctx->skb = skb;
-			np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
-							     skb->end-skb->data, PCI_DMA_FROMDEVICE);
-			np->put_rx_ctx->dma_len = skb->end-skb->data;
+			np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
+							     skb->data,
+							     (skb_end_pointer(skb) -
+							      skb->data),
+							     PCI_DMA_FROMDEVICE);
+			np->put_rx_ctx->dma_len = (skb_end_pointer(skb) -
+						   skb->data);
 			np->put_rx.ex->bufhigh = cpu_to_le64(np->put_rx_ctx->dma) >> 32;
 			np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF;
 			wmb();
@@ -1602,8 +1610,9 @@ static void nv_drain_rx(struct net_device *dev)
 		wmb();
 		if (np->rx_skb[i].skb) {
 			pci_unmap_single(np->pci_dev, np->rx_skb[i].dma,
-						np->rx_skb[i].skb->end-np->rx_skb[i].skb->data,
-						PCI_DMA_FROMDEVICE);
+					 (skb_end_pointer(np->rx_skb[i].skb) -
+					  np->rx_skb[i].skb->data),
+					 PCI_DMA_FROMDEVICE);
 			dev_kfree_skb(np->rx_skb[i].skb);
 			np->rx_skb[i].skb = NULL;
 		}
@@ -4378,7 +4387,8 @@ static int nv_loopback_test(struct net_device *dev)
 	for (i = 0; i < pkt_len; i++)
 		pkt_data[i] = (u8)(i & 0xff);
 	test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
-				       tx_skb->end-tx_skb->data, PCI_DMA_FROMDEVICE);
+				       (skb_end_pointer(tx_skb) -
+					tx_skb->data), PCI_DMA_FROMDEVICE);
 
 	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
 		np->tx_ring.orig[0].buf = cpu_to_le32(test_dma_addr);
@@ -4435,7 +4445,7 @@ static int nv_loopback_test(struct net_device *dev)
 	}
 
 	pci_unmap_page(np->pci_dev, test_dma_addr,
-		       tx_skb->end-tx_skb->data,
+		       (skb_end_pointer(tx_skb) - tx_skb->data),
 		       PCI_DMA_TODEVICE);
 	dev_kfree_skb_any(tx_skb);
  out:
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 98bf51afcee..9e233f8216a 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -576,7 +576,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	dev_dbg(&bp->pdev->dev,
 		"start_xmit: len %u head %p data %p tail %p end %p\n",
 		skb->len, skb->head, skb->data,
-		skb_tail_pointer(skb), skb->end);
+		skb_tail_pointer(skb), skb_end_pointer(skb));
 	dev_dbg(&bp->pdev->dev,
 		"data:");
 	for (i = 0; i < 16; i++)
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index b731f3aae0d..5bb18c0955b 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1932,7 +1932,7 @@ static void lmc_softreset (lmc_softc_t * const sc) /*fold00*/
         sc->lmc_rxring[i].status = 0x80000000;
 
         /* used to be PKT_BUF_SZ now uses skb since we lose some to head room */
-        sc->lmc_rxring[i].length = skb->end - skb->data;
+        sc->lmc_rxring[i].length = skb_end_pointer(skb) - skb->data;
 
         /* use to be tail which is dumb since you're thinking why write
          * to the end of the packj,et but since there's nothing there tail == data
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index 5e3e9e26270..35a3a50724f 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -922,7 +922,7 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 		if (frag != 0)
 			flen -= hdrlen;
 
-		if (skb_tail_pointer(frag_skb) + flen > frag_skb->end) {
+		if (frag_skb->tail + flen > frag_skb->end) {
 			printk(KERN_WARNING "%s: host decrypted and "
 			       "reassembled frame did not fit skb\n",
 			       dev->name);
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index 4d8f282b23d..a076f735a7b 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -335,12 +335,12 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
 
 	sarb = instance->cached_vcc->sarb;
 
-	if (skb_tail_pointer(sarb) + ATM_CELL_PAYLOAD > sarb->end) {
+	if (sarb->tail + ATM_CELL_PAYLOAD > sarb->end) {
 		atm_rldbg(instance, "%s: buffer overrun (sarb->len %u, vcc: 0x%p)!\n",
 				__func__, sarb->len, vcc);
 		/* discard cells already received */
 		skb_trim(sarb, 0);
-		UDSL_ASSERT(skb_tail_pointer(sarb) + ATM_CELL_PAYLOAD <= sarb->end);
+		UDSL_ASSERT(sarb->tail + ATM_CELL_PAYLOAD <= sarb->end);
 	}
 
 	memcpy(skb_tail_pointer(sarb), source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD);
-- 
cgit v1.2.3


From b529ccf2799c14346d1518e9bdf1f88f03643e99 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 19:08:35 -0700
Subject: [NETLINK]: Introduce nlmsg_hdr() helper

For the common "(struct nlmsghdr *)skb->data" sequence, so that we reduce the
number of direct accesses to skb->data and for consistency with all the other
cast skb member helpers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/connector/connector.c       | 2 +-
 drivers/scsi/scsi_netlink.c         | 2 +-
 drivers/scsi/scsi_transport_iscsi.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index a905f782033..7f9c4fb7e5b 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -212,7 +212,7 @@ static void cn_rx_skb(struct sk_buff *__skb)
 	skb = skb_get(__skb);
 
 	if (skb->len >= NLMSG_SPACE(0)) {
-		nlh = (struct nlmsghdr *)skb->data;
+		nlh = nlmsg_hdr(skb);
 
 		if (nlh->nlmsg_len < sizeof(struct cn_msg) ||
 		    skb->len < nlh->nlmsg_len ||
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 1b59b27e887..45646a28524 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -50,7 +50,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb)
 	while (skb->len >= NLMSG_SPACE(0)) {
 		err = 0;
 
-		nlh = (struct nlmsghdr *) skb->data;
+		nlh = nlmsg_hdr(skb);
 		if ((nlh->nlmsg_len < (sizeof(*nlh) + sizeof(*hdr))) ||
 		    (skb->len < nlh->nlmsg_len)) {
 			printk(KERN_WARNING "%s: discarding partial skb\n",
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index ce0d14af33c..10590cd7e9e 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1081,7 +1081,7 @@ iscsi_if_rx(struct sock *sk, int len)
 			struct nlmsghdr	*nlh;
 			struct iscsi_uevent *ev;
 
-			nlh = (struct nlmsghdr *)skb->data;
+			nlh = nlmsg_hdr(skb);
 			if (nlh->nlmsg_len < sizeof(*nlh) ||
 			    skb->len < nlh->nlmsg_len) {
 				break;
-- 
cgit v1.2.3


From f2adc9866742e7904f0268824edc53c948741415 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 20 Mar 2007 11:52:34 -0300
Subject: [ATM] idt77252: Fix double kfree_skb on failure in push_rx_skb

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 drivers/atm/idt77252.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 20f2a3a8265..057efbc55d3 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -1839,7 +1839,6 @@ push_rx_skb(struct idt77252_dev *card, struct sk_buff *skb, int queue)
 		skb_put(skb, SAR_FB_SIZE_3);
 		break;
 	default:
-		dev_kfree_skb(skb);
 		return -1;
 	}
 
-- 
cgit v1.2.3


From d004b8d4903180c111e114726982c194adf2a04f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 20 Mar 2007 12:00:44 -0300
Subject: [LMC]: lmc_main wants to use skb_tailroom

At that point it is equivalent to what was being used, skb->end - skb->data,
and the need is clearly the one skb_tailroom satisfies.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lmc/lmc_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 5bb18c0955b..a576113abbd 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1932,7 +1932,7 @@ static void lmc_softreset (lmc_softc_t * const sc) /*fold00*/
         sc->lmc_rxring[i].status = 0x80000000;
 
         /* used to be PKT_BUF_SZ now uses skb since we lose some to head room */
-        sc->lmc_rxring[i].length = skb_end_pointer(skb) - skb->data;
+        sc->lmc_rxring[i].length = skb_tailroom(skb);
 
         /* use to be tail which is dumb since you're thinking why write
          * to the end of the packj,et but since there's nothing there tail == data
-- 
cgit v1.2.3


From 8b5be26831b973d8013e8b4c9860d9694310cdc6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 20 Mar 2007 12:08:20 -0300
Subject: [FORCEDETH]: Use skb_tailroom where appropriate

Reducing the number of skb->data direct accesses.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/forcedeth.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index d3f4bcaa969..7a018027fcc 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1388,11 +1388,9 @@ static int nv_alloc_rx(struct net_device *dev)
 			np->put_rx_ctx->skb = skb;
 			np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
 							     skb->data,
-							(skb_end_pointer(skb) -
-							 skb->data),
+							     skb_tailroom(skb),
 							     PCI_DMA_FROMDEVICE);
-			np->put_rx_ctx->dma_len = (skb_end_pointer(skb) -
-						   skb->data);
+			np->put_rx_ctx->dma_len = skb_tailroom(skb);
 			np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
 			wmb();
 			np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
@@ -1422,11 +1420,9 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 			np->put_rx_ctx->skb = skb;
 			np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
 							     skb->data,
-							     (skb_end_pointer(skb) -
-							      skb->data),
+							     skb_tailroom(skb),
 							     PCI_DMA_FROMDEVICE);
-			np->put_rx_ctx->dma_len = (skb_end_pointer(skb) -
-						   skb->data);
+			np->put_rx_ctx->dma_len = skb_tailroom(skb);
 			np->put_rx.ex->bufhigh = cpu_to_le64(np->put_rx_ctx->dma) >> 32;
 			np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF;
 			wmb();
@@ -4383,12 +4379,12 @@ static int nv_loopback_test(struct net_device *dev)
 		ret = 0;
 		goto out;
 	}
+	test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
+				       skb_tailroom(tx_skb),
+				       PCI_DMA_FROMDEVICE);
 	pkt_data = skb_put(tx_skb, pkt_len);
 	for (i = 0; i < pkt_len; i++)
 		pkt_data[i] = (u8)(i & 0xff);
-	test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
-				       (skb_end_pointer(tx_skb) -
-					tx_skb->data), PCI_DMA_FROMDEVICE);
 
 	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
 		np->tx_ring.orig[0].buf = cpu_to_le32(test_dma_addr);
-- 
cgit v1.2.3


From 2f7826c02447480c7c1b5500b34fc783f1ed8145 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 26 Mar 2007 02:00:58 -0700
Subject: [WAN] cosa.c: Build fix.

Caused by skb_reset_mac_header() changes, missing semicolon.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index c198511ec3f..23464735fa8 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -773,7 +773,7 @@ static int sppp_rx_done(struct channel_data *chan)
 	}
 	chan->rx_skb->protocol = htons(ETH_P_WAN_PPP);
 	chan->rx_skb->dev = chan->pppdev.dev;
-	skb_reset_mac_header(chan->rx_skb)
+	skb_reset_mac_header(chan->rx_skb);
 	chan->stats.rx_packets++;
 	chan->stats.rx_bytes += chan->cosa->rxsize;
 	netif_rx(chan->rx_skb);
-- 
cgit v1.2.3


From 2a123b86e2b242a4a6db990d2851d45e192f88e5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 27 Mar 2007 18:38:07 -0300
Subject: [BLUETOOTH]: Introduce skb->data accessor methods for
 hci_{acl,event,sco}_hdr

For consistency with other skb data accessors, reducing the number of direct
accesses to skb->data.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 drivers/bluetooth/bluecard_cs.c | 6 +++---
 drivers/bluetooth/bt3c_cs.c     | 6 +++---
 drivers/bluetooth/btuart_cs.c   | 6 +++---
 drivers/bluetooth/hci_h4.c      | 6 +++---
 4 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'drivers')

diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index acfb6a430dc..851de4d5b7d 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -461,20 +461,20 @@ static void bluecard_receive(bluecard_info_t *info, unsigned int offset)
 				switch (info->rx_state) {
 
 				case RECV_WAIT_EVENT_HEADER:
-					eh = (struct hci_event_hdr *)(info->rx_skb->data);
+					eh = hci_event_hdr(info->rx_skb);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = eh->plen;
 					break;
 
 				case RECV_WAIT_ACL_HEADER:
-					ah = (struct hci_acl_hdr *)(info->rx_skb->data);
+					ah = hci_acl_hdr(info->rx_skb);
 					dlen = __le16_to_cpu(ah->dlen);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = dlen;
 					break;
 
 				case RECV_WAIT_SCO_HEADER:
-					sh = (struct hci_sco_hdr *)(info->rx_skb->data);
+					sh = hci_sco_hdr(info->rx_skb);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = sh->dlen;
 					break;
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index 18b0f3992c5..39516074636 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -303,20 +303,20 @@ static void bt3c_receive(bt3c_info_t *info)
 				switch (info->rx_state) {
 
 				case RECV_WAIT_EVENT_HEADER:
-					eh = (struct hci_event_hdr *)(info->rx_skb->data);
+					eh = hci_event_hdr(info->rx_skb);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = eh->plen;
 					break;
 
 				case RECV_WAIT_ACL_HEADER:
-					ah = (struct hci_acl_hdr *)(info->rx_skb->data);
+					ah = hci_acl_hdr(info->rx_skb);
 					dlen = __le16_to_cpu(ah->dlen);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = dlen;
 					break;
 
 				case RECV_WAIT_SCO_HEADER:
-					sh = (struct hci_sco_hdr *)(info->rx_skb->data);
+					sh = hci_sco_hdr(info->rx_skb);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = sh->dlen;
 					break;
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index c1bce75148f..d7d2ea0d86a 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -250,20 +250,20 @@ static void btuart_receive(btuart_info_t *info)
 				switch (info->rx_state) {
 
 				case RECV_WAIT_EVENT_HEADER:
-					eh = (struct hci_event_hdr *)(info->rx_skb->data);
+					eh = hci_event_hdr(info->rx_skb);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = eh->plen;
 					break;
 
 				case RECV_WAIT_ACL_HEADER:
-					ah = (struct hci_acl_hdr *)(info->rx_skb->data);
+					ah = hci_acl_hdr(info->rx_skb);
 					dlen = __le16_to_cpu(ah->dlen);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = dlen;
 					break;
 
 				case RECV_WAIT_SCO_HEADER:
-					sh = (struct hci_sco_hdr *)(info->rx_skb->data);
+					sh = hci_sco_hdr(info->rx_skb);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = sh->dlen;
 					break;
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c
index 34f0afc4240..bfbae14cf93 100644
--- a/drivers/bluetooth/hci_h4.c
+++ b/drivers/bluetooth/hci_h4.c
@@ -188,7 +188,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count)
 				continue;
 
 			case H4_W4_EVENT_HDR:
-				eh = (struct hci_event_hdr *) h4->rx_skb->data;
+				eh = hci_event_hdr(h4->rx_skb);
 
 				BT_DBG("Event header: evt 0x%2.2x plen %d", eh->evt, eh->plen);
 
@@ -196,7 +196,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count)
 				continue;
 
 			case H4_W4_ACL_HDR:
-				ah = (struct hci_acl_hdr *) h4->rx_skb->data;
+				ah = hci_acl_hdr(h4->rx_skb);
 				dlen = __le16_to_cpu(ah->dlen);
 
 				BT_DBG("ACL header: dlen %d", dlen);
@@ -205,7 +205,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count)
 				continue;
 
 			case H4_W4_SCO_HDR:
-				sh = (struct hci_sco_hdr *) h4->rx_skb->data;
+				sh = hci_sco_hdr(h4->rx_skb);
 
 				BT_DBG("SCO header: dlen %d", sh->dlen);
 
-- 
cgit v1.2.3


From d626f62b11e00c16e81e4308ab93d3f13551812a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 27 Mar 2007 18:55:52 -0300
Subject: [SK_BUFF]: Introduce skb_copy_from_linear_data{_offset}

To clearly state the intent of copying from linear sk_buffs, _offset being a
overly long variant but interesting for the sake of saving some bytes.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 drivers/atm/atmtcp.c                          |  4 ++--
 drivers/atm/nicstar.c                         |  6 +++---
 drivers/bluetooth/bfusb.c                     |  2 +-
 drivers/bluetooth/bpa10x.c                    |  4 ++--
 drivers/bluetooth/dtl1_cs.c                   |  2 +-
 drivers/char/pcmcia/synclink_cs.c             |  2 +-
 drivers/infiniband/hw/cxgb3/iwch_cm.c         |  9 ++++++---
 drivers/isdn/act2000/module.c                 |  2 +-
 drivers/isdn/gigaset/usb-gigaset.c            |  2 +-
 drivers/isdn/hardware/avm/b1dma.c             |  3 ++-
 drivers/isdn/hardware/avm/c4.c                |  3 ++-
 drivers/isdn/hisax/elsa_ser.c                 |  6 ++++--
 drivers/isdn/hisax/isdnl2.c                   |  3 ++-
 drivers/isdn/hysdn/hycapi.c                   |  2 +-
 drivers/isdn/hysdn/hysdn_sched.c              |  5 +++--
 drivers/isdn/i4l/isdn_common.c                |  2 +-
 drivers/isdn/i4l/isdn_ppp.c                   |  7 +++++--
 drivers/isdn/isdnloop/isdnloop.c              |  3 ++-
 drivers/isdn/pcbit/capi.c                     | 12 +++++++-----
 drivers/media/dvb/dvb-core/dvb_net.c          |  4 +++-
 drivers/message/fusion/mptlan.c               |  6 +++---
 drivers/net/3c505.c                           |  2 +-
 drivers/net/3c523.c                           |  2 +-
 drivers/net/7990.c                            |  2 +-
 drivers/net/a2065.c                           |  2 +-
 drivers/net/arcnet/capmode.c                  |  3 ++-
 drivers/net/atari_bionet.c                    |  3 ++-
 drivers/net/atari_pamsnet.c                   |  3 ++-
 drivers/net/au1000_eth.c                      |  2 +-
 drivers/net/b44.c                             |  7 ++++---
 drivers/net/bnx2.c                            |  6 ++----
 drivers/net/cassini.c                         |  4 ++--
 drivers/net/chelsio/sge.c                     |  2 +-
 drivers/net/cxgb3/sge.c                       |  5 +++--
 drivers/net/dgrs.c                            |  2 +-
 drivers/net/eepro100.c                        |  5 +++--
 drivers/net/ehea/ehea_main.c                  | 11 ++++++-----
 drivers/net/fec_8xx/fec_main.c                |  4 +++-
 drivers/net/fs_enet/fs_enet-main.c            |  6 ++++--
 drivers/net/hamradio/dmascc.c                 |  2 +-
 drivers/net/hamradio/hdlcdrv.c                |  4 +++-
 drivers/net/hamradio/yam.c                    |  4 +++-
 drivers/net/ioc3-eth.c                        |  2 +-
 drivers/net/irda/ali-ircc.c                   |  5 ++---
 drivers/net/irda/au1k_ir.c                    |  2 +-
 drivers/net/irda/donauboe.c                   |  2 +-
 drivers/net/irda/irda-usb.c                   |  4 ++--
 drivers/net/irda/mcs7780.c                    |  4 ++--
 drivers/net/irda/nsc-ircc.c                   |  5 ++---
 drivers/net/irda/pxaficp_ir.c                 |  2 +-
 drivers/net/irda/smsc-ircc2.c                 |  2 +-
 drivers/net/irda/via-ircc.c                   |  4 ++--
 drivers/net/irda/vlsi_ir.c                    |  2 +-
 drivers/net/irda/w83977af_ir.c                |  2 +-
 drivers/net/lance.c                           |  2 +-
 drivers/net/macmace.c                         |  3 +--
 drivers/net/meth.c                            | 10 +++++-----
 drivers/net/myri_sbus.c                       |  2 +-
 drivers/net/netxen/netxen_nic_main.c          |  6 ++++--
 drivers/net/ni52.c                            |  2 +-
 drivers/net/ni65.c                            |  5 +++--
 drivers/net/pci-skeleton.c                    |  2 +-
 drivers/net/pcmcia/axnet_cs.c                 |  2 +-
 drivers/net/ppp_synctty.c                     |  3 ++-
 drivers/net/pppoe.c                           |  3 ++-
 drivers/net/qla3xxx.c                         |  3 ++-
 drivers/net/rrunner.c                         |  2 +-
 drivers/net/sgiseeq.c                         |  2 +-
 drivers/net/skge.c                            |  2 +-
 drivers/net/sky2.c                            |  2 +-
 drivers/net/sun3_82586.c                      |  2 +-
 drivers/net/sun3lance.c                       |  2 +-
 drivers/net/sungem.c                          |  2 +-
 drivers/net/sunhme.c                          |  2 +-
 drivers/net/sunlance.c                        |  2 +-
 drivers/net/sunqe.c                           |  2 +-
 drivers/net/tg3.c                             |  2 +-
 drivers/net/tlan.c                            |  2 +-
 drivers/net/tokenring/3c359.c                 |  7 +++++--
 drivers/net/tokenring/olympic.c               |  8 ++++++--
 drivers/net/tokenring/tms380tr.c              |  2 +-
 drivers/net/tulip/de2104x.c                   |  4 ++--
 drivers/net/tulip/dmfe.c                      |  6 ++++--
 drivers/net/tulip/uli526x.c                   |  2 +-
 drivers/net/tulip/xircom_cb.c                 |  6 +++---
 drivers/net/tulip/xircom_tulip_cb.c           |  4 +++-
 drivers/net/tun.c                             |  4 ++--
 drivers/net/via-velocity.c                    |  7 ++++---
 drivers/net/wan/lmc/lmc_main.c                |  2 +-
 drivers/net/wan/pc300_drv.c                   |  2 +-
 drivers/net/wan/z85230.c                      |  2 +-
 drivers/net/wireless/atmel.c                  |  4 ++--
 drivers/net/wireless/bcm43xx/bcm43xx_dma.c    |  3 ++-
 drivers/net/wireless/hostap/hostap_80211_rx.c | 13 ++++++++-----
 drivers/net/wireless/hostap/hostap_80211_tx.c | 23 ++++++++++++++---------
 drivers/net/wireless/hostap/hostap_ap.c       |  4 ++--
 drivers/net/wireless/hostap/hostap_hw.c       |  5 +++--
 drivers/net/wireless/ipw2100.c                |  5 +++--
 drivers/net/wireless/ipw2200.c                |  2 +-
 drivers/net/wireless/prism54/islpci_eth.c     | 13 +++++++++----
 drivers/net/wireless/ray_cs.c                 |  3 ++-
 drivers/net/wireless/wavelan.c                |  2 +-
 drivers/net/wireless/zd1201.c                 |  4 ++--
 drivers/s390/net/ctcmain.c                    | 13 ++++++++-----
 drivers/s390/net/lcs.c                        |  2 +-
 drivers/s390/net/netiucv.c                    |  7 +++++--
 drivers/s390/net/qeth_eddp.c                  |  3 ++-
 drivers/usb/atm/usbatm.c                      |  2 +-
 drivers/usb/net/catc.c                        |  2 +-
 drivers/usb/net/pegasus.c                     |  2 +-
 110 files changed, 255 insertions(+), 186 deletions(-)

(limited to 'drivers')

diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index fc518d85543..1b9493a16ac 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -221,7 +221,7 @@ static int atmtcp_v_send(struct atm_vcc *vcc,struct sk_buff *skb)
 	hdr->vpi = htons(vcc->vpi);
 	hdr->vci = htons(vcc->vci);
 	hdr->length = htonl(skb->len);
-	memcpy(skb_put(new_skb,skb->len),skb->data,skb->len);
+	skb_copy_from_linear_data(skb, skb_put(new_skb, skb->len), skb->len);
 	if (vcc->pop) vcc->pop(vcc,skb);
 	else dev_kfree_skb(skb);
 	out_vcc->push(out_vcc,new_skb);
@@ -310,7 +310,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb)
 		goto done;
 	}
 	__net_timestamp(new_skb);
-	memcpy(skb_put(new_skb,skb->len),skb->data,skb->len);
+	skb_copy_from_linear_data(skb, skb_put(new_skb, skb->len), skb->len);
 	out_vcc->push(out_vcc,new_skb);
 	atomic_inc(&vcc->stats->tx);
 	atomic_inc(&out_vcc->stats->rx);
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index 26f4b703349..14ced85b3f5 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -2395,7 +2395,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
                skb->destructor = ns_lb_destructor;
 #endif /* NS_USE_DESTRUCTORS */
                skb_push(skb, NS_SMBUFSIZE);
-               memcpy(skb->data, sb->data, NS_SMBUFSIZE);
+               skb_copy_from_linear_data(sb, skb->data, NS_SMBUFSIZE);
                skb_put(skb, len - NS_SMBUFSIZE);
                ATM_SKB(skb)->vcc = vcc;
 	       __net_timestamp(skb);
@@ -2479,7 +2479,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
 	 {
             /* Copy the small buffer to the huge buffer */
             sb = (struct sk_buff *) iov->iov_base;
-            memcpy(hb->data, sb->data, iov->iov_len);
+            skb_copy_from_linear_data(sb, hb->data, iov->iov_len);
             skb_put(hb, iov->iov_len);
             remaining = len - iov->iov_len;
             iov++;
@@ -2491,7 +2491,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
             {
                lb = (struct sk_buff *) iov->iov_base;
                tocopy = min_t(int, remaining, iov->iov_len);
-               memcpy(skb_tail_pointer(hb), lb->data, tocopy);
+               skb_copy_from_linear_data(lb, skb_tail_pointer(hb), tocopy);
                skb_put(hb, tocopy);
                iov++;
                remaining -= tocopy;
diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c
index 4c766f36d88..b990805806a 100644
--- a/drivers/bluetooth/bfusb.c
+++ b/drivers/bluetooth/bfusb.c
@@ -527,7 +527,7 @@ static int bfusb_send_frame(struct sk_buff *skb)
 		buf[2] = (size == BFUSB_MAX_BLOCK_SIZE) ? 0 : size;
 
 		memcpy(skb_put(nskb, 3), buf, 3);
-		memcpy(skb_put(nskb, size), skb->data + sent, size);
+		skb_copy_from_linear_data_offset(skb, sent, skb_put(nskb, size), size);
 
 		sent  += size;
 		count -= size;
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index 9fca6513562..e8ebd5d3de8 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -231,7 +231,7 @@ static void bpa10x_wakeup(struct bpa10x_data *data)
 		cr = (struct usb_ctrlrequest *) urb->setup_packet;
 		cr->wLength = __cpu_to_le16(skb->len);
 
-		memcpy(urb->transfer_buffer, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, urb->transfer_buffer, skb->len);
 		urb->transfer_buffer_length = skb->len;
 
 		err = usb_submit_urb(urb, GFP_ATOMIC);
@@ -250,7 +250,7 @@ static void bpa10x_wakeup(struct bpa10x_data *data)
 		skb = skb_dequeue(&data->tx_queue);
 
 	if (skb) {
-		memcpy(urb->transfer_buffer, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, urb->transfer_buffer, skb->len);
 		urb->transfer_buffer_length = skb->len;
 
 		err = usb_submit_urb(urb, GFP_ATOMIC);
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c
index 459aa97937a..7f9c54b9964 100644
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -425,7 +425,7 @@ static int dtl1_hci_send_frame(struct sk_buff *skb)
 		return -ENOMEM;
 
 	skb_reserve(s, NSHL);
-	memcpy(skb_put(s, skb->len), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, skb_put(s, skb->len), skb->len);
 	if (skb->len & 0x0001)
 		*skb_put(s, 1) = 0;	/* PAD */
 
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 8d025e9b5bc..157b1d09ab5 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -4169,7 +4169,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev)
 	netif_stop_queue(dev);
 
 	/* copy data to device buffers */
-	memcpy(info->tx_buf, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, info->tx_buf, skb->len);
 	info->tx_get = 0;
 	info->tx_put = info->tx_count = skb->len;
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index e842c65a3f4..3b4b0acd707 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -821,7 +821,8 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
 	/*
 	 * copy the new data into our accumulation buffer.
 	 */
-	memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
+				  skb->len);
 	ep->mpa_pkt_len += skb->len;
 
 	/*
@@ -940,7 +941,8 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
 	/*
 	 * Copy the new data into our accumulation buffer.
 	 */
-	memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
+				  skb->len);
 	ep->mpa_pkt_len += skb->len;
 
 	/*
@@ -1619,7 +1621,8 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	PDBG("%s ep %p\n", __FUNCTION__, ep);
 	skb_pull(skb, sizeof(struct cpl_rdma_terminate));
 	PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len);
-	memcpy(ep->com.qp->attr.terminate_buffer, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
+				  skb->len);
 	ep->com.qp->attr.terminate_msg_len = skb->len;
 	ep->com.qp->attr.is_terminate_local = 0;
 	return CPL_RET_BUF_DONE;
diff --git a/drivers/isdn/act2000/module.c b/drivers/isdn/act2000/module.c
index e3e5c139907..ee2b0b9f8f4 100644
--- a/drivers/isdn/act2000/module.c
+++ b/drivers/isdn/act2000/module.c
@@ -442,7 +442,7 @@ act2000_sendbuf(act2000_card *card, int channel, int ack, struct sk_buff *skb)
 			return 0;
 		}
 		skb_reserve(xmit_skb, 19);
-		memcpy(skb_put(xmit_skb, len), skb->data, len);
+		skb_copy_from_linear_data(skb, skb_put(xmit_skb, len), len);
 	} else {
 		xmit_skb = skb_clone(skb, GFP_ATOMIC);
 		if (!xmit_skb) {
diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c
index 2baef349c12..c8e1c357cec 100644
--- a/drivers/isdn/gigaset/usb-gigaset.c
+++ b/drivers/isdn/gigaset/usb-gigaset.c
@@ -652,7 +652,7 @@ static int write_modem(struct cardstate *cs)
 	 * transmit data
 	 */
 	count = min(bcs->tx_skb->len, (unsigned) ucs->bulk_out_size);
-	memcpy(ucs->bulk_out_buffer, bcs->tx_skb->data, count);
+	skb_copy_from_linear_data(bcs->tx_skb, ucs->bulk_out_buffer, count);
 	skb_pull(bcs->tx_skb, count);
 	atomic_set(&ucs->busy, 1);
 	gig_dbg(DEBUG_OUTPUT, "write_modem: send %d bytes", count);
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 1e2d38e3d68..428872b653e 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -404,7 +404,8 @@ static void b1dma_dispatch_tx(avmcard *card)
 		printk(KERN_DEBUG "tx: put 0x%x len=%d\n", 
 		       skb->data[2], txlen);
 #endif
-		memcpy(dma->sendbuf.dmabuf, skb->data+2, skb->len-2);
+		skb_copy_from_linear_data_offset(skb, 2, dma->sendbuf.dmabuf,
+						 skb->len - 2);
 	}
 	txlen = (txlen + 3) & ~3;
 
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 6f5efa8d78c..d58f927e766 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -457,7 +457,8 @@ static void c4_dispatch_tx(avmcard *card)
 		printk(KERN_DEBUG "%s: tx put 0x%x len=%d\n",
 				card->name, skb->data[2], txlen);
 #endif
-		memcpy(dma->sendbuf.dmabuf, skb->data+2, skb->len-2);
+		skb_copy_from_linear_data_offset(skb, 2, dma->sendbuf.dmabuf,
+						 skb->len - 2);
 	}
 	txlen = (txlen + 3) & ~3;
 
diff --git a/drivers/isdn/hisax/elsa_ser.c b/drivers/isdn/hisax/elsa_ser.c
index ae377e81277..1642dca988a 100644
--- a/drivers/isdn/hisax/elsa_ser.c
+++ b/drivers/isdn/hisax/elsa_ser.c
@@ -254,14 +254,16 @@ write_modem(struct BCState *bcs) {
 	count = len;
 	if (count > MAX_MODEM_BUF - fp) {
 		count = MAX_MODEM_BUF - fp;
-		memcpy(cs->hw.elsa.transbuf + fp, bcs->tx_skb->data, count);
+		skb_copy_from_linear_data(bcs->tx_skb,
+					  cs->hw.elsa.transbuf + fp, count);
 		skb_pull(bcs->tx_skb, count);
 		cs->hw.elsa.transcnt += count;
 		ret = count;
 		count = len - count;
 		fp = 0;
 	}
-	memcpy((cs->hw.elsa.transbuf + fp), bcs->tx_skb->data, count);
+	skb_copy_from_linear_data(bcs->tx_skb,
+				  cs->hw.elsa.transbuf + fp, count);
 	skb_pull(bcs->tx_skb, count);
 	cs->hw.elsa.transcnt += count;
 	ret += count;
diff --git a/drivers/isdn/hisax/isdnl2.c b/drivers/isdn/hisax/isdnl2.c
index cd3b5ad5349..3446f249d67 100644
--- a/drivers/isdn/hisax/isdnl2.c
+++ b/drivers/isdn/hisax/isdnl2.c
@@ -1293,7 +1293,8 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
 		oskb = skb;
 		skb = alloc_skb(oskb->len + i, GFP_ATOMIC);
 		memcpy(skb_put(skb, i), header, i);
-		memcpy(skb_put(skb, oskb->len), oskb->data, oskb->len);
+		skb_copy_from_linear_data(oskb,
+					  skb_put(skb, oskb->len), oskb->len);
 		dev_kfree_skb(oskb);
 	}
 	st->l2.l2l1(st, PH_PULL | INDICATION, skb);
diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c
index b2ae4ec1e49..4433ce0fca5 100644
--- a/drivers/isdn/hysdn/hycapi.c
+++ b/drivers/isdn/hysdn/hycapi.c
@@ -398,7 +398,7 @@ static u16 hycapi_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
 			_len = CAPIMSG_LEN(skb->data);
 			if (_len > 22) {
 				_len2 = _len - 22;
-				memcpy(msghead, skb->data, 22);
+				skb_copy_from_linear_data(skb, msghead, 22);
 				memcpy(skb->data + _len2, msghead, 22);
 				skb_pull(skb, _len2);
 				CAPIMSG_SETLEN(skb->data, 22);
diff --git a/drivers/isdn/hysdn/hysdn_sched.c b/drivers/isdn/hysdn/hysdn_sched.c
index b7b5aa4748a..81db4a190d4 100644
--- a/drivers/isdn/hysdn/hysdn_sched.c
+++ b/drivers/isdn/hysdn/hysdn_sched.c
@@ -113,7 +113,8 @@ hysdn_sched_tx(hysdn_card *card, unsigned char *buf,
 	    (skb = hysdn_tx_netget(card)) != NULL) 
 	{
 		if (skb->len <= maxlen) {
-			memcpy(buf, skb->data, skb->len);	/* copy the packet to the buffer */
+			/* copy the packet to the buffer */
+			skb_copy_from_linear_data(skb, buf, skb->len);
 			*len = skb->len;
 			*chan = CHAN_NDIS_DATA;
 			card->net_tx_busy = 1;	/* we are busy sending network data */
@@ -126,7 +127,7 @@ hysdn_sched_tx(hysdn_card *card, unsigned char *buf,
 	    ((skb = hycapi_tx_capiget(card)) != NULL) )
 	{
 		if (skb->len <= maxlen) {
-			memcpy(buf, skb->data, skb->len);
+			skb_copy_from_linear_data(skb, buf, skb->len);
 			*len = skb->len;
 			*chan = CHAN_CAPI;
 			hycapi_tx_capiack(card);
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index 9c926e41b11..c97330b1987 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -829,7 +829,7 @@ isdn_readbchan(int di, int channel, u_char * buf, u_char * fp, int len, wait_que
 				dflag = 0;
 			}
 			count_put = count_pull;
-			memcpy(cp, skb->data, count_put);
+			skb_copy_from_linear_data(skb, cp, count_put);
 			cp += count_put;
 			len -= count_put;
 #ifdef CONFIG_ISDN_AUDIO
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index be915051cb2..387392cb3d6 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1100,7 +1100,8 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
 					goto drop_packet;
 				}
 				skb_put(skb, skb_old->len + 128);
-				memcpy(skb->data, skb_old->data, skb_old->len);
+				skb_copy_from_linear_data(skb_old, skb->data,
+							  skb_old->len);
 				if (net_dev->local->ppp_slot < 0) {
 					printk(KERN_ERR "%s: net_dev->local->ppp_slot(%d) out of range\n",
 						__FUNCTION__, net_dev->local->ppp_slot);
@@ -1902,7 +1903,9 @@ void isdn_ppp_mp_reassembly( isdn_net_dev * net_dev, isdn_net_local * lp,
 		while( from != to ) {
 			unsigned int len = from->len - MP_HEADER_LEN;
 
-			memcpy(skb_put(skb,len), from->data+MP_HEADER_LEN, len);
+			skb_copy_from_linear_data_offset(from, MP_HEADER_LEN,
+							 skb_put(skb,len),
+							 len);
 			frag = from->next;
 			isdn_ppp_mp_free_skb(mp, from);
 			from = frag; 
diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c
index e3add27dd0e..e93ad59f60b 100644
--- a/drivers/isdn/isdnloop/isdnloop.c
+++ b/drivers/isdn/isdnloop/isdnloop.c
@@ -415,7 +415,8 @@ isdnloop_sendbuf(int channel, struct sk_buff *skb, isdnloop_card * card)
 		spin_lock_irqsave(&card->isdnloop_lock, flags);
 		nskb = dev_alloc_skb(skb->len);
 		if (nskb) {
-			memcpy(skb_put(nskb, len), skb->data, len);
+			skb_copy_from_linear_data(skb,
+						  skb_put(nskb, len), len);
 			skb_queue_tail(&card->bqueue[channel], nskb);
 			dev_kfree_skb(skb);
 		} else
diff --git a/drivers/isdn/pcbit/capi.c b/drivers/isdn/pcbit/capi.c
index 47c59e95898..7b55e151f1b 100644
--- a/drivers/isdn/pcbit/capi.c
+++ b/drivers/isdn/pcbit/capi.c
@@ -429,8 +429,9 @@ int capi_decode_conn_ind(struct pcbit_chan * chan,
 		if (!(info->data.setup.CallingPN = kmalloc(len - count + 1, GFP_ATOMIC)))
 			return -1;
        
-		memcpy(info->data.setup.CallingPN, skb->data + count + 1, 
-		       len - count);
+		skb_copy_from_linear_data_offset(skb, count + 1,
+						 info->data.setup.CallingPN,
+						 len - count);
 		info->data.setup.CallingPN[len - count] = 0;
 
 	}
@@ -457,8 +458,9 @@ int capi_decode_conn_ind(struct pcbit_chan * chan,
 		if (!(info->data.setup.CalledPN = kmalloc(len - count + 1, GFP_ATOMIC)))
 			return -1;
         
-		memcpy(info->data.setup.CalledPN, skb->data + count + 1, 
-		       len - count); 
+		skb_copy_from_linear_data_offset(skb, count + 1,
+						 info->data.setup.CalledPN,
+						 len - count);
 		info->data.setup.CalledPN[len - count] = 0;
 
 	}
@@ -539,7 +541,7 @@ int capi_decode_conn_actv_ind(struct pcbit_chan * chan, struct sk_buff *skb)
 
 #ifdef DEBUG
 	if (len > 1 && len < 31) {
-		memcpy(str, skb->data + 2, len - 1);
+		skb_copy_from_linear_data_offset(skb, 2, str, len - 1);
 		str[len] = 0;
 		printk(KERN_DEBUG "Connected Party Number: %s\n", str);
 	}
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index 9de177a5b9f..6a5ab409c4e 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -697,7 +697,9 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len )
 					}
 					else
 					{
-						memcpy(dest_addr,  priv->ule_skb->data, ETH_ALEN);
+						skb_copy_from_linear_data(priv->ule_skb,
+							      dest_addr,
+							      ETH_ALEN);
 						skb_pull(priv->ule_skb, ETH_ALEN);
 					}
 				}
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index 21fe1b66808..7dd34bd28ef 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -932,7 +932,7 @@ mpt_lan_receive_post_turbo(struct net_device *dev, u32 tmsg)
 		pci_dma_sync_single_for_cpu(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
 					    priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
 
-		memcpy(skb_put(skb, len), old_skb->data, len);
+		skb_copy_from_linear_data(old_skb, skb_put(skb, len), len);
 
 		pci_dma_sync_single_for_device(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
 					       priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
@@ -1093,7 +1093,7 @@ mpt_lan_receive_post_reply(struct net_device *dev,
 						    priv->RcvCtl[ctx].dma,
 						    priv->RcvCtl[ctx].len,
 						    PCI_DMA_FROMDEVICE);
-			memcpy(skb_put(skb, l), old_skb->data, l);
+			skb_copy_from_linear_data(old_skb, skb_put(skb, l), l);
 
 			pci_dma_sync_single_for_device(mpt_dev->pcidev,
 						       priv->RcvCtl[ctx].dma,
@@ -1122,7 +1122,7 @@ mpt_lan_receive_post_reply(struct net_device *dev,
 					    priv->RcvCtl[ctx].len,
 					    PCI_DMA_FROMDEVICE);
 
-		memcpy(skb_put(skb, len), old_skb->data, len);
+		skb_copy_from_linear_data(old_skb, skb_put(skb, len), len);
 
 		pci_dma_sync_single_for_device(mpt_dev->pcidev,
 					       priv->RcvCtl[ctx].dma,
diff --git a/drivers/net/3c505.c b/drivers/net/3c505.c
index c693b5a7950..e985a85a562 100644
--- a/drivers/net/3c505.c
+++ b/drivers/net/3c505.c
@@ -1025,7 +1025,7 @@ static int send_packet(struct net_device *dev, struct sk_buff *skb)
 	adapter->current_dma.start_time = jiffies;
 
 	if ((unsigned long)(skb->data + nlen) >= MAX_DMA_ADDRESS || nlen != skb->len) {
-		memcpy(adapter->dma_buffer, skb->data, nlen);
+		skb_copy_from_linear_data(skb, adapter->dma_buffer, nlen);
 		memset(adapter->dma_buffer+skb->len, 0, nlen-skb->len);
 		target = isa_virt_to_bus(adapter->dma_buffer);
 	}
diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c
index 6b2036df685..a384f7d478a 100644
--- a/drivers/net/3c523.c
+++ b/drivers/net/3c523.c
@@ -1145,7 +1145,7 @@ static int elmc_send_packet(struct sk_buff *skb, struct net_device *dev)
 
 	if (len != skb->len)
 		memset((char *) p->xmit_cbuffs[p->xmit_count], 0, ETH_ZLEN);
-	memcpy((char *) p->xmit_cbuffs[p->xmit_count], (char *) (skb->data), skb->len);
+	skb_copy_from_linear_data(skb, p->xmit_cbuffs[p->xmit_count], skb->len);
 
 #if (NUM_XMIT_BUFFS == 1)
 #ifdef NO_NOPCOMMANDS
diff --git a/drivers/net/7990.c b/drivers/net/7990.c
index c50264aea16..d396f996af5 100644
--- a/drivers/net/7990.c
+++ b/drivers/net/7990.c
@@ -567,7 +567,7 @@ int lance_start_xmit (struct sk_buff *skb, struct net_device *dev)
 
     	if (skb->len < ETH_ZLEN)
     		memset((char *)&ib->tx_buf[entry][0], 0, ETH_ZLEN);
-        memcpy ((char *)&ib->tx_buf [entry][0], skb->data, skblen);
+        skb_copy_from_linear_data(skb, &ib->tx_buf[entry][0], skblen);
 
         /* Now, give the packet to the lance */
         ib->btx_ring [entry].tmd1_bits = (LE_T1_POK|LE_T1_OWN);
diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c
index b38fc65005e..1226cbba045 100644
--- a/drivers/net/a2065.c
+++ b/drivers/net/a2065.c
@@ -598,7 +598,7 @@ static int lance_start_xmit (struct sk_buff *skb, struct net_device *dev)
 	ib->btx_ring [entry].length = (-len) | 0xf000;
 	ib->btx_ring [entry].misc = 0;
 
-	memcpy ((char *)&ib->tx_buf [entry][0], skb->data, skblen);
+	skb_copy_from_linear_data(skb, &ib->tx_buf [entry][0], skblen);
 
 	/* Clear the slack of the packet, do I need this? */
 	if (len != skblen)
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index f6a87bd20ff..cc4610db639 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -273,7 +273,8 @@ static int ack_tx(struct net_device *dev, int acked)
   /* skb_pull(ackskb, ARC_HDR_SIZE); */
 
 
-  memcpy(ackpkt, lp->outgoing.skb->data, ARC_HDR_SIZE+sizeof(struct arc_cap));
+  skb_copy_from_linear_data(lp->outgoing.skb, ackpkt,
+		ARC_HDR_SIZE + sizeof(struct arc_cap));
   ackpkt->soft.cap.proto=0; /* using protocol 0 for acknowledge */
   ackpkt->soft.cap.mes.ack=acked;
 
diff --git a/drivers/net/atari_bionet.c b/drivers/net/atari_bionet.c
index f52e7f22f63..13dbed368d6 100644
--- a/drivers/net/atari_bionet.c
+++ b/drivers/net/atari_bionet.c
@@ -453,7 +453,8 @@ bionet_send_packet(struct sk_buff *skb, struct net_device *dev) {
 		stdma_lock(bionet_intr, NULL);
 		local_irq_restore(flags);
 		if( !STRAM_ADDR(buf+length-1) ) {
-			memcpy(nic_packet->buffer, skb->data, length);
+			skb_copy_from_linear_data(skb, nic_packet->buffer,
+						  length);
 			buf = (unsigned long)&((struct nic_pkt_s *)phys_nic_packet)->buffer;
 		}
 
diff --git a/drivers/net/atari_pamsnet.c b/drivers/net/atari_pamsnet.c
index 3b543614928..745101d7451 100644
--- a/drivers/net/atari_pamsnet.c
+++ b/drivers/net/atari_pamsnet.c
@@ -717,7 +717,8 @@ pamsnet_send_packet(struct sk_buff *skb, struct net_device *dev) {
 
 		local_irq_restore(flags);
 		if( !STRAM_ADDR(buf+length-1) ) {
-			memcpy(nic_packet->buffer, skb->data, length);
+			skb_copy_from_linear_data(skb, nic_packet->buffer,
+						  length);
 			buf = (unsigned long)phys_nic_packet;
 		}
 
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index 97b55f2546c..d10fb80e9a6 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -1125,7 +1125,7 @@ static int au1000_tx(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	pDB = aup->tx_db_inuse[aup->tx_head];
-	memcpy((void *)pDB->vaddr, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, pDB->vaddr, skb->len);
 	if (skb->len < ETH_ZLEN) {
 		for (i=skb->len; i<ETH_ZLEN; i++) {
 			((char *)pDB->vaddr)[i] = 0;
diff --git a/drivers/net/b44.c b/drivers/net/b44.c
index f67d97de97f..879a2fff474 100644
--- a/drivers/net/b44.c
+++ b/drivers/net/b44.c
@@ -828,8 +828,8 @@ static int b44_rx(struct b44 *bp, int budget)
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			/* DMA sync done above, copy just the actual packet */
-			memcpy(copy_skb->data, skb->data+bp->rx_offset, len);
-
+			skb_copy_from_linear_data_offset(skb, bp->rx_offset,
+							 copy_skb->data, len);
 			skb = copy_skb;
 		}
 		skb->ip_summed = CHECKSUM_NONE;
@@ -1006,7 +1006,8 @@ static int b44_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto err_out;
 		}
 
-		memcpy(skb_put(bounce_skb, len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(bounce_skb, len),
+					  skb->len);
 		dev_kfree_skb_any(skb);
 		skb = bounce_skb;
 	}
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 7e7b5f34403..f98a2205a09 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -1884,10 +1884,8 @@ bnx2_rx_int(struct bnx2 *bp, int budget)
 				goto reuse_rx;
 
 			/* aligned copy */
-			memcpy(new_skb->data,
-				skb->data + bp->rx_offset - 2,
-				len + 2);
-
+			skb_copy_from_linear_data_offset(skb, bp->rx_offset - 2,
+				      new_skb->data, len + 2);
 			skb_reserve(new_skb, 2);
 			skb_put(new_skb, len);
 
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index bd3ab6493e3..4aec747d9e4 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2846,8 +2846,8 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring,
 			      ctrl | TX_DESC_SOF, 0);
 		entry = TX_DESC_NEXT(ring, entry);
 
-		memcpy(tx_tiny_buf(cp, ring, entry), skb->data +
-		       len - tabort, tabort);
+		skb_copy_from_linear_data_offset(skb, len - tabort,
+			      tx_tiny_buf(cp, ring, entry), tabort);
 		mapping = tx_tiny_map(cp, ring, entry, tentry);
 		cas_write_txd(cp, ring, entry, mapping, tabort, ctrl,
 			      (nr_frags == 0));
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 43e92f9f0bc..1be1bbd1616 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1062,7 +1062,7 @@ static inline struct sk_buff *get_packet(struct pci_dev *pdev,
 					    pci_unmap_addr(ce, dma_addr),
 					    pci_unmap_len(ce, dma_len),
 					    PCI_DMA_FROMDEVICE);
-		memcpy(skb->data, ce->skb->data, len);
+		skb_copy_from_linear_data(ce->skb, skb->data, len);
 		pci_dma_sync_single_for_device(pdev,
 					       pci_unmap_addr(ce, dma_addr),
 					       pci_unmap_len(ce, dma_len),
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index c5faf1380e1..166c959c94b 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -913,7 +913,8 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 		if (skb->len <= WR_LEN - sizeof(*cpl)) {
 			q->sdesc[pidx].skb = NULL;
 			if (!skb->data_len)
-				memcpy(&d->flit[2], skb->data, skb->len);
+				skb_copy_from_linear_data(skb, &d->flit[2],
+							  skb->len);
 			else
 				skb_copy_bits(skb, 0, &d->flit[2], skb->len);
 
@@ -1771,7 +1772,7 @@ static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
 			__skb_put(skb, len);
 			pci_dma_sync_single_for_cpu(adap->pdev, mapping, len,
 						    PCI_DMA_FROMDEVICE);
-			memcpy(skb->data, sd->t.skb->data, len);
+			skb_copy_from_linear_data(sd->t.skb, skb->data, len);
 			pci_dma_sync_single_for_device(adap->pdev, mapping, len,
 						       PCI_DMA_FROMDEVICE);
 		} else if (!drop_thres)
diff --git a/drivers/net/dgrs.c b/drivers/net/dgrs.c
index d223c38966f..df62c0232f3 100644
--- a/drivers/net/dgrs.c
+++ b/drivers/net/dgrs.c
@@ -741,7 +741,7 @@ static int dgrs_start_xmit(struct sk_buff *skb, struct net_device *devN)
 		}
 
 		amt = min_t(unsigned int, len, rbdp->size - count);
-		memcpy( (char *) S2H(rbdp->buf) + count, skb->data + i, amt);
+		skb_copy_from_linear_data_offset(skb, i, S2H(rbdp->buf) + count, amt);
 		i += amt;
 		count += amt;
 		len -= amt;
diff --git a/drivers/net/eepro100.c b/drivers/net/eepro100.c
index db658bc491a..6c267c38df9 100644
--- a/drivers/net/eepro100.c
+++ b/drivers/net/eepro100.c
@@ -1804,8 +1804,9 @@ speedo_rx(struct net_device *dev)
 				eth_copy_and_sum(skb, sp->rx_skbuff[entry]->data, pkt_len, 0);
 				skb_put(skb, pkt_len);
 #else
-				memcpy(skb_put(skb, pkt_len), sp->rx_skbuff[entry]->data,
-					   pkt_len);
+				skb_copy_from_linear_data(sp->rx_skbuff[entry],
+							  skb_put(skb, pkt_len),
+							  pkt_len);
 #endif
 				pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[entry],
 											   sizeof(struct RxFD) + pkt_len,
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 63732d2305b..8b539207263 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -1306,7 +1306,7 @@ static void write_swqe2_TSO(struct sk_buff *skb,
 
 	if (skb_data_size >= headersize) {
 		/* copy immediate data */
-		memcpy(imm_data, skb->data, headersize);
+		skb_copy_from_linear_data(skb, imm_data, headersize);
 		swqe->immediate_data_length = headersize;
 
 		if (skb_data_size > headersize) {
@@ -1337,7 +1337,7 @@ static void write_swqe2_nonTSO(struct sk_buff *skb,
 	 */
 	if (skb_data_size >= SWQE2_MAX_IMM) {
 		/* copy immediate data */
-		memcpy(imm_data, skb->data, SWQE2_MAX_IMM);
+		skb_copy_from_linear_data(skb, imm_data, SWQE2_MAX_IMM);
 
 		swqe->immediate_data_length = SWQE2_MAX_IMM;
 
@@ -1350,7 +1350,7 @@ static void write_swqe2_nonTSO(struct sk_buff *skb,
 			swqe->descriptors++;
 		}
 	} else {
-		memcpy(imm_data, skb->data, skb_data_size);
+		skb_copy_from_linear_data(skb, imm_data, skb_data_size);
 		swqe->immediate_data_length = skb_data_size;
 	}
 }
@@ -1772,10 +1772,11 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev,
 	/* copy (immediate) data */
 	if (nfrags == 0) {
 		/* data is in a single piece */
-		memcpy(imm_data, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, imm_data, skb->len);
 	} else {
 		/* first copy data from the skb->data buffer ... */
-		memcpy(imm_data, skb->data, skb->len - skb->data_len);
+		skb_copy_from_linear_data(skb, imm_data,
+					  skb->len - skb->data_len);
 		imm_data += skb->len - skb->data_len;
 
 		/* ... then copy data from the fragments */
diff --git a/drivers/net/fec_8xx/fec_main.c b/drivers/net/fec_8xx/fec_main.c
index 698dba8f2aa..e824d5d231a 100644
--- a/drivers/net/fec_8xx/fec_main.c
+++ b/drivers/net/fec_8xx/fec_main.c
@@ -551,7 +551,9 @@ static int fec_enet_rx_common(struct net_device *dev, int *budget)
 				skbn = dev_alloc_skb(pkt_len + 2);
 				if (skbn != NULL) {
 					skb_reserve(skbn, 2);	/* align IP header */
-					memcpy(skbn->data, skb->data, pkt_len);
+					skb_copy_from_linear_data(skb
+								  skbn->data,
+								  pkt_len);
 					/* swap */
 					skbt = skb;
 					skb = skbn;
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index 9f6ef315ce5..e2ddd617493 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -160,7 +160,8 @@ static int fs_enet_rx_napi(struct net_device *dev, int *budget)
 				skbn = dev_alloc_skb(pkt_len + 2);
 				if (skbn != NULL) {
 					skb_reserve(skbn, 2);	/* align IP header */
-					memcpy(skbn->data, skb->data, pkt_len);
+					skb_copy_from_linear_data(skb,
+						      skbn->data, pkt_len);
 					/* swap */
 					skbt = skb;
 					skb = skbn;
@@ -293,7 +294,8 @@ static int fs_enet_rx_non_napi(struct net_device *dev)
 				skbn = dev_alloc_skb(pkt_len + 2);
 				if (skbn != NULL) {
 					skb_reserve(skbn, 2);	/* align IP header */
-					memcpy(skbn->data, skb->data, pkt_len);
+					skb_copy_from_linear_data(skb,
+						      skbn->data, pkt_len);
 					/* swap */
 					skbt = skb;
 					skb = skbn;
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index 0fbb414b5a4..3be8c504759 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -930,7 +930,7 @@ static int scc_send_packet(struct sk_buff *skb, struct net_device *dev)
 
 	/* Transfer data to DMA buffer */
 	i = priv->tx_head;
-	memcpy(priv->tx_buf[i], skb->data + 1, skb->len - 1);
+	skb_copy_from_linear_data_offset(skb, 1, priv->tx_buf[i], skb->len - 1);
 	priv->tx_len[i] = skb->len - 1;
 
 	/* Clear interrupts while we touch our circular buffers */
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index f5a17ad9d3d..b33adc6a340 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -317,7 +317,9 @@ void hdlcdrv_transmitter(struct net_device *dev, struct hdlcdrv_state *s)
 				dev_kfree_skb_irq(skb);
 				break;
 			}
-			memcpy(s->hdlctx.buffer, skb->data+1, pkt_len);
+			skb_copy_from_linear_data_offset(skb, 1,
+							 s->hdlctx.buffer,
+							 pkt_len);
 			dev_kfree_skb_irq(skb);
 			s->hdlctx.bp = s->hdlctx.buffer;
 			append_crc_ccitt(s->hdlctx.buffer, pkt_len);
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index ee3ea4fa729..ac2d6dd9dbe 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -638,7 +638,9 @@ static void yam_tx_byte(struct net_device *dev, struct yam_port *yp)
         			dev_kfree_skb_any(skb);
 				break;
 			}
-			memcpy(yp->tx_buf, skb->data + 1, yp->tx_len);
+			skb_copy_from_linear_data_offset(skb->data, 1,
+							 yp->tx_buf,
+							 yp->tx_len);
 			dev_kfree_skb_any(skb);
 			yp->tx_count = 0;
 			yp->tx_crcl = 0x21;
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index bc62e770a25..f749e07c642 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1443,7 +1443,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (len <= 104) {
 		/* Short packet, let's copy it directly into the ring.  */
-		memcpy(desc->data, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, desc->data, skb->len);
 		if (len < ETH_ZLEN) {
 			/* Very short packet, pad with zeros at the end. */
 			memset(desc->data + len, 0, ETH_ZLEN - len);
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index 0f10758226f..fb2248a2551 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -1472,9 +1472,8 @@ static int ali_ircc_fir_hard_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	self->stats.tx_bytes += skb->len;
 
-	memcpy(self->tx_fifo.queue[self->tx_fifo.free].start, skb->data, 
-	       skb->len);
-	
+	skb_copy_from_linear_data(skb, self->tx_fifo.queue[self->tx_fifo.free].start,
+		      skb->len);
 	self->tx_fifo.len++;
 	self->tx_fifo.free++;
 
diff --git a/drivers/net/irda/au1k_ir.c b/drivers/net/irda/au1k_ir.c
index 27afd0f367d..cdd1f6c1e74 100644
--- a/drivers/net/irda/au1k_ir.c
+++ b/drivers/net/irda/au1k_ir.c
@@ -526,7 +526,7 @@ static int au1k_irda_hard_xmit(struct sk_buff *skb, struct net_device *dev)
 	
 	if (aup->speed == 4000000) {
 		/* FIR */
-		memcpy((void *)pDB->vaddr, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, pDB->vaddr, skb->len);
 		ptxd->count_0 = skb->len & 0xff;
 		ptxd->count_1 = (skb->len >> 8) & 0xff;
 
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index ddfa6c38a16..9987a0dc1ea 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1119,7 +1119,7 @@ dumpbufs(skb->data,skb->len,'>');
   else
     {
       len = skb->len;
-      memcpy (self->tx_bufs[self->txs], skb->data, len);
+      skb_copy_from_linear_data(skb, self->tx_bufs[self->txs], len);
     }
   self->ring->tx[self->txs].len = len & 0x0fff;
 
diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c
index 6ef375a095f..0ac240ca905 100644
--- a/drivers/net/irda/irda-usb.c
+++ b/drivers/net/irda/irda-usb.c
@@ -441,7 +441,7 @@ static int irda_usb_hard_xmit(struct sk_buff *skb, struct net_device *netdev)
 		goto drop;
 	}
 
-	memcpy(self->tx_buff + self->header_length, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, self->tx_buff + self->header_length, skb->len);
 
 	/* Change setting for next frame */
 	if (self->capability & IUC_STIR421X) {
@@ -902,7 +902,7 @@ static void irda_usb_receive(struct urb *urb)
 
 	if(docopy) {
 		/* Copy packet, so we can recycle the original */
-		memcpy(newskb->data, skb->data, urb->actual_length);
+		skb_copy_from_linear_data(skb, newskb->data, urb->actual_length);
 		/* Deliver this new skb */
 		dataskb = newskb;
 		/* And hook the old skb to the URB
diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index 3ff1f4b33c0..4b0037e498f 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -353,7 +353,7 @@ static unsigned mcs_wrap_fir_skb(const struct sk_buff *skb, __u8 *buf)
 	buf[0] = len & 0xff;
 	buf[1] = (len >> 8) & 0xff;
 	/* copy the data into the tx buffer. */
-	memcpy(buf+2, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, buf + 2, skb->len);
 	/* put the fcs in the last four bytes in little endian order. */
 	buf[len - 4] = fcs & 0xff;
 	buf[len - 3] = (fcs >> 8) & 0xff;
@@ -377,7 +377,7 @@ static unsigned mcs_wrap_mir_skb(const struct sk_buff *skb, __u8 *buf)
 	buf[0] = len & 0xff;
 	buf[1] = (len >> 8) & 0xff;
 	/* copy the data */
-	memcpy(buf+2, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, buf + 2, skb->len);
 	/* put the fcs in last two bytes in little endian order. */
 	buf[len - 2] = fcs & 0xff;
 	buf[len - 1] = (fcs >> 8) & 0xff;
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index 8ce7dad582f..0ff99271413 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -1466,9 +1466,8 @@ static int nsc_ircc_hard_xmit_fir(struct sk_buff *skb, struct net_device *dev)
 
 	self->stats.tx_bytes += skb->len;
 
-	memcpy(self->tx_fifo.queue[self->tx_fifo.free].start, skb->data, 
-	       skb->len);
-	
+	skb_copy_from_linear_data(skb, self->tx_fifo.queue[self->tx_fifo.free].start,
+		      skb->len);
 	self->tx_fifo.len++;
 	self->tx_fifo.free++;
 
diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c
index f35d7d42624..b3e1107420a 100644
--- a/drivers/net/irda/pxaficp_ir.c
+++ b/drivers/net/irda/pxaficp_ir.c
@@ -484,7 +484,7 @@ static int pxa_irda_hard_xmit(struct sk_buff *skb, struct net_device *dev)
 		unsigned long mtt = irda_get_mtt(skb);
 
 		si->dma_tx_buff_len = skb->len;
-		memcpy(si->dma_tx_buff, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, si->dma_tx_buff, skb->len);
 
 		if (mtt)
 			while ((unsigned)(OSCR - si->last_oscr)/4 < mtt)
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index e8453868d74..198bf3bfa70 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -1162,7 +1162,7 @@ static int smsc_ircc_hard_xmit_fir(struct sk_buff *skb, struct net_device *dev)
 		self->new_speed = speed;
 	}
 
-	memcpy(self->tx_buff.head, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, self->tx_buff.head, skb->len);
 
 	self->tx_buff.len = skb->len;
 	self->tx_buff.data = self->tx_buff.head;
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index 5ff41631460..45bbd668615 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -925,8 +925,8 @@ static int via_ircc_hard_xmit_fir(struct sk_buff *skb,
 
 	self->tx_fifo.tail += skb->len;
 	self->stats.tx_bytes += skb->len;
-	memcpy(self->tx_fifo.queue[self->tx_fifo.free].start, skb->data,
-	       skb->len);
+	skb_copy_from_linear_data(skb,
+		      self->tx_fifo.queue[self->tx_fifo.free].start, skb->len);
 	self->tx_fifo.len++;
 	self->tx_fifo.free++;
 //F01   if (self->tx_fifo.len == 1) {
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index 79b407f3a49..c4be973867a 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -993,7 +993,7 @@ static int vlsi_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 			goto drop;
 		}
 		else
-			memcpy(rd->buf, skb->data, len);
+			skb_copy_from_linear_data(skb, rd->buf, len);
 	}
 
 	rd->skb = skb;			/* remember skb for tx-complete stats */
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index bee44513095..0d4a68618fc 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -529,7 +529,7 @@ int w83977af_hard_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Decide if we should use PIO or DMA transfer */
 	if (self->io.speed > PIO_MAX_SPEED) {
 		self->tx_buff.data = self->tx_buff.head;
-		memcpy(self->tx_buff.data, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, self->tx_buff.data, skb->len);
 		self->tx_buff.len = skb->len;
 		
 		mtt = irda_get_mtt(skb);
diff --git a/drivers/net/lance.c b/drivers/net/lance.c
index 11cbcb946db..0fe96c85828 100644
--- a/drivers/net/lance.c
+++ b/drivers/net/lance.c
@@ -988,7 +988,7 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (lance_debug > 5)
 			printk("%s: bouncing a high-memory packet (%#x).\n",
 				   dev->name, (u32)isa_virt_to_bus(skb->data));
-		memcpy(&lp->tx_bounce_buffs[entry], skb->data, skb->len);
+		skb_copy_from_linear_data(skb, &lp->tx_bounce_buffs[entry], skb->len);
 		lp->tx_ring[entry].base =
 			((u32)isa_virt_to_bus((lp->tx_bounce_buffs + entry)) & 0xffffff) | 0x83000000;
 		dev_kfree_skb(skb);
diff --git a/drivers/net/macmace.c b/drivers/net/macmace.c
index 8c07ffc9c24..27911c07558 100644
--- a/drivers/net/macmace.c
+++ b/drivers/net/macmace.c
@@ -420,8 +420,7 @@ static int mace_xmit_start(struct sk_buff *skb, struct net_device *dev)
 	mp->stats.tx_bytes += skb->len;
 
 	/* We need to copy into our xmit buffer to take care of alignment and caching issues */
-
-	memcpy((void *) mp->tx_ring, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, mp->tx_ring, skb->len);
 
 	/* load the Tx DMA and fire it off */
 
diff --git a/drivers/net/meth.c b/drivers/net/meth.c
index fafe6783523..0343ea12b29 100644
--- a/drivers/net/meth.c
+++ b/drivers/net/meth.c
@@ -608,7 +608,7 @@ static void meth_tx_short_prepare(struct meth_private *priv,
 
 	desc->header.raw = METH_TX_CMD_INT_EN | (len-1) | ((128-len) << 16);
 	/* maybe I should set whole thing to 0 first... */
-	memcpy(desc->data.dt + (120 - len), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, desc->data.dt + (120 - len), skb->len);
 	if (skb->len < len)
 		memset(desc->data.dt + 120 - len + skb->len, 0, len-skb->len);
 }
@@ -626,8 +626,8 @@ static void meth_tx_1page_prepare(struct meth_private *priv,
 
 	/* unaligned part */
 	if (unaligned_len) {
-		memcpy(desc->data.dt + (120 - unaligned_len),
-		       skb->data, unaligned_len);
+		skb_copy_from_linear_data(skb, desc->data.dt + (120 - unaligned_len),
+			      unaligned_len);
 		desc->header.raw |= (128 - unaligned_len) << 16;
 	}
 
@@ -652,8 +652,8 @@ static void meth_tx_2page_prepare(struct meth_private *priv,
 	desc->header.raw = METH_TX_CMD_INT_EN | TX_CATBUF1 | TX_CATBUF2| (skb->len - 1);
 	/* unaligned part */
 	if (unaligned_len){
-		memcpy(desc->data.dt + (120 - unaligned_len),
-		       skb->data, unaligned_len);
+		skb_copy_from_linear_data(skb, desc->data.dt + (120 - unaligned_len),
+			      unaligned_len);
 		desc->header.raw |= (128 - unaligned_len) << 16;
 	}
 
diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c
index e1f16fb0584..13444da9327 100644
--- a/drivers/net/myri_sbus.c
+++ b/drivers/net/myri_sbus.c
@@ -502,7 +502,7 @@ static void myri_rx(struct myri_eth *mp, struct net_device *dev)
 			copy_skb->dev = dev;
 			DRX(("resv_and_put "));
 			skb_put(copy_skb, len);
-			memcpy(copy_skb->data, skb->data, len);
+			skb_copy_from_linear_data(skb, copy_skb->data, len);
 
 			/* Reuse original ring buffer. */
 			DRX(("reuse "));
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index b488e94bc4c..ab25c225a07 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -920,8 +920,10 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 			/* copy the next 64 bytes - should be enough except
 			 * for pathological case
 			 */
-			memcpy((void *)hwdesc, (void *)(skb->data) +
-			       first_hdr_len, hdr_len - first_hdr_len);
+			skb_copy_from_linear_data_offset(skb, first_hdr_len,
+							 hwdesc,
+							 (hdr_len -
+							  first_hdr_len));
 			producer = get_next_index(producer, max_tx_desc_count);
 		}
 	}
diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c
index 70b6812a8a7..8646698c77d 100644
--- a/drivers/net/ni52.c
+++ b/drivers/net/ni52.c
@@ -1182,7 +1182,7 @@ static int ni52_send_packet(struct sk_buff *skb, struct net_device *dev)
 	else
 #endif
 	{
-		memcpy((char *)p->xmit_cbuffs[p->xmit_count],(char *)(skb->data),skb->len);
+		skb_copy_from_linear_data(skb, p->xmit_cbuffs[p->xmit_count], skb->len);
 		len = skb->len;
 		if (len < ETH_ZLEN) {
 			len = ETH_ZLEN;
diff --git a/drivers/net/ni65.c b/drivers/net/ni65.c
index 782201d12c2..3818edf0ac1 100644
--- a/drivers/net/ni65.c
+++ b/drivers/net/ni65.c
@@ -1176,8 +1176,9 @@ static int ni65_send_packet(struct sk_buff *skb, struct net_device *dev)
 		if( (unsigned long) (skb->data + skb->len) > 0x1000000) {
 #endif
 
-			memcpy((char *) p->tmdbounce[p->tmdbouncenum] ,(char *)skb->data,
-							 (skb->len > T_BUF_SIZE) ? T_BUF_SIZE : skb->len);
+			skb_copy_from_linear_data(skb, p->tmdbounce[p->tmdbouncenum],
+				      skb->len > T_BUF_SIZE ? T_BUF_SIZE :
+							      skb->len);
 			if (len > skb->len)
 				memset((char *)p->tmdbounce[p->tmdbouncenum]+skb->len, 0, len-skb->len);
 			dev_kfree_skb (skb);
diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c
index 099972c977e..df8998b4f37 100644
--- a/drivers/net/pci-skeleton.c
+++ b/drivers/net/pci-skeleton.c
@@ -1344,7 +1344,7 @@ static int netdrv_start_xmit (struct sk_buff *skb, struct net_device *dev)
 
 	tp->tx_info[entry].skb = skb;
 	/* tp->tx_info[entry].mapping = 0; */
-	memcpy (tp->tx_buf[entry], skb->data, skb->len);
+	skb_copy_from_linear_data(skb, tp->tx_buf[entry], skb->len);
 
 	/* Note: the chip doesn't have auto-pad! */
 	NETDRV_W32 (TxStatus0 + (entry * sizeof(u32)),
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index fabbe95c7ef..808fae1577e 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -1136,7 +1136,7 @@ static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		ei_block_output(dev, length, skb->data, output_page);
 	else {
 		memset(packet, 0, ETH_ZLEN);
-		memcpy(packet, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, packet, skb->len);
 		ei_block_output(dev, length, packet, output_page);
 	}
 	
diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c
index b6f0e9a25e2..5918fab3834 100644
--- a/drivers/net/ppp_synctty.c
+++ b/drivers/net/ppp_synctty.c
@@ -594,7 +594,8 @@ ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *skb)
 				return NULL;
 			}
 			skb_reserve(npkt,2);
-			memcpy(skb_put(npkt,skb->len), skb->data, skb->len);
+			skb_copy_from_linear_data(skb,
+				      skb_put(npkt, skb->len), skb->len);
 			kfree_skb(skb);
 			skb = npkt;
 		}
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index e94790632d5..e9fb616ff66 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -869,7 +869,8 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
 			goto abort;
 
 		skb_reserve(skb2, dev->hard_header_len + sizeof(struct pppoe_hdr));
-		memcpy(skb_put(skb2, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(skb2, skb->len),
+					  skb->len);
 	} else {
 		/* Make a clone so as to not disturb the original skb,
 		 * give dev_queue_xmit something it can free.
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index 40d2639eedc..7b80fb7a9d9 100755
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c
@@ -1927,7 +1927,8 @@ static void ql_process_macip_rx_intr(struct ql3_adapter *qdev,
 		 * Copy the ethhdr from first buffer to second. This
 		 * is necessary for 3022 IP completions.
 		 */
-		memcpy(skb_push(skb2, size), skb1->data + VLAN_ID_LEN, size);
+		skb_copy_from_linear_data_offset(skb1, VLAN_ID_LEN,
+						 skb_push(skb2, size), size);
 	} else {
 		u16 checksum = le16_to_cpu(ib_ip_rsp_ptr->checksum);
 		if (checksum & 
diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c
index 3a4fce38450..25c73d47daa 100644
--- a/drivers/net/rrunner.c
+++ b/drivers/net/rrunner.c
@@ -1451,7 +1451,7 @@ static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 		skb_reserve(new_skb, 8);
 		skb_put(new_skb, len);
-		memcpy(new_skb->data, skb->data, len);
+		skb_copy_from_linear_data(skb, new_skb->data, len);
 		dev_kfree_skb(skb);
 		skb = new_skb;
 	}
diff --git a/drivers/net/sgiseeq.c b/drivers/net/sgiseeq.c
index 5a891913218..d8c9c5d66d4 100644
--- a/drivers/net/sgiseeq.c
+++ b/drivers/net/sgiseeq.c
@@ -534,7 +534,7 @@ static int sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 *    entry and the HPC got to the end of the chain before we
 	 *    added this new entry and restarted it.
 	 */
-	memcpy((char *)(long)td->buf_vaddr, skb->data, skblen);
+	skb_copy_from_linear_data(skb, (char *)(long)td->buf_vaddr, skblen);
 	if (len != skblen)
 		memset((char *)(long)td->buf_vaddr + skb->len, 0, len-skblen);
 	td->tdma.cntinfo = (len & HPCDMA_BCNT) |
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 99b61cfb7ce..f1a0e6c0fbd 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2950,7 +2950,7 @@ static struct sk_buff *skge_rx_get(struct net_device *dev,
 		pci_dma_sync_single_for_cpu(skge->hw->pdev,
 					    pci_unmap_addr(e, mapaddr),
 					    len, PCI_DMA_FROMDEVICE);
-		memcpy(skb->data, e->skb->data, len);
+		skb_copy_from_linear_data(e->skb, skb->data, len);
 		pci_dma_sync_single_for_device(skge->hw->pdev,
 					       pci_unmap_addr(e, mapaddr),
 					       len, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index fd291fc9316..238c2ca34da 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1971,7 +1971,7 @@ static struct sk_buff *receive_copy(struct sky2_port *sky2,
 		skb_reserve(skb, 2);
 		pci_dma_sync_single_for_cpu(sky2->hw->pdev, re->data_addr,
 					    length, PCI_DMA_FROMDEVICE);
-		memcpy(skb->data, re->skb->data, length);
+		skb_copy_from_linear_data(re->skb, skb->data, length);
 		skb->ip_summed = re->skb->ip_summed;
 		skb->csum = re->skb->csum;
 		pci_dma_sync_single_for_device(sky2->hw->pdev, re->data_addr,
diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c
index 5bcc749bef1..396c3d961f8 100644
--- a/drivers/net/sun3_82586.c
+++ b/drivers/net/sun3_82586.c
@@ -1026,7 +1026,7 @@ static int sun3_82586_send_packet(struct sk_buff *skb, struct net_device *dev)
 			memset((char *)p->xmit_cbuffs[p->xmit_count], 0, ETH_ZLEN);
 			len = ETH_ZLEN;
 		}
-		memcpy((char *)p->xmit_cbuffs[p->xmit_count],(char *)(skb->data),skb->len);
+		skb_copy_from_linear_data(skb, p->xmit_cbuffs[p->xmit_count], skb->len);
 
 #if (NUM_XMIT_BUFFS == 1)
 #	ifdef NO_NOPCOMMANDS
diff --git a/drivers/net/sun3lance.c b/drivers/net/sun3lance.c
index 0454827c8c2..327ed7962fb 100644
--- a/drivers/net/sun3lance.c
+++ b/drivers/net/sun3lance.c
@@ -629,7 +629,7 @@ static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev )
 	head->length = (-len) | 0xf000;
 	head->misc = 0;
 
-	memcpy( PKTBUF_ADDR(head), (void *)skb->data, skb->len );
+	skb_copy_from_linear_data(skb, PKTBUF_ADDR(head), skb->len);
 	if (len != skb->len)
 		memset(PKTBUF_ADDR(head) + skb->len, 0, len-skb->len);
 
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 4bb89dec565..9df1038ec6b 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -848,7 +848,7 @@ static int gem_rx(struct gem *gp, int work_to_do)
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			pci_dma_sync_single_for_cpu(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
-			memcpy(copy_skb->data, skb->data, len);
+			skb_copy_from_linear_data(skb, copy_skb->data, len);
 			pci_dma_sync_single_for_device(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 
 			/* We'll reuse the original ring buffer. */
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index 4b69c1deb9f..5304d7b94e5 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -2061,7 +2061,7 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev)
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			hme_dma_sync_for_cpu(hp, dma_addr, len, DMA_FROMDEVICE);
-			memcpy(copy_skb->data, skb->data, len);
+			skb_copy_from_linear_data(skb, copy_skb->data, len);
 			hme_dma_sync_for_device(hp, dma_addr, len, DMA_FROMDEVICE);
 
 			/* Reuse original ring buffer. */
diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c
index 8f53a1ef608..42722530ab2 100644
--- a/drivers/net/sunlance.c
+++ b/drivers/net/sunlance.c
@@ -1143,7 +1143,7 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		struct lance_init_block *ib = lp->init_block_mem;
 		ib->btx_ring [entry].length = (-len) | 0xf000;
 		ib->btx_ring [entry].misc = 0;
-		memcpy((char *)&ib->tx_buf [entry][0], skb->data, skblen);
+		skb_copy_from_linear_data(skb, &ib->tx_buf [entry][0], skblen);
 		if (len != skblen)
 			memset((char *) &ib->tx_buf [entry][skblen], 0, len - skblen);
 		ib->btx_ring [entry].tmd1_bits = (LE_T1_POK | LE_T1_OWN);
diff --git a/drivers/net/sunqe.c b/drivers/net/sunqe.c
index fbfb98284fd..fa70e0b78af 100644
--- a/drivers/net/sunqe.c
+++ b/drivers/net/sunqe.c
@@ -592,7 +592,7 @@ static int qe_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Avoid a race... */
 	qep->qe_block->qe_txd[entry].tx_flags = TXD_UPDATE;
 
-	memcpy(txbuf, skb->data, len);
+	skb_copy_from_linear_data(skb, txbuf, len);
 
 	qep->qe_block->qe_txd[entry].tx_addr = txbuf_dvma;
 	qep->qe_block->qe_txd[entry].tx_flags =
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 414365c3198..38383e4e07a 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3350,7 +3350,7 @@ static int tg3_rx(struct tg3 *tp, int budget)
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
-			memcpy(copy_skb->data, skb->data, len);
+			skb_copy_from_linear_data(skb, copy_skb->data, len);
 			pci_dma_sync_single_for_device(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 
 			/* We'll reuse the original ring buffer. */
diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c
index 2ede3f58cf9..106dc1ef0ac 100644
--- a/drivers/net/tlan.c
+++ b/drivers/net/tlan.c
@@ -1112,7 +1112,7 @@ static int TLan_StartTx( struct sk_buff *skb, struct net_device *dev )
 
 	if ( bbuf ) {
 		tail_buffer = priv->txBuffer + ( priv->txTail * TLAN_MAX_FRAME_SIZE );
-		memcpy( tail_buffer, skb->data, skb->len );
+		skb_copy_from_linear_data(skb, tail_buffer, skb->len);
 	} else {
 		tail_list->buffer[0].address = pci_map_single(priv->pciDev, skb->data, skb->len, PCI_DMA_TODEVICE);
 		TLan_StoreSKB(tail_list, skb);
diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c
index d293423ee8e..e22a3f5333e 100644
--- a/drivers/net/tokenring/3c359.c
+++ b/drivers/net/tokenring/3c359.c
@@ -937,14 +937,17 @@ static void xl_rx(struct net_device *dev)
 				copy_len = xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfraglen & 0x7FFF ; 
 				frame_length -= copy_len ;  
 				pci_dma_sync_single_for_cpu(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
-				memcpy(skb_put(skb,copy_len), xl_priv->rx_ring_skb[xl_priv->rx_ring_tail]->data, copy_len) ; 
+				skb_copy_from_linear_data(xl_priv->rx_ring_skb[xl_priv->rx_ring_tail],
+							  skb_put(skb, copy_len),
+							  copy_len);
 				pci_dma_sync_single_for_device(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 				adv_rx_ring(dev) ; 
 			} 
 
 			/* Now we have found the last fragment */
 			pci_dma_sync_single_for_cpu(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
-			memcpy(skb_put(skb,copy_len), xl_priv->rx_ring_skb[xl_priv->rx_ring_tail]->data, frame_length) ; 
+			skb_copy_from_linear_data(xl_priv->rx_ring_skb[xl_priv->rx_ring_tail],
+				      skb_put(skb,copy_len), frame_length);
 /*			memcpy(skb_put(skb,frame_length), bus_to_virt(xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr), frame_length) ; */
 			pci_dma_sync_single_for_device(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 			adv_rx_ring(dev) ; 
diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c
index a6206580888..09b3cfb8e80 100644
--- a/drivers/net/tokenring/olympic.c
+++ b/drivers/net/tokenring/olympic.c
@@ -845,7 +845,9 @@ static void olympic_rx(struct net_device *dev)
 							pci_dma_sync_single_for_cpu(olympic_priv->pdev,
 								le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
 								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; 
-							memcpy(skb_put(skb,length-4),olympic_priv->rx_ring_skb[rx_ring_last_received]->data,length-4) ; 
+							skb_copy_from_linear_data(olympic_priv->rx_ring_skb[rx_ring_last_received],
+								      skb_put(skb,length - 4),
+								      length - 4);
 							pci_dma_sync_single_for_device(olympic_priv->pdev,
 								le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
 								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
@@ -862,7 +864,9 @@ static void olympic_rx(struct net_device *dev)
 								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; 
 							rx_desc = &(olympic_priv->olympic_rx_ring[rx_ring_last_received]);
 							cpy_length = (i == 1 ? frag_len : le32_to_cpu(rx_desc->res_length)); 
-							memcpy(skb_put(skb, cpy_length), olympic_priv->rx_ring_skb[rx_ring_last_received]->data, cpy_length) ;
+							skb_copy_from_linear_data(olympic_priv->rx_ring_skb[rx_ring_last_received],
+								      skb_put(skb, cpy_length),
+								      cpy_length);
 							pci_dma_sync_single_for_device(olympic_priv->pdev,
 								le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
 								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c
index de6f72775ec..e6f0817c350 100644
--- a/drivers/net/tokenring/tms380tr.c
+++ b/drivers/net/tokenring/tms380tr.c
@@ -644,7 +644,7 @@ static int tms380tr_hardware_send_packet(struct sk_buff *skb, struct net_device
 		dmabuf  = 0;
 		i 	= tp->TplFree->TPLIndex;
 		buf 	= tp->LocalTxBuffers[i];
-		memcpy(buf, skb->data, length);
+		skb_copy_from_linear_data(skb, buf, length);
 		newbuf 	= ((char *)buf - (char *)tp) + tp->dmabuffer;
 	}
 	else {
diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
index 8a7effa7090..d19f8568440 100644
--- a/drivers/net/tulip/de2104x.c
+++ b/drivers/net/tulip/de2104x.c
@@ -449,8 +449,8 @@ static void de_rx (struct de_private *de)
 		} else {
 			pci_dma_sync_single_for_cpu(de->pdev, mapping, len, PCI_DMA_FROMDEVICE);
 			skb_reserve(copy_skb, RX_OFFSET);
-			memcpy(skb_put(copy_skb, len), skb->data, len);
-
+			skb_copy_from_linear_data(skb, skb_put(copy_skb, len),
+						  len);
 			pci_dma_sync_single_for_device(de->pdev, mapping, len, PCI_DMA_FROMDEVICE);
 
 			/* We'll reuse the original ring buffer. */
diff --git a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c
index a5e0237a653..b3a64ca9863 100644
--- a/drivers/net/tulip/dmfe.c
+++ b/drivers/net/tulip/dmfe.c
@@ -682,7 +682,7 @@ static int dmfe_start_xmit(struct sk_buff *skb, struct DEVICE *dev)
 
 	/* transmit this packet */
 	txptr = db->tx_insert_ptr;
-	memcpy(txptr->tx_buf_ptr, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, txptr->tx_buf_ptr, skb->len);
 	txptr->tdes1 = cpu_to_le32(0xe1000000 | skb->len);
 
 	/* Point to next transmit free descriptor */
@@ -989,7 +989,9 @@ static void dmfe_rx_packet(struct DEVICE *dev, struct dmfe_board_info * db)
 						skb = newskb;
 						/* size less than COPY_SIZE, allocate a rxlen SKB */
 						skb_reserve(skb, 2); /* 16byte align */
-						memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->data, rxlen);
+						skb_copy_from_linear_data(rxptr->rx_skb_ptr,
+							  skb_put(skb, rxlen),
+									  rxlen);
 						dmfe_reuse_skb(db, rxptr->rx_skb_ptr);
 					} else
 						skb_put(skb, rxlen);
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index e46f4cb02c1..ca2548eb7d6 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -583,7 +583,7 @@ static int uli526x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* transmit this packet */
 	txptr = db->tx_insert_ptr;
-	memcpy(txptr->tx_buf_ptr, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, txptr->tx_buf_ptr, skb->len);
 	txptr->tdes1 = cpu_to_le32(0xe1000000 | skb->len);
 
 	/* Point to next transmit free descriptor */
diff --git a/drivers/net/tulip/xircom_cb.c b/drivers/net/tulip/xircom_cb.c
index 1fe3734e155..985a1810ca5 100644
--- a/drivers/net/tulip/xircom_cb.c
+++ b/drivers/net/tulip/xircom_cb.c
@@ -411,9 +411,9 @@ static int xircom_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			   sometimes sends more than you ask it to. */
 
 			memset(&card->tx_buffer[bufferoffsets[desc]/4],0,1536);
-			memcpy(&(card->tx_buffer[bufferoffsets[desc]/4]),skb->data,skb->len);
-
-
+			skb_copy_from_linear_data(skb,
+				  &(card->tx_buffer[bufferoffsets[desc] / 4]),
+						  skb->len);
 			/* FIXME: The specification tells us that the length we send HAS to be a multiple of
 			   4 bytes. */
 
diff --git a/drivers/net/tulip/xircom_tulip_cb.c b/drivers/net/tulip/xircom_tulip_cb.c
index 3f24c82755f..696b3b8aac8 100644
--- a/drivers/net/tulip/xircom_tulip_cb.c
+++ b/drivers/net/tulip/xircom_tulip_cb.c
@@ -915,7 +915,9 @@ xircom_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tp->tx_skbuff[entry] = skb;
 	if (tp->chip_id == X3201_3) {
-		memcpy(tp->tx_aligned_skbuff[entry]->data,skb->data,skb->len);
+		skb_copy_from_linear_data(skb,
+					  tp->tx_aligned_skbuff[entry]->data,
+					  skb->len);
 		tp->tx_ring[entry].buffer1 = virt_to_bus(tp->tx_aligned_skbuff[entry]->data);
 	} else
 		tp->tx_ring[entry].buffer1 = virt_to_bus(skb->data);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 288d8559f8c..4d461595406 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -386,8 +386,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 		 *   - we are multicast promiscous.
 		 *   - we belong to the multicast group.
 		 */
-		memcpy(addr, skb->data,
-		       min_t(size_t, sizeof addr, skb->len));
+		skb_copy_from_linear_data(skb, addr, min_t(size_t, sizeof addr,
+								   skb->len));
 		bit_nr = ether_crc(sizeof addr, addr) >> 26;
 		if ((tun->if_flags & IFF_PROMISC) ||
 				memcmp(addr, tun->dev_addr, sizeof addr) == 0 ||
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 422eaf8ea12..25b75b61518 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1339,7 +1339,8 @@ static inline int velocity_rx_copy(struct sk_buff **rx_skb, int pkt_size,
 			if (vptr->flags & VELOCITY_FLAGS_IP_ALIGN)
 				skb_reserve(new_skb, 2);
 
-			memcpy(new_skb->data, rx_skb[0]->data, pkt_size);
+			skb_copy_from_linear_data(rx_skb[0], new_skb->data,
+						  pkt_size);
 			*rx_skb = new_skb;
 			ret = 0;
 		}
@@ -1927,7 +1928,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (pktlen < ETH_ZLEN) {
 		/* Cannot occur until ZC support */
 		pktlen = ETH_ZLEN;
-		memcpy(tdinfo->buf, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, tdinfo->buf, skb->len);
 		memset(tdinfo->buf + skb->len, 0, ETH_ZLEN - skb->len);
 		tdinfo->skb = skb;
 		tdinfo->skb_dma[0] = tdinfo->buf_dma;
@@ -1943,7 +1944,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
 		int nfrags = skb_shinfo(skb)->nr_frags;
 		tdinfo->skb = skb;
 		if (nfrags > 6) {
-			memcpy(tdinfo->buf, skb->data, skb->len);
+			skb_copy_from_linear_data(skb, tdinfo->buf, skb->len);
 			tdinfo->skb_dma[0] = tdinfo->buf_dma;
 			td_ptr->tdesc0.pktsize =
 			td_ptr->td_buf[0].pa_low = cpu_to_le32(tdinfo->skb_dma[0]);
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index a576113abbd..ae132c1c545 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1702,7 +1702,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             if(!nsb) {
                 goto give_it_anyways;
             }
-            memcpy(skb_put(nsb, len), skb->data, len);
+            skb_copy_from_linear_data(skb, skb_put(nsb, len), len);
             
             nsb->protocol = lmc_proto_type(sc, skb);
             skb_reset_mac_header(nsb);
diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c
index edbc55528be..8ba75bb1732 100644
--- a/drivers/net/wan/pc300_drv.c
+++ b/drivers/net/wan/pc300_drv.c
@@ -1765,7 +1765,7 @@ cpc_trace(struct net_device *dev, struct sk_buff *skb_main, char rx_tx)
 	skb->data[7] = ']';
 	skb->data[8] = ':';
 	skb->data[9] = ' ';
-	memcpy(&skb->data[10], skb_main->data, skb_main->len);
+	skb_copy_from_linear_data(skb_main, &skb->data[10], skb_main->len);
 
 	netif_rx(skb);
 }
diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 8b4540bfc1b..9432d2ce774 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -1782,7 +1782,7 @@ int z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 		 */
 		c->tx_next_ptr=c->tx_dma_buf[c->tx_dma_used];
 		c->tx_dma_used^=1;	/* Flip temp buffer */
-		memcpy(c->tx_next_ptr, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, c->tx_next_ptr, skb->len);
 	}
 	else
 		c->tx_next_ptr=skb->data;	
diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c
index 1c17cbe007b..51a7db53afa 100644
--- a/drivers/net/wireless/atmel.c
+++ b/drivers/net/wireless/atmel.c
@@ -827,14 +827,14 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev)
 	if (priv->wep_is_on)
 		frame_ctl |= IEEE80211_FCTL_PROTECTED;
 	if (priv->operating_mode == IW_MODE_ADHOC) {
-		memcpy(&header.addr1, skb->data, 6);
+		skb_copy_from_linear_data(skb, &header.addr1, 6);
 		memcpy(&header.addr2, dev->dev_addr, 6);
 		memcpy(&header.addr3, priv->BSSID, 6);
 	} else {
 		frame_ctl |= IEEE80211_FCTL_TODS;
 		memcpy(&header.addr1, priv->CurrentBSSID, 6);
 		memcpy(&header.addr2, dev->dev_addr, 6);
-		memcpy(&header.addr3, skb->data, 6);
+		skb_copy_from_linear_data(skb, &header.addr3, 6);
 	}
 
 	if (priv->use_wpa)
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_dma.c b/drivers/net/wireless/bcm43xx/bcm43xx_dma.c
index 6e0dc76400e..e3d2e61a31e 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_dma.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_dma.c
@@ -998,7 +998,8 @@ static void dma_tx_fragment(struct bcm43xx_dmaring *ring,
 			assert(0);
 			return;
 		}
-		memcpy(skb_put(bounce_skb, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(bounce_skb, skb->len),
+					  skb->len);
 		dev_kfree_skb_any(skb);
 		skb = bounce_skb;
 	}
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index 35a3a50724f..cbedc9ee740 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -933,12 +933,14 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 		if (frag == 0) {
 			/* copy first fragment (including full headers) into
 			 * beginning of the fragment cache skb */
-			memcpy(skb_put(frag_skb, flen), skb->data, flen);
+			skb_copy_from_linear_data(skb, skb_put(frag_skb, flen),
+						  flen);
 		} else {
 			/* append frame payload to the end of the fragment
 			 * cache skb */
-			memcpy(skb_put(frag_skb, flen), skb->data + hdrlen,
-			       flen);
+			skb_copy_from_linear_data_offset(skb, hdrlen,
+							 skb_put(frag_skb,
+								 flen), flen);
 		}
 		dev_kfree_skb(skb);
 		skb = NULL;
@@ -1044,8 +1046,9 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 	    skb->len >= ETH_HLEN + ETH_ALEN) {
 		/* Non-standard frame: get addr4 from its bogus location after
 		 * the payload */
-		memcpy(skb->data + ETH_ALEN,
-		       skb->data + skb->len - ETH_ALEN, ETH_ALEN);
+		skb_copy_from_linear_data_offset(skb, skb->len - ETH_ALEN,
+						 skb->data + ETH_ALEN,
+						 ETH_ALEN);
 		skb_trim(skb, skb->len - ETH_ALEN);
 	}
 
diff --git a/drivers/net/wireless/hostap/hostap_80211_tx.c b/drivers/net/wireless/hostap/hostap_80211_tx.c
index 159baef18e4..246fac0e800 100644
--- a/drivers/net/wireless/hostap/hostap_80211_tx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_tx.c
@@ -146,7 +146,8 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS;
 			/* From&To DS: Addr1 = RA, Addr2 = TA, Addr3 = DA,
 			 * Addr4 = SA */
-			memcpy(&hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN);
+			skb_copy_from_linear_data_offset(skb, ETH_ALEN,
+							 &hdr.addr4, ETH_ALEN);
 			hdr_len += ETH_ALEN;
 		} else {
 			/* bogus 4-addr format to workaround Prism2 station
@@ -159,7 +160,8 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			/* SA from skb->data + ETH_ALEN will be added after
 			 * frame payload; use hdr.addr4 as a temporary buffer
 			 */
-			memcpy(&hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN);
+			skb_copy_from_linear_data_offset(skb, ETH_ALEN,
+							 &hdr.addr4, ETH_ALEN);
 			need_tailroom += ETH_ALEN;
 		}
 
@@ -174,24 +176,27 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		else
 			memcpy(&hdr.addr1, local->bssid, ETH_ALEN);
 		memcpy(&hdr.addr2, dev->dev_addr, ETH_ALEN);
-		memcpy(&hdr.addr3, skb->data, ETH_ALEN);
+		skb_copy_from_linear_data(skb, &hdr.addr3, ETH_ALEN);
 	} else if (local->iw_mode == IW_MODE_MASTER && !to_assoc_ap) {
 		fc |= IEEE80211_FCTL_FROMDS;
 		/* From DS: Addr1 = DA, Addr2 = BSSID, Addr3 = SA */
-		memcpy(&hdr.addr1, skb->data, ETH_ALEN);
+		skb_copy_from_linear_data(skb, &hdr.addr1, ETH_ALEN);
 		memcpy(&hdr.addr2, dev->dev_addr, ETH_ALEN);
-		memcpy(&hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN);
+		skb_copy_from_linear_data_offset(skb, ETH_ALEN, &hdr.addr3,
+						 ETH_ALEN);
 	} else if (local->iw_mode == IW_MODE_INFRA || to_assoc_ap) {
 		fc |= IEEE80211_FCTL_TODS;
 		/* To DS: Addr1 = BSSID, Addr2 = SA, Addr3 = DA */
 		memcpy(&hdr.addr1, to_assoc_ap ?
 		       local->assoc_ap_addr : local->bssid, ETH_ALEN);
-		memcpy(&hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
-		memcpy(&hdr.addr3, skb->data, ETH_ALEN);
+		skb_copy_from_linear_data_offset(skb, ETH_ALEN, &hdr.addr2,
+						 ETH_ALEN);
+		skb_copy_from_linear_data(skb, &hdr.addr3, ETH_ALEN);
 	} else if (local->iw_mode == IW_MODE_ADHOC) {
 		/* not From/To DS: Addr1 = DA, Addr2 = SA, Addr3 = BSSID */
-		memcpy(&hdr.addr1, skb->data, ETH_ALEN);
-		memcpy(&hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
+		skb_copy_from_linear_data(skb, &hdr.addr1, ETH_ALEN);
+		skb_copy_from_linear_data_offset(skb, ETH_ALEN, &hdr.addr2,
+						 ETH_ALEN);
 		memcpy(&hdr.addr3, local->bssid, ETH_ALEN);
 	}
 
diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c
index 797d950d5d6..4ca8a27b8c5 100644
--- a/drivers/net/wireless/hostap/hostap_ap.c
+++ b/drivers/net/wireless/hostap/hostap_ap.c
@@ -1277,8 +1277,8 @@ static char * ap_auth_make_challenge(struct ap_data *ap)
 		return NULL;
 	}
 
-	memcpy(tmpbuf, skb->data + ap->crypt->extra_mpdu_prefix_len,
-	       WLAN_AUTH_CHALLENGE_LEN);
+	skb_copy_from_linear_data_offset(skb, ap->crypt->extra_mpdu_prefix_len,
+					 tmpbuf, WLAN_AUTH_CHALLENGE_LEN);
 	dev_kfree_skb(skb);
 
 	return tmpbuf;
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index 9003ff7d151..fb01fb95a9f 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -1838,13 +1838,14 @@ static int prism2_tx_80211(struct sk_buff *skb, struct net_device *dev)
 
 	/* skb->data starts with txdesc->frame_control */
 	hdr_len = 24;
-	memcpy(&txdesc.frame_control, skb->data, hdr_len);
+	skb_copy_from_linear_data(skb, &txdesc.frame_control, hdr_len);
  	fc = le16_to_cpu(txdesc.frame_control);
 	if (WLAN_FC_GET_TYPE(fc) == IEEE80211_FTYPE_DATA &&
 	    (fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS) &&
 	    skb->len >= 30) {
 		/* Addr4 */
-		memcpy(txdesc.addr4, skb->data + hdr_len, ETH_ALEN);
+		skb_copy_from_linear_data_offset(skb, hdr_len, txdesc.addr4,
+						 ETH_ALEN);
 		hdr_len += ETH_ALEN;
 	}
 
diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index ad6e4a42835..9137a4dd02e 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -2416,8 +2416,9 @@ static void isr_rx(struct ipw2100_priv *priv, int i,
 #ifdef IPW2100_RX_DEBUG
 	/* Make a copy of the frame so we can dump it to the logs if
 	 * ieee80211_rx fails */
-	memcpy(packet_data, packet->skb->data,
-	       min_t(u32, status->frame_size, IPW_RX_NIC_BUFFER_LENGTH));
+	skb_copy_from_linear_data(packet->skb, packet_data,
+				  min_t(u32, status->frame_size,
+					     IPW_RX_NIC_BUFFER_LENGTH));
 #endif
 
 	if (!ieee80211_rx(priv->ieee, packet->skb, stats)) {
diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c
index b04c56a25cc..4839a45098c 100644
--- a/drivers/net/wireless/ipw2200.c
+++ b/drivers/net/wireless/ipw2200.c
@@ -10355,7 +10355,7 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv,
 
 		rt_hdr->it_len = dst->len;
 
-		memcpy(skb_put(dst, len), src->data, len);
+		skb_copy_from_linear_data(src, skb_put(dst, len), len);
 
 		if (!ieee80211_rx(priv->prom_priv->ieee, dst, &dummystats))
 			dev_kfree_skb_any(dst);
diff --git a/drivers/net/wireless/prism54/islpci_eth.c b/drivers/net/wireless/prism54/islpci_eth.c
index 6ebfff03424..7d8bff1dbc4 100644
--- a/drivers/net/wireless/prism54/islpci_eth.c
+++ b/drivers/net/wireless/prism54/islpci_eth.c
@@ -162,13 +162,16 @@ islpci_eth_transmit(struct sk_buff *skb, struct net_device *ndev)
 
 			skb_put(newskb, init_wds ? skb->len + 6 : skb->len);
 			if (init_wds) {
-				memcpy(newskb->data + 6, skb->data, skb->len);
+				skb_copy_from_linear_data(skb,
+							  newskb->data + 6,
+							  skb->len);
 				memcpy(newskb->data, wds_mac, 6);
 #ifdef ISLPCI_ETH_DEBUG
 				printk("islpci_eth_transmit:wds_mac\n");
 #endif
 			} else
-				memcpy(newskb->data, skb->data, skb->len);
+				skb_copy_from_linear_data(skb, newskb->data,
+							  skb->len);
 
 #if VERBOSE > SHOW_ERROR_MESSAGES
 			DEBUG(SHOW_TRACING, "memcpy %p %p %i wds %i\n",
@@ -394,8 +397,10 @@ islpci_eth_receive(islpci_private *priv)
 			/* Update spy records */
 			wireless_spy_update(ndev, annex->addr2, &wstats);
 
-			memcpy(skb->data + sizeof (struct rfmon_header),
-			       skb->data, 2 * ETH_ALEN);
+			skb_copy_from_linear_data(skb,
+						  (skb->data +
+						   sizeof(struct rfmon_header)),
+						  2 * ETH_ALEN);
 			skb_pull(skb, sizeof (struct rfmon_header));
 		}
 		skb->protocol = eth_type_trans(skb, ndev);
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 9633b0457f8..3be624295a1 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -2242,7 +2242,8 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, unsigned i
     rx_ptr += copy_from_rx_buff(local, rx_ptr, pkt_addr & RX_BUFF_END, rx_len);
     /* Get source address */
 #ifdef WIRELESS_SPY
-    memcpy(linksrcaddr, ((struct mac_header *)skb->data)->addr_2, ETH_ALEN);
+    skb_copy_from_linear_data_offset(skb, offsetof(struct mac_header, addr_2),
+				     linksrcaddr, ETH_ALEN);
 #endif
     /* Now, deal with encapsulation/translation/sniffer */
     if (!sniffer) {
diff --git a/drivers/net/wireless/wavelan.c b/drivers/net/wireless/wavelan.c
index 2bf77b1ee53..1cf090d60ed 100644
--- a/drivers/net/wireless/wavelan.c
+++ b/drivers/net/wireless/wavelan.c
@@ -2938,7 +2938,7 @@ static int wavelan_packet_xmit(struct sk_buff *skb, struct net_device * dev)
 	 * need to pad. Jean II */
 	if (skb->len < ETH_ZLEN) {
 		memset(data, 0, ETH_ZLEN);
-		memcpy(data, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, data, skb->len);
 		/* Write packet on the card */
 		if(wv_packet_write(dev, data, ETH_ZLEN))
 			return 1;	/* We failed */
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index 1fe013a7297..935b144d9b5 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -807,10 +807,10 @@ static int zd1201_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	txbuf[4] = 0x00;
 	txbuf[5] = 0x00;
 
-	memcpy(txbuf+6, skb->data+12, skb->len-12);
+	skb_copy_from_linear_data_offset(skb, 12, txbuf + 6, skb->len - 12);
 	if (pad)
 		txbuf[skb->len-12+6]=0;
-	memcpy(txbuf+skb->len-12+6+pad, skb->data, 12);
+	skb_copy_from_linear_data(skb, txbuf + skb->len - 12 + 6 + pad, 12);
 	*(__be16*)&txbuf[skb->len+6+pad] = htons(skb->len-12+6);
 	txbuf[txbuflen-1] = 0;
 
diff --git a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c
index 54e3f806cd5..b0f813e6f48 100644
--- a/drivers/s390/net/ctcmain.c
+++ b/drivers/s390/net/ctcmain.c
@@ -472,7 +472,8 @@ ctc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
 			privptr->stats.rx_dropped++;
 			return;
 		}
-		memcpy(skb_put(skb, pskb->len), pskb->data, pskb->len);
+		skb_copy_from_linear_data(pskb, skb_put(skb, pskb->len),
+					  pskb->len);
 		skb_reset_mac_header(skb);
 		skb->dev = pskb->dev;
 		skb->protocol = pskb->protocol;
@@ -716,8 +717,9 @@ ch_action_txdone(fsm_instance * fi, int event, void *arg)
 		*((__u16 *) skb_put(ch->trans_skb, 2)) = ch->collect_len + 2;
 		i = 0;
 		while ((skb = skb_dequeue(&ch->collect_queue))) {
-			memcpy(skb_put(ch->trans_skb, skb->len), skb->data,
-			       skb->len);
+			skb_copy_from_linear_data(skb, skb_put(ch->trans_skb,
+							       skb->len),
+						  skb->len);
 			privptr->stats.tx_packets++;
 			privptr->stats.tx_bytes += skb->len - LL_HEADER_LENGTH;
 			atomic_dec(&skb->users);
@@ -2268,8 +2270,9 @@ transmit_skb(struct channel *ch, struct sk_buff *skb)
 			skb_reset_tail_pointer(ch->trans_skb);
 			ch->trans_skb->len = 0;
 			ch->ccw[1].count = skb->len;
-			memcpy(skb_put(ch->trans_skb, skb->len), skb->data,
-			       skb->len);
+			skb_copy_from_linear_data(skb, skb_put(ch->trans_skb,
+							       skb->len),
+						  skb->len);
 			atomic_dec(&skb->users);
 			dev_kfree_skb_irq(skb);
 			ccw_idx = 0;
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index 1c23e187a3b..08a994fdd1a 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -1576,7 +1576,7 @@ __lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb,
 	header->offset = card->tx_buffer->count;
 	header->type = card->lan_type;
 	header->slot = card->portno;
-	memcpy(header + 1, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, header + 1, skb->len);
 	spin_unlock(&card->lock);
 	card->stats.tx_bytes += skb->len;
 	card->stats.tx_packets++;
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index cd42bd54988..e10e85e85c8 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -645,7 +645,8 @@ static void netiucv_unpack_skb(struct iucv_connection *conn,
 			privptr->stats.rx_dropped++;
 			return;
 		}
-		memcpy(skb_put(skb, pskb->len), pskb->data, pskb->len);
+		skb_copy_from_linear_data(pskb, skb_put(skb, pskb->len),
+					  pskb->len);
 		skb_reset_mac_header(skb);
 		skb->dev = pskb->dev;
 		skb->protocol = pskb->protocol;
@@ -744,7 +745,9 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
 		header.next = conn->tx_buff->len + skb->len + NETIUCV_HDRLEN;
 		memcpy(skb_put(conn->tx_buff, NETIUCV_HDRLEN), &header,
 		       NETIUCV_HDRLEN);
-		memcpy(skb_put(conn->tx_buff, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb,
+					  skb_put(conn->tx_buff, skb->len),
+					  skb->len);
 		txbytes += skb->len;
 		txpackets++;
 		stat_maxcq++;
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 5890bb5ad23..dd7034fbfff 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -267,7 +267,8 @@ qeth_eddp_copy_data_tcp(char *dst, struct qeth_eddp_data *eddp, int len,
 
 	QETH_DBF_TEXT(trace, 5, "eddpcdtc");
 	if (skb_shinfo(eddp->skb)->nr_frags == 0) {
-		memcpy(dst, eddp->skb->data + eddp->skb_offset, len);
+		skb_copy_from_linear_data_offset(eddp->skb, eddp->skb_offset,
+						 dst, len);
 		*hcsum = csum_partial(eddp->skb->data + eddp->skb_offset, len,
 				      *hcsum);
 		eddp->skb_offset += len;
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index a076f735a7b..d287c575522 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -484,7 +484,7 @@ static unsigned int usbatm_write_cells(struct usbatm_data *instance,
 		ptr[4] = 0xec;
 		ptr += ATM_CELL_HEADER;
 
-		memcpy(ptr, skb->data, data_len);
+		skb_copy_from_linear_data(skb, ptr, data_len);
 		ptr += data_len;
 		__skb_pull(skb, data_len);
 
diff --git a/drivers/usb/net/catc.c b/drivers/usb/net/catc.c
index d82022dd7f2..ffec2e01b89 100644
--- a/drivers/usb/net/catc.c
+++ b/drivers/usb/net/catc.c
@@ -418,7 +418,7 @@ static int catc_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 	catc->tx_ptr = (((catc->tx_ptr - 1) >> 6) + 1) << 6;
 	tx_buf = catc->tx_buf[catc->tx_idx] + catc->tx_ptr;
 	*((u16*)tx_buf) = (catc->is_f5u011) ? cpu_to_be16((u16)skb->len) : cpu_to_le16((u16)skb->len);
-	memcpy(tx_buf + 2, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, tx_buf + 2, skb->len);
 	catc->tx_ptr += skb->len + 2;
 
 	if (!test_and_set_bit(TX_RUNNING, &catc->flags))
diff --git a/drivers/usb/net/pegasus.c b/drivers/usb/net/pegasus.c
index 13f70e09ea4..1ad4ee54b18 100644
--- a/drivers/usb/net/pegasus.c
+++ b/drivers/usb/net/pegasus.c
@@ -889,7 +889,7 @@ static int pegasus_start_xmit(struct sk_buff *skb, struct net_device *net)
 	netif_stop_queue(net);
 
 	((__le16 *) pegasus->tx_buff)[0] = cpu_to_le16(l16);
-	memcpy(pegasus->tx_buff + 2, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, pegasus->tx_buff + 2, skb->len);
 	usb_fill_bulk_urb(pegasus->tx_urb, pegasus->usb,
 			  usb_sndbulkpipe(pegasus->usb, 2),
 			  pegasus->tx_buff, count,
-- 
cgit v1.2.3


From f85958151900f9d30fa5ff941b0ce71eaa45a7de Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 28 Mar 2007 14:22:33 -0700
Subject: [NET]: random functions can use nsec resolution instead of usec

In order to get more randomness for secure_tcpv6_sequence_number(),
secure_tcp_sequence_number(), secure_dccp_sequence_number() functions,
we can use the high resolution time services, providing nanosec
resolution.

I've also done two kmalloc()/kzalloc() conversions.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/char/random.c | 36 +++++++++++++-----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)

(limited to 'drivers')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 03af50f900d..46c1b97748b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -881,15 +881,15 @@ EXPORT_SYMBOL(get_random_bytes);
  */
 static void init_std_data(struct entropy_store *r)
 {
-	struct timeval tv;
+	ktime_t now;
 	unsigned long flags;
 
 	spin_lock_irqsave(&r->lock, flags);
 	r->entropy_count = 0;
 	spin_unlock_irqrestore(&r->lock, flags);
 
-	do_gettimeofday(&tv);
-	add_entropy_words(r, (__u32 *)&tv, sizeof(tv)/4);
+	now = ktime_get_real();
+	add_entropy_words(r, (__u32 *)&now, sizeof(now)/4);
 	add_entropy_words(r, (__u32 *)utsname(),
 			  sizeof(*(utsname()))/4);
 }
@@ -911,14 +911,12 @@ void rand_initialize_irq(int irq)
 		return;
 
 	/*
-	 * If kmalloc returns null, we just won't use that entropy
+	 * If kzalloc returns null, we just won't use that entropy
 	 * source.
 	 */
-	state = kmalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
-	if (state) {
-		memset(state, 0, sizeof(struct timer_rand_state));
+	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
+	if (state)
 		irq_timer_state[irq] = state;
-	}
 }
 
 #ifdef CONFIG_BLOCK
@@ -927,14 +925,12 @@ void rand_initialize_disk(struct gendisk *disk)
 	struct timer_rand_state *state;
 
 	/*
-	 * If kmalloc returns null, we just won't use that entropy
+	 * If kzalloc returns null, we just won't use that entropy
 	 * source.
 	 */
-	state = kmalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
-	if (state) {
-		memset(state, 0, sizeof(struct timer_rand_state));
+	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
+	if (state)
 		disk->random = state;
-	}
 }
 #endif
 
@@ -1469,7 +1465,6 @@ late_initcall(seqgen_init);
 __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 				   __be16 sport, __be16 dport)
 {
-	struct timeval tv;
 	__u32 seq;
 	__u32 hash[12];
 	struct keydata *keyptr = get_keyptr();
@@ -1485,8 +1480,7 @@ __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 	seq = twothirdsMD4Transform((const __u32 *)daddr, hash) & HASH_MASK;
 	seq += keyptr->count;
 
-	do_gettimeofday(&tv);
-	seq += tv.tv_usec + tv.tv_sec * 1000000;
+	seq += ktime_get_real().tv64;
 
 	return seq;
 }
@@ -1521,7 +1515,6 @@ __u32 secure_ip_id(__be32 daddr)
 __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 				 __be16 sport, __be16 dport)
 {
-	struct timeval tv;
 	__u32 seq;
 	__u32 hash[4];
 	struct keydata *keyptr = get_keyptr();
@@ -1543,12 +1536,11 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 	 *	As close as possible to RFC 793, which
 	 *	suggests using a 250 kHz clock.
 	 *	Further reading shows this assumes 2 Mb/s networks.
-	 *	For 10 Mb/s Ethernet, a 1 MHz clock is appropriate.
+	 *	For 10 Gb/s Ethernet, a 1 GHz clock is appropriate.
 	 *	That's funny, Linux has one built in!  Use it!
 	 *	(Networks are faster now - should this be increased?)
 	 */
-	do_gettimeofday(&tv);
-	seq += tv.tv_usec + tv.tv_sec * 1000000;
+	seq += ktime_get_real().tv64;
 #if 0
 	printk("init_seq(%lx, %lx, %d, %d) = %d\n",
 	       saddr, daddr, sport, dport, seq);
@@ -1596,7 +1588,6 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16
 u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 				__be16 sport, __be16 dport)
 {
-	struct timeval tv;
 	u64 seq;
 	__u32 hash[4];
 	struct keydata *keyptr = get_keyptr();
@@ -1609,8 +1600,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 	seq = half_md4_transform(hash, keyptr->secret);
 	seq |= ((u64)keyptr->count) << (32 - HASH_BITS);
 
-	do_gettimeofday(&tv);
-	seq += tv.tv_usec + tv.tv_sec * 1000000;
+	seq += ktime_get_real().tv64;
 	seq &= (1ull << 48) - 1;
 #if 0
 	printk("dccp init_seq(%lx, %lx, %d, %d) = %d\n",
-- 
cgit v1.2.3


From c45d286e72dd72c0229dc9e2849743ba427fee84 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 28 Mar 2007 14:29:08 -0700
Subject: [NET]: Inline net_device_stats

Network drivers which keep stats allocate their own stats structure
then write a get_stats() function to return them.  It would be nice if
this were done by default.

1) Add a new "stats" field to "struct net_device".
2) Add a new feature field to say "this driver uses the internal one"
3) Have a default "get_stats" which returns NULL if that feature not set.
4) Change callers to check result of get_stats call for NULL, not if
   ->get_stats is set.

This should not break backwards compatibility with older drivers, yet
allow modern drivers to shed some boilerplate code.

Lightly tested: works for a modified lguest network driver.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 5 ++---
 drivers/parisc/led.c            | 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 76d3504505b..cea3783c92c 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3640,9 +3640,8 @@ static struct net_device_stats *bond_get_stats(struct net_device *bond_dev)
 	read_lock_bh(&bond->lock);
 
 	bond_for_each_slave(bond, slave, i) {
-		if (slave->dev->get_stats) {
-			sstats = slave->dev->get_stats(slave->dev);
-
+		sstats = slave->dev->get_stats(slave->dev);
+		if (sstats) {
 			stats->rx_packets += sstats->rx_packets;
 			stats->rx_bytes += sstats->rx_bytes;
 			stats->rx_errors += sstats->rx_errors;
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index d190c05d87e..453e6829756 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -372,9 +372,9 @@ static __inline__ int led_get_net_activity(void)
 		continue;
 	    if (LOOPBACK(in_dev->ifa_list->ifa_local))
 		continue;
-	    if (!dev->get_stats) 
-		continue;
 	    stats = dev->get_stats(dev);
+	    if (!stats)
+		continue;
 	    rx_total += stats->rx_packets;
 	    tx_total += stats->tx_packets;
 	}
-- 
cgit v1.2.3


From 3dbad80ac7632f243b824d469301abb97ec634a1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 29 Mar 2007 19:16:03 -0700
Subject: [NET]: Fix warnings in 3c523.c and ni52.c

We have to put back the cast to "char *" because these
pointers are volatile.

Reported by Andrew Morton.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/3c523.c | 2 +-
 drivers/net/ni52.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c
index a384f7d478a..da1a22c1386 100644
--- a/drivers/net/3c523.c
+++ b/drivers/net/3c523.c
@@ -1145,7 +1145,7 @@ static int elmc_send_packet(struct sk_buff *skb, struct net_device *dev)
 
 	if (len != skb->len)
 		memset((char *) p->xmit_cbuffs[p->xmit_count], 0, ETH_ZLEN);
-	skb_copy_from_linear_data(skb, p->xmit_cbuffs[p->xmit_count], skb->len);
+	skb_copy_from_linear_data(skb, (char *) p->xmit_cbuffs[p->xmit_count], skb->len);
 
 #if (NUM_XMIT_BUFFS == 1)
 #ifdef NO_NOPCOMMANDS
diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c
index 8646698c77d..8dbd6d1900b 100644
--- a/drivers/net/ni52.c
+++ b/drivers/net/ni52.c
@@ -1182,7 +1182,7 @@ static int ni52_send_packet(struct sk_buff *skb, struct net_device *dev)
 	else
 #endif
 	{
-		skb_copy_from_linear_data(skb, p->xmit_cbuffs[p->xmit_count], skb->len);
+		skb_copy_from_linear_data(skb, (char *) p->xmit_cbuffs[p->xmit_count], skb->len);
 		len = skb->len;
 		if (len < ETH_ZLEN) {
 			len = ETH_ZLEN;
-- 
cgit v1.2.3


From 27d7ff46a3498d3debc6ba68fb8014c702b81170 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 31 Mar 2007 11:55:19 -0300
Subject: [SK_BUFF]: Introduce skb_copy_to_linear_data{_offset}

To clearly state the intent of copying to linear sk_buffs, _offset being a
overly long variant but interesting for the sake of saving some bytes.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
---
 drivers/isdn/hysdn/hycapi.c               |  3 ++-
 drivers/net/8139too.c                     |  6 +++---
 drivers/net/appletalk/ltpc.c              |  2 +-
 drivers/net/atari_bionet.c                |  3 ++-
 drivers/net/atari_pamsnet.c               |  3 ++-
 drivers/net/chelsio/sge.c                 | 24 ++++++++++++++++--------
 drivers/net/cxgb3/sge.c                   |  6 +++---
 drivers/net/defxx.c                       |  4 +++-
 drivers/net/e100.c                        |  2 +-
 drivers/net/e1000/e1000_main.c            |  9 ++++++---
 drivers/net/ehea/ehea_main.c              |  4 ++--
 drivers/net/irda/ali-ircc.c               |  2 +-
 drivers/net/irda/au1k_ir.c                |  2 +-
 drivers/net/irda/donauboe.c               |  4 ++--
 drivers/net/irda/mcs7780.c                |  4 ++--
 drivers/net/irda/nsc-ircc.c               |  8 ++++++--
 drivers/net/irda/pxaficp_ir.c             |  2 +-
 drivers/net/irda/stir4200.c               |  2 +-
 drivers/net/irda/via-ircc.c               |  6 +++---
 drivers/net/irda/w83977af_ir.c            |  8 ++++++--
 drivers/net/ixgb/ixgb_main.c              |  9 ++++++---
 drivers/net/loopback.c                    |  3 ++-
 drivers/net/macb.c                        |  7 ++++---
 drivers/net/myri10ge/myri10ge.c           |  2 +-
 drivers/net/sk98lin/skge.c                |  2 +-
 drivers/net/skfp/skfddi.c                 |  2 +-
 drivers/net/sun3lance.c                   |  2 +-
 drivers/net/tokenring/smctr.c             |  4 ++--
 drivers/net/tokenring/tms380tr.c          |  3 ++-
 drivers/net/wan/dscc4.c                   |  3 ++-
 drivers/net/wan/pc300_drv.c               |  2 +-
 drivers/net/wan/pc300_tty.c               |  4 ++--
 drivers/net/wan/z85230.c                  |  2 +-
 drivers/net/wireless/prism54/islpci_eth.c |  4 ++--
 drivers/s390/net/qeth_main.c              |  9 +++++----
 drivers/usb/atm/usbatm.c                  |  4 +++-
 drivers/usb/net/asix.c                    |  2 +-
 37 files changed, 101 insertions(+), 67 deletions(-)

(limited to 'drivers')

diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c
index 4433ce0fca5..f85450146bd 100644
--- a/drivers/isdn/hysdn/hycapi.c
+++ b/drivers/isdn/hysdn/hycapi.c
@@ -399,7 +399,8 @@ static u16 hycapi_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
 			if (_len > 22) {
 				_len2 = _len - 22;
 				skb_copy_from_linear_data(skb, msghead, 22);
-				memcpy(skb->data + _len2, msghead, 22);
+				skb_copy_to_linear_data_offset(skb, _len2,
+							       msghead, 22);
 				skb_pull(skb, _len2);
 				CAPIMSG_SETLEN(skb->data, 22);
 				retval = capilib_data_b3_req(&cinfo->ncci_head,
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index 2101334a8ac..a844b1fe2dc 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -1904,10 +1904,10 @@ static __inline__ void wrap_copy(struct sk_buff *skb, const unsigned char *ring,
 	u32 left = RX_BUF_LEN - offset;
 
 	if (size > left) {
-		memcpy(skb->data, ring + offset, left);
-		memcpy(skb->data+left, ring, size - left);
+		skb_copy_to_linear_data(skb, ring + offset, left);
+		skb_copy_to_linear_data_offset(skb, left, ring, size - left);
 	} else
-		memcpy(skb->data, ring + offset, size);
+		skb_copy_to_linear_data(skb, ring + offset, size);
 }
 #endif
 
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index 43c17c85c97..6a6cbd331a1 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -774,7 +774,7 @@ static int sendup_buffer (struct net_device *dev)
 	skb_pull(skb,3);
 
 	/* copy ddp(s,e)hdr + contents */
-	memcpy(skb->data,(void*)ltdmabuf,len);
+	skb_copy_to_linear_data(skb, ltdmabuf, len);
 
 	skb_reset_transport_header(skb);
 
diff --git a/drivers/net/atari_bionet.c b/drivers/net/atari_bionet.c
index 13dbed368d6..3d87bd2b419 100644
--- a/drivers/net/atari_bionet.c
+++ b/drivers/net/atari_bionet.c
@@ -550,7 +550,8 @@ bionet_poll_rx(struct net_device *dev) {
 
 			/* 'skb->data' points to the start of sk_buff data area.
 			 */
-			memcpy(skb->data, nic_packet->buffer, pkt_len);
+			skb_copy_to_linear_data(skb, nic_packet->buffer,
+						pkt_len);
 			skb->protocol = eth_type_trans( skb, dev );
 			netif_rx(skb);
 			dev->last_rx = jiffies;
diff --git a/drivers/net/atari_pamsnet.c b/drivers/net/atari_pamsnet.c
index 745101d7451..54714409a09 100644
--- a/drivers/net/atari_pamsnet.c
+++ b/drivers/net/atari_pamsnet.c
@@ -793,7 +793,8 @@ pamsnet_poll_rx(struct net_device *dev) {
 
 			/* 'skb->data' points to the start of sk_buff data area.
 			 */
-			memcpy(skb->data, nic_packet->buffer, pkt_len);
+			skb_copy_to_linear_data(skb, nic_packet->buffer,
+						pkt_len);
 			netif_rx(skb);
 			dev->last_rx = jiffies;
 			lp->stats.rx_packets++;
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 1be1bbd1616..e4f874a70fe 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -2095,10 +2095,14 @@ static void espibug_workaround_t204(unsigned long data)
 					0x0, 0x7, 0x43, 0x0, 0x0, 0x0
 				};
 
-				memcpy(skb->data + sizeof(struct cpl_tx_pkt),
-					ch_mac_addr, ETH_ALEN);
-				memcpy(skb->data + skb->len - 10,
-					ch_mac_addr, ETH_ALEN);
+				skb_copy_to_linear_data_offset(skb,
+						    sizeof(struct cpl_tx_pkt),
+							       ch_mac_addr,
+							       ETH_ALEN);
+				skb_copy_to_linear_data_offset(skb,
+							       skb->len - 10,
+							       ch_mac_addr,
+							       ETH_ALEN);
 				skb->cb[0] = 0xff;
 			}
 
@@ -2125,10 +2129,14 @@ static void espibug_workaround(unsigned long data)
 	                if (!skb->cb[0]) {
 	                        u8 ch_mac_addr[ETH_ALEN] =
 	                            {0x0, 0x7, 0x43, 0x0, 0x0, 0x0};
-	                        memcpy(skb->data + sizeof(struct cpl_tx_pkt),
-	                               ch_mac_addr, ETH_ALEN);
-	                        memcpy(skb->data + skb->len - 10, ch_mac_addr,
-	                               ETH_ALEN);
+	                        skb_copy_to_linear_data_offset(skb,
+						     sizeof(struct cpl_tx_pkt),
+							       ch_mac_addr,
+							       ETH_ALEN);
+	                        skb_copy_to_linear_data_offset(skb,
+							       skb->len - 10,
+							       ch_mac_addr,
+							       ETH_ALEN);
 	                        skb->cb[0] = 0xff;
 	                }
 
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 166c959c94b..3666586a483 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -661,7 +661,7 @@ static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
 
 	if (skb) {
 		__skb_put(skb, IMMED_PKT_SIZE);
-		memcpy(skb->data, resp->imm_data, IMMED_PKT_SIZE);
+		skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
 	}
 	return skb;
 }
@@ -1722,11 +1722,11 @@ static void skb_data_init(struct sk_buff *skb, struct sge_fl_page *p,
 {
 	skb->len = len;
 	if (len <= SKB_DATA_SIZE) {
-		memcpy(skb->data, p->va, len);
+		skb_copy_to_linear_data(skb, p->va, len);
 		skb->tail += len;
 		put_page(p->frag.page);
 	} else {
-		memcpy(skb->data, p->va, SKB_DATA_SIZE);
+		skb_copy_to_linear_data(skb, p->va, SKB_DATA_SIZE);
 		skb_shinfo(skb)->frags[0].page = p->frag.page;
 		skb_shinfo(skb)->frags[0].page_offset =
 		    p->frag.page_offset + SKB_DATA_SIZE;
diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c
index 8d29fae1c71..571d82f8008 100644
--- a/drivers/net/defxx.c
+++ b/drivers/net/defxx.c
@@ -3091,7 +3091,9 @@ static void dfx_rcv_queue_process(
 					{
 						/* Receive buffer allocated, pass receive packet up */
 
-						memcpy(skb->data, p_buff + RCV_BUFF_K_PADDING, pkt_len+3);
+						skb_copy_to_linear_data(skb,
+							       p_buff + RCV_BUFF_K_PADDING,
+							       pkt_len + 3);
 					}
 
 					skb_reserve(skb,3);		/* adjust data field so that it points to FC byte */
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 0cefef5e3f0..4d0e0aea72b 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -1769,7 +1769,7 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx)
 
 	/* Align, init, and map the RFD. */
 	skb_reserve(rx->skb, NET_IP_ALIGN);
-	memcpy(rx->skb->data, &nic->blank_rfd, sizeof(struct rfd));
+	skb_copy_to_linear_data(rx->skb, &nic->blank_rfd, sizeof(struct rfd));
 	rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data,
 		RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL);
 
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index e7c93f44f81..610216ec491 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -4224,9 +4224,12 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter,
 			    netdev_alloc_skb(netdev, length + NET_IP_ALIGN);
 			if (new_skb) {
 				skb_reserve(new_skb, NET_IP_ALIGN);
-				memcpy(new_skb->data - NET_IP_ALIGN,
-				       skb->data - NET_IP_ALIGN,
-				       length + NET_IP_ALIGN);
+				skb_copy_to_linear_data_offset(new_skb,
+							       -NET_IP_ALIGN,
+							       (skb->data -
+							        NET_IP_ALIGN),
+							       (length +
+							        NET_IP_ALIGN));
 				/* save the skb in buffer_info as good */
 				buffer_info->skb = skb;
 				skb = new_skb;
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 8b539207263..58364a0ff37 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -391,8 +391,8 @@ static int ehea_poll(struct net_device *dev, int *budget)
 					if (!skb)
 						break;
 				}
-				memcpy(skb->data, ((char*)cqe) + 64,
-				       cqe->num_bytes_transfered - 4);
+				skb_copy_to_linear_data(skb, ((char*)cqe) + 64,
+					       cqe->num_bytes_transfered - 4);
 				ehea_fill_skb(dev, skb, cqe);
 			} else if (rq == 2) {  /* RQ2 */
 				skb = get_skb_by_index(skb_arr_rq2,
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index fb2248a2551..f9c889c0dd0 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -1923,7 +1923,7 @@ static int  ali_ircc_dma_receive_complete(struct ali_ircc_cb *self)
 			
 			/* Copy frame without CRC, CRC is removed by hardware*/
 			skb_put(skb, len);
-			memcpy(skb->data, self->rx_buff.data, len);
+			skb_copy_to_linear_data(skb, self->rx_buff.data, len);
 
 			/* Move to next frame */
 			self->rx_buff.data += len;
diff --git a/drivers/net/irda/au1k_ir.c b/drivers/net/irda/au1k_ir.c
index cdd1f6c1e74..4dbdfaaf37b 100644
--- a/drivers/net/irda/au1k_ir.c
+++ b/drivers/net/irda/au1k_ir.c
@@ -604,7 +604,7 @@ static int au1k_irda_rx(struct net_device *dev)
 				skb_put(skb, count);
 			else
 				skb_put(skb, count-2);
-			memcpy(skb->data, (void *)pDB->vaddr, count-2);
+			skb_copy_to_linear_data(skb, pDB->vaddr, count - 2);
 			skb->dev = dev;
 			skb_reset_mac_header(skb);
 			skb->protocol = htons(ETH_P_IRDA);
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 9987a0dc1ea..3ca47bf6dfe 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1282,8 +1282,8 @@ dumpbufs(self->rx_bufs[self->rxs],len,'<');
                       skb_reserve (skb, 1);
 
                       skb_put (skb, len);
-                      memcpy (skb->data, self->rx_bufs[self->rxs], len);
-
+                      skb_copy_to_linear_data(skb, self->rx_bufs[self->rxs],
+					      len);
                       self->stats.rx_packets++;
                       skb->dev = self->netdev;
                       skb_reset_mac_header(skb);
diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index 4b0037e498f..54d1d543c92 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -426,7 +426,7 @@ static void mcs_unwrap_mir(struct mcs_cb *mcs, __u8 *buf, int len)
 	}
 
 	skb_reserve(skb, 1);
-	memcpy(skb->data, buf, new_len);
+	skb_copy_to_linear_data(skb, buf, new_len);
 	skb_put(skb, new_len);
 	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
@@ -479,7 +479,7 @@ static void mcs_unwrap_fir(struct mcs_cb *mcs, __u8 *buf, int len)
 	}
 
 	skb_reserve(skb, 1);
-	memcpy(skb->data, buf, new_len);
+	skb_copy_to_linear_data(skb, buf, new_len);
 	skb_put(skb, new_len);
 	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index 0ff99271413..d96c89751a7 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -1868,10 +1868,14 @@ static int nsc_ircc_dma_receive_complete(struct nsc_ircc_cb *self, int iobase)
 			/* Copy frame without CRC */
 			if (self->io.speed < 4000000) {
 				skb_put(skb, len-2);
-				memcpy(skb->data, self->rx_buff.data, len-2);
+				skb_copy_to_linear_data(skb,
+							self->rx_buff.data,
+							len - 2);
 			} else {
 				skb_put(skb, len-4);
-				memcpy(skb->data, self->rx_buff.data, len-4);
+				skb_copy_to_linear_data(skb,
+							self->rx_buff.data,
+							len - 4);
 			}
 
 			/* Move to next frame */
diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c
index b3e1107420a..fb196fd9185 100644
--- a/drivers/net/irda/pxaficp_ir.c
+++ b/drivers/net/irda/pxaficp_ir.c
@@ -386,7 +386,7 @@ static void pxa_irda_fir_irq_eif(struct pxa_irda *si, struct net_device *dev, in
 
 		/* Align IP header to 20 bytes  */
 		skb_reserve(skb, 1);
-		memcpy(skb->data, si->dma_rx_buff, len);
+		skb_copy_to_linear_data(skb, si->dma_rx_buff, len);
 		skb_put(skb, len);
 
 		/* Feed it to IrLAP  */
diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c
index aec86a21434..755aa444a4d 100644
--- a/drivers/net/irda/stir4200.c
+++ b/drivers/net/irda/stir4200.c
@@ -348,7 +348,7 @@ static void fir_eof(struct stir_cb *stir)
 		}
 		skb_reserve(nskb, 1);
 		skb = nskb;
-		memcpy(nskb->data, rx_buff->data, len);
+		skb_copy_to_linear_data(nskb, rx_buff->data, len);
 	} else {
 		nskb = dev_alloc_skb(rx_buff->truesize);
 		if (unlikely(!nskb)) {
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index 45bbd668615..ff5358574d0 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -1189,7 +1189,7 @@ F01_E */
 		skb_reserve(skb, 1);
 		skb_put(skb, len - 4);
 
-		memcpy(skb->data, self->rx_buff.data, len - 4);
+		skb_copy_to_linear_data(skb, self->rx_buff.data, len - 4);
 		IRDA_DEBUG(2, "%s(): len=%x.rx_buff=%p\n", __FUNCTION__,
 			   len - 4, self->rx_buff.data);
 
@@ -1234,7 +1234,7 @@ static int upload_rxdata(struct via_ircc_cb *self, int iobase)
 	}
 	skb_reserve(skb, 1);
 	skb_put(skb, len - 4 + 1);
-	memcpy(skb->data, self->rx_buff.data, len - 4 + 1);
+	skb_copy_to_linear_data(skb, self->rx_buff.data, len - 4 + 1);
 	st_fifo->tail++;
 	st_fifo->len++;
 	if (st_fifo->tail > MAX_RX_WINDOW)
@@ -1303,7 +1303,7 @@ static int RxTimerHandler(struct via_ircc_cb *self, int iobase)
 			}
 			skb_reserve(skb, 1);
 			skb_put(skb, len - 4);
-			memcpy(skb->data, self->rx_buff.data, len - 4);
+			skb_copy_to_linear_data(skb, self->rx_buff.data, len - 4);
 
 			IRDA_DEBUG(2, "%s(): len=%x.head=%x\n", __FUNCTION__,
 				   len - 4, st_fifo->head);
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index 0d4a68618fc..5182e800cc1 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -908,10 +908,14 @@ int w83977af_dma_receive_complete(struct w83977af_ir *self)
 			/* Copy frame without CRC */
 			if (self->io.speed < 4000000) {
 				skb_put(skb, len-2);
-				memcpy(skb->data, self->rx_buff.data, len-2);
+				skb_copy_to_linear_data(skb,
+							self->rx_buff.data,
+							len - 2);
 			} else {
 				skb_put(skb, len-4);
-				memcpy(skb->data, self->rx_buff.data, len-4);
+				skb_copy_to_linear_data(skb,
+							self->rx_buff.data,
+							len - 4);
 			}
 
 			/* Move to next frame */
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index e729ced52dc..dfde80e54ae 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -2017,9 +2017,12 @@ ixgb_clean_rx_irq(struct ixgb_adapter *adapter)
 			    netdev_alloc_skb(netdev, length + NET_IP_ALIGN);
 			if (new_skb) {
 				skb_reserve(new_skb, NET_IP_ALIGN);
-				memcpy(new_skb->data - NET_IP_ALIGN,
-				       skb->data - NET_IP_ALIGN,
-				       length + NET_IP_ALIGN);
+				skb_copy_to_linear_data_offset(new_skb,
+							       -NET_IP_ALIGN,
+							       (skb->data -
+							        NET_IP_ALIGN),
+							       (length +
+							        NET_IP_ALIGN));
 				/* save the skb in buffer_info as good */
 				buffer_info->skb = skb;
 				skb = new_skb;
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 20b5cb10136..6df673a058c 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -94,7 +94,8 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 		skb_set_mac_header(nskb, -ETH_HLEN);
 		skb_reset_network_header(nskb);
 		iph = ip_hdr(nskb);
-		memcpy(nskb->data, skb_network_header(skb), doffset);
+		skb_copy_to_linear_data(nskb, skb_network_header(skb),
+					doffset);
 		if (skb_copy_bits(skb,
 				  doffset + offset,
 				  nskb->data + doffset,
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 9e233f8216a..0e04f7ac3f2 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -367,9 +367,10 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 			BUG_ON(frag != last_frag);
 			frag_len = len - offset;
 		}
-		memcpy(skb->data + offset,
-		       bp->rx_buffers + (RX_BUFFER_SIZE * frag),
-		       frag_len);
+		skb_copy_to_linear_data_offset(skb, offset,
+					       (bp->rx_buffers +
+					        (RX_BUFFER_SIZE * frag)),
+					       frag_len);
 		offset += RX_BUFFER_SIZE;
 		bp->rx_ring[frag].addr &= ~MACB_BIT(RX_USED);
 		wmb();
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index e4b69a0485b..16e3c4315e8 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -879,7 +879,7 @@ myri10ge_rx_skb_build(struct sk_buff *skb, u8 * va,
 	 * skb_pull() (for ether_pad and eth_type_trans()) requires
 	 * the beginning of the packet in skb_headlen(), move it
 	 * manually */
-	memcpy(skb->data, va, hlen);
+	skb_copy_to_linear_data(skb, va, hlen);
 	skb_shinfo(skb)->frags[0].page_offset += hlen;
 	skb_shinfo(skb)->frags[0].size -= hlen;
 	skb->data_len -= hlen;
diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c
index b987a5c3f42..e0a93005e6d 100644
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -2127,7 +2127,7 @@ rx_start:
 						    (dma_addr_t) PhysAddr,
 						    FrameLength,
 						    PCI_DMA_FROMDEVICE);
-			memcpy(pNewMsg->data, pMsg, FrameLength);
+			skb_copy_to_linear_data(pNewMsg, pMsg, FrameLength);
 
 			pci_dma_sync_single_for_device(pAC->PciDev,
 						       (dma_addr_t) PhysAddr,
diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c
index 064e7c21c01..a7ef6c8b772 100644
--- a/drivers/net/skfp/skfddi.c
+++ b/drivers/net/skfp/skfddi.c
@@ -1937,7 +1937,7 @@ int mac_drv_rx_init(struct s_smc *smc, int len, int fc,
 	}
 	skb_reserve(skb, 3);
 	skb_put(skb, len);
-	memcpy(skb->data, look_ahead, len);
+	skb_copy_to_linear_data(skb, look_ahead, len);
 
 	// deliver frame to system
 	skb->protocol = fddi_type_trans(skb, smc->os.dev);
diff --git a/drivers/net/sun3lance.c b/drivers/net/sun3lance.c
index 327ed7962fb..791e081fdc1 100644
--- a/drivers/net/sun3lance.c
+++ b/drivers/net/sun3lance.c
@@ -853,7 +853,7 @@ static int lance_rx( struct net_device *dev )
 
 				skb_reserve( skb, 2 );	/* 16 byte align */
 				skb_put( skb, pkt_len );	/* Make room */
-//			        memcpy( skb->data, PKTBUF_ADDR(head), pkt_len );
+//			        skb_copy_to_linear_data(skb, PKTBUF_ADDR(head), pkt_len);
 				eth_copy_and_sum(skb,
 						 PKTBUF_ADDR(head),
 						 pkt_len, 0);
diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c
index b0296d80e46..9bbea5c8acf 100644
--- a/drivers/net/tokenring/smctr.c
+++ b/drivers/net/tokenring/smctr.c
@@ -3889,7 +3889,7 @@ static int smctr_process_rx_packet(MAC_HEADER *rmf, __u16 size,
 
                 /* Slide data into a sleek skb. */
                 skb_put(skb, skb->len);
-                memcpy(skb->data, rmf, skb->len);
+                skb_copy_to_linear_data(skb, rmf, skb->len);
 
                 /* Update Counters */
                 tp->MacStat.rx_packets++;
@@ -4475,7 +4475,7 @@ static int smctr_rx_frame(struct net_device *dev)
 				if (skb) {
                                 	skb_put(skb, rx_size);
 
-                                	memcpy(skb->data, pbuff, rx_size);
+					skb_copy_to_linear_data(skb, pbuff, rx_size);
 
                                 	/* Update Counters */
                                 	tp->MacStat.rx_packets++;
diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c
index e6f0817c350..12bd294045a 100644
--- a/drivers/net/tokenring/tms380tr.c
+++ b/drivers/net/tokenring/tms380tr.c
@@ -2178,7 +2178,8 @@ static void tms380tr_rcv_status_irq(struct net_device *dev)
 				|| rpl->SkbStat == SKB_DMA_DIRECT))
 			{
 				if(rpl->SkbStat == SKB_DATA_COPY)
-					memcpy(skb->data, ReceiveDataPtr, Length);
+					skb_copy_to_linear_data(skb, ReceiveDataPtr,
+						       Length);
 
 				/* Deliver frame to system */
 				rpl->Skb = NULL;
diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c
index 25021a7992a..dca02447145 100644
--- a/drivers/net/wan/dscc4.c
+++ b/drivers/net/wan/dscc4.c
@@ -1904,7 +1904,8 @@ static struct sk_buff *dscc4_init_dummy_skb(struct dscc4_dev_priv *dpriv)
 		struct TxFD *tx_fd = dpriv->tx_fd + last;
 
 		skb->len = DUMMY_SKB_SIZE;
-		memcpy(skb->data, version, strlen(version)%DUMMY_SKB_SIZE);
+		skb_copy_to_linear_data(skb, version,
+					strlen(version) % DUMMY_SKB_SIZE);
 		tx_fd->state = FrameEnd | TO_STATE_TX(DUMMY_SKB_SIZE);
 		tx_fd->data = pci_map_single(dpriv->pci_priv->pdev, skb->data,
 					     DUMMY_SKB_SIZE, PCI_DMA_TODEVICE);
diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c
index 8ba75bb1732..999bf71937c 100644
--- a/drivers/net/wan/pc300_drv.c
+++ b/drivers/net/wan/pc300_drv.c
@@ -1759,7 +1759,7 @@ cpc_trace(struct net_device *dev, struct sk_buff *skb_main, char rx_tx)
 	skb->pkt_type = PACKET_HOST;
 	skb->len = 10 + skb_main->len;
 
-	memcpy(skb->data, dev->name, 5);
+	skb_copy_to_linear_data(skb, dev->name, 5);
 	skb->data[5] = '[';
 	skb->data[6] = rx_tx;
 	skb->data[7] = ']';
diff --git a/drivers/net/wan/pc300_tty.c b/drivers/net/wan/pc300_tty.c
index de02a07259c..07dbdfbfc15 100644
--- a/drivers/net/wan/pc300_tty.c
+++ b/drivers/net/wan/pc300_tty.c
@@ -1007,13 +1007,13 @@ static void cpc_tty_trace(pc300dev_t *dev, char* buf, int len, char rxtx)
 	skb->pkt_type = PACKET_HOST; 
 	skb->len = 10 + len; 
 
-	memcpy(skb->data,dev->dev->name,5);
+	skb_copy_to_linear_data(skb, dev->dev->name, 5);
 	skb->data[5] = '['; 
 	skb->data[6] = rxtx; 
 	skb->data[7] = ']'; 
 	skb->data[8] = ':'; 
 	skb->data[9] = ' '; 
-	memcpy(&skb->data[10], buf, len); 
+	skb_copy_to_linear_data_offset(skb, 10, buf, len);
 	netif_rx(skb); 
 } 	
 
diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 9432d2ce774..98ef400908b 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -1656,7 +1656,7 @@ static void z8530_rx_done(struct z8530_channel *c)
 		else
 		{
 			skb_put(skb, ct);
-			memcpy(skb->data, rxb, ct);
+			skb_copy_to_linear_data(skb, rxb, ct);
 			c->stats.rx_packets++;
 			c->stats.rx_bytes+=ct;
 		}
diff --git a/drivers/net/wireless/prism54/islpci_eth.c b/drivers/net/wireless/prism54/islpci_eth.c
index 7d8bff1dbc4..dd070cccf32 100644
--- a/drivers/net/wireless/prism54/islpci_eth.c
+++ b/drivers/net/wireless/prism54/islpci_eth.c
@@ -136,7 +136,7 @@ islpci_eth_transmit(struct sk_buff *skb, struct net_device *ndev)
 				printk("islpci_eth_transmit:wds_mac\n");
 #endif
 				memmove(skb->data + 6, src, skb->len);
-				memcpy(skb->data, wds_mac, 6);
+				skb_copy_to_linear_data(skb, wds_mac, 6);
 			} else {
 				memmove(skb->data, src, skb->len);
 			}
@@ -165,7 +165,7 @@ islpci_eth_transmit(struct sk_buff *skb, struct net_device *ndev)
 				skb_copy_from_linear_data(skb,
 							  newskb->data + 6,
 							  skb->len);
-				memcpy(newskb->data, wds_mac, 6);
+				skb_copy_to_linear_data(newskb, wds_mac, 6);
 #ifdef ISLPCI_ETH_DEBUG
 				printk("islpci_eth_transmit:wds_mac\n");
 #endif
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index df7f279ec40..ad7792dc1a0 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -2501,7 +2501,8 @@ qeth_process_inbound_buffer(struct qeth_card *card,
 			vlan_tag = qeth_rebuild_skb(card, skb, hdr);
 		else { /*in case of OSN*/
 			skb_push(skb, sizeof(struct qeth_hdr));
-			memcpy(skb->data, hdr, sizeof(struct qeth_hdr));
+			skb_copy_to_linear_data(skb, hdr,
+						sizeof(struct qeth_hdr));
 		}
 		/* is device UP ? */
 		if (!(card->dev->flags & IFF_UP)){
@@ -3870,9 +3871,9 @@ __qeth_prepare_skb(struct qeth_card *card, struct sk_buff *skb, int ipv)
 		 * memcpys instead of one memmove to save cycles.
 		 */
 		skb_push(skb, VLAN_HLEN);
-		memcpy(skb->data, skb->data + 4, 4);
-		memcpy(skb->data + 4, skb->data + 8, 4);
-		memcpy(skb->data + 8, skb->data + 12, 4);
+		skb_copy_to_linear_data(skb, skb->data + 4, 4);
+		skb_copy_to_linear_data_offset(skb, 4, skb->data + 8, 4);
+		skb_copy_to_linear_data_offset(skb, 8, skb->data + 12, 4);
 		tag = (u16 *)(skb->data + 12);
 		/*
 		 * first two bytes  = ETH_P_8021Q (0x8100)
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index d287c575522..d3e2c5f90a2 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -396,7 +396,9 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
 			goto out;	/* atm_charge increments rx_drop */
 		}
 
-		memcpy(skb->data, skb_tail_pointer(sarb) - pdu_length, length);
+		skb_copy_to_linear_data(skb,
+					skb_tail_pointer(sarb) - pdu_length,
+					length);
 		__skb_put(skb, length);
 
 		vdbg("%s: sending skb 0x%p, skb->len %u, skb->truesize %u",
diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c
index f56e2dab371..d5ef97bc4d0 100644
--- a/drivers/usb/net/asix.c
+++ b/drivers/usb/net/asix.c
@@ -352,7 +352,7 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 	skb_push(skb, 4);
 	packet_len = (((skb->len - 4) ^ 0x0000ffff) << 16) + (skb->len - 4);
 	cpu_to_le32s(&packet_len);
-	memcpy(skb->data, &packet_len, sizeof(packet_len));
+	skb_copy_to_linear_data(skb, &packet_len, sizeof(packet_len));
 
 	if ((skb->len % 512) == 0) {
 		cpu_to_le32s(&padbytes);
-- 
cgit v1.2.3


From 628592ccdbbb5bb751217cf02e2e7abb500d7ffe Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 23 Apr 2007 17:06:40 -0700
Subject: [NETDRV]: Perform missing csum_offset conversions

When csum_offset was introduced we did a conversion from csum to
csum_offset where applicable.  A couple of drivers were missed in
this process.

It was harmless to begin with since the two fields coincided.  Now
that we've made them different with the addition of csum_start, the
missed drivers must be converted or they can't send packets out at
all that require checksum offload.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/atl1/atl1_main.c   | 2 +-
 drivers/net/e1000/e1000_main.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index d60c2217332..4b1d4d153ec 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1328,7 +1328,7 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb,
 
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		cso = skb_transport_offset(skb);
-		css = cso + skb->csum;
+		css = cso + skb->csum_offset;
 		if (unlikely(cso & 0x1)) {
 			printk(KERN_DEBUG "%s: payload offset != even number\n",
 				atl1_driver_name);
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 610216ec491..48e2ade704d 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2959,7 +2959,8 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 
 		context_desc->lower_setup.ip_config = 0;
 		context_desc->upper_setup.tcp_fields.tucss = css;
-		context_desc->upper_setup.tcp_fields.tucso = css + skb->csum;
+		context_desc->upper_setup.tcp_fields.tucso =
+			css + skb->csum_offset;
 		context_desc->upper_setup.tcp_fields.tucse = 0;
 		context_desc->tcp_seg_setup.data = 0;
 		context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT);
-- 
cgit v1.2.3


From 33036807b32d5ed1f4fdfe2d5e6bab2bb260b9f7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 10 Apr 2007 13:25:40 -0700
Subject: [NET]: loopback driver can use loopback_dev integrated
 net_device_stats

Rusty added a new 'stats' field to struct net_device.

loopback driver can use it instead of declaring another struct
net_device_stats This saves some memory.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/loopback.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 6df673a058c..6ba6ed2b480 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -164,11 +164,9 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static struct net_device_stats loopback_stats;
-
 static struct net_device_stats *get_stats(struct net_device *dev)
 {
-	struct net_device_stats *stats = &loopback_stats;
+	struct net_device_stats *stats = &dev->stats;
 	unsigned long bytes = 0;
 	unsigned long packets = 0;
 	int i;
@@ -208,7 +206,6 @@ static const struct ethtool_ops loopback_ethtool_ops = {
 struct net_device loopback_dev = {
 	.name	 		= "lo",
 	.get_stats		= &get_stats,
-	.priv			= &loopback_stats,
 	.mtu			= (16 * 1024) + 20 + 20 + 12,
 	.hard_start_xmit	= loopback_xmit,
 	.hard_header		= eth_header,
-- 
cgit v1.2.3


From af65bdfce98d7965fbe93a48b8128444a2eea024 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 20 Apr 2007 14:14:21 -0700
Subject: [NETLINK]: Switch cb_lock spinlock to mutex and allow to override it

Switch cb_lock to mutex and allow netlink kernel users to override it
with a subsystem specific mutex for consistent locking in dump callbacks.
All netlink_dump_start users have been audited not to rely on any
side-effects of the previously used spinlock.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/connector/connector.c       | 2 +-
 drivers/scsi/scsi_netlink.c         | 3 ++-
 drivers/scsi/scsi_transport_iscsi.c | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 7f9c4fb7e5b..a7b9e9bb3e8 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -448,7 +448,7 @@ static int __devinit cn_init(void)
 
 	dev->nls = netlink_kernel_create(NETLINK_CONNECTOR,
 					 CN_NETLINK_USERS + 0xf,
-					 dev->input, THIS_MODULE);
+					 dev->input, NULL, THIS_MODULE);
 	if (!dev->nls)
 		return -EIO;
 
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 45646a28524..4bf9aa547c7 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -168,7 +168,8 @@ scsi_netlink_init(void)
 	}
 
 	scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT,
-				SCSI_NL_GRP_CNT, scsi_nl_rcv, THIS_MODULE);
+				SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL,
+				THIS_MODULE);
 	if (!scsi_nl_sock) {
 		printk(KERN_ERR "%s: register of recieve handler failed\n",
 				__FUNCTION__);
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 10590cd7e9e..aabaa0576ab 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1435,7 +1435,7 @@ static __init int iscsi_transport_init(void)
 	if (err)
 		goto unregister_conn_class;
 
-	nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx,
+	nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, NULL,
 			THIS_MODULE);
 	if (!nls) {
 		err = -ENOBUFS;
-- 
cgit v1.2.3


From bfafb26e11849fe99e03cc1902a91f7f65354e47 Mon Sep 17 00:00:00 2001
From: Florian Zumbiehl <florz@florz.de>
Date: Fri, 20 Apr 2007 16:56:31 -0700
Subject: [PPPoE]: miscellaneous smaller cleanups

below is a patch that just removes dead code/initializers without any
effect (first access is an assignment) that I stumbled accross while
reading the source.

Signed-off-by: Florian Zumbiehl <florz@florz.de>
Acked-by: Michal Ostrowski <mostrows@earthlink.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pppoe.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index e9fb616ff66..f761a9aae4c 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -207,7 +207,7 @@ static inline struct pppox_sock *get_item(unsigned long sid,
 
 static inline struct pppox_sock *get_item_by_addr(struct sockaddr_pppox *sp)
 {
-	struct net_device *dev = NULL;
+	struct net_device *dev;
 	int ifindex;
 
 	dev = dev_get_by_name(sp->sa_addr.pppoe.dev);
@@ -222,9 +222,6 @@ static inline int set_item(struct pppox_sock *po)
 {
 	int i;
 
-	if (!po)
-		return -EINVAL;
-
 	write_lock_bh(&pppoe_hash_lock);
 	i = __set_item(po);
 	write_unlock_bh(&pppoe_hash_lock);
@@ -344,7 +341,7 @@ static struct notifier_block pppoe_notifier = {
 static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
 {
 	struct pppox_sock *po = pppox_sk(sk);
-	struct pppox_sock *relay_po = NULL;
+	struct pppox_sock *relay_po;
 
 	if (sk->sk_state & PPPOX_BOUND) {
 		struct pppoe_hdr *ph = pppoe_hdr(skb);
@@ -514,7 +511,6 @@ static int pppoe_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
 	struct pppox_sock *po;
-	int error = 0;
 
 	if (!sk)
 		return 0;
@@ -543,7 +539,7 @@ static int pppoe_release(struct socket *sock)
 	skb_queue_purge(&sk->sk_receive_queue);
 	sock_put(sk);
 
-	return error;
+	return 0;
 }
 
 
@@ -762,10 +758,10 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd,
 static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock,
 		  struct msghdr *m, size_t total_len)
 {
-	struct sk_buff *skb = NULL;
+	struct sk_buff *skb;
 	struct sock *sk = sock->sk;
 	struct pppox_sock *po = pppox_sk(sk);
-	int error = 0;
+	int error;
 	struct pppoe_hdr hdr;
 	struct pppoe_hdr *ph;
 	struct net_device *dev;
@@ -930,7 +926,7 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
 		  struct msghdr *m, size_t total_len, int flags)
 {
 	struct sock *sk = sock->sk;
-	struct sk_buff *skb = NULL;
+	struct sk_buff *skb;
 	int error = 0;
 
 	if (sk->sk_state & PPPOX_BOUND) {
@@ -941,9 +937,8 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
 	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
 				flags & MSG_DONTWAIT, &error);
 
-	if (error < 0) {
+	if (error < 0)
 		goto end;
-	}
 
 	m->msg_namelen = 0;
 
@@ -986,7 +981,7 @@ out:
 
 static __inline__ struct pppox_sock *pppoe_get_idx(loff_t pos)
 {
-	struct pppox_sock *po = NULL;
+	struct pppox_sock *po;
 	int i = 0;
 
 	for (; i < PPPOE_HASH_SIZE; i++) {
-- 
cgit v1.2.3


From 74b885cf86def9bc836772e3c1788c00b72a35c9 Mon Sep 17 00:00:00 2001
From: Florian Zumbiehl <florz@florz.de>
Date: Fri, 20 Apr 2007 16:57:27 -0700
Subject: [PPPOE]: race between interface going down and connect()

below you find a patch that (hopefully) fixes a race between an interface
going down and a connect() to a peer on that interface. Before,
connect() would determine that an interface is up, then the interface
could go down and all entries referring to that interface in the
item_hash_table would be marked as ZOMBIEs and their references to
the device would be freed, and after that, connect() would put a new
entry into the hash table referring to the device that meanwhile is
down already - which also would cause unregister_netdevice() to wait
until the socket has been release()d.

This patch does not suffice if we are not allowed to accept connect()s
referring to a device that we already acked a NETDEV_GOING_DOWN for
(that is: all references are only guaranteed to be freed after
NETDEV_DOWN has been acknowledged, not necessarily after the
NETDEV_GOING_DOWN already). And if we are allowed to, we could avoid
looking through the hash table upon NETDEV_GOING_DOWN completely and
only do that once we get the NETDEV_DOWN ...

mostrows:
pppoe_flush_dev is called on NETDEV_GOING_DOWN and NETDEV_DOWN to deal with
this "late connect" issue.  Ideally one would hope to notify users at the
"NETDEV_GOING_DOWN" phase (just to pretend to be nice).  However, it is the
NETDEV_DOWN scan that takes all the responsibility for ensuring nobody is
hanging around at that time.

Signed-off-by: Florian Zumbiehl <florz@florz.de>
Acked-by: Michal Ostrowski <mostrows@earthlink.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pppoe.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index f761a9aae4c..bc4fc30bc85 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -218,17 +218,6 @@ static inline struct pppox_sock *get_item_by_addr(struct sockaddr_pppox *sp)
 	return get_item(sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote, ifindex);
 }
 
-static inline int set_item(struct pppox_sock *po)
-{
-	int i;
-
-	write_lock_bh(&pppoe_hash_lock);
-	i = __set_item(po);
-	write_unlock_bh(&pppoe_hash_lock);
-
-	return i;
-}
-
 static inline struct pppox_sock *delete_item(unsigned long sid, char *addr, int ifindex)
 {
 	struct pppox_sock *ret;
@@ -595,14 +584,18 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 		po->pppoe_dev = dev;
 		po->pppoe_ifindex = dev->ifindex;
 
-		if (!(dev->flags & IFF_UP))
+		write_lock_bh(&pppoe_hash_lock);
+		if (!(dev->flags & IFF_UP)){
+			write_unlock_bh(&pppoe_hash_lock);
 			goto err_put;
+		}
 
 		memcpy(&po->pppoe_pa,
 		       &sp->sa_addr.pppoe,
 		       sizeof(struct pppoe_addr));
 
-		error = set_item(po);
+		error = __set_item(po);
+		write_unlock_bh(&pppoe_hash_lock);
 		if (error < 0)
 			goto err_put;
 
-- 
cgit v1.2.3


From 202a03acf9994076055df40ae093a5c5474ad0bd Mon Sep 17 00:00:00 2001
From: Florian Zumbiehl <florz@florz.de>
Date: Fri, 20 Apr 2007 16:58:14 -0700
Subject: [PPPOE]: memory leak when socket is release()d before PPPIOCGCHAN has
 been called on it

below you find a patch that fixes a memory leak when a PPPoE socket is
release()d after it has been connect()ed, but before the PPPIOCGCHAN ioctl
ever has been called on it.

This is somewhat of a security problem, too, since PPPoE sockets can be
created by any user, so any user can easily allocate all the machine's
RAM to non-swappable address space and thus DoS the system.

Is there any specific reason for PPPoE sockets being available to any
unprivileged process, BTW? After all, you need a packet socket for the
discovery stage anyway, so it's unlikely that any unprivileged process
will ever need to create a PPPoE socket, no? Allocating all session IDs
for a known AC is a kind of DoS, too, after all - with Juniper ERXes,
this is really easy, actually, since they don't ever assign session ids
above 8000 ...

Signed-off-by: Florian Zumbiehl <florz@florz.de>
Acked-by: Michal Ostrowski <mostrows@earthlink.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pppox.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/net/pppox.c b/drivers/net/pppox.c
index 9315046b3f5..3f8115db4d5 100644
--- a/drivers/net/pppox.c
+++ b/drivers/net/pppox.c
@@ -58,7 +58,7 @@ void pppox_unbind_sock(struct sock *sk)
 {
 	/* Clear connection to ppp device, if attached. */
 
-	if (sk->sk_state & (PPPOX_BOUND | PPPOX_ZOMBIE)) {
+	if (sk->sk_state & (PPPOX_BOUND | PPPOX_CONNECTED | PPPOX_ZOMBIE)) {
 		ppp_unregister_channel(&pppox_sk(sk)->chan);
 		sk->sk_state = PPPOX_DEAD;
 	}
-- 
cgit v1.2.3


From 42dc9cd54b7290f862874a2544e50395e5719985 Mon Sep 17 00:00:00 2001
From: Michal Ostrowski <mostrows@earthlink.net>
Date: Fri, 20 Apr 2007 16:59:24 -0700
Subject: [PPPOE]: Fix device tear-down notification.

pppoe_flush_dev() kicks all sockets bound to a device that is going down.
In doing so, locks must be taken in the right order consistently (sock lock,
followed by the pppoe_hash_lock).  However, the scan process is based on
us holding the sock lock.  So, when something is found in the scan we must
release the lock we're holding and grab the sock lock.

This patch fixes race conditions between this code and pppoe_release(),
both of which perform similar functions but would naturally prefer to grab
locks in opposing orders.  Both code paths are now going after these locks
in a consistent manner.

pppoe_hash_lock protects the contents of the "pppox_sock" objects that reside
inside the hash.  Thus, NULL'ing out the pppoe_dev field should be done
under the protection of this lock.

Signed-off-by: Michal Ostrowski <mostrows@earthlink.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pppoe.c | 87 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 50 insertions(+), 37 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index bc4fc30bc85..6f98834e6ac 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -241,54 +241,53 @@ static inline struct pppox_sock *delete_item(unsigned long sid, char *addr, int
 static void pppoe_flush_dev(struct net_device *dev)
 {
 	int hash;
-
 	BUG_ON(dev == NULL);
 
-	read_lock_bh(&pppoe_hash_lock);
+	write_lock_bh(&pppoe_hash_lock);
 	for (hash = 0; hash < PPPOE_HASH_SIZE; hash++) {
 		struct pppox_sock *po = item_hash_table[hash];
 
 		while (po != NULL) {
-			if (po->pppoe_dev == dev) {
-				struct sock *sk = sk_pppox(po);
-
-				sock_hold(sk);
-				po->pppoe_dev = NULL;
+			struct sock *sk = sk_pppox(po);
+			if (po->pppoe_dev != dev) {
+				po = po->next;
+				continue;
+			}
+			po->pppoe_dev = NULL;
+			dev_put(dev);
 
-				/* We hold a reference to SK, now drop the
-				 * hash table lock so that we may attempt
-				 * to lock the socket (which can sleep).
-				 */
-				read_unlock_bh(&pppoe_hash_lock);
 
-				lock_sock(sk);
+			/* We always grab the socket lock, followed by the
+			 * pppoe_hash_lock, in that order.  Since we should
+			 * hold the sock lock while doing any unbinding,
+			 * we need to release the lock we're holding.
+			 * Hold a reference to the sock so it doesn't disappear
+			 * as we're jumping between locks.
+			 */
 
-				if (sk->sk_state &
-				    (PPPOX_CONNECTED | PPPOX_BOUND)) {
-					pppox_unbind_sock(sk);
-					dev_put(dev);
-					sk->sk_state = PPPOX_ZOMBIE;
-					sk->sk_state_change(sk);
-				}
+			sock_hold(sk);
 
-				release_sock(sk);
+			write_unlock_bh(&pppoe_hash_lock);
+			lock_sock(sk);
 
-				sock_put(sk);
+			if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
+				pppox_unbind_sock(sk);
+				sk->sk_state = PPPOX_ZOMBIE;
+				sk->sk_state_change(sk);
+			}
 
-				read_lock_bh(&pppoe_hash_lock);
+			release_sock(sk);
+			sock_put(sk);
 
-				/* Now restart from the beginning of this
-				 * hash chain.  We always NULL out pppoe_dev
-				 * so we are guaranteed to make forward
-				 * progress.
-				 */
-				po = item_hash_table[hash];
-				continue;
-			}
-			po = po->next;
+			/* Restart scan at the beginning of this hash chain.
+			 * While the lock was dropped the chain contents may
+			 * have changed.
+			 */
+			write_lock_bh(&pppoe_hash_lock);
+			po = item_hash_table[hash];
 		}
 	}
-	read_unlock_bh(&pppoe_hash_lock);
+	write_unlock_bh(&pppoe_hash_lock);
 }
 
 static int pppoe_device_event(struct notifier_block *this,
@@ -504,28 +503,42 @@ static int pppoe_release(struct socket *sock)
 	if (!sk)
 		return 0;
 
-	if (sock_flag(sk, SOCK_DEAD))
+	lock_sock(sk);
+	if (sock_flag(sk, SOCK_DEAD)){
+		release_sock(sk);
 		return -EBADF;
+	}
 
 	pppox_unbind_sock(sk);
 
 	/* Signal the death of the socket. */
 	sk->sk_state = PPPOX_DEAD;
 
+
+	/* Write lock on hash lock protects the entire "po" struct from
+	 * concurrent updates via pppoe_flush_dev. The "po" struct should
+	 * be considered part of the hash table contents, thus protected
+	 * by the hash table lock */
+	write_lock_bh(&pppoe_hash_lock);
+
 	po = pppox_sk(sk);
 	if (po->pppoe_pa.sid) {
-		delete_item(po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex);
+		__delete_item(po->pppoe_pa.sid,
+			      po->pppoe_pa.remote, po->pppoe_ifindex);
 	}
 
-	if (po->pppoe_dev)
+	if (po->pppoe_dev) {
 		dev_put(po->pppoe_dev);
+		po->pppoe_dev = NULL;
+	}
 
-	po->pppoe_dev = NULL;
+	write_unlock_bh(&pppoe_hash_lock);
 
 	sock_orphan(sk);
 	sock->sk = NULL;
 
 	skb_queue_purge(&sk->sk_receive_queue);
+	release_sock(sk);
 	sock_put(sk);
 
 	return 0;
-- 
cgit v1.2.3


From 599b1fa91439cff8605a71f1a2b5bb42c177b667 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@ortiz.org>
Date: Fri, 20 Apr 2007 22:12:07 -0700
Subject: [IrDA]: Adding carriage returns to mcs7780 debug statements

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/irda/mcs7780.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index 54d1d543c92..0de867288a4 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -200,14 +200,14 @@ static inline int mcs_setup_transceiver_vishay(struct mcs_cb *mcs)
 /* Setup a communication between mcs7780 and agilent chip. */
 static inline int mcs_setup_transceiver_agilent(struct mcs_cb *mcs)
 {
-	IRDA_WARNING("This transceiver type is not supported yet.");
+	IRDA_WARNING("This transceiver type is not supported yet.\n");
 	return 1;
 }
 
 /* Setup a communication between mcs7780 and sharp chip. */
 static inline int mcs_setup_transceiver_sharp(struct mcs_cb *mcs)
 {
-	IRDA_WARNING("This transceiver type is not supported yet.");
+	IRDA_WARNING("This transceiver type is not supported yet.\n");
 	return 1;
 }
 
@@ -279,7 +279,7 @@ static inline int mcs_setup_transceiver(struct mcs_cb *mcs)
 		break;
 
 	default:
-		IRDA_WARNING("Unknown transceiver type: %d",
+		IRDA_WARNING("Unknown transceiver type: %d\n",
 			     mcs->transceiver_type);
 		ret = 1;
 	}
@@ -318,7 +318,7 @@ static inline int mcs_setup_transceiver(struct mcs_cb *mcs)
 		return ret;
 
 error:
-	IRDA_ERROR("%s", msg);
+	IRDA_ERROR("%s\n", msg);
 	return ret;
 }
 
@@ -587,7 +587,7 @@ static int mcs_speed_change(struct mcs_cb *mcs)
 	} while(cnt++ < 100 && (rval & MCS_IRINTX));
 
 	if(cnt >= 100) {
-		IRDA_ERROR("unable to change speed");
+		IRDA_ERROR("unable to change speed\n");
 		ret = -EIO;
 		goto error;
 	}
@@ -638,7 +638,7 @@ static int mcs_speed_change(struct mcs_cb *mcs)
 
 		default:
 			ret = 1;
-			IRDA_WARNING("Unknown transceiver type: %d",
+			IRDA_WARNING("Unknown transceiver type: %d\n",
 				     mcs->transceiver_type);
 		}
 	if (unlikely(ret))
@@ -733,7 +733,7 @@ static int mcs_net_open(struct net_device *netdev)
 	sprintf(hwname, "usb#%d", mcs->usbdev->devnum);
 	mcs->irlap = irlap_open(netdev, &mcs->qos, hwname);
 	if (!mcs->irlap) {
-		IRDA_ERROR("mcs7780: irlap_open failed");
+		IRDA_ERROR("mcs7780: irlap_open failed\n");
 		goto error2;
 	}
 
@@ -862,7 +862,7 @@ static int mcs_hard_xmit(struct sk_buff *skb, struct net_device *ndev)
 			  mcs->out_buf, wraplen, mcs_send_irq, mcs);
 
 	if ((ret = usb_submit_urb(mcs->tx_urb, GFP_ATOMIC))) {
-		IRDA_ERROR("failed tx_urb: %d", ret);
+		IRDA_ERROR("failed tx_urb: %d\n", ret);
 		switch (ret) {
 		case -ENODEV:
 		case -EPIPE:
@@ -897,7 +897,7 @@ static int mcs_probe(struct usb_interface *intf,
 	if (!ndev)
 		goto error1;
 
-	IRDA_DEBUG(1, "MCS7780 USB-IrDA bridge found at %d.", udev->devnum);
+	IRDA_DEBUG(1, "MCS7780 USB-IrDA bridge found at %d.\n", udev->devnum);
 
 	/* what is it realy for? */
 	SET_MODULE_OWNER(ndev);
@@ -905,7 +905,7 @@ static int mcs_probe(struct usb_interface *intf,
 
 	ret = usb_reset_configuration(udev);
 	if (ret != 0) {
-		IRDA_ERROR("mcs7780: usb reset configuration failed");
+		IRDA_ERROR("mcs7780: usb reset configuration failed\n");
 		goto error2;
 	}
 
@@ -950,7 +950,7 @@ static int mcs_probe(struct usb_interface *intf,
 	if (ret != 0)
 		goto error2;
 
-	IRDA_DEBUG(1, "IrDA: Registered MosChip MCS7780 device as %s",
+	IRDA_DEBUG(1, "IrDA: Registered MosChip MCS7780 device as %s\n",
 		   ndev->name);
 
 	mcs->transceiver_type = transceiver_type;
@@ -981,7 +981,7 @@ static void mcs_disconnect(struct usb_interface *intf)
 	free_netdev(mcs->netdev);
 
 	usb_set_intfdata(intf, NULL);
-	IRDA_DEBUG(0, "MCS7780 now disconnected.");
+	IRDA_DEBUG(0, "MCS7780 now disconnected.\n");
 }
 
 /* Module insertion */
@@ -992,7 +992,7 @@ static int __init mcs_init(void)
 	/* register this driver with the USB subsystem */
 	result = usb_register(&mcs_driver);
 	if (result)
-		IRDA_ERROR("usb_register failed. Error number %d", result);
+		IRDA_ERROR("usb_register failed. Error number %d\n", result);
 
 	return result;
 }
-- 
cgit v1.2.3


From 2a5e1c0eb9efe26eed1dd072fe08de5797a7efd5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 23 Apr 2007 12:19:12 -0700
Subject: [WIRELESS]: Refactor wireless Kconfig.

This patch refactors the wireless Kconfig all over and already
introduces net/wireless/Kconfig with just the WEXT bit for now,
the cfg80211 patch will add to that as well.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Makefile                  |   2 +-
 drivers/net/wireless/Kconfig          | 120 ++++++++++++++--------------------
 drivers/net/wireless/bcm43xx/Kconfig  |   3 +-
 drivers/net/wireless/hostap/Kconfig   |   3 +-
 drivers/net/wireless/zd1211rw/Kconfig |   3 +-
 5 files changed, 57 insertions(+), 74 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 33af833667d..58527322a39 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -206,7 +206,7 @@ obj-$(CONFIG_TR) += tokenring/
 obj-$(CONFIG_WAN) += wan/
 obj-$(CONFIG_ARCNET) += arcnet/
 obj-$(CONFIG_NET_PCMCIA) += pcmcia/
-obj-$(CONFIG_NET_RADIO) += wireless/
+obj-y += wireless/
 obj-$(CONFIG_NET_TULIP) += tulip/
 obj-$(CONFIG_HAMRADIO) += hamradio/
 obj-$(CONFIG_IRDA) += irda/
diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index ece3d9c2dc6..880c628dcd4 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -2,47 +2,21 @@
 # Wireless LAN device configuration
 #
 
-menu "Wireless LAN (non-hamradio)"
-	depends on NETDEVICES
-
-config NET_RADIO
-	bool "Wireless LAN drivers (non-hamradio) & Wireless Extensions"
-	select WIRELESS_EXT
-	---help---
-	  Support for wireless LANs and everything having to do with radio,
-	  but not with amateur radio or FM broadcasting.
-
-	  Saying Y here also enables the Wireless Extensions (creates
-	  /proc/net/wireless and enables iwconfig access). The Wireless
-	  Extension is a generic API allowing a driver to expose to the user
-	  space configuration and statistics specific to common Wireless LANs.
-	  The beauty of it is that a single set of tool can support all the
-	  variations of Wireless LANs, regardless of their type (as long as
-	  the driver supports Wireless Extension). Another advantage is that
-	  these parameters may be changed on the fly without restarting the
-	  driver (or Linux). If you wish to use Wireless Extensions with
-	  wireless PCMCIA (PC-) cards, you need to say Y here; you can fetch
-	  the tools from
-	  <http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Tools.html>.
+menu "Wireless LAN"
 
-config NET_WIRELESS_RTNETLINK
-	bool "Wireless Extension API over RtNetlink"
-	depends on NET_RADIO
+config WLAN_PRE80211
+	bool "Wireless LAN (pre-802.11)"
+	depends on NETDEVICES
 	---help---
-	  Support the Wireless Extension API over the RtNetlink socket
-	  in addition to the traditional ioctl interface (selected above).
+	  Say Y if you have any pre-802.11 wireless LAN hardware.
 
-	  For now, few tools use this facility, but it might grow in the
-	  future. The only downside is that it adds 4.5 kB to your kernel.
-
-# Note : the cards are obsolete (can't buy them anymore), but the drivers
-# are not, as people are still using them...
-comment "Obsolete Wireless cards support (pre-802.11)"
-	depends on NET_RADIO && (INET || ISA || PCMCIA)
+	  This option does not affect the kernel build, it only
+	  let's you choose drivers.
 
 config STRIP
 	tristate "STRIP (Metricom starmode radio IP)"
-	depends on NET_RADIO && INET
+	depends on INET && WLAN_PRE80211
+	select WIRELESS_EXT
 	---help---
 	  Say Y if you have a Metricom radio and intend to use Starmode Radio
 	  IP. STRIP is a radio protocol developed for the MosquitoNet project
@@ -65,7 +39,8 @@ config STRIP
 
 config ARLAN
 	tristate "Aironet Arlan 655 & IC2200 DS support"
-	depends on NET_RADIO && ISA && !64BIT
+	depends on ISA && !64BIT && WLAN_PRE80211
+	select WIRELESS_EXT
 	---help---
 	  Aironet makes Arlan, a class of wireless LAN adapters. These use the
 	  www.Telxon.com chip, which is also used on several similar cards.
@@ -80,7 +55,8 @@ config ARLAN
 
 config WAVELAN
 	tristate "AT&T/Lucent old WaveLAN & DEC RoamAbout DS ISA support"
-	depends on NET_RADIO && ISA
+	depends on ISA && WLAN_PRE80211
+	select WIRELESS_EXT
 	---help---
 	  The Lucent WaveLAN (formerly NCR and AT&T; or DEC RoamAbout DS) is
 	  a Radio LAN (wireless Ethernet-like Local Area Network) using the
@@ -107,7 +83,8 @@ config WAVELAN
 
 config PCMCIA_WAVELAN
 	tristate "AT&T/Lucent old WaveLAN Pcmcia wireless support"
-	depends on NET_RADIO && PCMCIA
+	depends on PCMCIA && WLAN_PRE80211
+	select WIRELESS_EXT
 	help
 	  Say Y here if you intend to attach an AT&T/Lucent Wavelan PCMCIA
 	  (PC-card) wireless Ethernet networking card to your computer.  This
@@ -118,7 +95,8 @@ config PCMCIA_WAVELAN
 
 config PCMCIA_NETWAVE
 	tristate "Xircom Netwave AirSurfer Pcmcia wireless support"
-	depends on NET_RADIO && PCMCIA
+	depends on PCMCIA && WLAN_PRE80211
+	select WIRELESS_EXT
 	help
 	  Say Y here if you intend to attach this type of PCMCIA (PC-card)
 	  wireless Ethernet networking card to your computer.
@@ -126,12 +104,20 @@ config PCMCIA_NETWAVE
 	  To compile this driver as a module, choose M here: the module will be
 	  called netwave_cs.  If unsure, say N.
 
-comment "Wireless 802.11 Frequency Hopping cards support"
-	depends on NET_RADIO && PCMCIA
+
+config WLAN_80211
+	bool "Wireless LAN (IEEE 802.11)"
+	depends on NETDEVICES
+	---help---
+	  Say Y if you have any 802.11 wireless LAN hardware.
+
+	  This option does not affect the kernel build, it only
+	  let's you choose drivers.
 
 config PCMCIA_RAYCS
 	tristate "Aviator/Raytheon 2.4MHz wireless support"
-	depends on NET_RADIO && PCMCIA
+	depends on PCMCIA && WLAN_80211
+	select WIRELESS_EXT
 	---help---
 	  Say Y here if you intend to attach an Aviator/Raytheon PCMCIA
 	  (PC-card) wireless Ethernet networking card to your computer.
@@ -141,12 +127,10 @@ config PCMCIA_RAYCS
 	  To compile this driver as a module, choose M here: the module will be
 	  called ray_cs.  If unsure, say N.
 
-comment "Wireless 802.11b ISA/PCI cards support"
-	depends on NET_RADIO && (ISA || PCI || PPC_PMAC || PCMCIA)
-
 config IPW2100
 	tristate "Intel PRO/Wireless 2100 Network Connection"
-	depends on NET_RADIO && PCI
+	depends on PCI && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	select IEEE80211
 	---help---
@@ -200,7 +184,8 @@ config IPW2100_DEBUG
 
 config IPW2200
 	tristate "Intel PRO/Wireless 2200BG and 2915ABG Network Connection"
-	depends on NET_RADIO && PCI
+	depends on PCI && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	select IEEE80211
 	---help---
@@ -282,7 +267,8 @@ config IPW2200_DEBUG
 
 config AIRO
 	tristate "Cisco/Aironet 34X/35X/4500/4800 ISA and PCI cards"
- 	depends on NET_RADIO && ISA_DMA_API && (PCI || BROKEN)
+	depends on ISA_DMA_API && WLAN_80211 && (PCI || BROKEN)
+	select WIRELESS_EXT
 	select CRYPTO
 	---help---
 	  This is the standard Linux driver to support Cisco/Aironet ISA and
@@ -299,7 +285,8 @@ config AIRO
 
 config HERMES
 	tristate "Hermes chipset 802.11b support (Orinoco/Prism2/Symbol)"
-	depends on NET_RADIO && (PPC_PMAC || PCI || PCMCIA)
+	depends on (PPC_PMAC || PCI || PCMCIA) && WLAN_80211
+	select WIRELESS_EXT
 	---help---
 	  A driver for 802.11b wireless cards based on the "Hermes" or
 	  Intersil HFA384x (Prism 2) MAC controller.  This includes the vast
@@ -373,7 +360,8 @@ config PCI_HERMES
 
 config ATMEL
       tristate "Atmel at76c50x chipset  802.11b support"
-      depends on NET_RADIO && (PCI || PCMCIA)
+      depends on (PCI || PCMCIA) && WLAN_80211
+      select WIRELESS_EXT
       select FW_LOADER
       select CRC32
        ---help---
@@ -394,13 +382,9 @@ config PCI_ATMEL
         Enable support for PCI and mini-PCI cards containing the
         Atmel at76c506 chip.
 
-# If Pcmcia is compiled in, offer Pcmcia cards...
-comment "Wireless 802.11b Pcmcia/Cardbus cards support"
-	depends on NET_RADIO && PCMCIA
-
 config PCMCIA_HERMES
 	tristate "Hermes PCMCIA card support"
-	depends on NET_RADIO && PCMCIA && HERMES
+	depends on PCMCIA && HERMES
 	---help---
 	  A driver for "Hermes" chipset based PCMCIA wireless adaptors, such
 	  as the Lucent WavelanIEEE/Orinoco cards and their OEM (Cabletron/
@@ -420,7 +404,7 @@ config PCMCIA_HERMES
 
 config PCMCIA_SPECTRUM
 	tristate "Symbol Spectrum24 Trilogy PCMCIA card support"
-	depends on NET_RADIO && PCMCIA && HERMES
+	depends on PCMCIA && HERMES
 	select FW_LOADER
 	---help---
 
@@ -434,7 +418,8 @@ config PCMCIA_SPECTRUM
 
 config AIRO_CS
 	tristate "Cisco/Aironet 34X/35X/4500/4800 PCMCIA cards"
-	depends on NET_RADIO && PCMCIA && (BROKEN || !M32R)
+	depends on PCMCIA && (BROKEN || !M32R) && WLAN_80211
+	select WIRELESS_EXT
 	select CRYPTO
 	select CRYPTO_AES
 	---help---
@@ -458,7 +443,8 @@ config AIRO_CS
 
 config PCMCIA_ATMEL
 	tristate "Atmel at76c502/at76c504 PCMCIA cards"
-	depends on NET_RADIO && ATMEL && PCMCIA
+	depends on ATMEL && PCMCIA
+	select WIRELESS_EXT
 	select FW_LOADER
 	select CRC32
 	---help---
@@ -467,17 +453,17 @@ config PCMCIA_ATMEL
 
 config PCMCIA_WL3501
       tristate "Planet WL3501 PCMCIA cards"
-      depends on NET_RADIO && EXPERIMENTAL && PCMCIA
+      depends on EXPERIMENTAL && PCMCIA && WLAN_80211
+      select WIRELESS_EXT
        ---help---
          A driver for WL3501 PCMCIA 802.11 wireless cards made by Planet.
 	 It has basic support for Linux wireless extensions and initial
 	 micro support for ethtool.
 
-comment "Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support"
-	depends on NET_RADIO && PCI
 config PRISM54
 	tristate 'Intersil Prism GT/Duette/Indigo PCI/Cardbus' 
-	depends on PCI && NET_RADIO && EXPERIMENTAL
+	depends on PCI && EXPERIMENTAL && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	---help---
 	  Enable PCI and Cardbus support for the following chipset based cards:
@@ -523,7 +509,8 @@ config PRISM54
 
 config USB_ZD1201
 	tristate "USB ZD1201 based Wireless device support"
-	depends on USB && NET_RADIO
+	depends on USB && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	---help---
 	  Say Y if you want to use wireless LAN adapters based on the ZyDAS
@@ -542,11 +529,4 @@ source "drivers/net/wireless/hostap/Kconfig"
 source "drivers/net/wireless/bcm43xx/Kconfig"
 source "drivers/net/wireless/zd1211rw/Kconfig"
 
-# yes, this works even when no drivers are selected
-config NET_WIRELESS
-	bool
-	depends on NET_RADIO && (ISA || PCI || PPC_PMAC || PCMCIA)
-	default y
-
 endmenu
-
diff --git a/drivers/net/wireless/bcm43xx/Kconfig b/drivers/net/wireless/bcm43xx/Kconfig
index 533993f538f..ce397e4284f 100644
--- a/drivers/net/wireless/bcm43xx/Kconfig
+++ b/drivers/net/wireless/bcm43xx/Kconfig
@@ -1,6 +1,7 @@
 config BCM43XX
 	tristate "Broadcom BCM43xx wireless support"
-	depends on PCI && IEEE80211 && IEEE80211_SOFTMAC && NET_RADIO && EXPERIMENTAL
+	depends on PCI && IEEE80211 && IEEE80211_SOFTMAC && WLAN_80211 && EXPERIMENTAL
+	select WIRELESS_EXT
 	select FW_LOADER
 	select HW_RANDOM
 	---help---
diff --git a/drivers/net/wireless/hostap/Kconfig b/drivers/net/wireless/hostap/Kconfig
index 308f773ad56..1fef33169fd 100644
--- a/drivers/net/wireless/hostap/Kconfig
+++ b/drivers/net/wireless/hostap/Kconfig
@@ -1,6 +1,7 @@
 config HOSTAP
 	tristate "IEEE 802.11 for Host AP (Prism2/2.5/3 and WEP/TKIP/CCMP)"
-	depends on NET_RADIO
+	depends on WLAN_80211
+	select WIRELESS_EXT
 	select IEEE80211
 	select IEEE80211_CRYPT_WEP
 	---help---
diff --git a/drivers/net/wireless/zd1211rw/Kconfig b/drivers/net/wireless/zd1211rw/Kconfig
index 66ed55bc546..d1ab24a9563 100644
--- a/drivers/net/wireless/zd1211rw/Kconfig
+++ b/drivers/net/wireless/zd1211rw/Kconfig
@@ -1,6 +1,7 @@
 config ZD1211RW
 	tristate "ZyDAS ZD1211/ZD1211B USB-wireless support"
-	depends on USB && IEEE80211 && IEEE80211_SOFTMAC && NET_RADIO && EXPERIMENTAL
+	depends on USB && IEEE80211_SOFTMAC && WLAN_80211 && EXPERIMENTAL
+	select WIRELESS_EXT
 	select FW_LOADER
 	---help---
 	  This is an experimental driver for the ZyDAS ZD1211/ZD1211B wireless
-- 
cgit v1.2.3


From 42431592e74a968d919a46baf0515a2ee6978dac Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Mon, 23 Apr 2007 13:28:49 -0700
Subject: [WIRELESS] drivers/net/wireless/Kconfig: correct minor typo

Correct minor typo in drivers/net/wireless/Kconfig identified by
Stefano Brivio <stefano.brivio@polimi.it>.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireless/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index 880c628dcd4..4426841b2be 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -11,7 +11,7 @@ config WLAN_PRE80211
 	  Say Y if you have any pre-802.11 wireless LAN hardware.
 
 	  This option does not affect the kernel build, it only
-	  let's you choose drivers.
+	  lets you choose drivers.
 
 config STRIP
 	tristate "STRIP (Metricom starmode radio IP)"
@@ -112,7 +112,7 @@ config WLAN_80211
 	  Say Y if you have any 802.11 wireless LAN hardware.
 
 	  This option does not affect the kernel build, it only
-	  let's you choose drivers.
+	  lets you choose drivers.
 
 config PCMCIA_RAYCS
 	tristate "Aviator/Raytheon 2.4MHz wireless support"
-- 
cgit v1.2.3


From e0aac5a289b1dacbc94bd9ae8c449bcdf9ab508c Mon Sep 17 00:00:00 2001
From: Auke Kok <auke-jan.h.kok@intel.com>
Date: Tue, 6 Mar 2007 08:57:21 -0800
Subject: e1000: FIX: be ready for incoming irq at pci_request_irq

DEBUG_SHIRQ code exposed that e1000 was not ready for incoming interrupts
after having called pci_request_irq. This obviously requires us to finish
our software setup which assigns the irq handler before we request the
irq.

Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/net/e1000/e1000_main.c | 66 ++++++++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 21 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index b28a915bd98..f777d37d8e6 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -522,14 +522,15 @@ e1000_release_manageability(struct e1000_adapter *adapter)
 	}
 }
 
-int
-e1000_up(struct e1000_adapter *adapter)
+/**
+ * e1000_configure - configure the hardware for RX and TX
+ * @adapter = private board structure
+ **/
+static void e1000_configure(struct e1000_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	int i;
 
-	/* hardware has been reset, we need to reload some things */
-
 	e1000_set_multi(netdev);
 
 	e1000_restore_vlan(adapter);
@@ -548,14 +549,20 @@ e1000_up(struct e1000_adapter *adapter)
 	}
 
 	adapter->tx_queue_len = netdev->tx_queue_len;
+}
+
+int e1000_up(struct e1000_adapter *adapter)
+{
+	/* hardware has been reset, we need to reload some things */
+	e1000_configure(adapter);
+
+	clear_bit(__E1000_DOWN, &adapter->flags);
 
 #ifdef CONFIG_E1000_NAPI
-	netif_poll_enable(netdev);
+	netif_poll_enable(adapter->netdev);
 #endif
 	e1000_irq_enable(adapter);
 
-	clear_bit(__E1000_DOWN, &adapter->flags);
-
 	/* fire a link change interrupt to start the watchdog */
 	E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_LSC);
 	return 0;
@@ -640,15 +647,15 @@ e1000_down(struct e1000_adapter *adapter)
 	 * reschedule our watchdog timer */
 	set_bit(__E1000_DOWN, &adapter->flags);
 
+#ifdef CONFIG_E1000_NAPI
+	netif_poll_disable(netdev);
+#endif
 	e1000_irq_disable(adapter);
 
 	del_timer_sync(&adapter->tx_fifo_stall_timer);
 	del_timer_sync(&adapter->watchdog_timer);
 	del_timer_sync(&adapter->phy_info_timer);
 
-#ifdef CONFIG_E1000_NAPI
-	netif_poll_disable(netdev);
-#endif
 	netdev->tx_queue_len = adapter->tx_queue_len;
 	adapter->link_speed = 0;
 	adapter->link_duplex = 0;
@@ -1410,21 +1417,17 @@ e1000_open(struct net_device *netdev)
 		return -EBUSY;
 
 	/* allocate transmit descriptors */
-	if ((err = e1000_setup_all_tx_resources(adapter)))
+	err = e1000_setup_all_tx_resources(adapter);
+	if (err)
 		goto err_setup_tx;
 
 	/* allocate receive descriptors */
-	if ((err = e1000_setup_all_rx_resources(adapter)))
-		goto err_setup_rx;
-
-	err = e1000_request_irq(adapter);
+	err = e1000_setup_all_rx_resources(adapter);
 	if (err)
-		goto err_req_irq;
+		goto err_setup_rx;
 
 	e1000_power_up_phy(adapter);
 
-	if ((err = e1000_up(adapter)))
-		goto err_up;
 	adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
 	if ((adapter->hw.mng_cookie.status &
 			  E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) {
@@ -1437,12 +1440,33 @@ e1000_open(struct net_device *netdev)
 	    e1000_check_mng_mode(&adapter->hw))
 		e1000_get_hw_control(adapter);
 
+	/* before we allocate an interrupt, we must be ready to handle it.
+	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
+	 * as soon as we call pci_request_irq, so we have to setup our
+	 * clean_rx handler before we do so.  */
+	e1000_configure(adapter);
+
+	err = e1000_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	/* From here on the code is the same as e1000_up() */
+	clear_bit(__E1000_DOWN, &adapter->flags);
+
+#ifdef CONFIG_E1000_NAPI
+	netif_poll_enable(netdev);
+#endif
+
+	e1000_irq_enable(adapter);
+
+	/* fire a link status change interrupt to start the watchdog */
+	E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_LSC);
+
 	return E1000_SUCCESS;
 
-err_up:
-	e1000_power_down_phy(adapter);
-	e1000_free_irq(adapter);
 err_req_irq:
+	e1000_release_hw_control(adapter);
+	e1000_power_down_phy(adapter);
 	e1000_free_all_rx_resources(adapter);
 err_setup_rx:
 	e1000_free_all_tx_resources(adapter);
-- 
cgit v1.2.3


From 31d76442f719af834718cbf5bf866370acc36093 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Tue, 6 Mar 2007 08:57:24 -0800
Subject: e1000: FIX: firmware handover bits

Upon code inspection it was spotted that the firmware handover bit get/set
mismatched, which may have resulted in management issues on PCI-E
adapters. Setting them correctly may fix some management issues such
as arp routing etc.

Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/net/e1000/e1000_main.c | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index f777d37d8e6..dd66f8816b2 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -409,25 +409,21 @@ e1000_release_hw_control(struct e1000_adapter *adapter)
 {
 	uint32_t ctrl_ext;
 	uint32_t swsm;
-	uint32_t extcnf;
 
 	/* Let firmware taken over control of h/w */
 	switch (adapter->hw.mac_type) {
-	case e1000_82571:
-	case e1000_82572:
-	case e1000_80003es2lan:
-		ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT);
-		E1000_WRITE_REG(&adapter->hw, CTRL_EXT,
-				ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
-		break;
 	case e1000_82573:
 		swsm = E1000_READ_REG(&adapter->hw, SWSM);
 		E1000_WRITE_REG(&adapter->hw, SWSM,
 				swsm & ~E1000_SWSM_DRV_LOAD);
+		break;
+	case e1000_82571:
+	case e1000_82572:
+	case e1000_80003es2lan:
 	case e1000_ich8lan:
-		extcnf = E1000_READ_REG(&adapter->hw, CTRL_EXT);
+		ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT);
 		E1000_WRITE_REG(&adapter->hw, CTRL_EXT,
-				extcnf & ~E1000_CTRL_EXT_DRV_LOAD);
+				ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
 		break;
 	default:
 		break;
@@ -450,26 +446,21 @@ e1000_get_hw_control(struct e1000_adapter *adapter)
 {
 	uint32_t ctrl_ext;
 	uint32_t swsm;
-	uint32_t extcnf;
 
 	/* Let firmware know the driver has taken over */
 	switch (adapter->hw.mac_type) {
-	case e1000_82571:
-	case e1000_82572:
-	case e1000_80003es2lan:
-		ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT);
-		E1000_WRITE_REG(&adapter->hw, CTRL_EXT,
-				ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
-		break;
 	case e1000_82573:
 		swsm = E1000_READ_REG(&adapter->hw, SWSM);
 		E1000_WRITE_REG(&adapter->hw, SWSM,
 				swsm | E1000_SWSM_DRV_LOAD);
 		break;
+	case e1000_82571:
+	case e1000_82572:
+	case e1000_80003es2lan:
 	case e1000_ich8lan:
-		extcnf = E1000_READ_REG(&adapter->hw, EXTCNF_CTRL);
-		E1000_WRITE_REG(&adapter->hw, EXTCNF_CTRL,
-				extcnf | E1000_EXTCNF_CTRL_SWFLAG);
+		ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT);
+		E1000_WRITE_REG(&adapter->hw, CTRL_EXT,
+				ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
 		break;
 	default:
 		break;
-- 
cgit v1.2.3


From f50393fe869ba457cd75569c74c0f9bd2e7f7a0f Mon Sep 17 00:00:00 2001
From: Mark Huth <mhuth@mvista.com>
Date: Tue, 6 Mar 2007 08:57:26 -0800
Subject: e1000: FIX: Stop raw interrupts disabled nag from RT

Current e1000_xmit_frame spews raw interrupt disabled nag messages when
used with RT kernel patches.  This patch uses spin_trylock_irqsave,
which allows RT patches to properly manage the irq semantics.

Signed-off-by: Mark Huth <mhuth@mvista.com>
Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/net/e1000/e1000_main.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index dd66f8816b2..eb3ff1ff746 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -3378,12 +3378,9 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	    (adapter->hw.mac_type == e1000_82573))
 		e1000_transfer_dhcp_info(adapter, skb);
 
-	local_irq_save(flags);
-	if (!spin_trylock(&tx_ring->tx_lock)) {
+	if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags))
 		/* Collision - tell upper layer to requeue */
-		local_irq_restore(flags);
 		return NETDEV_TX_LOCKED;
-	}
 
 	/* need: count + 2 desc gap to keep tail from touching
 	 * head, otherwise try next time */
-- 
cgit v1.2.3


From 55404bca6c45595fee1a546f1a0cc616aeef0b00 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 26 Apr 2007 00:55:53 -0700
Subject: [NET]: Fix yam.c

drivers/net/hamradio/yam.c: In function `yam_tx_byte':
drivers/net/hamradio/yam.c:643: warning: passing arg 1 of `skb_copy_from_linear_data_offset' from incompatible pointer type

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/yam.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index ac2d6dd9dbe..467559debfd 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -638,7 +638,7 @@ static void yam_tx_byte(struct net_device *dev, struct yam_port *yp)
         			dev_kfree_skb_any(skb);
 				break;
 			}
-			skb_copy_from_linear_data_offset(skb->data, 1,
+			skb_copy_from_linear_data_offset(skb, 1,
 							 yp->tx_buf,
 							 yp->tx_len);
 			dev_kfree_skb_any(skb);
-- 
cgit v1.2.3


From 36226a8ded46b89a94f9de5976f554bb5e02d84c Mon Sep 17 00:00:00 2001
From: Brian Braunstein <linuxkernel@bristyle.com>
Date: Thu, 26 Apr 2007 01:00:55 -0700
Subject: [NET] tun/tap: fixed hw address handling

Fixed tun/tap driver's handling of hw addresses.  The hw address is stored
in both the net_device.dev_addr and tun.dev_addr fields.  These fields were
not kept synchronized, and in fact weren't even initialized to the same
value.  Now during both init and when performing SIOCSIFHWADDR on the tun
device these values are both updated.  However, if SIOCSIFHWADDR is
performed on the net device directly (for instance, setting the hw address
using ifconfig), the tun device does not get updated.  Perhaps the
tun.dev_addr field should be removed completely at some point, as it is
redundant and net_device.dev_addr can be used anywhere it is used.

Signed-off-by: Brian Braunstein <linuxkernel@bristyle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 4d461595406..a2c6caaaae9 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -18,6 +18,10 @@
 /*
  *  Changes:
  *
+ *  Brian Braunstein <linuxkernel@bristyle.com> 2007/03/23
+ *    Fixed hw address handling.  Now net_device.dev_addr is kept consistent
+ *    with tun.dev_addr when the address is set by this module.
+ *
  *  Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14
  *    Add TUNSETLINK ioctl to set the link encapsulation
  *
@@ -196,7 +200,10 @@ static void tun_net_init(struct net_device *dev)
 		dev->set_multicast_list = tun_net_mclist;
 
 		ether_setup(dev);
-		random_ether_addr(dev->dev_addr);
+
+		/* random address already created for us by tun_set_iff, use it */
+		memcpy(dev->dev_addr, tun->dev_addr, min(sizeof(tun->dev_addr), sizeof(dev->dev_addr)) );
+
 		dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
 		break;
 	}
@@ -636,6 +643,7 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 		return 0;
 
 	case SIOCGIFHWADDR:
+		/* Note: the actual net device's address may be different */
 		memcpy(ifr.ifr_hwaddr.sa_data, tun->dev_addr,
 				min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr));
 		if (copy_to_user( argp, &ifr, sizeof ifr))
@@ -643,16 +651,24 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 		return 0;
 
 	case SIOCSIFHWADDR:
-		/** Set the character device's hardware address. This is used when
-		 * filtering packets being sent from the network device to the character
-		 * device. */
-		memcpy(tun->dev_addr, ifr.ifr_hwaddr.sa_data,
-				min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr));
-		DBG(KERN_DEBUG "%s: set hardware address: %x:%x:%x:%x:%x:%x\n",
-				tun->dev->name,
-				tun->dev_addr[0], tun->dev_addr[1], tun->dev_addr[2],
-				tun->dev_addr[3], tun->dev_addr[4], tun->dev_addr[5]);
-		return 0;
+	{
+		/* try to set the actual net device's hw address */
+		int ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
+
+		if (ret == 0) {
+			/** Set the character device's hardware address. This is used when
+			 * filtering packets being sent from the network device to the character
+			 * device. */
+			memcpy(tun->dev_addr, ifr.ifr_hwaddr.sa_data,
+					min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr));
+			DBG(KERN_DEBUG "%s: set hardware address: %x:%x:%x:%x:%x:%x\n",
+					tun->dev->name,
+					tun->dev_addr[0], tun->dev_addr[1], tun->dev_addr[2],
+					tun->dev_addr[3], tun->dev_addr[4], tun->dev_addr[5]);
+		}
+
+		return  ret;
+	}
 
 	case SIOCADDMULTI:
 		/** Add the specified group to the character device's multicast filter
-- 
cgit v1.2.3


From 57cd5f754e04240ee587c51b7be8d3b7793542ae Mon Sep 17 00:00:00 2001
From: Milind Arun Choudhary <milindchoudhary@gmail.com>
Date: Thu, 26 Apr 2007 01:01:53 -0700
Subject: [NET]: ROUND_UP macro cleanup in drivers/net/ppp_generic.c

ROUND_UP macro cleanup use DIV_ROUND_UP

Signed-off-by: Milind Arun Choudhary <milindchoudhary@gmail.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp_generic.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 18f1790aab9..6d596ca50cf 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -88,8 +88,6 @@ struct ppp_file {
 #define PF_TO_PPP(pf)		PF_TO_X(pf, struct ppp)
 #define PF_TO_CHANNEL(pf)	PF_TO_X(pf, struct channel)
 
-#define ROUNDUP(n, x)		(((n) + (x) - 1) / (x))
-
 /*
  * Data structure describing one ppp unit.
  * A ppp unit corresponds to a ppp network interface device
@@ -1297,7 +1295,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 	 */
 	fragsize = len;
 	if (nfree > 1)
-		fragsize = ROUNDUP(fragsize, nfree);
+		fragsize = DIV_ROUND_UP(fragsize, nfree);
 	/* nbigger channels get fragsize bytes, the rest get fragsize-1,
 	   except if nbigger==0, then they all get fragsize. */
 	nbigger = len % nfree;
-- 
cgit v1.2.3


From 4ef8d0aeafda8388dd51f2671b7059192b1e5a5f Mon Sep 17 00:00:00 2001
From: Milind Arun Choudhary <milindchoudhary@gmail.com>
Date: Thu, 26 Apr 2007 01:37:44 -0700
Subject: [NET]: SPIN_LOCK_UNLOCKED cleanup in drivers/atm, net

SPIN_LOCK_UNLOCKED cleanup,use __SPIN_LOCK_UNLOCKED instead

Signed-off-by: Milind Arun Choudhary <milindchoudhary@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/atmtcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index 1b9493a16ac..02ad83d6b56 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -352,7 +352,7 @@ static struct atm_dev atmtcp_control_dev = {
 	.ops		= &atmtcp_c_dev_ops,
 	.type		= "atmtcp",
 	.number		= 999,
-	.lock		= SPIN_LOCK_UNLOCKED
+	.lock		= __SPIN_LOCK_UNLOCKED(atmtcp_control_dev.lock)
 };
 
 
-- 
cgit v1.2.3


From bfbf3c0968498f5232c02965cf41695edae1bc4d Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Date: Thu, 26 Apr 2007 01:41:49 -0700
Subject: [ATM]: Use mutex instead of binary semaphore in FORE Systems
 200E-series driver

(akpm: remove CVS control string too)

Signed-off-by: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/fore200e.c | 20 +++++++++-----------
 drivers/atm/fore200e.h |  2 +-
 2 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'drivers')

diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index a7c0ed3107e..405ee5e0922 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -1,6 +1,4 @@
 /*
-  $Id: fore200e.c,v 1.5 2000/04/14 10:10:34 davem Exp $
-
   A FORE Systems 200E-series driver for ATM on Linux.
   Christophe Lizzi (lizzi@cnam.fr), October 1999-March 2003.
 
@@ -1502,9 +1500,9 @@ fore200e_open(struct atm_vcc *vcc)
     /* pseudo-CBR bandwidth requested? */
     if ((vcc->qos.txtp.traffic_class == ATM_CBR) && (vcc->qos.txtp.max_pcr > 0)) {
 	
-	down(&fore200e->rate_sf);
+	mutex_lock(&fore200e->rate_mtx);
 	if (fore200e->available_cell_rate < vcc->qos.txtp.max_pcr) {
-	    up(&fore200e->rate_sf);
+	    mutex_unlock(&fore200e->rate_mtx);
 
 	    kfree(fore200e_vcc);
 	    vc_map->vcc = NULL;
@@ -1513,7 +1511,7 @@ fore200e_open(struct atm_vcc *vcc)
 
 	/* reserve bandwidth */
 	fore200e->available_cell_rate -= vcc->qos.txtp.max_pcr;
-	up(&fore200e->rate_sf);
+	mutex_unlock(&fore200e->rate_mtx);
     }
     
     vcc->itf = vcc->dev->number;
@@ -1599,9 +1597,9 @@ fore200e_close(struct atm_vcc* vcc)
     /* release reserved bandwidth, if any */
     if ((vcc->qos.txtp.traffic_class == ATM_CBR) && (vcc->qos.txtp.max_pcr > 0)) {
 
-	down(&fore200e->rate_sf);
+	mutex_lock(&fore200e->rate_mtx);
 	fore200e->available_cell_rate += vcc->qos.txtp.max_pcr;
-	up(&fore200e->rate_sf);
+	mutex_unlock(&fore200e->rate_mtx);
 
 	clear_bit(ATM_VF_HASQOS, &vcc->flags);
     }
@@ -2064,16 +2062,16 @@ fore200e_change_qos(struct atm_vcc* vcc,struct atm_qos* qos, int flags)
 
     if ((qos->txtp.traffic_class == ATM_CBR) && (qos->txtp.max_pcr > 0)) {
 
-	down(&fore200e->rate_sf);
+	mutex_lock(&fore200e->rate_mtx);
 	if (fore200e->available_cell_rate + vcc->qos.txtp.max_pcr < qos->txtp.max_pcr) {
-	    up(&fore200e->rate_sf);
+	    mutex_unlock(&fore200e->rate_mtx);
 	    return -EAGAIN;
 	}
 
 	fore200e->available_cell_rate += vcc->qos.txtp.max_pcr;
 	fore200e->available_cell_rate -= qos->txtp.max_pcr;
 
-	up(&fore200e->rate_sf);
+	mutex_unlock(&fore200e->rate_mtx);
 	
 	memcpy(&vcc->qos, qos, sizeof(struct atm_qos));
 	
@@ -2459,7 +2457,7 @@ fore200e_initialize(struct fore200e* fore200e)
 
     DPRINTK(2, "device %s being initialized\n", fore200e->name);
 
-    init_MUTEX(&fore200e->rate_sf);
+    mutex_init(&fore200e->rate_mtx);
     spin_lock_init(&fore200e->q_lock);
 
     cpq = fore200e->cp_queues = fore200e->virt_base + FORE200E_CP_QUEUES_OFFSET;
diff --git a/drivers/atm/fore200e.h b/drivers/atm/fore200e.h
index f9abfdac33e..b85a54613de 100644
--- a/drivers/atm/fore200e.h
+++ b/drivers/atm/fore200e.h
@@ -869,7 +869,7 @@ typedef struct fore200e {
 
     struct stats*              stats;                  /* last snapshot of the stats         */
     
-    struct semaphore           rate_sf;                /* protects rate reservation ops      */
+    struct mutex               rate_mtx;               /* protects rate reservation ops      */
     spinlock_t                 q_lock;                 /* protects queue ops                 */
 #ifdef FORE200E_USE_TASKLET
     struct tasklet_struct      tx_tasklet;             /* performs tx interrupt work         */
-- 
cgit v1.2.3


From ccf0dec6fcadb4e1c877b9bafb031a6bdb7112b9 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 29 Mar 2007 00:49:54 -0700
Subject: [SPARC/64] constify of_get_property return: drivers

The only unfortunate bit here is that the name field of struct map_info
is not const, so for now we put a cast on the assignment of it.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/mtd/maps/sun_uflash.c  | 4 ++--
 drivers/net/sungem.c           | 2 +-
 drivers/net/sunhme.c           | 6 +++---
 drivers/net/tg3.c              | 2 +-
 drivers/net/tulip/tulip_core.c | 2 +-
 drivers/sbus/char/envctrl.c    | 8 ++++----
 drivers/sbus/char/flash.c      | 2 +-
 drivers/sbus/char/openprom.c   | 4 ++--
 drivers/sbus/sbus.c            | 4 ++--
 drivers/scsi/qlogicpti.c       | 2 +-
 drivers/serial/sunsu.c         | 4 ++--
 drivers/video/cg3.c            | 2 +-
 12 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'drivers')

diff --git a/drivers/mtd/maps/sun_uflash.c b/drivers/mtd/maps/sun_uflash.c
index 4db2055cee3..001af7f7ddd 100644
--- a/drivers/mtd/maps/sun_uflash.c
+++ b/drivers/mtd/maps/sun_uflash.c
@@ -39,7 +39,7 @@ MODULE_VERSION("2.0");
 
 static LIST_HEAD(device_list);
 struct uflash_dev {
-	char			*name;	/* device name */
+	const char		*name;	/* device name */
 	struct map_info 	map;	/* mtd map info */
 	struct mtd_info		*mtd;	/* mtd info */
 };
@@ -80,7 +80,7 @@ int uflash_devinit(struct linux_ebus_device *edev, struct device_node *dp)
 
 	up->name = of_get_property(dp, "model", NULL);
 	if (up->name && 0 < strlen(up->name))
-		up->map.name = up->name;
+		up->map.name = (char *)up->name;
 
 	up->map.phys = res->start;
 
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 08ea61db46f..bb07b79817f 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -2914,7 +2914,7 @@ static int __devinit gem_get_device_address(struct gem *gp)
 	int use_idprom = 1;
 
 	if (pcp != NULL) {
-		unsigned char *addr;
+		const unsigned char *addr;
 		int len;
 
 		addr = of_get_property(pcp->prom_node, "local-mac-address",
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index 192bbc91c73..bee5e5b309b 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -2704,7 +2704,7 @@ static int __devinit happy_meal_sbus_probe_one(struct sbus_dev *sdev, int is_qfe
 			dev->dev_addr[i] = macaddr[i];
 		macaddr[5]++;
 	} else {
-		unsigned char *addr;
+		const unsigned char *addr;
 		int len;
 
 		addr = of_get_property(dp, "local-mac-address", &len);
@@ -3081,7 +3081,7 @@ static int __devinit happy_meal_pci_probe(struct pci_dev *pdev,
 		macaddr[5]++;
 	} else {
 #ifdef CONFIG_SPARC
-		unsigned char *addr;
+		const unsigned char *addr;
 		int len;
 
 		if (qfe_slot != -1 &&
@@ -3300,7 +3300,7 @@ static int __devinit hme_sbus_probe(struct of_device *dev, const struct of_devic
 {
 	struct sbus_dev *sdev = to_sbus_device(&dev->dev);
 	struct device_node *dp = dev->node;
-	char *model = of_get_property(dp, "model", NULL);
+	const char *model = of_get_property(dp, "model", NULL);
 	int is_qfe = (match->data != NULL);
 
 	if (!is_qfe && model && !strcmp(model, "SUNW,sbus-qfe"))
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 256969e1300..45b4e018b5e 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -10996,7 +10996,7 @@ static int __devinit tg3_get_macaddr_sparc(struct tg3 *tp)
 	struct pcidev_cookie *pcp = pdev->sysdata;
 
 	if (pcp != NULL) {
-		unsigned char *addr;
+		const unsigned char *addr;
 		int len;
 
 		addr = of_get_property(pcp->prom_node, "local-mac-address",
diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index e3774a52237..03c14181b6b 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -1544,7 +1544,7 @@ static int __devinit tulip_init_one (struct pci_dev *pdev,
 		dev->dev_addr[i] = last_phys_addr[i] + 1;
 #if defined(__sparc__)
 		if (pcp) {
-			unsigned char *addr;
+			const unsigned char *addr;
 			int len;
 
 			addr = of_get_property(pcp->prom_node,
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index 2cea4f5d208..f2be2ead874 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -726,7 +726,7 @@ static struct miscdevice envctrl_dev = {
  * Return: None.
  */
 static void envctrl_set_mon(struct i2c_child_t *pchild,
-			    char *chnl_desc,
+			    const char *chnl_desc,
 			    int chnl_no)
 {
 	/* Firmware only has temperature type.  It does not distinguish
@@ -763,8 +763,8 @@ static void envctrl_set_mon(struct i2c_child_t *pchild,
 static void envctrl_init_adc(struct i2c_child_t *pchild, struct device_node *dp)
 {
 	int i = 0, len;
-	char *pos;
-	unsigned int *pval;
+	const char *pos;
+	const unsigned int *pval;
 
 	/* Firmware describe channels into a stream separated by a '\0'. */
 	pos = of_get_property(dp, "channels-description", &len);
@@ -859,7 +859,7 @@ static void envctrl_init_i2c_child(struct linux_ebus_child *edev_child,
 {
 	int len, i, tbls_size = 0;
 	struct device_node *dp = edev_child->prom_node;
-	void *pval;
+	const void *pval;
 
 	/* Get device address. */
 	pval = of_get_property(dp, "reg", &len);
diff --git a/drivers/sbus/char/flash.c b/drivers/sbus/char/flash.c
index 6e99507aeb1..262f01e6859 100644
--- a/drivers/sbus/char/flash.c
+++ b/drivers/sbus/char/flash.c
@@ -190,7 +190,7 @@ static int __init flash_init(void)
 	}
 	if (!sdev) {
 #ifdef CONFIG_PCI
-		struct linux_prom_registers *ebus_regs;
+		const struct linux_prom_registers *ebus_regs;
 
 		for_each_ebus(ebus) {
 			for_each_ebusdev(edev, ebus) {
diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c
index 5041c9dfbe3..aec3b9f991f 100644
--- a/drivers/sbus/char/openprom.c
+++ b/drivers/sbus/char/openprom.c
@@ -141,7 +141,7 @@ static int copyout(void __user *info, struct openpromio *opp, int len)
 
 static int opromgetprop(void __user *argp, struct device_node *dp, struct openpromio *op, int bufsize)
 {
-	void *pval;
+	const void *pval;
 	int len;
 
 	if (!dp ||
@@ -410,7 +410,7 @@ static int opiocget(void __user *argp, DATA *data)
 	struct opiocdesc op;
 	struct device_node *dp;
 	char *str;
-	void *pval;
+	const void *pval;
 	int err, len;
 
 	if (copy_from_user(&op, argp, sizeof(op)))
diff --git a/drivers/sbus/sbus.c b/drivers/sbus/sbus.c
index 6349dd617f8..eee590a51d8 100644
--- a/drivers/sbus/sbus.c
+++ b/drivers/sbus/sbus.c
@@ -35,7 +35,7 @@ struct sbus_bus *sbus_root;
 static void __init fill_sbus_device(struct device_node *dp, struct sbus_dev *sdev)
 {
 	unsigned long base;
-	void *pval;
+	const void *pval;
 	int len, err;
 
 	sdev->prom_node = dp->node;
@@ -86,7 +86,7 @@ static void __init fill_sbus_device(struct device_node *dp, struct sbus_dev *sde
 
 static void __init sbus_bus_ranges_init(struct device_node *dp, struct sbus_bus *sbus)
 {
-	void *pval;
+	const void *pval;
 	int len;
 
 	pval = of_get_property(dp, "ranges", &len);
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index 9f10689905a..c4195ea869e 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -1403,7 +1403,7 @@ static int __devinit qpti_sbus_probe(struct of_device *dev, const struct of_devi
 	struct scsi_host_template *tpnt = match->data;
 	struct Scsi_Host *host;
 	struct qlogicpti *qpti;
-	char *fcode;
+	const char *fcode;
 
 	/* Sometimes Antares cards come up not completely
 	 * setup, and we get a report of a zero IRQ.
diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c
index 96a852aa190..bfd44177a21 100644
--- a/drivers/serial/sunsu.c
+++ b/drivers/serial/sunsu.c
@@ -1387,8 +1387,8 @@ static enum su_type __devinit su_get_type(struct device_node *dp)
 	struct device_node *ap = of_find_node_by_path("/aliases");
 
 	if (ap) {
-		char *keyb = of_get_property(ap, "keyboard", NULL);
-		char *ms = of_get_property(ap, "mouse", NULL);
+		const char *keyb = of_get_property(ap, "keyboard", NULL);
+		const char *ms = of_get_property(ap, "mouse", NULL);
 
 		if (keyb) {
 			if (dp == of_find_node_by_path(keyb))
diff --git a/drivers/video/cg3.c b/drivers/video/cg3.c
index 767c850f8eb..f042428a84f 100644
--- a/drivers/video/cg3.c
+++ b/drivers/video/cg3.c
@@ -266,7 +266,7 @@ static void __devinit cg3_init_fix(struct fb_info *info, int linebytes,
 static void __devinit cg3_rdi_maybe_fixup_var(struct fb_var_screeninfo *var,
 					      struct device_node *dp)
 {
-	char *params;
+	const char *params;
 	char *p;
 	int ww, hh;
 
-- 
cgit v1.2.3


From 9f47df264fa53e562cafa0de4a405d0846a81fbd Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 29 Mar 2007 01:33:46 -0700
Subject: [RADEON]: Probe clocks and monitor using OF properties on sparc.

Just like powerpc does.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/video/aty/radeon_base.c    | 12 +++++++-----
 drivers/video/aty/radeon_monitor.c | 16 ++++++++--------
 drivers/video/aty/radeonfb.h       |  4 ++--
 3 files changed, 17 insertions(+), 15 deletions(-)

(limited to 'drivers')

diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index 1bf6f42eb40..a4b3fd185de 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -410,7 +410,7 @@ static int  __devinit radeon_find_mem_vbios(struct radeonfb_info *rinfo)
 }
 #endif
 
-#ifdef CONFIG_PPC_OF
+#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC)
 /*
  * Read XTAL (ref clock), SCLK and MCLK from Open Firmware device
  * tree. Hopefully, ATI OF driver is kind enough to fill these
@@ -440,7 +440,7 @@ static int __devinit radeon_read_xtal_OF (struct radeonfb_info *rinfo)
 
        	return 0;
 }
-#endif /* CONFIG_PPC_OF */
+#endif /* CONFIG_PPC_OF || CONFIG_SPARC */
 
 /*
  * Read PLL infos from chip registers
@@ -645,7 +645,7 @@ static void __devinit radeon_get_pllinfo(struct radeonfb_info *rinfo)
 	rinfo->pll.ref_div = INPLL(PPLL_REF_DIV) & PPLL_REF_DIV_MASK;
 
 
-#ifdef CONFIG_PPC_OF
+#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC)
 	/*
 	 * Retrieve PLL infos from Open Firmware first
 	 */
@@ -653,7 +653,7 @@ static void __devinit radeon_get_pllinfo(struct radeonfb_info *rinfo)
        		printk(KERN_INFO "radeonfb: Retrieved PLL infos from Open Firmware\n");
 		goto found;
 	}
-#endif /* CONFIG_PPC_OF */
+#endif /* CONFIG_PPC_OF || CONFIG_SPARC */
 
 	/*
 	 * Check out if we have an X86 which gave us some PLL informations
@@ -2231,7 +2231,7 @@ static int __devinit radeonfb_pci_register (struct pci_dev *pdev,
 	    rinfo->family == CHIP_FAMILY_RS200)
 		rinfo->errata |= CHIP_ERRATA_PLL_DELAY;
 
-#ifdef CONFIG_PPC_OF
+#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC)
 	/* On PPC, we obtain the OF device-node pointer to the firmware
 	 * data for this chip
 	 */
@@ -2240,6 +2240,8 @@ static int __devinit radeonfb_pci_register (struct pci_dev *pdev,
 		printk(KERN_WARNING "radeonfb (%s): Cannot match card to OF node !\n",
 		       pci_name(rinfo->pdev));
 
+#endif /* CONFIG_PPC_OF || CONFIG_SPARC */
+#ifdef CONFIG_PPC_OF
 	/* On PPC, the firmware sets up a memory mapping that tends
 	 * to cause lockups when enabling the engine. We reconfigure
 	 * the card internal memory mappings properly
diff --git a/drivers/video/aty/radeon_monitor.c b/drivers/video/aty/radeon_monitor.c
index 38c7dbf8c15..737b5c09dbd 100644
--- a/drivers/video/aty/radeon_monitor.c
+++ b/drivers/video/aty/radeon_monitor.c
@@ -52,7 +52,7 @@ static char *radeon_get_mon_name(int type)
 }
 
 
-#ifdef CONFIG_PPC_OF
+#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC)
 /*
  * Try to find monitor informations & EDID data out of the Open Firmware
  * device-tree. This also contains some "hacks" to work around a few machine
@@ -156,7 +156,7 @@ static int __devinit radeon_probe_OF_head(struct radeonfb_info *rinfo, int head_
 	}
         return MT_NONE;
 }
-#endif /* CONFIG_PPC_OF */
+#endif /* CONFIG_PPC_OF || CONFIG_SPARC */
 
 
 static int __devinit radeon_get_panel_info_BIOS(struct radeonfb_info *rinfo)
@@ -495,11 +495,11 @@ void __devinit radeon_probe_screens(struct radeonfb_info *rinfo,
 		 * Old single head cards
 		 */
 		if (!rinfo->has_CRTC2) {
-#ifdef CONFIG_PPC_OF
+#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC)
 			if (rinfo->mon1_type == MT_NONE)
 				rinfo->mon1_type = radeon_probe_OF_head(rinfo, 0,
 									&rinfo->mon1_EDID);
-#endif /* CONFIG_PPC_OF */
+#endif /* CONFIG_PPC_OF || CONFIG_SPARC */
 #ifdef CONFIG_FB_RADEON_I2C
 			if (rinfo->mon1_type == MT_NONE)
 				rinfo->mon1_type =
@@ -544,11 +544,11 @@ void __devinit radeon_probe_screens(struct radeonfb_info *rinfo,
 		/*
 		 * Probe primary head (DVI or laptop internal panel)
 		 */
-#ifdef CONFIG_PPC_OF
+#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC)
 		if (rinfo->mon1_type == MT_NONE)
 			rinfo->mon1_type = radeon_probe_OF_head(rinfo, 0,
 								&rinfo->mon1_EDID);
-#endif /* CONFIG_PPC_OF */
+#endif /* CONFIG_PPC_OF || CONFIG_SPARC */
 #ifdef CONFIG_FB_RADEON_I2C
 		if (rinfo->mon1_type == MT_NONE)
 			rinfo->mon1_type = radeon_probe_i2c_connector(rinfo, ddc_dvi,
@@ -572,11 +572,11 @@ void __devinit radeon_probe_screens(struct radeonfb_info *rinfo,
 		/*
 		 * Probe secondary head (mostly VGA, can be DVI)
 		 */
-#ifdef CONFIG_PPC_OF
+#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC)
 		if (rinfo->mon2_type == MT_NONE)
 			rinfo->mon2_type = radeon_probe_OF_head(rinfo, 1,
 								&rinfo->mon2_EDID);
-#endif /* CONFIG_PPC_OF */
+#endif /* CONFIG_PPC_OF || defined(CONFIG_SPARC) */
 #ifdef CONFIG_FB_RADEON_I2C
 		if (rinfo->mon2_type == MT_NONE)
 			rinfo->mon2_type = radeon_probe_i2c_connector(rinfo, ddc_vga,
diff --git a/drivers/video/aty/radeonfb.h b/drivers/video/aty/radeonfb.h
index d5ff224a625..31900036028 100644
--- a/drivers/video/aty/radeonfb.h
+++ b/drivers/video/aty/radeonfb.h
@@ -16,7 +16,7 @@
 
 #include <asm/io.h>
 
-#ifdef CONFIG_PPC_OF
+#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC)
 #include <asm/prom.h>
 #endif
 
@@ -292,7 +292,7 @@ struct radeonfb_info {
 	unsigned long		fb_local_base;
 
 	struct pci_dev		*pdev;
-#ifdef CONFIG_PPC_OF
+#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC)
 	struct device_node	*of_node;
 #endif
 
-- 
cgit v1.2.3


From dadb830dac401c4b1420ee2fd6c7559871b43319 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 28 Feb 2007 15:42:50 -0800
Subject: [SUNGEM]: __sparc__ --> CONFIG_SPARC

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sungem.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index bb07b79817f..4fa3e49f935 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -64,7 +64,7 @@
 #include <asm/uaccess.h>
 #include <asm/irq.h>
 
-#ifdef __sparc__
+#ifdef CONFIG_SPARC
 #include <asm/idprom.h>
 #include <asm/openprom.h>
 #include <asm/oplib.h>
@@ -2849,7 +2849,7 @@ static int gem_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return rc;
 }
 
-#if (!defined(__sparc__) && !defined(CONFIG_PPC_PMAC))
+#if (!defined(CONFIG_SPARC) && !defined(CONFIG_PPC_PMAC))
 /* Fetch MAC address from vital product data of PCI ROM. */
 static int find_eth_addr_in_vpd(void __iomem *rom_base, int len, unsigned char *dev_addr)
 {
@@ -2904,11 +2904,11 @@ static void get_gem_mac_nonobp(struct pci_dev *pdev, unsigned char *dev_addr)
 
 static int __devinit gem_get_device_address(struct gem *gp)
 {
-#if defined(__sparc__) || defined(CONFIG_PPC_PMAC)
+#if defined(CONFIG_SPARC) || defined(CONFIG_PPC_PMAC)
 	struct net_device *dev = gp->dev;
 #endif
 
-#if defined(__sparc__)
+#if defined(CONFIG_SPARC)
 	struct pci_dev *pdev = gp->pdev;
 	struct pcidev_cookie *pcp = pdev->sysdata;
 	int use_idprom = 1;
-- 
cgit v1.2.3


From 457e1a8afbcf5deffa501f2e9829526c18ed55b5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 29 Mar 2007 01:36:44 -0700
Subject: [SUNGEM]: Consolidate powerpc and sparc MAC probing code.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sungem.c | 31 ++++++-------------------------
 drivers/net/sungem.h |  2 +-
 2 files changed, 7 insertions(+), 26 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 4fa3e49f935..07f38907884 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -66,9 +66,7 @@
 
 #ifdef CONFIG_SPARC
 #include <asm/idprom.h>
-#include <asm/openprom.h>
-#include <asm/oplib.h>
-#include <asm/pbm.h>
+#include <asm/prom.h>
 #endif
 
 #ifdef CONFIG_PPC_PMAC
@@ -2906,34 +2904,17 @@ static int __devinit gem_get_device_address(struct gem *gp)
 {
 #if defined(CONFIG_SPARC) || defined(CONFIG_PPC_PMAC)
 	struct net_device *dev = gp->dev;
-#endif
-
-#if defined(CONFIG_SPARC)
-	struct pci_dev *pdev = gp->pdev;
-	struct pcidev_cookie *pcp = pdev->sysdata;
-	int use_idprom = 1;
-
-	if (pcp != NULL) {
-		const unsigned char *addr;
-		int len;
-
-		addr = of_get_property(pcp->prom_node, "local-mac-address",
-				       &len);
-		if (addr && len == 6) {
-			use_idprom = 0;
-			memcpy(dev->dev_addr, addr, 6);
-		}
-	}
-	if (use_idprom)
-		memcpy(dev->dev_addr, idprom->id_ethaddr, 6);
-#elif defined(CONFIG_PPC_PMAC)
 	const unsigned char *addr;
 
 	addr = get_property(gp->of_node, "local-mac-address", NULL);
 	if (addr == NULL) {
+#ifdef CONFIG_SPARC
+		addr = idprom->id_ethaddr;
+#else
 		printk("\n");
 		printk(KERN_ERR "%s: can't get mac-address\n", dev->name);
 		return -1;
+#endif
 	}
 	memcpy(dev->dev_addr, addr, 6);
 #else
@@ -3091,7 +3072,7 @@ static int __devinit gem_init_one(struct pci_dev *pdev,
 	/* On Apple, we want a reference to the Open Firmware device-tree
 	 * node. We use it for clock control.
 	 */
-#ifdef CONFIG_PPC_PMAC
+#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC)
 	gp->of_node = pci_device_to_OF_node(pdev);
 #endif
 
diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h
index a70067c85cc..58cf87c5751 100644
--- a/drivers/net/sungem.h
+++ b/drivers/net/sungem.h
@@ -1025,7 +1025,7 @@ struct gem {
 
 	struct pci_dev		*pdev;
 	struct net_device	*dev;
-#ifdef CONFIG_PPC_PMAC
+#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC)
 	struct device_node	*of_node;
 #endif
 };
-- 
cgit v1.2.3


From 6f85a8597d1d0d8ceeec5a82881c6ddf5cfb45e5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 28 Feb 2007 16:40:57 -0800
Subject: [SUNHME]: Use pci_device_to_OF_node().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sunhme.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index bee5e5b309b..bf0d4844f9f 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -55,9 +55,6 @@
 
 #ifdef CONFIG_PCI
 #include <linux/pci.h>
-#ifdef CONFIG_SPARC
-#include <asm/pbm.h>
-#endif
 #endif
 
 #include "sunhme.h"
@@ -2986,7 +2983,7 @@ static int __devinit happy_meal_pci_probe(struct pci_dev *pdev,
 {
 	struct quattro *qp = NULL;
 #ifdef CONFIG_SPARC
-	struct pcidev_cookie *pcp;
+	struct device_node *dp;
 #endif
 	struct happy_meal *hp;
 	struct net_device *dev;
@@ -2998,13 +2995,8 @@ static int __devinit happy_meal_pci_probe(struct pci_dev *pdev,
 
 	/* Now make sure pci_dev cookie is there. */
 #ifdef CONFIG_SPARC
-	pcp = pdev->sysdata;
-	if (pcp == NULL) {
-		printk(KERN_ERR "happymeal(PCI): Some PCI device info missing\n");
-		return -ENODEV;
-	}
-
-	strcpy(prom_name, pcp->prom_node->name);
+	dp = pci_device_to_OF_node(pdev);
+	strcpy(prom_name, dp->name);
 #else
 	if (is_quattro_p(pdev))
 		strcpy(prom_name, "SUNW,qfe");
@@ -3085,7 +3077,7 @@ static int __devinit happy_meal_pci_probe(struct pci_dev *pdev,
 		int len;
 
 		if (qfe_slot != -1 &&
-		    (addr = of_get_property(pcp->prom_node,
+		    (addr = of_get_property(dp,
 					    "local-mac-address", &len)) != NULL
 		    && len == 6) {
 			memcpy(dev->dev_addr, addr, 6);
@@ -3105,7 +3097,7 @@ static int __devinit happy_meal_pci_probe(struct pci_dev *pdev,
 	hp->tcvregs    = (hpreg_base + 0x7000UL);
 
 #ifdef CONFIG_SPARC
-	hp->hm_revision = of_getintprop_default(pcp->prom_node, "hm-rev", 0xff);
+	hp->hm_revision = of_getintprop_default(dp, "hm-rev", 0xff);
 	if (hp->hm_revision == 0xff) {
 		unsigned char prev;
 
-- 
cgit v1.2.3


From 49b6e95ff6d05722bcf7a52b00454566ce0c44eb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 29 Mar 2007 01:38:42 -0700
Subject: [TG3]: Use pci_device_to_OF_node() on sparc.

And use CONFIG_SPARC instead of CONFIG_SPARC64 as the
test.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tg3.c | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 45b4e018b5e..6aef1e95d39 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -46,10 +46,9 @@
 #include <asm/byteorder.h>
 #include <asm/uaccess.h>
 
-#ifdef CONFIG_SPARC64
+#ifdef CONFIG_SPARC
 #include <asm/idprom.h>
-#include <asm/oplib.h>
-#include <asm/pbm.h>
+#include <asm/prom.h>
 #endif
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
@@ -10988,24 +10987,20 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	return err;
 }
 
-#ifdef CONFIG_SPARC64
+#ifdef CONFIG_SPARC
 static int __devinit tg3_get_macaddr_sparc(struct tg3 *tp)
 {
 	struct net_device *dev = tp->dev;
 	struct pci_dev *pdev = tp->pdev;
-	struct pcidev_cookie *pcp = pdev->sysdata;
-
-	if (pcp != NULL) {
-		const unsigned char *addr;
-		int len;
-
-		addr = of_get_property(pcp->prom_node, "local-mac-address",
-					&len);
-		if (addr && len == 6) {
-			memcpy(dev->dev_addr, addr, 6);
-			memcpy(dev->perm_addr, dev->dev_addr, 6);
-			return 0;
-		}
+	struct device_node *dp = pci_device_to_OF_node(pdev);
+	unsigned char *addr;
+	int len;
+
+	addr = of_get_property(dp, "local-mac-address", &len);
+	if (addr && len == 6) {
+		memcpy(dev->dev_addr, addr, 6);
+		memcpy(dev->perm_addr, dev->dev_addr, 6);
+		return 0;
 	}
 	return -ENODEV;
 }
@@ -11026,7 +11021,7 @@ static int __devinit tg3_get_device_address(struct tg3 *tp)
 	u32 hi, lo, mac_offset;
 	int addr_ok = 0;
 
-#ifdef CONFIG_SPARC64
+#ifdef CONFIG_SPARC
 	if (!tg3_get_macaddr_sparc(tp))
 		return 0;
 #endif
-- 
cgit v1.2.3


From 49345103fef36617abc9a649dfc34f7e921c6878 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 29 Mar 2007 01:39:44 -0700
Subject: [TULIP]: Use CONFIG_SPARC consistently in ifdef tests.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tulip/de2104x.c         |  2 +-
 drivers/net/tulip/de4x5.c           | 10 +++-------
 drivers/net/tulip/tulip_core.c      | 12 ++++++------
 drivers/net/tulip/winbond-840.c     |  2 +-
 drivers/net/tulip/xircom_tulip_cb.c |  2 +-
 5 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
index c82befa209a..a0eda563559 100644
--- a/drivers/net/tulip/de2104x.c
+++ b/drivers/net/tulip/de2104x.c
@@ -63,7 +63,7 @@ MODULE_PARM_DESC (debug, "de2104x bitmapped message enable number");
 
 /* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */
 #if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \
-        || defined(__sparc__) || defined(__ia64__) \
+        || defined(CONFIG_SPARC) || defined(__ia64__) \
         || defined(__sh__) || defined(__mips__)
 static int rx_copybreak = 1518;
 #else
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c
index 4b3cd3d8b62..e4af94e2282 100644
--- a/drivers/net/tulip/de4x5.c
+++ b/drivers/net/tulip/de4x5.c
@@ -1160,7 +1160,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev)
 	sprintf(lp->adapter_name,"%s (%s)", name, gendev->bus_id);
 
 	lp->dma_size = (NUM_RX_DESC + NUM_TX_DESC) * sizeof(struct de4x5_desc);
-#if defined(__alpha__) || defined(__powerpc__) || defined(__sparc_v9__) || defined(DE4X5_DO_MEMCPY)
+#if defined(__alpha__) || defined(__powerpc__) || defined(CONFIG_SPARC) || defined(DE4X5_DO_MEMCPY)
 	lp->dma_size += RX_BUFF_SZ * NUM_RX_DESC + DE4X5_ALIGN;
 #endif
 	lp->rx_ring = dma_alloc_coherent(gendev, lp->dma_size,
@@ -1175,7 +1175,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev)
 	** Set up the RX descriptor ring (Intels)
 	** Allocate contiguous receive buffers, long word aligned (Alphas)
 	*/
-#if !defined(__alpha__) && !defined(__powerpc__) && !defined(__sparc_v9__) && !defined(DE4X5_DO_MEMCPY)
+#if !defined(__alpha__) && !defined(__powerpc__) && !defined(CONFIG_SPARC) && !defined(DE4X5_DO_MEMCPY)
 	for (i=0; i<NUM_RX_DESC; i++) {
 	    lp->rx_ring[i].status = 0;
 	    lp->rx_ring[i].des1 = cpu_to_le32(RX_BUFF_SZ);
@@ -1252,11 +1252,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev)
 	    mii_get_phy(dev);
 	}
 
-#ifndef __sparc_v9__
 	printk("      and requires IRQ%d (provided by %s).\n", dev->irq,
-#else
-	printk("      and requires IRQ%x (provided by %s).\n", dev->irq,
-#endif
 	       ((lp->bus == PCI) ? "PCI BIOS" : "EISA CNFG"));
     }
 
@@ -3627,7 +3623,7 @@ de4x5_alloc_rx_buff(struct net_device *dev, int index, int len)
     struct de4x5_private *lp = netdev_priv(dev);
     struct sk_buff *p;
 
-#if !defined(__alpha__) && !defined(__powerpc__) && !defined(__sparc_v9__) && !defined(DE4X5_DO_MEMCPY)
+#if !defined(__alpha__) && !defined(__powerpc__) && !defined(CONFIG_SPARC) && !defined(DE4X5_DO_MEMCPY)
     struct sk_buff *ret;
     u_long i=0, tmp;
 
diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index 03c14181b6b..81bcc564e15 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -36,7 +36,7 @@
 #include <asm/unaligned.h>
 #include <asm/uaccess.h>
 
-#ifdef __sparc__
+#ifdef CONFIG_SPARC
 #include <asm/pbm.h>
 #endif
 
@@ -67,7 +67,7 @@ const char * const medianame[32] = {
 
 /* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */
 #if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \
-	|| defined(__sparc__) || defined(__ia64__) \
+	|| defined(CONFIG_SPARC) || defined(__ia64__) \
 	|| defined(__sh__) || defined(__mips__)
 static int rx_copybreak = 1518;
 #else
@@ -91,7 +91,7 @@ static int rx_copybreak = 100;
 static int csr0 = 0x01A00000 | 0xE000;
 #elif defined(__i386__) || defined(__powerpc__) || defined(__x86_64__)
 static int csr0 = 0x01A00000 | 0x8000;
-#elif defined(__sparc__) || defined(__hppa__)
+#elif defined(CONFIG_SPARC) || defined(__hppa__)
 /* The UltraSparc PCI controllers will disconnect at every 64-byte
  * crossing anyways so it makes no sense to tell Tulip to burst
  * any more than that.
@@ -1315,7 +1315,7 @@ static int __devinit tulip_init_one (struct pci_dev *pdev,
 	/* DM9102A has troubles with MRM & clear reserved bits 24:22, 20, 16, 7:1 */
 	if (tulip_uli_dm_quirk(pdev)) {
 		csr0 &= ~0x01f100ff;
-#if defined(__sparc__)
+#if defined(CONFIG_SPARC)
                 csr0 = (csr0 & ~0xff00) | 0xe000;
 #endif
 	}
@@ -1535,14 +1535,14 @@ static int __devinit tulip_init_one (struct pci_dev *pdev,
 	   Many PCI BIOSes also incorrectly report the IRQ line, so we correct
 	   that here as well. */
 	if (sum == 0  || sum == 6*0xff) {
-#if defined(__sparc__)
+#if defined(CONFIG_SPARC)
 		struct pcidev_cookie *pcp = pdev->sysdata;
 #endif
 		eeprom_missing = 1;
 		for (i = 0; i < 5; i++)
 			dev->dev_addr[i] = last_phys_addr[i];
 		dev->dev_addr[i] = last_phys_addr[i] + 1;
-#if defined(__sparc__)
+#if defined(CONFIG_SPARC)
 		if (pcp) {
 			const unsigned char *addr;
 			int len;
diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c
index 002a05e0722..e164a0d83fe 100644
--- a/drivers/net/tulip/winbond-840.c
+++ b/drivers/net/tulip/winbond-840.c
@@ -903,7 +903,7 @@ static void init_registers(struct net_device *dev)
 	}
 #elif defined(__powerpc__) || defined(__i386__) || defined(__alpha__) || defined(__ia64__) || defined(__x86_64__)
 	i |= 0xE000;
-#elif defined(__sparc__) || defined (CONFIG_PARISC)
+#elif defined(CONFIG_SPARC) || defined (CONFIG_PARISC)
 	i |= 0x4800;
 #else
 #warning Processor architecture undefined
diff --git a/drivers/net/tulip/xircom_tulip_cb.c b/drivers/net/tulip/xircom_tulip_cb.c
index a998c5d0ae9..15a1be6a2fa 100644
--- a/drivers/net/tulip/xircom_tulip_cb.c
+++ b/drivers/net/tulip/xircom_tulip_cb.c
@@ -65,7 +65,7 @@ static int rx_copybreak = 100;
 static int csr0 = 0x01A00000 | 0xE000;
 #elif defined(__powerpc__)
 static int csr0 = 0x01B00000 | 0x8000;
-#elif defined(__sparc__)
+#elif defined(CONFIG_SPARC)
 static int csr0 = 0x01B00080 | 0x8000;
 #elif defined(__i386__)
 static int csr0 = 0x01A00000 | 0x8000;
-- 
cgit v1.2.3


From d297c31fd101473983c17734a7e8a3752da1880f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 29 Mar 2007 01:41:28 -0700
Subject: [TULIP]: Use pci_device_to_OF_node() on sparc.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tulip/tulip_core.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index 81bcc564e15..e9bf526ec53 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -37,7 +37,7 @@
 #include <asm/uaccess.h>
 
 #ifdef CONFIG_SPARC
-#include <asm/pbm.h>
+#include <asm/prom.h>
 #endif
 
 static char version[] __devinitdata =
@@ -1536,22 +1536,18 @@ static int __devinit tulip_init_one (struct pci_dev *pdev,
 	   that here as well. */
 	if (sum == 0  || sum == 6*0xff) {
 #if defined(CONFIG_SPARC)
-		struct pcidev_cookie *pcp = pdev->sysdata;
+		struct device_node *dp = pci_device_to_OF_node(pdev);
+		const unsigned char *addr;
+		int len;
 #endif
 		eeprom_missing = 1;
 		for (i = 0; i < 5; i++)
 			dev->dev_addr[i] = last_phys_addr[i];
 		dev->dev_addr[i] = last_phys_addr[i] + 1;
 #if defined(CONFIG_SPARC)
-		if (pcp) {
-			const unsigned char *addr;
-			int len;
-
-			addr = of_get_property(pcp->prom_node,
-					       "local-mac-address", &len);
-			if (addr && len == 6)
-				memcpy(dev->dev_addr, addr, 6);
-		}
+		addr = of_get_property(dp, "local-mac-address", &len);
+		if (addr && len == 6)
+			memcpy(dev->dev_addr, addr, 6);
 #endif
 #if defined(__i386__) || defined(__x86_64__)	/* Patch up x86 BIOS bug. */
 		if (last_irq)
-- 
cgit v1.2.3


From fa449bd602c8871da48e6dbadfa0faaf4d33d32e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 25 Apr 2007 16:01:51 -0700
Subject: [OPENPROM]: Use pci_device_to_OF_node().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/sbus/char/openprom.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c
index aec3b9f991f..fbfeb89a6f3 100644
--- a/drivers/sbus/char/openprom.c
+++ b/drivers/sbus/char/openprom.c
@@ -44,7 +44,6 @@
 #include <asm/openpromio.h>
 #ifdef CONFIG_PCI
 #include <linux/pci.h>
-#include <asm/pbm.h>
 #endif
 
 MODULE_AUTHOR("Thomas K. Dyas (tdyas@noc.rutgers.edu) and Eddie C. Dost  (ecd@skynet.be)");
@@ -248,18 +247,17 @@ static int oprompci2node(void __user *argp, struct device_node *dp, struct openp
 	if (bufsize >= 2*sizeof(int)) {
 #ifdef CONFIG_PCI
 		struct pci_dev *pdev;
-		struct pcidev_cookie *pcp;
+		struct device_node *dp;
+
 		pdev = pci_get_bus_and_slot (((int *) op->oprom_array)[0],
 				      ((int *) op->oprom_array)[1]);
 
-		pcp = pdev->sysdata;
-		if (pcp != NULL) {
-			dp = pcp->prom_node;
-			data->current_node = dp;
-			*((int *)op->oprom_array) = dp->node;
-			op->oprom_size = sizeof(int);
-			err = copyout(argp, op, bufsize + sizeof(int));
-		}
+		dp = pci_device_to_OF_node(pdev);
+		data->current_node = dp;
+		*((int *)op->oprom_array) = dp->node;
+		op->oprom_size = sizeof(int);
+		err = copyout(argp, op, bufsize + sizeof(int));
+
 		pci_dev_put(pdev);
 #endif
 	}
-- 
cgit v1.2.3


From a02079cdb74dde27391d019abca4a37988504b4e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 28 Feb 2007 17:02:45 -0800
Subject: [ATYFB]: Use pci_device_to_OF_node() in sparc.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/video/aty/atyfb_base.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index d7627fc4f11..8514f2a6f06 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -2899,7 +2899,7 @@ static int __devinit atyfb_setup_sparc(struct pci_dev *pdev,
 			struct fb_info *info, unsigned long addr)
 {
 	struct atyfb_par *par = info->par;
-	struct pcidev_cookie *pcp;
+	struct device_node *dp;
 	char prop[128];
 	int node, len, i, j, ret;
 	u32 mem, chip_id;
@@ -3037,8 +3037,8 @@ static int __devinit atyfb_setup_sparc(struct pci_dev *pdev,
 			node = 0;
 	}
 
-	pcp = pdev->sysdata;
-	if (node == pcp->prom_node->node) {
+	dp = pci_device_to_OF_node(pdev);
+	if (node == dp->node) {
 		struct fb_var_screeninfo *var = &default_var;
 		unsigned int N, P, Q, M, T, R;
 		u32 v_total, h_total;
-- 
cgit v1.2.3


From 9b1caafe09ccec8e0103e9375b711e3a0c838260 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 28 Feb 2007 17:05:06 -0800
Subject: [IGAFB]: Use pci_device_to_OF_node() on sparc.

Also __sparc__ --> CONFIG_SPARC

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/video/igafb.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'drivers')

diff --git a/drivers/video/igafb.c b/drivers/video/igafb.c
index 90592fb5915..eb1a4812ad1 100644
--- a/drivers/video/igafb.c
+++ b/drivers/video/igafb.c
@@ -44,8 +44,8 @@
 
 #include <asm/io.h>
 
-#ifdef __sparc__
-#include <asm/pbm.h>
+#ifdef CONFIG_SPARC
+#include <asm/prom.h>
 #include <asm/pcic.h>
 #endif
 
@@ -96,7 +96,7 @@ struct fb_var_screeninfo default_var = {
 	.vmode		= FB_VMODE_NONINTERLACED
 };
 
-#ifdef __sparc__
+#ifdef CONFIG_SPARC
 struct fb_var_screeninfo default_var_1024x768 __initdata = {
 	/* 1024x768, 75 Hz, Non-Interlaced (78.75 MHz dotclock) */
 	.xres		= 1024,
@@ -188,7 +188,7 @@ static inline void iga_outb(struct iga_par *par, unsigned char val,
         pci_outb(par, val, reg+1);
 }
 
-#endif /* __sparc__ */
+#endif /* CONFIG_SPARC */
 
 /*
  *  Very important functionality for the JavaEngine1 computer:
@@ -217,7 +217,7 @@ static void iga_blank_border(struct iga_par *par)
 		iga_outb(par, 0, IGA_EXT_CNTRL, IGA_IDX_OVERSCAN_COLOR + i);
 }
 
-#ifdef __sparc__
+#ifdef CONFIG_SPARC
 static int igafb_mmap(struct fb_info *info,
 		      struct vm_area_struct *vma)
 {
@@ -271,7 +271,7 @@ static int igafb_mmap(struct fb_info *info,
 	vma->vm_flags |= VM_IO;
 	return 0;
 }
-#endif /* __sparc__ */
+#endif /* CONFIG_SPARC */
 
 static int igafb_setcolreg(unsigned regno, unsigned red, unsigned green,
                            unsigned blue, unsigned transp,
@@ -323,7 +323,7 @@ static struct fb_ops igafb_ops = {
 	.fb_fillrect	= cfb_fillrect,
 	.fb_copyarea	= cfb_copyarea,
 	.fb_imageblit	= cfb_imageblit,
-#ifdef __sparc__
+#ifdef CONFIG_SPARC
 	.fb_mmap 	= igafb_mmap,
 #endif
 };
@@ -424,7 +424,7 @@ int __init igafb_init(void)
 
 	par->frame_buffer_phys = addr & PCI_BASE_ADDRESS_MEM_MASK;
 
-#ifdef __sparc__
+#ifdef CONFIG_SPARC
 	/*
 	 * The following is sparc specific and this is why:
 	 *
@@ -477,8 +477,8 @@ int __init igafb_init(void)
 	 * Set default vmode and cmode from PROM properties.
 	 */
 	{
-                struct pcidev_cookie *cookie = pdev->sysdata;
-                int node = cookie->prom_node;
+		struct device_node *dp = pci_device_to_OF_node(pdev);
+                int node = dp->node;
                 int width = prom_getintdefault(node, "width", 1024);
                 int height = prom_getintdefault(node, "height", 768);
                 int depth = prom_getintdefault(node, "depth", 8);
@@ -534,7 +534,7 @@ int __init igafb_init(void)
 		kfree(info);
         }
 
-#ifdef __sparc__
+#ifdef CONFIG_SPARC
 	    /*
 	     * Add /dev/fb mmap values.
 	     */
@@ -552,7 +552,7 @@ int __init igafb_init(void)
 	    par->mmap_map[1].size = PAGE_SIZE * 2; /* X wants 2 pages */
 	    par->mmap_map[1].prot_mask = SRMMU_CACHE;
 	    par->mmap_map[1].prot_flag = SRMMU_WRITE;
-#endif /* __sparc__ */
+#endif /* CONFIG_SPARC */
 
 	return 0;
 }
-- 
cgit v1.2.3


From 374d4cac6283469f101282ca83ee008368bd8350 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 29 Mar 2007 01:57:57 -0700
Subject: [TIGON3]: of_get_property() returns const.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tg3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 6aef1e95d39..9d07d2d5dfa 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -10993,7 +10993,7 @@ static int __devinit tg3_get_macaddr_sparc(struct tg3 *tp)
 	struct net_device *dev = tp->dev;
 	struct pci_dev *pdev = tp->pdev;
 	struct device_node *dp = pci_device_to_OF_node(pdev);
-	unsigned char *addr;
+	const unsigned char *addr;
 	int len;
 
 	addr = of_get_property(dp, "local-mac-address", &len);
-- 
cgit v1.2.3


From cd9ad58d4061494e7fdd70ded7bcf2418daf356a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 26 Apr 2007 21:19:23 -0700
Subject: [SCSI] SUNESP: Complete driver rewrite to version 2.0

Major features:

1) Tagged queuing support.
2) Will properly negotiate for synchronous transfers even on
   devices that reject the wide negotiation message, such as
   CDROMs
3) Significantly lower kernel stack usage in interrupt
   handler path by elimination of function vector arrays,
   replaced by a top-level switch statement state machine.
4) Uses generic scsi infrastructure as much as possible to
   avoid code duplication.
5) Automatic request of sense data in response to CHECK_CONDITION
6) Portable to other platforms using ESP such as DEC and Sun3
   systems.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/scsi/Kconfig    |    6 +
 drivers/scsi/Makefile   |    3 +-
 drivers/scsi/esp.c      | 4394 -----------------------------------------------
 drivers/scsi/esp.h      |  406 -----
 drivers/scsi/esp_scsi.c | 2710 +++++++++++++++++++++++++++++
 drivers/scsi/esp_scsi.h |  560 ++++++
 drivers/scsi/sun_esp.c  |  634 +++++++
 7 files changed, 3912 insertions(+), 4801 deletions(-)
 delete mode 100644 drivers/scsi/esp.c
 delete mode 100644 drivers/scsi/esp.h
 create mode 100644 drivers/scsi/esp_scsi.c
 create mode 100644 drivers/scsi/esp_scsi.h
 create mode 100644 drivers/scsi/sun_esp.c

(limited to 'drivers')

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 4cd280e8696..fcc4cb6c7f4 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1763,9 +1763,15 @@ config SUN3X_ESP
 	  The ESP was an on-board SCSI controller used on Sun 3/80
 	  machines.  Say Y here to compile in support for it.
 
+config SCSI_ESP_CORE
+	tristate "ESP Scsi Driver Core"
+	depends on SCSI
+	select SCSI_SPI_ATTRS
+
 config SCSI_SUNESP
 	tristate "Sparc ESP Scsi Driver"
 	depends on SBUS && SCSI
+	select SCSI_ESP_CORE
 	help
 	  This is the driver for the Sun ESP SCSI host adapter. The ESP
 	  chipset is present in most SPARC SBUS-based computers.
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 79ecf4ebe6e..70cff4c599d 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -106,7 +106,8 @@ obj-$(CONFIG_MEGARAID_LEGACY)	+= megaraid.o
 obj-$(CONFIG_MEGARAID_NEWGEN)	+= megaraid/
 obj-$(CONFIG_MEGARAID_SAS)	+= megaraid/
 obj-$(CONFIG_SCSI_ACARD)	+= atp870u.o
-obj-$(CONFIG_SCSI_SUNESP)	+= esp.o
+obj-$(CONFIG_SCSI_ESP_CORE)	+= esp_scsi.o
+obj-$(CONFIG_SCSI_SUNESP)	+= sun_esp.o
 obj-$(CONFIG_SCSI_GDTH)		+= gdth.o
 obj-$(CONFIG_SCSI_INITIO)	+= initio.o
 obj-$(CONFIG_SCSI_INIA100)	+= a100u2w.o
diff --git a/drivers/scsi/esp.c b/drivers/scsi/esp.c
deleted file mode 100644
index 2c2fe80bc42..00000000000
--- a/drivers/scsi/esp.c
+++ /dev/null
@@ -1,4394 +0,0 @@
-/* esp.c: ESP Sun SCSI driver.
- *
- * Copyright (C) 1995, 1998, 2006 David S. Miller (davem@davemloft.net)
- */
-
-/* TODO:
- *
- * 1) Maybe disable parity checking in config register one for SCSI1
- *    targets.  (Gilmore says parity error on the SBus can lock up
- *    old sun4c's)
- * 2) Add support for DMA2 pipelining.
- * 3) Add tagged queueing.
- */
-
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-
-#include "esp.h"
-
-#include <asm/sbus.h>
-#include <asm/dma.h>
-#include <asm/system.h>
-#include <asm/ptrace.h>
-#include <asm/pgtable.h>
-#include <asm/oplib.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#ifndef __sparc_v9__
-#include <asm/machines.h>
-#include <asm/idprom.h>
-#endif
-
-#include <scsi/scsi.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_device.h>
-#include <scsi/scsi_eh.h>
-#include <scsi/scsi_host.h>
-#include <scsi/scsi_tcq.h>
-
-#define DRV_VERSION "1.101"
-
-#define DEBUG_ESP
-/* #define DEBUG_ESP_HME */
-/* #define DEBUG_ESP_DATA */
-/* #define DEBUG_ESP_QUEUE */
-/* #define DEBUG_ESP_DISCONNECT */
-/* #define DEBUG_ESP_STATUS */
-/* #define DEBUG_ESP_PHASES */
-/* #define DEBUG_ESP_WORKBUS */
-/* #define DEBUG_STATE_MACHINE */
-/* #define DEBUG_ESP_CMDS */
-/* #define DEBUG_ESP_IRQS */
-/* #define DEBUG_SDTR */
-/* #define DEBUG_ESP_SG */
-
-/* Use the following to sprinkle debugging messages in a way which
- * suits you if combinations of the above become too verbose when
- * trying to track down a specific problem.
- */
-/* #define DEBUG_ESP_MISC */
-
-#if defined(DEBUG_ESP)
-#define ESPLOG(foo)  printk foo
-#else
-#define ESPLOG(foo)
-#endif /* (DEBUG_ESP) */
-
-#if defined(DEBUG_ESP_HME)
-#define ESPHME(foo)  printk foo
-#else
-#define ESPHME(foo)
-#endif
-
-#if defined(DEBUG_ESP_DATA)
-#define ESPDATA(foo)  printk foo
-#else
-#define ESPDATA(foo)
-#endif
-
-#if defined(DEBUG_ESP_QUEUE)
-#define ESPQUEUE(foo)  printk foo
-#else
-#define ESPQUEUE(foo)
-#endif
-
-#if defined(DEBUG_ESP_DISCONNECT)
-#define ESPDISC(foo)  printk foo
-#else
-#define ESPDISC(foo)
-#endif
-
-#if defined(DEBUG_ESP_STATUS)
-#define ESPSTAT(foo)  printk foo
-#else
-#define ESPSTAT(foo)
-#endif
-
-#if defined(DEBUG_ESP_PHASES)
-#define ESPPHASE(foo)  printk foo
-#else
-#define ESPPHASE(foo)
-#endif
-
-#if defined(DEBUG_ESP_WORKBUS)
-#define ESPBUS(foo)  printk foo
-#else
-#define ESPBUS(foo)
-#endif
-
-#if defined(DEBUG_ESP_IRQS)
-#define ESPIRQ(foo)  printk foo
-#else
-#define ESPIRQ(foo)
-#endif
-
-#if defined(DEBUG_SDTR)
-#define ESPSDTR(foo)  printk foo
-#else
-#define ESPSDTR(foo)
-#endif
-
-#if defined(DEBUG_ESP_MISC)
-#define ESPMISC(foo)  printk foo
-#else
-#define ESPMISC(foo)
-#endif
-
-/* Command phase enumeration. */
-enum {
-	not_issued    = 0x00,  /* Still in the issue_SC queue.          */
-
-	/* Various forms of selecting a target. */
-#define in_slct_mask    0x10
-	in_slct_norm  = 0x10,  /* ESP is arbitrating, normal selection  */
-	in_slct_stop  = 0x11,  /* ESP will select, then stop with IRQ   */
-	in_slct_msg   = 0x12,  /* select, then send a message           */
-	in_slct_tag   = 0x13,  /* select and send tagged queue msg      */
-	in_slct_sneg  = 0x14,  /* select and acquire sync capabilities  */
-
-	/* Any post selection activity. */
-#define in_phases_mask  0x20
-	in_datain     = 0x20,  /* Data is transferring from the bus     */
-	in_dataout    = 0x21,  /* Data is transferring to the bus       */
-	in_data_done  = 0x22,  /* Last DMA data operation done (maybe)  */
-	in_msgin      = 0x23,  /* Eating message from target            */
-	in_msgincont  = 0x24,  /* Eating more msg bytes from target     */
-	in_msgindone  = 0x25,  /* Decide what to do with what we got    */
-	in_msgout     = 0x26,  /* Sending message to target             */
-	in_msgoutdone = 0x27,  /* Done sending msg out                  */
-	in_cmdbegin   = 0x28,  /* Sending cmd after abnormal selection  */
-	in_cmdend     = 0x29,  /* Done sending slow cmd                 */
-	in_status     = 0x2a,  /* Was in status phase, finishing cmd    */
-	in_freeing    = 0x2b,  /* freeing the bus for cmd cmplt or disc */
-	in_the_dark   = 0x2c,  /* Don't know what bus phase we are in   */
-
-	/* Special states, ie. not normal bus transitions... */
-#define in_spec_mask    0x80
-	in_abortone   = 0x80,  /* Aborting one command currently        */
-	in_abortall   = 0x81,  /* Blowing away all commands we have     */
-	in_resetdev   = 0x82,  /* SCSI target reset in progress         */
-	in_resetbus   = 0x83,  /* SCSI bus reset in progress            */
-	in_tgterror   = 0x84,  /* Target did something stupid           */
-};
-
-enum {
-	/* Zero has special meaning, see skipahead[12]. */
-/*0*/	do_never,
-
-/*1*/	do_phase_determine,
-/*2*/	do_reset_bus,
-/*3*/	do_reset_complete,
-/*4*/	do_work_bus,
-/*5*/	do_intr_end
-};
-
-/* Forward declarations. */
-static irqreturn_t esp_intr(int irq, void *dev_id);
-
-/* Debugging routines */
-struct esp_cmdstrings {
-	u8 cmdchar;
-	char *text;
-} esp_cmd_strings[] = {
-	/* Miscellaneous */
-	{ ESP_CMD_NULL, "ESP_NOP", },
-	{ ESP_CMD_FLUSH, "FIFO_FLUSH", },
-	{ ESP_CMD_RC, "RSTESP", },
-	{ ESP_CMD_RS, "RSTSCSI", },
-	/* Disconnected State Group */
-	{ ESP_CMD_RSEL, "RESLCTSEQ", },
-	{ ESP_CMD_SEL, "SLCTNATN", },
-	{ ESP_CMD_SELA, "SLCTATN", },
-	{ ESP_CMD_SELAS, "SLCTATNSTOP", },
-	{ ESP_CMD_ESEL, "ENSLCTRESEL", },
-	{ ESP_CMD_DSEL, "DISSELRESEL", },
-	{ ESP_CMD_SA3, "SLCTATN3", },
-	{ ESP_CMD_RSEL3, "RESLCTSEQ", },
-	/* Target State Group */
-	{ ESP_CMD_SMSG, "SNDMSG", },
-	{ ESP_CMD_SSTAT, "SNDSTATUS", },
-	{ ESP_CMD_SDATA, "SNDDATA", },
-	{ ESP_CMD_DSEQ, "DISCSEQ", },
-	{ ESP_CMD_TSEQ, "TERMSEQ", },
-	{ ESP_CMD_TCCSEQ, "TRGTCMDCOMPSEQ", },
-	{ ESP_CMD_DCNCT, "DISC", },
-	{ ESP_CMD_RMSG, "RCVMSG", },
-	{ ESP_CMD_RCMD, "RCVCMD", },
-	{ ESP_CMD_RDATA, "RCVDATA", },
-	{ ESP_CMD_RCSEQ, "RCVCMDSEQ", },
-	/* Initiator State Group */
-	{ ESP_CMD_TI, "TRANSINFO", },
-	{ ESP_CMD_ICCSEQ, "INICMDSEQCOMP", },
-	{ ESP_CMD_MOK, "MSGACCEPTED", },
-	{ ESP_CMD_TPAD, "TPAD", },
-	{ ESP_CMD_SATN, "SATN", },
-	{ ESP_CMD_RATN, "RATN", },
-};
-#define NUM_ESP_COMMANDS  ((sizeof(esp_cmd_strings)) / (sizeof(struct esp_cmdstrings)))
-
-/* Print textual representation of an ESP command */
-static inline void esp_print_cmd(u8 espcmd)
-{
-	u8 dma_bit = espcmd & ESP_CMD_DMA;
-	int i;
-
-	espcmd &= ~dma_bit;
-	for (i = 0; i < NUM_ESP_COMMANDS; i++)
-		if (esp_cmd_strings[i].cmdchar == espcmd)
-			break;
-	if (i == NUM_ESP_COMMANDS)
-		printk("ESP_Unknown");
-	else
-		printk("%s%s", esp_cmd_strings[i].text,
-		       ((dma_bit) ? "+DMA" : ""));
-}
-
-/* Print the status register's value */
-static inline void esp_print_statreg(u8 statreg)
-{
-	u8 phase;
-
-	printk("STATUS<");
-	phase = statreg & ESP_STAT_PMASK;
-	printk("%s,", (phase == ESP_DOP ? "DATA-OUT" :
-		       (phase == ESP_DIP ? "DATA-IN" :
-			(phase == ESP_CMDP ? "COMMAND" :
-			 (phase == ESP_STATP ? "STATUS" :
-			  (phase == ESP_MOP ? "MSG-OUT" :
-			   (phase == ESP_MIP ? "MSG_IN" :
-			    "unknown")))))));
-	if (statreg & ESP_STAT_TDONE)
-		printk("TRANS_DONE,");
-	if (statreg & ESP_STAT_TCNT)
-		printk("TCOUNT_ZERO,");
-	if (statreg & ESP_STAT_PERR)
-		printk("P_ERROR,");
-	if (statreg & ESP_STAT_SPAM)
-		printk("SPAM,");
-	if (statreg & ESP_STAT_INTR)
-		printk("IRQ,");
-	printk(">");
-}
-
-/* Print the interrupt register's value */
-static inline void esp_print_ireg(u8 intreg)
-{
-	printk("INTREG< ");
-	if (intreg & ESP_INTR_S)
-		printk("SLCT_NATN ");
-	if (intreg & ESP_INTR_SATN)
-		printk("SLCT_ATN ");
-	if (intreg & ESP_INTR_RSEL)
-		printk("RSLCT ");
-	if (intreg & ESP_INTR_FDONE)
-		printk("FDONE ");
-	if (intreg & ESP_INTR_BSERV)
-		printk("BSERV ");
-	if (intreg & ESP_INTR_DC)
-		printk("DISCNCT ");
-	if (intreg & ESP_INTR_IC)
-		printk("ILL_CMD ");
-	if (intreg & ESP_INTR_SR)
-		printk("SCSI_BUS_RESET ");
-	printk(">");
-}
-
-/* Print the sequence step registers contents */
-static inline void esp_print_seqreg(u8 stepreg)
-{
-	stepreg &= ESP_STEP_VBITS;
-	printk("STEP<%s>",
-	       (stepreg == ESP_STEP_ASEL ? "SLCT_ARB_CMPLT" :
-		(stepreg == ESP_STEP_SID ? "1BYTE_MSG_SENT" :
-		 (stepreg == ESP_STEP_NCMD ? "NOT_IN_CMD_PHASE" :
-		  (stepreg == ESP_STEP_PPC ? "CMD_BYTES_LOST" :
-		   (stepreg == ESP_STEP_FINI4 ? "CMD_SENT_OK" :
-		    "UNKNOWN"))))));
-}
-
-static char *phase_string(int phase)
-{
-	switch (phase) {
-	case not_issued:
-		return "UNISSUED";
-	case in_slct_norm:
-		return "SLCTNORM";
-	case in_slct_stop:
-		return "SLCTSTOP";
-	case in_slct_msg:
-		return "SLCTMSG";
-	case in_slct_tag:
-		return "SLCTTAG";
-	case in_slct_sneg:
-		return "SLCTSNEG";
-	case in_datain:
-		return "DATAIN";
-	case in_dataout:
-		return "DATAOUT";
-	case in_data_done:
-		return "DATADONE";
-	case in_msgin:
-		return "MSGIN";
-	case in_msgincont:
-		return "MSGINCONT";
-	case in_msgindone:
-		return "MSGINDONE";
-	case in_msgout:
-		return "MSGOUT";
-	case in_msgoutdone:
-		return "MSGOUTDONE";
-	case in_cmdbegin:
-		return "CMDBEGIN";
-	case in_cmdend:
-		return "CMDEND";
-	case in_status:
-		return "STATUS";
-	case in_freeing:
-		return "FREEING";
-	case in_the_dark:
-		return "CLUELESS";
-	case in_abortone:
-		return "ABORTONE";
-	case in_abortall:
-		return "ABORTALL";
-	case in_resetdev:
-		return "RESETDEV";
-	case in_resetbus:
-		return "RESETBUS";
-	case in_tgterror:
-		return "TGTERROR";
-	default:
-		return "UNKNOWN";
-	};
-}
-
-#ifdef DEBUG_STATE_MACHINE
-static inline void esp_advance_phase(struct scsi_cmnd *s, int newphase)
-{
-	ESPLOG(("<%s>", phase_string(newphase)));
-	s->SCp.sent_command = s->SCp.phase;
-	s->SCp.phase = newphase;
-}
-#else
-#define esp_advance_phase(__s, __newphase) \
-	(__s)->SCp.sent_command = (__s)->SCp.phase; \
-	(__s)->SCp.phase = (__newphase);
-#endif
-
-#ifdef DEBUG_ESP_CMDS
-static inline void esp_cmd(struct esp *esp, u8 cmd)
-{
-	esp->espcmdlog[esp->espcmdent] = cmd;
-	esp->espcmdent = (esp->espcmdent + 1) & 31;
-	sbus_writeb(cmd, esp->eregs + ESP_CMD);
-}
-#else
-#define esp_cmd(__esp, __cmd)	\
-	sbus_writeb((__cmd), ((__esp)->eregs) + ESP_CMD)
-#endif
-
-#define ESP_INTSOFF(__dregs)	\
-	sbus_writel(sbus_readl((__dregs)+DMA_CSR)&~(DMA_INT_ENAB), (__dregs)+DMA_CSR)
-#define ESP_INTSON(__dregs)	\
-	sbus_writel(sbus_readl((__dregs)+DMA_CSR)|DMA_INT_ENAB, (__dregs)+DMA_CSR)
-#define ESP_IRQ_P(__dregs)	\
-	(sbus_readl((__dregs)+DMA_CSR) & (DMA_HNDL_INTR|DMA_HNDL_ERROR))
-
-/* How we use the various Linux SCSI data structures for operation.
- *
- * struct scsi_cmnd:
- *
- *   We keep track of the synchronous capabilities of a target
- *   in the device member, using sync_min_period and
- *   sync_max_offset.  These are the values we directly write
- *   into the ESP registers while running a command.  If offset
- *   is zero the ESP will use asynchronous transfers.
- *   If the borken flag is set we assume we shouldn't even bother
- *   trying to negotiate for synchronous transfer as this target
- *   is really stupid.  If we notice the target is dropping the
- *   bus, and we have been allowing it to disconnect, we clear
- *   the disconnect flag.
- */
-
-
-/* Manipulation of the ESP command queues.  Thanks to the aha152x driver
- * and its author, Juergen E. Fischer, for the methods used here.
- * Note that these are per-ESP queues, not global queues like
- * the aha152x driver uses.
- */
-static inline void append_SC(struct scsi_cmnd **SC, struct scsi_cmnd *new_SC)
-{
-	struct scsi_cmnd *end;
-
-	new_SC->host_scribble = (unsigned char *) NULL;
-	if (!*SC)
-		*SC = new_SC;
-	else {
-		for (end=*SC;end->host_scribble;end=(struct scsi_cmnd *)end->host_scribble)
-			;
-		end->host_scribble = (unsigned char *) new_SC;
-	}
-}
-
-static inline void prepend_SC(struct scsi_cmnd **SC, struct scsi_cmnd *new_SC)
-{
-	new_SC->host_scribble = (unsigned char *) *SC;
-	*SC = new_SC;
-}
-
-static inline struct scsi_cmnd *remove_first_SC(struct scsi_cmnd **SC)
-{
-	struct scsi_cmnd *ptr;
-	ptr = *SC;
-	if (ptr)
-		*SC = (struct scsi_cmnd *) (*SC)->host_scribble;
-	return ptr;
-}
-
-static inline struct scsi_cmnd *remove_SC(struct scsi_cmnd **SC, int target, int lun)
-{
-	struct scsi_cmnd *ptr, *prev;
-
-	for (ptr = *SC, prev = NULL;
-	     ptr && ((ptr->device->id != target) || (ptr->device->lun != lun));
-	     prev = ptr, ptr = (struct scsi_cmnd *) ptr->host_scribble)
-		;
-	if (ptr) {
-		if (prev)
-			prev->host_scribble=ptr->host_scribble;
-		else
-			*SC=(struct scsi_cmnd *)ptr->host_scribble;
-	}
-	return ptr;
-}
-
-/* Resetting various pieces of the ESP scsi driver chipset/buses. */
-static void esp_reset_dma(struct esp *esp)
-{
-	int can_do_burst16, can_do_burst32, can_do_burst64;
-	int can_do_sbus64;
-	u32 tmp;
-
-	can_do_burst16 = (esp->bursts & DMA_BURST16) != 0;
-	can_do_burst32 = (esp->bursts & DMA_BURST32) != 0;
-	can_do_burst64 = 0;
-	can_do_sbus64 = 0;
-	if (sbus_can_dma_64bit(esp->sdev))
-		can_do_sbus64 = 1;
-	if (sbus_can_burst64(esp->sdev))
-		can_do_burst64 = (esp->bursts & DMA_BURST64) != 0;
-
-	/* Punt the DVMA into a known state. */
-	if (esp->dma->revision != dvmahme) {
-		tmp = sbus_readl(esp->dregs + DMA_CSR);
-		sbus_writel(tmp | DMA_RST_SCSI, esp->dregs + DMA_CSR);
-		sbus_writel(tmp & ~DMA_RST_SCSI, esp->dregs + DMA_CSR);
-	}
-	switch (esp->dma->revision) {
-	case dvmahme:
-		/* This is the HME DVMA gate array. */
-
-		sbus_writel(DMA_RESET_FAS366, esp->dregs + DMA_CSR);
-		sbus_writel(DMA_RST_SCSI, esp->dregs + DMA_CSR);
-
-		esp->prev_hme_dmacsr = (DMA_PARITY_OFF|DMA_2CLKS|DMA_SCSI_DISAB|DMA_INT_ENAB);
-		esp->prev_hme_dmacsr &= ~(DMA_ENABLE|DMA_ST_WRITE|DMA_BRST_SZ);
-
-		if (can_do_burst64)
-			esp->prev_hme_dmacsr |= DMA_BRST64;
-		else if (can_do_burst32)
-			esp->prev_hme_dmacsr |= DMA_BRST32;
-
-		if (can_do_sbus64) {
-			esp->prev_hme_dmacsr |= DMA_SCSI_SBUS64;
-			sbus_set_sbus64(esp->sdev, esp->bursts);
-		}
-
-		/* This chip is horrible. */
-		while (sbus_readl(esp->dregs + DMA_CSR) & DMA_PEND_READ)
-			udelay(1);
-
-		sbus_writel(0, esp->dregs + DMA_CSR);
-		sbus_writel(esp->prev_hme_dmacsr, esp->dregs + DMA_CSR);
-
-		/* This is necessary to avoid having the SCSI channel
-		 * engine lock up on us.
-		 */
-		sbus_writel(0, esp->dregs + DMA_ADDR);
-
-		break;
-	case dvmarev2:
-		/* This is the gate array found in the sun4m
-		 * NCR SBUS I/O subsystem.
-		 */
-		if (esp->erev != esp100) {
-			tmp = sbus_readl(esp->dregs + DMA_CSR);
-			sbus_writel(tmp | DMA_3CLKS, esp->dregs + DMA_CSR);
-		}
-		break;
-	case dvmarev3:
-		tmp = sbus_readl(esp->dregs + DMA_CSR);
-		tmp &= ~DMA_3CLKS;
-		tmp |= DMA_2CLKS;
-		if (can_do_burst32) {
-			tmp &= ~DMA_BRST_SZ;
-			tmp |= DMA_BRST32;
-		}
-		sbus_writel(tmp, esp->dregs + DMA_CSR);
-		break;
-	case dvmaesc1:
-		/* This is the DMA unit found on SCSI/Ether cards. */
-		tmp = sbus_readl(esp->dregs + DMA_CSR);
-		tmp |= DMA_ADD_ENABLE;
-		tmp &= ~DMA_BCNT_ENAB;
-		if (!can_do_burst32 && can_do_burst16) {
-			tmp |= DMA_ESC_BURST;
-		} else {
-			tmp &= ~(DMA_ESC_BURST);
-		}
-		sbus_writel(tmp, esp->dregs + DMA_CSR);
-		break;
-	default:
-		break;
-	};
-	ESP_INTSON(esp->dregs);
-}
-
-/* Reset the ESP chip, _not_ the SCSI bus. */
-static void __init esp_reset_esp(struct esp *esp)
-{
-	u8 family_code, version;
-	int i;
-
-	/* Now reset the ESP chip */
-	esp_cmd(esp, ESP_CMD_RC);
-	esp_cmd(esp, ESP_CMD_NULL | ESP_CMD_DMA);
-	esp_cmd(esp, ESP_CMD_NULL | ESP_CMD_DMA);
-
-	/* Reload the configuration registers */
-	sbus_writeb(esp->cfact, esp->eregs + ESP_CFACT);
-	esp->prev_stp = 0;
-	sbus_writeb(esp->prev_stp, esp->eregs + ESP_STP);
-	esp->prev_soff = 0;
-	sbus_writeb(esp->prev_soff, esp->eregs + ESP_SOFF);
-	sbus_writeb(esp->neg_defp, esp->eregs + ESP_TIMEO);
-
-	/* This is the only point at which it is reliable to read
-	 * the ID-code for a fast ESP chip variants.
-	 */
-	esp->max_period = ((35 * esp->ccycle) / 1000);
-	if (esp->erev == fast) {
-		version = sbus_readb(esp->eregs + ESP_UID);
-		family_code = (version & 0xf8) >> 3;
-		if (family_code == 0x02)
-			esp->erev = fas236;
-		else if (family_code == 0x0a)
-			esp->erev = fashme; /* Version is usually '5'. */
-		else
-			esp->erev = fas100a;
-		ESPMISC(("esp%d: FAST chip is %s (family=%d, version=%d)\n",
-			 esp->esp_id,
-			 (esp->erev == fas236) ? "fas236" :
-			 ((esp->erev == fas100a) ? "fas100a" :
-			  "fasHME"), family_code, (version & 7)));
-
-		esp->min_period = ((4 * esp->ccycle) / 1000);
-	} else {
-		esp->min_period = ((5 * esp->ccycle) / 1000);
-	}
-	esp->max_period = (esp->max_period + 3)>>2;
-	esp->min_period = (esp->min_period + 3)>>2;
-
-	sbus_writeb(esp->config1, esp->eregs + ESP_CFG1);
-	switch (esp->erev) {
-	case esp100:
-		/* nothing to do */
-		break;
-	case esp100a:
-		sbus_writeb(esp->config2, esp->eregs + ESP_CFG2);
-		break;
-	case esp236:
-		/* Slow 236 */
-		sbus_writeb(esp->config2, esp->eregs + ESP_CFG2);
-		esp->prev_cfg3 = esp->config3[0];
-		sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3);
-		break;
-	case fashme:
-		esp->config2 |= (ESP_CONFIG2_HME32 | ESP_CONFIG2_HMEFENAB);
-		/* fallthrough... */
-	case fas236:
-		/* Fast 236 or HME */
-		sbus_writeb(esp->config2, esp->eregs + ESP_CFG2);
-		for (i = 0; i < 16; i++) {
-			if (esp->erev == fashme) {
-				u8 cfg3;
-
-				cfg3 = ESP_CONFIG3_FCLOCK | ESP_CONFIG3_OBPUSH;
-				if (esp->scsi_id >= 8)
-					cfg3 |= ESP_CONFIG3_IDBIT3;
-				esp->config3[i] |= cfg3;
-			} else {
-				esp->config3[i] |= ESP_CONFIG3_FCLK;
-			}
-		}
-		esp->prev_cfg3 = esp->config3[0];
-		sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3);
-		if (esp->erev == fashme) {
-			esp->radelay = 80;
-		} else {
-			if (esp->diff)
-				esp->radelay = 0;
-			else
-				esp->radelay = 96;
-		}
-		break;
-	case fas100a:
-		/* Fast 100a */
-		sbus_writeb(esp->config2, esp->eregs + ESP_CFG2);
-		for (i = 0; i < 16; i++)
-			esp->config3[i] |= ESP_CONFIG3_FCLOCK;
-		esp->prev_cfg3 = esp->config3[0];
-		sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3);
-		esp->radelay = 32;
-		break;
-	default:
-		panic("esp: what could it be... I wonder...");
-		break;
-	};
-
-	/* Eat any bitrot in the chip */
-	sbus_readb(esp->eregs + ESP_INTRPT);
-	udelay(100);
-}
-
-/* This places the ESP into a known state at boot time. */
-static void __init esp_bootup_reset(struct esp *esp)
-{
-	u8 tmp;
-
-	/* Reset the DMA */
-	esp_reset_dma(esp);
-
-	/* Reset the ESP */
-	esp_reset_esp(esp);
-
-	/* Reset the SCSI bus, but tell ESP not to generate an irq */
-	tmp = sbus_readb(esp->eregs + ESP_CFG1);
-	tmp |= ESP_CONFIG1_SRRDISAB;
-	sbus_writeb(tmp, esp->eregs + ESP_CFG1);
-
-	esp_cmd(esp, ESP_CMD_RS);
-	udelay(400);
-
-	sbus_writeb(esp->config1, esp->eregs + ESP_CFG1);
-
-	/* Eat any bitrot in the chip and we are done... */
-	sbus_readb(esp->eregs + ESP_INTRPT);
-}
-
-static int __init esp_find_dvma(struct esp *esp, struct sbus_dev *dma_sdev)
-{
-	struct sbus_dev *sdev = esp->sdev;
-	struct sbus_dma *dma;
-
-	if (dma_sdev != NULL) {
-		for_each_dvma(dma) {
-			if (dma->sdev == dma_sdev)
-				break;
-		}
-	} else {
-		for_each_dvma(dma) {
-			/* If allocated already, can't use it. */
-			if (dma->allocated)
-				continue;
-
-			if (dma->sdev == NULL)
-				break;
-
-			/* If bus + slot are the same and it has the
-			 * correct OBP name, it's ours.
-			 */
-			if (sdev->bus == dma->sdev->bus &&
-			    sdev->slot == dma->sdev->slot &&
-			    (!strcmp(dma->sdev->prom_name, "dma") ||
-			     !strcmp(dma->sdev->prom_name, "espdma")))
-				break;
-		}
-	}
-
-	/* If we don't know how to handle the dvma,
-	 * do not use this device.
-	 */
-	if (dma == NULL) {
-		printk("Cannot find dvma for ESP%d's SCSI\n", esp->esp_id);
-		return -1;
-	}
-	if (dma->allocated) {
-		printk("esp%d: can't use my espdma\n", esp->esp_id);
-		return -1;
-	}
-	dma->allocated = 1;
-	esp->dma = dma;
-	esp->dregs = dma->regs;
-
-	return 0;
-}
-
-static int __init esp_map_regs(struct esp *esp, int hme)
-{
-	struct sbus_dev *sdev = esp->sdev;
-	struct resource *res;
-
-	/* On HME, two reg sets exist, first is DVMA,
-	 * second is ESP registers.
-	 */
-	if (hme)
-		res = &sdev->resource[1];
-	else
-		res = &sdev->resource[0];
-
-	esp->eregs = sbus_ioremap(res, 0, ESP_REG_SIZE, "ESP Registers");
-
-	if (esp->eregs == 0)
-		return -1;
-	return 0;
-}
-
-static int __init esp_map_cmdarea(struct esp *esp)
-{
-	struct sbus_dev *sdev = esp->sdev;
-
-	esp->esp_command = sbus_alloc_consistent(sdev, 16,
-						 &esp->esp_command_dvma);
-	if (esp->esp_command == NULL ||
-	    esp->esp_command_dvma == 0)
-		return -1;
-	return 0;
-}
-
-static int __init esp_register_irq(struct esp *esp)
-{
-	esp->ehost->irq = esp->irq = esp->sdev->irqs[0];
-
-	/* We used to try various overly-clever things to
-	 * reduce the interrupt processing overhead on
-	 * sun4c/sun4m when multiple ESP's shared the
-	 * same IRQ.  It was too complex and messy to
-	 * sanely maintain.
-	 */
-	if (request_irq(esp->ehost->irq, esp_intr,
-			IRQF_SHARED, "ESP SCSI", esp)) {
-		printk("esp%d: Cannot acquire irq line\n",
-		       esp->esp_id);
-		return -1;
-	}
-
-	printk("esp%d: IRQ %d ", esp->esp_id,
-	       esp->ehost->irq);
-
-	return 0;
-}
-
-static void __init esp_get_scsi_id(struct esp *esp)
-{
-	struct sbus_dev *sdev = esp->sdev;
-	struct device_node *dp = sdev->ofdev.node;
-
-	esp->scsi_id = of_getintprop_default(dp,
-					     "initiator-id",
-					     -1);
-	if (esp->scsi_id == -1)
-		esp->scsi_id = of_getintprop_default(dp,
-						     "scsi-initiator-id",
-						     -1);
-	if (esp->scsi_id == -1)
-		esp->scsi_id = (sdev->bus == NULL) ? 7 :
-			of_getintprop_default(sdev->bus->ofdev.node,
-					      "scsi-initiator-id",
-					      7);
-	esp->ehost->this_id = esp->scsi_id;
-	esp->scsi_id_mask = (1 << esp->scsi_id);
-
-}
-
-static void __init esp_get_clock_params(struct esp *esp)
-{
-	struct sbus_dev *sdev = esp->sdev;
-	int prom_node = esp->prom_node;
-	int sbus_prom_node;
-	unsigned int fmhz;
-	u8 ccf;
-
-	if (sdev != NULL && sdev->bus != NULL)
-		sbus_prom_node = sdev->bus->prom_node;
-	else
-		sbus_prom_node = 0;
-
-	/* This is getting messy but it has to be done
-	 * correctly or else you get weird behavior all
-	 * over the place.  We are trying to basically
-	 * figure out three pieces of information.
-	 *
-	 * a) Clock Conversion Factor
-	 *
-	 *    This is a representation of the input
-	 *    crystal clock frequency going into the
-	 *    ESP on this machine.  Any operation whose
-	 *    timing is longer than 400ns depends on this
-	 *    value being correct.  For example, you'll
-	 *    get blips for arbitration/selection during
-	 *    high load or with multiple targets if this
-	 *    is not set correctly.
-	 *
-	 * b) Selection Time-Out
-	 *
-	 *    The ESP isn't very bright and will arbitrate
-	 *    for the bus and try to select a target
-	 *    forever if you let it.  This value tells
-	 *    the ESP when it has taken too long to
-	 *    negotiate and that it should interrupt
-	 *    the CPU so we can see what happened.
-	 *    The value is computed as follows (from
-	 *    NCR/Symbios chip docs).
-	 *
-	 *          (Time Out Period) *  (Input Clock)
-	 *    STO = ----------------------------------
-	 *          (8192) * (Clock Conversion Factor)
-	 *
-	 *    You usually want the time out period to be
-	 *    around 250ms, I think we'll set it a little
-	 *    bit higher to account for fully loaded SCSI
-	 *    bus's and slow devices that don't respond so
-	 *    quickly to selection attempts. (yeah, I know
-	 *    this is out of spec. but there is a lot of
-	 *    buggy pieces of firmware out there so bite me)
-	 *
-	 * c) Imperical constants for synchronous offset
-	 *    and transfer period register values
-	 *
-	 *    This entails the smallest and largest sync
-	 *    period we could ever handle on this ESP.
-	 */
-
-	fmhz = prom_getintdefault(prom_node, "clock-frequency", -1);
-	if (fmhz == -1)
-		fmhz = (!sbus_prom_node) ? 0 :
-			prom_getintdefault(sbus_prom_node, "clock-frequency", -1);
-
-	if (fmhz <= (5000000))
-		ccf = 0;
-	else
-		ccf = (((5000000 - 1) + (fmhz))/(5000000));
-
-	if (!ccf || ccf > 8) {
-		/* If we can't find anything reasonable,
-		 * just assume 20MHZ.  This is the clock
-		 * frequency of the older sun4c's where I've
-		 * been unable to find the clock-frequency
-		 * PROM property.  All other machines provide
-		 * useful values it seems.
-		 */
-		ccf = ESP_CCF_F4;
-		fmhz = (20000000);
-	}
-
-	if (ccf == (ESP_CCF_F7 + 1))
-		esp->cfact = ESP_CCF_F0;
-	else if (ccf == ESP_CCF_NEVER)
-		esp->cfact = ESP_CCF_F2;
-	else
-		esp->cfact = ccf;
-	esp->raw_cfact = ccf;
-
-	esp->cfreq = fmhz;
-	esp->ccycle = ESP_MHZ_TO_CYCLE(fmhz);
-	esp->ctick = ESP_TICK(ccf, esp->ccycle);
-	esp->neg_defp = ESP_NEG_DEFP(fmhz, ccf);
-	esp->sync_defp = SYNC_DEFP_SLOW;
-
-	printk("SCSI ID %d Clk %dMHz CCYC=%d CCF=%d TOut %d ",
-	       esp->scsi_id, (fmhz / 1000000),
-	       (int)esp->ccycle, (int)ccf, (int) esp->neg_defp);
-}
-
-static void __init esp_get_bursts(struct esp *esp, struct sbus_dev *dma)
-{
-	struct sbus_dev *sdev = esp->sdev;
-	u8 bursts;
-
-	bursts = prom_getintdefault(esp->prom_node, "burst-sizes", 0xff);
-
-	if (dma) {
-		u8 tmp = prom_getintdefault(dma->prom_node,
-					    "burst-sizes", 0xff);
-		if (tmp != 0xff)
-			bursts &= tmp;
-	}
-
-	if (sdev->bus) {
-		u8 tmp = prom_getintdefault(sdev->bus->prom_node,
-					    "burst-sizes", 0xff);
-		if (tmp != 0xff)
-			bursts &= tmp;
-	}
-
-	if (bursts == 0xff ||
-	    (bursts & DMA_BURST16) == 0 ||
-	    (bursts & DMA_BURST32) == 0)
-		bursts = (DMA_BURST32 - 1);
-
-	esp->bursts = bursts;
-}
-
-static void __init esp_get_revision(struct esp *esp)
-{
-	u8 tmp;
-
-	esp->config1 = (ESP_CONFIG1_PENABLE | (esp->scsi_id & 7));
-	esp->config2 = (ESP_CONFIG2_SCSI2ENAB | ESP_CONFIG2_REGPARITY);
-	sbus_writeb(esp->config2, esp->eregs + ESP_CFG2);
-
-	tmp = sbus_readb(esp->eregs + ESP_CFG2);
-	tmp &= ~ESP_CONFIG2_MAGIC;
-	if (tmp != (ESP_CONFIG2_SCSI2ENAB | ESP_CONFIG2_REGPARITY)) {
-		/* If what we write to cfg2 does not come back, cfg2
-		 * is not implemented, therefore this must be a plain
-		 * esp100.
-		 */
-		esp->erev = esp100;
-		printk("NCR53C90(esp100)\n");
-	} else {
-		esp->config2 = 0;
-		esp->prev_cfg3 = esp->config3[0] = 5;
-		sbus_writeb(esp->config2, esp->eregs + ESP_CFG2);
-		sbus_writeb(0, esp->eregs + ESP_CFG3);
-		sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3);
-
-		tmp = sbus_readb(esp->eregs + ESP_CFG3);
-		if (tmp != 5) {
-			/* The cfg2 register is implemented, however
-			 * cfg3 is not, must be esp100a.
-			 */
-			esp->erev = esp100a;
-			printk("NCR53C90A(esp100a)\n");
-		} else {
-			int target;
-
-			for (target = 0; target < 16; target++)
-				esp->config3[target] = 0;
-			esp->prev_cfg3 = 0;
-			sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3);
-
-			/* All of cfg{1,2,3} implemented, must be one of
-			 * the fas variants, figure out which one.
-			 */
-			if (esp->raw_cfact > ESP_CCF_F5) {
-				esp->erev = fast;
-				esp->sync_defp = SYNC_DEFP_FAST;
-				printk("NCR53C9XF(espfast)\n");
-			} else {
-				esp->erev = esp236;
-				printk("NCR53C9x(esp236)\n");
-			}
-			esp->config2 = 0;
-			sbus_writeb(esp->config2, esp->eregs + ESP_CFG2);
-		}
-	}
-}
-
-static void __init esp_init_swstate(struct esp *esp)
-{
-	int i;
-
-	/* Command queues... */
-	esp->current_SC = NULL;
-	esp->disconnected_SC = NULL;
-	esp->issue_SC = NULL;
-
-	/* Target and current command state... */
-	esp->targets_present = 0;
-	esp->resetting_bus = 0;
-	esp->snip = 0;
-
-	init_waitqueue_head(&esp->reset_queue);
-
-	/* Debugging... */
-	for(i = 0; i < 32; i++)
-		esp->espcmdlog[i] = 0;
-	esp->espcmdent = 0;
-
-	/* MSG phase state... */
-	for(i = 0; i < 16; i++) {
-		esp->cur_msgout[i] = 0;
-		esp->cur_msgin[i] = 0;
-	}
-	esp->prevmsgout = esp->prevmsgin = 0;
-	esp->msgout_len = esp->msgin_len = 0;
-
-	/* Clear the one behind caches to hold unmatchable values. */
-	esp->prev_soff = esp->prev_stp = esp->prev_cfg3 = 0xff;
-	esp->prev_hme_dmacsr = 0xffffffff;
-}
-
-static int __init detect_one_esp(struct scsi_host_template *tpnt,
-				 struct device *dev,
-				 struct sbus_dev *esp_dev,
-				 struct sbus_dev *espdma,
-				 struct sbus_bus *sbus,
-				 int hme)
-{
-	static int instance;
-	struct Scsi_Host *esp_host = scsi_host_alloc(tpnt, sizeof(struct esp));
-	struct esp *esp;
-	
-	if (!esp_host)
-		return -ENOMEM;
-
-	if (hme)
-		esp_host->max_id = 16;
-	esp = (struct esp *) esp_host->hostdata;
-	esp->ehost = esp_host;
-	esp->sdev = esp_dev;
-	esp->esp_id = instance;
-	esp->prom_node = esp_dev->prom_node;
-	prom_getstring(esp->prom_node, "name", esp->prom_name,
-		       sizeof(esp->prom_name));
-
-	if (esp_find_dvma(esp, espdma) < 0)
-		goto fail_unlink;
-	if (esp_map_regs(esp, hme) < 0) {
-		printk("ESP registers unmappable");
-		goto fail_dvma_release;
-	}
-	if (esp_map_cmdarea(esp) < 0) {
-		printk("ESP DVMA transport area unmappable");
-		goto fail_unmap_regs;
-	}
-	if (esp_register_irq(esp) < 0)
-		goto fail_unmap_cmdarea;
-
-	esp_get_scsi_id(esp);
-
-	esp->diff = prom_getbool(esp->prom_node, "differential");
-	if (esp->diff)
-		printk("Differential ");
-
-	esp_get_clock_params(esp);
-	esp_get_bursts(esp, espdma);
-	esp_get_revision(esp);
-	esp_init_swstate(esp);
-
-	esp_bootup_reset(esp);
-
-	if (scsi_add_host(esp_host, dev))
-		goto fail_free_irq;
-
-	dev_set_drvdata(&esp_dev->ofdev.dev, esp);
-
-	scsi_scan_host(esp_host);
-	instance++;
-
-	return 0;
-
-fail_free_irq:
-	free_irq(esp->ehost->irq, esp);
-
-fail_unmap_cmdarea:
-	sbus_free_consistent(esp->sdev, 16,
-			     (void *) esp->esp_command,
-			     esp->esp_command_dvma);
-
-fail_unmap_regs:
-	sbus_iounmap(esp->eregs, ESP_REG_SIZE);
-
-fail_dvma_release:
-	esp->dma->allocated = 0;
-
-fail_unlink:
-	scsi_host_put(esp_host);
-	return -1;
-}
-
-/* Detecting ESP chips on the machine.  This is the simple and easy
- * version.
- */
-static int __devexit esp_remove_common(struct esp *esp)
-{
-	unsigned int irq = esp->ehost->irq;
-
-	scsi_remove_host(esp->ehost);
-
-	ESP_INTSOFF(esp->dregs);
-#if 0
-	esp_reset_dma(esp);
-	esp_reset_esp(esp);
-#endif
-
-	free_irq(irq, esp);
-	sbus_free_consistent(esp->sdev, 16,
-			     (void *) esp->esp_command, esp->esp_command_dvma);
-	sbus_iounmap(esp->eregs, ESP_REG_SIZE);
-	esp->dma->allocated = 0;
-
-	scsi_host_put(esp->ehost);
-
-	return 0;
-}
-
-
-#ifdef CONFIG_SUN4
-
-#include <asm/sun4paddr.h>
-
-static struct sbus_dev sun4_esp_dev;
-
-static int __init esp_sun4_probe(struct scsi_host_template *tpnt)
-{
-	if (sun4_esp_physaddr) {
-		memset(&sun4_esp_dev, 0, sizeof(sun4_esp_dev));
-		sun4_esp_dev.reg_addrs[0].phys_addr = sun4_esp_physaddr;
-		sun4_esp_dev.irqs[0] = 4;
-		sun4_esp_dev.resource[0].start = sun4_esp_physaddr;
-		sun4_esp_dev.resource[0].end =
-			sun4_esp_physaddr + ESP_REG_SIZE - 1;
-		sun4_esp_dev.resource[0].flags = IORESOURCE_IO;
-
-		return detect_one_esp(tpnt, NULL,
-				      &sun4_esp_dev, NULL, NULL, 0);
-	}
-	return 0;
-}
-
-static int __devexit esp_sun4_remove(void)
-{
-	struct of_device *dev = &sun4_esp_dev.ofdev;
-	struct esp *esp = dev_get_drvdata(&dev->dev);
-
-	return esp_remove_common(esp);
-}
-
-#else /* !CONFIG_SUN4 */
-
-static int __devinit esp_sbus_probe(struct of_device *dev, const struct of_device_id *match)
-{
-	struct sbus_dev *sdev = to_sbus_device(&dev->dev);
-	struct device_node *dp = dev->node;
-	struct sbus_dev *dma_sdev = NULL;
-	int hme = 0;
-
-	if (dp->parent &&
-	    (!strcmp(dp->parent->name, "espdma") ||
-	     !strcmp(dp->parent->name, "dma")))
-		dma_sdev = sdev->parent;
-	else if (!strcmp(dp->name, "SUNW,fas")) {
-		dma_sdev = sdev;
-		hme = 1;
-	}
-
-	return detect_one_esp(match->data, &dev->dev,
-			      sdev, dma_sdev, sdev->bus, hme);
-}
-
-static int __devexit esp_sbus_remove(struct of_device *dev)
-{
-	struct esp *esp = dev_get_drvdata(&dev->dev);
-
-	return esp_remove_common(esp);
-}
-
-#endif /* !CONFIG_SUN4 */
-
-/* The info function will return whatever useful
- * information the developer sees fit.  If not provided, then
- * the name field will be used instead.
- */
-static const char *esp_info(struct Scsi_Host *host)
-{
-	struct esp *esp;
-
-	esp = (struct esp *) host->hostdata;
-	switch (esp->erev) {
-	case esp100:
-		return "Sparc ESP100 (NCR53C90)";
-	case esp100a:
-		return "Sparc ESP100A (NCR53C90A)";
-	case esp236:
-		return "Sparc ESP236";
-	case fas236:
-		return "Sparc ESP236-FAST";
-	case fashme:
-		return "Sparc ESP366-HME";
-	case fas100a:
-		return "Sparc ESP100A-FAST";
-	default:
-		return "Bogon ESP revision";
-	};
-}
-
-/* From Wolfgang Stanglmeier's NCR scsi driver. */
-struct info_str
-{
-	char *buffer;
-	int length;
-	int offset;
-	int pos;
-};
-
-static void copy_mem_info(struct info_str *info, char *data, int len)
-{
-	if (info->pos + len > info->length)
-		len = info->length - info->pos;
-
-	if (info->pos + len < info->offset) {
-		info->pos += len;
-		return;
-	}
-	if (info->pos < info->offset) {
-		data += (info->offset - info->pos);
-		len  -= (info->offset - info->pos);
-	}
-
-	if (len > 0) {
-		memcpy(info->buffer + info->pos, data, len);
-		info->pos += len;
-	}
-}
-
-static int copy_info(struct info_str *info, char *fmt, ...)
-{
-	va_list args;
-	char buf[81];
-	int len;
-
-	va_start(args, fmt);
-	len = vsprintf(buf, fmt, args);
-	va_end(args);
-
-	copy_mem_info(info, buf, len);
-	return len;
-}
-
-static int esp_host_info(struct esp *esp, char *ptr, off_t offset, int len)
-{
-	struct scsi_device *sdev;
-	struct info_str info;
-	int i;
-
-	info.buffer	= ptr;
-	info.length	= len;
-	info.offset	= offset;
-	info.pos	= 0;
-
-	copy_info(&info, "Sparc ESP Host Adapter:\n");
-	copy_info(&info, "\tPROM node\t\t%08x\n", (unsigned int) esp->prom_node);
-	copy_info(&info, "\tPROM name\t\t%s\n", esp->prom_name);
-	copy_info(&info, "\tESP Model\t\t");
-	switch (esp->erev) {
-	case esp100:
-		copy_info(&info, "ESP100\n");
-		break;
-	case esp100a:
-		copy_info(&info, "ESP100A\n");
-		break;
-	case esp236:
-		copy_info(&info, "ESP236\n");
-		break;
-	case fas236:
-		copy_info(&info, "FAS236\n");
-		break;
-	case fas100a:
-		copy_info(&info, "FAS100A\n");
-		break;
-	case fast:
-		copy_info(&info, "FAST\n");
-		break;
-	case fashme:
-		copy_info(&info, "Happy Meal FAS\n");
-		break;
-	case espunknown:
-	default:
-		copy_info(&info, "Unknown!\n");
-		break;
-	};
-	copy_info(&info, "\tDMA Revision\t\t");
-	switch (esp->dma->revision) {
-	case dvmarev0:
-		copy_info(&info, "Rev 0\n");
-		break;
-	case dvmaesc1:
-		copy_info(&info, "ESC Rev 1\n");
-		break;
-	case dvmarev1:
-		copy_info(&info, "Rev 1\n");
-		break;
-	case dvmarev2:
-		copy_info(&info, "Rev 2\n");
-		break;
-	case dvmarev3:
-		copy_info(&info, "Rev 3\n");
-		break;
-	case dvmarevplus:
-		copy_info(&info, "Rev 1+\n");
-		break;
-	case dvmahme:
-		copy_info(&info, "Rev HME/FAS\n");
-		break;
-	default:
-		copy_info(&info, "Unknown!\n");
-		break;
-	};
-	copy_info(&info, "\tLive Targets\t\t[ ");
-	for (i = 0; i < 15; i++) {
-		if (esp->targets_present & (1 << i))
-			copy_info(&info, "%d ", i);
-	}
-	copy_info(&info, "]\n\n");
-	
-	/* Now describe the state of each existing target. */
-	copy_info(&info, "Target #\tconfig3\t\tSync Capabilities\tDisconnect\tWide\n");
-
-	shost_for_each_device(sdev, esp->ehost) {
-		struct esp_device *esp_dev = sdev->hostdata;
-		uint id = sdev->id;
-
-		if (!(esp->targets_present & (1 << id)))
-			continue;
-
-		copy_info(&info, "%d\t\t", id);
-		copy_info(&info, "%08lx\t", esp->config3[id]);
-		copy_info(&info, "[%02lx,%02lx]\t\t\t",
-			esp_dev->sync_max_offset,
-			esp_dev->sync_min_period);
-		copy_info(&info, "%s\t\t",
-			esp_dev->disconnect ? "yes" : "no");
-		copy_info(&info, "%s\n",
-			(esp->config3[id] & ESP_CONFIG3_EWIDE) ? "yes" : "no");
-	}
-	return info.pos > info.offset? info.pos - info.offset : 0;
-}
-
-/* ESP proc filesystem code. */
-static int esp_proc_info(struct Scsi_Host *host, char *buffer, char **start, off_t offset,
-			 int length, int inout)
-{
-	struct esp *esp = (struct esp *) host->hostdata;
-
-	if (inout)
-		return -EINVAL; /* not yet */
-
-	if (start)
-		*start = buffer;
-
-	return esp_host_info(esp, buffer, offset, length);
-}
-
-static void esp_get_dmabufs(struct esp *esp, struct scsi_cmnd *sp)
-{
-	if (sp->use_sg == 0) {
-		sp->SCp.this_residual = sp->request_bufflen;
-		sp->SCp.buffer = (struct scatterlist *) sp->request_buffer;
-		sp->SCp.buffers_residual = 0;
-		if (sp->request_bufflen) {
-			sp->SCp.have_data_in = sbus_map_single(esp->sdev, sp->SCp.buffer,
-							       sp->SCp.this_residual,
-							       sp->sc_data_direction);
-			sp->SCp.ptr = (char *) ((unsigned long)sp->SCp.have_data_in);
-		} else {
-			sp->SCp.ptr = NULL;
-		}
-	} else {
-		sp->SCp.buffer = (struct scatterlist *) sp->request_buffer;
-		sp->SCp.buffers_residual = sbus_map_sg(esp->sdev,
-						       sp->SCp.buffer,
-						       sp->use_sg,
-						       sp->sc_data_direction);
-		sp->SCp.this_residual = sg_dma_len(sp->SCp.buffer);
-		sp->SCp.ptr = (char *) ((unsigned long)sg_dma_address(sp->SCp.buffer));
-	}
-}
-
-static void esp_release_dmabufs(struct esp *esp, struct scsi_cmnd *sp)
-{
-	if (sp->use_sg) {
-		sbus_unmap_sg(esp->sdev, sp->request_buffer, sp->use_sg,
-			      sp->sc_data_direction);
-	} else if (sp->request_bufflen) {
-		sbus_unmap_single(esp->sdev,
-				  sp->SCp.have_data_in,
-				  sp->request_bufflen,
-				  sp->sc_data_direction);
-	}
-}
-
-static void esp_restore_pointers(struct esp *esp, struct scsi_cmnd *sp)
-{
-	struct esp_pointers *ep = &esp->data_pointers[sp->device->id];
-
-	sp->SCp.ptr = ep->saved_ptr;
-	sp->SCp.buffer = ep->saved_buffer;
-	sp->SCp.this_residual = ep->saved_this_residual;
-	sp->SCp.buffers_residual = ep->saved_buffers_residual;
-}
-
-static void esp_save_pointers(struct esp *esp, struct scsi_cmnd *sp)
-{
-	struct esp_pointers *ep = &esp->data_pointers[sp->device->id];
-
-	ep->saved_ptr = sp->SCp.ptr;
-	ep->saved_buffer = sp->SCp.buffer;
-	ep->saved_this_residual = sp->SCp.this_residual;
-	ep->saved_buffers_residual = sp->SCp.buffers_residual;
-}
-
-/* Some rules:
- *
- *   1) Never ever panic while something is live on the bus.
- *      If there is to be any chance of syncing the disks this
- *      rule is to be obeyed.
- *
- *   2) Any target that causes a foul condition will no longer
- *      have synchronous transfers done to it, no questions
- *      asked.
- *
- *   3) Keep register accesses to a minimum.  Think about some
- *      day when we have Xbus machines this is running on and
- *      the ESP chip is on the other end of the machine on a
- *      different board from the cpu where this is running.
- */
-
-/* Fire off a command.  We assume the bus is free and that the only
- * case where we could see an interrupt is where we have disconnected
- * commands active and they are trying to reselect us.
- */
-static inline void esp_check_cmd(struct esp *esp, struct scsi_cmnd *sp)
-{
-	switch (sp->cmd_len) {
-	case 6:
-	case 10:
-	case 12:
-		esp->esp_slowcmd = 0;
-		break;
-
-	default:
-		esp->esp_slowcmd = 1;
-		esp->esp_scmdleft = sp->cmd_len;
-		esp->esp_scmdp = &sp->cmnd[0];
-		break;
-	};
-}
-
-static inline void build_sync_nego_msg(struct esp *esp, int period, int offset)
-{
-	esp->cur_msgout[0] = EXTENDED_MESSAGE;
-	esp->cur_msgout[1] = 3;
-	esp->cur_msgout[2] = EXTENDED_SDTR;
-	esp->cur_msgout[3] = period;
-	esp->cur_msgout[4] = offset;
-	esp->msgout_len = 5;
-}
-
-/* SIZE is in bits, currently HME only supports 16 bit wide transfers. */
-static inline void build_wide_nego_msg(struct esp *esp, int size)
-{
-	esp->cur_msgout[0] = EXTENDED_MESSAGE;
-	esp->cur_msgout[1] = 2;
-	esp->cur_msgout[2] = EXTENDED_WDTR;
-	switch (size) {
-	case 32:
-		esp->cur_msgout[3] = 2;
-		break;
-	case 16:
-		esp->cur_msgout[3] = 1;
-		break;
-	case 8:
-	default:
-		esp->cur_msgout[3] = 0;
-		break;
-	};
-
-	esp->msgout_len = 4;
-}
-
-static void esp_exec_cmd(struct esp *esp)
-{
-	struct scsi_cmnd *SCptr;
-	struct scsi_device *SDptr;
-	struct esp_device *esp_dev;
-	volatile u8 *cmdp = esp->esp_command;
-	u8 the_esp_command;
-	int lun, target;
-	int i;
-
-	/* Hold off if we have disconnected commands and
-	 * an IRQ is showing...
-	 */
-	if (esp->disconnected_SC && ESP_IRQ_P(esp->dregs))
-		return;
-
-	/* Grab first member of the issue queue. */
-	SCptr = esp->current_SC = remove_first_SC(&esp->issue_SC);
-
-	/* Safe to panic here because current_SC is null. */
-	if (!SCptr)
-		panic("esp: esp_exec_cmd and issue queue is NULL");
-
-	SDptr = SCptr->device;
-	esp_dev = SDptr->hostdata;
-	lun = SCptr->device->lun;
-	target = SCptr->device->id;
-
-	esp->snip = 0;
-	esp->msgout_len = 0;
-
-	/* Send it out whole, or piece by piece?   The ESP
-	 * only knows how to automatically send out 6, 10,
-	 * and 12 byte commands.  I used to think that the
-	 * Linux SCSI code would never throw anything other
-	 * than that to us, but then again there is the
-	 * SCSI generic driver which can send us anything.
-	 */
-	esp_check_cmd(esp, SCptr);
-
-	/* If arbitration/selection is successful, the ESP will leave
-	 * ATN asserted, causing the target to go into message out
-	 * phase.  The ESP will feed the target the identify and then
-	 * the target can only legally go to one of command,
-	 * datain/out, status, or message in phase, or stay in message
-	 * out phase (should we be trying to send a sync negotiation
-	 * message after the identify).  It is not allowed to drop
-	 * BSY, but some buggy targets do and we check for this
-	 * condition in the selection complete code.  Most of the time
-	 * we'll make the command bytes available to the ESP and it
-	 * will not interrupt us until it finishes command phase, we
-	 * cannot do this for command sizes the ESP does not
-	 * understand and in this case we'll get interrupted right
-	 * when the target goes into command phase.
-	 *
-	 * It is absolutely _illegal_ in the presence of SCSI-2 devices
-	 * to use the ESP select w/o ATN command.  When SCSI-2 devices are
-	 * present on the bus we _must_ always go straight to message out
-	 * phase with an identify message for the target.  Being that
-	 * selection attempts in SCSI-1 w/o ATN was an option, doing SCSI-2
-	 * selections should not confuse SCSI-1 we hope.
-	 */
-
-	if (esp_dev->sync) {
-		/* this targets sync is known */
-#ifndef __sparc_v9__
-do_sync_known:
-#endif
-		if (esp_dev->disconnect)
-			*cmdp++ = IDENTIFY(1, lun);
-		else
-			*cmdp++ = IDENTIFY(0, lun);
-
-		if (esp->esp_slowcmd) {
-			the_esp_command = (ESP_CMD_SELAS | ESP_CMD_DMA);
-			esp_advance_phase(SCptr, in_slct_stop);
-		} else {
-			the_esp_command = (ESP_CMD_SELA | ESP_CMD_DMA);
-			esp_advance_phase(SCptr, in_slct_norm);
-		}
-	} else if (!(esp->targets_present & (1<<target)) || !(esp_dev->disconnect)) {
-		/* After the bootup SCSI code sends both the
-		 * TEST_UNIT_READY and INQUIRY commands we want
-		 * to at least attempt allowing the device to
-		 * disconnect.
-		 */
-		ESPMISC(("esp: Selecting device for first time. target=%d "
-			 "lun=%d\n", target, SCptr->device->lun));
-		if (!SDptr->borken && !esp_dev->disconnect)
-			esp_dev->disconnect = 1;
-
-		*cmdp++ = IDENTIFY(0, lun);
-		esp->prevmsgout = NOP;
-		esp_advance_phase(SCptr, in_slct_norm);
-		the_esp_command = (ESP_CMD_SELA | ESP_CMD_DMA);
-
-		/* Take no chances... */
-		esp_dev->sync_max_offset = 0;
-		esp_dev->sync_min_period = 0;
-	} else {
-		/* Sorry, I have had way too many problems with
-		 * various CDROM devices on ESP. -DaveM
-		 */
-		int cdrom_hwbug_wkaround = 0;
-
-#ifndef __sparc_v9__
-		/* Never allow disconnects or synchronous transfers on
-		 * SparcStation1 and SparcStation1+.  Allowing those
-		 * to be enabled seems to lockup the machine completely.
-		 */
-		if ((idprom->id_machtype == (SM_SUN4C | SM_4C_SS1)) ||
-		    (idprom->id_machtype == (SM_SUN4C | SM_4C_SS1PLUS))) {
-			/* But we are nice and allow tapes and removable
-			 * disks (but not CDROMs) to disconnect.
-			 */
-			if(SDptr->type == TYPE_TAPE ||
-			   (SDptr->type != TYPE_ROM && SDptr->removable))
-				esp_dev->disconnect = 1;
-			else
-				esp_dev->disconnect = 0;
-			esp_dev->sync_max_offset = 0;
-			esp_dev->sync_min_period = 0;
-			esp_dev->sync = 1;
-			esp->snip = 0;
-			goto do_sync_known;
-		}
-#endif /* !(__sparc_v9__) */
-
-		/* We've talked to this guy before,
-		 * but never negotiated.  Let's try,
-		 * need to attempt WIDE first, before
-		 * sync nego, as per SCSI 2 standard.
-		 */
-		if (esp->erev == fashme && !esp_dev->wide) {
-			if (!SDptr->borken &&
-			   SDptr->type != TYPE_ROM &&
-			   SDptr->removable == 0) {
-				build_wide_nego_msg(esp, 16);
-				esp_dev->wide = 1;
-				esp->wnip = 1;
-				goto after_nego_msg_built;
-			} else {
-				esp_dev->wide = 1;
-				/* Fall through and try sync. */
-			}
-		}
-
-		if (!SDptr->borken) {
-			if ((SDptr->type == TYPE_ROM)) {
-				/* Nice try sucker... */
-				ESPMISC(("esp%d: Disabling sync for buggy "
-					 "CDROM.\n", esp->esp_id));
-				cdrom_hwbug_wkaround = 1;
-				build_sync_nego_msg(esp, 0, 0);
-			} else if (SDptr->removable != 0) {
-				ESPMISC(("esp%d: Not negotiating sync/wide but "
-					 "allowing disconnect for removable media.\n",
-					 esp->esp_id));
-				build_sync_nego_msg(esp, 0, 0);
-			} else {
-				build_sync_nego_msg(esp, esp->sync_defp, 15);
-			}
-		} else {
-			build_sync_nego_msg(esp, 0, 0);
-		}
-		esp_dev->sync = 1;
-		esp->snip = 1;
-
-after_nego_msg_built:
-		/* A fix for broken SCSI1 targets, when they disconnect
-		 * they lock up the bus and confuse ESP.  So disallow
-		 * disconnects for SCSI1 targets for now until we
-		 * find a better fix.
-		 *
-		 * Addendum: This is funny, I figured out what was going
-		 *           on.  The blotzed SCSI1 target would disconnect,
-		 *           one of the other SCSI2 targets or both would be
-		 *           disconnected as well.  The SCSI1 target would
-		 *           stay disconnected long enough that we start
-		 *           up a command on one of the SCSI2 targets.  As
-		 *           the ESP is arbitrating for the bus the SCSI1
-		 *           target begins to arbitrate as well to reselect
-		 *           the ESP.  The SCSI1 target refuses to drop it's
-		 *           ID bit on the data bus even though the ESP is
-		 *           at ID 7 and is the obvious winner for any
-		 *           arbitration.  The ESP is a poor sport and refuses
-		 *           to lose arbitration, it will continue indefinitely
-		 *           trying to arbitrate for the bus and can only be
-		 *           stopped via a chip reset or SCSI bus reset.
-		 *           Therefore _no_ disconnects for SCSI1 targets
-		 *           thank you very much. ;-)
-		 */
-		if(((SDptr->scsi_level < 3) &&
-		    (SDptr->type != TYPE_TAPE) &&
-		    SDptr->removable == 0) ||
-		    cdrom_hwbug_wkaround || SDptr->borken) {
-			ESPMISC((KERN_INFO "esp%d: Disabling DISCONNECT for target %d "
-				 "lun %d\n", esp->esp_id, SCptr->device->id, SCptr->device->lun));
-			esp_dev->disconnect = 0;
-			*cmdp++ = IDENTIFY(0, lun);
-		} else {
-			*cmdp++ = IDENTIFY(1, lun);
-		}
-
-		/* ESP fifo is only so big...
-		 * Make this look like a slow command.
-		 */
-		esp->esp_slowcmd = 1;
-		esp->esp_scmdleft = SCptr->cmd_len;
-		esp->esp_scmdp = &SCptr->cmnd[0];
-
-		the_esp_command = (ESP_CMD_SELAS | ESP_CMD_DMA);
-		esp_advance_phase(SCptr, in_slct_msg);
-	}
-
-	if (!esp->esp_slowcmd)
-		for (i = 0; i < SCptr->cmd_len; i++)
-			*cmdp++ = SCptr->cmnd[i];
-
-	/* HME sucks... */
-	if (esp->erev == fashme)
-		sbus_writeb((target & 0xf) | (ESP_BUSID_RESELID | ESP_BUSID_CTR32BIT),
-			    esp->eregs + ESP_BUSID);
-	else
-		sbus_writeb(target & 7, esp->eregs + ESP_BUSID);
-	if (esp->prev_soff != esp_dev->sync_max_offset ||
-	    esp->prev_stp  != esp_dev->sync_min_period ||
-	    (esp->erev > esp100a &&
-	     esp->prev_cfg3 != esp->config3[target])) {
-		esp->prev_soff = esp_dev->sync_max_offset;
-		esp->prev_stp = esp_dev->sync_min_period;
-		sbus_writeb(esp->prev_soff, esp->eregs + ESP_SOFF);
-		sbus_writeb(esp->prev_stp, esp->eregs + ESP_STP);
-		if (esp->erev > esp100a) {
-			esp->prev_cfg3 = esp->config3[target];
-			sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3);
-		}
-	}
-	i = (cmdp - esp->esp_command);
-
-	if (esp->erev == fashme) {
-		esp_cmd(esp, ESP_CMD_FLUSH); /* Grrr! */
-
-		/* Set up the DMA and HME counters */
-		sbus_writeb(i, esp->eregs + ESP_TCLOW);
-		sbus_writeb(0, esp->eregs + ESP_TCMED);
-		sbus_writeb(0, esp->eregs + FAS_RLO);
-		sbus_writeb(0, esp->eregs + FAS_RHI);
-		esp_cmd(esp, the_esp_command);
-
-		/* Talk about touchy hardware... */
-		esp->prev_hme_dmacsr = ((esp->prev_hme_dmacsr |
-					 (DMA_SCSI_DISAB | DMA_ENABLE)) &
-					~(DMA_ST_WRITE));
-		sbus_writel(16, esp->dregs + DMA_COUNT);
-		sbus_writel(esp->esp_command_dvma, esp->dregs + DMA_ADDR);
-		sbus_writel(esp->prev_hme_dmacsr, esp->dregs + DMA_CSR);
-	} else {
-		u32 tmp;
-
-		/* Set up the DMA and ESP counters */
-		sbus_writeb(i, esp->eregs + ESP_TCLOW);
-		sbus_writeb(0, esp->eregs + ESP_TCMED);
-		tmp = sbus_readl(esp->dregs + DMA_CSR);
-		tmp &= ~DMA_ST_WRITE;
-		tmp |= DMA_ENABLE;
-		sbus_writel(tmp, esp->dregs + DMA_CSR);
-		if (esp->dma->revision == dvmaesc1) {
-			if (i) /* Workaround ESC gate array SBUS rerun bug. */
-				sbus_writel(PAGE_SIZE, esp->dregs + DMA_COUNT);
-		}
-		sbus_writel(esp->esp_command_dvma, esp->dregs + DMA_ADDR);
-
-		/* Tell ESP to "go". */
-		esp_cmd(esp, the_esp_command);
-	}
-}
-
-/* Queue a SCSI command delivered from the mid-level Linux SCSI code. */
-static int esp_queue(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
-{
-	struct esp *esp;
-
-	/* Set up func ptr and initial driver cmd-phase. */
-	SCpnt->scsi_done = done;
-	SCpnt->SCp.phase = not_issued;
-
-	/* We use the scratch area. */
-	ESPQUEUE(("esp_queue: target=%d lun=%d ", SCpnt->device->id, SCpnt->device->lun));
-	ESPDISC(("N<%02x,%02x>", SCpnt->device->id, SCpnt->device->lun));
-
-	esp = (struct esp *) SCpnt->device->host->hostdata;
-	esp_get_dmabufs(esp, SCpnt);
-	esp_save_pointers(esp, SCpnt); /* FIXME for tag queueing */
-
-	SCpnt->SCp.Status           = CHECK_CONDITION;
-	SCpnt->SCp.Message          = 0xff;
-	SCpnt->SCp.sent_command     = 0;
-
-	/* Place into our queue. */
-	if (SCpnt->cmnd[0] == REQUEST_SENSE) {
-		ESPQUEUE(("RQSENSE\n"));
-		prepend_SC(&esp->issue_SC, SCpnt);
-	} else {
-		ESPQUEUE(("\n"));
-		append_SC(&esp->issue_SC, SCpnt);
-	}
-
-	/* Run it now if we can. */
-	if (!esp->current_SC && !esp->resetting_bus)
-		esp_exec_cmd(esp);
-
-	return 0;
-}
-
-/* Dump driver state. */
-static void esp_dump_cmd(struct scsi_cmnd *SCptr)
-{
-	ESPLOG(("[tgt<%02x> lun<%02x> "
-		"pphase<%s> cphase<%s>]",
-		SCptr->device->id, SCptr->device->lun,
-		phase_string(SCptr->SCp.sent_command),
-		phase_string(SCptr->SCp.phase)));
-}
-
-static void esp_dump_state(struct esp *esp)
-{
-	struct scsi_cmnd *SCptr = esp->current_SC;
-#ifdef DEBUG_ESP_CMDS
-	int i;
-#endif
-
-	ESPLOG(("esp%d: dumping state\n", esp->esp_id));
-	ESPLOG(("esp%d: dma -- cond_reg<%08x> addr<%08x>\n",
-		esp->esp_id,
-		sbus_readl(esp->dregs + DMA_CSR),
-		sbus_readl(esp->dregs + DMA_ADDR)));
-	ESPLOG(("esp%d: SW [sreg<%02x> sstep<%02x> ireg<%02x>]\n",
-		esp->esp_id, esp->sreg, esp->seqreg, esp->ireg));
-	ESPLOG(("esp%d: HW reread [sreg<%02x> sstep<%02x> ireg<%02x>]\n",
-		esp->esp_id,
-		sbus_readb(esp->eregs + ESP_STATUS),
-		sbus_readb(esp->eregs + ESP_SSTEP),
-		sbus_readb(esp->eregs + ESP_INTRPT)));
-#ifdef DEBUG_ESP_CMDS
-	printk("esp%d: last ESP cmds [", esp->esp_id);
-	i = (esp->espcmdent - 1) & 31;
-	printk("<"); esp_print_cmd(esp->espcmdlog[i]); printk(">");
-	i = (i - 1) & 31;
-	printk("<"); esp_print_cmd(esp->espcmdlog[i]); printk(">");
-	i = (i - 1) & 31;
-	printk("<"); esp_print_cmd(esp->espcmdlog[i]); printk(">");
-	i = (i - 1) & 31;
-	printk("<"); esp_print_cmd(esp->espcmdlog[i]); printk(">");
-	printk("]\n");
-#endif /* (DEBUG_ESP_CMDS) */
-
-	if (SCptr) {
-		ESPLOG(("esp%d: current command ", esp->esp_id));
-		esp_dump_cmd(SCptr);
-	}
-	ESPLOG(("\n"));
-	SCptr = esp->disconnected_SC;
-	ESPLOG(("esp%d: disconnected ", esp->esp_id));
-	while (SCptr) {
-		esp_dump_cmd(SCptr);
-		SCptr = (struct scsi_cmnd *) SCptr->host_scribble;
-	}
-	ESPLOG(("\n"));
-}
-
-/* Abort a command.  The host_lock is acquired by caller. */
-static int esp_abort(struct scsi_cmnd *SCptr)
-{
-	struct esp *esp = (struct esp *) SCptr->device->host->hostdata;
-	int don;
-
-	ESPLOG(("esp%d: Aborting command\n", esp->esp_id));
-	esp_dump_state(esp);
-
-	/* Wheee, if this is the current command on the bus, the
-	 * best we can do is assert ATN and wait for msgout phase.
-	 * This should even fix a hung SCSI bus when we lose state
-	 * in the driver and timeout because the eventual phase change
-	 * will cause the ESP to (eventually) give an interrupt.
-	 */
-	if (esp->current_SC == SCptr) {
-		esp->cur_msgout[0] = ABORT;
-		esp->msgout_len = 1;
-		esp->msgout_ctr = 0;
-		esp_cmd(esp, ESP_CMD_SATN);
-		return SUCCESS;
-	}
-
-	/* If it is still in the issue queue then we can safely
-	 * call the completion routine and report abort success.
-	 */
-	don = (sbus_readl(esp->dregs + DMA_CSR) & DMA_INT_ENAB);
-	if (don) {
-		ESP_INTSOFF(esp->dregs);
-	}
-	if (esp->issue_SC) {
-		struct scsi_cmnd **prev, *this;
-		for (prev = (&esp->issue_SC), this = esp->issue_SC;
-		     this != NULL;
-		     prev = (struct scsi_cmnd **) &(this->host_scribble),
-			     this = (struct scsi_cmnd *) this->host_scribble) {
-
-			if (this == SCptr) {
-				*prev = (struct scsi_cmnd *) this->host_scribble;
-				this->host_scribble = NULL;
-
-				esp_release_dmabufs(esp, this);
-				this->result = DID_ABORT << 16;
-				this->scsi_done(this);
-
-				if (don)
-					ESP_INTSON(esp->dregs);
-
-				return SUCCESS;
-			}
-		}
-	}
-
-	/* Yuck, the command to abort is disconnected, it is not
-	 * worth trying to abort it now if something else is live
-	 * on the bus at this time.  So, we let the SCSI code wait
-	 * a little bit and try again later.
-	 */
-	if (esp->current_SC) {
-		if (don)
-			ESP_INTSON(esp->dregs);
-		return FAILED;
-	}
-
-	/* It's disconnected, we have to reconnect to re-establish
-	 * the nexus and tell the device to abort.  However, we really
-	 * cannot 'reconnect' per se.  Don't try to be fancy, just
-	 * indicate failure, which causes our caller to reset the whole
-	 * bus.
-	 */
-
-	if (don)
-		ESP_INTSON(esp->dregs);
-
-	return FAILED;
-}
-
-/* We've sent ESP_CMD_RS to the ESP, the interrupt had just
- * arrived indicating the end of the SCSI bus reset.  Our job
- * is to clean out the command queues and begin re-execution
- * of SCSI commands once more.
- */
-static int esp_finish_reset(struct esp *esp)
-{
-	struct scsi_cmnd *sp = esp->current_SC;
-
-	/* Clean up currently executing command, if any. */
-	if (sp != NULL) {
-		esp->current_SC = NULL;
-
-		esp_release_dmabufs(esp, sp);
-		sp->result = (DID_RESET << 16);
-
-		sp->scsi_done(sp);
-	}
-
-	/* Clean up disconnected queue, they have been invalidated
-	 * by the bus reset.
-	 */
-	if (esp->disconnected_SC) {
-		while ((sp = remove_first_SC(&esp->disconnected_SC)) != NULL) {
-			esp_release_dmabufs(esp, sp);
-			sp->result = (DID_RESET << 16);
-
-			sp->scsi_done(sp);
-		}
-	}
-
-	/* SCSI bus reset is complete. */
-	esp->resetting_bus = 0;
-	wake_up(&esp->reset_queue);
-
-	/* Ok, now it is safe to get commands going once more. */
-	if (esp->issue_SC)
-		esp_exec_cmd(esp);
-
-	return do_intr_end;
-}
-
-static int esp_do_resetbus(struct esp *esp)
-{
-	ESPLOG(("esp%d: Resetting scsi bus\n", esp->esp_id));
-	esp->resetting_bus = 1;
-	esp_cmd(esp, ESP_CMD_RS);
-
-	return do_intr_end;
-}
-
-/* Reset ESP chip, reset hanging bus, then kill active and
- * disconnected commands for targets without soft reset.
- *
- * The host_lock is acquired by caller.
- */
-static int esp_reset(struct scsi_cmnd *SCptr)
-{
-	struct esp *esp = (struct esp *) SCptr->device->host->hostdata;
-
-	spin_lock_irq(esp->ehost->host_lock);
-	(void) esp_do_resetbus(esp);
-	spin_unlock_irq(esp->ehost->host_lock);
-
-	wait_event(esp->reset_queue, (esp->resetting_bus == 0));
-
-	return SUCCESS;
-}
-
-/* Internal ESP done function. */
-static void esp_done(struct esp *esp, int error)
-{
-	struct scsi_cmnd *done_SC = esp->current_SC;
-
-	esp->current_SC = NULL;
-
-	esp_release_dmabufs(esp, done_SC);
-	done_SC->result = error;
-
-	done_SC->scsi_done(done_SC);
-
-	/* Bus is free, issue any commands in the queue. */
-	if (esp->issue_SC && !esp->current_SC)
-		esp_exec_cmd(esp);
-
-}
-
-/* Wheee, ESP interrupt engine. */  
-
-/* Forward declarations. */
-static int esp_do_phase_determine(struct esp *esp);
-static int esp_do_data_finale(struct esp *esp);
-static int esp_select_complete(struct esp *esp);
-static int esp_do_status(struct esp *esp);
-static int esp_do_msgin(struct esp *esp);
-static int esp_do_msgindone(struct esp *esp);
-static int esp_do_msgout(struct esp *esp);
-static int esp_do_cmdbegin(struct esp *esp);
-
-#define sreg_datainp(__sreg)  (((__sreg) & ESP_STAT_PMASK) == ESP_DIP)
-#define sreg_dataoutp(__sreg) (((__sreg) & ESP_STAT_PMASK) == ESP_DOP)
-
-/* Read any bytes found in the FAS366 fifo, storing them into
- * the ESP driver software state structure.
- */
-static void hme_fifo_read(struct esp *esp)
-{
-	u8 count = 0;
-	u8 status = esp->sreg;
-
-	/* Cannot safely frob the fifo for these following cases, but
-	 * we must always read the fifo when the reselect interrupt
-	 * is pending.
-	 */
-	if (((esp->ireg & ESP_INTR_RSEL) == 0)	&&
-	    (sreg_datainp(status)		||
-	     sreg_dataoutp(status)		||
-	     (esp->current_SC &&
-	      esp->current_SC->SCp.phase == in_data_done))) {
-		ESPHME(("<wkaround_skipped>"));
-	} else {
-		unsigned long fcnt = sbus_readb(esp->eregs + ESP_FFLAGS) & ESP_FF_FBYTES;
-
-		/* The HME stores bytes in multiples of 2 in the fifo. */
-		ESPHME(("hme_fifo[fcnt=%d", (int)fcnt));
-		while (fcnt) {
-			esp->hme_fifo_workaround_buffer[count++] =
-				sbus_readb(esp->eregs + ESP_FDATA);
-			esp->hme_fifo_workaround_buffer[count++] =
-				sbus_readb(esp->eregs + ESP_FDATA);
-			ESPHME(("<%02x,%02x>", esp->hme_fifo_workaround_buffer[count-2], esp->hme_fifo_workaround_buffer[count-1]));
-			fcnt--;
-		}
-		if (sbus_readb(esp->eregs + ESP_STATUS2) & ESP_STAT2_F1BYTE) {
-			ESPHME(("<poke_byte>"));
-			sbus_writeb(0, esp->eregs + ESP_FDATA);
-			esp->hme_fifo_workaround_buffer[count++] =
-				sbus_readb(esp->eregs + ESP_FDATA);
-			ESPHME(("<%02x,0x00>", esp->hme_fifo_workaround_buffer[count-1]));
-			ESPHME(("CMD_FLUSH"));
-			esp_cmd(esp, ESP_CMD_FLUSH);
-		} else {
-			ESPHME(("no_xtra_byte"));
-		}
-	}
-	ESPHME(("wkarnd_cnt=%d]", (int)count));
-	esp->hme_fifo_workaround_count = count;
-}
-
-static inline void hme_fifo_push(struct esp *esp, u8 *bytes, u8 count)
-{
-	esp_cmd(esp, ESP_CMD_FLUSH);
-	while (count) {
-		u8 tmp = *bytes++;
-		sbus_writeb(tmp, esp->eregs + ESP_FDATA);
-		sbus_writeb(0, esp->eregs + ESP_FDATA);
-		count--;
-	}
-}
-
-/* We try to avoid some interrupts by jumping ahead and see if the ESP
- * has gotten far enough yet.  Hence the following.
- */
-static inline int skipahead1(struct esp *esp, struct scsi_cmnd *scp,
-			     int prev_phase, int new_phase)
-{
-	if (scp->SCp.sent_command != prev_phase)
-		return 0;
-	if (ESP_IRQ_P(esp->dregs)) {
-		/* Yes, we are able to save an interrupt. */
-		if (esp->erev == fashme)
-			esp->sreg2 = sbus_readb(esp->eregs + ESP_STATUS2);
-		esp->sreg = (sbus_readb(esp->eregs + ESP_STATUS) & ~(ESP_STAT_INTR));
-		esp->ireg = sbus_readb(esp->eregs + ESP_INTRPT);
-		if (esp->erev == fashme) {
-			/* This chip is really losing. */
-			ESPHME(("HME["));
-			/* Must latch fifo before reading the interrupt
-			 * register else garbage ends up in the FIFO
-			 * which confuses the driver utterly.
-			 * Happy Meal indeed....
-			 */
-			ESPHME(("fifo_workaround]"));
-			if (!(esp->sreg2 & ESP_STAT2_FEMPTY) ||
-			    (esp->sreg2 & ESP_STAT2_F1BYTE))
-				hme_fifo_read(esp);
-		}
-		if (!(esp->ireg & ESP_INTR_SR))
-			return 0;
-		else
-			return do_reset_complete;
-	}
-	/* Ho hum, target is taking forever... */
-	scp->SCp.sent_command = new_phase; /* so we don't recurse... */
-	return do_intr_end;
-}
-
-static inline int skipahead2(struct esp *esp, struct scsi_cmnd *scp,
-			     int prev_phase1, int prev_phase2, int new_phase)
-{
-	if (scp->SCp.sent_command != prev_phase1 &&
-	    scp->SCp.sent_command != prev_phase2)
-		return 0;
-	if (ESP_IRQ_P(esp->dregs)) {
-		/* Yes, we are able to save an interrupt. */
-		if (esp->erev == fashme)
-			esp->sreg2 = sbus_readb(esp->eregs + ESP_STATUS2);
-		esp->sreg = (sbus_readb(esp->eregs + ESP_STATUS) & ~(ESP_STAT_INTR));
-		esp->ireg = sbus_readb(esp->eregs + ESP_INTRPT);
-		if (esp->erev == fashme) {
-			/* This chip is really losing. */
-			ESPHME(("HME["));
-
-			/* Must latch fifo before reading the interrupt
-			 * register else garbage ends up in the FIFO
-			 * which confuses the driver utterly.
-			 * Happy Meal indeed....
-			 */
-			ESPHME(("fifo_workaround]"));
-			if (!(esp->sreg2 & ESP_STAT2_FEMPTY) ||
-			    (esp->sreg2 & ESP_STAT2_F1BYTE))
-				hme_fifo_read(esp);
-		}
-		if (!(esp->ireg & ESP_INTR_SR))
-			return 0;
-		else
-			return do_reset_complete;
-	}
-	/* Ho hum, target is taking forever... */
-	scp->SCp.sent_command = new_phase; /* so we don't recurse... */
-	return do_intr_end;
-}
-
-/* Now some dma helpers. */
-static void dma_setup(struct esp *esp, __u32 addr, int count, int write)
-{
-	u32 nreg = sbus_readl(esp->dregs + DMA_CSR);
-
-	if (write)
-		nreg |= DMA_ST_WRITE;
-	else
-		nreg &= ~(DMA_ST_WRITE);
-	nreg |= DMA_ENABLE;
-	sbus_writel(nreg, esp->dregs + DMA_CSR);
-	if (esp->dma->revision == dvmaesc1) {
-		/* This ESC gate array sucks! */
-		__u32 src = addr;
-		__u32 dest = src + count;
-
-		if (dest & (PAGE_SIZE - 1))
-			count = PAGE_ALIGN(count);
-		sbus_writel(count, esp->dregs + DMA_COUNT);
-	}
-	sbus_writel(addr, esp->dregs + DMA_ADDR);
-}
-
-static void dma_drain(struct esp *esp)
-{
-	u32 tmp;
-
-	if (esp->dma->revision == dvmahme)
-		return;
-	if ((tmp = sbus_readl(esp->dregs + DMA_CSR)) & DMA_FIFO_ISDRAIN) {
-		switch (esp->dma->revision) {
-		default:
-			tmp |= DMA_FIFO_STDRAIN;
-			sbus_writel(tmp, esp->dregs + DMA_CSR);
-
-		case dvmarev3:
-		case dvmaesc1:
-			while (sbus_readl(esp->dregs + DMA_CSR) & DMA_FIFO_ISDRAIN)
-				udelay(1);
-		};
-	}
-}
-
-static void dma_invalidate(struct esp *esp)
-{
-	u32 tmp;
-
-	if (esp->dma->revision == dvmahme) {
-		sbus_writel(DMA_RST_SCSI, esp->dregs + DMA_CSR);
-
-		esp->prev_hme_dmacsr = ((esp->prev_hme_dmacsr |
-					 (DMA_PARITY_OFF | DMA_2CLKS |
-					  DMA_SCSI_DISAB | DMA_INT_ENAB)) &
-					~(DMA_ST_WRITE | DMA_ENABLE));
-
-		sbus_writel(0, esp->dregs + DMA_CSR);
-		sbus_writel(esp->prev_hme_dmacsr, esp->dregs + DMA_CSR);
-
-		/* This is necessary to avoid having the SCSI channel
-		 * engine lock up on us.
-		 */
-		sbus_writel(0, esp->dregs + DMA_ADDR);
-	} else {
-		while ((tmp = sbus_readl(esp->dregs + DMA_CSR)) & DMA_PEND_READ)
-			udelay(1);
-
-		tmp &= ~(DMA_ENABLE | DMA_ST_WRITE | DMA_BCNT_ENAB);
-		tmp |= DMA_FIFO_INV;
-		sbus_writel(tmp, esp->dregs + DMA_CSR);
-		tmp &= ~DMA_FIFO_INV;
-		sbus_writel(tmp, esp->dregs + DMA_CSR);
-	}
-}
-
-static inline void dma_flashclear(struct esp *esp)
-{
-	dma_drain(esp);
-	dma_invalidate(esp);
-}
-
-static int dma_can_transfer(struct esp *esp, struct scsi_cmnd *sp)
-{
-	__u32 base, end, sz;
-
-	if (esp->dma->revision == dvmarev3) {
-		sz = sp->SCp.this_residual;
-		if (sz > 0x1000000)
-			sz = 0x1000000;
-	} else {
-		base = ((__u32)((unsigned long)sp->SCp.ptr));
-		base &= (0x1000000 - 1);
-		end = (base + sp->SCp.this_residual);
-		if (end > 0x1000000)
-			end = 0x1000000;
-		sz = (end - base);
-	}
-	return sz;
-}
-
-/* Misc. esp helper macros. */
-#define esp_setcount(__eregs, __cnt, __hme) \
-	sbus_writeb(((__cnt)&0xff), (__eregs) + ESP_TCLOW); \
-	sbus_writeb((((__cnt)>>8)&0xff), (__eregs) + ESP_TCMED); \
-	if (__hme) { \
-		sbus_writeb((((__cnt)>>16)&0xff), (__eregs) + FAS_RLO); \
-		sbus_writeb(0, (__eregs) + FAS_RHI); \
-	}
-
-#define esp_getcount(__eregs, __hme) \
-	((sbus_readb((__eregs) + ESP_TCLOW)&0xff) | \
-	 ((sbus_readb((__eregs) + ESP_TCMED)&0xff) << 8) | \
-         ((__hme) ? sbus_readb((__eregs) + FAS_RLO) << 16 : 0))
-
-#define fcount(__esp) \
-	(((__esp)->erev == fashme) ? \
-	  (__esp)->hme_fifo_workaround_count : \
-	  sbus_readb(((__esp)->eregs) + ESP_FFLAGS) & ESP_FF_FBYTES)
-
-#define fnzero(__esp) \
-	(((__esp)->erev == fashme) ? 0 : \
-	 sbus_readb(((__esp)->eregs) + ESP_FFLAGS) & ESP_FF_ONOTZERO)
-
-/* XXX speculative nops unnecessary when continuing amidst a data phase
- * XXX even on esp100!!!  another case of flooding the bus with I/O reg
- * XXX writes...
- */
-#define esp_maybe_nop(__esp) \
-	if ((__esp)->erev == esp100) \
-		esp_cmd((__esp), ESP_CMD_NULL)
-
-#define sreg_to_dataphase(__sreg) \
-	((((__sreg) & ESP_STAT_PMASK) == ESP_DOP) ? in_dataout : in_datain)
-
-/* The ESP100 when in synchronous data phase, can mistake a long final
- * REQ pulse from the target as an extra byte, it places whatever is on
- * the data lines into the fifo.  For now, we will assume when this
- * happens that the target is a bit quirky and we don't want to
- * be talking synchronously to it anyways.  Regardless, we need to
- * tell the ESP to eat the extraneous byte so that we can proceed
- * to the next phase.
- */
-static int esp100_sync_hwbug(struct esp *esp, struct scsi_cmnd *sp, int fifocnt)
-{
-	/* Do not touch this piece of code. */
-	if ((!(esp->erev == esp100)) ||
-	    (!(sreg_datainp((esp->sreg = sbus_readb(esp->eregs + ESP_STATUS))) &&
-	       !fifocnt) &&
-	     !(sreg_dataoutp(esp->sreg) && !fnzero(esp)))) {
-		if (sp->SCp.phase == in_dataout)
-			esp_cmd(esp, ESP_CMD_FLUSH);
-		return 0;
-	} else {
-		/* Async mode for this guy. */
-		build_sync_nego_msg(esp, 0, 0);
-
-		/* Ack the bogus byte, but set ATN first. */
-		esp_cmd(esp, ESP_CMD_SATN);
-		esp_cmd(esp, ESP_CMD_MOK);
-		return 1;
-	}
-}
-
-/* This closes the window during a selection with a reselect pending, because
- * we use DMA for the selection process the FIFO should hold the correct
- * contents if we get reselected during this process.  So we just need to
- * ack the possible illegal cmd interrupt pending on the esp100.
- */
-static inline int esp100_reconnect_hwbug(struct esp *esp)
-{
-	u8 tmp;
-
-	if (esp->erev != esp100)
-		return 0;
-	tmp = sbus_readb(esp->eregs + ESP_INTRPT);
-	if (tmp & ESP_INTR_SR)
-		return 1;
-	return 0;
-}
-
-/* This verifies the BUSID bits during a reselection so that we know which
- * target is talking to us.
- */
-static inline int reconnect_target(struct esp *esp)
-{
-	int it, me = esp->scsi_id_mask, targ = 0;
-
-	if (2 != fcount(esp))
-		return -1;
-	if (esp->erev == fashme) {
-		/* HME does not latch it's own BUS ID bits during
-		 * a reselection.  Also the target number is given
-		 * as an unsigned char, not as a sole bit number
-		 * like the other ESP's do.
-		 * Happy Meal indeed....
-		 */
-		targ = esp->hme_fifo_workaround_buffer[0];
-	} else {
-		it = sbus_readb(esp->eregs + ESP_FDATA);
-		if (!(it & me))
-			return -1;
-		it &= ~me;
-		if (it & (it - 1))
-			return -1;
-		while (!(it & 1))
-			targ++, it >>= 1;
-	}
-	return targ;
-}
-
-/* This verifies the identify from the target so that we know which lun is
- * being reconnected.
- */
-static inline int reconnect_lun(struct esp *esp)
-{
-	int lun;
-
-	if ((esp->sreg & ESP_STAT_PMASK) != ESP_MIP)
-		return -1;
-	if (esp->erev == fashme)
-		lun = esp->hme_fifo_workaround_buffer[1];
-	else
-		lun = sbus_readb(esp->eregs + ESP_FDATA);
-
-	/* Yes, you read this correctly.  We report lun of zero
-	 * if we see parity error.  ESP reports parity error for
-	 * the lun byte, and this is the only way to hope to recover
-	 * because the target is connected.
-	 */
-	if (esp->sreg & ESP_STAT_PERR)
-		return 0;
-
-	/* Check for illegal bits being set in the lun. */
-	if ((lun & 0x40) || !(lun & 0x80))
-		return -1;
-
-	return lun & 7;
-}
-
-/* This puts the driver in a state where it can revitalize a command that
- * is being continued due to reselection.
- */
-static inline void esp_connect(struct esp *esp, struct scsi_cmnd *sp)
-{
-	struct esp_device *esp_dev = sp->device->hostdata;
-
-	if (esp->prev_soff  != esp_dev->sync_max_offset ||
-	    esp->prev_stp   != esp_dev->sync_min_period ||
-	    (esp->erev > esp100a &&
-	     esp->prev_cfg3 != esp->config3[sp->device->id])) {
-		esp->prev_soff = esp_dev->sync_max_offset;
-		esp->prev_stp = esp_dev->sync_min_period;
-		sbus_writeb(esp->prev_soff, esp->eregs + ESP_SOFF);
-		sbus_writeb(esp->prev_stp, esp->eregs + ESP_STP);
-		if (esp->erev > esp100a) {
-			esp->prev_cfg3 = esp->config3[sp->device->id];
-			sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3);
-		}
-	}
-	esp->current_SC = sp;
-}
-
-/* This will place the current working command back into the issue queue
- * if we are to receive a reselection amidst a selection attempt.
- */
-static inline void esp_reconnect(struct esp *esp, struct scsi_cmnd *sp)
-{
-	if (!esp->disconnected_SC)
-		ESPLOG(("esp%d: Weird, being reselected but disconnected "
-			"command queue is empty.\n", esp->esp_id));
-	esp->snip = 0;
-	esp->current_SC = NULL;
-	sp->SCp.phase = not_issued;
-	append_SC(&esp->issue_SC, sp);
-}
-
-/* Begin message in phase. */
-static int esp_do_msgin(struct esp *esp)
-{
-	/* Must be very careful with the fifo on the HME */
-	if ((esp->erev != fashme) ||
-	    !(sbus_readb(esp->eregs + ESP_STATUS2) & ESP_STAT2_FEMPTY))
-		esp_cmd(esp, ESP_CMD_FLUSH);
-	esp_maybe_nop(esp);
-	esp_cmd(esp, ESP_CMD_TI);
-	esp->msgin_len = 1;
-	esp->msgin_ctr = 0;
-	esp_advance_phase(esp->current_SC, in_msgindone);
-	return do_work_bus;
-}
-
-/* This uses various DMA csr fields and the fifo flags count value to
- * determine how many bytes were successfully sent/received by the ESP.
- */
-static inline int esp_bytes_sent(struct esp *esp, int fifo_count)
-{
-	int rval = sbus_readl(esp->dregs + DMA_ADDR) - esp->esp_command_dvma;
-
-	if (esp->dma->revision == dvmarev1)
-		rval -= (4 - ((sbus_readl(esp->dregs + DMA_CSR) & DMA_READ_AHEAD)>>11));
-	return rval - fifo_count;
-}
-
-static inline void advance_sg(struct scsi_cmnd *sp)
-{
-	++sp->SCp.buffer;
-	--sp->SCp.buffers_residual;
-	sp->SCp.this_residual = sg_dma_len(sp->SCp.buffer);
-	sp->SCp.ptr = (char *)((unsigned long)sg_dma_address(sp->SCp.buffer));
-}
-
-/* Please note that the way I've coded these routines is that I _always_
- * check for a disconnect during any and all information transfer
- * phases.  The SCSI standard states that the target _can_ cause a BUS
- * FREE condition by dropping all MSG/CD/IO/BSY signals.  Also note
- * that during information transfer phases the target controls every
- * change in phase, the only thing the initiator can do is "ask" for
- * a message out phase by driving ATN true.  The target can, and sometimes
- * will, completely ignore this request so we cannot assume anything when
- * we try to force a message out phase to abort/reset a target.  Most of
- * the time the target will eventually be nice and go to message out, so
- * we may have to hold on to our state about what we want to tell the target
- * for some period of time.
- */
-
-/* I think I have things working here correctly.  Even partial transfers
- * within a buffer or sub-buffer should not upset us at all no matter
- * how bad the target and/or ESP fucks things up.
- */
-static int esp_do_data(struct esp *esp)
-{
-	struct scsi_cmnd *SCptr = esp->current_SC;
-	int thisphase, hmuch;
-
-	ESPDATA(("esp_do_data: "));
-	esp_maybe_nop(esp);
-	thisphase = sreg_to_dataphase(esp->sreg);
-	esp_advance_phase(SCptr, thisphase);
-	ESPDATA(("newphase<%s> ", (thisphase == in_datain) ? "DATAIN" : "DATAOUT"));
-	hmuch = dma_can_transfer(esp, SCptr);
-	if (hmuch > (64 * 1024) && (esp->erev != fashme))
-		hmuch = (64 * 1024);
-	ESPDATA(("hmuch<%d> ", hmuch));
-	esp->current_transfer_size = hmuch;
-
-	if (esp->erev == fashme) {
-		u32 tmp = esp->prev_hme_dmacsr;
-
-		/* Always set the ESP count registers first. */
-		esp_setcount(esp->eregs, hmuch, 1);
-
-		/* Get the DMA csr computed. */
-		tmp |= (DMA_SCSI_DISAB | DMA_ENABLE);
-		if (thisphase == in_datain)
-			tmp |= DMA_ST_WRITE;
-		else
-			tmp &= ~(DMA_ST_WRITE);
-		esp->prev_hme_dmacsr = tmp;
-
-		ESPDATA(("DMA|TI --> do_intr_end\n"));
-		if (thisphase == in_datain) {
-			sbus_writel(hmuch, esp->dregs + DMA_COUNT);
-			esp_cmd(esp, ESP_CMD_DMA | ESP_CMD_TI);
-		} else {
-			esp_cmd(esp, ESP_CMD_DMA | ESP_CMD_TI);
-			sbus_writel(hmuch, esp->dregs + DMA_COUNT);
-		}
-		sbus_writel((__u32)((unsigned long)SCptr->SCp.ptr), esp->dregs+DMA_ADDR);
-		sbus_writel(esp->prev_hme_dmacsr, esp->dregs + DMA_CSR);
-	} else {
-		esp_setcount(esp->eregs, hmuch, 0);
-		dma_setup(esp, ((__u32)((unsigned long)SCptr->SCp.ptr)),
-			  hmuch, (thisphase == in_datain));
-		ESPDATA(("DMA|TI --> do_intr_end\n"));
-		esp_cmd(esp, ESP_CMD_DMA | ESP_CMD_TI);
-	}
-	return do_intr_end;
-}
-
-/* See how successful the data transfer was. */
-static int esp_do_data_finale(struct esp *esp)
-{
-	struct scsi_cmnd *SCptr = esp->current_SC;
-	struct esp_device *esp_dev = SCptr->device->hostdata;
-	int bogus_data = 0, bytes_sent = 0, fifocnt, ecount = 0;
-
-	ESPDATA(("esp_do_data_finale: "));
-
-	if (SCptr->SCp.phase == in_datain) {
-		if (esp->sreg & ESP_STAT_PERR) {
-			/* Yuck, parity error.  The ESP asserts ATN
-			 * so that we can go to message out phase
-			 * immediately and inform the target that
-			 * something bad happened.
-			 */
-			ESPLOG(("esp%d: data bad parity detected.\n",
-				esp->esp_id));
-			esp->cur_msgout[0] = INITIATOR_ERROR;
-			esp->msgout_len = 1;
-		}
-		dma_drain(esp);
-	}
-	dma_invalidate(esp);
-
-	/* This could happen for the above parity error case. */
-	if (esp->ireg != ESP_INTR_BSERV) {
-		/* Please go to msgout phase, please please please... */
-		ESPLOG(("esp%d: !BSERV after data, probably to msgout\n",
-			esp->esp_id));
-		return esp_do_phase_determine(esp);
-	}	
-
-	/* Check for partial transfers and other horrible events.
-	 * Note, here we read the real fifo flags register even
-	 * on HME broken adapters because we skip the HME fifo
-	 * workaround code in esp_handle() if we are doing data
-	 * phase things.  We don't want to fuck directly with
-	 * the fifo like that, especially if doing synchronous
-	 * transfers!  Also, will need to double the count on
-	 * HME if we are doing wide transfers, as the HME fifo
-	 * will move and count 16-bit quantities during wide data.
-	 * SMCC _and_ Qlogic can both bite me.
-	 */
-	fifocnt = (sbus_readb(esp->eregs + ESP_FFLAGS) & ESP_FF_FBYTES);
-	if (esp->erev != fashme)
-		ecount = esp_getcount(esp->eregs, 0);
-	bytes_sent = esp->current_transfer_size;
-
-	ESPDATA(("trans_sz(%d), ", bytes_sent));
-	if (esp->erev == fashme) {
-		if (!(esp->sreg & ESP_STAT_TCNT)) {
-			ecount = esp_getcount(esp->eregs, 1);
-			bytes_sent -= ecount;
-		}
-
-		/* Always subtract any cruft remaining in the FIFO. */
-		if (esp->prev_cfg3 & ESP_CONFIG3_EWIDE)
-			fifocnt <<= 1;
-		if (SCptr->SCp.phase == in_dataout)
-			bytes_sent -= fifocnt;
-
-		/* I have an IBM disk which exhibits the following
-		 * behavior during writes to it.  It disconnects in
-		 * the middle of a partial transfer, the current sglist
-		 * buffer is 1024 bytes, the disk stops data transfer
-		 * at 512 bytes.
-		 *
-		 * However the FAS366 reports that 32 more bytes were
-		 * transferred than really were.  This is precisely
-		 * the size of a fully loaded FIFO in wide scsi mode.
-		 * The FIFO state recorded indicates that it is empty.
-		 *
-		 * I have no idea if this is a bug in the FAS366 chip
-		 * or a bug in the firmware on this IBM disk.  In any
-		 * event the following seems to be a good workaround.  -DaveM
-		 */
-		if (bytes_sent != esp->current_transfer_size &&
-		    SCptr->SCp.phase == in_dataout) {
-			int mask = (64 - 1);
-
-			if ((esp->prev_cfg3 & ESP_CONFIG3_EWIDE) == 0)
-				mask >>= 1;
-
-			if (bytes_sent & mask)
-				bytes_sent -= (bytes_sent & mask);
-		}
-	} else {
-		if (!(esp->sreg & ESP_STAT_TCNT))
-			bytes_sent -= ecount;
-		if (SCptr->SCp.phase == in_dataout)
-			bytes_sent -= fifocnt;
-	}
-
-	ESPDATA(("bytes_sent(%d), ", bytes_sent));
-
-	/* If we were in synchronous mode, check for peculiarities. */
-	if (esp->erev == fashme) {
-		if (esp_dev->sync_max_offset) {
-			if (SCptr->SCp.phase == in_dataout)
-				esp_cmd(esp, ESP_CMD_FLUSH);
-		} else {
-			esp_cmd(esp, ESP_CMD_FLUSH);
-		}
-	} else {
-		if (esp_dev->sync_max_offset)
-			bogus_data = esp100_sync_hwbug(esp, SCptr, fifocnt);
-		else
-			esp_cmd(esp, ESP_CMD_FLUSH);
-	}
-
-	/* Until we are sure of what has happened, we are certainly
-	 * in the dark.
-	 */
-	esp_advance_phase(SCptr, in_the_dark);
-
-	if (bytes_sent < 0) {
-		/* I've seen this happen due to lost state in this
-		 * driver.  No idea why it happened, but allowing
-		 * this value to be negative caused things to
-		 * lock up.  This allows greater chance of recovery.
-		 * In fact every time I've seen this, it has been
-		 * a driver bug without question.
-		 */
-		ESPLOG(("esp%d: yieee, bytes_sent < 0!\n", esp->esp_id));
-		ESPLOG(("esp%d: csz=%d fifocount=%d ecount=%d\n",
-			esp->esp_id,
-			esp->current_transfer_size, fifocnt, ecount));
-		ESPLOG(("esp%d: use_sg=%d ptr=%p this_residual=%d\n",
-			esp->esp_id,
-			SCptr->use_sg, SCptr->SCp.ptr, SCptr->SCp.this_residual));
-		ESPLOG(("esp%d: Forcing async for target %d\n", esp->esp_id, 
-			SCptr->device->id));
-		SCptr->device->borken = 1;
-		esp_dev->sync = 0;
-		bytes_sent = 0;
-	}
-
-	/* Update the state of our transfer. */
-	SCptr->SCp.ptr += bytes_sent;
-	SCptr->SCp.this_residual -= bytes_sent;
-	if (SCptr->SCp.this_residual < 0) {
-		/* shit */
-		ESPLOG(("esp%d: Data transfer overrun.\n", esp->esp_id));
-		SCptr->SCp.this_residual = 0;
-	}
-
-	/* Maybe continue. */
-	if (!bogus_data) {
-		ESPDATA(("!bogus_data, "));
-
-		/* NO MATTER WHAT, we advance the scatterlist,
-		 * if the target should decide to disconnect
-		 * in between scatter chunks (which is common)
-		 * we could die horribly!  I used to have the sg
-		 * advance occur only if we are going back into
-		 * (or are staying in) a data phase, you can
-		 * imagine the hell I went through trying to
-		 * figure this out.
-		 */
-		if (SCptr->use_sg && !SCptr->SCp.this_residual)
-			advance_sg(SCptr);
-		if (sreg_datainp(esp->sreg) || sreg_dataoutp(esp->sreg)) {
-			ESPDATA(("to more data\n"));
-			return esp_do_data(esp);
-		}
-		ESPDATA(("to new phase\n"));
-		return esp_do_phase_determine(esp);
-	}
-	/* Bogus data, just wait for next interrupt. */
-	ESPLOG(("esp%d: bogus_data during end of data phase\n",
-		esp->esp_id));
-	return do_intr_end;
-}
-
-/* We received a non-good status return at the end of
- * running a SCSI command.  This is used to decide if
- * we should clear our synchronous transfer state for
- * such a device when that happens.
- *
- * The idea is that when spinning up a disk or rewinding
- * a tape, we don't want to go into a loop re-negotiating
- * synchronous capabilities over and over.
- */
-static int esp_should_clear_sync(struct scsi_cmnd *sp)
-{
-	u8 cmd = sp->cmnd[0];
-
-	/* These cases are for spinning up a disk and
-	 * waiting for that spinup to complete.
-	 */
-	if (cmd == START_STOP)
-		return 0;
-
-	if (cmd == TEST_UNIT_READY)
-		return 0;
-
-	/* One more special case for SCSI tape drives,
-	 * this is what is used to probe the device for
-	 * completion of a rewind or tape load operation.
-	 */
-	if (sp->device->type == TYPE_TAPE) {
-		if (cmd == MODE_SENSE)
-			return 0;
-	}
-
-	return 1;
-}
-
-/* Either a command is completing or a target is dropping off the bus
- * to continue the command in the background so we can do other work.
- */
-static int esp_do_freebus(struct esp *esp)
-{
-	struct scsi_cmnd *SCptr = esp->current_SC;
-	struct esp_device *esp_dev = SCptr->device->hostdata;
-	int rval;
-
-	rval = skipahead2(esp, SCptr, in_status, in_msgindone, in_freeing);
-	if (rval)
-		return rval;
-	if (esp->ireg != ESP_INTR_DC) {
-		ESPLOG(("esp%d: Target will not disconnect\n", esp->esp_id));
-		return do_reset_bus; /* target will not drop BSY... */
-	}
-	esp->msgout_len = 0;
-	esp->prevmsgout = NOP;
-	if (esp->prevmsgin == COMMAND_COMPLETE) {
-		/* Normal end of nexus. */
-		if (esp->disconnected_SC || (esp->erev == fashme))
-			esp_cmd(esp, ESP_CMD_ESEL);
-
-		if (SCptr->SCp.Status != GOOD &&
-		    SCptr->SCp.Status != CONDITION_GOOD &&
-		    ((1<<SCptr->device->id) & esp->targets_present) &&
-		    esp_dev->sync &&
-		    esp_dev->sync_max_offset) {
-			/* SCSI standard says that the synchronous capabilities
-			 * should be renegotiated at this point.  Most likely
-			 * we are about to request sense from this target
-			 * in which case we want to avoid using sync
-			 * transfers until we are sure of the current target
-			 * state.
-			 */
-			ESPMISC(("esp: Status <%d> for target %d lun %d\n",
-				 SCptr->SCp.Status, SCptr->device->id, SCptr->device->lun));
-
-			/* But don't do this when spinning up a disk at
-			 * boot time while we poll for completion as it
-			 * fills up the console with messages.  Also, tapes
-			 * can report not ready many times right after
-			 * loading up a tape.
-			 */
-			if (esp_should_clear_sync(SCptr) != 0)
-				esp_dev->sync = 0;
-		}
-		ESPDISC(("F<%02x,%02x>", SCptr->device->id, SCptr->device->lun));
-		esp_done(esp, ((SCptr->SCp.Status & 0xff) |
-			       ((SCptr->SCp.Message & 0xff)<<8) |
-			       (DID_OK << 16)));
-	} else if (esp->prevmsgin == DISCONNECT) {
-		/* Normal disconnect. */
-		esp_cmd(esp, ESP_CMD_ESEL);
-		ESPDISC(("D<%02x,%02x>", SCptr->device->id, SCptr->device->lun));
-		append_SC(&esp->disconnected_SC, SCptr);
-		esp->current_SC = NULL;
-		if (esp->issue_SC)
-			esp_exec_cmd(esp);
-	} else {
-		/* Driver bug, we do not expect a disconnect here
-		 * and should not have advanced the state engine
-		 * to in_freeing.
-		 */
-		ESPLOG(("esp%d: last msg not disc and not cmd cmplt.\n",
-			esp->esp_id));
-		return do_reset_bus;
-	}
-	return do_intr_end;
-}
-
-/* When a reselect occurs, and we cannot find the command to
- * reconnect to in our queues, we do this.
- */
-static int esp_bad_reconnect(struct esp *esp)
-{
-	struct scsi_cmnd *sp;
-
-	ESPLOG(("esp%d: Eieeee, reconnecting unknown command!\n",
-		esp->esp_id));
-	ESPLOG(("QUEUE DUMP\n"));
-	sp = esp->issue_SC;
-	ESPLOG(("esp%d: issue_SC[", esp->esp_id));
-	while (sp) {
-		ESPLOG(("<%02x,%02x>", sp->device->id, sp->device->lun));
-		sp = (struct scsi_cmnd *) sp->host_scribble;
-	}
-	ESPLOG(("]\n"));
-	sp = esp->current_SC;
-	ESPLOG(("esp%d: current_SC[", esp->esp_id));
-	if (sp)
-		ESPLOG(("<%02x,%02x>", sp->device->id, sp->device->lun));
-	else
-		ESPLOG(("<NULL>"));
-	ESPLOG(("]\n"));
-	sp = esp->disconnected_SC;
-	ESPLOG(("esp%d: disconnected_SC[", esp->esp_id));
-	while (sp) {
-		ESPLOG(("<%02x,%02x>", sp->device->id, sp->device->lun));
-		sp = (struct scsi_cmnd *) sp->host_scribble;
-	}
-	ESPLOG(("]\n"));
-	return do_reset_bus;
-}
-
-/* Do the needy when a target tries to reconnect to us. */
-static int esp_do_reconnect(struct esp *esp)
-{
-	int lun, target;
-	struct scsi_cmnd *SCptr;
-
-	/* Check for all bogus conditions first. */
-	target = reconnect_target(esp);
-	if (target < 0) {
-		ESPDISC(("bad bus bits\n"));
-		return do_reset_bus;
-	}
-	lun = reconnect_lun(esp);
-	if (lun < 0) {
-		ESPDISC(("target=%2x, bad identify msg\n", target));
-		return do_reset_bus;
-	}
-
-	/* Things look ok... */
-	ESPDISC(("R<%02x,%02x>", target, lun));
-
-	/* Must not flush FIFO or DVMA on HME. */
-	if (esp->erev != fashme) {
-		esp_cmd(esp, ESP_CMD_FLUSH);
-		if (esp100_reconnect_hwbug(esp))
-			return do_reset_bus;
-		esp_cmd(esp, ESP_CMD_NULL);
-	}
-
-	SCptr = remove_SC(&esp->disconnected_SC, (u8) target, (u8) lun);
-	if (!SCptr)
-		return esp_bad_reconnect(esp);
-
-	esp_connect(esp, SCptr);
-	esp_cmd(esp, ESP_CMD_MOK);
-
-	if (esp->erev == fashme)
-		sbus_writeb(((SCptr->device->id & 0xf) |
-			     (ESP_BUSID_RESELID | ESP_BUSID_CTR32BIT)),
-			    esp->eregs + ESP_BUSID);
-
-	/* Reconnect implies a restore pointers operation. */
-	esp_restore_pointers(esp, SCptr);
-
-	esp->snip = 0;
-	esp_advance_phase(SCptr, in_the_dark);
-	return do_intr_end;
-}
-
-/* End of NEXUS (hopefully), pick up status + message byte then leave if
- * all goes well.
- */
-static int esp_do_status(struct esp *esp)
-{
-	struct scsi_cmnd *SCptr = esp->current_SC;
-	int intr, rval;
-
-	rval = skipahead1(esp, SCptr, in_the_dark, in_status);
-	if (rval)
-		return rval;
-	intr = esp->ireg;
-	ESPSTAT(("esp_do_status: "));
-	if (intr != ESP_INTR_DC) {
-		int message_out = 0; /* for parity problems */
-
-		/* Ack the message. */
-		ESPSTAT(("ack msg, "));
-		esp_cmd(esp, ESP_CMD_MOK);
-
-		if (esp->erev != fashme) {
-			dma_flashclear(esp);
-
-			/* Wait till the first bits settle. */
-			while (esp->esp_command[0] == 0xff)
-				udelay(1);
-		} else {
-			esp->esp_command[0] = esp->hme_fifo_workaround_buffer[0];
-			esp->esp_command[1] = esp->hme_fifo_workaround_buffer[1];
-		}
-
-		ESPSTAT(("got something, "));
-		/* ESP chimes in with one of
-		 *
-		 * 1) function done interrupt:
-		 *	both status and message in bytes
-		 *	are available
-		 *
-		 * 2) bus service interrupt:
-		 *	only status byte was acquired
-		 *
-		 * 3) Anything else:
-		 *	can't happen, but we test for it
-		 *	anyways
-		 *
-		 * ALSO: If bad parity was detected on either
-		 *       the status _or_ the message byte then
-		 *       the ESP has asserted ATN on the bus
-		 *       and we must therefore wait for the
-		 *       next phase change.
-		 */
-		if (intr & ESP_INTR_FDONE) {
-			/* We got it all, hallejulia. */
-			ESPSTAT(("got both, "));
-			SCptr->SCp.Status = esp->esp_command[0];
-			SCptr->SCp.Message = esp->esp_command[1];
-			esp->prevmsgin = SCptr->SCp.Message;
-			esp->cur_msgin[0] = SCptr->SCp.Message;
-			if (esp->sreg & ESP_STAT_PERR) {
-				/* There was bad parity for the
-				 * message byte, the status byte
-				 * was ok.
-				 */
-				message_out = MSG_PARITY_ERROR;
-			}
-		} else if (intr == ESP_INTR_BSERV) {
-			/* Only got status byte. */
-			ESPLOG(("esp%d: got status only, ", esp->esp_id));
-			if (!(esp->sreg & ESP_STAT_PERR)) {
-				SCptr->SCp.Status = esp->esp_command[0];
-				SCptr->SCp.Message = 0xff;
-			} else {
-				/* The status byte had bad parity.
-				 * we leave the scsi_pointer Status
-				 * field alone as we set it to a default
-				 * of CHECK_CONDITION in esp_queue.
-				 */
-				message_out = INITIATOR_ERROR;
-			}
-		} else {
-			/* This shouldn't happen ever. */
-			ESPSTAT(("got bolixed\n"));
-			esp_advance_phase(SCptr, in_the_dark);
-			return esp_do_phase_determine(esp);
-		}
-
-		if (!message_out) {
-			ESPSTAT(("status=%2x msg=%2x, ", SCptr->SCp.Status,
-				SCptr->SCp.Message));
-			if (SCptr->SCp.Message == COMMAND_COMPLETE) {
-				ESPSTAT(("and was COMMAND_COMPLETE\n"));
-				esp_advance_phase(SCptr, in_freeing);
-				return esp_do_freebus(esp);
-			} else {
-				ESPLOG(("esp%d: and _not_ COMMAND_COMPLETE\n",
-					esp->esp_id));
-				esp->msgin_len = esp->msgin_ctr = 1;
-				esp_advance_phase(SCptr, in_msgindone);
-				return esp_do_msgindone(esp);
-			}
-		} else {
-			/* With luck we'll be able to let the target
-			 * know that bad parity happened, it will know
-			 * which byte caused the problems and send it
-			 * again.  For the case where the status byte
-			 * receives bad parity, I do not believe most
-			 * targets recover very well.  We'll see.
-			 */
-			ESPLOG(("esp%d: bad parity somewhere mout=%2x\n",
-				esp->esp_id, message_out));
-			esp->cur_msgout[0] = message_out;
-			esp->msgout_len = esp->msgout_ctr = 1;
-			esp_advance_phase(SCptr, in_the_dark);
-			return esp_do_phase_determine(esp);
-		}
-	} else {
-		/* If we disconnect now, all hell breaks loose. */
-		ESPLOG(("esp%d: whoops, disconnect\n", esp->esp_id));
-		esp_advance_phase(SCptr, in_the_dark);
-		return esp_do_phase_determine(esp);
-	}
-}
-
-static int esp_enter_status(struct esp *esp)
-{
-	u8 thecmd = ESP_CMD_ICCSEQ;
-
-	esp_cmd(esp, ESP_CMD_FLUSH);
-	if (esp->erev != fashme) {
-		u32 tmp;
-
-		esp->esp_command[0] = esp->esp_command[1] = 0xff;
-		sbus_writeb(2, esp->eregs + ESP_TCLOW);
-		sbus_writeb(0, esp->eregs + ESP_TCMED);
-		tmp = sbus_readl(esp->dregs + DMA_CSR);
-		tmp |= (DMA_ST_WRITE | DMA_ENABLE);
-		sbus_writel(tmp, esp->dregs + DMA_CSR);
-		if (esp->dma->revision == dvmaesc1)
-			sbus_writel(0x100, esp->dregs + DMA_COUNT);
-		sbus_writel(esp->esp_command_dvma, esp->dregs + DMA_ADDR);
-		thecmd |= ESP_CMD_DMA;
-	}
-	esp_cmd(esp, thecmd);
-	esp_advance_phase(esp->current_SC, in_status);
-
-	return esp_do_status(esp);
-}
-
-static int esp_disconnect_amidst_phases(struct esp *esp)
-{
-	struct scsi_cmnd *sp = esp->current_SC;
-	struct esp_device *esp_dev = sp->device->hostdata;
-
-	/* This means real problems if we see this
-	 * here.  Unless we were actually trying
-	 * to force the device to abort/reset.
-	 */
-	ESPLOG(("esp%d Disconnect amidst phases, ", esp->esp_id));
-	ESPLOG(("pphase<%s> cphase<%s>, ",
-		phase_string(sp->SCp.phase),
-		phase_string(sp->SCp.sent_command)));
-
-	if (esp->disconnected_SC != NULL || (esp->erev == fashme))
-		esp_cmd(esp, ESP_CMD_ESEL);
-
-	switch (esp->cur_msgout[0]) {
-	default:
-		/* We didn't expect this to happen at all. */
-		ESPLOG(("device is bolixed\n"));
-		esp_advance_phase(sp, in_tgterror);
-		esp_done(esp, (DID_ERROR << 16));
-		break;
-
-	case BUS_DEVICE_RESET:
-		ESPLOG(("device reset successful\n"));
-		esp_dev->sync_max_offset = 0;
-		esp_dev->sync_min_period = 0;
-		esp_dev->sync = 0;
-		esp_advance_phase(sp, in_resetdev);
-		esp_done(esp, (DID_RESET << 16));
-		break;
-
-	case ABORT:
-		ESPLOG(("device abort successful\n"));
-		esp_advance_phase(sp, in_abortone);
-		esp_done(esp, (DID_ABORT << 16));
-		break;
-
-	};
-	return do_intr_end;
-}
-
-static int esp_enter_msgout(struct esp *esp)
-{
-	esp_advance_phase(esp->current_SC, in_msgout);
-	return esp_do_msgout(esp);
-}
-
-static int esp_enter_msgin(struct esp *esp)
-{
-	esp_advance_phase(esp->current_SC, in_msgin);
-	return esp_do_msgin(esp);
-}
-
-static int esp_enter_cmd(struct esp *esp)
-{
-	esp_advance_phase(esp->current_SC, in_cmdbegin);
-	return esp_do_cmdbegin(esp);
-}
-
-static int esp_enter_badphase(struct esp *esp)
-{
-	ESPLOG(("esp%d: Bizarre bus phase %2x.\n", esp->esp_id,
-		esp->sreg & ESP_STAT_PMASK));
-	return do_reset_bus;
-}
-
-typedef int (*espfunc_t)(struct esp *);
-
-static espfunc_t phase_vector[] = {
-	esp_do_data,		/* ESP_DOP */
-	esp_do_data,		/* ESP_DIP */
-	esp_enter_cmd,		/* ESP_CMDP */
-	esp_enter_status,	/* ESP_STATP */
-	esp_enter_badphase,	/* ESP_STAT_PMSG */
-	esp_enter_badphase,	/* ESP_STAT_PMSG | ESP_STAT_PIO */
-	esp_enter_msgout,	/* ESP_MOP */
-	esp_enter_msgin,	/* ESP_MIP */
-};
-
-/* The target has control of the bus and we have to see where it has
- * taken us.
- */
-static int esp_do_phase_determine(struct esp *esp)
-{
-	if ((esp->ireg & ESP_INTR_DC) != 0)
-		return esp_disconnect_amidst_phases(esp);
-	return phase_vector[esp->sreg & ESP_STAT_PMASK](esp);
-}
-
-/* First interrupt after exec'ing a cmd comes here. */
-static int esp_select_complete(struct esp *esp)
-{
-	struct scsi_cmnd *SCptr = esp->current_SC;
-	struct esp_device *esp_dev = SCptr->device->hostdata;
-	int cmd_bytes_sent, fcnt;
-
-	if (esp->erev != fashme)
-		esp->seqreg = (sbus_readb(esp->eregs + ESP_SSTEP) & ESP_STEP_VBITS);
-
-	if (esp->erev == fashme)
-		fcnt = esp->hme_fifo_workaround_count;
-	else
-		fcnt = (sbus_readb(esp->eregs + ESP_FFLAGS) & ESP_FF_FBYTES);
-
-	cmd_bytes_sent = esp_bytes_sent(esp, fcnt);
-	dma_invalidate(esp);
-
-	/* Let's check to see if a reselect happened
-	 * while we we're trying to select.  This must
-	 * be checked first.
-	 */
-	if (esp->ireg == (ESP_INTR_RSEL | ESP_INTR_FDONE)) {
-		esp_reconnect(esp, SCptr);
-		return esp_do_reconnect(esp);
-	}
-
-	/* Looks like things worked, we should see a bus service &
-	 * a function complete interrupt at this point.  Note we
-	 * are doing a direct comparison because we don't want to
-	 * be fooled into thinking selection was successful if
-	 * ESP_INTR_DC is set, see below.
-	 */
-	if (esp->ireg == (ESP_INTR_FDONE | ESP_INTR_BSERV)) {
-		/* target speaks... */
-		esp->targets_present |= (1<<SCptr->device->id);
-
-		/* What if the target ignores the sdtr? */
-		if (esp->snip)
-			esp_dev->sync = 1;
-
-		/* See how far, if at all, we got in getting
-		 * the information out to the target.
-		 */
-		switch (esp->seqreg) {
-		default:
-
-		case ESP_STEP_ASEL:
-			/* Arbitration won, target selected, but
-			 * we are in some phase which is not command
-			 * phase nor is it message out phase.
-			 *
-			 * XXX We've confused the target, obviously.
-			 * XXX So clear it's state, but we also end
-			 * XXX up clearing everyone elses.  That isn't
-			 * XXX so nice.  I'd like to just reset this
-			 * XXX target, but if I cannot even get it's
-			 * XXX attention and finish selection to talk
-			 * XXX to it, there is not much more I can do.
-			 * XXX If we have a loaded bus we're going to
-			 * XXX spend the next second or so renegotiating
-			 * XXX for synchronous transfers.
-			 */
-			ESPLOG(("esp%d: STEP_ASEL for tgt %d\n",
-				esp->esp_id, SCptr->device->id));
-
-		case ESP_STEP_SID:
-			/* Arbitration won, target selected, went
-			 * to message out phase, sent one message
-			 * byte, then we stopped.  ATN is asserted
-			 * on the SCSI bus and the target is still
-			 * there hanging on.  This is a legal
-			 * sequence step if we gave the ESP a select
-			 * and stop command.
-			 *
-			 * XXX See above, I could set the borken flag
-			 * XXX in the device struct and retry the
-			 * XXX command.  But would that help for
-			 * XXX tagged capable targets?
-			 */
-
-		case ESP_STEP_NCMD:
-			/* Arbitration won, target selected, maybe
-			 * sent the one message byte in message out
-			 * phase, but we did not go to command phase
-			 * in the end.  Actually, we could have sent
-			 * only some of the message bytes if we tried
-			 * to send out the entire identify and tag
-			 * message using ESP_CMD_SA3.
-			 */
-			cmd_bytes_sent = 0;
-			break;
-
-		case ESP_STEP_PPC:
-			/* No, not the powerPC pinhead.  Arbitration
-			 * won, all message bytes sent if we went to
-			 * message out phase, went to command phase
-			 * but only part of the command was sent.
-			 *
-			 * XXX I've seen this, but usually in conjunction
-			 * XXX with a gross error which appears to have
-			 * XXX occurred between the time I told the
-			 * XXX ESP to arbitrate and when I got the
-			 * XXX interrupt.  Could I have misloaded the
-			 * XXX command bytes into the fifo?  Actually,
-			 * XXX I most likely missed a phase, and therefore
-			 * XXX went into never never land and didn't even
-			 * XXX know it.  That was the old driver though.
-			 * XXX What is even more peculiar is that the ESP
-			 * XXX showed the proper function complete and
-			 * XXX bus service bits in the interrupt register.
-			 */
-
-		case ESP_STEP_FINI4:
-		case ESP_STEP_FINI5:
-		case ESP_STEP_FINI6:
-		case ESP_STEP_FINI7:
-			/* Account for the identify message */
-			if (SCptr->SCp.phase == in_slct_norm)
-				cmd_bytes_sent -= 1;
-		};
-
-		if (esp->erev != fashme)
-			esp_cmd(esp, ESP_CMD_NULL);
-
-		/* Be careful, we could really get fucked during synchronous
-		 * data transfers if we try to flush the fifo now.
-		 */
-		if ((esp->erev != fashme) && /* not a Happy Meal and... */
-		    !fcnt && /* Fifo is empty and... */
-		    /* either we are not doing synchronous transfers or... */
-		    (!esp_dev->sync_max_offset ||
-		     /* We are not going into data in phase. */
-		     ((esp->sreg & ESP_STAT_PMASK) != ESP_DIP)))
-			esp_cmd(esp, ESP_CMD_FLUSH); /* flush is safe */
-
-		/* See how far we got if this is not a slow command. */
-		if (!esp->esp_slowcmd) {
-			if (cmd_bytes_sent < 0)
-				cmd_bytes_sent = 0;
-			if (cmd_bytes_sent != SCptr->cmd_len) {
-				/* Crapola, mark it as a slowcmd
-				 * so that we have some chance of
-				 * keeping the command alive with
-				 * good luck.
-				 *
-				 * XXX Actually, if we didn't send it all
-				 * XXX this means either we didn't set things
-				 * XXX up properly (driver bug) or the target
-				 * XXX or the ESP detected parity on one of
-				 * XXX the command bytes.  This makes much
-				 * XXX more sense, and therefore this code
-				 * XXX should be changed to send out a
-				 * XXX parity error message or if the status
-				 * XXX register shows no parity error then
-				 * XXX just expect the target to bring the
-				 * XXX bus into message in phase so that it
-				 * XXX can send us the parity error message.
-				 * XXX SCSI sucks...
-				 */
-				esp->esp_slowcmd = 1;
-				esp->esp_scmdp = &(SCptr->cmnd[cmd_bytes_sent]);
-				esp->esp_scmdleft = (SCptr->cmd_len - cmd_bytes_sent);
-			}
-		}
-
-		/* Now figure out where we went. */
-		esp_advance_phase(SCptr, in_the_dark);
-		return esp_do_phase_determine(esp);
-	}
-
-	/* Did the target even make it? */
-	if (esp->ireg == ESP_INTR_DC) {
-		/* wheee... nobody there or they didn't like
-		 * what we told it to do, clean up.
-		 */
-
-		/* If anyone is off the bus, but working on
-		 * a command in the background for us, tell
-		 * the ESP to listen for them.
-		 */
-		if (esp->disconnected_SC)
-			esp_cmd(esp, ESP_CMD_ESEL);
-
-		if (((1<<SCptr->device->id) & esp->targets_present) &&
-		    esp->seqreg != 0 &&
-		    (esp->cur_msgout[0] == EXTENDED_MESSAGE) &&
-		    (SCptr->SCp.phase == in_slct_msg ||
-		     SCptr->SCp.phase == in_slct_stop)) {
-			/* shit */
-			esp->snip = 0;
-			ESPLOG(("esp%d: Failed synchronous negotiation for target %d "
-				"lun %d\n", esp->esp_id, SCptr->device->id, SCptr->device->lun));
-			esp_dev->sync_max_offset = 0;
-			esp_dev->sync_min_period = 0;
-			esp_dev->sync = 1; /* so we don't negotiate again */
-
-			/* Run the command again, this time though we
-			 * won't try to negotiate for synchronous transfers.
-			 *
-			 * XXX I'd like to do something like send an
-			 * XXX INITIATOR_ERROR or ABORT message to the
-			 * XXX target to tell it, "Sorry I confused you,
-			 * XXX please come back and I will be nicer next
-			 * XXX time".  But that requires having the target
-			 * XXX on the bus, and it has dropped BSY on us.
-			 */
-			esp->current_SC = NULL;
-			esp_advance_phase(SCptr, not_issued);
-			prepend_SC(&esp->issue_SC, SCptr);
-			esp_exec_cmd(esp);
-			return do_intr_end;
-		}
-
-		/* Ok, this is normal, this is what we see during boot
-		 * or whenever when we are scanning the bus for targets.
-		 * But first make sure that is really what is happening.
-		 */
-		if (((1<<SCptr->device->id) & esp->targets_present)) {
-			ESPLOG(("esp%d: Warning, live target %d not responding to "
-				"selection.\n", esp->esp_id, SCptr->device->id));
-
-			/* This _CAN_ happen.  The SCSI standard states that
-			 * the target is to _not_ respond to selection if
-			 * _it_ detects bad parity on the bus for any reason.
-			 * Therefore, we assume that if we've talked successfully
-			 * to this target before, bad parity is the problem.
-			 */
-			esp_done(esp, (DID_PARITY << 16));
-		} else {
-			/* Else, there really isn't anyone there. */
-			ESPMISC(("esp: selection failure, maybe nobody there?\n"));
-			ESPMISC(("esp: target %d lun %d\n",
-				 SCptr->device->id, SCptr->device->lun));
-			esp_done(esp, (DID_BAD_TARGET << 16));
-		}
-		return do_intr_end;
-	}
-
-	ESPLOG(("esp%d: Selection failure.\n", esp->esp_id));
-	printk("esp%d: Currently -- ", esp->esp_id);
-	esp_print_ireg(esp->ireg); printk(" ");
-	esp_print_statreg(esp->sreg); printk(" ");
-	esp_print_seqreg(esp->seqreg); printk("\n");
-	printk("esp%d: New -- ", esp->esp_id);
-	esp->sreg = sbus_readb(esp->eregs + ESP_STATUS);
-	esp->seqreg = sbus_readb(esp->eregs + ESP_SSTEP);
-	esp->ireg = sbus_readb(esp->eregs + ESP_INTRPT);
-	esp_print_ireg(esp->ireg); printk(" ");
-	esp_print_statreg(esp->sreg); printk(" ");
-	esp_print_seqreg(esp->seqreg); printk("\n");
-	ESPLOG(("esp%d: resetting bus\n", esp->esp_id));
-	return do_reset_bus; /* ugh... */
-}
-
-/* Continue reading bytes for msgin phase. */
-static int esp_do_msgincont(struct esp *esp)
-{
-	if (esp->ireg & ESP_INTR_BSERV) {
-		/* in the right phase too? */
-		if ((esp->sreg & ESP_STAT_PMASK) == ESP_MIP) {
-			/* phew... */
-			esp_cmd(esp, ESP_CMD_TI);
-			esp_advance_phase(esp->current_SC, in_msgindone);
-			return do_intr_end;
-		}
-
-		/* We changed phase but ESP shows bus service,
-		 * in this case it is most likely that we, the
-		 * hacker who has been up for 20hrs straight
-		 * staring at the screen, drowned in coffee
-		 * smelling like retched cigarette ashes
-		 * have miscoded something..... so, try to
-		 * recover as best we can.
-		 */
-		ESPLOG(("esp%d: message in mis-carriage.\n", esp->esp_id));
-	}
-	esp_advance_phase(esp->current_SC, in_the_dark);
-	return do_phase_determine;
-}
-
-static int check_singlebyte_msg(struct esp *esp)
-{
-	esp->prevmsgin = esp->cur_msgin[0];
-	if (esp->cur_msgin[0] & 0x80) {
-		/* wheee... */
-		ESPLOG(("esp%d: target sends identify amidst phases\n",
-			esp->esp_id));
-		esp_advance_phase(esp->current_SC, in_the_dark);
-		return 0;
-	} else if (((esp->cur_msgin[0] & 0xf0) == 0x20) ||
-		   (esp->cur_msgin[0] == EXTENDED_MESSAGE)) {
-		esp->msgin_len = 2;
-		esp_advance_phase(esp->current_SC, in_msgincont);
-		return 0;
-	}
-	esp_advance_phase(esp->current_SC, in_the_dark);
-	switch (esp->cur_msgin[0]) {
-	default:
-		/* We don't want to hear about it. */
-		ESPLOG(("esp%d: msg %02x which we don't know about\n", esp->esp_id,
-			esp->cur_msgin[0]));
-		return MESSAGE_REJECT;
-
-	case NOP:
-		ESPLOG(("esp%d: target %d sends a nop\n", esp->esp_id,
-			esp->current_SC->device->id));
-		return 0;
-
-	case RESTORE_POINTERS:
-		/* In this case we might also have to backup the
-		 * "slow command" pointer.  It is rare to get such
-		 * a save/restore pointer sequence so early in the
-		 * bus transition sequences, but cover it.
-		 */
-		if (esp->esp_slowcmd) {
-			esp->esp_scmdleft = esp->current_SC->cmd_len;
-			esp->esp_scmdp = &esp->current_SC->cmnd[0];
-		}
-		esp_restore_pointers(esp, esp->current_SC);
-		return 0;
-
-	case SAVE_POINTERS:
-		esp_save_pointers(esp, esp->current_SC);
-		return 0;
-
-	case COMMAND_COMPLETE:
-	case DISCONNECT:
-		/* Freeing the bus, let it go. */
-		esp->current_SC->SCp.phase = in_freeing;
-		return 0;
-
-	case MESSAGE_REJECT:
-		ESPMISC(("msg reject, "));
-		if (esp->prevmsgout == EXTENDED_MESSAGE) {
-			struct esp_device *esp_dev = esp->current_SC->device->hostdata;
-
-			/* Doesn't look like this target can
-			 * do synchronous or WIDE transfers.
-			 */
-			ESPSDTR(("got reject, was trying nego, clearing sync/WIDE\n"));
-			esp_dev->sync = 1;
-			esp_dev->wide = 1;
-			esp_dev->sync_min_period = 0;
-			esp_dev->sync_max_offset = 0;
-			return 0;
-		} else {
-			ESPMISC(("not sync nego, sending ABORT\n"));
-			return ABORT;
-		}
-	};
-}
-
-/* Target negotiates for synchronous transfers before we do, this
- * is legal although very strange.  What is even funnier is that
- * the SCSI2 standard specifically recommends against targets doing
- * this because so many initiators cannot cope with this occurring.
- */
-static int target_with_ants_in_pants(struct esp *esp,
-				     struct scsi_cmnd *SCptr,
-				     struct esp_device *esp_dev)
-{
-	if (esp_dev->sync || SCptr->device->borken) {
-		/* sorry, no can do */
-		ESPSDTR(("forcing to async, "));
-		build_sync_nego_msg(esp, 0, 0);
-		esp_dev->sync = 1;
-		esp->snip = 1;
-		ESPLOG(("esp%d: hoping for msgout\n", esp->esp_id));
-		esp_advance_phase(SCptr, in_the_dark);
-		return EXTENDED_MESSAGE;
-	}
-
-	/* Ok, we'll check them out... */
-	return 0;
-}
-
-static void sync_report(struct esp *esp)
-{
-	int msg3, msg4;
-	char *type;
-
-	msg3 = esp->cur_msgin[3];
-	msg4 = esp->cur_msgin[4];
-	if (msg4) {
-		int hz = 1000000000 / (msg3 * 4);
-		int integer = hz / 1000000;
-		int fraction = (hz - (integer * 1000000)) / 10000;
-		if ((esp->erev == fashme) &&
-		    (esp->config3[esp->current_SC->device->id] & ESP_CONFIG3_EWIDE)) {
-			type = "FAST-WIDE";
-			integer <<= 1;
-			fraction <<= 1;
-		} else if ((msg3 * 4) < 200) {
-			type = "FAST";
-		} else {
-			type = "synchronous";
-		}
-
-		/* Do not transform this back into one big printk
-		 * again, it triggers a bug in our sparc64-gcc272
-		 * sibling call optimization.  -DaveM
-		 */
-		ESPLOG((KERN_INFO "esp%d: target %d ",
-			esp->esp_id, esp->current_SC->device->id));
-		ESPLOG(("[period %dns offset %d %d.%02dMHz ",
-			(int) msg3 * 4, (int) msg4,
-			integer, fraction));
-		ESPLOG(("%s SCSI%s]\n", type,
-			(((msg3 * 4) < 200) ? "-II" : "")));
-	} else {
-		ESPLOG((KERN_INFO "esp%d: target %d asynchronous\n",
-			esp->esp_id, esp->current_SC->device->id));
-	}
-}
-
-static int check_multibyte_msg(struct esp *esp)
-{
-	struct scsi_cmnd *SCptr = esp->current_SC;
-	struct esp_device *esp_dev = SCptr->device->hostdata;
-	u8 regval = 0;
-	int message_out = 0;
-
-	ESPSDTR(("chk multibyte msg: "));
-	if (esp->cur_msgin[2] == EXTENDED_SDTR) {
-		int period = esp->cur_msgin[3];
-		int offset = esp->cur_msgin[4];
-
-		ESPSDTR(("is sync nego response, "));
-		if (!esp->snip) {
-			int rval;
-
-			/* Target negotiates first! */
-			ESPSDTR(("target jumps the gun, "));
-			message_out = EXTENDED_MESSAGE; /* we must respond */
-			rval = target_with_ants_in_pants(esp, SCptr, esp_dev);
-			if (rval)
-				return rval;
-		}
-
-		ESPSDTR(("examining sdtr, "));
-
-		/* Offset cannot be larger than ESP fifo size. */
-		if (offset > 15) {
-			ESPSDTR(("offset too big %2x, ", offset));
-			offset = 15;
-			ESPSDTR(("sending back new offset\n"));
-			build_sync_nego_msg(esp, period, offset);
-			return EXTENDED_MESSAGE;
-		}
-
-		if (offset && period > esp->max_period) {
-			/* Yeee, async for this slow device. */
-			ESPSDTR(("period too long %2x, ", period));
-			build_sync_nego_msg(esp, 0, 0);
-			ESPSDTR(("hoping for msgout\n"));
-			esp_advance_phase(esp->current_SC, in_the_dark);
-			return EXTENDED_MESSAGE;
-		} else if (offset && period < esp->min_period) {
-			ESPSDTR(("period too short %2x, ", period));
-			period = esp->min_period;
-			if (esp->erev > esp236)
-				regval = 4;
-			else
-				regval = 5;
-		} else if (offset) {
-			int tmp;
-
-			ESPSDTR(("period is ok, "));
-			tmp = esp->ccycle / 1000;
-			regval = (((period << 2) + tmp - 1) / tmp);
-			if (regval && ((esp->erev == fas100a ||
-					esp->erev == fas236  ||
-					esp->erev == fashme))) {
-				if (period >= 50)
-					regval--;
-			}
-		}
-
-		if (offset) {
-			u8 bit;
-
-			esp_dev->sync_min_period = (regval & 0x1f);
-			esp_dev->sync_max_offset = (offset | esp->radelay);
-			if (esp->erev == fas100a || esp->erev == fas236 || esp->erev == fashme) {
-				if ((esp->erev == fas100a) || (esp->erev == fashme))
-					bit = ESP_CONFIG3_FAST;
-				else
-					bit = ESP_CONFIG3_FSCSI;
-				if (period < 50) {
-					/* On FAS366, if using fast-20 synchronous transfers
-					 * we need to make sure the REQ/ACK assert/deassert
-					 * control bits are clear.
-					 */
-					if (esp->erev == fashme)
-						esp_dev->sync_max_offset &= ~esp->radelay;
-					esp->config3[SCptr->device->id] |= bit;
-				} else {
-					esp->config3[SCptr->device->id] &= ~bit;
-				}
-				esp->prev_cfg3 = esp->config3[SCptr->device->id];
-				sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3);
-			}
-			esp->prev_soff = esp_dev->sync_max_offset;
-			esp->prev_stp = esp_dev->sync_min_period;
-			sbus_writeb(esp->prev_soff, esp->eregs + ESP_SOFF);
-			sbus_writeb(esp->prev_stp, esp->eregs + ESP_STP);
-			ESPSDTR(("soff=%2x stp=%2x cfg3=%2x\n",
-				 esp_dev->sync_max_offset,
-				 esp_dev->sync_min_period,
-				 esp->config3[SCptr->device->id]));
-
-			esp->snip = 0;
-		} else if (esp_dev->sync_max_offset) {
-			u8 bit;
-
-			/* back to async mode */
-			ESPSDTR(("unaccaptable sync nego, forcing async\n"));
-			esp_dev->sync_max_offset = 0;
-			esp_dev->sync_min_period = 0;
-			esp->prev_soff = 0;
-			esp->prev_stp = 0;
-			sbus_writeb(esp->prev_soff, esp->eregs + ESP_SOFF);
-			sbus_writeb(esp->prev_stp, esp->eregs + ESP_STP);
-			if (esp->erev == fas100a || esp->erev == fas236 || esp->erev == fashme) {
-				if ((esp->erev == fas100a) || (esp->erev == fashme))
-					bit = ESP_CONFIG3_FAST;
-				else
-					bit = ESP_CONFIG3_FSCSI;
-				esp->config3[SCptr->device->id] &= ~bit;
-				esp->prev_cfg3 = esp->config3[SCptr->device->id];
-				sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3);
-			}
-		}
-
-		sync_report(esp);
-
-		ESPSDTR(("chk multibyte msg: sync is known, "));
-		esp_dev->sync = 1;
-
-		if (message_out) {
-			ESPLOG(("esp%d: sending sdtr back, hoping for msgout\n",
-				esp->esp_id));
-			build_sync_nego_msg(esp, period, offset);
-			esp_advance_phase(SCptr, in_the_dark);
-			return EXTENDED_MESSAGE;
-		}
-
-		ESPSDTR(("returning zero\n"));
-		esp_advance_phase(SCptr, in_the_dark); /* ...or else! */
-		return 0;
-	} else if (esp->cur_msgin[2] == EXTENDED_WDTR) {
-		int size = 8 << esp->cur_msgin[3];
-
-		esp->wnip = 0;
-		if (esp->erev != fashme) {
-			ESPLOG(("esp%d: AIEEE wide msg received and not HME.\n",
-				esp->esp_id));
-			message_out = MESSAGE_REJECT;
-		} else if (size > 16) {
-			ESPLOG(("esp%d: AIEEE wide transfer for %d size "
-				"not supported.\n", esp->esp_id, size));
-			message_out = MESSAGE_REJECT;
-		} else {
-			/* Things look good; let's see what we got. */
-			if (size == 16) {
-				/* Set config 3 register for this target. */
-				esp->config3[SCptr->device->id] |= ESP_CONFIG3_EWIDE;
-			} else {
-				/* Just make sure it was one byte sized. */
-				if (size != 8) {
-					ESPLOG(("esp%d: Aieee, wide nego of %d size.\n",
-						esp->esp_id, size));
-					message_out = MESSAGE_REJECT;
-					goto finish;
-				}
-				/* Pure paranoia. */
-				esp->config3[SCptr->device->id] &= ~(ESP_CONFIG3_EWIDE);
-			}
-			esp->prev_cfg3 = esp->config3[SCptr->device->id];
-			sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3);
-
-			/* Regardless, next try for sync transfers. */
-			build_sync_nego_msg(esp, esp->sync_defp, 15);
-			esp_dev->sync = 1;
-			esp->snip = 1;
-			message_out = EXTENDED_MESSAGE;
-		}
-	} else if (esp->cur_msgin[2] == EXTENDED_MODIFY_DATA_POINTER) {
-		ESPLOG(("esp%d: rejecting modify data ptr msg\n", esp->esp_id));
-		message_out = MESSAGE_REJECT;
-	}
-finish:
-	esp_advance_phase(SCptr, in_the_dark);
-	return message_out;
-}
-
-static int esp_do_msgindone(struct esp *esp)
-{
-	struct scsi_cmnd *SCptr = esp->current_SC;
-	int message_out = 0, it = 0, rval;
-
-	rval = skipahead1(esp, SCptr, in_msgin, in_msgindone);
-	if (rval)
-		return rval;
-	if (SCptr->SCp.sent_command != in_status) {
-		if (!(esp->ireg & ESP_INTR_DC)) {
-			if (esp->msgin_len && (esp->sreg & ESP_STAT_PERR)) {
-				message_out = MSG_PARITY_ERROR;
-				esp_cmd(esp, ESP_CMD_FLUSH);
-			} else if (esp->erev != fashme &&
-			  (it = (sbus_readb(esp->eregs + ESP_FFLAGS) & ESP_FF_FBYTES)) != 1) {
-				/* We certainly dropped the ball somewhere. */
-				message_out = INITIATOR_ERROR;
-				esp_cmd(esp, ESP_CMD_FLUSH);
-			} else if (!esp->msgin_len) {
-				if (esp->erev == fashme)
-					it = esp->hme_fifo_workaround_buffer[0];
-				else
-					it = sbus_readb(esp->eregs + ESP_FDATA);
-				esp_advance_phase(SCptr, in_msgincont);
-			} else {
-				/* it is ok and we want it */
-				if (esp->erev == fashme)
-					it = esp->cur_msgin[esp->msgin_ctr] =
-						esp->hme_fifo_workaround_buffer[0];
-				else
-					it = esp->cur_msgin[esp->msgin_ctr] =
-						sbus_readb(esp->eregs + ESP_FDATA);
-				esp->msgin_ctr++;
-			}
-		} else {
-			esp_advance_phase(SCptr, in_the_dark);
-			return do_work_bus;
-		}
-	} else {
-		it = esp->cur_msgin[0];
-	}
-	if (!message_out && esp->msgin_len) {
-		if (esp->msgin_ctr < esp->msgin_len) {
-			esp_advance_phase(SCptr, in_msgincont);
-		} else if (esp->msgin_len == 1) {
-			message_out = check_singlebyte_msg(esp);
-		} else if (esp->msgin_len == 2) {
-			if (esp->cur_msgin[0] == EXTENDED_MESSAGE) {
-				if ((it + 2) >= 15) {
-					message_out = MESSAGE_REJECT;
-				} else {
-					esp->msgin_len = (it + 2);
-					esp_advance_phase(SCptr, in_msgincont);
-				}
-			} else {
-				message_out = MESSAGE_REJECT; /* foo on you */
-			}
-		} else {
-			message_out = check_multibyte_msg(esp);
-		}
-	}
-	if (message_out < 0) {
-		return -message_out;
-	} else if (message_out) {
-		if (((message_out != 1) &&
-		     ((message_out < 0x20) || (message_out & 0x80))))
-			esp->msgout_len = 1;
-		esp->cur_msgout[0] = message_out;
-		esp_cmd(esp, ESP_CMD_SATN);
-		esp_advance_phase(SCptr, in_the_dark);
-		esp->msgin_len = 0;
-	}
-	esp->sreg = sbus_readb(esp->eregs + ESP_STATUS);
-	esp->sreg &= ~(ESP_STAT_INTR);
-	if ((esp->sreg & (ESP_STAT_PMSG|ESP_STAT_PCD)) == (ESP_STAT_PMSG|ESP_STAT_PCD))
-		esp_cmd(esp, ESP_CMD_MOK);
-	if ((SCptr->SCp.sent_command == in_msgindone) &&
-	    (SCptr->SCp.phase == in_freeing))
-		return esp_do_freebus(esp);
-	return do_intr_end;
-}
-
-static int esp_do_cmdbegin(struct esp *esp)
-{
-	struct scsi_cmnd *SCptr = esp->current_SC;
-
-	esp_advance_phase(SCptr, in_cmdend);
-	if (esp->erev == fashme) {
-		u32 tmp = sbus_readl(esp->dregs + DMA_CSR);
-		int i;
-
-		for (i = 0; i < esp->esp_scmdleft; i++)
-			esp->esp_command[i] = *esp->esp_scmdp++;
-		esp->esp_scmdleft = 0;
-		esp_cmd(esp, ESP_CMD_FLUSH);
-		esp_setcount(esp->eregs, i, 1);
-		esp_cmd(esp, (ESP_CMD_DMA | ESP_CMD_TI));
-		tmp |= (DMA_SCSI_DISAB | DMA_ENABLE);
-		tmp &= ~(DMA_ST_WRITE);
-		sbus_writel(i, esp->dregs + DMA_COUNT);
-		sbus_writel(esp->esp_command_dvma, esp->dregs + DMA_ADDR);
-		sbus_writel(tmp, esp->dregs + DMA_CSR);
-	} else {
-		u8 tmp;
-
-		esp_cmd(esp, ESP_CMD_FLUSH);
-		tmp = *esp->esp_scmdp++;
-		esp->esp_scmdleft--;
-		sbus_writeb(tmp, esp->eregs + ESP_FDATA);
-		esp_cmd(esp, ESP_CMD_TI);
-	}
-	return do_intr_end;
-}
-
-static int esp_do_cmddone(struct esp *esp)
-{
-	if (esp->erev == fashme)
-		dma_invalidate(esp);
-	else
-		esp_cmd(esp, ESP_CMD_NULL);
-
-	if (esp->ireg & ESP_INTR_BSERV) {
-		esp_advance_phase(esp->current_SC, in_the_dark);
-		return esp_do_phase_determine(esp);
-	}
-
-	ESPLOG(("esp%d: in do_cmddone() but didn't get BSERV interrupt.\n",
-		esp->esp_id));
-	return do_reset_bus;
-}
-
-static int esp_do_msgout(struct esp *esp)
-{
-	esp_cmd(esp, ESP_CMD_FLUSH);
-	switch (esp->msgout_len) {
-	case 1:
-		if (esp->erev == fashme)
-			hme_fifo_push(esp, &esp->cur_msgout[0], 1);
-		else
-			sbus_writeb(esp->cur_msgout[0], esp->eregs + ESP_FDATA);
-
-		esp_cmd(esp, ESP_CMD_TI);
-		break;
-
-	case 2:
-		esp->esp_command[0] = esp->cur_msgout[0];
-		esp->esp_command[1] = esp->cur_msgout[1];
-
-		if (esp->erev == fashme) {
-			hme_fifo_push(esp, &esp->cur_msgout[0], 2);
-			esp_cmd(esp, ESP_CMD_TI);
-		} else {
-			dma_setup(esp, esp->esp_command_dvma, 2, 0);
-			esp_setcount(esp->eregs, 2, 0);
-			esp_cmd(esp, ESP_CMD_DMA | ESP_CMD_TI);
-		}
-		break;
-
-	case 4:
-		esp->esp_command[0] = esp->cur_msgout[0];
-		esp->esp_command[1] = esp->cur_msgout[1];
-		esp->esp_command[2] = esp->cur_msgout[2];
-		esp->esp_command[3] = esp->cur_msgout[3];
-		esp->snip = 1;
-
-		if (esp->erev == fashme) {
-			hme_fifo_push(esp, &esp->cur_msgout[0], 4);
-			esp_cmd(esp, ESP_CMD_TI);
-		} else {
-			dma_setup(esp, esp->esp_command_dvma, 4, 0);
-			esp_setcount(esp->eregs, 4, 0);
-			esp_cmd(esp, ESP_CMD_DMA | ESP_CMD_TI);
-		}
-		break;
-
-	case 5:
-		esp->esp_command[0] = esp->cur_msgout[0];
-		esp->esp_command[1] = esp->cur_msgout[1];
-		esp->esp_command[2] = esp->cur_msgout[2];
-		esp->esp_command[3] = esp->cur_msgout[3];
-		esp->esp_command[4] = esp->cur_msgout[4];
-		esp->snip = 1;
-
-		if (esp->erev == fashme) {
-			hme_fifo_push(esp, &esp->cur_msgout[0], 5);
-			esp_cmd(esp, ESP_CMD_TI);
-		} else {
-			dma_setup(esp, esp->esp_command_dvma, 5, 0);
-			esp_setcount(esp->eregs, 5, 0);
-			esp_cmd(esp, ESP_CMD_DMA | ESP_CMD_TI);
-		}
-		break;
-
-	default:
-		/* whoops */
-		ESPMISC(("bogus msgout sending NOP\n"));
-		esp->cur_msgout[0] = NOP;
-
-		if (esp->erev == fashme) {
-			hme_fifo_push(esp, &esp->cur_msgout[0], 1);
-		} else {
-			sbus_writeb(esp->cur_msgout[0], esp->eregs + ESP_FDATA);
-		}
-
-		esp->msgout_len = 1;
-		esp_cmd(esp, ESP_CMD_TI);
-		break;
-	};
-
-	esp_advance_phase(esp->current_SC, in_msgoutdone);
-	return do_intr_end;
-}
-
-static int esp_do_msgoutdone(struct esp *esp)
-{
-	if (esp->msgout_len > 1) {
-		/* XXX HME/FAS ATN deassert workaround required,
-		 * XXX no DMA flushing, only possible ESP_CMD_FLUSH
-		 * XXX to kill the fifo.
-		 */
-		if (esp->erev != fashme) {
-			u32 tmp;
-
-			while ((tmp = sbus_readl(esp->dregs + DMA_CSR)) & DMA_PEND_READ)
-				udelay(1);
-			tmp &= ~DMA_ENABLE;
-			sbus_writel(tmp, esp->dregs + DMA_CSR);
-			dma_invalidate(esp);
-		} else {
-			esp_cmd(esp, ESP_CMD_FLUSH);
-		}
-	}
-	if (!(esp->ireg & ESP_INTR_DC)) {
-		if (esp->erev != fashme)
-			esp_cmd(esp, ESP_CMD_NULL);
-		switch (esp->sreg & ESP_STAT_PMASK) {
-		case ESP_MOP:
-			/* whoops, parity error */
-			ESPLOG(("esp%d: still in msgout, parity error assumed\n",
-				esp->esp_id));
-			if (esp->msgout_len > 1)
-				esp_cmd(esp, ESP_CMD_SATN);
-			esp_advance_phase(esp->current_SC, in_msgout);
-			return do_work_bus;
-
-		case ESP_DIP:
-			break;
-
-		default:
-			/* Happy Meal fifo is touchy... */
-			if ((esp->erev != fashme) &&
-			    !fcount(esp) &&
-			    !(((struct esp_device *)esp->current_SC->device->hostdata)->sync_max_offset))
-				esp_cmd(esp, ESP_CMD_FLUSH);
-			break;
-
-		};
-	} else {
-		ESPLOG(("esp%d: disconnect, resetting bus\n", esp->esp_id));
-		return do_reset_bus;
-	}
-
-	/* If we sent out a synchronous negotiation message, update
-	 * our state.
-	 */
-	if (esp->cur_msgout[2] == EXTENDED_MESSAGE &&
-	    esp->cur_msgout[4] == EXTENDED_SDTR) {
-		esp->snip = 1; /* anal retentiveness... */
-	}
-
-	esp->prevmsgout = esp->cur_msgout[0];
-	esp->msgout_len = 0;
-	esp_advance_phase(esp->current_SC, in_the_dark);
-	return esp_do_phase_determine(esp);
-}
-
-static int esp_bus_unexpected(struct esp *esp)
-{
-	ESPLOG(("esp%d: command in weird state %2x\n",
-		esp->esp_id, esp->current_SC->SCp.phase));
-	return do_reset_bus;
-}
-
-static espfunc_t bus_vector[] = {
-	esp_do_data_finale,
-	esp_do_data_finale,
-	esp_bus_unexpected,
-	esp_do_msgin,
-	esp_do_msgincont,
-	esp_do_msgindone,
-	esp_do_msgout,
-	esp_do_msgoutdone,
-	esp_do_cmdbegin,
-	esp_do_cmddone,
-	esp_do_status,
-	esp_do_freebus,
-	esp_do_phase_determine,
-	esp_bus_unexpected,
-	esp_bus_unexpected,
-	esp_bus_unexpected,
-};
-
-/* This is the second tier in our dual-level SCSI state machine. */
-static int esp_work_bus(struct esp *esp)
-{
-	struct scsi_cmnd *SCptr = esp->current_SC;
-	unsigned int phase;
-
-	ESPBUS(("esp_work_bus: "));
-	if (!SCptr) {
-		ESPBUS(("reconnect\n"));
-		return esp_do_reconnect(esp);
-	}
-	phase = SCptr->SCp.phase;
-	if ((phase & 0xf0) == in_phases_mask)
-		return bus_vector[(phase & 0x0f)](esp);
-	else if ((phase & 0xf0) == in_slct_mask)
-		return esp_select_complete(esp);
-	else
-		return esp_bus_unexpected(esp);
-}
-
-static espfunc_t isvc_vector[] = {
-	NULL,
-	esp_do_phase_determine,
-	esp_do_resetbus,
-	esp_finish_reset,
-	esp_work_bus
-};
-
-/* Main interrupt handler for an esp adapter. */
-static void esp_handle(struct esp *esp)
-{
-	struct scsi_cmnd *SCptr;
-	int what_next = do_intr_end;
-
-	SCptr = esp->current_SC;
-
-	/* Check for errors. */
-	esp->sreg = sbus_readb(esp->eregs + ESP_STATUS);
-	esp->sreg &= (~ESP_STAT_INTR);
-	if (esp->erev == fashme) {
-		esp->sreg2 = sbus_readb(esp->eregs + ESP_STATUS2);
-		esp->seqreg = (sbus_readb(esp->eregs + ESP_SSTEP) & ESP_STEP_VBITS);
-	}
-
-	if (esp->sreg & (ESP_STAT_SPAM)) {
-		/* Gross error, could be due to one of:
-		 *
-		 * - top of fifo overwritten, could be because
-		 *   we tried to do a synchronous transfer with
-		 *   an offset greater than ESP fifo size
-		 *
-		 * - top of command register overwritten
-		 *
-		 * - DMA setup to go in one direction, SCSI
-		 *   bus points in the other, whoops
-		 *
-		 * - weird phase change during asynchronous
-		 *   data phase while we are initiator
-		 */
-		ESPLOG(("esp%d: Gross error sreg=%2x\n", esp->esp_id, esp->sreg));
-
-		/* If a command is live on the bus we cannot safely
-		 * reset the bus, so we'll just let the pieces fall
-		 * where they may.  Here we are hoping that the
-		 * target will be able to cleanly go away soon
-		 * so we can safely reset things.
-		 */
-		if (!SCptr) {
-			ESPLOG(("esp%d: No current cmd during gross error, "
-				"resetting bus\n", esp->esp_id));
-			what_next = do_reset_bus;
-			goto state_machine;
-		}
-	}
-
-	if (sbus_readl(esp->dregs + DMA_CSR) & DMA_HNDL_ERROR) {
-		/* A DMA gate array error.  Here we must
-		 * be seeing one of two things.  Either the
-		 * virtual to physical address translation
-		 * on the SBUS could not occur, else the
-		 * translation it did get pointed to a bogus
-		 * page.  Ho hum...
-		 */
-		ESPLOG(("esp%d: DMA error %08x\n", esp->esp_id,
-			sbus_readl(esp->dregs + DMA_CSR)));
-
-		/* DMA gate array itself must be reset to clear the
-		 * error condition.
-		 */
-		esp_reset_dma(esp);
-
-		what_next = do_reset_bus;
-		goto state_machine;
-	}
-
-	esp->ireg = sbus_readb(esp->eregs + ESP_INTRPT);   /* Unlatch intr reg */
-
-	if (esp->erev == fashme) {
-		/* This chip is really losing. */
-		ESPHME(("HME["));
-
-		ESPHME(("sreg2=%02x,", esp->sreg2));
-		/* Must latch fifo before reading the interrupt
-		 * register else garbage ends up in the FIFO
-		 * which confuses the driver utterly.
-		 */
-		if (!(esp->sreg2 & ESP_STAT2_FEMPTY) ||
-		    (esp->sreg2 & ESP_STAT2_F1BYTE)) {
-			ESPHME(("fifo_workaround]"));
-			hme_fifo_read(esp);
-		} else {
-			ESPHME(("no_fifo_workaround]"));
-		}
-	}
-
-	/* No current cmd is only valid at this point when there are
-	 * commands off the bus or we are trying a reset.
-	 */
-	if (!SCptr && !esp->disconnected_SC && !(esp->ireg & ESP_INTR_SR)) {
-		/* Panic is safe, since current_SC is null. */
-		ESPLOG(("esp%d: no command in esp_handle()\n", esp->esp_id));
-		panic("esp_handle: current_SC == penguin within interrupt!");
-	}
-
-	if (esp->ireg & (ESP_INTR_IC)) {
-		/* Illegal command fed to ESP.  Outside of obvious
-		 * software bugs that could cause this, there is
-		 * a condition with esp100 where we can confuse the
-		 * ESP into an erroneous illegal command interrupt
-		 * because it does not scrape the FIFO properly
-		 * for reselection.  See esp100_reconnect_hwbug()
-		 * to see how we try very hard to avoid this.
-		 */
-		ESPLOG(("esp%d: invalid command\n", esp->esp_id));
-
-		esp_dump_state(esp);
-
-		if (SCptr != NULL) {
-			/* Devices with very buggy firmware can drop BSY
-			 * during a scatter list interrupt when using sync
-			 * mode transfers.  We continue the transfer as
-			 * expected, the target drops the bus, the ESP
-			 * gets confused, and we get a illegal command
-			 * interrupt because the bus is in the disconnected
-			 * state now and ESP_CMD_TI is only allowed when
-			 * a nexus is alive on the bus.
-			 */
-			ESPLOG(("esp%d: Forcing async and disabling disconnect for "
-				"target %d\n", esp->esp_id, SCptr->device->id));
-			SCptr->device->borken = 1; /* foo on you */
-		}
-
-		what_next = do_reset_bus;
-	} else if (!(esp->ireg & ~(ESP_INTR_FDONE | ESP_INTR_BSERV | ESP_INTR_DC))) {
-		if (SCptr) {
-			unsigned int phase = SCptr->SCp.phase;
-
-			if (phase & in_phases_mask) {
-				what_next = esp_work_bus(esp);
-			} else if (phase & in_slct_mask) {
-				what_next = esp_select_complete(esp);
-			} else {
-				ESPLOG(("esp%d: interrupt for no good reason...\n",
-					esp->esp_id));
-				what_next = do_intr_end;
-			}
-		} else {
-			ESPLOG(("esp%d: BSERV or FDONE or DC while SCptr==NULL\n",
-				esp->esp_id));
-			what_next = do_reset_bus;
-		}
-	} else if (esp->ireg & ESP_INTR_SR) {
-		ESPLOG(("esp%d: SCSI bus reset interrupt\n", esp->esp_id));
-		what_next = do_reset_complete;
-	} else if (esp->ireg & (ESP_INTR_S | ESP_INTR_SATN)) {
-		ESPLOG(("esp%d: AIEEE we have been selected by another initiator!\n",
-			esp->esp_id));
-		what_next = do_reset_bus;
-	} else if (esp->ireg & ESP_INTR_RSEL) {
-		if (SCptr == NULL) {
-			/* This is ok. */
-			what_next = esp_do_reconnect(esp);
-		} else if (SCptr->SCp.phase & in_slct_mask) {
-			/* Only selection code knows how to clean
-			 * up properly.
-			 */
-			ESPDISC(("Reselected during selection attempt\n"));
-			what_next = esp_select_complete(esp);
-		} else {
-			ESPLOG(("esp%d: Reselected while bus is busy\n",
-				esp->esp_id));
-			what_next = do_reset_bus;
-		}
-	}
-
-	/* This is tier-one in our dual level SCSI state machine. */
-state_machine:
-	while (what_next != do_intr_end) {
-		if (what_next >= do_phase_determine &&
-		    what_next < do_intr_end) {
-			what_next = isvc_vector[what_next](esp);
-		} else {
-			/* state is completely lost ;-( */
-			ESPLOG(("esp%d: interrupt engine loses state, resetting bus\n",
-				esp->esp_id));
-			what_next = do_reset_bus;
-		}
-	}
-}
-
-/* Service only the ESP described by dev_id. */
-static irqreturn_t esp_intr(int irq, void *dev_id)
-{
-	struct esp *esp = dev_id;
-	unsigned long flags;
-
-	spin_lock_irqsave(esp->ehost->host_lock, flags);
-	if (ESP_IRQ_P(esp->dregs)) {
-		ESP_INTSOFF(esp->dregs);
-
-		ESPIRQ(("I[%d:%d](", smp_processor_id(), esp->esp_id));
-		esp_handle(esp);
-		ESPIRQ((")"));
-
-		ESP_INTSON(esp->dregs);
-	}
-	spin_unlock_irqrestore(esp->ehost->host_lock, flags);
-
-	return IRQ_HANDLED;
-}
-
-static int esp_slave_alloc(struct scsi_device *SDptr)
-{
-	struct esp_device *esp_dev =
-		kmalloc(sizeof(struct esp_device), GFP_ATOMIC);
-
-	if (!esp_dev)
-		return -ENOMEM;
-	memset(esp_dev, 0, sizeof(struct esp_device));
-	SDptr->hostdata = esp_dev;
-	return 0;
-}
-
-static void esp_slave_destroy(struct scsi_device *SDptr)
-{
-	struct esp *esp = (struct esp *) SDptr->host->hostdata;
-
-	esp->targets_present &= ~(1 << SDptr->id);
-	kfree(SDptr->hostdata);
-	SDptr->hostdata = NULL;
-}
-
-static struct scsi_host_template esp_template = {
-	.module			= THIS_MODULE,
-	.name			= "esp",
-	.info			= esp_info,
-	.slave_alloc		= esp_slave_alloc,
-	.slave_destroy		= esp_slave_destroy,
-	.queuecommand		= esp_queue,
-	.eh_abort_handler	= esp_abort,
-	.eh_bus_reset_handler	= esp_reset,
-	.can_queue		= 7,
-	.this_id		= 7,
-	.sg_tablesize		= SG_ALL,
-	.cmd_per_lun		= 1,
-	.use_clustering		= ENABLE_CLUSTERING,
-	.proc_name		= "esp",
-	.proc_info		= esp_proc_info,
-};
-
-#ifndef CONFIG_SUN4
-static struct of_device_id esp_match[] = {
-	{
-		.name = "SUNW,esp",
-		.data = &esp_template,
-	},
-	{
-		.name = "SUNW,fas",
-		.data = &esp_template,
-	},
-	{
-		.name = "esp",
-		.data = &esp_template,
-	},
-	{},
-};
-MODULE_DEVICE_TABLE(of, esp_match);
-
-static struct of_platform_driver esp_sbus_driver = {
-	.name		= "esp",
-	.match_table	= esp_match,
-	.probe		= esp_sbus_probe,
-	.remove		= __devexit_p(esp_sbus_remove),
-};
-#endif
-
-static int __init esp_init(void)
-{
-#ifdef CONFIG_SUN4
-	return esp_sun4_probe(&esp_template);
-#else
-	return of_register_driver(&esp_sbus_driver, &sbus_bus_type);
-#endif
-}
-
-static void __exit esp_exit(void)
-{
-#ifdef CONFIG_SUN4
-	esp_sun4_remove();
-#else
-	of_unregister_driver(&esp_sbus_driver);
-#endif
-}
-
-MODULE_DESCRIPTION("ESP Sun SCSI driver");
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_VERSION);
-
-module_init(esp_init);
-module_exit(esp_exit);
diff --git a/drivers/scsi/esp.h b/drivers/scsi/esp.h
deleted file mode 100644
index a98cda9121f..00000000000
--- a/drivers/scsi/esp.h
+++ /dev/null
@@ -1,406 +0,0 @@
-/* $Id: esp.h,v 1.29 2001/12/11 04:55:47 davem Exp $
- * esp.h:  Defines and structures for the Sparc ESP (Enhanced SCSI
- *         Processor) driver under Linux.
- *
- * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
- */
-
-#ifndef _SPARC_ESP_H
-#define _SPARC_ESP_H
-
-/* For dvma controller register definitions. */
-#include <asm/dma.h>
-
-/* The ESP SCSI controllers have their register sets in three
- * "classes":
- *
- * 1) Registers which are both read and write.
- * 2) Registers which are read only.
- * 3) Registers which are write only.
- *
- * Yet, they all live within the same IO space.
- */
-
-/* All the ESP registers are one byte each and are accessed longwords
- * apart with a big-endian ordering to the bytes.
- */
-					/* Access    Description              Offset */
-#define ESP_TCLOW	0x00UL		/* rw  Low bits of the transfer count 0x00   */
-#define ESP_TCMED	0x04UL		/* rw  Mid bits of the transfer count 0x04   */
-#define ESP_FDATA	0x08UL		/* rw  FIFO data bits                 0x08   */
-#define ESP_CMD		0x0cUL		/* rw  SCSI command bits              0x0c   */
-#define ESP_STATUS	0x10UL		/* ro  ESP status register            0x10   */
-#define ESP_BUSID	ESP_STATUS	/* wo  Bus ID for select/reselect     0x10   */
-#define ESP_INTRPT	0x14UL		/* ro  Kind of interrupt              0x14   */
-#define ESP_TIMEO	ESP_INTRPT	/* wo  Timeout value for select/resel 0x14   */
-#define ESP_SSTEP	0x18UL		/* ro  Sequence step register         0x18   */
-#define ESP_STP		ESP_SSTEP	/* wo  Transfer period per sync       0x18   */
-#define ESP_FFLAGS	0x1cUL		/* ro  Bits of current FIFO info      0x1c   */
-#define ESP_SOFF	ESP_FFLAGS	/* wo  Sync offset                    0x1c   */
-#define ESP_CFG1	0x20UL		/* rw  First configuration register   0x20   */
-#define ESP_CFACT	0x24UL		/* wo  Clock conversion factor        0x24   */
-#define ESP_STATUS2	ESP_CFACT	/* ro  HME status2 register           0x24   */
-#define ESP_CTEST	0x28UL		/* wo  Chip test register             0x28   */
-#define ESP_CFG2	0x2cUL		/* rw  Second configuration register  0x2c   */
-#define ESP_CFG3	0x30UL		/* rw  Third configuration register   0x30   */
-#define ESP_TCHI	0x38UL		/* rw  High bits of transfer count    0x38   */
-#define ESP_UID		ESP_TCHI	/* ro  Unique ID code                 0x38   */
-#define FAS_RLO		ESP_TCHI	/* rw  HME extended counter           0x38   */
-#define ESP_FGRND	0x3cUL		/* rw  Data base for fifo             0x3c   */
-#define FAS_RHI		ESP_FGRND	/* rw  HME extended counter           0x3c   */
-#define ESP_REG_SIZE	0x40UL
-
-/* Various revisions of the ESP board. */
-enum esp_rev {
-	esp100     = 0x00,  /* NCR53C90 - very broken */
-	esp100a    = 0x01,  /* NCR53C90A */
-	esp236     = 0x02,
-	fas236     = 0x03,
-	fas100a    = 0x04,
-	fast       = 0x05,
-	fashme     = 0x06,
-	espunknown = 0x07
-};
-
-/* We allocate one of these for each scsi device and attach it to
- * SDptr->hostdata for use in the driver
- */
-struct esp_device {
-  unsigned char sync_min_period;
-  unsigned char sync_max_offset;
-  unsigned sync:1;
-  unsigned wide:1;
-  unsigned disconnect:1;
-};
-
-struct scsi_cmnd;
-
-/* We get one of these for each ESP probed. */
-struct esp {
-	void __iomem		*eregs;		/* ESP controller registers */
-	void __iomem		*dregs;		/* DMA controller registers */
-	struct sbus_dma		*dma;		/* DMA controller sw state */
-	struct Scsi_Host	*ehost;		/* Backpointer to SCSI Host */
-	struct sbus_dev		*sdev;		/* Pointer to SBus entry */
-
-	/* ESP Configuration Registers */
-	u8			config1;	/* Copy of the 1st config register */
-	u8			config2;	/* Copy of the 2nd config register */
-	u8			config3[16];	/* Copy of the 3rd config register */
-
-	/* The current command we are sending to the ESP chip.  This esp_command
-	 * ptr needs to be mapped in DVMA area so we can send commands and read
-	 * from the ESP fifo without burning precious CPU cycles.  Programmed I/O
-	 * sucks when we have the DVMA to do it for us.  The ESP is stupid and will
-	 * only send out 6, 10, and 12 byte SCSI commands, others we need to send
-	 * one byte at a time.  esp_slowcmd being set says that we are doing one
-	 * of the command types ESP doesn't understand, esp_scmdp keeps track of
-	 * which byte we are sending, esp_scmdleft says how many bytes to go.
-	 */
-	volatile u8		*esp_command;    /* Location of command (CPU view)  */
-	__u32			esp_command_dvma;/* Location of command (DVMA view) */
-	unsigned char		esp_clen;	 /* Length of this command */
-	unsigned char		esp_slowcmd;
-	unsigned char		*esp_scmdp;
-	unsigned char		esp_scmdleft;
-
-	/* The following are used to determine the cause of an IRQ. Upon every
-	 * IRQ entry we synchronize these with the hardware registers.
-	 */
-	u8			ireg;		/* Copy of ESP interrupt register */
-	u8			sreg;		/* Copy of ESP status register */
-	u8			seqreg;		/* Copy of ESP sequence step register */
-	u8			sreg2;		/* Copy of HME status2 register */
-
-	/* To save register writes to the ESP, which can be expensive, we
-	 * keep track of the previous value that various registers had for
-	 * the last target we connected to.  If they are the same for the
-	 * current target, we skip the register writes as they are not needed.
-	 */
-	u8			prev_soff, prev_stp;
-	u8			prev_cfg3, __cache_pad;
-
-	/* We also keep a cache of the previous FAS/HME DMA CSR register value.  */
-	u32			prev_hme_dmacsr;
-
-	/* The HME is the biggest piece of shit I have ever seen. */
-	u8			hme_fifo_workaround_buffer[16 * 2];
-	u8			hme_fifo_workaround_count;
-
-	/* For each target we keep track of save/restore data
-	 * pointer information.  This needs to be updated majorly
-	 * when we add support for tagged queueing.  -DaveM
-	 */
-	struct esp_pointers {
-		char			*saved_ptr;
-		struct scatterlist	*saved_buffer;
-		int			saved_this_residual;
-		int			saved_buffers_residual;
-	} data_pointers[16] /*XXX [MAX_TAGS_PER_TARGET]*/;
-
-	/* Clock periods, frequencies, synchronization, etc. */
-	unsigned int		cfreq;		/* Clock frequency in HZ */
-	unsigned int		cfact;		/* Clock conversion factor */
-	unsigned int		raw_cfact;	/* Raw copy from probing */
-	unsigned int		ccycle;		/* One ESP clock cycle */
-	unsigned int		ctick;		/* One ESP clock time */
-	unsigned int		radelay;	/* FAST chip req/ack delay */
-	unsigned int		neg_defp;	/* Default negotiation period */
-	unsigned int		sync_defp;	/* Default sync transfer period */
-	unsigned int		max_period;	/* longest our period can be */
-	unsigned int		min_period;	/* shortest period we can withstand */
-
-	struct esp		*next;		/* Next ESP we probed or NULL */
-	char			prom_name[64];	/* Name of ESP device from prom */
-	int			prom_node;	/* Prom node where ESP found */
-	int			esp_id;		/* Unique per-ESP ID number */
-
-	/* For slow to medium speed input clock rates we shoot for 5mb/s,
-	 * but for high input clock rates we try to do 10mb/s although I
-	 * don't think a transfer can even run that fast with an ESP even
-	 * with DMA2 scatter gather pipelining.
-	 */
-#define SYNC_DEFP_SLOW            0x32   /* 5mb/s  */
-#define SYNC_DEFP_FAST            0x19   /* 10mb/s */
-
-	unsigned int		snip;		/* Sync. negotiation in progress */
-	unsigned int		wnip;		/* WIDE negotiation in progress */
-	unsigned int		targets_present;/* targets spoken to before */
-
-	int		current_transfer_size;	/* Set at beginning of data dma */
-
-	u8			espcmdlog[32];	/* Log of current esp cmds sent. */
-	u8			espcmdent;	/* Current entry in esp cmd log. */
-
-	/* Misc. info about this ESP */
-	enum esp_rev		erev;		/* ESP revision */
-	int			irq;		/* SBus IRQ for this ESP */
-	int			scsi_id;	/* Who am I as initiator? */
-	int			scsi_id_mask;	/* Bitmask of 'me'. */
-	int			diff;		/* Differential SCSI bus? */
-	int			bursts;		/* Burst sizes our DVMA supports */
-
-	/* Our command queues, only one cmd lives in the current_SC queue. */
-	struct scsi_cmnd	*issue_SC;	/* Commands to be issued */
-	struct scsi_cmnd	*current_SC;	/* Who is currently working the bus */
-	struct scsi_cmnd	*disconnected_SC;/* Commands disconnected from the bus */
-
-	/* Message goo */
-	u8			cur_msgout[16];
-	u8			cur_msgin[16];
-	u8			prevmsgout, prevmsgin;
-	u8			msgout_len, msgin_len;
-	u8			msgout_ctr, msgin_ctr;
-
-	/* States that we cannot keep in the per cmd structure because they
-	 * cannot be assosciated with any specific command.
-	 */
-	u8			resetting_bus;
-	wait_queue_head_t	reset_queue;
-};
-
-/* Bitfield meanings for the above registers. */
-
-/* ESP config reg 1, read-write, found on all ESP chips */
-#define ESP_CONFIG1_ID        0x07             /* My BUS ID bits */
-#define ESP_CONFIG1_CHTEST    0x08             /* Enable ESP chip tests */
-#define ESP_CONFIG1_PENABLE   0x10             /* Enable parity checks */
-#define ESP_CONFIG1_PARTEST   0x20             /* Parity test mode enabled? */
-#define ESP_CONFIG1_SRRDISAB  0x40             /* Disable SCSI reset reports */
-#define ESP_CONFIG1_SLCABLE   0x80             /* Enable slow cable mode */
-
-/* ESP config reg 2, read-write, found only on esp100a+esp200+esp236 chips */
-#define ESP_CONFIG2_DMAPARITY 0x01             /* enable DMA Parity (200,236) */
-#define ESP_CONFIG2_REGPARITY 0x02             /* enable reg Parity (200,236) */
-#define ESP_CONFIG2_BADPARITY 0x04             /* Bad parity target abort  */
-#define ESP_CONFIG2_SCSI2ENAB 0x08             /* Enable SCSI-2 features (tmode only) */
-#define ESP_CONFIG2_HI        0x10             /* High Impedance DREQ ???  */
-#define ESP_CONFIG2_HMEFENAB  0x10             /* HME features enable */
-#define ESP_CONFIG2_BCM       0x20             /* Enable byte-ctrl (236)   */
-#define ESP_CONFIG2_DISPINT   0x20             /* Disable pause irq (hme) */
-#define ESP_CONFIG2_FENAB     0x40             /* Enable features (fas100,esp216)      */
-#define ESP_CONFIG2_SPL       0x40             /* Enable status-phase latch (esp236)   */
-#define ESP_CONFIG2_MKDONE    0x40             /* HME magic feature */
-#define ESP_CONFIG2_HME32     0x80             /* HME 32 extended */
-#define ESP_CONFIG2_MAGIC     0xe0             /* Invalid bits... */
-
-/* ESP config register 3 read-write, found only esp236+fas236+fas100a+hme chips */
-#define ESP_CONFIG3_FCLOCK    0x01             /* FAST SCSI clock rate (esp100a/hme) */
-#define ESP_CONFIG3_TEM       0x01             /* Enable thresh-8 mode (esp/fas236)  */
-#define ESP_CONFIG3_FAST      0x02             /* Enable FAST SCSI     (esp100a/hme) */
-#define ESP_CONFIG3_ADMA      0x02             /* Enable alternate-dma (esp/fas236)  */
-#define ESP_CONFIG3_TENB      0x04             /* group2 SCSI2 support (esp100a/hme) */
-#define ESP_CONFIG3_SRB       0x04             /* Save residual byte   (esp/fas236)  */
-#define ESP_CONFIG3_TMS       0x08             /* Three-byte msg's ok  (esp100a/hme) */
-#define ESP_CONFIG3_FCLK      0x08             /* Fast SCSI clock rate (esp/fas236)  */
-#define ESP_CONFIG3_IDMSG     0x10             /* ID message checking  (esp100a/hme) */
-#define ESP_CONFIG3_FSCSI     0x10             /* Enable FAST SCSI     (esp/fas236)  */
-#define ESP_CONFIG3_GTM       0x20             /* group2 SCSI2 support (esp/fas236)  */
-#define ESP_CONFIG3_IDBIT3    0x20             /* Bit 3 of HME SCSI-ID (hme)         */
-#define ESP_CONFIG3_TBMS      0x40             /* Three-byte msg's ok  (esp/fas236)  */
-#define ESP_CONFIG3_EWIDE     0x40             /* Enable Wide-SCSI     (hme)         */
-#define ESP_CONFIG3_IMS       0x80             /* ID msg chk'ng        (esp/fas236)  */
-#define ESP_CONFIG3_OBPUSH    0x80             /* Push odd-byte to dma (hme)         */
-
-/* ESP command register read-write */
-/* Group 1 commands:  These may be sent at any point in time to the ESP
- *                    chip.  None of them can generate interrupts 'cept
- *                    the "SCSI bus reset" command if you have not disabled
- *                    SCSI reset interrupts in the config1 ESP register.
- */
-#define ESP_CMD_NULL          0x00             /* Null command, ie. a nop */
-#define ESP_CMD_FLUSH         0x01             /* FIFO Flush */
-#define ESP_CMD_RC            0x02             /* Chip reset */
-#define ESP_CMD_RS            0x03             /* SCSI bus reset */
-
-/* Group 2 commands:  ESP must be an initiator and connected to a target
- *                    for these commands to work.
- */
-#define ESP_CMD_TI            0x10             /* Transfer Information */
-#define ESP_CMD_ICCSEQ        0x11             /* Initiator cmd complete sequence */
-#define ESP_CMD_MOK           0x12             /* Message okie-dokie */
-#define ESP_CMD_TPAD          0x18             /* Transfer Pad */
-#define ESP_CMD_SATN          0x1a             /* Set ATN */
-#define ESP_CMD_RATN          0x1b             /* De-assert ATN */
-
-/* Group 3 commands:  ESP must be in the MSGOUT or MSGIN state and be connected
- *                    to a target as the initiator for these commands to work.
- */
-#define ESP_CMD_SMSG          0x20             /* Send message */
-#define ESP_CMD_SSTAT         0x21             /* Send status */
-#define ESP_CMD_SDATA         0x22             /* Send data */
-#define ESP_CMD_DSEQ          0x23             /* Discontinue Sequence */
-#define ESP_CMD_TSEQ          0x24             /* Terminate Sequence */
-#define ESP_CMD_TCCSEQ        0x25             /* Target cmd cmplt sequence */
-#define ESP_CMD_DCNCT         0x27             /* Disconnect */
-#define ESP_CMD_RMSG          0x28             /* Receive Message */
-#define ESP_CMD_RCMD          0x29             /* Receive Command */
-#define ESP_CMD_RDATA         0x2a             /* Receive Data */
-#define ESP_CMD_RCSEQ         0x2b             /* Receive cmd sequence */
-
-/* Group 4 commands:  The ESP must be in the disconnected state and must
- *                    not be connected to any targets as initiator for
- *                    these commands to work.
- */
-#define ESP_CMD_RSEL          0x40             /* Reselect */
-#define ESP_CMD_SEL           0x41             /* Select w/o ATN */
-#define ESP_CMD_SELA          0x42             /* Select w/ATN */
-#define ESP_CMD_SELAS         0x43             /* Select w/ATN & STOP */
-#define ESP_CMD_ESEL          0x44             /* Enable selection */
-#define ESP_CMD_DSEL          0x45             /* Disable selections */
-#define ESP_CMD_SA3           0x46             /* Select w/ATN3 */
-#define ESP_CMD_RSEL3         0x47             /* Reselect3 */
-
-/* This bit enables the ESP's DMA on the SBus */
-#define ESP_CMD_DMA           0x80             /* Do DMA? */
-
-
-/* ESP status register read-only */
-#define ESP_STAT_PIO          0x01             /* IO phase bit */
-#define ESP_STAT_PCD          0x02             /* CD phase bit */
-#define ESP_STAT_PMSG         0x04             /* MSG phase bit */
-#define ESP_STAT_PMASK        0x07             /* Mask of phase bits */
-#define ESP_STAT_TDONE        0x08             /* Transfer Completed */
-#define ESP_STAT_TCNT         0x10             /* Transfer Counter Is Zero */
-#define ESP_STAT_PERR         0x20             /* Parity error */
-#define ESP_STAT_SPAM         0x40             /* Real bad error */
-/* This indicates the 'interrupt pending' condition on esp236, it is a reserved
- * bit on other revs of the ESP.
- */
-#define ESP_STAT_INTR         0x80             /* Interrupt */
-
-/* HME only: status 2 register */
-#define ESP_STAT2_SCHBIT      0x01 /* Upper bits 3-7 of sstep enabled */
-#define ESP_STAT2_FFLAGS      0x02 /* The fifo flags are now latched */
-#define ESP_STAT2_XCNT        0x04 /* The transfer counter is latched */
-#define ESP_STAT2_CREGA       0x08 /* The command reg is active now */
-#define ESP_STAT2_WIDE        0x10 /* Interface on this adapter is wide */
-#define ESP_STAT2_F1BYTE      0x20 /* There is one byte at top of fifo */
-#define ESP_STAT2_FMSB        0x40 /* Next byte in fifo is most significant */
-#define ESP_STAT2_FEMPTY      0x80 /* FIFO is empty */
-
-/* The status register can be masked with ESP_STAT_PMASK and compared
- * with the following values to determine the current phase the ESP
- * (at least thinks it) is in.  For our purposes we also add our own
- * software 'done' bit for our phase management engine.
- */
-#define ESP_DOP   (0)                                       /* Data Out  */
-#define ESP_DIP   (ESP_STAT_PIO)                            /* Data In   */
-#define ESP_CMDP  (ESP_STAT_PCD)                            /* Command   */
-#define ESP_STATP (ESP_STAT_PCD|ESP_STAT_PIO)               /* Status    */
-#define ESP_MOP   (ESP_STAT_PMSG|ESP_STAT_PCD)              /* Message Out */
-#define ESP_MIP   (ESP_STAT_PMSG|ESP_STAT_PCD|ESP_STAT_PIO) /* Message In */
-
-/* ESP interrupt register read-only */
-#define ESP_INTR_S            0x01             /* Select w/o ATN */
-#define ESP_INTR_SATN         0x02             /* Select w/ATN */
-#define ESP_INTR_RSEL         0x04             /* Reselected */
-#define ESP_INTR_FDONE        0x08             /* Function done */
-#define ESP_INTR_BSERV        0x10             /* Bus service */
-#define ESP_INTR_DC           0x20             /* Disconnect */
-#define ESP_INTR_IC           0x40             /* Illegal command given */
-#define ESP_INTR_SR           0x80             /* SCSI bus reset detected */
-
-/* Interrupt status macros */
-#define ESP_SRESET_IRQ(esp)  ((esp)->intreg & (ESP_INTR_SR))
-#define ESP_ILLCMD_IRQ(esp)  ((esp)->intreg & (ESP_INTR_IC))
-#define ESP_SELECT_WITH_ATN_IRQ(esp)     ((esp)->intreg & (ESP_INTR_SATN))
-#define ESP_SELECT_WITHOUT_ATN_IRQ(esp)  ((esp)->intreg & (ESP_INTR_S))
-#define ESP_SELECTION_IRQ(esp)  ((ESP_SELECT_WITH_ATN_IRQ(esp)) ||         \
-				 (ESP_SELECT_WITHOUT_ATN_IRQ(esp)))
-#define ESP_RESELECTION_IRQ(esp)         ((esp)->intreg & (ESP_INTR_RSEL))
-
-/* ESP sequence step register read-only */
-#define ESP_STEP_VBITS        0x07             /* Valid bits */
-#define ESP_STEP_ASEL         0x00             /* Selection&Arbitrate cmplt */
-#define ESP_STEP_SID          0x01             /* One msg byte sent */
-#define ESP_STEP_NCMD         0x02             /* Was not in command phase */
-#define ESP_STEP_PPC          0x03             /* Early phase chg caused cmnd
-                                                * bytes to be lost
-                                                */
-#define ESP_STEP_FINI4        0x04             /* Command was sent ok */
-
-/* Ho hum, some ESP's set the step register to this as well... */
-#define ESP_STEP_FINI5        0x05
-#define ESP_STEP_FINI6        0x06
-#define ESP_STEP_FINI7        0x07
-
-/* ESP chip-test register read-write */
-#define ESP_TEST_TARG         0x01             /* Target test mode */
-#define ESP_TEST_INI          0x02             /* Initiator test mode */
-#define ESP_TEST_TS           0x04             /* Tristate test mode */
-
-/* ESP unique ID register read-only, found on fas236+fas100a only */
-#define ESP_UID_F100A         0x00             /* ESP FAS100A  */
-#define ESP_UID_F236          0x02             /* ESP FAS236   */
-#define ESP_UID_REV           0x07             /* ESP revision */
-#define ESP_UID_FAM           0xf8             /* ESP family   */
-
-/* ESP fifo flags register read-only */
-/* Note that the following implies a 16 byte FIFO on the ESP. */
-#define ESP_FF_FBYTES         0x1f             /* Num bytes in FIFO */
-#define ESP_FF_ONOTZERO       0x20             /* offset ctr not zero (esp100) */
-#define ESP_FF_SSTEP          0xe0             /* Sequence step */
-
-/* ESP clock conversion factor register write-only */
-#define ESP_CCF_F0            0x00             /* 35.01MHz - 40MHz */
-#define ESP_CCF_NEVER         0x01             /* Set it to this and die */
-#define ESP_CCF_F2            0x02             /* 10MHz */
-#define ESP_CCF_F3            0x03             /* 10.01MHz - 15MHz */
-#define ESP_CCF_F4            0x04             /* 15.01MHz - 20MHz */
-#define ESP_CCF_F5            0x05             /* 20.01MHz - 25MHz */
-#define ESP_CCF_F6            0x06             /* 25.01MHz - 30MHz */
-#define ESP_CCF_F7            0x07             /* 30.01MHz - 35MHz */
-
-/* HME only... */
-#define ESP_BUSID_RESELID     0x10
-#define ESP_BUSID_CTR32BIT    0x40
-
-#define ESP_BUS_TIMEOUT        275             /* In milli-seconds */
-#define ESP_TIMEO_CONST       8192
-#define ESP_NEG_DEFP(mhz, cfact) \
-        ((ESP_BUS_TIMEOUT * ((mhz) / 1000)) / (8192 * (cfact)))
-#define ESP_MHZ_TO_CYCLE(mhertz)  ((1000000000) / ((mhertz) / 1000))
-#define ESP_TICK(ccf, cycle)  ((7682 * (ccf) * (cycle) / 1000))
-
-#endif /* !(_SPARC_ESP_H) */
diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c
new file mode 100644
index 00000000000..3cd5bf723da
--- /dev/null
+++ b/drivers/scsi/esp_scsi.c
@@ -0,0 +1,2710 @@
+/* esp_scsi.c: ESP SCSI driver.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/completion.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/dma.h>
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/scsi_dbg.h>
+#include <scsi/scsi_transport_spi.h>
+
+#include "esp_scsi.h"
+
+#define DRV_MODULE_NAME		"esp"
+#define PFX DRV_MODULE_NAME	": "
+#define DRV_VERSION		"2.000"
+#define DRV_MODULE_RELDATE	"April 19, 2007"
+
+/* SCSI bus reset settle time in seconds.  */
+static int esp_bus_reset_settle = 3;
+
+static u32 esp_debug;
+#define ESP_DEBUG_INTR		0x00000001
+#define ESP_DEBUG_SCSICMD	0x00000002
+#define ESP_DEBUG_RESET		0x00000004
+#define ESP_DEBUG_MSGIN		0x00000008
+#define ESP_DEBUG_MSGOUT	0x00000010
+#define ESP_DEBUG_CMDDONE	0x00000020
+#define ESP_DEBUG_DISCONNECT	0x00000040
+#define ESP_DEBUG_DATASTART	0x00000080
+#define ESP_DEBUG_DATADONE	0x00000100
+#define ESP_DEBUG_RECONNECT	0x00000200
+#define ESP_DEBUG_AUTOSENSE	0x00000400
+
+#define esp_log_intr(f, a...) \
+do {	if (esp_debug & ESP_DEBUG_INTR) \
+		printk(f, ## a); \
+} while (0)
+
+#define esp_log_reset(f, a...) \
+do {	if (esp_debug & ESP_DEBUG_RESET) \
+		printk(f, ## a); \
+} while (0)
+
+#define esp_log_msgin(f, a...) \
+do {	if (esp_debug & ESP_DEBUG_MSGIN) \
+		printk(f, ## a); \
+} while (0)
+
+#define esp_log_msgout(f, a...) \
+do {	if (esp_debug & ESP_DEBUG_MSGOUT) \
+		printk(f, ## a); \
+} while (0)
+
+#define esp_log_cmddone(f, a...) \
+do {	if (esp_debug & ESP_DEBUG_CMDDONE) \
+		printk(f, ## a); \
+} while (0)
+
+#define esp_log_disconnect(f, a...) \
+do {	if (esp_debug & ESP_DEBUG_DISCONNECT) \
+		printk(f, ## a); \
+} while (0)
+
+#define esp_log_datastart(f, a...) \
+do {	if (esp_debug & ESP_DEBUG_DATASTART) \
+		printk(f, ## a); \
+} while (0)
+
+#define esp_log_datadone(f, a...) \
+do {	if (esp_debug & ESP_DEBUG_DATADONE) \
+		printk(f, ## a); \
+} while (0)
+
+#define esp_log_reconnect(f, a...) \
+do {	if (esp_debug & ESP_DEBUG_RECONNECT) \
+		printk(f, ## a); \
+} while (0)
+
+#define esp_log_autosense(f, a...) \
+do {	if (esp_debug & ESP_DEBUG_AUTOSENSE) \
+		printk(f, ## a); \
+} while (0)
+
+#define esp_read8(REG)		esp->ops->esp_read8(esp, REG)
+#define esp_write8(VAL,REG)	esp->ops->esp_write8(esp, VAL, REG)
+
+static void esp_log_fill_regs(struct esp *esp,
+			      struct esp_event_ent *p)
+{
+	p->sreg = esp->sreg;
+	p->seqreg = esp->seqreg;
+	p->sreg2 = esp->sreg2;
+	p->ireg = esp->ireg;
+	p->select_state = esp->select_state;
+	p->event = esp->event;
+}
+
+void scsi_esp_cmd(struct esp *esp, u8 val)
+{
+	struct esp_event_ent *p;
+	int idx = esp->esp_event_cur;
+
+	p = &esp->esp_event_log[idx];
+	p->type = ESP_EVENT_TYPE_CMD;
+	p->val = val;
+	esp_log_fill_regs(esp, p);
+
+	esp->esp_event_cur = (idx + 1) & (ESP_EVENT_LOG_SZ - 1);
+
+	esp_write8(val, ESP_CMD);
+}
+EXPORT_SYMBOL(scsi_esp_cmd);
+
+static void esp_event(struct esp *esp, u8 val)
+{
+	struct esp_event_ent *p;
+	int idx = esp->esp_event_cur;
+
+	p = &esp->esp_event_log[idx];
+	p->type = ESP_EVENT_TYPE_EVENT;
+	p->val = val;
+	esp_log_fill_regs(esp, p);
+
+	esp->esp_event_cur = (idx + 1) & (ESP_EVENT_LOG_SZ - 1);
+
+	esp->event = val;
+}
+
+static void esp_dump_cmd_log(struct esp *esp)
+{
+	int idx = esp->esp_event_cur;
+	int stop = idx;
+
+	printk(KERN_INFO PFX "esp%d: Dumping command log\n",
+	       esp->host->unique_id);
+	do {
+		struct esp_event_ent *p = &esp->esp_event_log[idx];
+
+		printk(KERN_INFO PFX "esp%d: ent[%d] %s ",
+		       esp->host->unique_id, idx,
+		       p->type == ESP_EVENT_TYPE_CMD ? "CMD" : "EVENT");
+
+		printk("val[%02x] sreg[%02x] seqreg[%02x] "
+		       "sreg2[%02x] ireg[%02x] ss[%02x] event[%02x]\n",
+		       p->val, p->sreg, p->seqreg,
+		       p->sreg2, p->ireg, p->select_state, p->event);
+
+		idx = (idx + 1) & (ESP_EVENT_LOG_SZ - 1);
+	} while (idx != stop);
+}
+
+static void esp_flush_fifo(struct esp *esp)
+{
+	scsi_esp_cmd(esp, ESP_CMD_FLUSH);
+	if (esp->rev == ESP236) {
+		int lim = 1000;
+
+		while (esp_read8(ESP_FFLAGS) & ESP_FF_FBYTES) {
+			if (--lim == 0) {
+				printk(KERN_ALERT PFX "esp%d: ESP_FF_BYTES "
+				       "will not clear!\n",
+				       esp->host->unique_id);
+				break;
+			}
+			udelay(1);
+		}
+	}
+}
+
+static void hme_read_fifo(struct esp *esp)
+{
+	int fcnt = esp_read8(ESP_FFLAGS) & ESP_FF_FBYTES;
+	int idx = 0;
+
+	while (fcnt--) {
+		esp->fifo[idx++] = esp_read8(ESP_FDATA);
+		esp->fifo[idx++] = esp_read8(ESP_FDATA);
+	}
+	if (esp->sreg2 & ESP_STAT2_F1BYTE) {
+		esp_write8(0, ESP_FDATA);
+		esp->fifo[idx++] = esp_read8(ESP_FDATA);
+		scsi_esp_cmd(esp, ESP_CMD_FLUSH);
+	}
+	esp->fifo_cnt = idx;
+}
+
+static void esp_set_all_config3(struct esp *esp, u8 val)
+{
+	int i;
+
+	for (i = 0; i < ESP_MAX_TARGET; i++)
+		esp->target[i].esp_config3 = val;
+}
+
+/* Reset the ESP chip, _not_ the SCSI bus. */
+static void esp_reset_esp(struct esp *esp)
+{
+	u8 family_code, version;
+
+	/* Now reset the ESP chip */
+	scsi_esp_cmd(esp, ESP_CMD_RC);
+	scsi_esp_cmd(esp, ESP_CMD_NULL | ESP_CMD_DMA);
+	scsi_esp_cmd(esp, ESP_CMD_NULL | ESP_CMD_DMA);
+
+	/* Reload the configuration registers */
+	esp_write8(esp->cfact, ESP_CFACT);
+
+	esp->prev_stp = 0;
+	esp_write8(esp->prev_stp, ESP_STP);
+
+	esp->prev_soff = 0;
+	esp_write8(esp->prev_soff, ESP_SOFF);
+
+	esp_write8(esp->neg_defp, ESP_TIMEO);
+
+	/* This is the only point at which it is reliable to read
+	 * the ID-code for a fast ESP chip variants.
+	 */
+	esp->max_period = ((35 * esp->ccycle) / 1000);
+	if (esp->rev == FAST) {
+		version = esp_read8(ESP_UID);
+		family_code = (version & 0xf8) >> 3;
+		if (family_code == 0x02)
+			esp->rev = FAS236;
+		else if (family_code == 0x0a)
+			esp->rev = FASHME; /* Version is usually '5'. */
+		else
+			esp->rev = FAS100A;
+		esp->min_period = ((4 * esp->ccycle) / 1000);
+	} else {
+		esp->min_period = ((5 * esp->ccycle) / 1000);
+	}
+	esp->max_period = (esp->max_period + 3)>>2;
+	esp->min_period = (esp->min_period + 3)>>2;
+
+	esp_write8(esp->config1, ESP_CFG1);
+	switch (esp->rev) {
+	case ESP100:
+		/* nothing to do */
+		break;
+
+	case ESP100A:
+		esp_write8(esp->config2, ESP_CFG2);
+		break;
+
+	case ESP236:
+		/* Slow 236 */
+		esp_write8(esp->config2, ESP_CFG2);
+		esp->prev_cfg3 = esp->target[0].esp_config3;
+		esp_write8(esp->prev_cfg3, ESP_CFG3);
+		break;
+
+	case FASHME:
+		esp->config2 |= (ESP_CONFIG2_HME32 | ESP_CONFIG2_HMEFENAB);
+		/* fallthrough... */
+
+	case FAS236:
+		/* Fast 236 or HME */
+		esp_write8(esp->config2, ESP_CFG2);
+		if (esp->rev == FASHME) {
+			u8 cfg3 = esp->target[0].esp_config3;
+
+			cfg3 |= ESP_CONFIG3_FCLOCK | ESP_CONFIG3_OBPUSH;
+			if (esp->scsi_id >= 8)
+				cfg3 |= ESP_CONFIG3_IDBIT3;
+			esp_set_all_config3(esp, cfg3);
+		} else {
+			u32 cfg3 = esp->target[0].esp_config3;
+
+			cfg3 |= ESP_CONFIG3_FCLK;
+			esp_set_all_config3(esp, cfg3);
+		}
+		esp->prev_cfg3 = esp->target[0].esp_config3;
+		esp_write8(esp->prev_cfg3, ESP_CFG3);
+		if (esp->rev == FASHME) {
+			esp->radelay = 80;
+		} else {
+			if (esp->flags & ESP_FLAG_DIFFERENTIAL)
+				esp->radelay = 0;
+			else
+				esp->radelay = 96;
+		}
+		break;
+
+	case FAS100A:
+		/* Fast 100a */
+		esp_write8(esp->config2, ESP_CFG2);
+		esp_set_all_config3(esp,
+				    (esp->target[0].esp_config3 |
+				     ESP_CONFIG3_FCLOCK));
+		esp->prev_cfg3 = esp->target[0].esp_config3;
+		esp_write8(esp->prev_cfg3, ESP_CFG3);
+		esp->radelay = 32;
+		break;
+
+	default:
+		break;
+	}
+
+	/* Eat any bitrot in the chip */
+	esp_read8(ESP_INTRPT);
+	udelay(100);
+}
+
+static void esp_map_dma(struct esp *esp, struct scsi_cmnd *cmd)
+{
+	struct esp_cmd_priv *spriv = ESP_CMD_PRIV(cmd);
+	struct scatterlist *sg = cmd->request_buffer;
+	int dir = cmd->sc_data_direction;
+	int total, i;
+
+	if (dir == DMA_NONE)
+		return;
+
+	BUG_ON(cmd->use_sg == 0);
+
+	spriv->u.num_sg = esp->ops->map_sg(esp, sg,
+					   cmd->use_sg, dir);
+	spriv->cur_residue = sg_dma_len(sg);
+	spriv->cur_sg = sg;
+
+	total = 0;
+	for (i = 0; i < spriv->u.num_sg; i++)
+		total += sg_dma_len(&sg[i]);
+	spriv->tot_residue = total;
+}
+
+static dma_addr_t esp_cur_dma_addr(struct esp_cmd_entry *ent,
+				   struct scsi_cmnd *cmd)
+{
+	struct esp_cmd_priv *p = ESP_CMD_PRIV(cmd);
+
+	if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) {
+		return ent->sense_dma +
+			(ent->sense_ptr - cmd->sense_buffer);
+	}
+
+	return sg_dma_address(p->cur_sg) +
+		(sg_dma_len(p->cur_sg) -
+		 p->cur_residue);
+}
+
+static unsigned int esp_cur_dma_len(struct esp_cmd_entry *ent,
+				    struct scsi_cmnd *cmd)
+{
+	struct esp_cmd_priv *p = ESP_CMD_PRIV(cmd);
+
+	if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) {
+		return SCSI_SENSE_BUFFERSIZE -
+			(ent->sense_ptr - cmd->sense_buffer);
+	}
+	return p->cur_residue;
+}
+
+static void esp_advance_dma(struct esp *esp, struct esp_cmd_entry *ent,
+			    struct scsi_cmnd *cmd, unsigned int len)
+{
+	struct esp_cmd_priv *p = ESP_CMD_PRIV(cmd);
+
+	if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) {
+		ent->sense_ptr += len;
+		return;
+	}
+
+	p->cur_residue -= len;
+	p->tot_residue -= len;
+	if (p->cur_residue < 0 || p->tot_residue < 0) {
+		printk(KERN_ERR PFX "esp%d: Data transfer overflow.\n",
+		       esp->host->unique_id);
+		printk(KERN_ERR PFX "esp%d: cur_residue[%d] tot_residue[%d] "
+		       "len[%u]\n",
+		       esp->host->unique_id,
+		       p->cur_residue, p->tot_residue, len);
+		p->cur_residue = 0;
+		p->tot_residue = 0;
+	}
+	if (!p->cur_residue && p->tot_residue) {
+		p->cur_sg++;
+		p->cur_residue = sg_dma_len(p->cur_sg);
+	}
+}
+
+static void esp_unmap_dma(struct esp *esp, struct scsi_cmnd *cmd)
+{
+	struct esp_cmd_priv *spriv = ESP_CMD_PRIV(cmd);
+	int dir = cmd->sc_data_direction;
+
+	if (dir == DMA_NONE)
+		return;
+
+	esp->ops->unmap_sg(esp, cmd->request_buffer,
+			   spriv->u.num_sg, dir);
+}
+
+static void esp_save_pointers(struct esp *esp, struct esp_cmd_entry *ent)
+{
+	struct scsi_cmnd *cmd = ent->cmd;
+	struct esp_cmd_priv *spriv = ESP_CMD_PRIV(cmd);
+
+	if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) {
+		ent->saved_sense_ptr = ent->sense_ptr;
+		return;
+	}
+	ent->saved_cur_residue = spriv->cur_residue;
+	ent->saved_cur_sg = spriv->cur_sg;
+	ent->saved_tot_residue = spriv->tot_residue;
+}
+
+static void esp_restore_pointers(struct esp *esp, struct esp_cmd_entry *ent)
+{
+	struct scsi_cmnd *cmd = ent->cmd;
+	struct esp_cmd_priv *spriv = ESP_CMD_PRIV(cmd);
+
+	if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) {
+		ent->sense_ptr = ent->saved_sense_ptr;
+		return;
+	}
+	spriv->cur_residue = ent->saved_cur_residue;
+	spriv->cur_sg = ent->saved_cur_sg;
+	spriv->tot_residue = ent->saved_tot_residue;
+}
+
+static void esp_check_command_len(struct esp *esp, struct scsi_cmnd *cmd)
+{
+	if (cmd->cmd_len == 6 ||
+	    cmd->cmd_len == 10 ||
+	    cmd->cmd_len == 12) {
+		esp->flags &= ~ESP_FLAG_DOING_SLOWCMD;
+	} else {
+		esp->flags |= ESP_FLAG_DOING_SLOWCMD;
+	}
+}
+
+static void esp_write_tgt_config3(struct esp *esp, int tgt)
+{
+	if (esp->rev > ESP100A) {
+		u8 val = esp->target[tgt].esp_config3;
+
+		if (val != esp->prev_cfg3) {
+			esp->prev_cfg3 = val;
+			esp_write8(val, ESP_CFG3);
+		}
+	}
+}
+
+static void esp_write_tgt_sync(struct esp *esp, int tgt)
+{
+	u8 off = esp->target[tgt].esp_offset;
+	u8 per = esp->target[tgt].esp_period;
+
+	if (off != esp->prev_soff) {
+		esp->prev_soff = off;
+		esp_write8(off, ESP_SOFF);
+	}
+	if (per != esp->prev_stp) {
+		esp->prev_stp = per;
+		esp_write8(per, ESP_STP);
+	}
+}
+
+static u32 esp_dma_length_limit(struct esp *esp, u32 dma_addr, u32 dma_len)
+{
+	if (esp->rev == FASHME) {
+		/* Arbitrary segment boundaries, 24-bit counts.  */
+		if (dma_len > (1U << 24))
+			dma_len = (1U << 24);
+	} else {
+		u32 base, end;
+
+		/* ESP chip limits other variants by 16-bits of transfer
+		 * count.  Actually on FAS100A and FAS236 we could get
+		 * 24-bits of transfer count by enabling ESP_CONFIG2_FENAB
+		 * in the ESP_CFG2 register but that causes other unwanted
+		 * changes so we don't use it currently.
+		 */
+		if (dma_len > (1U << 16))
+			dma_len = (1U << 16);
+
+		/* All of the DMA variants hooked up to these chips
+		 * cannot handle crossing a 24-bit address boundary.
+		 */
+		base = dma_addr & ((1U << 24) - 1U);
+		end = base + dma_len;
+		if (end > (1U << 24))
+			end = (1U <<24);
+		dma_len = end - base;
+	}
+	return dma_len;
+}
+
+static int esp_need_to_nego_wide(struct esp_target_data *tp)
+{
+	struct scsi_target *target = tp->starget;
+
+	return spi_width(target) != tp->nego_goal_width;
+}
+
+static int esp_need_to_nego_sync(struct esp_target_data *tp)
+{
+	struct scsi_target *target = tp->starget;
+
+	/* When offset is zero, period is "don't care".  */
+	if (!spi_offset(target) && !tp->nego_goal_offset)
+		return 0;
+
+	if (spi_offset(target) == tp->nego_goal_offset &&
+	    spi_period(target) == tp->nego_goal_period)
+		return 0;
+
+	return 1;
+}
+
+static int esp_alloc_lun_tag(struct esp_cmd_entry *ent,
+			     struct esp_lun_data *lp)
+{
+	if (!ent->tag[0]) {
+		/* Non-tagged, slot already taken?  */
+		if (lp->non_tagged_cmd)
+			return -EBUSY;
+
+		if (lp->hold) {
+			/* We are being held by active tagged
+			 * commands.
+			 */
+			if (lp->num_tagged)
+				return -EBUSY;
+
+			/* Tagged commands completed, we can unplug
+			 * the queue and run this untagged command.
+			 */
+			lp->hold = 0;
+		} else if (lp->num_tagged) {
+			/* Plug the queue until num_tagged decreases
+			 * to zero in esp_free_lun_tag.
+			 */
+			lp->hold = 1;
+			return -EBUSY;
+		}
+
+		lp->non_tagged_cmd = ent;
+		return 0;
+	} else {
+		/* Tagged command, see if blocked by a
+		 * non-tagged one.
+		 */
+		if (lp->non_tagged_cmd || lp->hold)
+			return -EBUSY;
+	}
+
+	BUG_ON(lp->tagged_cmds[ent->tag[1]]);
+
+	lp->tagged_cmds[ent->tag[1]] = ent;
+	lp->num_tagged++;
+
+	return 0;
+}
+
+static void esp_free_lun_tag(struct esp_cmd_entry *ent,
+			     struct esp_lun_data *lp)
+{
+	if (ent->tag[0]) {
+		BUG_ON(lp->tagged_cmds[ent->tag[1]] != ent);
+		lp->tagged_cmds[ent->tag[1]] = NULL;
+		lp->num_tagged--;
+	} else {
+		BUG_ON(lp->non_tagged_cmd != ent);
+		lp->non_tagged_cmd = NULL;
+	}
+}
+
+/* When a contingent allegiance conditon is created, we force feed a
+ * REQUEST_SENSE command to the device to fetch the sense data.  I
+ * tried many other schemes, relying on the scsi error handling layer
+ * to send out the REQUEST_SENSE automatically, but this was difficult
+ * to get right especially in the presence of applications like smartd
+ * which use SG_IO to send out their own REQUEST_SENSE commands.
+ */
+static void esp_autosense(struct esp *esp, struct esp_cmd_entry *ent)
+{
+	struct scsi_cmnd *cmd = ent->cmd;
+	struct scsi_device *dev = cmd->device;
+	int tgt, lun;
+	u8 *p, val;
+
+	tgt = dev->id;
+	lun = dev->lun;
+
+
+	if (!ent->sense_ptr) {
+		esp_log_autosense("esp%d: Doing auto-sense for "
+				  "tgt[%d] lun[%d]\n",
+				  esp->host->unique_id, tgt, lun);
+
+		ent->sense_ptr = cmd->sense_buffer;
+		ent->sense_dma = esp->ops->map_single(esp,
+						      ent->sense_ptr,
+						      SCSI_SENSE_BUFFERSIZE,
+						      DMA_FROM_DEVICE);
+	}
+	ent->saved_sense_ptr = ent->sense_ptr;
+
+	esp->active_cmd = ent;
+
+	p = esp->command_block;
+	esp->msg_out_len = 0;
+
+	*p++ = IDENTIFY(0, lun);
+	*p++ = REQUEST_SENSE;
+	*p++ = ((dev->scsi_level <= SCSI_2) ?
+		(lun << 5) : 0);
+	*p++ = 0;
+	*p++ = 0;
+	*p++ = SCSI_SENSE_BUFFERSIZE;
+	*p++ = 0;
+
+	esp->select_state = ESP_SELECT_BASIC;
+
+	val = tgt;
+	if (esp->rev == FASHME)
+		val |= ESP_BUSID_RESELID | ESP_BUSID_CTR32BIT;
+	esp_write8(val, ESP_BUSID);
+
+	esp_write_tgt_sync(esp, tgt);
+	esp_write_tgt_config3(esp, tgt);
+
+	val = (p - esp->command_block);
+
+	if (esp->rev == FASHME)
+		scsi_esp_cmd(esp, ESP_CMD_FLUSH);
+	esp->ops->send_dma_cmd(esp, esp->command_block_dma,
+			       val, 16, 0, ESP_CMD_DMA | ESP_CMD_SELA);
+}
+
+static struct esp_cmd_entry *find_and_prep_issuable_command(struct esp *esp)
+{
+	struct esp_cmd_entry *ent;
+
+	list_for_each_entry(ent, &esp->queued_cmds, list) {
+		struct scsi_cmnd *cmd = ent->cmd;
+		struct scsi_device *dev = cmd->device;
+		struct esp_lun_data *lp = dev->hostdata;
+
+		if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) {
+			ent->tag[0] = 0;
+			ent->tag[1] = 0;
+			return ent;
+		}
+
+		if (!scsi_populate_tag_msg(cmd, &ent->tag[0])) {
+			ent->tag[0] = 0;
+			ent->tag[1] = 0;
+		}
+
+		if (esp_alloc_lun_tag(ent, lp) < 0)
+			continue;
+
+		return ent;
+	}
+
+	return NULL;
+}
+
+static void esp_maybe_execute_command(struct esp *esp)
+{
+	struct esp_target_data *tp;
+	struct esp_lun_data *lp;
+	struct scsi_device *dev;
+	struct scsi_cmnd *cmd;
+	struct esp_cmd_entry *ent;
+	int tgt, lun, i;
+	u32 val, start_cmd;
+	u8 *p;
+
+	if (esp->active_cmd ||
+	    (esp->flags & ESP_FLAG_RESETTING))
+		return;
+
+	ent = find_and_prep_issuable_command(esp);
+	if (!ent)
+		return;
+
+	if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) {
+		esp_autosense(esp, ent);
+		return;
+	}
+
+	cmd = ent->cmd;
+	dev = cmd->device;
+	tgt = dev->id;
+	lun = dev->lun;
+	tp = &esp->target[tgt];
+	lp = dev->hostdata;
+
+	list_del(&ent->list);
+	list_add(&ent->list, &esp->active_cmds);
+
+	esp->active_cmd = ent;
+
+	esp_map_dma(esp, cmd);
+	esp_save_pointers(esp, ent);
+
+	esp_check_command_len(esp, cmd);
+
+	p = esp->command_block;
+
+	esp->msg_out_len = 0;
+	if (tp->flags & ESP_TGT_CHECK_NEGO) {
+		/* Need to negotiate.  If the target is broken
+		 * go for synchronous transfers and non-wide.
+		 */
+		if (tp->flags & ESP_TGT_BROKEN) {
+			tp->flags &= ~ESP_TGT_DISCONNECT;
+			tp->nego_goal_period = 0;
+			tp->nego_goal_offset = 0;
+			tp->nego_goal_width = 0;
+			tp->nego_goal_tags = 0;
+		}
+
+		/* If the settings are not changing, skip this.  */
+		if (spi_width(tp->starget) == tp->nego_goal_width &&
+		    spi_period(tp->starget) == tp->nego_goal_period &&
+		    spi_offset(tp->starget) == tp->nego_goal_offset) {
+			tp->flags &= ~ESP_TGT_CHECK_NEGO;
+			goto build_identify;
+		}
+
+		if (esp->rev == FASHME && esp_need_to_nego_wide(tp)) {
+			esp->msg_out_len =
+				spi_populate_width_msg(&esp->msg_out[0],
+						       (tp->nego_goal_width ?
+							1 : 0));
+			tp->flags |= ESP_TGT_NEGO_WIDE;
+		} else if (esp_need_to_nego_sync(tp)) {
+			esp->msg_out_len =
+				spi_populate_sync_msg(&esp->msg_out[0],
+						      tp->nego_goal_period,
+						      tp->nego_goal_offset);
+			tp->flags |= ESP_TGT_NEGO_SYNC;
+		} else {
+			tp->flags &= ~ESP_TGT_CHECK_NEGO;
+		}
+
+		/* Process it like a slow command.  */
+		if (tp->flags & (ESP_TGT_NEGO_WIDE | ESP_TGT_NEGO_SYNC))
+			esp->flags |= ESP_FLAG_DOING_SLOWCMD;
+	}
+
+build_identify:
+	/* If we don't have a lun-data struct yet, we're probing
+	 * so do not disconnect.  Also, do not disconnect unless
+	 * we have a tag on this command.
+	 */
+	if (lp && (tp->flags & ESP_TGT_DISCONNECT) && ent->tag[0])
+		*p++ = IDENTIFY(1, lun);
+	else
+		*p++ = IDENTIFY(0, lun);
+
+	if (ent->tag[0] && esp->rev == ESP100) {
+		/* ESP100 lacks select w/atn3 command, use select
+		 * and stop instead.
+		 */
+		esp->flags |= ESP_FLAG_DOING_SLOWCMD;
+	}
+
+	if (!(esp->flags & ESP_FLAG_DOING_SLOWCMD)) {
+		start_cmd = ESP_CMD_DMA | ESP_CMD_SELA;
+		if (ent->tag[0]) {
+			*p++ = ent->tag[0];
+			*p++ = ent->tag[1];
+
+			start_cmd = ESP_CMD_DMA | ESP_CMD_SA3;
+		}
+
+		for (i = 0; i < cmd->cmd_len; i++)
+			*p++ = cmd->cmnd[i];
+
+		esp->select_state = ESP_SELECT_BASIC;
+	} else {
+		esp->cmd_bytes_left = cmd->cmd_len;
+		esp->cmd_bytes_ptr = &cmd->cmnd[0];
+
+		if (ent->tag[0]) {
+			for (i = esp->msg_out_len - 1;
+			     i >= 0; i--)
+				esp->msg_out[i + 2] = esp->msg_out[i];
+			esp->msg_out[0] = ent->tag[0];
+			esp->msg_out[1] = ent->tag[1];
+			esp->msg_out_len += 2;
+		}
+
+		start_cmd = ESP_CMD_DMA | ESP_CMD_SELAS;
+		esp->select_state = ESP_SELECT_MSGOUT;
+	}
+	val = tgt;
+	if (esp->rev == FASHME)
+		val |= ESP_BUSID_RESELID | ESP_BUSID_CTR32BIT;
+	esp_write8(val, ESP_BUSID);
+
+	esp_write_tgt_sync(esp, tgt);
+	esp_write_tgt_config3(esp, tgt);
+
+	val = (p - esp->command_block);
+
+	if (esp_debug & ESP_DEBUG_SCSICMD) {
+		printk("ESP: tgt[%d] lun[%d] scsi_cmd [ ", tgt, lun);
+		for (i = 0; i < cmd->cmd_len; i++)
+			printk("%02x ", cmd->cmnd[i]);
+		printk("]\n");
+	}
+
+	if (esp->rev == FASHME)
+		scsi_esp_cmd(esp, ESP_CMD_FLUSH);
+	esp->ops->send_dma_cmd(esp, esp->command_block_dma,
+			       val, 16, 0, start_cmd);
+}
+
+static struct esp_cmd_entry *esp_get_ent(struct esp *esp)
+{
+	struct list_head *head = &esp->esp_cmd_pool;
+	struct esp_cmd_entry *ret;
+
+	if (list_empty(head)) {
+		ret = kzalloc(sizeof(struct esp_cmd_entry), GFP_ATOMIC);
+	} else {
+		ret = list_entry(head->next, struct esp_cmd_entry, list);
+		list_del(&ret->list);
+		memset(ret, 0, sizeof(*ret));
+	}
+	return ret;
+}
+
+static void esp_put_ent(struct esp *esp, struct esp_cmd_entry *ent)
+{
+	list_add(&ent->list, &esp->esp_cmd_pool);
+}
+
+static void esp_cmd_is_done(struct esp *esp, struct esp_cmd_entry *ent,
+			    struct scsi_cmnd *cmd, unsigned int result)
+{
+	struct scsi_device *dev = cmd->device;
+	int tgt = dev->id;
+	int lun = dev->lun;
+
+	esp->active_cmd = NULL;
+	esp_unmap_dma(esp, cmd);
+	esp_free_lun_tag(ent, dev->hostdata);
+	cmd->result = result;
+
+	if (ent->eh_done) {
+		complete(ent->eh_done);
+		ent->eh_done = NULL;
+	}
+
+	if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) {
+		esp->ops->unmap_single(esp, ent->sense_dma,
+				       SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
+		ent->sense_ptr = NULL;
+
+		/* Restore the message/status bytes to what we actually
+		 * saw originally.  Also, report that we are providing
+		 * the sense data.
+		 */
+		cmd->result = ((DRIVER_SENSE << 24) |
+			       (DID_OK << 16) |
+			       (COMMAND_COMPLETE << 8) |
+			       (SAM_STAT_CHECK_CONDITION << 0));
+
+		ent->flags &= ~ESP_CMD_FLAG_AUTOSENSE;
+		if (esp_debug & ESP_DEBUG_AUTOSENSE) {
+			int i;
+
+			printk("esp%d: tgt[%d] lun[%d] AUTO SENSE[ ",
+			       esp->host->unique_id, tgt, lun);
+			for (i = 0; i < 18; i++)
+				printk("%02x ", cmd->sense_buffer[i]);
+			printk("]\n");
+		}
+	}
+
+	cmd->scsi_done(cmd);
+
+	list_del(&ent->list);
+	esp_put_ent(esp, ent);
+
+	esp_maybe_execute_command(esp);
+}
+
+static unsigned int compose_result(unsigned int status, unsigned int message,
+				   unsigned int driver_code)
+{
+	return (status | (message << 8) | (driver_code << 16));
+}
+
+static void esp_event_queue_full(struct esp *esp, struct esp_cmd_entry *ent)
+{
+	struct scsi_device *dev = ent->cmd->device;
+	struct esp_lun_data *lp = dev->hostdata;
+
+	scsi_track_queue_full(dev, lp->num_tagged - 1);
+}
+
+static int esp_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+{
+	struct scsi_device *dev = cmd->device;
+	struct esp *esp = host_to_esp(dev->host);
+	struct esp_cmd_priv *spriv;
+	struct esp_cmd_entry *ent;
+
+	ent = esp_get_ent(esp);
+	if (!ent)
+		return SCSI_MLQUEUE_HOST_BUSY;
+
+	ent->cmd = cmd;
+
+	cmd->scsi_done = done;
+
+	spriv = ESP_CMD_PRIV(cmd);
+	spriv->u.dma_addr = ~(dma_addr_t)0x0;
+
+	list_add_tail(&ent->list, &esp->queued_cmds);
+
+	esp_maybe_execute_command(esp);
+
+	return 0;
+}
+
+static int esp_check_gross_error(struct esp *esp)
+{
+	if (esp->sreg & ESP_STAT_SPAM) {
+		/* Gross Error, could be one of:
+		 * - top of fifo overwritten
+		 * - top of command register overwritten
+		 * - DMA programmed with wrong direction
+		 * - improper phase change
+		 */
+		printk(KERN_ERR PFX "esp%d: Gross error sreg[%02x]\n",
+		       esp->host->unique_id, esp->sreg);
+		/* XXX Reset the chip. XXX */
+		return 1;
+	}
+	return 0;
+}
+
+static int esp_check_spur_intr(struct esp *esp)
+{
+	switch (esp->rev) {
+	case ESP100:
+	case ESP100A:
+		/* The interrupt pending bit of the status register cannot
+		 * be trusted on these revisions.
+		 */
+		esp->sreg &= ~ESP_STAT_INTR;
+		break;
+
+	default:
+		if (!(esp->sreg & ESP_STAT_INTR)) {
+			esp->ireg = esp_read8(ESP_INTRPT);
+			if (esp->ireg & ESP_INTR_SR)
+				return 1;
+
+			/* If the DMA is indicating interrupt pending and the
+			 * ESP is not, the only possibility is a DMA error.
+			 */
+			if (!esp->ops->dma_error(esp)) {
+				printk(KERN_ERR PFX "esp%d: Spurious irq, "
+				       "sreg=%x.\n",
+				       esp->host->unique_id, esp->sreg);
+				return -1;
+			}
+
+			printk(KERN_ERR PFX "esp%d: DMA error\n",
+			       esp->host->unique_id);
+
+			/* XXX Reset the chip. XXX */
+			return -1;
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static void esp_schedule_reset(struct esp *esp)
+{
+	esp_log_reset("ESP: esp_schedule_reset() from %p\n",
+		      __builtin_return_address(0));
+	esp->flags |= ESP_FLAG_RESETTING;
+	esp_event(esp, ESP_EVENT_RESET);
+}
+
+/* In order to avoid having to add a special half-reconnected state
+ * into the driver we just sit here and poll through the rest of
+ * the reselection process to get the tag message bytes.
+ */
+static struct esp_cmd_entry *esp_reconnect_with_tag(struct esp *esp,
+						    struct esp_lun_data *lp)
+{
+	struct esp_cmd_entry *ent;
+	int i;
+
+	if (!lp->num_tagged) {
+		printk(KERN_ERR PFX "esp%d: Reconnect w/num_tagged==0\n",
+		       esp->host->unique_id);
+		return NULL;
+	}
+
+	esp_log_reconnect("ESP: reconnect tag, ");
+
+	for (i = 0; i < ESP_QUICKIRQ_LIMIT; i++) {
+		if (esp->ops->irq_pending(esp))
+			break;
+	}
+	if (i == ESP_QUICKIRQ_LIMIT) {
+		printk(KERN_ERR PFX "esp%d: Reconnect IRQ1 timeout\n",
+		       esp->host->unique_id);
+		return NULL;
+	}
+
+	esp->sreg = esp_read8(ESP_STATUS);
+	esp->ireg = esp_read8(ESP_INTRPT);
+
+	esp_log_reconnect("IRQ(%d:%x:%x), ",
+			  i, esp->ireg, esp->sreg);
+
+	if (esp->ireg & ESP_INTR_DC) {
+		printk(KERN_ERR PFX "esp%d: Reconnect, got disconnect.\n",
+		       esp->host->unique_id);
+		return NULL;
+	}
+
+	if ((esp->sreg & ESP_STAT_PMASK) != ESP_MIP) {
+		printk(KERN_ERR PFX "esp%d: Reconnect, not MIP sreg[%02x].\n",
+		       esp->host->unique_id, esp->sreg);
+		return NULL;
+	}
+
+	/* DMA in the tag bytes... */
+	esp->command_block[0] = 0xff;
+	esp->command_block[1] = 0xff;
+	esp->ops->send_dma_cmd(esp, esp->command_block_dma,
+			       2, 2, 1, ESP_CMD_DMA | ESP_CMD_TI);
+
+	/* ACK the msssage.  */
+	scsi_esp_cmd(esp, ESP_CMD_MOK);
+
+	for (i = 0; i < ESP_RESELECT_TAG_LIMIT; i++) {
+		if (esp->ops->irq_pending(esp)) {
+			esp->sreg = esp_read8(ESP_STATUS);
+			esp->ireg = esp_read8(ESP_INTRPT);
+			if (esp->ireg & ESP_INTR_FDONE)
+				break;
+		}
+		udelay(1);
+	}
+	if (i == ESP_RESELECT_TAG_LIMIT) {
+		printk(KERN_ERR PFX "esp%d: Reconnect IRQ2 timeout\n",
+		       esp->host->unique_id);
+		return NULL;
+	}
+	esp->ops->dma_drain(esp);
+	esp->ops->dma_invalidate(esp);
+
+	esp_log_reconnect("IRQ2(%d:%x:%x) tag[%x:%x]\n",
+			  i, esp->ireg, esp->sreg,
+			  esp->command_block[0],
+			  esp->command_block[1]);
+
+	if (esp->command_block[0] < SIMPLE_QUEUE_TAG ||
+	    esp->command_block[0] > ORDERED_QUEUE_TAG) {
+		printk(KERN_ERR PFX "esp%d: Reconnect, bad tag "
+		       "type %02x.\n",
+		       esp->host->unique_id, esp->command_block[0]);
+		return NULL;
+	}
+
+	ent = lp->tagged_cmds[esp->command_block[1]];
+	if (!ent) {
+		printk(KERN_ERR PFX "esp%d: Reconnect, no entry for "
+		       "tag %02x.\n",
+		       esp->host->unique_id, esp->command_block[1]);
+		return NULL;
+	}
+
+	return ent;
+}
+
+static int esp_reconnect(struct esp *esp)
+{
+	struct esp_cmd_entry *ent;
+	struct esp_target_data *tp;
+	struct esp_lun_data *lp;
+	struct scsi_device *dev;
+	int target, lun;
+
+	BUG_ON(esp->active_cmd);
+	if (esp->rev == FASHME) {
+		/* FASHME puts the target and lun numbers directly
+		 * into the fifo.
+		 */
+		target = esp->fifo[0];
+		lun = esp->fifo[1] & 0x7;
+	} else {
+		u8 bits = esp_read8(ESP_FDATA);
+
+		/* Older chips put the lun directly into the fifo, but
+		 * the target is given as a sample of the arbitration
+		 * lines on the bus at reselection time.  So we should
+		 * see the ID of the ESP and the one reconnecting target
+		 * set in the bitmap.
+		 */
+		if (!(bits & esp->scsi_id_mask))
+			goto do_reset;
+		bits &= ~esp->scsi_id_mask;
+		if (!bits || (bits & (bits - 1)))
+			goto do_reset;
+
+		target = ffs(bits) - 1;
+		lun = (esp_read8(ESP_FDATA) & 0x7);
+
+		scsi_esp_cmd(esp, ESP_CMD_FLUSH);
+		if (esp->rev == ESP100) {
+			u8 ireg = esp_read8(ESP_INTRPT);
+			/* This chip has a bug during reselection that can
+			 * cause a spurious illegal-command interrupt, which
+			 * we simply ACK here.  Another possibility is a bus
+			 * reset so we must check for that.
+			 */
+			if (ireg & ESP_INTR_SR)
+				goto do_reset;
+		}
+		scsi_esp_cmd(esp, ESP_CMD_NULL);
+	}
+
+	esp_write_tgt_sync(esp, target);
+	esp_write_tgt_config3(esp, target);
+
+	scsi_esp_cmd(esp, ESP_CMD_MOK);
+
+	if (esp->rev == FASHME)
+		esp_write8(target | ESP_BUSID_RESELID | ESP_BUSID_CTR32BIT,
+			   ESP_BUSID);
+
+	tp = &esp->target[target];
+	dev = __scsi_device_lookup_by_target(tp->starget, lun);
+	if (!dev) {
+		printk(KERN_ERR PFX "esp%d: Reconnect, no lp "
+		       "tgt[%u] lun[%u]\n",
+		       esp->host->unique_id, target, lun);
+		goto do_reset;
+	}
+	lp = dev->hostdata;
+
+	ent = lp->non_tagged_cmd;
+	if (!ent) {
+		ent = esp_reconnect_with_tag(esp, lp);
+		if (!ent)
+			goto do_reset;
+	}
+
+	esp->active_cmd = ent;
+
+	if (ent->flags & ESP_CMD_FLAG_ABORT) {
+		esp->msg_out[0] = ABORT_TASK_SET;
+		esp->msg_out_len = 1;
+		scsi_esp_cmd(esp, ESP_CMD_SATN);
+	}
+
+	esp_event(esp, ESP_EVENT_CHECK_PHASE);
+	esp_restore_pointers(esp, ent);
+	esp->flags |= ESP_FLAG_QUICKIRQ_CHECK;
+	return 1;
+
+do_reset:
+	esp_schedule_reset(esp);
+	return 0;
+}
+
+static int esp_finish_select(struct esp *esp)
+{
+	struct esp_cmd_entry *ent;
+	struct scsi_cmnd *cmd;
+	u8 orig_select_state;
+
+	orig_select_state = esp->select_state;
+
+	/* No longer selecting.  */
+	esp->select_state = ESP_SELECT_NONE;
+
+	esp->seqreg = esp_read8(ESP_SSTEP) & ESP_STEP_VBITS;
+	ent = esp->active_cmd;
+	cmd = ent->cmd;
+
+	if (esp->ops->dma_error(esp)) {
+		/* If we see a DMA error during or as a result of selection,
+		 * all bets are off.
+		 */
+		esp_schedule_reset(esp);
+		esp_cmd_is_done(esp, ent, cmd, (DID_ERROR << 16));
+		return 0;
+	}
+
+	esp->ops->dma_invalidate(esp);
+
+	if (esp->ireg == (ESP_INTR_RSEL | ESP_INTR_FDONE)) {
+		struct esp_target_data *tp = &esp->target[cmd->device->id];
+
+		/* Carefully back out of the selection attempt.  Release
+		 * resources (such as DMA mapping & TAG) and reset state (such
+		 * as message out and command delivery variables).
+		 */
+		if (!(ent->flags & ESP_CMD_FLAG_AUTOSENSE)) {
+			esp_unmap_dma(esp, cmd);
+			esp_free_lun_tag(ent, cmd->device->hostdata);
+			tp->flags &= ~(ESP_TGT_NEGO_SYNC | ESP_TGT_NEGO_WIDE);
+			esp->flags &= ~ESP_FLAG_DOING_SLOWCMD;
+			esp->cmd_bytes_ptr = NULL;
+			esp->cmd_bytes_left = 0;
+		} else {
+			esp->ops->unmap_single(esp, ent->sense_dma,
+					       SCSI_SENSE_BUFFERSIZE,
+					       DMA_FROM_DEVICE);
+			ent->sense_ptr = NULL;
+		}
+
+		/* Now that the state is unwound properly, put back onto
+		 * the issue queue.  This command is no longer active.
+		 */
+		list_del(&ent->list);
+		list_add(&ent->list, &esp->queued_cmds);
+		esp->active_cmd = NULL;
+
+		/* Return value ignored by caller, it directly invokes
+		 * esp_reconnect().
+		 */
+		return 0;
+	}
+
+	if (esp->ireg == ESP_INTR_DC) {
+		struct scsi_device *dev = cmd->device;
+
+		/* Disconnect.  Make sure we re-negotiate sync and
+		 * wide parameters if this target starts responding
+		 * again in the future.
+		 */
+		esp->target[dev->id].flags |= ESP_TGT_CHECK_NEGO;
+
+		scsi_esp_cmd(esp, ESP_CMD_ESEL);
+		esp_cmd_is_done(esp, ent, cmd, (DID_BAD_TARGET << 16));
+		return 1;
+	}
+
+	if (esp->ireg == (ESP_INTR_FDONE | ESP_INTR_BSERV)) {
+		/* Selection successful.  On pre-FAST chips we have
+		 * to do a NOP and possibly clean out the FIFO.
+		 */
+		if (esp->rev <= ESP236) {
+			int fcnt = esp_read8(ESP_FFLAGS) & ESP_FF_FBYTES;
+
+			scsi_esp_cmd(esp, ESP_CMD_NULL);
+
+			if (!fcnt &&
+			    (!esp->prev_soff ||
+			     ((esp->sreg & ESP_STAT_PMASK) != ESP_DIP)))
+				esp_flush_fifo(esp);
+		}
+
+		/* If we are doing a slow command, negotiation, etc.
+		 * we'll do the right thing as we transition to the
+		 * next phase.
+		 */
+		esp_event(esp, ESP_EVENT_CHECK_PHASE);
+		return 0;
+	}
+
+	printk("ESP: Unexpected selection completion ireg[%x].\n",
+	       esp->ireg);
+	esp_schedule_reset(esp);
+	return 0;
+}
+
+static int esp_data_bytes_sent(struct esp *esp, struct esp_cmd_entry *ent,
+			       struct scsi_cmnd *cmd)
+{
+	int fifo_cnt, ecount, bytes_sent, flush_fifo;
+
+	fifo_cnt = esp_read8(ESP_FFLAGS) & ESP_FF_FBYTES;
+	if (esp->prev_cfg3 & ESP_CONFIG3_EWIDE)
+		fifo_cnt <<= 1;
+
+	ecount = 0;
+	if (!(esp->sreg & ESP_STAT_TCNT)) {
+		ecount = ((unsigned int)esp_read8(ESP_TCLOW) |
+			  (((unsigned int)esp_read8(ESP_TCMED)) << 8));
+		if (esp->rev == FASHME)
+			ecount |= ((unsigned int)esp_read8(FAS_RLO)) << 16;
+	}
+
+	bytes_sent = esp->data_dma_len;
+	bytes_sent -= ecount;
+
+	if (!(ent->flags & ESP_CMD_FLAG_WRITE))
+		bytes_sent -= fifo_cnt;
+
+	flush_fifo = 0;
+	if (!esp->prev_soff) {
+		/* Synchronous data transfer, always flush fifo. */
+		flush_fifo = 1;
+	} else {
+		if (esp->rev == ESP100) {
+			u32 fflags, phase;
+
+			/* ESP100 has a chip bug where in the synchronous data
+			 * phase it can mistake a final long REQ pulse from the
+			 * target as an extra data byte.  Fun.
+			 *
+			 * To detect this case we resample the status register
+			 * and fifo flags.  If we're still in a data phase and
+			 * we see spurious chunks in the fifo, we return error
+			 * to the caller which should reset and set things up
+			 * such that we only try future transfers to this
+			 * target in synchronous mode.
+			 */
+			esp->sreg = esp_read8(ESP_STATUS);
+			phase = esp->sreg & ESP_STAT_PMASK;
+			fflags = esp_read8(ESP_FFLAGS);
+
+			if ((phase == ESP_DOP &&
+			     (fflags & ESP_FF_ONOTZERO)) ||
+			    (phase == ESP_DIP &&
+			     (fflags & ESP_FF_FBYTES)))
+				return -1;
+		}
+		if (!(ent->flags & ESP_CMD_FLAG_WRITE))
+			flush_fifo = 1;
+	}
+
+	if (flush_fifo)
+		esp_flush_fifo(esp);
+
+	return bytes_sent;
+}
+
+static void esp_setsync(struct esp *esp, struct esp_target_data *tp,
+			u8 scsi_period, u8 scsi_offset,
+			u8 esp_stp, u8 esp_soff)
+{
+	spi_period(tp->starget) = scsi_period;
+	spi_offset(tp->starget) = scsi_offset;
+	spi_width(tp->starget) = (tp->flags & ESP_TGT_WIDE) ? 1 : 0;
+
+	if (esp_soff) {
+		esp_stp &= 0x1f;
+		esp_soff |= esp->radelay;
+		if (esp->rev >= FAS236) {
+			u8 bit = ESP_CONFIG3_FSCSI;
+			if (esp->rev >= FAS100A)
+				bit = ESP_CONFIG3_FAST;
+
+			if (scsi_period < 50) {
+				if (esp->rev == FASHME)
+					esp_soff &= ~esp->radelay;
+				tp->esp_config3 |= bit;
+			} else {
+				tp->esp_config3 &= ~bit;
+			}
+			esp->prev_cfg3 = tp->esp_config3;
+			esp_write8(esp->prev_cfg3, ESP_CFG3);
+		}
+	}
+
+	tp->esp_period = esp->prev_stp = esp_stp;
+	tp->esp_offset = esp->prev_soff = esp_soff;
+
+	esp_write8(esp_soff, ESP_SOFF);
+	esp_write8(esp_stp, ESP_STP);
+
+	tp->flags &= ~(ESP_TGT_NEGO_SYNC | ESP_TGT_CHECK_NEGO);
+
+	spi_display_xfer_agreement(tp->starget);
+}
+
+static void esp_msgin_reject(struct esp *esp)
+{
+	struct esp_cmd_entry *ent = esp->active_cmd;
+	struct scsi_cmnd *cmd = ent->cmd;
+	struct esp_target_data *tp;
+	int tgt;
+
+	tgt = cmd->device->id;
+	tp = &esp->target[tgt];
+
+	if (tp->flags & ESP_TGT_NEGO_WIDE) {
+		tp->flags &= ~(ESP_TGT_NEGO_WIDE | ESP_TGT_WIDE);
+
+		if (!esp_need_to_nego_sync(tp)) {
+			tp->flags &= ~ESP_TGT_CHECK_NEGO;
+			scsi_esp_cmd(esp, ESP_CMD_RATN);
+		} else {
+			esp->msg_out_len =
+				spi_populate_sync_msg(&esp->msg_out[0],
+						      tp->nego_goal_period,
+						      tp->nego_goal_offset);
+			tp->flags |= ESP_TGT_NEGO_SYNC;
+			scsi_esp_cmd(esp, ESP_CMD_SATN);
+		}
+		return;
+	}
+
+	if (tp->flags & ESP_TGT_NEGO_SYNC) {
+		tp->flags &= ~(ESP_TGT_NEGO_SYNC | ESP_TGT_CHECK_NEGO);
+		tp->esp_period = 0;
+		tp->esp_offset = 0;
+		esp_setsync(esp, tp, 0, 0, 0, 0);
+		scsi_esp_cmd(esp, ESP_CMD_RATN);
+		return;
+	}
+
+	esp->msg_out[0] = ABORT_TASK_SET;
+	esp->msg_out_len = 1;
+	scsi_esp_cmd(esp, ESP_CMD_SATN);
+}
+
+static void esp_msgin_sdtr(struct esp *esp, struct esp_target_data *tp)
+{
+	u8 period = esp->msg_in[3];
+	u8 offset = esp->msg_in[4];
+	u8 stp;
+
+	if (!(tp->flags & ESP_TGT_NEGO_SYNC))
+		goto do_reject;
+
+	if (offset > 15)
+		goto do_reject;
+
+	if (offset) {
+		int rounded_up, one_clock;
+
+		if (period > esp->max_period) {
+			period = offset = 0;
+			goto do_sdtr;
+		}
+		if (period < esp->min_period)
+			goto do_reject;
+
+		one_clock = esp->ccycle / 1000;
+		rounded_up = (period << 2);
+		rounded_up = (rounded_up + one_clock - 1) / one_clock;
+		stp = rounded_up;
+		if (stp && esp->rev >= FAS236) {
+			if (stp >= 50)
+				stp--;
+		}
+	} else {
+		stp = 0;
+	}
+
+	esp_setsync(esp, tp, period, offset, stp, offset);
+	return;
+
+do_reject:
+	esp->msg_out[0] = MESSAGE_REJECT;
+	esp->msg_out_len = 1;
+	scsi_esp_cmd(esp, ESP_CMD_SATN);
+	return;
+
+do_sdtr:
+	tp->nego_goal_period = period;
+	tp->nego_goal_offset = offset;
+	esp->msg_out_len =
+		spi_populate_sync_msg(&esp->msg_out[0],
+				      tp->nego_goal_period,
+				      tp->nego_goal_offset);
+	scsi_esp_cmd(esp, ESP_CMD_SATN);
+}
+
+static void esp_msgin_wdtr(struct esp *esp, struct esp_target_data *tp)
+{
+	int size = 8 << esp->msg_in[3];
+	u8 cfg3;
+
+	if (esp->rev != FASHME)
+		goto do_reject;
+
+	if (size != 8 && size != 16)
+		goto do_reject;
+
+	if (!(tp->flags & ESP_TGT_NEGO_WIDE))
+		goto do_reject;
+
+	cfg3 = tp->esp_config3;
+	if (size == 16) {
+		tp->flags |= ESP_TGT_WIDE;
+		cfg3 |= ESP_CONFIG3_EWIDE;
+	} else {
+		tp->flags &= ~ESP_TGT_WIDE;
+		cfg3 &= ~ESP_CONFIG3_EWIDE;
+	}
+	tp->esp_config3 = cfg3;
+	esp->prev_cfg3 = cfg3;
+	esp_write8(cfg3, ESP_CFG3);
+
+	tp->flags &= ~ESP_TGT_NEGO_WIDE;
+
+	spi_period(tp->starget) = 0;
+	spi_offset(tp->starget) = 0;
+	if (!esp_need_to_nego_sync(tp)) {
+		tp->flags &= ~ESP_TGT_CHECK_NEGO;
+		scsi_esp_cmd(esp, ESP_CMD_RATN);
+	} else {
+		esp->msg_out_len =
+			spi_populate_sync_msg(&esp->msg_out[0],
+					      tp->nego_goal_period,
+					      tp->nego_goal_offset);
+		tp->flags |= ESP_TGT_NEGO_SYNC;
+		scsi_esp_cmd(esp, ESP_CMD_SATN);
+	}
+	return;
+
+do_reject:
+	esp->msg_out[0] = MESSAGE_REJECT;
+	esp->msg_out_len = 1;
+	scsi_esp_cmd(esp, ESP_CMD_SATN);
+}
+
+static void esp_msgin_extended(struct esp *esp)
+{
+	struct esp_cmd_entry *ent = esp->active_cmd;
+	struct scsi_cmnd *cmd = ent->cmd;
+	struct esp_target_data *tp;
+	int tgt = cmd->device->id;
+
+	tp = &esp->target[tgt];
+	if (esp->msg_in[2] == EXTENDED_SDTR) {
+		esp_msgin_sdtr(esp, tp);
+		return;
+	}
+	if (esp->msg_in[2] == EXTENDED_WDTR) {
+		esp_msgin_wdtr(esp, tp);
+		return;
+	}
+
+	printk("ESP: Unexpected extended msg type %x\n",
+	       esp->msg_in[2]);
+
+	esp->msg_out[0] = ABORT_TASK_SET;
+	esp->msg_out_len = 1;
+	scsi_esp_cmd(esp, ESP_CMD_SATN);
+}
+
+/* Analyze msgin bytes received from target so far.  Return non-zero
+ * if there are more bytes needed to complete the message.
+ */
+static int esp_msgin_process(struct esp *esp)
+{
+	u8 msg0 = esp->msg_in[0];
+	int len = esp->msg_in_len;
+
+	if (msg0 & 0x80) {
+		/* Identify */
+		printk("ESP: Unexpected msgin identify\n");
+		return 0;
+	}
+
+	switch (msg0) {
+	case EXTENDED_MESSAGE:
+		if (len == 1)
+			return 1;
+		if (len < esp->msg_in[1] + 2)
+			return 1;
+		esp_msgin_extended(esp);
+		return 0;
+
+	case IGNORE_WIDE_RESIDUE: {
+		struct esp_cmd_entry *ent;
+		struct esp_cmd_priv *spriv;
+		if (len == 1)
+			return 1;
+
+		if (esp->msg_in[1] != 1)
+			goto do_reject;
+
+		ent = esp->active_cmd;
+		spriv = ESP_CMD_PRIV(ent->cmd);
+
+		if (spriv->cur_residue == sg_dma_len(spriv->cur_sg)) {
+			spriv->cur_sg--;
+			spriv->cur_residue = 1;
+		} else
+			spriv->cur_residue++;
+		spriv->tot_residue++;
+		return 0;
+	}
+	case NOP:
+		return 0;
+	case RESTORE_POINTERS:
+		esp_restore_pointers(esp, esp->active_cmd);
+		return 0;
+	case SAVE_POINTERS:
+		esp_save_pointers(esp, esp->active_cmd);
+		return 0;
+
+	case COMMAND_COMPLETE:
+	case DISCONNECT: {
+		struct esp_cmd_entry *ent = esp->active_cmd;
+
+		ent->message = msg0;
+		esp_event(esp, ESP_EVENT_FREE_BUS);
+		esp->flags |= ESP_FLAG_QUICKIRQ_CHECK;
+		return 0;
+	}
+	case MESSAGE_REJECT:
+		esp_msgin_reject(esp);
+		return 0;
+
+	default:
+	do_reject:
+		esp->msg_out[0] = MESSAGE_REJECT;
+		esp->msg_out_len = 1;
+		scsi_esp_cmd(esp, ESP_CMD_SATN);
+		return 0;
+	}
+}
+
+static int esp_process_event(struct esp *esp)
+{
+	int write;
+
+again:
+	write = 0;
+	switch (esp->event) {
+	case ESP_EVENT_CHECK_PHASE:
+		switch (esp->sreg & ESP_STAT_PMASK) {
+		case ESP_DOP:
+			esp_event(esp, ESP_EVENT_DATA_OUT);
+			break;
+		case ESP_DIP:
+			esp_event(esp, ESP_EVENT_DATA_IN);
+			break;
+		case ESP_STATP:
+			esp_flush_fifo(esp);
+			scsi_esp_cmd(esp, ESP_CMD_ICCSEQ);
+			esp_event(esp, ESP_EVENT_STATUS);
+			esp->flags |= ESP_FLAG_QUICKIRQ_CHECK;
+			return 1;
+
+		case ESP_MOP:
+			esp_event(esp, ESP_EVENT_MSGOUT);
+			break;
+
+		case ESP_MIP:
+			esp_event(esp, ESP_EVENT_MSGIN);
+			break;
+
+		case ESP_CMDP:
+			esp_event(esp, ESP_EVENT_CMD_START);
+			break;
+
+		default:
+			printk("ESP: Unexpected phase, sreg=%02x\n",
+			       esp->sreg);
+			esp_schedule_reset(esp);
+			return 0;
+		}
+		goto again;
+		break;
+
+	case ESP_EVENT_DATA_IN:
+		write = 1;
+		/* fallthru */
+
+	case ESP_EVENT_DATA_OUT: {
+		struct esp_cmd_entry *ent = esp->active_cmd;
+		struct scsi_cmnd *cmd = ent->cmd;
+		dma_addr_t dma_addr = esp_cur_dma_addr(ent, cmd);
+		unsigned int dma_len = esp_cur_dma_len(ent, cmd);
+
+		if (esp->rev == ESP100)
+			scsi_esp_cmd(esp, ESP_CMD_NULL);
+
+		if (write)
+			ent->flags |= ESP_CMD_FLAG_WRITE;
+		else
+			ent->flags &= ~ESP_CMD_FLAG_WRITE;
+
+		dma_len = esp_dma_length_limit(esp, dma_addr, dma_len);
+		esp->data_dma_len = dma_len;
+
+		if (!dma_len) {
+			printk(KERN_ERR PFX "esp%d: DMA length is zero!\n",
+			       esp->host->unique_id);
+			printk(KERN_ERR PFX "esp%d: cur adr[%08x] len[%08x]\n",
+			       esp->host->unique_id,
+			       esp_cur_dma_addr(ent, cmd),
+			       esp_cur_dma_len(ent, cmd));
+			esp_schedule_reset(esp);
+			return 0;
+		}
+
+		esp_log_datastart("ESP: start data addr[%08x] len[%u] "
+				  "write(%d)\n",
+				  dma_addr, dma_len, write);
+
+		esp->ops->send_dma_cmd(esp, dma_addr, dma_len, dma_len,
+				       write, ESP_CMD_DMA | ESP_CMD_TI);
+		esp_event(esp, ESP_EVENT_DATA_DONE);
+		break;
+	}
+	case ESP_EVENT_DATA_DONE: {
+		struct esp_cmd_entry *ent = esp->active_cmd;
+		struct scsi_cmnd *cmd = ent->cmd;
+		int bytes_sent;
+
+		if (esp->ops->dma_error(esp)) {
+			printk("ESP: data done, DMA error, resetting\n");
+			esp_schedule_reset(esp);
+			return 0;
+		}
+
+		if (ent->flags & ESP_CMD_FLAG_WRITE) {
+			/* XXX parity errors, etc. XXX */
+
+			esp->ops->dma_drain(esp);
+		}
+		esp->ops->dma_invalidate(esp);
+
+		if (esp->ireg != ESP_INTR_BSERV) {
+			/* We should always see exactly a bus-service
+			 * interrupt at the end of a successful transfer.
+			 */
+			printk("ESP: data done, not BSERV, resetting\n");
+			esp_schedule_reset(esp);
+			return 0;
+		}
+
+		bytes_sent = esp_data_bytes_sent(esp, ent, cmd);
+
+		esp_log_datadone("ESP: data done flgs[%x] sent[%d]\n",
+				 ent->flags, bytes_sent);
+
+		if (bytes_sent < 0) {
+			/* XXX force sync mode for this target XXX */
+			esp_schedule_reset(esp);
+			return 0;
+		}
+
+		esp_advance_dma(esp, ent, cmd, bytes_sent);
+		esp_event(esp, ESP_EVENT_CHECK_PHASE);
+		goto again;
+		break;
+	}
+
+	case ESP_EVENT_STATUS: {
+		struct esp_cmd_entry *ent = esp->active_cmd;
+
+		if (esp->ireg & ESP_INTR_FDONE) {
+			ent->status = esp_read8(ESP_FDATA);
+			ent->message = esp_read8(ESP_FDATA);
+			scsi_esp_cmd(esp, ESP_CMD_MOK);
+		} else if (esp->ireg == ESP_INTR_BSERV) {
+			ent->status = esp_read8(ESP_FDATA);
+			ent->message = 0xff;
+			esp_event(esp, ESP_EVENT_MSGIN);
+			return 0;
+		}
+
+		if (ent->message != COMMAND_COMPLETE) {
+			printk("ESP: Unexpected message %x in status\n",
+			       ent->message);
+			esp_schedule_reset(esp);
+			return 0;
+		}
+
+		esp_event(esp, ESP_EVENT_FREE_BUS);
+		esp->flags |= ESP_FLAG_QUICKIRQ_CHECK;
+		break;
+	}
+	case ESP_EVENT_FREE_BUS: {
+		struct esp_cmd_entry *ent = esp->active_cmd;
+		struct scsi_cmnd *cmd = ent->cmd;
+
+		if (ent->message == COMMAND_COMPLETE ||
+		    ent->message == DISCONNECT)
+			scsi_esp_cmd(esp, ESP_CMD_ESEL);
+
+		if (ent->message == COMMAND_COMPLETE) {
+			esp_log_cmddone("ESP: Command done status[%x] "
+					"message[%x]\n",
+					ent->status, ent->message);
+			if (ent->status == SAM_STAT_TASK_SET_FULL)
+				esp_event_queue_full(esp, ent);
+
+			if (ent->status == SAM_STAT_CHECK_CONDITION &&
+			    !(ent->flags & ESP_CMD_FLAG_AUTOSENSE)) {
+				ent->flags |= ESP_CMD_FLAG_AUTOSENSE;
+				esp_autosense(esp, ent);
+			} else {
+				esp_cmd_is_done(esp, ent, cmd,
+						compose_result(ent->status,
+							       ent->message,
+							       DID_OK));
+			}
+		} else if (ent->message == DISCONNECT) {
+			esp_log_disconnect("ESP: Disconnecting tgt[%d] "
+					   "tag[%x:%x]\n",
+					   cmd->device->id,
+					   ent->tag[0], ent->tag[1]);
+
+			esp->active_cmd = NULL;
+			esp_maybe_execute_command(esp);
+		} else {
+			printk("ESP: Unexpected message %x in freebus\n",
+			       ent->message);
+			esp_schedule_reset(esp);
+			return 0;
+		}
+		if (esp->active_cmd)
+			esp->flags |= ESP_FLAG_QUICKIRQ_CHECK;
+		break;
+	}
+	case ESP_EVENT_MSGOUT: {
+		scsi_esp_cmd(esp, ESP_CMD_FLUSH);
+
+		if (esp_debug & ESP_DEBUG_MSGOUT) {
+			int i;
+			printk("ESP: Sending message [ ");
+			for (i = 0; i < esp->msg_out_len; i++)
+				printk("%02x ", esp->msg_out[i]);
+			printk("]\n");
+		}
+
+		if (esp->rev == FASHME) {
+			int i;
+
+			/* Always use the fifo.  */
+			for (i = 0; i < esp->msg_out_len; i++) {
+				esp_write8(esp->msg_out[i], ESP_FDATA);
+				esp_write8(0, ESP_FDATA);
+			}
+			scsi_esp_cmd(esp, ESP_CMD_TI);
+		} else {
+			if (esp->msg_out_len == 1) {
+				esp_write8(esp->msg_out[0], ESP_FDATA);
+				scsi_esp_cmd(esp, ESP_CMD_TI);
+			} else {
+				/* Use DMA. */
+				memcpy(esp->command_block,
+				       esp->msg_out,
+				       esp->msg_out_len);
+
+				esp->ops->send_dma_cmd(esp,
+						       esp->command_block_dma,
+						       esp->msg_out_len,
+						       esp->msg_out_len,
+						       0,
+						       ESP_CMD_DMA|ESP_CMD_TI);
+			}
+		}
+		esp_event(esp, ESP_EVENT_MSGOUT_DONE);
+		break;
+	}
+	case ESP_EVENT_MSGOUT_DONE:
+		if (esp->rev == FASHME) {
+			scsi_esp_cmd(esp, ESP_CMD_FLUSH);
+		} else {
+			if (esp->msg_out_len > 1)
+				esp->ops->dma_invalidate(esp);
+		}
+
+		if (!(esp->ireg & ESP_INTR_DC)) {
+			if (esp->rev != FASHME)
+				scsi_esp_cmd(esp, ESP_CMD_NULL);
+		}
+		esp_event(esp, ESP_EVENT_CHECK_PHASE);
+		goto again;
+	case ESP_EVENT_MSGIN:
+		if (esp->ireg & ESP_INTR_BSERV) {
+			if (esp->rev == FASHME) {
+				if (!(esp_read8(ESP_STATUS2) &
+				      ESP_STAT2_FEMPTY))
+					scsi_esp_cmd(esp, ESP_CMD_FLUSH);
+			} else {
+				scsi_esp_cmd(esp, ESP_CMD_FLUSH);
+				if (esp->rev == ESP100)
+					scsi_esp_cmd(esp, ESP_CMD_NULL);
+			}
+			scsi_esp_cmd(esp, ESP_CMD_TI);
+			esp->flags |= ESP_FLAG_QUICKIRQ_CHECK;
+			return 1;
+		}
+		if (esp->ireg & ESP_INTR_FDONE) {
+			u8 val;
+
+			if (esp->rev == FASHME)
+				val = esp->fifo[0];
+			else
+				val = esp_read8(ESP_FDATA);
+			esp->msg_in[esp->msg_in_len++] = val;
+
+			esp_log_msgin("ESP: Got msgin byte %x\n", val);
+
+			if (!esp_msgin_process(esp))
+				esp->msg_in_len = 0;
+
+			if (esp->rev == FASHME)
+				scsi_esp_cmd(esp, ESP_CMD_FLUSH);
+
+			scsi_esp_cmd(esp, ESP_CMD_MOK);
+
+			if (esp->event != ESP_EVENT_FREE_BUS)
+				esp_event(esp, ESP_EVENT_CHECK_PHASE);
+		} else {
+			printk("ESP: MSGIN neither BSERV not FDON, resetting");
+			esp_schedule_reset(esp);
+			return 0;
+		}
+		break;
+	case ESP_EVENT_CMD_START:
+		memcpy(esp->command_block, esp->cmd_bytes_ptr,
+		       esp->cmd_bytes_left);
+		if (esp->rev == FASHME)
+			scsi_esp_cmd(esp, ESP_CMD_FLUSH);
+		esp->ops->send_dma_cmd(esp, esp->command_block_dma,
+				       esp->cmd_bytes_left, 16, 0,
+				       ESP_CMD_DMA | ESP_CMD_TI);
+		esp_event(esp, ESP_EVENT_CMD_DONE);
+		esp->flags |= ESP_FLAG_QUICKIRQ_CHECK;
+		break;
+	case ESP_EVENT_CMD_DONE:
+		esp->ops->dma_invalidate(esp);
+		if (esp->ireg & ESP_INTR_BSERV) {
+			esp_event(esp, ESP_EVENT_CHECK_PHASE);
+			goto again;
+		}
+		esp_schedule_reset(esp);
+		return 0;
+		break;
+
+	case ESP_EVENT_RESET:
+		scsi_esp_cmd(esp, ESP_CMD_RS);
+		break;
+
+	default:
+		printk("ESP: Unexpected event %x, resetting\n",
+		       esp->event);
+		esp_schedule_reset(esp);
+		return 0;
+		break;
+	}
+	return 1;
+}
+
+static void esp_reset_cleanup_one(struct esp *esp, struct esp_cmd_entry *ent)
+{
+	struct scsi_cmnd *cmd = ent->cmd;
+
+	esp_unmap_dma(esp, cmd);
+	esp_free_lun_tag(ent, cmd->device->hostdata);
+	cmd->result = DID_RESET << 16;
+
+	if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) {
+		esp->ops->unmap_single(esp, ent->sense_dma,
+				       SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
+		ent->sense_ptr = NULL;
+	}
+
+	cmd->scsi_done(cmd);
+	list_del(&ent->list);
+	esp_put_ent(esp, ent);
+}
+
+static void esp_clear_hold(struct scsi_device *dev, void *data)
+{
+	struct esp_lun_data *lp = dev->hostdata;
+
+	BUG_ON(lp->num_tagged);
+	lp->hold = 0;
+}
+
+static void esp_reset_cleanup(struct esp *esp)
+{
+	struct esp_cmd_entry *ent, *tmp;
+	int i;
+
+	list_for_each_entry_safe(ent, tmp, &esp->queued_cmds, list) {
+		struct scsi_cmnd *cmd = ent->cmd;
+
+		list_del(&ent->list);
+		cmd->result = DID_RESET << 16;
+		cmd->scsi_done(cmd);
+		esp_put_ent(esp, ent);
+	}
+
+	list_for_each_entry_safe(ent, tmp, &esp->active_cmds, list) {
+		if (ent == esp->active_cmd)
+			esp->active_cmd = NULL;
+		esp_reset_cleanup_one(esp, ent);
+	}
+
+	BUG_ON(esp->active_cmd != NULL);
+
+	/* Force renegotiation of sync/wide transfers.  */
+	for (i = 0; i < ESP_MAX_TARGET; i++) {
+		struct esp_target_data *tp = &esp->target[i];
+
+		tp->esp_period = 0;
+		tp->esp_offset = 0;
+		tp->esp_config3 &= ~(ESP_CONFIG3_EWIDE |
+				     ESP_CONFIG3_FSCSI |
+				     ESP_CONFIG3_FAST);
+		tp->flags &= ~ESP_TGT_WIDE;
+		tp->flags |= ESP_TGT_CHECK_NEGO;
+
+		if (tp->starget)
+			starget_for_each_device(tp->starget, NULL,
+						esp_clear_hold);
+	}
+}
+
+/* Runs under host->lock */
+static void __esp_interrupt(struct esp *esp)
+{
+	int finish_reset, intr_done;
+	u8 phase;
+
+	esp->sreg = esp_read8(ESP_STATUS);
+
+	if (esp->flags & ESP_FLAG_RESETTING) {
+		finish_reset = 1;
+	} else {
+		if (esp_check_gross_error(esp))
+			return;
+
+		finish_reset = esp_check_spur_intr(esp);
+		if (finish_reset < 0)
+			return;
+	}
+
+	esp->ireg = esp_read8(ESP_INTRPT);
+
+	if (esp->ireg & ESP_INTR_SR)
+		finish_reset = 1;
+
+	if (finish_reset) {
+		esp_reset_cleanup(esp);
+		if (esp->eh_reset) {
+			complete(esp->eh_reset);
+			esp->eh_reset = NULL;
+		}
+		return;
+	}
+
+	phase = (esp->sreg & ESP_STAT_PMASK);
+	if (esp->rev == FASHME) {
+		if (((phase != ESP_DIP && phase != ESP_DOP) &&
+		     esp->select_state == ESP_SELECT_NONE &&
+		     esp->event != ESP_EVENT_STATUS &&
+		     esp->event != ESP_EVENT_DATA_DONE) ||
+		    (esp->ireg & ESP_INTR_RSEL)) {
+			esp->sreg2 = esp_read8(ESP_STATUS2);
+			if (!(esp->sreg2 & ESP_STAT2_FEMPTY) ||
+			    (esp->sreg2 & ESP_STAT2_F1BYTE))
+				hme_read_fifo(esp);
+		}
+	}
+
+	esp_log_intr("ESP: intr sreg[%02x] seqreg[%02x] "
+		     "sreg2[%02x] ireg[%02x]\n",
+		     esp->sreg, esp->seqreg, esp->sreg2, esp->ireg);
+
+	intr_done = 0;
+
+	if (esp->ireg & (ESP_INTR_S | ESP_INTR_SATN | ESP_INTR_IC)) {
+		printk("ESP: unexpected IREG %02x\n", esp->ireg);
+		if (esp->ireg & ESP_INTR_IC)
+			esp_dump_cmd_log(esp);
+
+		esp_schedule_reset(esp);
+	} else {
+		if (!(esp->ireg & ESP_INTR_RSEL)) {
+			/* Some combination of FDONE, BSERV, DC.  */
+			if (esp->select_state != ESP_SELECT_NONE)
+				intr_done = esp_finish_select(esp);
+		} else if (esp->ireg & ESP_INTR_RSEL) {
+			if (esp->active_cmd)
+				(void) esp_finish_select(esp);
+			intr_done = esp_reconnect(esp);
+		}
+	}
+	while (!intr_done)
+		intr_done = esp_process_event(esp);
+}
+
+irqreturn_t scsi_esp_intr(int irq, void *dev_id)
+{
+	struct esp *esp = dev_id;
+	unsigned long flags;
+	irqreturn_t ret;
+
+	spin_lock_irqsave(esp->host->host_lock, flags);
+	ret = IRQ_NONE;
+	if (esp->ops->irq_pending(esp)) {
+		ret = IRQ_HANDLED;
+		for (;;) {
+			int i;
+
+			__esp_interrupt(esp);
+			if (!(esp->flags & ESP_FLAG_QUICKIRQ_CHECK))
+				break;
+			esp->flags &= ~ESP_FLAG_QUICKIRQ_CHECK;
+
+			for (i = 0; i < ESP_QUICKIRQ_LIMIT; i++) {
+				if (esp->ops->irq_pending(esp))
+					break;
+			}
+			if (i == ESP_QUICKIRQ_LIMIT)
+				break;
+		}
+	}
+	spin_unlock_irqrestore(esp->host->host_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(scsi_esp_intr);
+
+static void __devinit esp_get_revision(struct esp *esp)
+{
+	u8 val;
+
+	esp->config1 = (ESP_CONFIG1_PENABLE | (esp->scsi_id & 7));
+	esp->config2 = (ESP_CONFIG2_SCSI2ENAB | ESP_CONFIG2_REGPARITY);
+	esp_write8(esp->config2, ESP_CFG2);
+
+	val = esp_read8(ESP_CFG2);
+	val &= ~ESP_CONFIG2_MAGIC;
+	if (val != (ESP_CONFIG2_SCSI2ENAB | ESP_CONFIG2_REGPARITY)) {
+		/* If what we write to cfg2 does not come back, cfg2 is not
+		 * implemented, therefore this must be a plain esp100.
+		 */
+		esp->rev = ESP100;
+	} else {
+		esp->config2 = 0;
+		esp_set_all_config3(esp, 5);
+		esp->prev_cfg3 = 5;
+		esp_write8(esp->config2, ESP_CFG2);
+		esp_write8(0, ESP_CFG3);
+		esp_write8(esp->prev_cfg3, ESP_CFG3);
+
+		val = esp_read8(ESP_CFG3);
+		if (val != 5) {
+			/* The cfg2 register is implemented, however
+			 * cfg3 is not, must be esp100a.
+			 */
+			esp->rev = ESP100A;
+		} else {
+			esp_set_all_config3(esp, 0);
+			esp->prev_cfg3 = 0;
+			esp_write8(esp->prev_cfg3, ESP_CFG3);
+
+			/* All of cfg{1,2,3} implemented, must be one of
+			 * the fas variants, figure out which one.
+			 */
+			if (esp->cfact == 0 || esp->cfact > ESP_CCF_F5) {
+				esp->rev = FAST;
+				esp->sync_defp = SYNC_DEFP_FAST;
+			} else {
+				esp->rev = ESP236;
+			}
+			esp->config2 = 0;
+			esp_write8(esp->config2, ESP_CFG2);
+		}
+	}
+}
+
+static void __devinit esp_init_swstate(struct esp *esp)
+{
+	int i;
+
+	INIT_LIST_HEAD(&esp->queued_cmds);
+	INIT_LIST_HEAD(&esp->active_cmds);
+	INIT_LIST_HEAD(&esp->esp_cmd_pool);
+
+	/* Start with a clear state, domain validation (via ->slave_configure,
+	 * spi_dv_device()) will attempt to enable SYNC, WIDE, and tagged
+	 * commands.
+	 */
+	for (i = 0 ; i < ESP_MAX_TARGET; i++) {
+		esp->target[i].flags = 0;
+		esp->target[i].nego_goal_period = 0;
+		esp->target[i].nego_goal_offset = 0;
+		esp->target[i].nego_goal_width = 0;
+		esp->target[i].nego_goal_tags = 0;
+	}
+}
+
+/* This places the ESP into a known state at boot time. */
+static void __devinit esp_bootup_reset(struct esp *esp)
+{
+	u8 val;
+
+	/* Reset the DMA */
+	esp->ops->reset_dma(esp);
+
+	/* Reset the ESP */
+	esp_reset_esp(esp);
+
+	/* Reset the SCSI bus, but tell ESP not to generate an irq */
+	val = esp_read8(ESP_CFG1);
+	val |= ESP_CONFIG1_SRRDISAB;
+	esp_write8(val, ESP_CFG1);
+
+	scsi_esp_cmd(esp, ESP_CMD_RS);
+	udelay(400);
+
+	esp_write8(esp->config1, ESP_CFG1);
+
+	/* Eat any bitrot in the chip and we are done... */
+	esp_read8(ESP_INTRPT);
+}
+
+static void __devinit esp_set_clock_params(struct esp *esp)
+{
+	int fmhz;
+	u8 ccf;
+
+	/* This is getting messy but it has to be done correctly or else
+	 * you get weird behavior all over the place.  We are trying to
+	 * basically figure out three pieces of information.
+	 *
+	 * a) Clock Conversion Factor
+	 *
+	 *    This is a representation of the input crystal clock frequency
+	 *    going into the ESP on this machine.  Any operation whose timing
+	 *    is longer than 400ns depends on this value being correct.  For
+	 *    example, you'll get blips for arbitration/selection during high
+	 *    load or with multiple targets if this is not set correctly.
+	 *
+	 * b) Selection Time-Out
+	 *
+	 *    The ESP isn't very bright and will arbitrate for the bus and try
+	 *    to select a target forever if you let it.  This value tells the
+	 *    ESP when it has taken too long to negotiate and that it should
+	 *    interrupt the CPU so we can see what happened.  The value is
+	 *    computed as follows (from NCR/Symbios chip docs).
+	 *
+	 *          (Time Out Period) *  (Input Clock)
+	 *    STO = ----------------------------------
+	 *          (8192) * (Clock Conversion Factor)
+	 *
+	 *    We use a time out period of 250ms (ESP_BUS_TIMEOUT).
+	 *
+	 * c) Imperical constants for synchronous offset and transfer period
+         *    register values
+	 *
+	 *    This entails the smallest and largest sync period we could ever
+	 *    handle on this ESP.
+	 */
+	fmhz = esp->cfreq;
+
+	ccf = ((fmhz / 1000000) + 4) / 5;
+	if (ccf == 1)
+		ccf = 2;
+
+	/* If we can't find anything reasonable, just assume 20MHZ.
+	 * This is the clock frequency of the older sun4c's where I've
+	 * been unable to find the clock-frequency PROM property.  All
+	 * other machines provide useful values it seems.
+	 */
+	if (fmhz <= 5000000 || ccf < 1 || ccf > 8) {
+		fmhz = 20000000;
+		ccf = 4;
+	}
+
+	esp->cfact = (ccf == 8 ? 0 : ccf);
+	esp->cfreq = fmhz;
+	esp->ccycle = ESP_MHZ_TO_CYCLE(fmhz);
+	esp->ctick = ESP_TICK(ccf, esp->ccycle);
+	esp->neg_defp = ESP_NEG_DEFP(fmhz, ccf);
+	esp->sync_defp = SYNC_DEFP_SLOW;
+}
+
+static const char *esp_chip_names[] = {
+	"ESP100",
+	"ESP100A",
+	"ESP236",
+	"FAS236",
+	"FAS100A",
+	"FAST",
+	"FASHME",
+};
+
+static struct scsi_transport_template *esp_transport_template;
+
+int __devinit scsi_esp_register(struct esp *esp, struct device *dev)
+{
+	static int instance;
+	int err;
+
+	esp->host->transportt = esp_transport_template;
+	esp->host->max_lun = ESP_MAX_LUN;
+	esp->host->cmd_per_lun = 2;
+
+	esp_set_clock_params(esp);
+
+	esp_get_revision(esp);
+
+	esp_init_swstate(esp);
+
+	esp_bootup_reset(esp);
+
+	printk(KERN_INFO PFX "esp%u, regs[%1p:%1p] irq[%u]\n",
+	       esp->host->unique_id, esp->regs, esp->dma_regs,
+	       esp->host->irq);
+	printk(KERN_INFO PFX "esp%u is a %s, %u MHz (ccf=%u), SCSI ID %u\n",
+	       esp->host->unique_id, esp_chip_names[esp->rev],
+	       esp->cfreq / 1000000, esp->cfact, esp->scsi_id);
+
+	/* Let the SCSI bus reset settle. */
+	ssleep(esp_bus_reset_settle);
+
+	err = scsi_add_host(esp->host, dev);
+	if (err)
+		return err;
+
+	esp->host->unique_id = instance++;
+
+	scsi_scan_host(esp->host);
+
+	return 0;
+}
+EXPORT_SYMBOL(scsi_esp_register);
+
+void __devexit scsi_esp_unregister(struct esp *esp)
+{
+	scsi_remove_host(esp->host);
+}
+EXPORT_SYMBOL(scsi_esp_unregister);
+
+static int esp_slave_alloc(struct scsi_device *dev)
+{
+	struct esp *esp = host_to_esp(dev->host);
+	struct esp_target_data *tp = &esp->target[dev->id];
+	struct esp_lun_data *lp;
+
+	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
+	if (!lp)
+		return -ENOMEM;
+	dev->hostdata = lp;
+
+	tp->starget = dev->sdev_target;
+
+	spi_min_period(tp->starget) = esp->min_period;
+	spi_max_offset(tp->starget) = 15;
+
+	if (esp->flags & ESP_FLAG_WIDE_CAPABLE)
+		spi_max_width(tp->starget) = 1;
+	else
+		spi_max_width(tp->starget) = 0;
+
+	return 0;
+}
+
+static int esp_slave_configure(struct scsi_device *dev)
+{
+	struct esp *esp = host_to_esp(dev->host);
+	struct esp_target_data *tp = &esp->target[dev->id];
+	int goal_tags, queue_depth;
+
+	goal_tags = 0;
+
+	if (dev->tagged_supported) {
+		/* XXX make this configurable somehow XXX */
+		goal_tags = ESP_DEFAULT_TAGS;
+
+		if (goal_tags > ESP_MAX_TAG)
+			goal_tags = ESP_MAX_TAG;
+	}
+
+	queue_depth = goal_tags;
+	if (queue_depth < dev->host->cmd_per_lun)
+		queue_depth = dev->host->cmd_per_lun;
+
+	if (goal_tags) {
+		scsi_set_tag_type(dev, MSG_ORDERED_TAG);
+		scsi_activate_tcq(dev, queue_depth);
+	} else {
+		scsi_deactivate_tcq(dev, queue_depth);
+	}
+	tp->flags |= ESP_TGT_DISCONNECT;
+
+	if (!spi_initial_dv(dev->sdev_target))
+		spi_dv_device(dev);
+
+	return 0;
+}
+
+static void esp_slave_destroy(struct scsi_device *dev)
+{
+	struct esp_lun_data *lp = dev->hostdata;
+
+	kfree(lp);
+	dev->hostdata = NULL;
+}
+
+static int esp_eh_abort_handler(struct scsi_cmnd *cmd)
+{
+	struct esp *esp = host_to_esp(cmd->device->host);
+	struct esp_cmd_entry *ent, *tmp;
+	struct completion eh_done;
+	unsigned long flags;
+
+	/* XXX This helps a lot with debugging but might be a bit
+	 * XXX much for the final driver.
+	 */
+	spin_lock_irqsave(esp->host->host_lock, flags);
+	printk(KERN_ERR PFX "esp%d: Aborting command [%p:%02x]\n",
+	       esp->host->unique_id, cmd, cmd->cmnd[0]);
+	ent = esp->active_cmd;
+	if (ent)
+		printk(KERN_ERR PFX "esp%d: Current command [%p:%02x]\n",
+		       esp->host->unique_id, ent->cmd, ent->cmd->cmnd[0]);
+	list_for_each_entry(ent, &esp->queued_cmds, list) {
+		printk(KERN_ERR PFX "esp%d: Queued command [%p:%02x]\n",
+		       esp->host->unique_id, ent->cmd, ent->cmd->cmnd[0]);
+	}
+	list_for_each_entry(ent, &esp->active_cmds, list) {
+		printk(KERN_ERR PFX "esp%d: Active command [%p:%02x]\n",
+		       esp->host->unique_id, ent->cmd, ent->cmd->cmnd[0]);
+	}
+	esp_dump_cmd_log(esp);
+	spin_unlock_irqrestore(esp->host->host_lock, flags);
+
+	spin_lock_irqsave(esp->host->host_lock, flags);
+
+	ent = NULL;
+	list_for_each_entry(tmp, &esp->queued_cmds, list) {
+		if (tmp->cmd == cmd) {
+			ent = tmp;
+			break;
+		}
+	}
+
+	if (ent) {
+		/* Easiest case, we didn't even issue the command
+		 * yet so it is trivial to abort.
+		 */
+		list_del(&ent->list);
+
+		cmd->result = DID_ABORT << 16;
+		cmd->scsi_done(cmd);
+
+		esp_put_ent(esp, ent);
+
+		goto out_success;
+	}
+
+	init_completion(&eh_done);
+
+	ent = esp->active_cmd;
+	if (ent && ent->cmd == cmd) {
+		/* Command is the currently active command on
+		 * the bus.  If we already have an output message
+		 * pending, no dice.
+		 */
+		if (esp->msg_out_len)
+			goto out_failure;
+
+		/* Send out an abort, encouraging the target to
+		 * go to MSGOUT phase by asserting ATN.
+		 */
+		esp->msg_out[0] = ABORT_TASK_SET;
+		esp->msg_out_len = 1;
+		ent->eh_done = &eh_done;
+
+		scsi_esp_cmd(esp, ESP_CMD_SATN);
+	} else {
+		/* The command is disconnected.  This is not easy to
+		 * abort.  For now we fail and let the scsi error
+		 * handling layer go try a scsi bus reset or host
+		 * reset.
+		 *
+		 * What we could do is put together a scsi command
+		 * solely for the purpose of sending an abort message
+		 * to the target.  Coming up with all the code to
+		 * cook up scsi commands, special case them everywhere,
+		 * etc. is for questionable gain and it would be better
+		 * if the generic scsi error handling layer could do at
+		 * least some of that for us.
+		 *
+		 * Anyways this is an area for potential future improvement
+		 * in this driver.
+		 */
+		goto out_failure;
+	}
+
+	spin_unlock_irqrestore(esp->host->host_lock, flags);
+
+	if (!wait_for_completion_timeout(&eh_done, 5 * HZ)) {
+		spin_lock_irqsave(esp->host->host_lock, flags);
+		ent->eh_done = NULL;
+		spin_unlock_irqrestore(esp->host->host_lock, flags);
+
+		return FAILED;
+	}
+
+	return SUCCESS;
+
+out_success:
+	spin_unlock_irqrestore(esp->host->host_lock, flags);
+	return SUCCESS;
+
+out_failure:
+	/* XXX This might be a good location to set ESP_TGT_BROKEN
+	 * XXX since we know which target/lun in particular is
+	 * XXX causing trouble.
+	 */
+	spin_unlock_irqrestore(esp->host->host_lock, flags);
+	return FAILED;
+}
+
+static int esp_eh_bus_reset_handler(struct scsi_cmnd *cmd)
+{
+	struct esp *esp = host_to_esp(cmd->device->host);
+	struct completion eh_reset;
+	unsigned long flags;
+
+	init_completion(&eh_reset);
+
+	spin_lock_irqsave(esp->host->host_lock, flags);
+
+	esp->eh_reset = &eh_reset;
+
+	/* XXX This is too simple... We should add lots of
+	 * XXX checks here so that if we find that the chip is
+	 * XXX very wedged we return failure immediately so
+	 * XXX that we can perform a full chip reset.
+	 */
+	esp->flags |= ESP_FLAG_RESETTING;
+	scsi_esp_cmd(esp, ESP_CMD_RS);
+
+	spin_unlock_irqrestore(esp->host->host_lock, flags);
+
+	ssleep(esp_bus_reset_settle);
+
+	if (!wait_for_completion_timeout(&eh_reset, 5 * HZ)) {
+		spin_lock_irqsave(esp->host->host_lock, flags);
+		esp->eh_reset = NULL;
+		spin_unlock_irqrestore(esp->host->host_lock, flags);
+
+		return FAILED;
+	}
+
+	return SUCCESS;
+}
+
+/* All bets are off, reset the entire device.  */
+static int esp_eh_host_reset_handler(struct scsi_cmnd *cmd)
+{
+	struct esp *esp = host_to_esp(cmd->device->host);
+	unsigned long flags;
+
+	spin_lock_irqsave(esp->host->host_lock, flags);
+	esp_bootup_reset(esp);
+	esp_reset_cleanup(esp);
+	spin_unlock_irqrestore(esp->host->host_lock, flags);
+
+	ssleep(esp_bus_reset_settle);
+
+	return SUCCESS;
+}
+
+static const char *esp_info(struct Scsi_Host *host)
+{
+	return "esp";
+}
+
+struct scsi_host_template scsi_esp_template = {
+	.module			= THIS_MODULE,
+	.name			= "esp",
+	.info			= esp_info,
+	.queuecommand		= esp_queuecommand,
+	.slave_alloc		= esp_slave_alloc,
+	.slave_configure	= esp_slave_configure,
+	.slave_destroy		= esp_slave_destroy,
+	.eh_abort_handler	= esp_eh_abort_handler,
+	.eh_bus_reset_handler	= esp_eh_bus_reset_handler,
+	.eh_host_reset_handler	= esp_eh_host_reset_handler,
+	.can_queue		= 7,
+	.this_id		= 7,
+	.sg_tablesize		= SG_ALL,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.max_sectors		= 0xffff,
+	.skip_settle_delay	= 1,
+};
+EXPORT_SYMBOL(scsi_esp_template);
+
+static void esp_get_signalling(struct Scsi_Host *host)
+{
+	struct esp *esp = host_to_esp(host);
+	enum spi_signal_type type;
+
+	if (esp->flags & ESP_FLAG_DIFFERENTIAL)
+		type = SPI_SIGNAL_HVD;
+	else
+		type = SPI_SIGNAL_SE;
+
+	spi_signalling(host) = type;
+}
+
+static void esp_set_offset(struct scsi_target *target, int offset)
+{
+	struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+	struct esp *esp = host_to_esp(host);
+	struct esp_target_data *tp = &esp->target[target->id];
+
+	tp->nego_goal_offset = offset;
+	tp->flags |= ESP_TGT_CHECK_NEGO;
+}
+
+static void esp_set_period(struct scsi_target *target, int period)
+{
+	struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+	struct esp *esp = host_to_esp(host);
+	struct esp_target_data *tp = &esp->target[target->id];
+
+	tp->nego_goal_period = period;
+	tp->flags |= ESP_TGT_CHECK_NEGO;
+}
+
+static void esp_set_width(struct scsi_target *target, int width)
+{
+	struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+	struct esp *esp = host_to_esp(host);
+	struct esp_target_data *tp = &esp->target[target->id];
+
+	tp->nego_goal_width = (width ? 1 : 0);
+	tp->flags |= ESP_TGT_CHECK_NEGO;
+}
+
+static struct spi_function_template esp_transport_ops = {
+	.set_offset		= esp_set_offset,
+	.show_offset		= 1,
+	.set_period		= esp_set_period,
+	.show_period		= 1,
+	.set_width		= esp_set_width,
+	.show_width		= 1,
+	.get_signalling		= esp_get_signalling,
+};
+
+static int __init esp_init(void)
+{
+	BUILD_BUG_ON(sizeof(struct scsi_pointer) <
+		     sizeof(struct esp_cmd_priv));
+
+	esp_transport_template = spi_attach_transport(&esp_transport_ops);
+	if (!esp_transport_template)
+		return -ENODEV;
+
+	return 0;
+}
+
+static void __exit esp_exit(void)
+{
+	spi_release_transport(esp_transport_template);
+}
+
+MODULE_DESCRIPTION("ESP SCSI driver core");
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+module_param(esp_bus_reset_settle, int, 0);
+MODULE_PARM_DESC(esp_bus_reset_settle,
+		 "ESP scsi bus reset delay in seconds");
+
+module_param(esp_debug, int, 0);
+MODULE_PARM_DESC(esp_debug,
+"ESP bitmapped debugging message enable value:\n"
+"	0x00000001	Log interrupt events\n"
+"	0x00000002	Log scsi commands\n"
+"	0x00000004	Log resets\n"
+"	0x00000008	Log message in events\n"
+"	0x00000010	Log message out events\n"
+"	0x00000020	Log command completion\n"
+"	0x00000040	Log disconnects\n"
+"	0x00000080	Log data start\n"
+"	0x00000100	Log data done\n"
+"	0x00000200	Log reconnects\n"
+"	0x00000400	Log auto-sense data\n"
+);
+
+module_init(esp_init);
+module_exit(esp_exit);
diff --git a/drivers/scsi/esp_scsi.h b/drivers/scsi/esp_scsi.h
new file mode 100644
index 00000000000..8d4a6690401
--- /dev/null
+++ b/drivers/scsi/esp_scsi.h
@@ -0,0 +1,560 @@
+/* esp_scsi.h: Defines and structures for the ESP drier.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef _ESP_SCSI_H
+#define _ESP_SCSI_H
+
+					/* Access    Description      Offset */
+#define ESP_TCLOW	0x00UL		/* rw  Low bits transfer count 0x00  */
+#define ESP_TCMED	0x01UL		/* rw  Mid bits transfer count 0x04  */
+#define ESP_FDATA	0x02UL		/* rw  FIFO data bits          0x08  */
+#define ESP_CMD		0x03UL		/* rw  SCSI command bits       0x0c  */
+#define ESP_STATUS	0x04UL		/* ro  ESP status register     0x10  */
+#define ESP_BUSID	ESP_STATUS	/* wo  BusID for sel/resel     0x10  */
+#define ESP_INTRPT	0x05UL		/* ro  Kind of interrupt       0x14  */
+#define ESP_TIMEO	ESP_INTRPT	/* wo  Timeout for sel/resel   0x14  */
+#define ESP_SSTEP	0x06UL		/* ro  Sequence step register  0x18  */
+#define ESP_STP		ESP_SSTEP	/* wo  Transfer period/sync    0x18  */
+#define ESP_FFLAGS	0x07UL		/* ro  Bits current FIFO info  0x1c  */
+#define ESP_SOFF	ESP_FFLAGS	/* wo  Sync offset             0x1c  */
+#define ESP_CFG1	0x08UL		/* rw  First cfg register      0x20  */
+#define ESP_CFACT	0x09UL		/* wo  Clock conv factor       0x24  */
+#define ESP_STATUS2	ESP_CFACT	/* ro  HME status2 register    0x24  */
+#define ESP_CTEST	0x0aUL		/* wo  Chip test register      0x28  */
+#define ESP_CFG2	0x0bUL		/* rw  Second cfg register     0x2c  */
+#define ESP_CFG3	0x0cUL		/* rw  Third cfg register      0x30  */
+#define ESP_TCHI	0x0eUL		/* rw  High bits transf count  0x38  */
+#define ESP_UID		ESP_TCHI	/* ro  Unique ID code          0x38  */
+#define FAS_RLO		ESP_TCHI	/* rw  HME extended counter    0x38  */
+#define ESP_FGRND	0x0fUL		/* rw  Data base for fifo      0x3c  */
+#define FAS_RHI		ESP_FGRND	/* rw  HME extended counter    0x3c  */
+
+#define SBUS_ESP_REG_SIZE	0x40UL
+
+/* Bitfield meanings for the above registers. */
+
+/* ESP config reg 1, read-write, found on all ESP chips */
+#define ESP_CONFIG1_ID        0x07      /* My BUS ID bits */
+#define ESP_CONFIG1_CHTEST    0x08      /* Enable ESP chip tests */
+#define ESP_CONFIG1_PENABLE   0x10      /* Enable parity checks */
+#define ESP_CONFIG1_PARTEST   0x20      /* Parity test mode enabled? */
+#define ESP_CONFIG1_SRRDISAB  0x40      /* Disable SCSI reset reports */
+#define ESP_CONFIG1_SLCABLE   0x80      /* Enable slow cable mode */
+
+/* ESP config reg 2, read-write, found only on esp100a+esp200+esp236 chips */
+#define ESP_CONFIG2_DMAPARITY 0x01      /* enable DMA Parity (200,236) */
+#define ESP_CONFIG2_REGPARITY 0x02      /* enable reg Parity (200,236) */
+#define ESP_CONFIG2_BADPARITY 0x04      /* Bad parity target abort  */
+#define ESP_CONFIG2_SCSI2ENAB 0x08      /* Enable SCSI-2 features (tgtmode) */
+#define ESP_CONFIG2_HI        0x10      /* High Impedance DREQ ???  */
+#define ESP_CONFIG2_HMEFENAB  0x10      /* HME features enable */
+#define ESP_CONFIG2_BCM       0x20      /* Enable byte-ctrl (236)   */
+#define ESP_CONFIG2_DISPINT   0x20      /* Disable pause irq (hme) */
+#define ESP_CONFIG2_FENAB     0x40      /* Enable features (fas100,216) */
+#define ESP_CONFIG2_SPL       0x40      /* Enable status-phase latch (236) */
+#define ESP_CONFIG2_MKDONE    0x40      /* HME magic feature */
+#define ESP_CONFIG2_HME32     0x80      /* HME 32 extended */
+#define ESP_CONFIG2_MAGIC     0xe0      /* Invalid bits... */
+
+/* ESP config register 3 read-write, found only esp236+fas236+fas100a+hme chips */
+#define ESP_CONFIG3_FCLOCK    0x01     /* FAST SCSI clock rate (esp100a/hme) */
+#define ESP_CONFIG3_TEM       0x01     /* Enable thresh-8 mode (esp/fas236)  */
+#define ESP_CONFIG3_FAST      0x02     /* Enable FAST SCSI     (esp100a/hme) */
+#define ESP_CONFIG3_ADMA      0x02     /* Enable alternate-dma (esp/fas236)  */
+#define ESP_CONFIG3_TENB      0x04     /* group2 SCSI2 support (esp100a/hme) */
+#define ESP_CONFIG3_SRB       0x04     /* Save residual byte   (esp/fas236)  */
+#define ESP_CONFIG3_TMS       0x08     /* Three-byte msg's ok  (esp100a/hme) */
+#define ESP_CONFIG3_FCLK      0x08     /* Fast SCSI clock rate (esp/fas236)  */
+#define ESP_CONFIG3_IDMSG     0x10     /* ID message checking  (esp100a/hme) */
+#define ESP_CONFIG3_FSCSI     0x10     /* Enable FAST SCSI     (esp/fas236)  */
+#define ESP_CONFIG3_GTM       0x20     /* group2 SCSI2 support (esp/fas236)  */
+#define ESP_CONFIG3_IDBIT3    0x20     /* Bit 3 of HME SCSI-ID (hme)         */
+#define ESP_CONFIG3_TBMS      0x40     /* Three-byte msg's ok  (esp/fas236)  */
+#define ESP_CONFIG3_EWIDE     0x40     /* Enable Wide-SCSI     (hme)         */
+#define ESP_CONFIG3_IMS       0x80     /* ID msg chk'ng        (esp/fas236)  */
+#define ESP_CONFIG3_OBPUSH    0x80     /* Push odd-byte to dma (hme)         */
+
+/* ESP command register read-write */
+/* Group 1 commands:  These may be sent at any point in time to the ESP
+ *                    chip.  None of them can generate interrupts 'cept
+ *                    the "SCSI bus reset" command if you have not disabled
+ *                    SCSI reset interrupts in the config1 ESP register.
+ */
+#define ESP_CMD_NULL          0x00     /* Null command, ie. a nop */
+#define ESP_CMD_FLUSH         0x01     /* FIFO Flush */
+#define ESP_CMD_RC            0x02     /* Chip reset */
+#define ESP_CMD_RS            0x03     /* SCSI bus reset */
+
+/* Group 2 commands:  ESP must be an initiator and connected to a target
+ *                    for these commands to work.
+ */
+#define ESP_CMD_TI            0x10     /* Transfer Information */
+#define ESP_CMD_ICCSEQ        0x11     /* Initiator cmd complete sequence */
+#define ESP_CMD_MOK           0x12     /* Message okie-dokie */
+#define ESP_CMD_TPAD          0x18     /* Transfer Pad */
+#define ESP_CMD_SATN          0x1a     /* Set ATN */
+#define ESP_CMD_RATN          0x1b     /* De-assert ATN */
+
+/* Group 3 commands:  ESP must be in the MSGOUT or MSGIN state and be connected
+ *                    to a target as the initiator for these commands to work.
+ */
+#define ESP_CMD_SMSG          0x20     /* Send message */
+#define ESP_CMD_SSTAT         0x21     /* Send status */
+#define ESP_CMD_SDATA         0x22     /* Send data */
+#define ESP_CMD_DSEQ          0x23     /* Discontinue Sequence */
+#define ESP_CMD_TSEQ          0x24     /* Terminate Sequence */
+#define ESP_CMD_TCCSEQ        0x25     /* Target cmd cmplt sequence */
+#define ESP_CMD_DCNCT         0x27     /* Disconnect */
+#define ESP_CMD_RMSG          0x28     /* Receive Message */
+#define ESP_CMD_RCMD          0x29     /* Receive Command */
+#define ESP_CMD_RDATA         0x2a     /* Receive Data */
+#define ESP_CMD_RCSEQ         0x2b     /* Receive cmd sequence */
+
+/* Group 4 commands:  The ESP must be in the disconnected state and must
+ *                    not be connected to any targets as initiator for
+ *                    these commands to work.
+ */
+#define ESP_CMD_RSEL          0x40     /* Reselect */
+#define ESP_CMD_SEL           0x41     /* Select w/o ATN */
+#define ESP_CMD_SELA          0x42     /* Select w/ATN */
+#define ESP_CMD_SELAS         0x43     /* Select w/ATN & STOP */
+#define ESP_CMD_ESEL          0x44     /* Enable selection */
+#define ESP_CMD_DSEL          0x45     /* Disable selections */
+#define ESP_CMD_SA3           0x46     /* Select w/ATN3 */
+#define ESP_CMD_RSEL3         0x47     /* Reselect3 */
+
+/* This bit enables the ESP's DMA on the SBus */
+#define ESP_CMD_DMA           0x80     /* Do DMA? */
+
+/* ESP status register read-only */
+#define ESP_STAT_PIO          0x01     /* IO phase bit */
+#define ESP_STAT_PCD          0x02     /* CD phase bit */
+#define ESP_STAT_PMSG         0x04     /* MSG phase bit */
+#define ESP_STAT_PMASK        0x07     /* Mask of phase bits */
+#define ESP_STAT_TDONE        0x08     /* Transfer Completed */
+#define ESP_STAT_TCNT         0x10     /* Transfer Counter Is Zero */
+#define ESP_STAT_PERR         0x20     /* Parity error */
+#define ESP_STAT_SPAM         0x40     /* Real bad error */
+/* This indicates the 'interrupt pending' condition on esp236, it is a reserved
+ * bit on other revs of the ESP.
+ */
+#define ESP_STAT_INTR         0x80             /* Interrupt */
+
+/* The status register can be masked with ESP_STAT_PMASK and compared
+ * with the following values to determine the current phase the ESP
+ * (at least thinks it) is in.  For our purposes we also add our own
+ * software 'done' bit for our phase management engine.
+ */
+#define ESP_DOP   (0)                                       /* Data Out  */
+#define ESP_DIP   (ESP_STAT_PIO)                            /* Data In   */
+#define ESP_CMDP  (ESP_STAT_PCD)                            /* Command   */
+#define ESP_STATP (ESP_STAT_PCD|ESP_STAT_PIO)               /* Status    */
+#define ESP_MOP   (ESP_STAT_PMSG|ESP_STAT_PCD)              /* Message Out */
+#define ESP_MIP   (ESP_STAT_PMSG|ESP_STAT_PCD|ESP_STAT_PIO) /* Message In */
+
+/* HME only: status 2 register */
+#define ESP_STAT2_SCHBIT      0x01 /* Upper bits 3-7 of sstep enabled */
+#define ESP_STAT2_FFLAGS      0x02 /* The fifo flags are now latched */
+#define ESP_STAT2_XCNT        0x04 /* The transfer counter is latched */
+#define ESP_STAT2_CREGA       0x08 /* The command reg is active now */
+#define ESP_STAT2_WIDE        0x10 /* Interface on this adapter is wide */
+#define ESP_STAT2_F1BYTE      0x20 /* There is one byte at top of fifo */
+#define ESP_STAT2_FMSB        0x40 /* Next byte in fifo is most significant */
+#define ESP_STAT2_FEMPTY      0x80 /* FIFO is empty */
+
+/* ESP interrupt register read-only */
+#define ESP_INTR_S            0x01     /* Select w/o ATN */
+#define ESP_INTR_SATN         0x02     /* Select w/ATN */
+#define ESP_INTR_RSEL         0x04     /* Reselected */
+#define ESP_INTR_FDONE        0x08     /* Function done */
+#define ESP_INTR_BSERV        0x10     /* Bus service */
+#define ESP_INTR_DC           0x20     /* Disconnect */
+#define ESP_INTR_IC           0x40     /* Illegal command given */
+#define ESP_INTR_SR           0x80     /* SCSI bus reset detected */
+
+/* ESP sequence step register read-only */
+#define ESP_STEP_VBITS        0x07     /* Valid bits */
+#define ESP_STEP_ASEL         0x00     /* Selection&Arbitrate cmplt */
+#define ESP_STEP_SID          0x01     /* One msg byte sent */
+#define ESP_STEP_NCMD         0x02     /* Was not in command phase */
+#define ESP_STEP_PPC          0x03     /* Early phase chg caused cmnd
+                                        * bytes to be lost
+                                        */
+#define ESP_STEP_FINI4        0x04     /* Command was sent ok */
+
+/* Ho hum, some ESP's set the step register to this as well... */
+#define ESP_STEP_FINI5        0x05
+#define ESP_STEP_FINI6        0x06
+#define ESP_STEP_FINI7        0x07
+
+/* ESP chip-test register read-write */
+#define ESP_TEST_TARG         0x01     /* Target test mode */
+#define ESP_TEST_INI          0x02     /* Initiator test mode */
+#define ESP_TEST_TS           0x04     /* Tristate test mode */
+
+/* ESP unique ID register read-only, found on fas236+fas100a only */
+#define ESP_UID_F100A         0x00     /* ESP FAS100A  */
+#define ESP_UID_F236          0x02     /* ESP FAS236   */
+#define ESP_UID_REV           0x07     /* ESP revision */
+#define ESP_UID_FAM           0xf8     /* ESP family   */
+
+/* ESP fifo flags register read-only */
+/* Note that the following implies a 16 byte FIFO on the ESP. */
+#define ESP_FF_FBYTES         0x1f     /* Num bytes in FIFO */
+#define ESP_FF_ONOTZERO       0x20     /* offset ctr not zero (esp100) */
+#define ESP_FF_SSTEP          0xe0     /* Sequence step */
+
+/* ESP clock conversion factor register write-only */
+#define ESP_CCF_F0            0x00     /* 35.01MHz - 40MHz */
+#define ESP_CCF_NEVER         0x01     /* Set it to this and die */
+#define ESP_CCF_F2            0x02     /* 10MHz */
+#define ESP_CCF_F3            0x03     /* 10.01MHz - 15MHz */
+#define ESP_CCF_F4            0x04     /* 15.01MHz - 20MHz */
+#define ESP_CCF_F5            0x05     /* 20.01MHz - 25MHz */
+#define ESP_CCF_F6            0x06     /* 25.01MHz - 30MHz */
+#define ESP_CCF_F7            0x07     /* 30.01MHz - 35MHz */
+
+/* HME only... */
+#define ESP_BUSID_RESELID     0x10
+#define ESP_BUSID_CTR32BIT    0x40
+
+#define ESP_BUS_TIMEOUT        250     /* In milli-seconds */
+#define ESP_TIMEO_CONST       8192
+#define ESP_NEG_DEFP(mhz, cfact) \
+        ((ESP_BUS_TIMEOUT * ((mhz) / 1000)) / (8192 * (cfact)))
+#define ESP_MHZ_TO_CYCLE(mhertz)  ((1000000000) / ((mhertz) / 1000))
+#define ESP_TICK(ccf, cycle)  ((7682 * (ccf) * (cycle) / 1000))
+
+/* For slow to medium speed input clock rates we shoot for 5mb/s, but for high
+ * input clock rates we try to do 10mb/s although I don't think a transfer can
+ * even run that fast with an ESP even with DMA2 scatter gather pipelining.
+ */
+#define SYNC_DEFP_SLOW            0x32   /* 5mb/s  */
+#define SYNC_DEFP_FAST            0x19   /* 10mb/s */
+
+struct esp_cmd_priv {
+	union {
+		dma_addr_t	dma_addr;
+		int		num_sg;
+	} u;
+
+	unsigned int		cur_residue;
+	struct scatterlist	*cur_sg;
+	unsigned int		tot_residue;
+};
+#define ESP_CMD_PRIV(CMD)	((struct esp_cmd_priv *)(&(CMD)->SCp))
+
+enum esp_rev {
+	ESP100     = 0x00,  /* NCR53C90 - very broken */
+	ESP100A    = 0x01,  /* NCR53C90A */
+	ESP236     = 0x02,
+	FAS236     = 0x03,
+	FAS100A    = 0x04,
+	FAST       = 0x05,
+	FASHME     = 0x06,
+};
+
+struct esp_cmd_entry {
+	struct list_head	list;
+
+	struct scsi_cmnd	*cmd;
+
+	unsigned int		saved_cur_residue;
+	struct scatterlist	*saved_cur_sg;
+	unsigned int		saved_tot_residue;
+
+	u8			flags;
+#define ESP_CMD_FLAG_WRITE	0x01 /* DMA is a write */
+#define ESP_CMD_FLAG_ABORT	0x02 /* being aborted */
+#define ESP_CMD_FLAG_AUTOSENSE	0x04 /* Doing automatic REQUEST_SENSE */
+
+	u8			tag[2];
+
+	u8			status;
+	u8			message;
+
+	unsigned char		*sense_ptr;
+	unsigned char		*saved_sense_ptr;
+	dma_addr_t		sense_dma;
+
+	struct completion	*eh_done;
+};
+
+/* XXX make this configurable somehow XXX */
+#define ESP_DEFAULT_TAGS	16
+
+#define ESP_MAX_TARGET		16
+#define ESP_MAX_LUN		8
+#define ESP_MAX_TAG		256
+
+struct esp_lun_data {
+	struct esp_cmd_entry	*non_tagged_cmd;
+	int			num_tagged;
+	int			hold;
+	struct esp_cmd_entry	*tagged_cmds[ESP_MAX_TAG];
+};
+
+struct esp_target_data {
+	/* These are the ESP_STP, ESP_SOFF, and ESP_CFG3 register values which
+	 * match the currently negotiated settings for this target.  The SCSI
+	 * protocol values are maintained in spi_{offset,period,wide}(starget).
+	 */
+	u8			esp_period;
+	u8			esp_offset;
+	u8			esp_config3;
+
+	u8			flags;
+#define ESP_TGT_WIDE		0x01
+#define ESP_TGT_DISCONNECT	0x02
+#define ESP_TGT_NEGO_WIDE	0x04
+#define ESP_TGT_NEGO_SYNC	0x08
+#define ESP_TGT_CHECK_NEGO	0x40
+#define ESP_TGT_BROKEN		0x80
+
+	/* When ESP_TGT_CHECK_NEGO is set, on the next scsi command to this
+	 * device we will try to negotiate the following parameters.
+	 */
+	u8			nego_goal_period;
+	u8			nego_goal_offset;
+	u8			nego_goal_width;
+	u8			nego_goal_tags;
+
+	struct scsi_target	*starget;
+};
+
+struct esp_event_ent {
+	u8			type;
+#define ESP_EVENT_TYPE_EVENT	0x01
+#define ESP_EVENT_TYPE_CMD	0x02
+	u8			val;
+
+	u8			sreg;
+	u8			seqreg;
+	u8			sreg2;
+	u8			ireg;
+	u8			select_state;
+	u8			event;
+	u8			__pad;
+};
+
+struct esp;
+struct esp_driver_ops {
+	/* Read and write the ESP 8-bit registers.  On some
+	 * applications of the ESP chip the registers are at 4-byte
+	 * instead of 1-byte intervals.
+	 */
+	void (*esp_write8)(struct esp *esp, u8 val, unsigned long reg);
+	u8 (*esp_read8)(struct esp *esp, unsigned long reg);
+
+	/* Map and unmap DMA memory.  Eventually the driver will be
+	 * converted to the generic DMA API as soon as SBUS is able to
+	 * cope with that.  At such time we can remove this.
+	 */
+	dma_addr_t (*map_single)(struct esp *esp, void *buf,
+				 size_t sz, int dir);
+	int (*map_sg)(struct esp *esp, struct scatterlist *sg,
+		      int num_sg, int dir);
+	void (*unmap_single)(struct esp *esp, dma_addr_t addr,
+			     size_t sz, int dir);
+	void (*unmap_sg)(struct esp *esp, struct scatterlist *sg,
+			 int num_sg, int dir);
+
+	/* Return non-zero if there is an IRQ pending.  Usually this
+	 * status bit lives in the DMA controller sitting in front of
+	 * the ESP.  This has to be accurate or else the ESP interrupt
+	 * handler will not run.
+	 */
+	int (*irq_pending)(struct esp *esp);
+
+	/* Reset the DMA engine entirely.  On return, ESP interrupts
+	 * should be enabled.  Often the interrupt enabling is
+	 * controlled in the DMA engine.
+	 */
+	void (*reset_dma)(struct esp *esp);
+
+	/* Drain any pending DMA in the DMA engine after a transfer.
+	 * This is for writes to memory.
+	 */
+	void (*dma_drain)(struct esp *esp);
+
+	/* Invalidate the DMA engine after a DMA transfer.  */
+	void (*dma_invalidate)(struct esp *esp);
+
+	/* Setup an ESP command that will use a DMA transfer.
+	 * The 'esp_count' specifies what transfer length should be
+	 * programmed into the ESP transfer counter registers, whereas
+	 * the 'dma_count' is the length that should be programmed into
+	 * the DMA controller.  Usually they are the same.  If 'write'
+	 * is non-zero, this transfer is a write into memory.  'cmd'
+	 * holds the ESP command that should be issued by calling
+	 * scsi_esp_cmd() at the appropriate time while programming
+	 * the DMA hardware.
+	 */
+	void (*send_dma_cmd)(struct esp *esp, u32 dma_addr, u32 esp_count,
+			     u32 dma_count, int write, u8 cmd);
+
+	/* Return non-zero if the DMA engine is reporting an error
+	 * currently.
+	 */
+	int (*dma_error)(struct esp *esp);
+};
+
+#define ESP_MAX_MSG_SZ		8
+#define ESP_EVENT_LOG_SZ	32
+
+#define ESP_QUICKIRQ_LIMIT	100
+#define ESP_RESELECT_TAG_LIMIT	2500
+
+struct esp {
+	void __iomem		*regs;
+	void __iomem		*dma_regs;
+
+	const struct esp_driver_ops *ops;
+
+	struct Scsi_Host	*host;
+	void			*dev;
+
+	struct esp_cmd_entry	*active_cmd;
+
+	struct list_head	queued_cmds;
+	struct list_head	active_cmds;
+
+	u8			*command_block;
+	dma_addr_t		command_block_dma;
+
+	unsigned int		data_dma_len;
+
+	/* The following are used to determine the cause of an IRQ. Upon every
+	 * IRQ entry we synchronize these with the hardware registers.
+	 */
+	u8			sreg;
+	u8			seqreg;
+	u8			sreg2;
+	u8			ireg;
+
+	u32			prev_hme_dmacsr;
+	u8			prev_soff;
+	u8			prev_stp;
+	u8			prev_cfg3;
+	u8			__pad;
+
+	struct list_head	esp_cmd_pool;
+
+	struct esp_target_data	target[ESP_MAX_TARGET];
+
+	int			fifo_cnt;
+	u8			fifo[16];
+
+	struct esp_event_ent	esp_event_log[ESP_EVENT_LOG_SZ];
+	int			esp_event_cur;
+
+	u8			msg_out[ESP_MAX_MSG_SZ];
+	int			msg_out_len;
+
+	u8			msg_in[ESP_MAX_MSG_SZ];
+	int			msg_in_len;
+
+	u8			bursts;
+	u8			config1;
+	u8			config2;
+
+	u8			scsi_id;
+	u32			scsi_id_mask;
+
+	enum esp_rev		rev;
+
+	u32			flags;
+#define ESP_FLAG_DIFFERENTIAL	0x00000001
+#define ESP_FLAG_RESETTING	0x00000002
+#define ESP_FLAG_DOING_SLOWCMD	0x00000004
+#define ESP_FLAG_WIDE_CAPABLE	0x00000008
+#define ESP_FLAG_QUICKIRQ_CHECK	0x00000010
+
+	u8			select_state;
+#define ESP_SELECT_NONE		0x00 /* Not selecting */
+#define ESP_SELECT_BASIC	0x01 /* Select w/o MSGOUT phase */
+#define ESP_SELECT_MSGOUT	0x02 /* Select with MSGOUT */
+
+	/* When we are not selecting, we are expecting an event.  */
+	u8			event;
+#define ESP_EVENT_NONE		0x00
+#define ESP_EVENT_CMD_START	0x01
+#define ESP_EVENT_CMD_DONE	0x02
+#define ESP_EVENT_DATA_IN	0x03
+#define ESP_EVENT_DATA_OUT	0x04
+#define ESP_EVENT_DATA_DONE	0x05
+#define ESP_EVENT_MSGIN		0x06
+#define ESP_EVENT_MSGIN_MORE	0x07
+#define ESP_EVENT_MSGIN_DONE	0x08
+#define ESP_EVENT_MSGOUT	0x09
+#define ESP_EVENT_MSGOUT_DONE	0x0a
+#define ESP_EVENT_STATUS	0x0b
+#define ESP_EVENT_FREE_BUS	0x0c
+#define ESP_EVENT_CHECK_PHASE	0x0d
+#define ESP_EVENT_RESET		0x10
+
+	/* Probed in esp_get_clock_params() */
+	u32			cfact;
+	u32			cfreq;
+	u32			ccycle;
+	u32			ctick;
+	u32			neg_defp;
+	u32			sync_defp;
+
+	/* Computed in esp_reset_esp() */
+	u32			max_period;
+	u32			min_period;
+	u32			radelay;
+
+	/* Slow command state.  */
+	u8			*cmd_bytes_ptr;
+	int			cmd_bytes_left;
+
+	struct completion	*eh_reset;
+
+	struct sbus_dma		*dma;
+};
+
+#define host_to_esp(host)	((struct esp *)(host)->hostdata)
+
+/* A front-end driver for the ESP chip should do the following in
+ * it's device probe routine:
+ * 1) Allocate the host and private area using scsi_host_alloc()
+ *    with size 'sizeof(struct esp)'.  The first argument to
+ *    scsi_host_alloc() should be &scsi_esp_template.
+ * 2) Set host->max_id as appropriate.
+ * 3) Set esp->host to the scsi_host itself, and esp->dev
+ *    to the device object pointer.
+ * 4) Hook up esp->ops to the front-end implementation.
+ * 5) If the ESP chip supports wide transfers, set ESP_FLAG_WIDE_CAPABLE
+ *    in esp->flags.
+ * 6) Map the DMA and ESP chip registers.
+ * 7) DMA map the ESP command block, store the DMA address
+ *    in esp->command_block_dma.
+ * 8) Register the scsi_esp_intr() interrupt handler.
+ * 9) Probe for and provide the following chip properties:
+ *    esp->scsi_id (assign to esp->host->this_id too)
+ *    esp->scsi_id_mask
+ *    If ESP bus is differential, set ESP_FLAG_DIFFERENTIAL
+ *    esp->cfreq
+ *    DMA burst bit mask in esp->bursts, if necessary
+ * 10) Perform any actions necessary before the ESP device can
+ *     be programmed for the first time.  On some configs, for
+ *     example, the DMA engine has to be reset before ESP can
+ *     be programmed.
+ * 11) If necessary, call dev_set_drvdata() as needed.
+ * 12) Call scsi_esp_register() with prepared 'esp' structure
+ *     and a device pointer if possible.
+ * 13) Check scsi_esp_register() return value, release all resources
+ *     if an error was returned.
+ */
+extern struct scsi_host_template scsi_esp_template;
+extern int scsi_esp_register(struct esp *, struct device *);
+
+extern void scsi_esp_unregister(struct esp *);
+extern irqreturn_t scsi_esp_intr(int, void *);
+extern void scsi_esp_cmd(struct esp *, u8);
+
+#endif /* !(_ESP_SCSI_H) */
diff --git a/drivers/scsi/sun_esp.c b/drivers/scsi/sun_esp.c
new file mode 100644
index 00000000000..8c766bcd109
--- /dev/null
+++ b/drivers/scsi/sun_esp.c
@@ -0,0 +1,634 @@
+/* sun_esp.c: ESP front-end for Sparc SBUS systems.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/dma.h>
+
+#include <asm/sbus.h>
+
+#include <scsi/scsi_host.h>
+
+#include "esp_scsi.h"
+
+#define DRV_MODULE_NAME		"sun_esp"
+#define PFX DRV_MODULE_NAME	": "
+#define DRV_VERSION		"1.000"
+#define DRV_MODULE_RELDATE	"April 19, 2007"
+
+#define dma_read32(REG) \
+	sbus_readl(esp->dma_regs + (REG))
+#define dma_write32(VAL, REG) \
+	sbus_writel((VAL), esp->dma_regs + (REG))
+
+static int __devinit esp_sbus_find_dma(struct esp *esp, struct sbus_dev *dma_sdev)
+{
+	struct sbus_dev *sdev = esp->dev;
+	struct sbus_dma *dma;
+
+	if (dma_sdev != NULL) {
+		for_each_dvma(dma) {
+			if (dma->sdev == dma_sdev)
+				break;
+		}
+	} else {
+		for_each_dvma(dma) {
+			if (dma->sdev == NULL)
+				break;
+
+			/* If bus + slot are the same and it has the
+			 * correct OBP name, it's ours.
+			 */
+			if (sdev->bus == dma->sdev->bus &&
+			    sdev->slot == dma->sdev->slot &&
+			    (!strcmp(dma->sdev->prom_name, "dma") ||
+			     !strcmp(dma->sdev->prom_name, "espdma")))
+				break;
+		}
+	}
+
+	if (dma == NULL) {
+		printk(KERN_ERR PFX "[%s] Cannot find dma.\n",
+		       sdev->ofdev.node->full_name);
+		return -ENODEV;
+	}
+	esp->dma = dma;
+	esp->dma_regs = dma->regs;
+
+	return 0;
+
+}
+
+static int __devinit esp_sbus_map_regs(struct esp *esp, int hme)
+{
+	struct sbus_dev *sdev = esp->dev;
+	struct resource *res;
+
+	/* On HME, two reg sets exist, first is DVMA,
+	 * second is ESP registers.
+	 */
+	if (hme)
+		res = &sdev->resource[1];
+	else
+		res = &sdev->resource[0];
+
+	esp->regs = sbus_ioremap(res, 0, SBUS_ESP_REG_SIZE, "ESP");
+	if (!esp->regs)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int __devinit esp_sbus_map_command_block(struct esp *esp)
+{
+	struct sbus_dev *sdev = esp->dev;
+
+	esp->command_block = sbus_alloc_consistent(sdev, 16,
+						   &esp->command_block_dma);
+	if (!esp->command_block)
+		return -ENOMEM;
+	return 0;
+}
+
+static int __devinit esp_sbus_register_irq(struct esp *esp)
+{
+	struct Scsi_Host *host = esp->host;
+	struct sbus_dev *sdev = esp->dev;
+
+	host->irq = sdev->irqs[0];
+	return request_irq(host->irq, scsi_esp_intr, IRQF_SHARED, "ESP", esp);
+}
+
+static void __devinit esp_get_scsi_id(struct esp *esp)
+{
+	struct sbus_dev *sdev = esp->dev;
+	struct device_node *dp = sdev->ofdev.node;
+
+	esp->scsi_id = of_getintprop_default(dp, "initiator-id", 0xff);
+	if (esp->scsi_id != 0xff)
+		goto done;
+
+	esp->scsi_id = of_getintprop_default(dp, "scsi-initiator-id", 0xff);
+	if (esp->scsi_id != 0xff)
+		goto done;
+
+	if (!sdev->bus) {
+		/* SUN4 */
+		esp->scsi_id = 7;
+		goto done;
+	}
+
+	esp->scsi_id = of_getintprop_default(sdev->bus->ofdev.node,
+					     "scsi-initiator-id", 7);
+
+done:
+	esp->host->this_id = esp->scsi_id;
+	esp->scsi_id_mask = (1 << esp->scsi_id);
+}
+
+static void __devinit esp_get_differential(struct esp *esp)
+{
+	struct sbus_dev *sdev = esp->dev;
+	struct device_node *dp = sdev->ofdev.node;
+
+	if (of_find_property(dp, "differential", NULL))
+		esp->flags |= ESP_FLAG_DIFFERENTIAL;
+	else
+		esp->flags &= ~ESP_FLAG_DIFFERENTIAL;
+}
+
+static void __devinit esp_get_clock_params(struct esp *esp)
+{
+	struct sbus_dev *sdev = esp->dev;
+	struct device_node *dp = sdev->ofdev.node;
+	struct device_node *bus_dp;
+	int fmhz;
+
+	bus_dp = NULL;
+	if (sdev != NULL && sdev->bus != NULL)
+		bus_dp = sdev->bus->ofdev.node;
+
+	fmhz = of_getintprop_default(dp, "clock-frequency", 0);
+	if (fmhz == 0)
+		fmhz = (!bus_dp) ? 0 :
+			of_getintprop_default(bus_dp, "clock-frequency", 0);
+
+	esp->cfreq = fmhz;
+}
+
+static void __devinit esp_get_bursts(struct esp *esp, struct sbus_dev *dma)
+{
+	struct sbus_dev *sdev = esp->dev;
+	struct device_node *dp = sdev->ofdev.node;
+	u8 bursts;
+
+	bursts = of_getintprop_default(dp, "burst-sizes", 0xff);
+	if (dma) {
+		struct device_node *dma_dp = dma->ofdev.node;
+		u8 val = of_getintprop_default(dma_dp, "burst-sizes", 0xff);
+		if (val != 0xff)
+			bursts &= val;
+	}
+
+	if (sdev->bus) {
+		u8 val = of_getintprop_default(sdev->bus->ofdev.node,
+					       "burst-sizes", 0xff);
+		if (val != 0xff)
+			bursts &= val;
+	}
+
+	if (bursts == 0xff ||
+	    (bursts & DMA_BURST16) == 0 ||
+	    (bursts & DMA_BURST32) == 0)
+		bursts = (DMA_BURST32 - 1);
+
+	esp->bursts = bursts;
+}
+
+static void __devinit esp_sbus_get_props(struct esp *esp, struct sbus_dev *espdma)
+{
+	esp_get_scsi_id(esp);
+	esp_get_differential(esp);
+	esp_get_clock_params(esp);
+	esp_get_bursts(esp, espdma);
+}
+
+static void sbus_esp_write8(struct esp *esp, u8 val, unsigned long reg)
+{
+	sbus_writeb(val, esp->regs + (reg * 4UL));
+}
+
+static u8 sbus_esp_read8(struct esp *esp, unsigned long reg)
+{
+	return sbus_readb(esp->regs + (reg * 4UL));
+}
+
+static dma_addr_t sbus_esp_map_single(struct esp *esp, void *buf,
+				      size_t sz, int dir)
+{
+	return sbus_map_single(esp->dev, buf, sz, dir);
+}
+
+static int sbus_esp_map_sg(struct esp *esp, struct scatterlist *sg,
+				  int num_sg, int dir)
+{
+	return sbus_map_sg(esp->dev, sg, num_sg, dir);
+}
+
+static void sbus_esp_unmap_single(struct esp *esp, dma_addr_t addr,
+				  size_t sz, int dir)
+{
+	sbus_unmap_single(esp->dev, addr, sz, dir);
+}
+
+static void sbus_esp_unmap_sg(struct esp *esp, struct scatterlist *sg,
+			      int num_sg, int dir)
+{
+	sbus_unmap_sg(esp->dev, sg, num_sg, dir);
+}
+
+static int sbus_esp_irq_pending(struct esp *esp)
+{
+	if (dma_read32(DMA_CSR) & (DMA_HNDL_INTR | DMA_HNDL_ERROR))
+		return 1;
+	return 0;
+}
+
+static void sbus_esp_reset_dma(struct esp *esp)
+{
+	int can_do_burst16, can_do_burst32, can_do_burst64;
+	int can_do_sbus64, lim;
+	u32 val;
+
+	can_do_burst16 = (esp->bursts & DMA_BURST16) != 0;
+	can_do_burst32 = (esp->bursts & DMA_BURST32) != 0;
+	can_do_burst64 = 0;
+	can_do_sbus64 = 0;
+	if (sbus_can_dma_64bit(esp->dev))
+		can_do_sbus64 = 1;
+	if (sbus_can_burst64(esp->sdev))
+		can_do_burst64 = (esp->bursts & DMA_BURST64) != 0;
+
+	/* Put the DVMA into a known state. */
+	if (esp->dma->revision != dvmahme) {
+		val = dma_read32(DMA_CSR);
+		dma_write32(val | DMA_RST_SCSI, DMA_CSR);
+		dma_write32(val & ~DMA_RST_SCSI, DMA_CSR);
+	}
+	switch (esp->dma->revision) {
+	case dvmahme:
+		dma_write32(DMA_RESET_FAS366, DMA_CSR);
+		dma_write32(DMA_RST_SCSI, DMA_CSR);
+
+		esp->prev_hme_dmacsr = (DMA_PARITY_OFF | DMA_2CLKS |
+					DMA_SCSI_DISAB | DMA_INT_ENAB);
+
+		esp->prev_hme_dmacsr &= ~(DMA_ENABLE | DMA_ST_WRITE |
+					  DMA_BRST_SZ);
+
+		if (can_do_burst64)
+			esp->prev_hme_dmacsr |= DMA_BRST64;
+		else if (can_do_burst32)
+			esp->prev_hme_dmacsr |= DMA_BRST32;
+
+		if (can_do_sbus64) {
+			esp->prev_hme_dmacsr |= DMA_SCSI_SBUS64;
+			sbus_set_sbus64(esp->dev, esp->bursts);
+		}
+
+		lim = 1000;
+		while (dma_read32(DMA_CSR) & DMA_PEND_READ) {
+			if (--lim == 0) {
+				printk(KERN_ALERT PFX "esp%d: DMA_PEND_READ "
+				       "will not clear!\n",
+				       esp->host->unique_id);
+				break;
+			}
+			udelay(1);
+		}
+
+		dma_write32(0, DMA_CSR);
+		dma_write32(esp->prev_hme_dmacsr, DMA_CSR);
+
+		dma_write32(0, DMA_ADDR);
+		break;
+
+	case dvmarev2:
+		if (esp->rev != ESP100) {
+			val = dma_read32(DMA_CSR);
+			dma_write32(val | DMA_3CLKS, DMA_CSR);
+		}
+		break;
+
+	case dvmarev3:
+		val = dma_read32(DMA_CSR);
+		val &= ~DMA_3CLKS;
+		val |= DMA_2CLKS;
+		if (can_do_burst32) {
+			val &= ~DMA_BRST_SZ;
+			val |= DMA_BRST32;
+		}
+		dma_write32(val, DMA_CSR);
+		break;
+
+	case dvmaesc1:
+		val = dma_read32(DMA_CSR);
+		val |= DMA_ADD_ENABLE;
+		val &= ~DMA_BCNT_ENAB;
+		if (!can_do_burst32 && can_do_burst16) {
+			val |= DMA_ESC_BURST;
+		} else {
+			val &= ~(DMA_ESC_BURST);
+		}
+		dma_write32(val, DMA_CSR);
+		break;
+
+	default:
+		break;
+	}
+
+	/* Enable interrupts.  */
+	val = dma_read32(DMA_CSR);
+	dma_write32(val | DMA_INT_ENAB, DMA_CSR);
+}
+
+static void sbus_esp_dma_drain(struct esp *esp)
+{
+	u32 csr;
+	int lim;
+
+	if (esp->dma->revision == dvmahme)
+		return;
+
+	csr = dma_read32(DMA_CSR);
+	if (!(csr & DMA_FIFO_ISDRAIN))
+		return;
+
+	if (esp->dma->revision != dvmarev3 && esp->dma->revision != dvmaesc1)
+		dma_write32(csr | DMA_FIFO_STDRAIN, DMA_CSR);
+
+	lim = 1000;
+	while (dma_read32(DMA_CSR) & DMA_FIFO_ISDRAIN) {
+		if (--lim == 0) {
+			printk(KERN_ALERT PFX "esp%d: DMA will not drain!\n",
+			       esp->host->unique_id);
+			break;
+		}
+		udelay(1);
+	}
+}
+
+static void sbus_esp_dma_invalidate(struct esp *esp)
+{
+	if (esp->dma->revision == dvmahme) {
+		dma_write32(DMA_RST_SCSI, DMA_CSR);
+
+		esp->prev_hme_dmacsr = ((esp->prev_hme_dmacsr |
+					 (DMA_PARITY_OFF | DMA_2CLKS |
+					  DMA_SCSI_DISAB | DMA_INT_ENAB)) &
+					~(DMA_ST_WRITE | DMA_ENABLE));
+
+		dma_write32(0, DMA_CSR);
+		dma_write32(esp->prev_hme_dmacsr, DMA_CSR);
+
+		/* This is necessary to avoid having the SCSI channel
+		 * engine lock up on us.
+		 */
+		dma_write32(0, DMA_ADDR);
+	} else {
+		u32 val;
+		int lim;
+
+		lim = 1000;
+		while ((val = dma_read32(DMA_CSR)) & DMA_PEND_READ) {
+			if (--lim == 0) {
+				printk(KERN_ALERT PFX "esp%d: DMA will not "
+				       "invalidate!\n", esp->host->unique_id);
+				break;
+			}
+			udelay(1);
+		}
+
+		val &= ~(DMA_ENABLE | DMA_ST_WRITE | DMA_BCNT_ENAB);
+		val |= DMA_FIFO_INV;
+		dma_write32(val, DMA_CSR);
+		val &= ~DMA_FIFO_INV;
+		dma_write32(val, DMA_CSR);
+	}
+}
+
+static void sbus_esp_send_dma_cmd(struct esp *esp, u32 addr, u32 esp_count,
+				  u32 dma_count, int write, u8 cmd)
+{
+	u32 csr;
+
+	BUG_ON(!(cmd & ESP_CMD_DMA));
+
+	sbus_esp_write8(esp, (esp_count >> 0) & 0xff, ESP_TCLOW);
+	sbus_esp_write8(esp, (esp_count >> 8) & 0xff, ESP_TCMED);
+	if (esp->rev == FASHME) {
+		sbus_esp_write8(esp, (esp_count >> 16) & 0xff, FAS_RLO);
+		sbus_esp_write8(esp, 0, FAS_RHI);
+
+		scsi_esp_cmd(esp, cmd);
+
+		csr = esp->prev_hme_dmacsr;
+		csr |= DMA_SCSI_DISAB | DMA_ENABLE;
+		if (write)
+			csr |= DMA_ST_WRITE;
+		else
+			csr &= ~DMA_ST_WRITE;
+		esp->prev_hme_dmacsr = csr;
+
+		dma_write32(dma_count, DMA_COUNT);
+		dma_write32(addr, DMA_ADDR);
+		dma_write32(csr, DMA_CSR);
+	} else {
+		csr = dma_read32(DMA_CSR);
+		csr |= DMA_ENABLE;
+		if (write)
+			csr |= DMA_ST_WRITE;
+		else
+			csr &= ~DMA_ST_WRITE;
+		dma_write32(csr, DMA_CSR);
+		if (esp->dma->revision == dvmaesc1) {
+			u32 end = PAGE_ALIGN(addr + dma_count + 16U);
+			dma_write32(end - addr, DMA_COUNT);
+		}
+		dma_write32(addr, DMA_ADDR);
+
+		scsi_esp_cmd(esp, cmd);
+	}
+
+}
+
+static int sbus_esp_dma_error(struct esp *esp)
+{
+	u32 csr = dma_read32(DMA_CSR);
+
+	if (csr & DMA_HNDL_ERROR)
+		return 1;
+
+	return 0;
+}
+
+static const struct esp_driver_ops sbus_esp_ops = {
+	.esp_write8	=	sbus_esp_write8,
+	.esp_read8	=	sbus_esp_read8,
+	.map_single	=	sbus_esp_map_single,
+	.map_sg		=	sbus_esp_map_sg,
+	.unmap_single	=	sbus_esp_unmap_single,
+	.unmap_sg	=	sbus_esp_unmap_sg,
+	.irq_pending	=	sbus_esp_irq_pending,
+	.reset_dma	=	sbus_esp_reset_dma,
+	.dma_drain	=	sbus_esp_dma_drain,
+	.dma_invalidate	=	sbus_esp_dma_invalidate,
+	.send_dma_cmd	=	sbus_esp_send_dma_cmd,
+	.dma_error	=	sbus_esp_dma_error,
+};
+
+static int __devinit esp_sbus_probe_one(struct device *dev,
+					struct sbus_dev *esp_dev,
+					struct sbus_dev *espdma,
+					struct sbus_bus *sbus,
+					int hme)
+{
+	struct scsi_host_template *tpnt = &scsi_esp_template;
+	struct Scsi_Host *host;
+	struct esp *esp;
+	int err;
+
+	host = scsi_host_alloc(tpnt, sizeof(struct esp));
+
+	err = -ENOMEM;
+	if (!host)
+		goto fail;
+
+	host->max_id = (hme ? 16 : 8);
+	esp = host_to_esp(host);
+
+	esp->host = host;
+	esp->dev = esp_dev;
+	esp->ops = &sbus_esp_ops;
+
+	if (hme)
+		esp->flags |= ESP_FLAG_WIDE_CAPABLE;
+
+	err = esp_sbus_find_dma(esp, espdma);
+	if (err < 0)
+		goto fail_unlink;
+
+	err = esp_sbus_map_regs(esp, hme);
+	if (err < 0)
+		goto fail_unlink;
+
+	err = esp_sbus_map_command_block(esp);
+	if (err < 0)
+		goto fail_unmap_regs;
+
+	err = esp_sbus_register_irq(esp);
+	if (err < 0)
+		goto fail_unmap_command_block;
+
+	esp_sbus_get_props(esp, espdma);
+
+	/* Before we try to touch the ESP chip, ESC1 dma can
+	 * come up with the reset bit set, so make sure that
+	 * is clear first.
+	 */
+	if (esp->dma->revision == dvmaesc1) {
+		u32 val = dma_read32(DMA_CSR);
+
+		dma_write32(val & ~DMA_RST_SCSI, DMA_CSR);
+	}
+
+	dev_set_drvdata(&esp_dev->ofdev.dev, esp);
+
+	err = scsi_esp_register(esp, dev);
+	if (err)
+		goto fail_free_irq;
+
+	return 0;
+
+fail_free_irq:
+	free_irq(host->irq, esp);
+fail_unmap_command_block:
+	sbus_free_consistent(esp->dev, 16,
+			     esp->command_block,
+			     esp->command_block_dma);
+fail_unmap_regs:
+	sbus_iounmap(esp->regs, SBUS_ESP_REG_SIZE);
+fail_unlink:
+	scsi_host_put(host);
+fail:
+	return err;
+}
+
+static int __devinit esp_sbus_probe(struct of_device *dev, const struct of_device_id *match)
+{
+	struct sbus_dev *sdev = to_sbus_device(&dev->dev);
+	struct device_node *dp = dev->node;
+	struct sbus_dev *dma_sdev = NULL;
+	int hme = 0;
+
+	if (dp->parent &&
+	    (!strcmp(dp->parent->name, "espdma") ||
+	     !strcmp(dp->parent->name, "dma")))
+		dma_sdev = sdev->parent;
+	else if (!strcmp(dp->name, "SUNW,fas")) {
+		dma_sdev = sdev;
+		hme = 1;
+	}
+
+	return esp_sbus_probe_one(&dev->dev, sdev, dma_sdev,
+				  sdev->bus, hme);
+}
+
+static int __devexit esp_sbus_remove(struct of_device *dev)
+{
+	struct esp *esp = dev_get_drvdata(&dev->dev);
+	unsigned int irq = esp->host->irq;
+	u32 val;
+
+	scsi_esp_unregister(esp);
+
+	/* Disable interrupts.  */
+	val = dma_read32(DMA_CSR);
+	dma_write32(val & ~DMA_INT_ENAB, DMA_CSR);
+
+	free_irq(irq, esp);
+	sbus_free_consistent(esp->dev, 16,
+			     esp->command_block,
+			     esp->command_block_dma);
+	sbus_iounmap(esp->regs, SBUS_ESP_REG_SIZE);
+
+	scsi_host_put(esp->host);
+
+	return 0;
+}
+
+static struct of_device_id esp_match[] = {
+	{
+		.name = "SUNW,esp",
+	},
+	{
+		.name = "SUNW,fas",
+	},
+	{
+		.name = "esp",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, esp_match);
+
+static struct of_platform_driver esp_sbus_driver = {
+	.name		= "esp",
+	.match_table	= esp_match,
+	.probe		= esp_sbus_probe,
+	.remove		= __devexit_p(esp_sbus_remove),
+};
+
+static int __init sunesp_init(void)
+{
+	return of_register_driver(&esp_sbus_driver, &sbus_bus_type);
+}
+
+static void __exit sunesp_exit(void)
+{
+	of_unregister_driver(&esp_sbus_driver);
+}
+
+MODULE_DESCRIPTION("Sun ESP SCSI driver");
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+module_init(sunesp_init);
+module_exit(sunesp_exit);
-- 
cgit v1.2.3


From 801c135ce73d5df1caf3eca35b66a10824ae0707 Mon Sep 17 00:00:00 2001
From: "Artem B. Bityutskiy" <dedekind@linutronix.de>
Date: Tue, 27 Jun 2006 12:22:22 +0400
Subject: UBI: Unsorted Block Images

UBI (Latin: "where?") manages multiple logical volumes on a single
flash device, specifically supporting NAND flash devices. UBI provides
a flexible partitioning concept which still allows for wear-levelling
across the whole flash device.

In a sense, UBI may be compared to the Logical Volume Manager
(LVM). Whereas LVM maps logical sector numbers to physical HDD sector
numbers, UBI maps logical eraseblocks to physical eraseblocks.

More information may be found at
http://www.linux-mtd.infradead.org/doc/ubi.html

Partitioning/Re-partitioning

  An UBI volume occupies a certain number of erase blocks. This is
  limited by a configured maximum volume size, which could also be
  viewed as the partition size. Each individual UBI volume's size can
  be changed independently of the other UBI volumes, provided that the
  sum of all volume sizes doesn't exceed a certain limit.

  UBI supports dynamic volumes and static volumes. Static volumes are
  read-only and their contents are protected by CRC check sums.

Bad eraseblocks handling

  UBI transparently handles bad eraseblocks. When a physical
  eraseblock becomes bad, it is substituted by a good physical
  eraseblock, and the user does not even notice this.

Scrubbing

  On a NAND flash bit flips can occur on any write operation,
  sometimes also on read. If bit flips persist on the device, at first
  they can still be corrected by ECC, but once they accumulate,
  correction will become impossible. Thus it is best to actively scrub
  the affected eraseblock, by first copying it to a free eraseblock
  and then erasing the original. The UBI layer performs this type of
  scrubbing under the covers, transparently to the UBI volume users.

Erase Counts

  UBI maintains an erase count header per eraseblock. This frees
  higher-level layers (like file systems) from doing this and allows
  for centralized erase count management instead. The erase counts are
  used by the wear-levelling algorithm in the UBI layer. The algorithm
  itself is exchangeable.

Booting from NAND

  For booting directly from NAND flash the hardware must at least be
  capable of fetching and executing a small portion of the NAND
  flash. Some NAND flash controllers have this kind of support. They
  usually limit the window to a few kilobytes in erase block 0. This
  "initial program loader" (IPL) must then contain sufficient logic to
  load and execute the next boot phase.

  Due to bad eraseblocks, which may be randomly scattered over the
  flash device, it is problematic to store the "secondary program
  loader" (SPL) statically. Also, due to bit-flips it may become
  corrupted over time. UBI allows to solve this problem gracefully by
  storing the SPL in a small static UBI volume.

UBI volumes vs. static partitions

  UBI volumes are still very similar to static MTD partitions:

    * both consist of eraseblocks (logical eraseblocks in case of UBI
      volumes, and physical eraseblocks in case of static partitions;
    * both support three basic operations - read, write, erase.

  But UBI volumes have the following advantages over traditional
  static MTD partitions:

    * there are no eraseblock wear-leveling constraints in case of UBI
      volumes, so the user should not care about this;
    * there are no bit-flips and bad eraseblocks in case of UBI volumes.

  So, UBI volumes may be considered as flash devices with relaxed
  restrictions.

Where can it be found?

  Documentation, kernel code and applications can be found in the MTD
  gits.

What are the applications for?

  The applications help to create binary flash images for two purposes: pfi
  files (partial flash images) for in-system update of UBI volumes, and plain
  binary images, with or without OOB data in case of NAND, for a manufacturing
  step. Furthermore some tools are/and will be created that allow flash content
  analysis after a system has crashed..

Who did UBI?

  The original ideas, where UBI is based on, were developed by Andreas
  Arnez, Frank Haverkamp and Thomas Gleixner. Josh W. Boyer and some others
  were involved too. The implementation of the kernel layer was done by Artem
  B. Bityutskiy. The user-space applications and tools were written by Oliver
  Lohmann with contributions from Frank Haverkamp, Andreas Arnez, and Artem.
  Joern Engel contributed a patch which modifies JFFS2 so that it can be run on
  a UBI volume. Thomas Gleixner did modifications to the NAND layer. Alexander
  Schmidt made some testing work as well as core functionality improvements.

Signed-off-by: Artem B. Bityutskiy <dedekind@linutronix.de>
Signed-off-by: Frank Haverkamp <haver@vnet.ibm.com>
---
 drivers/mtd/Kconfig           |    2 +
 drivers/mtd/Makefile          |    2 +
 drivers/mtd/ubi/Kconfig       |   58 ++
 drivers/mtd/ubi/Kconfig.debug |  104 +++
 drivers/mtd/ubi/Makefile      |    7 +
 drivers/mtd/ubi/build.c       |  848 +++++++++++++++++++++
 drivers/mtd/ubi/cdev.c        |  722 ++++++++++++++++++
 drivers/mtd/ubi/debug.c       |  224 ++++++
 drivers/mtd/ubi/debug.h       |  161 ++++
 drivers/mtd/ubi/eba.c         | 1241 ++++++++++++++++++++++++++++++
 drivers/mtd/ubi/gluebi.c      |  324 ++++++++
 drivers/mtd/ubi/io.c          | 1259 +++++++++++++++++++++++++++++++
 drivers/mtd/ubi/kapi.c        |  575 ++++++++++++++
 drivers/mtd/ubi/misc.c        |  105 +++
 drivers/mtd/ubi/scan.c        | 1368 +++++++++++++++++++++++++++++++++
 drivers/mtd/ubi/scan.h        |  167 ++++
 drivers/mtd/ubi/ubi.h         |  535 +++++++++++++
 drivers/mtd/ubi/upd.c         |  348 +++++++++
 drivers/mtd/ubi/vmt.c         |  809 ++++++++++++++++++++
 drivers/mtd/ubi/vtbl.c        |  809 ++++++++++++++++++++
 drivers/mtd/ubi/wl.c          | 1671 +++++++++++++++++++++++++++++++++++++++++
 21 files changed, 11339 insertions(+)
 create mode 100644 drivers/mtd/ubi/Kconfig
 create mode 100644 drivers/mtd/ubi/Kconfig.debug
 create mode 100644 drivers/mtd/ubi/Makefile
 create mode 100644 drivers/mtd/ubi/build.c
 create mode 100644 drivers/mtd/ubi/cdev.c
 create mode 100644 drivers/mtd/ubi/debug.c
 create mode 100644 drivers/mtd/ubi/debug.h
 create mode 100644 drivers/mtd/ubi/eba.c
 create mode 100644 drivers/mtd/ubi/gluebi.c
 create mode 100644 drivers/mtd/ubi/io.c
 create mode 100644 drivers/mtd/ubi/kapi.c
 create mode 100644 drivers/mtd/ubi/misc.c
 create mode 100644 drivers/mtd/ubi/scan.c
 create mode 100644 drivers/mtd/ubi/scan.h
 create mode 100644 drivers/mtd/ubi/ubi.h
 create mode 100644 drivers/mtd/ubi/upd.c
 create mode 100644 drivers/mtd/ubi/vmt.c
 create mode 100644 drivers/mtd/ubi/vtbl.c
 create mode 100644 drivers/mtd/ubi/wl.c

(limited to 'drivers')

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 26f75c29944..6d1b91bf7ad 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -292,5 +292,7 @@ source "drivers/mtd/nand/Kconfig"
 
 source "drivers/mtd/onenand/Kconfig"
 
+source "drivers/mtd/ubi/Kconfig"
+
 endmenu
 
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index c130e6261ad..92055405cb3 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -28,3 +28,5 @@ nftl-objs		:= nftlcore.o nftlmount.o
 inftl-objs		:= inftlcore.o inftlmount.o
 
 obj-y		+= chips/ maps/ devices/ nand/ onenand/
+
+obj-$(CONFIG_MTD_UBI)		+= ubi/
diff --git a/drivers/mtd/ubi/Kconfig b/drivers/mtd/ubi/Kconfig
new file mode 100644
index 00000000000..b9daf159a4a
--- /dev/null
+++ b/drivers/mtd/ubi/Kconfig
@@ -0,0 +1,58 @@
+# drivers/mtd/ubi/Kconfig
+
+menu "UBI - Unsorted block images"
+	depends on MTD
+
+config MTD_UBI
+	tristate "Enable UBI"
+	depends on MTD
+	select CRC32
+	help
+	  UBI is a software layer above MTD layer which admits of LVM-like
+	  logical volumes on top of MTD devices, hides some complexities of
+	  flash chips like wear and bad blocks and provides some other useful
+	  capabilities. Please, consult the MTD web site for more details
+	  (www.linux-mtd.infradead.org).
+
+config MTD_UBI_WL_THRESHOLD
+	int "UBI wear-leveling threshold"
+	default 4096
+	range 2 65536
+	depends on MTD_UBI
+	help
+	  This parameter defines the maximum difference between the highest
+	  erase counter value and the lowest erase counter value of eraseblocks
+	  of UBI devices. When this threshold is exceeded, UBI starts performing
+	  wear leveling by means of moving data from eraseblock with low erase
+	  counter to eraseblocks with high erase counter. Leave the default
+	  value if unsure.
+
+config MTD_UBI_BEB_RESERVE
+	int "Percentage of reserved eraseblocks for bad eraseblocks handling"
+	default 1
+	range 0 25
+	depends on MTD_UBI
+	help
+	  If the MTD device admits of bad eraseblocks (e.g. NAND flash), UBI
+	  reserves some amount of physical eraseblocks to handle new bad
+	  eraseblocks. For example, if a flash physical eraseblock becomes bad,
+	  UBI uses these reserved physical eraseblocks to relocate the bad one.
+	  This option specifies how many physical eraseblocks will be reserved
+	  for bad eraseblock handling (percents of total number of good flash
+	  eraseblocks). If the underlying flash does not admit of bad
+	  eraseblocks (e.g. NOR flash), this value is ignored and nothing is
+	  reserved. Leave the default value if unsure.
+
+config MTD_UBI_GLUEBI
+	bool "Emulate MTD devices"
+	default n
+	depends on MTD_UBI
+	help
+	   This option enables MTD devices emulation on top of UBI volumes: for
+	   each UBI volumes an MTD device is created, and all I/O to this MTD
+	   device is redirected to the UBI volume. This is handy to make
+	   MTD-oriented software (like JFFS2) work on top of UBI. Do not enable
+	   this if no legacy software will be used.
+
+source "drivers/mtd/ubi/Kconfig.debug"
+endmenu
diff --git a/drivers/mtd/ubi/Kconfig.debug b/drivers/mtd/ubi/Kconfig.debug
new file mode 100644
index 00000000000..1e2ee22edef
--- /dev/null
+++ b/drivers/mtd/ubi/Kconfig.debug
@@ -0,0 +1,104 @@
+comment "UBI debugging options"
+	depends on MTD_UBI
+
+config MTD_UBI_DEBUG
+	bool "UBI debugging"
+	depends on SYSFS
+	depends on MTD_UBI
+	select DEBUG_FS
+	select KALLSYMS_ALL
+	help
+	  This option enables UBI debugging.
+
+config MTD_UBI_DEBUG_MSG
+	bool "UBI debugging messages"
+	depends on MTD_UBI_DEBUG
+	default n
+	help
+	  This option enables UBI debugging messages.
+
+config MTD_UBI_DEBUG_PARANOID
+	bool "Extra self-checks"
+	default n
+	depends on MTD_UBI_DEBUG
+	help
+	  This option enables extra checks in UBI code. Note this slows UBI down
+	  significantly.
+
+config MTD_UBI_DEBUG_DISABLE_BGT
+	bool "Do not enable the UBI background thread"
+	depends on MTD_UBI_DEBUG
+	default n
+	help
+	  This option switches the background thread off by default. The thread
+	  may be also be enabled/disabled via UBI sysfs.
+
+config MTD_UBI_DEBUG_USERSPACE_IO
+	bool "Direct user-space write/erase support"
+	default n
+	depends on MTD_UBI_DEBUG
+	help
+	  By default, users cannot directly write and erase individual
+	  eraseblocks of dynamic volumes, and have to use update operation
+	  instead. This option enables this capability - it is very useful for
+	  debugging and testing.
+
+config MTD_UBI_DEBUG_EMULATE_BITFLIPS
+	bool "Emulate flash bit-flips"
+	depends on MTD_UBI_DEBUG
+	default n
+	help
+	  This option emulates bit-flips with probability 1/50, which in turn
+	  causes scrubbing. Useful for debugging and stressing UBI.
+
+config MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES
+	bool "Emulate flash write failures"
+	depends on MTD_UBI_DEBUG
+	default n
+	help
+	  This option emulates write failures with probability 1/100. Useful for
+	  debugging and testing how UBI handlines errors.
+
+config MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES
+	bool "Emulate flash erase failures"
+	depends on MTD_UBI_DEBUG
+	default n
+	help
+	  This option emulates erase failures with probability 1/100. Useful for
+	  debugging and testing how UBI handlines errors.
+
+menu "Additional UBI debugging messages"
+	depends on MTD_UBI_DEBUG
+
+config MTD_UBI_DEBUG_MSG_BLD
+	bool "Additional UBI initialization and build messages"
+	default n
+	depends on MTD_UBI_DEBUG
+	help
+	  This option enables detailed UBI initialization and device build
+	  debugging messages.
+
+config MTD_UBI_DEBUG_MSG_EBA
+	bool "Eraseblock association unit messages"
+	default n
+	depends on MTD_UBI_DEBUG
+	help
+	  This option enables debugging messages from the UBI eraseblock
+	  association unit.
+
+config MTD_UBI_DEBUG_MSG_WL
+	bool "Wear-leveling unit messages"
+	default n
+	depends on MTD_UBI_DEBUG
+	help
+	  This option enables debugging messages from the UBI wear-leveling
+	  unit.
+
+config MTD_UBI_DEBUG_MSG_IO
+	bool "Input/output unit messages"
+	default n
+	depends on MTD_UBI_DEBUG
+	help
+	  This option enables debugging messages from the UBI input/output unit.
+
+endmenu # UBI debugging messages
diff --git a/drivers/mtd/ubi/Makefile b/drivers/mtd/ubi/Makefile
new file mode 100644
index 00000000000..dd834e04151
--- /dev/null
+++ b/drivers/mtd/ubi/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_MTD_UBI) += ubi.o
+
+ubi-y += vtbl.o vmt.o upd.o build.o cdev.o kapi.o eba.o io.o wl.o scan.o
+ubi-y += misc.o
+
+ubi-$(CONFIG_MTD_UBI_DEBUG) += debug.o
+ubi-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
new file mode 100644
index 00000000000..555d594d181
--- /dev/null
+++ b/drivers/mtd/ubi/build.c
@@ -0,0 +1,848 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ * Copyright (c) Nokia Corporation, 2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём),
+ *         Frank Haverkamp
+ */
+
+/*
+ * This file includes UBI initialization and building of UBI devices. At the
+ * moment UBI devices may only be added while UBI is initialized, but dynamic
+ * device add/remove functionality is planned. Also, at the moment we only
+ * attach UBI devices by scanning, which will become a bottleneck when flashes
+ * reach certain large size. Then one may improve UBI and add other methods.
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/stringify.h>
+#include <linux/stat.h>
+#include "ubi.h"
+
+/* Maximum length of the 'mtd=' parameter */
+#define MTD_PARAM_LEN_MAX 64
+
+/**
+ * struct mtd_dev_param - MTD device parameter description data structure.
+ * @name: MTD device name or number string
+ * @vid_hdr_offs: VID header offset
+ * @data_offs: data offset
+ */
+struct mtd_dev_param
+{
+	char name[MTD_PARAM_LEN_MAX];
+	int vid_hdr_offs;
+	int data_offs;
+};
+
+/* Numbers of elements set in the @mtd_dev_param array */
+static int mtd_devs = 0;
+
+/* MTD devices specification parameters */
+static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES];
+
+/* Number of UBI devices in system */
+int ubi_devices_cnt;
+
+/* All UBI devices in system */
+struct ubi_device *ubi_devices[UBI_MAX_DEVICES];
+
+/* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */
+struct class *ubi_class;
+
+/* "Show" method for files in '/<sysfs>/class/ubi/' */
+static ssize_t ubi_version_show(struct class *class, char *buf)
+{
+	return sprintf(buf, "%d\n", UBI_VERSION);
+}
+
+/* UBI version attribute ('/<sysfs>/class/ubi/version') */
+static struct class_attribute ubi_version =
+	__ATTR(version, S_IRUGO, ubi_version_show, NULL);
+
+static ssize_t dev_attribute_show(struct device *dev,
+				  struct device_attribute *attr, char *buf);
+
+/* UBI device attributes (correspond to files in '/<sysfs>/class/ubi/ubiX') */
+static struct device_attribute dev_eraseblock_size =
+	__ATTR(eraseblock_size, S_IRUGO, dev_attribute_show, NULL);
+static struct device_attribute dev_avail_eraseblocks =
+	__ATTR(avail_eraseblocks, S_IRUGO, dev_attribute_show, NULL);
+static struct device_attribute dev_total_eraseblocks =
+	__ATTR(total_eraseblocks, S_IRUGO, dev_attribute_show, NULL);
+static struct device_attribute dev_volumes_count =
+	__ATTR(volumes_count, S_IRUGO, dev_attribute_show, NULL);
+static struct device_attribute dev_max_ec =
+	__ATTR(max_ec, S_IRUGO, dev_attribute_show, NULL);
+static struct device_attribute dev_reserved_for_bad =
+	__ATTR(reserved_for_bad, S_IRUGO, dev_attribute_show, NULL);
+static struct device_attribute dev_bad_peb_count =
+	__ATTR(bad_peb_count, S_IRUGO, dev_attribute_show, NULL);
+static struct device_attribute dev_max_vol_count =
+	__ATTR(max_vol_count, S_IRUGO, dev_attribute_show, NULL);
+static struct device_attribute dev_min_io_size =
+	__ATTR(min_io_size, S_IRUGO, dev_attribute_show, NULL);
+static struct device_attribute dev_bgt_enabled =
+	__ATTR(bgt_enabled, S_IRUGO, dev_attribute_show, NULL);
+
+/* "Show" method for files in '/<sysfs>/class/ubi/ubiX/' */
+static ssize_t dev_attribute_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	const struct ubi_device *ubi;
+
+	ubi = container_of(dev, struct ubi_device, dev);
+	if (attr == &dev_eraseblock_size)
+		return sprintf(buf, "%d\n", ubi->leb_size);
+	else if (attr == &dev_avail_eraseblocks)
+		return sprintf(buf, "%d\n", ubi->avail_pebs);
+	else if (attr == &dev_total_eraseblocks)
+		return sprintf(buf, "%d\n", ubi->good_peb_count);
+	else if (attr == &dev_volumes_count)
+		return sprintf(buf, "%d\n", ubi->vol_count);
+	else if (attr == &dev_max_ec)
+		return sprintf(buf, "%d\n", ubi->max_ec);
+	else if (attr == &dev_reserved_for_bad)
+		return sprintf(buf, "%d\n", ubi->beb_rsvd_pebs);
+	else if (attr == &dev_bad_peb_count)
+		return sprintf(buf, "%d\n", ubi->bad_peb_count);
+	else if (attr == &dev_max_vol_count)
+		return sprintf(buf, "%d\n", ubi->vtbl_slots);
+	else if (attr == &dev_min_io_size)
+		return sprintf(buf, "%d\n", ubi->min_io_size);
+	else if (attr == &dev_bgt_enabled)
+		return sprintf(buf, "%d\n", ubi->thread_enabled);
+	else
+		BUG();
+
+	return 0;
+}
+
+/* Fake "release" method for UBI devices */
+static void dev_release(struct device *dev) { }
+
+/**
+ * ubi_sysfs_init - initialize sysfs for an UBI device.
+ * @ubi: UBI device description object
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int ubi_sysfs_init(struct ubi_device *ubi)
+{
+	int err;
+
+	ubi->dev.release = dev_release;
+	ubi->dev.devt = MKDEV(ubi->major, 0);
+	ubi->dev.class = ubi_class;
+	sprintf(&ubi->dev.bus_id[0], UBI_NAME_STR"%d", ubi->ubi_num);
+	err = device_register(&ubi->dev);
+	if (err)
+		goto out;
+
+	err = device_create_file(&ubi->dev, &dev_eraseblock_size);
+	if (err)
+		goto out_unregister;
+	err = device_create_file(&ubi->dev, &dev_avail_eraseblocks);
+	if (err)
+		goto out_eraseblock_size;
+	err = device_create_file(&ubi->dev, &dev_total_eraseblocks);
+	if (err)
+		goto out_avail_eraseblocks;
+	err = device_create_file(&ubi->dev, &dev_volumes_count);
+	if (err)
+		goto out_total_eraseblocks;
+	err = device_create_file(&ubi->dev, &dev_max_ec);
+	if (err)
+		goto out_volumes_count;
+	err = device_create_file(&ubi->dev, &dev_reserved_for_bad);
+	if (err)
+		goto out_volumes_max_ec;
+	err = device_create_file(&ubi->dev, &dev_bad_peb_count);
+	if (err)
+		goto out_reserved_for_bad;
+	err = device_create_file(&ubi->dev, &dev_max_vol_count);
+	if (err)
+		goto out_bad_peb_count;
+	err = device_create_file(&ubi->dev, &dev_min_io_size);
+	if (err)
+		goto out_max_vol_count;
+	err = device_create_file(&ubi->dev, &dev_bgt_enabled);
+	if (err)
+		goto out_min_io_size;
+
+	return 0;
+
+out_min_io_size:
+	device_remove_file(&ubi->dev, &dev_min_io_size);
+out_max_vol_count:
+	device_remove_file(&ubi->dev, &dev_max_vol_count);
+out_bad_peb_count:
+	device_remove_file(&ubi->dev, &dev_bad_peb_count);
+out_reserved_for_bad:
+	device_remove_file(&ubi->dev, &dev_reserved_for_bad);
+out_volumes_max_ec:
+	device_remove_file(&ubi->dev, &dev_max_ec);
+out_volumes_count:
+	device_remove_file(&ubi->dev, &dev_volumes_count);
+out_total_eraseblocks:
+	device_remove_file(&ubi->dev, &dev_total_eraseblocks);
+out_avail_eraseblocks:
+	device_remove_file(&ubi->dev, &dev_avail_eraseblocks);
+out_eraseblock_size:
+	device_remove_file(&ubi->dev, &dev_eraseblock_size);
+out_unregister:
+	device_unregister(&ubi->dev);
+out:
+	ubi_err("failed to initialize sysfs for %s", ubi->ubi_name);
+	return err;
+}
+
+/**
+ * ubi_sysfs_close - close sysfs for an UBI device.
+ * @ubi: UBI device description object
+ */
+static void ubi_sysfs_close(struct ubi_device *ubi)
+{
+	device_remove_file(&ubi->dev, &dev_bgt_enabled);
+	device_remove_file(&ubi->dev, &dev_min_io_size);
+	device_remove_file(&ubi->dev, &dev_max_vol_count);
+	device_remove_file(&ubi->dev, &dev_bad_peb_count);
+	device_remove_file(&ubi->dev, &dev_reserved_for_bad);
+	device_remove_file(&ubi->dev, &dev_max_ec);
+	device_remove_file(&ubi->dev, &dev_volumes_count);
+	device_remove_file(&ubi->dev, &dev_total_eraseblocks);
+	device_remove_file(&ubi->dev, &dev_avail_eraseblocks);
+	device_remove_file(&ubi->dev, &dev_eraseblock_size);
+	device_unregister(&ubi->dev);
+}
+
+/**
+ * kill_volumes - destroy all volumes.
+ * @ubi: UBI device description object
+ */
+static void kill_volumes(struct ubi_device *ubi)
+{
+	int i;
+
+	for (i = 0; i < ubi->vtbl_slots; i++)
+		if (ubi->volumes[i])
+			ubi_free_volume(ubi, i);
+}
+
+/**
+ * uif_init - initialize user interfaces for an UBI device.
+ * @ubi: UBI device description object
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int uif_init(struct ubi_device *ubi)
+{
+	int i, err;
+	dev_t dev;
+
+	mutex_init(&ubi->vtbl_mutex);
+	spin_lock_init(&ubi->volumes_lock);
+
+	sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num);
+
+	/*
+	 * Major numbers for the UBI character devices are allocated
+	 * dynamically. Major numbers of volume character devices are
+	 * equivalent to ones of the corresponding UBI character device. Minor
+	 * numbers of UBI character devices are 0, while minor numbers of
+	 * volume character devices start from 1. Thus, we allocate one major
+	 * number and ubi->vtbl_slots + 1 minor numbers.
+	 */
+	err = alloc_chrdev_region(&dev, 0, ubi->vtbl_slots + 1, ubi->ubi_name);
+	if (err) {
+		ubi_err("cannot register UBI character devices");
+		return err;
+	}
+
+	cdev_init(&ubi->cdev, &ubi_cdev_operations);
+	ubi->major = MAJOR(dev);
+	dbg_msg("%s major is %u", ubi->ubi_name, ubi->major);
+	ubi->cdev.owner = THIS_MODULE;
+
+	dev = MKDEV(ubi->major, 0);
+	err = cdev_add(&ubi->cdev, dev, 1);
+	if (err) {
+		ubi_err("cannot add character device %s", ubi->ubi_name);
+		goto out_unreg;
+	}
+
+	err = ubi_sysfs_init(ubi);
+	if (err)
+		goto out_cdev;
+
+	for (i = 0; i < ubi->vtbl_slots; i++)
+		if (ubi->volumes[i]) {
+			err = ubi_add_volume(ubi, i);
+			if (err)
+				goto out_volumes;
+		}
+
+	return 0;
+
+out_volumes:
+	kill_volumes(ubi);
+	ubi_sysfs_close(ubi);
+out_cdev:
+	cdev_del(&ubi->cdev);
+out_unreg:
+	unregister_chrdev_region(MKDEV(ubi->major, 0),
+				 ubi->vtbl_slots + 1);
+	return err;
+}
+
+/**
+ * uif_close - close user interfaces for an UBI device.
+ * @ubi: UBI device description object
+ */
+static void uif_close(struct ubi_device *ubi)
+{
+	kill_volumes(ubi);
+	ubi_sysfs_close(ubi);
+	cdev_del(&ubi->cdev);
+	unregister_chrdev_region(MKDEV(ubi->major, 0), ubi->vtbl_slots + 1);
+}
+
+/**
+ * attach_by_scanning - attach an MTD device using scanning method.
+ * @ubi: UBI device descriptor
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ *
+ * Note, currently this is the only method to attach UBI devices. Hopefully in
+ * the future we'll have more scalable attaching methods and avoid full media
+ * scanning. But even in this case scanning will be needed as a fall-back
+ * attaching method if there are some on-flash table corruptions.
+ */
+static int attach_by_scanning(struct ubi_device *ubi)
+{
+	int err;
+	struct ubi_scan_info *si;
+
+	si = ubi_scan(ubi);
+	if (IS_ERR(si))
+		return PTR_ERR(si);
+
+	ubi->bad_peb_count = si->bad_peb_count;
+	ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count;
+	ubi->max_ec = si->max_ec;
+	ubi->mean_ec = si->mean_ec;
+
+	err = ubi_read_volume_table(ubi, si);
+	if (err)
+		goto out_si;
+
+	err = ubi_wl_init_scan(ubi, si);
+	if (err)
+		goto out_vtbl;
+
+	err = ubi_eba_init_scan(ubi, si);
+	if (err)
+		goto out_wl;
+
+	ubi_scan_destroy_si(si);
+	return 0;
+
+out_wl:
+	ubi_wl_close(ubi);
+out_vtbl:
+	kfree(ubi->vtbl);
+out_si:
+	ubi_scan_destroy_si(si);
+	return err;
+}
+
+/**
+ * io_init - initialize I/O unit for a given UBI device.
+ * @ubi: UBI device description object
+ *
+ * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are
+ * assumed:
+ *   o EC header is always at offset zero - this cannot be changed;
+ *   o VID header starts just after the EC header at the closest address
+ *   aligned to @io->@hdrs_min_io_size;
+ *   o data starts just after the VID header at the closest address aligned to
+ *     @io->@min_io_size
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int io_init(struct ubi_device *ubi)
+{
+	if (ubi->mtd->numeraseregions != 0) {
+		/*
+		 * Some flashes have several erase regions. Different regions
+		 * may have different eraseblock size and other
+		 * characteristics. It looks like mostly multi-region flashes
+		 * have one "main" region and one or more small regions to
+		 * store boot loader code or boot parameters or whatever. I
+		 * guess we should just pick the largest region. But this is
+		 * not implemented.
+		 */
+		ubi_err("multiple regions, not implemented");
+		return -EINVAL;
+	}
+
+	/*
+	 * Note, in this implementation we support MTD devices with 0x7FFFFFFF
+	 * physical eraseblocks maximum.
+	 */
+
+	ubi->peb_size   = ubi->mtd->erasesize;
+	ubi->peb_count  = ubi->mtd->size / ubi->mtd->erasesize;
+	ubi->flash_size = ubi->mtd->size;
+
+	if (ubi->mtd->block_isbad && ubi->mtd->block_markbad)
+		ubi->bad_allowed = 1;
+
+	ubi->min_io_size = ubi->mtd->writesize;
+	ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft;
+
+	/* Make sure minimal I/O unit is power of 2 */
+	if (ubi->min_io_size == 0 ||
+	    (ubi->min_io_size & (ubi->min_io_size - 1))) {
+		ubi_err("bad min. I/O unit");
+		return -EINVAL;
+	}
+
+	ubi_assert(ubi->hdrs_min_io_size > 0);
+	ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size);
+	ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0);
+
+	/* Calculate default aligned sizes of EC and VID headers */
+	ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size);
+	ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size);
+
+	dbg_msg("min_io_size      %d", ubi->min_io_size);
+	dbg_msg("hdrs_min_io_size %d", ubi->hdrs_min_io_size);
+	dbg_msg("ec_hdr_alsize    %d", ubi->ec_hdr_alsize);
+	dbg_msg("vid_hdr_alsize   %d", ubi->vid_hdr_alsize);
+
+	if (ubi->vid_hdr_offset == 0)
+		/* Default offset */
+		ubi->vid_hdr_offset = ubi->vid_hdr_aloffset =
+				      ubi->ec_hdr_alsize;
+	else {
+		ubi->vid_hdr_aloffset = ubi->vid_hdr_offset &
+						~(ubi->hdrs_min_io_size - 1);
+		ubi->vid_hdr_shift = ubi->vid_hdr_offset -
+						ubi->vid_hdr_aloffset;
+	}
+
+	/* Similar for the data offset */
+	if (ubi->leb_start == 0) {
+		ubi->leb_start = ubi->vid_hdr_offset + ubi->vid_hdr_alsize;
+		ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size);
+	}
+
+	dbg_msg("vid_hdr_offset   %d", ubi->vid_hdr_offset);
+	dbg_msg("vid_hdr_aloffset %d", ubi->vid_hdr_aloffset);
+	dbg_msg("vid_hdr_shift    %d", ubi->vid_hdr_shift);
+	dbg_msg("leb_start        %d", ubi->leb_start);
+
+	/* The shift must be aligned to 32-bit boundary */
+	if (ubi->vid_hdr_shift % 4) {
+		ubi_err("unaligned VID header shift %d",
+			ubi->vid_hdr_shift);
+		return -EINVAL;
+	}
+
+	/* Check sanity */
+	if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE ||
+	    ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE ||
+	    ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE ||
+	    ubi->leb_start % ubi->min_io_size) {
+		ubi_err("bad VID header (%d) or data offsets (%d)",
+			ubi->vid_hdr_offset, ubi->leb_start);
+		return -EINVAL;
+	}
+
+	/*
+	 * It may happen that EC and VID headers are situated in one minimal
+	 * I/O unit. In this case we can only accept this UBI image in
+	 * read-only mode.
+	 */
+	if (ubi->vid_hdr_offset + UBI_VID_HDR_SIZE <= ubi->hdrs_min_io_size) {
+		ubi_warn("EC and VID headers are in the same minimal I/O unit, "
+			 "switch to read-only mode");
+		ubi->ro_mode = 1;
+	}
+
+	ubi->leb_size = ubi->peb_size - ubi->leb_start;
+
+	if (!(ubi->mtd->flags & MTD_WRITEABLE)) {
+		ubi_msg("MTD device %d is write-protected, attach in "
+			"read-only mode", ubi->mtd->index);
+		ubi->ro_mode = 1;
+	}
+
+	dbg_msg("leb_size         %d", ubi->leb_size);
+	dbg_msg("ro_mode          %d", ubi->ro_mode);
+
+	/*
+	 * Note, ideally, we have to initialize ubi->bad_peb_count here. But
+	 * unfortunately, MTD does not provide this information. We should loop
+	 * over all physical eraseblocks and invoke mtd->block_is_bad() for
+	 * each physical eraseblock. So, we skip ubi->bad_peb_count
+	 * uninitialized and initialize it after scanning.
+	 */
+
+	return 0;
+}
+
+/**
+ * attach_mtd_dev - attach an MTD device.
+ * @mtd_dev: MTD device name or number string
+ * @vid_hdr_offset: VID header offset
+ * @data_offset: data offset
+ *
+ * This function attaches an MTD device to UBI. It first treats @mtd_dev as the
+ * MTD device name, and tries to open it by this name. If it is unable to open,
+ * it tries to convert @mtd_dev to an integer and open the MTD device by its
+ * number. Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+static int attach_mtd_dev(const char *mtd_dev, int vid_hdr_offset,
+			  int data_offset)
+{
+	struct ubi_device *ubi;
+	struct mtd_info *mtd;
+	int i, err;
+
+	mtd = get_mtd_device_nm(mtd_dev);
+	if (IS_ERR(mtd)) {
+		int mtd_num;
+		char *endp;
+
+		if (PTR_ERR(mtd) != -ENODEV)
+			return PTR_ERR(mtd);
+
+		/*
+		 * Probably this is not MTD device name but MTD device number -
+		 * check this out.
+		 */
+		mtd_num = simple_strtoul(mtd_dev, &endp, 0);
+		if (*endp != '\0' || mtd_dev == endp) {
+			ubi_err("incorrect MTD device: \"%s\"", mtd_dev);
+			return -ENODEV;
+		}
+
+		mtd = get_mtd_device(NULL, mtd_num);
+		if (IS_ERR(mtd))
+			return PTR_ERR(mtd);
+	}
+
+	/* Check if we already have the same MTD device attached */
+	for (i = 0; i < ubi_devices_cnt; i++)
+		if (ubi_devices[i]->mtd->index == mtd->index) {
+			ubi_err("mtd%d is already attached to ubi%d",
+				mtd->index, i);
+			err = -EINVAL;
+			goto out_mtd;
+		}
+
+	ubi = ubi_devices[ubi_devices_cnt] = kzalloc(sizeof(struct ubi_device),
+						      GFP_KERNEL);
+	if (!ubi) {
+		err = -ENOMEM;
+		goto out_mtd;
+	}
+
+	ubi->ubi_num = ubi_devices_cnt;
+	ubi->mtd = mtd;
+
+	dbg_msg("attaching mtd%d to ubi%d: VID header offset %d data offset %d",
+		ubi->mtd->index, ubi_devices_cnt, vid_hdr_offset, data_offset);
+
+	ubi->vid_hdr_offset = vid_hdr_offset;
+	ubi->leb_start = data_offset;
+	err = io_init(ubi);
+	if (err)
+		goto out_free;
+
+	err = attach_by_scanning(ubi);
+	if (err) {
+		dbg_err("failed to attach by scanning, error %d", err);
+		goto out_free;
+	}
+
+	err = uif_init(ubi);
+	if (err)
+		goto out_detach;
+
+	ubi_devices_cnt += 1;
+
+	ubi_msg("attached mtd%d to ubi%d", ubi->mtd->index, ubi_devices_cnt);
+	ubi_msg("MTD device name:            \"%s\"", ubi->mtd->name);
+	ubi_msg("MTD device size:            %llu MiB", ubi->flash_size >> 20);
+	ubi_msg("physical eraseblock size:   %d bytes (%d KiB)",
+		ubi->peb_size, ubi->peb_size >> 10);
+	ubi_msg("logical eraseblock size:    %d bytes", ubi->leb_size);
+	ubi_msg("number of good PEBs:        %d", ubi->good_peb_count);
+	ubi_msg("number of bad PEBs:         %d", ubi->bad_peb_count);
+	ubi_msg("smallest flash I/O unit:    %d", ubi->min_io_size);
+	ubi_msg("VID header offset:          %d (aligned %d)",
+		ubi->vid_hdr_offset, ubi->vid_hdr_aloffset);
+	ubi_msg("data offset:                %d", ubi->leb_start);
+	ubi_msg("max. allowed volumes:       %d", ubi->vtbl_slots);
+	ubi_msg("wear-leveling threshold:    %d", CONFIG_MTD_UBI_WL_THRESHOLD);
+	ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT);
+	ubi_msg("number of user volumes:     %d",
+		ubi->vol_count - UBI_INT_VOL_COUNT);
+	ubi_msg("available PEBs:             %d", ubi->avail_pebs);
+	ubi_msg("total number of reserved PEBs: %d", ubi->rsvd_pebs);
+	ubi_msg("number of PEBs reserved for bad PEB handling: %d",
+		ubi->beb_rsvd_pebs);
+	ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec);
+
+	/* Enable the background thread */
+	if (!DBG_DISABLE_BGT) {
+		ubi->thread_enabled = 1;
+		wake_up_process(ubi->bgt_thread);
+	}
+
+	return 0;
+
+out_detach:
+	ubi_eba_close(ubi);
+	ubi_wl_close(ubi);
+	kfree(ubi->vtbl);
+out_free:
+	kfree(ubi);
+out_mtd:
+	put_mtd_device(mtd);
+	ubi_devices[ubi_devices_cnt] = NULL;
+	return err;
+}
+
+/**
+ * detach_mtd_dev - detach an MTD device.
+ * @ubi: UBI device description object
+ */
+static void detach_mtd_dev(struct ubi_device *ubi)
+{
+	int ubi_num = ubi->ubi_num, mtd_num = ubi->mtd->index;
+
+	dbg_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num);
+	uif_close(ubi);
+	ubi_eba_close(ubi);
+	ubi_wl_close(ubi);
+	kfree(ubi->vtbl);
+	put_mtd_device(ubi->mtd);
+	kfree(ubi_devices[ubi_num]);
+	ubi_devices[ubi_num] = NULL;
+	ubi_devices_cnt -= 1;
+	ubi_assert(ubi_devices_cnt >= 0);
+	ubi_msg("mtd%d is detached from ubi%d", mtd_num, ubi_num);
+}
+
+static int __init ubi_init(void)
+{
+	int err, i, k;
+
+	/* Ensure that EC and VID headers have correct size */
+	BUILD_BUG_ON(sizeof(struct ubi_ec_hdr) != 64);
+	BUILD_BUG_ON(sizeof(struct ubi_vid_hdr) != 64);
+
+	if (mtd_devs > UBI_MAX_DEVICES) {
+		printk("UBI error: too many MTD devices, maximum is %d\n",
+		       UBI_MAX_DEVICES);
+		return -EINVAL;
+	}
+
+	ubi_class = class_create(THIS_MODULE, UBI_NAME_STR);
+	if (IS_ERR(ubi_class))
+		return PTR_ERR(ubi_class);
+
+	err = class_create_file(ubi_class, &ubi_version);
+	if (err)
+		goto out_class;
+
+	/* Attach MTD devices */
+	for (i = 0; i < mtd_devs; i++) {
+		struct mtd_dev_param *p = &mtd_dev_param[i];
+
+		cond_resched();
+
+		if (!p->name) {
+			dbg_err("empty name");
+			err = -EINVAL;
+			goto out_detach;
+		}
+
+		err = attach_mtd_dev(p->name, p->vid_hdr_offs, p->data_offs);
+		if (err)
+			goto out_detach;
+	}
+
+	return 0;
+
+out_detach:
+	for (k = 0; k < i; k++)
+		detach_mtd_dev(ubi_devices[k]);
+	class_remove_file(ubi_class, &ubi_version);
+out_class:
+	class_destroy(ubi_class);
+	return err;
+}
+module_init(ubi_init);
+
+static void __exit ubi_exit(void)
+{
+	int i, n = ubi_devices_cnt;
+
+	for (i = 0; i < n; i++)
+		detach_mtd_dev(ubi_devices[i]);
+	class_remove_file(ubi_class, &ubi_version);
+	class_destroy(ubi_class);
+}
+module_exit(ubi_exit);
+
+/**
+ * bytes_str_to_int - convert a string representing number of bytes to an
+ * integer.
+ * @str: the string to convert
+ *
+ * This function returns positive resulting integer in case of success and a
+ * negative error code in case of failure.
+ */
+static int __init bytes_str_to_int(const char *str)
+{
+	char *endp;
+	unsigned long result;
+
+	result = simple_strtoul(str, &endp, 0);
+	if (str == endp || result < 0) {
+		printk("UBI error: incorrect bytes count: \"%s\"\n", str);
+		return -EINVAL;
+	}
+
+	switch (*endp) {
+	case 'G':
+		result *= 1024;
+	case 'M':
+		result *= 1024;
+	case 'K':
+	case 'k':
+		result *= 1024;
+		if (endp[1] == 'i' && (endp[2] == '\0' ||
+			  endp[2] == 'B'  || endp[2] == 'b'))
+			endp += 2;
+	case '\0':
+		break;
+	default:
+		printk("UBI error: incorrect bytes count: \"%s\"\n", str);
+		return -EINVAL;
+	}
+
+	return result;
+}
+
+/**
+ * ubi_mtd_param_parse - parse the 'mtd=' UBI parameter.
+ * @val: the parameter value to parse
+ * @kp: not used
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of error.
+ */
+static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp)
+{
+	int i, len;
+	struct mtd_dev_param *p;
+	char buf[MTD_PARAM_LEN_MAX];
+	char *pbuf = &buf[0];
+	char *tokens[3] = {NULL, NULL, NULL};
+
+	if (mtd_devs == UBI_MAX_DEVICES) {
+		printk("UBI error: too many parameters, max. is %d\n",
+		       UBI_MAX_DEVICES);
+		return -EINVAL;
+	}
+
+	len = strnlen(val, MTD_PARAM_LEN_MAX);
+	if (len == MTD_PARAM_LEN_MAX) {
+		printk("UBI error: parameter \"%s\" is too long, max. is %d\n",
+		       val, MTD_PARAM_LEN_MAX);
+		return -EINVAL;
+	}
+
+	if (len == 0) {
+		printk("UBI warning: empty 'mtd=' parameter - ignored\n");
+		return 0;
+	}
+
+	strcpy(buf, val);
+
+	/* Get rid of the final newline */
+	if (buf[len - 1] == '\n')
+		buf[len - 1] = 0;
+
+	for (i = 0; i < 3; i++)
+		tokens[i] = strsep(&pbuf, ",");
+
+	if (pbuf) {
+		printk("UBI error: too many arguments at \"%s\"\n", val);
+		return -EINVAL;
+	}
+
+	if (tokens[0] == '\0')
+		return -EINVAL;
+
+	p = &mtd_dev_param[mtd_devs];
+	strcpy(&p->name[0], tokens[0]);
+
+	if (tokens[1])
+		p->vid_hdr_offs = bytes_str_to_int(tokens[1]);
+	if (tokens[2])
+		p->data_offs = bytes_str_to_int(tokens[2]);
+
+	if (p->vid_hdr_offs < 0)
+		return p->vid_hdr_offs;
+	if (p->data_offs < 0)
+		return p->data_offs;
+
+	mtd_devs += 1;
+	return 0;
+}
+
+module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000);
+MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: "
+		      "mtd=<name|num>[,<vid_hdr_offs>,<data_offs>]. "
+		      "Multiple \"mtd\" parameters may be specified.\n"
+		      "MTD devices may be specified by their number or name. "
+		      "Optional \"vid_hdr_offs\" and \"data_offs\" parameters "
+		      "specify UBI VID header position and data starting "
+		      "position to be used by UBI.\n"
+		      "Example: mtd=content,1984,2048 mtd=4 - attach MTD device"
+		      "with name content using VID header offset 1984 and data "
+		      "start 2048, and MTD device number 4 using default "
+		      "offsets");
+
+MODULE_VERSION(__stringify(UBI_VERSION));
+MODULE_DESCRIPTION("UBI - Unsorted Block Images");
+MODULE_AUTHOR("Artem Bityutskiy");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
new file mode 100644
index 00000000000..6612eb79bf1
--- /dev/null
+++ b/drivers/mtd/ubi/cdev.c
@@ -0,0 +1,722 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file includes implementation of UBI character device operations.
+ *
+ * There are two kinds of character devices in UBI: UBI character devices and
+ * UBI volume character devices. UBI character devices allow users to
+ * manipulate whole volumes: create, remove, and re-size them. Volume character
+ * devices provide volume I/O capabilities.
+ *
+ * Major and minor numbers are assigned dynamically to both UBI and volume
+ * character devices.
+ */
+
+#include <linux/module.h>
+#include <linux/stat.h>
+#include <linux/ioctl.h>
+#include <linux/capability.h>
+#include <mtd/ubi-user.h>
+#include <asm/uaccess.h>
+#include <asm/div64.h>
+#include "ubi.h"
+
+/*
+ * Maximum sequence numbers of UBI and volume character device IOCTLs (direct
+ * logical eraseblock erase is a debug-only feature).
+ */
+#define UBI_CDEV_IOC_MAX_SEQ 2
+#ifndef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO
+#define VOL_CDEV_IOC_MAX_SEQ 1
+#else
+#define VOL_CDEV_IOC_MAX_SEQ 2
+#endif
+
+/**
+ * major_to_device - get UBI device object by character device major number.
+ * @major: major number
+ *
+ * This function returns a pointer to the UBI device object.
+ */
+static struct ubi_device *major_to_device(int major)
+{
+	int i;
+
+	for (i = 0; i < ubi_devices_cnt; i++)
+		if (ubi_devices[i] && ubi_devices[i]->major == major)
+			return ubi_devices[i];
+	BUG();
+}
+
+/**
+ * get_exclusive - get exclusive access to an UBI volume.
+ * @desc: volume descriptor
+ *
+ * This function changes UBI volume open mode to "exclusive". Returns previous
+ * mode value (positive integer) in case of success and a negative error code
+ * in case of failure.
+ */
+static int get_exclusive(struct ubi_volume_desc *desc)
+{
+	int users, err;
+	struct ubi_volume *vol = desc->vol;
+
+	spin_lock(&vol->ubi->volumes_lock);
+	users = vol->readers + vol->writers + vol->exclusive;
+	ubi_assert(users > 0);
+	if (users > 1) {
+		dbg_err("%d users for volume %d", users, vol->vol_id);
+		err = -EBUSY;
+	} else {
+		vol->readers = vol->writers = 0;
+		vol->exclusive = 1;
+		err = desc->mode;
+		desc->mode = UBI_EXCLUSIVE;
+	}
+	spin_unlock(&vol->ubi->volumes_lock);
+
+	return err;
+}
+
+/**
+ * revoke_exclusive - revoke exclusive mode.
+ * @desc: volume descriptor
+ * @mode: new mode to switch to
+ */
+static void revoke_exclusive(struct ubi_volume_desc *desc, int mode)
+{
+	struct ubi_volume *vol = desc->vol;
+
+	spin_lock(&vol->ubi->volumes_lock);
+	ubi_assert(vol->readers == 0 && vol->writers == 0);
+	ubi_assert(vol->exclusive == 1 && desc->mode == UBI_EXCLUSIVE);
+	vol->exclusive = 0;
+	if (mode == UBI_READONLY)
+		vol->readers = 1;
+	else if (mode == UBI_READWRITE)
+		vol->writers = 1;
+	else
+		vol->exclusive = 1;
+	spin_unlock(&vol->ubi->volumes_lock);
+
+	desc->mode = mode;
+}
+
+static int vol_cdev_open(struct inode *inode, struct file *file)
+{
+	struct ubi_volume_desc *desc;
+	const struct ubi_device *ubi = major_to_device(imajor(inode));
+	int vol_id = iminor(inode) - 1;
+	int mode;
+
+	if (file->f_mode & FMODE_WRITE)
+		mode = UBI_READWRITE;
+	else
+		mode = UBI_READONLY;
+
+	dbg_msg("open volume %d, mode %d", vol_id, mode);
+
+	desc = ubi_open_volume(ubi->ubi_num, vol_id, mode);
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+
+	file->private_data = desc;
+	return 0;
+}
+
+static int vol_cdev_release(struct inode *inode, struct file *file)
+{
+	struct ubi_volume_desc *desc = file->private_data;
+	struct ubi_volume *vol = desc->vol;
+
+	dbg_msg("release volume %d, mode %d", vol->vol_id, desc->mode);
+
+	if (vol->updating) {
+		ubi_warn("update of volume %d not finished, volume is damaged",
+			 vol->vol_id);
+		vol->updating = 0;
+		kfree(vol->upd_buf);
+	}
+
+	ubi_close_volume(desc);
+	return 0;
+}
+
+static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin)
+{
+	struct ubi_volume_desc *desc = file->private_data;
+	struct ubi_volume *vol = desc->vol;
+	loff_t new_offset;
+
+	if (vol->updating) {
+		 /* Update is in progress, seeking is prohibited */
+		dbg_err("updating");
+		return -EBUSY;
+	}
+
+	switch (origin) {
+	case 0: /* SEEK_SET */
+		new_offset = offset;
+		break;
+	case 1: /* SEEK_CUR */
+		new_offset = file->f_pos + offset;
+		break;
+	case 2: /* SEEK_END */
+		new_offset = vol->used_bytes + offset;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (new_offset < 0 || new_offset > vol->used_bytes) {
+		dbg_err("bad seek %lld", new_offset);
+		return -EINVAL;
+	}
+
+	dbg_msg("seek volume %d, offset %lld, origin %d, new offset %lld",
+		vol->vol_id, offset, origin, new_offset);
+
+	file->f_pos = new_offset;
+	return new_offset;
+}
+
+static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count,
+			     loff_t *offp)
+{
+	struct ubi_volume_desc *desc = file->private_data;
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int err, lnum, off, len,  vol_id = desc->vol->vol_id, tbuf_size;
+	size_t count_save = count;
+	void *tbuf;
+	uint64_t tmp;
+
+	dbg_msg("read %zd bytes from offset %lld of volume %d",
+		count, *offp, vol_id);
+
+	if (vol->updating) {
+		dbg_err("updating");
+		return -EBUSY;
+	}
+	if (vol->upd_marker) {
+		dbg_err("damaged volume, update marker is set");
+		return -EBADF;
+	}
+	if (*offp == vol->used_bytes || count == 0)
+		return 0;
+
+	if (vol->corrupted)
+		dbg_msg("read from corrupted volume %d", vol_id);
+
+	if (*offp + count > vol->used_bytes)
+		count_save = count = vol->used_bytes - *offp;
+
+	tbuf_size = vol->usable_leb_size;
+	if (count < tbuf_size)
+		tbuf_size = ALIGN(count, ubi->min_io_size);
+	tbuf = kmalloc(tbuf_size, GFP_KERNEL);
+	if (!tbuf)
+		return -ENOMEM;
+
+	len = count > tbuf_size ? tbuf_size : count;
+
+	tmp = *offp;
+	off = do_div(tmp, vol->usable_leb_size);
+	lnum = tmp;
+
+	do {
+		cond_resched();
+
+		if (off + len >= vol->usable_leb_size)
+			len = vol->usable_leb_size - off;
+
+		err = ubi_eba_read_leb(ubi, vol_id, lnum, tbuf, off, len, 0);
+		if (err)
+			break;
+
+		off += len;
+		if (off == vol->usable_leb_size) {
+			lnum += 1;
+			off -= vol->usable_leb_size;
+		}
+
+		count -= len;
+		*offp += len;
+
+		err = copy_to_user(buf, tbuf, len);
+		if (err) {
+			err = -EFAULT;
+			break;
+		}
+
+		buf += len;
+		len = count > tbuf_size ? tbuf_size : count;
+	} while (count);
+
+	kfree(tbuf);
+	return err ? err : count_save - count;
+}
+
+#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO
+
+/*
+ * This function allows to directly write to dynamic UBI volumes, without
+ * issuing the volume update operation. Available only as a debugging feature.
+ * Very useful for testing UBI.
+ */
+static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
+				     size_t count, loff_t *offp)
+{
+	struct ubi_volume_desc *desc = file->private_data;
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int lnum, off, len, tbuf_size, vol_id = vol->vol_id, err = 0;
+	size_t count_save = count;
+	char *tbuf;
+	uint64_t tmp;
+
+	dbg_msg("requested: write %zd bytes to offset %lld of volume %u",
+		count, *offp, desc->vol->vol_id);
+
+	if (vol->vol_type == UBI_STATIC_VOLUME)
+		return -EROFS;
+
+	tmp = *offp;
+	off = do_div(tmp, vol->usable_leb_size);
+	lnum = tmp;
+
+	if (off % ubi->min_io_size) {
+		dbg_err("unaligned position");
+		return -EINVAL;
+	}
+
+	if (*offp + count > vol->used_bytes)
+		count_save = count = vol->used_bytes - *offp;
+
+	/* We can write only in fractions of the minimum I/O unit */
+	if (count % ubi->min_io_size) {
+		dbg_err("unaligned write length");
+		return -EINVAL;
+	}
+
+	tbuf_size = vol->usable_leb_size;
+	if (count < tbuf_size)
+		tbuf_size = ALIGN(count, ubi->min_io_size);
+	tbuf = kmalloc(tbuf_size, GFP_KERNEL);
+	if (!tbuf)
+		return -ENOMEM;
+
+	len = count > tbuf_size ? tbuf_size : count;
+
+	while (count) {
+		cond_resched();
+
+		if (off + len >= vol->usable_leb_size)
+			len = vol->usable_leb_size - off;
+
+		err = copy_from_user(tbuf, buf, len);
+		if (err) {
+			err = -EFAULT;
+			break;
+		}
+
+		err = ubi_eba_write_leb(ubi, vol_id, lnum, tbuf, off, len,
+					UBI_UNKNOWN);
+		if (err)
+			break;
+
+		off += len;
+		if (off == vol->usable_leb_size) {
+			lnum += 1;
+			off -= vol->usable_leb_size;
+		}
+
+		count -= len;
+		*offp += len;
+		buf += len;
+		len = count > tbuf_size ? tbuf_size : count;
+	}
+
+	kfree(tbuf);
+	return err ? err : count_save - count;
+}
+
+#else
+#define vol_cdev_direct_write(file, buf, count, offp) -EPERM
+#endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */
+
+static ssize_t vol_cdev_write(struct file *file, const char __user *buf,
+			      size_t count, loff_t *offp)
+{
+	int err = 0;
+	struct ubi_volume_desc *desc = file->private_data;
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+
+	if (!vol->updating)
+		return vol_cdev_direct_write(file, buf, count, offp);
+
+	err = ubi_more_update_data(ubi, vol->vol_id, buf, count);
+	if (err < 0) {
+		ubi_err("cannot write %zd bytes of update data", count);
+		return err;
+	}
+
+	if (err) {
+		/*
+		 * Update is finished, @err contains number of actually written
+		 * bytes now.
+		 */
+		count = err;
+
+		err = ubi_check_volume(ubi, vol->vol_id);
+		if (err < 0)
+			return err;
+
+		if (err) {
+			ubi_warn("volume %d on UBI device %d is corrupted",
+				 vol->vol_id, ubi->ubi_num);
+			vol->corrupted = 1;
+		}
+		vol->checked = 1;
+		revoke_exclusive(desc, UBI_READWRITE);
+	}
+
+	*offp += count;
+	return count;
+}
+
+static int vol_cdev_ioctl(struct inode *inode, struct file *file,
+			  unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	struct ubi_volume_desc *desc = file->private_data;
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	void __user *argp = (void __user *)arg;
+
+	if (_IOC_NR(cmd) > VOL_CDEV_IOC_MAX_SEQ ||
+	    _IOC_TYPE(cmd) != UBI_VOL_IOC_MAGIC)
+		return -ENOTTY;
+
+	if (_IOC_DIR(cmd) && _IOC_READ)
+		err = !access_ok(VERIFY_WRITE, argp, _IOC_SIZE(cmd));
+	else if (_IOC_DIR(cmd) && _IOC_WRITE)
+		err = !access_ok(VERIFY_READ, argp, _IOC_SIZE(cmd));
+	if (err)
+		return -EFAULT;
+
+	switch (cmd) {
+
+	/* Volume update command */
+	case UBI_IOCVOLUP:
+	{
+		int64_t bytes, rsvd_bytes;
+
+		if (!capable(CAP_SYS_RESOURCE)) {
+			err = -EPERM;
+			break;
+		}
+
+		err = copy_from_user(&bytes, argp, sizeof(int64_t));
+		if (err) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (desc->mode == UBI_READONLY) {
+			err = -EROFS;
+			break;
+		}
+
+		rsvd_bytes = vol->reserved_pebs * (ubi->leb_size-vol->data_pad);
+		if (bytes < 0 || bytes > rsvd_bytes) {
+			err = -EINVAL;
+			break;
+		}
+
+		err = get_exclusive(desc);
+		if (err < 0)
+			break;
+
+		err = ubi_start_update(ubi, vol->vol_id, bytes);
+		if (bytes == 0)
+			revoke_exclusive(desc, UBI_READWRITE);
+
+		file->f_pos = 0;
+		break;
+	}
+
+#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO
+	/* Logical eraseblock erasure command */
+	case UBI_IOCEBER:
+	{
+		int32_t lnum;
+
+		err = __get_user(lnum, (__user int32_t *)argp);
+		if (err) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (desc->mode == UBI_READONLY) {
+			err = -EROFS;
+			break;
+		}
+
+		if (lnum < 0 || lnum >= vol->reserved_pebs) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (vol->vol_type != UBI_DYNAMIC_VOLUME) {
+			err = -EROFS;
+			break;
+		}
+
+		dbg_msg("erase LEB %d:%d", vol->vol_id, lnum);
+		err = ubi_eba_unmap_leb(ubi, vol->vol_id, lnum);
+		if (err)
+			break;
+
+		err = ubi_wl_flush(ubi);
+		break;
+	}
+#endif
+
+	default:
+		err = -ENOTTY;
+		break;
+	}
+
+	return err;
+}
+
+/**
+ * verify_mkvol_req - verify volume creation request.
+ * @ubi: UBI device description object
+ * @req: the request to check
+ *
+ * This function zero if the request is correct, and %-EINVAL if not.
+ */
+static int verify_mkvol_req(const struct ubi_device *ubi,
+			    const struct ubi_mkvol_req *req)
+{
+	int n, err = -EINVAL;
+
+	if (req->bytes < 0 || req->alignment < 0 || req->vol_type < 0 ||
+	    req->name_len < 0)
+		goto bad;
+
+	if ((req->vol_id < 0 || req->vol_id >= ubi->vtbl_slots) &&
+	    req->vol_id != UBI_VOL_NUM_AUTO)
+		goto bad;
+
+	if (req->alignment == 0)
+		goto bad;
+
+	if (req->bytes == 0)
+		goto bad;
+
+	if (req->vol_type != UBI_DYNAMIC_VOLUME &&
+	    req->vol_type != UBI_STATIC_VOLUME)
+		goto bad;
+
+	if (req->alignment > ubi->leb_size)
+		goto bad;
+
+	n = req->alignment % ubi->min_io_size;
+	if (req->alignment != 1 && n)
+		goto bad;
+
+	if (req->name_len > UBI_VOL_NAME_MAX) {
+		err = -ENAMETOOLONG;
+		goto bad;
+	}
+
+	return 0;
+
+bad:
+	dbg_err("bad volume creation request");
+	ubi_dbg_dump_mkvol_req(req);
+	return err;
+}
+
+/**
+ * verify_rsvol_req - verify volume re-size request.
+ * @ubi: UBI device description object
+ * @req: the request to check
+ *
+ * This function returns zero if the request is correct, and %-EINVAL if not.
+ */
+static int verify_rsvol_req(const struct ubi_device *ubi,
+			    const struct ubi_rsvol_req *req)
+{
+	if (req->bytes <= 0)
+		return -EINVAL;
+
+	if (req->vol_id < 0 || req->vol_id >= ubi->vtbl_slots)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
+			  unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	struct ubi_device *ubi;
+	struct ubi_volume_desc *desc;
+	void __user *argp = (void __user *)arg;
+
+	if (_IOC_NR(cmd) > UBI_CDEV_IOC_MAX_SEQ ||
+	    _IOC_TYPE(cmd) != UBI_IOC_MAGIC)
+		return -ENOTTY;
+
+	if (_IOC_DIR(cmd) && _IOC_READ)
+		err = !access_ok(VERIFY_WRITE, argp, _IOC_SIZE(cmd));
+	else if (_IOC_DIR(cmd) && _IOC_WRITE)
+		err = !access_ok(VERIFY_READ, argp, _IOC_SIZE(cmd));
+	if (err)
+		return -EFAULT;
+
+	if (!capable(CAP_SYS_RESOURCE))
+		return -EPERM;
+
+	ubi = major_to_device(imajor(inode));
+	if (IS_ERR(ubi))
+		return PTR_ERR(ubi);
+
+	switch (cmd) {
+	/* Create volume command */
+	case UBI_IOCMKVOL:
+	{
+		struct ubi_mkvol_req req;
+
+		dbg_msg("create volume");
+		err = __copy_from_user(&req, argp,
+				       sizeof(struct ubi_mkvol_req));
+		if (err) {
+			err = -EFAULT;
+			break;
+		}
+
+		err = verify_mkvol_req(ubi, &req);
+		if (err)
+			break;
+
+		req.name[req.name_len] = '\0';
+
+		err = ubi_create_volume(ubi, &req);
+		if (err)
+			break;
+
+		err = __put_user(req.vol_id, (__user int32_t *)argp);
+		if (err)
+			err = -EFAULT;
+
+		break;
+	}
+
+	/* Remove volume command */
+	case UBI_IOCRMVOL:
+	{
+		int vol_id;
+
+		dbg_msg("remove volume");
+		err = __get_user(vol_id, (__user int32_t *)argp);
+		if (err) {
+			err = -EFAULT;
+			break;
+		}
+
+		desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE);
+		if (IS_ERR(desc)) {
+			err = PTR_ERR(desc);
+			break;
+		}
+
+		err = ubi_remove_volume(desc);
+		if (err)
+			ubi_close_volume(desc);
+
+		break;
+	}
+
+	/* Re-size volume command */
+	case UBI_IOCRSVOL:
+	{
+		int pebs;
+		uint64_t tmp;
+		struct ubi_rsvol_req req;
+
+		dbg_msg("re-size volume");
+		err = __copy_from_user(&req, argp,
+				       sizeof(struct ubi_rsvol_req));
+		if (err) {
+			err = -EFAULT;
+			break;
+		}
+
+		err = verify_rsvol_req(ubi, &req);
+		if (err)
+			break;
+
+		desc = ubi_open_volume(ubi->ubi_num, req.vol_id, UBI_EXCLUSIVE);
+		if (IS_ERR(desc)) {
+			err = PTR_ERR(desc);
+			break;
+		}
+
+		tmp = req.bytes;
+		pebs = !!do_div(tmp, desc->vol->usable_leb_size);
+		pebs += tmp;
+
+		err = ubi_resize_volume(desc, pebs);
+		ubi_close_volume(desc);
+		break;
+	}
+
+	default:
+		err = -ENOTTY;
+		break;
+	}
+
+	return err;
+}
+
+/* UBI character device operations */
+struct file_operations ubi_cdev_operations = {
+	.owner = THIS_MODULE,
+	.ioctl = ubi_cdev_ioctl,
+	.llseek = no_llseek
+};
+
+/* UBI volume character device operations */
+struct file_operations ubi_vol_cdev_operations = {
+	.owner   = THIS_MODULE,
+	.open    = vol_cdev_open,
+	.release = vol_cdev_release,
+	.llseek  = vol_cdev_llseek,
+	.read    = vol_cdev_read,
+	.write   = vol_cdev_write,
+	.ioctl   = vol_cdev_ioctl
+};
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
new file mode 100644
index 00000000000..86364221faf
--- /dev/null
+++ b/drivers/mtd/ubi/debug.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * Here we keep all the UBI debugging stuff which should normally be disabled
+ * and compiled-out, but it is extremely helpful when hunting bugs or doing big
+ * changes.
+ */
+
+#ifdef CONFIG_MTD_UBI_DEBUG_MSG
+
+#include "ubi.h"
+
+/**
+ * ubi_dbg_dump_ec_hdr - dump an erase counter header.
+ * @ec_hdr: the erase counter header to dump
+ */
+void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr)
+{
+	dbg_msg("erase counter header dump:");
+	dbg_msg("magic          %#08x", ubi32_to_cpu(ec_hdr->magic));
+	dbg_msg("version        %d",    (int)ec_hdr->version);
+	dbg_msg("ec             %llu",  (long long)ubi64_to_cpu(ec_hdr->ec));
+	dbg_msg("vid_hdr_offset %d",    ubi32_to_cpu(ec_hdr->vid_hdr_offset));
+	dbg_msg("data_offset    %d",    ubi32_to_cpu(ec_hdr->data_offset));
+	dbg_msg("hdr_crc        %#08x", ubi32_to_cpu(ec_hdr->hdr_crc));
+	dbg_msg("erase counter header hexdump:");
+	ubi_dbg_hexdump(ec_hdr, UBI_EC_HDR_SIZE);
+}
+
+/**
+ * ubi_dbg_dump_vid_hdr - dump a volume identifier header.
+ * @vid_hdr: the volume identifier header to dump
+ */
+void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr)
+{
+	dbg_msg("volume identifier header dump:");
+	dbg_msg("magic     %08x", ubi32_to_cpu(vid_hdr->magic));
+	dbg_msg("version   %d",   (int)vid_hdr->version);
+	dbg_msg("vol_type  %d",   (int)vid_hdr->vol_type);
+	dbg_msg("copy_flag %d",   (int)vid_hdr->copy_flag);
+	dbg_msg("compat    %d",   (int)vid_hdr->compat);
+	dbg_msg("vol_id    %d",   ubi32_to_cpu(vid_hdr->vol_id));
+	dbg_msg("lnum      %d",   ubi32_to_cpu(vid_hdr->lnum));
+	dbg_msg("leb_ver   %u",   ubi32_to_cpu(vid_hdr->leb_ver));
+	dbg_msg("data_size %d",   ubi32_to_cpu(vid_hdr->data_size));
+	dbg_msg("used_ebs  %d",   ubi32_to_cpu(vid_hdr->used_ebs));
+	dbg_msg("data_pad  %d",   ubi32_to_cpu(vid_hdr->data_pad));
+	dbg_msg("sqnum     %llu",
+		(unsigned long long)ubi64_to_cpu(vid_hdr->sqnum));
+	dbg_msg("hdr_crc   %08x", ubi32_to_cpu(vid_hdr->hdr_crc));
+	dbg_msg("volume identifier header hexdump:");
+}
+
+/**
+ * ubi_dbg_dump_vol_info- dump volume information.
+ * @vol: UBI volume description object
+ */
+void ubi_dbg_dump_vol_info(const struct ubi_volume *vol)
+{
+	dbg_msg("volume information dump:");
+	dbg_msg("vol_id          %d", vol->vol_id);
+	dbg_msg("reserved_pebs   %d", vol->reserved_pebs);
+	dbg_msg("alignment       %d", vol->alignment);
+	dbg_msg("data_pad        %d", vol->data_pad);
+	dbg_msg("vol_type        %d", vol->vol_type);
+	dbg_msg("name_len        %d", vol->name_len);
+	dbg_msg("usable_leb_size %d", vol->usable_leb_size);
+	dbg_msg("used_ebs        %d", vol->used_ebs);
+	dbg_msg("used_bytes      %lld", vol->used_bytes);
+	dbg_msg("last_eb_bytes   %d", vol->last_eb_bytes);
+	dbg_msg("corrupted       %d", vol->corrupted);
+	dbg_msg("upd_marker      %d", vol->upd_marker);
+
+	if (vol->name_len <= UBI_VOL_NAME_MAX &&
+	    strnlen(vol->name, vol->name_len + 1) == vol->name_len) {
+		dbg_msg("name          %s", vol->name);
+	} else {
+		dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c",
+			vol->name[0], vol->name[1], vol->name[2],
+			vol->name[3], vol->name[4]);
+	}
+}
+
+/**
+ * ubi_dbg_dump_vtbl_record - dump a &struct ubi_vtbl_record object.
+ * @r: the object to dump
+ * @idx: volume table index
+ */
+void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx)
+{
+	int name_len = ubi16_to_cpu(r->name_len);
+
+	dbg_msg("volume table record %d dump:", idx);
+	dbg_msg("reserved_pebs   %d", ubi32_to_cpu(r->reserved_pebs));
+	dbg_msg("alignment       %d", ubi32_to_cpu(r->alignment));
+	dbg_msg("data_pad        %d", ubi32_to_cpu(r->data_pad));
+	dbg_msg("vol_type        %d", (int)r->vol_type);
+	dbg_msg("upd_marker      %d", (int)r->upd_marker);
+	dbg_msg("name_len        %d", name_len);
+
+	if (r->name[0] == '\0') {
+		dbg_msg("name          NULL");
+		return;
+	}
+
+	if (name_len <= UBI_VOL_NAME_MAX &&
+	    strnlen(&r->name[0], name_len + 1) == name_len) {
+		dbg_msg("name          %s", &r->name[0]);
+	} else {
+		dbg_msg("1st 5 characters of the name: %c%c%c%c%c",
+			r->name[0], r->name[1], r->name[2], r->name[3],
+			r->name[4]);
+	}
+	dbg_msg("crc             %#08x", ubi32_to_cpu(r->crc));
+}
+
+/**
+ * ubi_dbg_dump_sv - dump a &struct ubi_scan_volume object.
+ * @sv: the object to dump
+ */
+void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv)
+{
+	dbg_msg("volume scanning information dump:");
+	dbg_msg("vol_id         %d", sv->vol_id);
+	dbg_msg("highest_lnum   %d", sv->highest_lnum);
+	dbg_msg("leb_count      %d", sv->leb_count);
+	dbg_msg("compat         %d", sv->compat);
+	dbg_msg("vol_type       %d", sv->vol_type);
+	dbg_msg("used_ebs       %d", sv->used_ebs);
+	dbg_msg("last_data_size %d", sv->last_data_size);
+	dbg_msg("data_pad       %d", sv->data_pad);
+}
+
+/**
+ * ubi_dbg_dump_seb - dump a &struct ubi_scan_leb object.
+ * @seb: the object to dump
+ * @type: object type: 0 - not corrupted, 1 - corrupted
+ */
+void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type)
+{
+	dbg_msg("eraseblock scanning information dump:");
+	dbg_msg("ec       %d", seb->ec);
+	dbg_msg("pnum     %d", seb->pnum);
+	if (type == 0) {
+		dbg_msg("lnum     %d", seb->lnum);
+		dbg_msg("scrub    %d", seb->scrub);
+		dbg_msg("sqnum    %llu", seb->sqnum);
+		dbg_msg("leb_ver  %u", seb->leb_ver);
+	}
+}
+
+/**
+ * ubi_dbg_dump_mkvol_req - dump a &struct ubi_mkvol_req object.
+ * @req: the object to dump
+ */
+void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req)
+{
+	char nm[17];
+
+	dbg_msg("volume creation request dump:");
+	dbg_msg("vol_id    %d",   req->vol_id);
+	dbg_msg("alignment %d",   req->alignment);
+	dbg_msg("bytes     %lld", (long long)req->bytes);
+	dbg_msg("vol_type  %d",   req->vol_type);
+	dbg_msg("name_len  %d",   req->name_len);
+
+	memcpy(nm, req->name, 16);
+	nm[16] = 0;
+	dbg_msg("the 1st 16 characters of the name: %s", nm);
+}
+
+#define BYTES_PER_LINE 32
+
+/**
+ * ubi_dbg_hexdump - dump a buffer.
+ * @ptr: the buffer to dump
+ * @size: buffer size which must be multiple of 4 bytes
+ */
+void ubi_dbg_hexdump(const void *ptr, int size)
+{
+	int i, k = 0, rows, columns;
+	const uint8_t *p = ptr;
+
+	size = ALIGN(size, 4);
+	rows = size/BYTES_PER_LINE + size % BYTES_PER_LINE;
+	for (i = 0; i < rows; i++) {
+		int j;
+
+		cond_resched();
+		columns = min(size - k, BYTES_PER_LINE) / 4;
+		if (columns == 0)
+			break;
+		printk(KERN_DEBUG "%5d:  ", i * BYTES_PER_LINE);
+		for (j = 0; j < columns; j++) {
+			int n, N;
+
+			N = size - k > 4 ? 4 : size - k;
+			for (n = 0; n < N; n++)
+				printk("%02x", p[k++]);
+			printk(" ");
+		}
+		printk("\n");
+	}
+}
+
+#endif /* CONFIG_MTD_UBI_DEBUG_MSG */
diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h
new file mode 100644
index 00000000000..f816ad9a36c
--- /dev/null
+++ b/drivers/mtd/ubi/debug.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+#ifndef __UBI_DEBUG_H__
+#define __UBI_DEBUG_H__
+
+#ifdef CONFIG_MTD_UBI_DEBUG
+#include <linux/random.h>
+
+#define ubi_assert(expr)  BUG_ON(!(expr))
+#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__)
+#else
+#define ubi_assert(expr)  ({})
+#define dbg_err(fmt, ...) ({})
+#endif
+
+#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT
+#define DBG_DISABLE_BGT 1
+#else
+#define DBG_DISABLE_BGT 0
+#endif
+
+#ifdef CONFIG_MTD_UBI_DEBUG_MSG
+/* Generic debugging message */
+#define dbg_msg(fmt, ...) \
+	printk(KERN_DEBUG "UBI DBG: %s: " fmt "\n", __FUNCTION__, ##__VA_ARGS__)
+
+#define ubi_dbg_dump_stack() dump_stack()
+
+struct ubi_ec_hdr;
+struct ubi_vid_hdr;
+struct ubi_volume;
+struct ubi_vtbl_record;
+struct ubi_scan_volume;
+struct ubi_scan_leb;
+struct ubi_mkvol_req;
+
+void ubi_dbg_print(int type, const char *func, const char *fmt, ...);
+void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr);
+void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr);
+void ubi_dbg_dump_vol_info(const struct ubi_volume *vol);
+void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx);
+void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv);
+void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type);
+void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
+void ubi_dbg_hexdump(const void *buf, int size);
+
+#else
+
+#define dbg_msg(fmt, ...)    ({})
+#define ubi_dbg_dump_stack() ({})
+#define ubi_dbg_print(func, fmt, ...)    ({})
+#define ubi_dbg_dump_ec_hdr(ec_hdr)      ({})
+#define ubi_dbg_dump_vid_hdr(vid_hdr)    ({})
+#define ubi_dbg_dump_vol_info(vol)       ({})
+#define ubi_dbg_dump_vtbl_record(r, idx) ({})
+#define ubi_dbg_dump_sv(sv)              ({})
+#define ubi_dbg_dump_seb(seb, type)      ({})
+#define ubi_dbg_dump_mkvol_req(req)      ({})
+#define ubi_dbg_hexdump(buf, size)       ({})
+
+#endif /* CONFIG_MTD_UBI_DEBUG_MSG */
+
+#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA
+/* Messages from the eraseblock association unit */
+#define dbg_eba(fmt, ...) \
+	printk(KERN_DEBUG "UBI DBG eba: %s: " fmt "\n", __FUNCTION__, \
+	       ##__VA_ARGS__)
+#else
+#define dbg_eba(fmt, ...) ({})
+#endif
+
+#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL
+/* Messages from the wear-leveling unit */
+#define dbg_wl(fmt, ...) \
+	printk(KERN_DEBUG "UBI DBG wl: %s: " fmt "\n", __FUNCTION__, \
+	       ##__VA_ARGS__)
+#else
+#define dbg_wl(fmt, ...) ({})
+#endif
+
+#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO
+/* Messages from the input/output unit */
+#define dbg_io(fmt, ...) \
+	printk(KERN_DEBUG "UBI DBG io: %s: " fmt "\n", __FUNCTION__, \
+	       ##__VA_ARGS__)
+#else
+#define dbg_io(fmt, ...) ({})
+#endif
+
+#ifdef CONFIG_MTD_UBI_DEBUG_MSG_BLD
+/* Initialization and build messages */
+#define dbg_bld(fmt, ...) \
+	printk(KERN_DEBUG "UBI DBG bld: %s: " fmt "\n", __FUNCTION__, \
+	       ##__VA_ARGS__)
+#else
+#define dbg_bld(fmt, ...) ({})
+#endif
+
+#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS
+/**
+ * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip.
+ *
+ * Returns non-zero if a bit-flip should be emulated, otherwise returns zero.
+ */
+static inline int ubi_dbg_is_bitflip(void)
+{
+	return !(random32() % 200);
+}
+#else
+#define ubi_dbg_is_bitflip() 0
+#endif
+
+#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES
+/**
+ * ubi_dbg_is_write_failure - if it is time to emulate a write failure.
+ *
+ * Returns non-zero if a write failure should be emulated, otherwise returns
+ * zero.
+ */
+static inline int ubi_dbg_is_write_failure(void)
+{
+	return !(random32() % 500);
+}
+#else
+#define ubi_dbg_is_write_failure() 0
+#endif
+
+#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES
+/**
+ * ubi_dbg_is_erase_failure - if its time to emulate an erase failure.
+ *
+ * Returns non-zero if an erase failure should be emulated, otherwise returns
+ * zero.
+ */
+static inline int ubi_dbg_is_erase_failure(void)
+{
+		return !(random32() % 400);
+}
+#else
+#define ubi_dbg_is_erase_failure() 0
+#endif
+
+#endif /* !__UBI_DEBUG_H__ */
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
new file mode 100644
index 00000000000..d847ee1da3d
--- /dev/null
+++ b/drivers/mtd/ubi/eba.c
@@ -0,0 +1,1241 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * The UBI Eraseblock Association (EBA) unit.
+ *
+ * This unit is responsible for I/O to/from logical eraseblock.
+ *
+ * Although in this implementation the EBA table is fully kept and managed in
+ * RAM, which assumes poor scalability, it might be (partially) maintained on
+ * flash in future implementations.
+ *
+ * The EBA unit implements per-logical eraseblock locking. Before accessing a
+ * logical eraseblock it is locked for reading or writing. The per-logical
+ * eraseblock locking is implemented by means of the lock tree. The lock tree
+ * is an RB-tree which refers all the currently locked logical eraseblocks. The
+ * lock tree elements are &struct ltree_entry objects. They are indexed by
+ * (@vol_id, @lnum) pairs.
+ *
+ * EBA also maintains the global sequence counter which is incremented each
+ * time a logical eraseblock is mapped to a physical eraseblock and it is
+ * stored in the volume identifier header. This means that each VID header has
+ * a unique sequence number. The sequence number is only increased an we assume
+ * 64 bits is enough to never overflow.
+ */
+
+#include <linux/slab.h>
+#include <linux/crc32.h>
+#include <linux/err.h>
+#include "ubi.h"
+
+/**
+ * struct ltree_entry - an entry in the lock tree.
+ * @rb: links RB-tree nodes
+ * @vol_id: volume ID of the locked logical eraseblock
+ * @lnum: locked logical eraseblock number
+ * @users: how many tasks are using this logical eraseblock or wait for it
+ * @mutex: read/write mutex to implement read/write access serialization to
+ * the (@vol_id, @lnum) logical eraseblock
+ *
+ * When a logical eraseblock is being locked - corresponding &struct ltree_entry
+ * object is inserted to the lock tree (@ubi->ltree).
+ */
+struct ltree_entry {
+	struct rb_node rb;
+	int vol_id;
+	int lnum;
+	int users;
+	struct rw_semaphore mutex;
+};
+
+/* Slab cache for lock-tree entries */
+static struct kmem_cache *ltree_slab;
+
+/**
+ * next_sqnum - get next sequence number.
+ * @ubi: UBI device description object
+ *
+ * This function returns next sequence number to use, which is just the current
+ * global sequence counter value. It also increases the global sequence
+ * counter.
+ */
+static unsigned long long next_sqnum(struct ubi_device *ubi)
+{
+	unsigned long long sqnum;
+
+	spin_lock(&ubi->ltree_lock);
+	sqnum = ubi->global_sqnum++;
+	spin_unlock(&ubi->ltree_lock);
+
+	return sqnum;
+}
+
+/**
+ * ubi_get_compat - get compatibility flags of a volume.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ *
+ * This function returns compatibility flags for an internal volume. User
+ * volumes have no compatibility flags, so %0 is returned.
+ */
+static int ubi_get_compat(const struct ubi_device *ubi, int vol_id)
+{
+	if (vol_id == UBI_LAYOUT_VOL_ID)
+		return UBI_LAYOUT_VOLUME_COMPAT;
+	return 0;
+}
+
+/**
+ * ltree_lookup - look up the lock tree.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ *
+ * This function returns a pointer to the corresponding &struct ltree_entry
+ * object if the logical eraseblock is locked and %NULL if it is not.
+ * @ubi->ltree_lock has to be locked.
+ */
+static struct ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id,
+					int lnum)
+{
+	struct rb_node *p;
+
+	p = ubi->ltree.rb_node;
+	while (p) {
+		struct ltree_entry *le;
+
+		le = rb_entry(p, struct ltree_entry, rb);
+
+		if (vol_id < le->vol_id)
+			p = p->rb_left;
+		else if (vol_id > le->vol_id)
+			p = p->rb_right;
+		else {
+			if (lnum < le->lnum)
+				p = p->rb_left;
+			else if (lnum > le->lnum)
+				p = p->rb_right;
+			else
+				return le;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * ltree_add_entry - add new entry to the lock tree.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ *
+ * This function adds new entry for logical eraseblock (@vol_id, @lnum) to the
+ * lock tree. If such entry is already there, its usage counter is increased.
+ * Returns pointer to the lock tree entry or %-ENOMEM if memory allocation
+ * failed.
+ */
+static struct ltree_entry *ltree_add_entry(struct ubi_device *ubi, int vol_id,
+					   int lnum)
+{
+	struct ltree_entry *le, *le1, *le_free;
+
+	le = kmem_cache_alloc(ltree_slab, GFP_KERNEL);
+	if (!le)
+		return ERR_PTR(-ENOMEM);
+
+	le->vol_id = vol_id;
+	le->lnum = lnum;
+
+	spin_lock(&ubi->ltree_lock);
+	le1 = ltree_lookup(ubi, vol_id, lnum);
+
+	if (le1) {
+		/*
+		 * This logical eraseblock is already locked. The newly
+		 * allocated lock entry is not needed.
+		 */
+		le_free = le;
+		le = le1;
+	} else {
+		struct rb_node **p, *parent = NULL;
+
+		/*
+		 * No lock entry, add the newly allocated one to the
+		 * @ubi->ltree RB-tree.
+		 */
+		le_free = NULL;
+
+		p = &ubi->ltree.rb_node;
+		while (*p) {
+			parent = *p;
+			le1 = rb_entry(parent, struct ltree_entry, rb);
+
+			if (vol_id < le1->vol_id)
+				p = &(*p)->rb_left;
+			else if (vol_id > le1->vol_id)
+				p = &(*p)->rb_right;
+			else {
+				ubi_assert(lnum != le1->lnum);
+				if (lnum < le1->lnum)
+					p = &(*p)->rb_left;
+				else
+					p = &(*p)->rb_right;
+			}
+		}
+
+		rb_link_node(&le->rb, parent, p);
+		rb_insert_color(&le->rb, &ubi->ltree);
+	}
+	le->users += 1;
+	spin_unlock(&ubi->ltree_lock);
+
+	if (le_free)
+		kmem_cache_free(ltree_slab, le_free);
+
+	return le;
+}
+
+/**
+ * leb_read_lock - lock logical eraseblock for reading.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ *
+ * This function locks a logical eraseblock for reading. Returns zero in case
+ * of success and a negative error code in case of failure.
+ */
+static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum)
+{
+	struct ltree_entry *le;
+
+	le = ltree_add_entry(ubi, vol_id, lnum);
+	if (IS_ERR(le))
+		return PTR_ERR(le);
+	down_read(&le->mutex);
+	return 0;
+}
+
+/**
+ * leb_read_unlock - unlock logical eraseblock.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ */
+static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum)
+{
+	int free = 0;
+	struct ltree_entry *le;
+
+	spin_lock(&ubi->ltree_lock);
+	le = ltree_lookup(ubi, vol_id, lnum);
+	le->users -= 1;
+	ubi_assert(le->users >= 0);
+	if (le->users == 0) {
+		rb_erase(&le->rb, &ubi->ltree);
+		free = 1;
+	}
+	spin_unlock(&ubi->ltree_lock);
+
+	up_read(&le->mutex);
+	if (free)
+		kmem_cache_free(ltree_slab, le);
+}
+
+/**
+ * leb_write_lock - lock logical eraseblock for writing.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ *
+ * This function locks a logical eraseblock for writing. Returns zero in case
+ * of success and a negative error code in case of failure.
+ */
+static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum)
+{
+	struct ltree_entry *le;
+
+	le = ltree_add_entry(ubi, vol_id, lnum);
+	if (IS_ERR(le))
+		return PTR_ERR(le);
+	down_write(&le->mutex);
+	return 0;
+}
+
+/**
+ * leb_write_unlock - unlock logical eraseblock.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ */
+static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum)
+{
+	int free;
+	struct ltree_entry *le;
+
+	spin_lock(&ubi->ltree_lock);
+	le = ltree_lookup(ubi, vol_id, lnum);
+	le->users -= 1;
+	ubi_assert(le->users >= 0);
+	if (le->users == 0) {
+		rb_erase(&le->rb, &ubi->ltree);
+		free = 1;
+	} else
+		free = 0;
+	spin_unlock(&ubi->ltree_lock);
+
+	up_write(&le->mutex);
+	if (free)
+		kmem_cache_free(ltree_slab, le);
+}
+
+/**
+ * ubi_eba_unmap_leb - un-map logical eraseblock.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ *
+ * This function un-maps logical eraseblock @lnum and schedules corresponding
+ * physical eraseblock for erasure. Returns zero in case of success and a
+ * negative error code in case of failure.
+ */
+int ubi_eba_unmap_leb(struct ubi_device *ubi, int vol_id, int lnum)
+{
+	int idx = vol_id2idx(ubi, vol_id), err, pnum;
+	struct ubi_volume *vol = ubi->volumes[idx];
+
+	if (ubi->ro_mode)
+		return -EROFS;
+
+	err = leb_write_lock(ubi, vol_id, lnum);
+	if (err)
+		return err;
+
+	pnum = vol->eba_tbl[lnum];
+	if (pnum < 0)
+		/* This logical eraseblock is already unmapped */
+		goto out_unlock;
+
+	dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum);
+
+	vol->eba_tbl[lnum] = UBI_LEB_UNMAPPED;
+	err = ubi_wl_put_peb(ubi, pnum, 0);
+
+out_unlock:
+	leb_write_unlock(ubi, vol_id, lnum);
+	return err;
+}
+
+/**
+ * ubi_eba_read_leb - read data.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ * @buf: buffer to store the read data
+ * @offset: offset from where to read
+ * @len: how many bytes to read
+ * @check: data CRC check flag
+ *
+ * If the logical eraseblock @lnum is unmapped, @buf is filled with 0xFF
+ * bytes. The @check flag only makes sense for static volumes and forces
+ * eraseblock data CRC checking.
+ *
+ * In case of success this function returns zero. In case of a static volume,
+ * if data CRC mismatches - %-EBADMSG is returned. %-EBADMSG may also be
+ * returned for any volume type if an ECC error was detected by the MTD device
+ * driver. Other negative error cored may be returned in case of other errors.
+ */
+int ubi_eba_read_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf,
+		     int offset, int len, int check)
+{
+	int err, pnum, scrub = 0, idx = vol_id2idx(ubi, vol_id);
+	struct ubi_vid_hdr *vid_hdr;
+	struct ubi_volume *vol = ubi->volumes[idx];
+	uint32_t crc, crc1;
+
+	err = leb_read_lock(ubi, vol_id, lnum);
+	if (err)
+		return err;
+
+	pnum = vol->eba_tbl[lnum];
+	if (pnum < 0) {
+		/*
+		 * The logical eraseblock is not mapped, fill the whole buffer
+		 * with 0xFF bytes. The exception is static volumes for which
+		 * it is an error to read unmapped logical eraseblocks.
+		 */
+		dbg_eba("read %d bytes from offset %d of LEB %d:%d (unmapped)",
+			len, offset, vol_id, lnum);
+		leb_read_unlock(ubi, vol_id, lnum);
+		ubi_assert(vol->vol_type != UBI_STATIC_VOLUME);
+		memset(buf, 0xFF, len);
+		return 0;
+	}
+
+	dbg_eba("read %d bytes from offset %d of LEB %d:%d, PEB %d",
+		len, offset, vol_id, lnum, pnum);
+
+	if (vol->vol_type == UBI_DYNAMIC_VOLUME)
+		check = 0;
+
+retry:
+	if (check) {
+		vid_hdr = ubi_zalloc_vid_hdr(ubi);
+		if (!vid_hdr) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1);
+		if (err && err != UBI_IO_BITFLIPS) {
+			if (err > 0) {
+				/*
+				 * The header is either absent or corrupted.
+				 * The former case means there is a bug -
+				 * switch to read-only mode just in case.
+				 * The latter case means a real corruption - we
+				 * may try to recover data. FIXME: but this is
+				 * not implemented.
+				 */
+				if (err == UBI_IO_BAD_VID_HDR) {
+					ubi_warn("bad VID header at PEB %d, LEB"
+						 "%d:%d", pnum, vol_id, lnum);
+					err = -EBADMSG;
+				} else
+					ubi_ro_mode(ubi);
+			}
+			goto out_free;
+		} else if (err == UBI_IO_BITFLIPS)
+			scrub = 1;
+
+		ubi_assert(lnum < ubi32_to_cpu(vid_hdr->used_ebs));
+		ubi_assert(len == ubi32_to_cpu(vid_hdr->data_size));
+
+		crc = ubi32_to_cpu(vid_hdr->data_crc);
+		ubi_free_vid_hdr(ubi, vid_hdr);
+	}
+
+	err = ubi_io_read_data(ubi, buf, pnum, offset, len);
+	if (err) {
+		if (err == UBI_IO_BITFLIPS) {
+			scrub = 1;
+			err = 0;
+		} else if (err == -EBADMSG) {
+			if (vol->vol_type == UBI_DYNAMIC_VOLUME)
+				goto out_unlock;
+			scrub = 1;
+			if (!check) {
+				ubi_msg("force data checking");
+				check = 1;
+				goto retry;
+			}
+		} else
+			goto out_unlock;
+	}
+
+	if (check) {
+		crc1 = crc32(UBI_CRC32_INIT, buf, len);
+		if (crc1 != crc) {
+			ubi_warn("CRC error: calculated %#08x, must be %#08x",
+				 crc1, crc);
+			err = -EBADMSG;
+			goto out_unlock;
+		}
+	}
+
+	if (scrub)
+		err = ubi_wl_scrub_peb(ubi, pnum);
+
+	leb_read_unlock(ubi, vol_id, lnum);
+	return err;
+
+out_free:
+	ubi_free_vid_hdr(ubi, vid_hdr);
+out_unlock:
+	leb_read_unlock(ubi, vol_id, lnum);
+	return err;
+}
+
+/**
+ * recover_peb - recover from write failure.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock to recover
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ * @buf: data which was not written because of the write failure
+ * @offset: offset of the failed write
+ * @len: how many bytes should have been written
+ *
+ * This function is called in case of a write failure and moves all good data
+ * from the potentially bad physical eraseblock to a good physical eraseblock.
+ * This function also writes the data which was not written due to the failure.
+ * Returns new physical eraseblock number in case of success, and a negative
+ * error code in case of failure.
+ */
+static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
+		       const void *buf, int offset, int len)
+{
+	int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0;
+	struct ubi_volume *vol = ubi->volumes[idx];
+	struct ubi_vid_hdr *vid_hdr;
+	unsigned char *new_buf;
+
+	vid_hdr = ubi_zalloc_vid_hdr(ubi);
+	if (!vid_hdr) {
+		return -ENOMEM;
+	}
+
+retry:
+	new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN);
+	if (new_pnum < 0) {
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return new_pnum;
+	}
+
+	ubi_msg("recover PEB %d, move data to PEB %d", pnum, new_pnum);
+
+	err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1);
+	if (err && err != UBI_IO_BITFLIPS) {
+		if (err > 0)
+			err = -EIO;
+		goto out_put;
+	}
+
+	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+	err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
+	if (err)
+		goto write_error;
+
+	data_size = offset + len;
+	new_buf = kmalloc(data_size, GFP_KERNEL);
+	if (!new_buf) {
+		err = -ENOMEM;
+		goto out_put;
+	}
+	memset(new_buf + offset, 0xFF, len);
+
+	/* Read everything before the area where the write failure happened */
+	if (offset > 0) {
+		err = ubi_io_read_data(ubi, new_buf, pnum, 0, offset);
+		if (err && err != UBI_IO_BITFLIPS) {
+			kfree(new_buf);
+			goto out_put;
+		}
+	}
+
+	memcpy(new_buf + offset, buf, len);
+
+	err = ubi_io_write_data(ubi, new_buf, new_pnum, 0, data_size);
+	if (err) {
+		kfree(new_buf);
+		goto write_error;
+	}
+
+	kfree(new_buf);
+	ubi_free_vid_hdr(ubi, vid_hdr);
+
+	vol->eba_tbl[lnum] = new_pnum;
+	ubi_wl_put_peb(ubi, pnum, 1);
+
+	ubi_msg("data was successfully recovered");
+	return 0;
+
+out_put:
+	ubi_wl_put_peb(ubi, new_pnum, 1);
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	return err;
+
+write_error:
+	/*
+	 * Bad luck? This physical eraseblock is bad too? Crud. Let's try to
+	 * get another one.
+	 */
+	ubi_warn("failed to write to PEB %d", new_pnum);
+	ubi_wl_put_peb(ubi, new_pnum, 1);
+	if (++tries > UBI_IO_RETRIES) {
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return err;
+	}
+	ubi_msg("try again");
+	goto retry;
+}
+
+/**
+ * ubi_eba_write_leb - write data to dynamic volume.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ * @buf: the data to write
+ * @offset: offset within the logical eraseblock where to write
+ * @len: how many bytes to write
+ * @dtype: data type
+ *
+ * This function writes data to logical eraseblock @lnum of a dynamic volume
+ * @vol_id. Returns zero in case of success and a negative error code in case
+ * of failure. In case of error, it is possible that something was still
+ * written to the flash media, but may be some garbage.
+ */
+int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum,
+		      const void *buf, int offset, int len, int dtype)
+{
+	int idx = vol_id2idx(ubi, vol_id), err, pnum, tries = 0;
+	struct ubi_volume *vol = ubi->volumes[idx];
+	struct ubi_vid_hdr *vid_hdr;
+
+	if (ubi->ro_mode)
+		return -EROFS;
+
+	err = leb_write_lock(ubi, vol_id, lnum);
+	if (err)
+		return err;
+
+	pnum = vol->eba_tbl[lnum];
+	if (pnum >= 0) {
+		dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d",
+			len, offset, vol_id, lnum, pnum);
+
+		err = ubi_io_write_data(ubi, buf, pnum, offset, len);
+		if (err) {
+			ubi_warn("failed to write data to PEB %d", pnum);
+			if (err == -EIO && ubi->bad_allowed)
+				err = recover_peb(ubi, pnum, vol_id, lnum, buf, offset, len);
+			if (err)
+				ubi_ro_mode(ubi);
+		}
+		leb_write_unlock(ubi, vol_id, lnum);
+		return err;
+	}
+
+	/*
+	 * The logical eraseblock is not mapped. We have to get a free physical
+	 * eraseblock and write the volume identifier header there first.
+	 */
+	vid_hdr = ubi_zalloc_vid_hdr(ubi);
+	if (!vid_hdr) {
+		leb_write_unlock(ubi, vol_id, lnum);
+		return -ENOMEM;
+	}
+
+	vid_hdr->vol_type = UBI_VID_DYNAMIC;
+	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+	vid_hdr->vol_id = cpu_to_ubi32(vol_id);
+	vid_hdr->lnum = cpu_to_ubi32(lnum);
+	vid_hdr->compat = ubi_get_compat(ubi, vol_id);
+	vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad);
+
+retry:
+	pnum = ubi_wl_get_peb(ubi, dtype);
+	if (pnum < 0) {
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		leb_write_unlock(ubi, vol_id, lnum);
+		return pnum;
+	}
+
+	dbg_eba("write VID hdr and %d bytes at offset %d of LEB %d:%d, PEB %d",
+		len, offset, vol_id, lnum, pnum);
+
+	err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr);
+	if (err) {
+		ubi_warn("failed to write VID header to LEB %d:%d, PEB %d",
+			 vol_id, lnum, pnum);
+		goto write_error;
+	}
+
+	err = ubi_io_write_data(ubi, buf, pnum, offset, len);
+	if (err) {
+		ubi_warn("failed to write %d bytes at offset %d of LEB %d:%d, "
+			 "PEB %d", len, offset, vol_id, lnum, pnum);
+		goto write_error;
+	}
+
+	vol->eba_tbl[lnum] = pnum;
+
+	leb_write_unlock(ubi, vol_id, lnum);
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	return 0;
+
+write_error:
+	if (err != -EIO || !ubi->bad_allowed) {
+		ubi_ro_mode(ubi);
+		leb_write_unlock(ubi, vol_id, lnum);
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return err;
+	}
+
+	/*
+	 * Fortunately, this is the first write operation to this physical
+	 * eraseblock, so just put it and request a new one. We assume that if
+	 * this physical eraseblock went bad, the erase code will handle that.
+	 */
+	err = ubi_wl_put_peb(ubi, pnum, 1);
+	if (err || ++tries > UBI_IO_RETRIES) {
+		ubi_ro_mode(ubi);
+		leb_write_unlock(ubi, vol_id, lnum);
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return err;
+	}
+
+	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+	ubi_msg("try another PEB");
+	goto retry;
+}
+
+/**
+ * ubi_eba_write_leb_st - write data to static volume.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ * @buf: data to write
+ * @len: how many bytes to write
+ * @dtype: data type
+ * @used_ebs: how many logical eraseblocks will this volume contain
+ *
+ * This function writes data to logical eraseblock @lnum of static volume
+ * @vol_id. The @used_ebs argument should contain total number of logical
+ * eraseblock in this static volume.
+ *
+ * When writing to the last logical eraseblock, the @len argument doesn't have
+ * to be aligned to the minimal I/O unit size. Instead, it has to be equivalent
+ * to the real data size, although the @buf buffer has to contain the
+ * alignment. In all other cases, @len has to be aligned.
+ *
+ * It is prohibited to write more then once to logical eraseblocks of static
+ * volumes. This function returns zero in case of success and a negative error
+ * code in case of failure.
+ */
+int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum,
+			 const void *buf, int len, int dtype, int used_ebs)
+{
+	int err, pnum, tries = 0, data_size = len;
+	int idx = vol_id2idx(ubi, vol_id);
+	struct ubi_volume *vol = ubi->volumes[idx];
+	struct ubi_vid_hdr *vid_hdr;
+	uint32_t crc;
+
+	if (ubi->ro_mode)
+		return -EROFS;
+
+	if (lnum == used_ebs - 1)
+		/* If this is the last LEB @len may be unaligned */
+		len = ALIGN(data_size, ubi->min_io_size);
+	else
+		ubi_assert(len % ubi->min_io_size == 0);
+
+	vid_hdr = ubi_zalloc_vid_hdr(ubi);
+	if (!vid_hdr)
+		return -ENOMEM;
+
+	err = leb_write_lock(ubi, vol_id, lnum);
+	if (err) {
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return err;
+	}
+
+	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+	vid_hdr->vol_id = cpu_to_ubi32(vol_id);
+	vid_hdr->lnum = cpu_to_ubi32(lnum);
+	vid_hdr->compat = ubi_get_compat(ubi, vol_id);
+	vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad);
+
+	crc = crc32(UBI_CRC32_INIT, buf, data_size);
+	vid_hdr->vol_type = UBI_VID_STATIC;
+	vid_hdr->data_size = cpu_to_ubi32(data_size);
+	vid_hdr->used_ebs = cpu_to_ubi32(used_ebs);
+	vid_hdr->data_crc = cpu_to_ubi32(crc);
+
+retry:
+	pnum = ubi_wl_get_peb(ubi, dtype);
+	if (pnum < 0) {
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		leb_write_unlock(ubi, vol_id, lnum);
+		return pnum;
+	}
+
+	dbg_eba("write VID hdr and %d bytes at LEB %d:%d, PEB %d, used_ebs %d",
+		len, vol_id, lnum, pnum, used_ebs);
+
+	err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr);
+	if (err) {
+		ubi_warn("failed to write VID header to LEB %d:%d, PEB %d",
+			 vol_id, lnum, pnum);
+		goto write_error;
+	}
+
+	err = ubi_io_write_data(ubi, buf, pnum, 0, len);
+	if (err) {
+		ubi_warn("failed to write %d bytes of data to PEB %d",
+			 len, pnum);
+		goto write_error;
+	}
+
+	ubi_assert(vol->eba_tbl[lnum] < 0);
+	vol->eba_tbl[lnum] = pnum;
+
+	leb_write_unlock(ubi, vol_id, lnum);
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	return 0;
+
+write_error:
+	if (err != -EIO || !ubi->bad_allowed) {
+		/*
+		 * This flash device does not admit of bad eraseblocks or
+		 * something nasty and unexpected happened. Switch to read-only
+		 * mode just in case.
+		 */
+		ubi_ro_mode(ubi);
+		leb_write_unlock(ubi, vol_id, lnum);
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return err;
+	}
+
+	err = ubi_wl_put_peb(ubi, pnum, 1);
+	if (err || ++tries > UBI_IO_RETRIES) {
+		ubi_ro_mode(ubi);
+		leb_write_unlock(ubi, vol_id, lnum);
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return err;
+	}
+
+	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+	ubi_msg("try another PEB");
+	goto retry;
+}
+
+/*
+ * ubi_eba_atomic_leb_change - change logical eraseblock atomically.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ * @buf: data to write
+ * @len: how many bytes to write
+ * @dtype: data type
+ *
+ * This function changes the contents of a logical eraseblock atomically. @buf
+ * has to contain new logical eraseblock data, and @len - the length of the
+ * data, which has to be aligned. This function guarantees that in case of an
+ * unclean reboot the old contents is preserved. Returns zero in case of
+ * success and a negative error code in case of failure.
+ */
+int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum,
+			      const void *buf, int len, int dtype)
+{
+	int err, pnum, tries = 0, idx = vol_id2idx(ubi, vol_id);
+	struct ubi_volume *vol = ubi->volumes[idx];
+	struct ubi_vid_hdr *vid_hdr;
+	uint32_t crc;
+
+	if (ubi->ro_mode)
+		return -EROFS;
+
+	vid_hdr = ubi_zalloc_vid_hdr(ubi);
+	if (!vid_hdr)
+		return -ENOMEM;
+
+	err = leb_write_lock(ubi, vol_id, lnum);
+	if (err) {
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return err;
+	}
+
+	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+	vid_hdr->vol_id = cpu_to_ubi32(vol_id);
+	vid_hdr->lnum = cpu_to_ubi32(lnum);
+	vid_hdr->compat = ubi_get_compat(ubi, vol_id);
+	vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad);
+
+	crc = crc32(UBI_CRC32_INIT, buf, len);
+	vid_hdr->vol_type = UBI_VID_STATIC;
+	vid_hdr->data_size = cpu_to_ubi32(len);
+	vid_hdr->copy_flag = 1;
+	vid_hdr->data_crc = cpu_to_ubi32(crc);
+
+retry:
+	pnum = ubi_wl_get_peb(ubi, dtype);
+	if (pnum < 0) {
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		leb_write_unlock(ubi, vol_id, lnum);
+		return pnum;
+	}
+
+	dbg_eba("change LEB %d:%d, PEB %d, write VID hdr to PEB %d",
+		vol_id, lnum, vol->eba_tbl[lnum], pnum);
+
+	err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr);
+	if (err) {
+		ubi_warn("failed to write VID header to LEB %d:%d, PEB %d",
+			 vol_id, lnum, pnum);
+		goto write_error;
+	}
+
+	err = ubi_io_write_data(ubi, buf, pnum, 0, len);
+	if (err) {
+		ubi_warn("failed to write %d bytes of data to PEB %d",
+			 len, pnum);
+		goto write_error;
+	}
+
+	err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1);
+	if (err) {
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		leb_write_unlock(ubi, vol_id, lnum);
+		return err;
+	}
+
+	vol->eba_tbl[lnum] = pnum;
+	leb_write_unlock(ubi, vol_id, lnum);
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	return 0;
+
+write_error:
+	if (err != -EIO || !ubi->bad_allowed) {
+		/*
+		 * This flash device does not admit of bad eraseblocks or
+		 * something nasty and unexpected happened. Switch to read-only
+		 * mode just in case.
+		 */
+		ubi_ro_mode(ubi);
+		leb_write_unlock(ubi, vol_id, lnum);
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return err;
+	}
+
+	err = ubi_wl_put_peb(ubi, pnum, 1);
+	if (err || ++tries > UBI_IO_RETRIES) {
+		ubi_ro_mode(ubi);
+		leb_write_unlock(ubi, vol_id, lnum);
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return err;
+	}
+
+	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+	ubi_msg("try another PEB");
+	goto retry;
+}
+
+/**
+ * ltree_entry_ctor - lock tree entries slab cache constructor.
+ * @obj: the lock-tree entry to construct
+ * @cache: the lock tree entry slab cache
+ * @flags: constructor flags
+ */
+static void ltree_entry_ctor(void *obj, struct kmem_cache *cache,
+			     unsigned long flags)
+{
+	struct ltree_entry *le = obj;
+
+	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) !=
+	    SLAB_CTOR_CONSTRUCTOR)
+		return;
+
+	le->users = 0;
+	init_rwsem(&le->mutex);
+}
+
+/**
+ * ubi_eba_copy_leb - copy logical eraseblock.
+ * @ubi: UBI device description object
+ * @from: physical eraseblock number from where to copy
+ * @to: physical eraseblock number where to copy
+ * @vid_hdr: VID header of the @from physical eraseblock
+ *
+ * This function copies logical eraseblock from physical eraseblock @from to
+ * physical eraseblock @to. The @vid_hdr buffer may be changed by this
+ * function. Returns zero in case of success, %UBI_IO_BITFLIPS if the operation
+ * was canceled because bit-flips were detected at the target PEB, and a
+ * negative error code in case of failure.
+ */
+int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
+		     struct ubi_vid_hdr *vid_hdr)
+{
+	int err, vol_id, lnum, data_size, aldata_size, pnum, idx;
+	struct ubi_volume *vol;
+	uint32_t crc;
+	void *buf, *buf1 = NULL;
+
+	vol_id = ubi32_to_cpu(vid_hdr->vol_id);
+	lnum = ubi32_to_cpu(vid_hdr->lnum);
+
+	dbg_eba("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to);
+
+	if (vid_hdr->vol_type == UBI_VID_STATIC) {
+		data_size = ubi32_to_cpu(vid_hdr->data_size);
+		aldata_size = ALIGN(data_size, ubi->min_io_size);
+	} else
+		data_size = aldata_size =
+			    ubi->leb_size - ubi32_to_cpu(vid_hdr->data_pad);
+
+	buf = kmalloc(aldata_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/*
+	 * We do not want anybody to write to this logical eraseblock while we
+	 * are moving it, so we lock it.
+	 */
+	err = leb_write_lock(ubi, vol_id, lnum);
+	if (err) {
+		kfree(buf);
+		return err;
+	}
+
+	/*
+	 * But the logical eraseblock might have been put by this time.
+	 * Cancel if it is true.
+	 */
+	idx = vol_id2idx(ubi, vol_id);
+
+	/*
+	 * We may race with volume deletion/re-size, so we have to hold
+	 * @ubi->volumes_lock.
+	 */
+	spin_lock(&ubi->volumes_lock);
+	vol = ubi->volumes[idx];
+	if (!vol) {
+		dbg_eba("volume %d was removed meanwhile", vol_id);
+		spin_unlock(&ubi->volumes_lock);
+		goto out_unlock;
+	}
+
+	pnum = vol->eba_tbl[lnum];
+	if (pnum != from) {
+		dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to "
+			"PEB %d, cancel", vol_id, lnum, from, pnum);
+		spin_unlock(&ubi->volumes_lock);
+		goto out_unlock;
+	}
+	spin_unlock(&ubi->volumes_lock);
+
+	/* OK, now the LEB is locked and we can safely start moving it */
+
+	dbg_eba("read %d bytes of data", aldata_size);
+	err = ubi_io_read_data(ubi, buf, from, 0, aldata_size);
+	if (err && err != UBI_IO_BITFLIPS) {
+		ubi_warn("error %d while reading data from PEB %d",
+			 err, from);
+		goto out_unlock;
+	}
+
+	/*
+	 * Now we have got to calculate how much data we have to to copy. In
+	 * case of a static volume it is fairly easy - the VID header contains
+	 * the data size. In case of a dynamic volume it is more difficult - we
+	 * have to read the contents, cut 0xFF bytes from the end and copy only
+	 * the first part. We must do this to avoid writing 0xFF bytes as it
+	 * may have some side-effects. And not only this. It is important not
+	 * to include those 0xFFs to CRC because later the they may be filled
+	 * by data.
+	 */
+	if (vid_hdr->vol_type == UBI_VID_DYNAMIC)
+		aldata_size = data_size =
+				ubi_calc_data_len(ubi, buf, data_size);
+
+	cond_resched();
+	crc = crc32(UBI_CRC32_INIT, buf, data_size);
+	cond_resched();
+
+	/*
+	 * It may turn out to me that the whole @from physical eraseblock
+	 * contains only 0xFF bytes. Then we have to only write the VID header
+	 * and do not write any data. This also means we should not set
+	 * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc.
+	 */
+	if (data_size > 0) {
+		vid_hdr->copy_flag = 1;
+		vid_hdr->data_size = cpu_to_ubi32(data_size);
+		vid_hdr->data_crc = cpu_to_ubi32(crc);
+	}
+	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+
+	err = ubi_io_write_vid_hdr(ubi, to, vid_hdr);
+	if (err)
+		goto out_unlock;
+
+	cond_resched();
+
+	/* Read the VID header back and check if it was written correctly */
+	err = ubi_io_read_vid_hdr(ubi, to, vid_hdr, 1);
+	if (err) {
+		if (err != UBI_IO_BITFLIPS)
+			ubi_warn("cannot read VID header back from PEB %d", to);
+		goto out_unlock;
+	}
+
+	if (data_size > 0) {
+		err = ubi_io_write_data(ubi, buf, to, 0, aldata_size);
+		if (err)
+			goto out_unlock;
+
+		/*
+		 * We've written the data and are going to read it back to make
+		 * sure it was written correctly.
+		 */
+		buf1 = kmalloc(aldata_size, GFP_KERNEL);
+		if (!buf1) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		cond_resched();
+
+		err = ubi_io_read_data(ubi, buf1, to, 0, aldata_size);
+		if (err) {
+			if (err != UBI_IO_BITFLIPS)
+				ubi_warn("cannot read data back from PEB %d",
+					 to);
+			goto out_unlock;
+		}
+
+		cond_resched();
+
+		if (memcmp(buf, buf1, aldata_size)) {
+			ubi_warn("read data back from PEB %d - it is different",
+				 to);
+			goto out_unlock;
+		}
+	}
+
+	ubi_assert(vol->eba_tbl[lnum] == from);
+	vol->eba_tbl[lnum] = to;
+
+	leb_write_unlock(ubi, vol_id, lnum);
+	kfree(buf);
+	kfree(buf1);
+
+	return 0;
+
+out_unlock:
+	leb_write_unlock(ubi, vol_id, lnum);
+	kfree(buf);
+	kfree(buf1);
+	return err;
+}
+
+/**
+ * ubi_eba_init_scan - initialize the EBA unit using scanning information.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
+{
+	int i, j, err, num_volumes;
+	struct ubi_scan_volume *sv;
+	struct ubi_volume *vol;
+	struct ubi_scan_leb *seb;
+	struct rb_node *rb;
+
+	dbg_eba("initialize EBA unit");
+
+	spin_lock_init(&ubi->ltree_lock);
+	ubi->ltree = RB_ROOT;
+
+	if (ubi_devices_cnt == 0) {
+		ltree_slab = kmem_cache_create("ubi_ltree_slab",
+					       sizeof(struct ltree_entry), 0,
+					       0, &ltree_entry_ctor, NULL);
+		if (!ltree_slab)
+			return -ENOMEM;
+	}
+
+	ubi->global_sqnum = si->max_sqnum + 1;
+	num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
+
+	for (i = 0; i < num_volumes; i++) {
+		vol = ubi->volumes[i];
+		if (!vol)
+			continue;
+
+		cond_resched();
+
+		vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int),
+				       GFP_KERNEL);
+		if (!vol->eba_tbl) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+
+		for (j = 0; j < vol->reserved_pebs; j++)
+			vol->eba_tbl[j] = UBI_LEB_UNMAPPED;
+
+		sv = ubi_scan_find_sv(si, idx2vol_id(ubi, i));
+		if (!sv)
+			continue;
+
+		ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
+			if (seb->lnum >= vol->reserved_pebs)
+				/*
+				 * This may happen in case of an unclean reboot
+				 * during re-size.
+				 */
+				ubi_scan_move_to_list(sv, seb, &si->erase);
+			vol->eba_tbl[seb->lnum] = seb->pnum;
+		}
+	}
+
+	if (ubi->bad_allowed) {
+		ubi_calculate_reserved(ubi);
+
+		if (ubi->avail_pebs < ubi->beb_rsvd_level) {
+			/* No enough free physical eraseblocks */
+			ubi->beb_rsvd_pebs = ubi->avail_pebs;
+			ubi_warn("cannot reserve enough PEBs for bad PEB "
+				 "handling, reserved %d, need %d",
+				 ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
+		} else
+			ubi->beb_rsvd_pebs = ubi->beb_rsvd_level;
+
+		ubi->avail_pebs -= ubi->beb_rsvd_pebs;
+		ubi->rsvd_pebs  += ubi->beb_rsvd_pebs;
+	}
+
+	dbg_eba("EBA unit is initialized");
+	return 0;
+
+out_free:
+	for (i = 0; i < num_volumes; i++) {
+		if (!ubi->volumes[i])
+			continue;
+		kfree(ubi->volumes[i]->eba_tbl);
+	}
+	if (ubi_devices_cnt == 0)
+		kmem_cache_destroy(ltree_slab);
+	return err;
+}
+
+/**
+ * ubi_eba_close - close EBA unit.
+ * @ubi: UBI device description object
+ */
+void ubi_eba_close(const struct ubi_device *ubi)
+{
+	int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
+
+	dbg_eba("close EBA unit");
+
+	for (i = 0; i < num_volumes; i++) {
+		if (!ubi->volumes[i])
+			continue;
+		kfree(ubi->volumes[i]->eba_tbl);
+	}
+	if (ubi_devices_cnt == 1)
+		kmem_cache_destroy(ltree_slab);
+}
diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c
new file mode 100644
index 00000000000..c8bbfd1e67a
--- /dev/null
+++ b/drivers/mtd/ubi/gluebi.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём), Joern Engel
+ */
+
+/*
+ * This file includes implementation of fake MTD devices for each UBI volume.
+ * This sounds strange, but it is in fact quite useful to make MTD-oriented
+ * software (including all the legacy software) to work on top of UBI.
+ *
+ * Gluebi emulates MTD devices of "MTD_UBIVOLUME" type. Their minimal I/O unit
+ * size (mtd->writesize) is equivalent to the UBI minimal I/O unit. The
+ * eraseblock size is equivalent to the logical eraseblock size of the volume.
+ */
+
+#include <asm/div64.h>
+#include "ubi.h"
+
+/**
+ * gluebi_get_device - get MTD device reference.
+ * @mtd: the MTD device description object
+ *
+ * This function is called every time the MTD device is being opened and
+ * implements the MTD get_device() operation. Returns zero in case of success
+ * and a negative error code in case of failure.
+ */
+static int gluebi_get_device(struct mtd_info *mtd)
+{
+	struct ubi_volume *vol;
+
+	vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
+
+	/*
+	 * We do not introduce locks for gluebi reference count because the
+	 * get_device()/put_device() calls are already serialized at MTD.
+	 */
+	if (vol->gluebi_refcount > 0) {
+		/*
+		 * The MTD device is already referenced and this is just one
+		 * more reference. MTD allows many users to open the same
+		 * volume simultaneously and do not distinguish between
+		 * readers/writers/exclusive openers as UBI does. So we do not
+		 * open the UBI volume again - just increase the reference
+		 * counter and return.
+		 */
+		vol->gluebi_refcount += 1;
+		return 0;
+	}
+
+	/*
+	 * This is the first reference to this UBI volume via the MTD device
+	 * interface. Open the corresponding volume in read-write mode.
+	 */
+	vol->gluebi_desc = ubi_open_volume(vol->ubi->ubi_num, vol->vol_id,
+					   UBI_READWRITE);
+	if (IS_ERR(vol->gluebi_desc))
+		return PTR_ERR(vol->gluebi_desc);
+	vol->gluebi_refcount += 1;
+	return 0;
+}
+
+/**
+ * gluebi_put_device - put MTD device reference.
+ * @mtd: the MTD device description object
+ *
+ * This function is called every time the MTD device is being put. Returns
+ * zero in case of success and a negative error code in case of failure.
+ */
+static void gluebi_put_device(struct mtd_info *mtd)
+{
+	struct ubi_volume *vol;
+
+	vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
+	vol->gluebi_refcount -= 1;
+	ubi_assert(vol->gluebi_refcount >= 0);
+	if (vol->gluebi_refcount == 0)
+		ubi_close_volume(vol->gluebi_desc);
+}
+
+/**
+ * gluebi_read - read operation of emulated MTD devices.
+ * @mtd: MTD device description object
+ * @from: absolute offset from where to read
+ * @len: how many bytes to read
+ * @retlen: count of read bytes is returned here
+ * @buf: buffer to store the read data
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len,
+		       size_t *retlen, unsigned char *buf)
+{
+	int err = 0, lnum, offs, total_read;
+	struct ubi_volume *vol;
+	struct ubi_device *ubi;
+	uint64_t tmp = from;
+
+	dbg_msg("read %zd bytes from offset %lld", len, from);
+
+	if (len < 0 || from < 0 || from + len > mtd->size)
+		return -EINVAL;
+
+	vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
+	ubi = vol->ubi;
+
+	offs = do_div(tmp, mtd->erasesize);
+	lnum = tmp;
+
+	total_read = len;
+	while (total_read) {
+		size_t to_read = mtd->erasesize - offs;
+
+		if (to_read > total_read)
+			to_read = total_read;
+
+		err = ubi_eba_read_leb(ubi, vol->vol_id, lnum, buf, offs,
+				       to_read, 0);
+		if (err)
+			break;
+
+		lnum += 1;
+		offs = 0;
+		total_read -= to_read;
+		buf += to_read;
+	}
+
+	*retlen = len - total_read;
+	return err;
+}
+
+/**
+ * gluebi_write - write operation of emulated MTD devices.
+ * @mtd: MTD device description object
+ * @to: absolute offset where to write
+ * @len: how many bytes to write
+ * @retlen: count of written bytes is returned here
+ * @buf: buffer with data to write
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len,
+		       size_t *retlen, const u_char *buf)
+{
+	int err = 0, lnum, offs, total_written;
+	struct ubi_volume *vol;
+	struct ubi_device *ubi;
+	uint64_t tmp = to;
+
+	dbg_msg("write %zd bytes to offset %lld", len, to);
+
+	if (len < 0 || to < 0 || len + to > mtd->size)
+		return -EINVAL;
+
+	vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
+	ubi = vol->ubi;
+
+	if (ubi->ro_mode)
+		return -EROFS;
+
+	offs = do_div(tmp, mtd->erasesize);
+	lnum = tmp;
+
+	if (len % mtd->writesize || offs % mtd->writesize)
+		return -EINVAL;
+
+	total_written = len;
+	while (total_written) {
+		size_t to_write = mtd->erasesize - offs;
+
+		if (to_write > total_written)
+			to_write = total_written;
+
+		err = ubi_eba_write_leb(ubi, vol->vol_id, lnum, buf, offs,
+					to_write, UBI_UNKNOWN);
+		if (err)
+			break;
+
+		lnum += 1;
+		offs = 0;
+		total_written -= to_write;
+		buf += to_write;
+	}
+
+	*retlen = len - total_written;
+	return err;
+}
+
+/**
+ * gluebi_erase - erase operation of emulated MTD devices.
+ * @mtd: the MTD device description object
+ * @instr: the erase operation description
+ *
+ * This function calls the erase callback when finishes. Returns zero in case
+ * of success and a negative error code in case of failure.
+ */
+static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr)
+{
+	int err, i, lnum, count;
+	struct ubi_volume *vol;
+	struct ubi_device *ubi;
+
+	dbg_msg("erase %u bytes at offset %u", instr->len, instr->addr);
+
+	if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize)
+		return -EINVAL;
+
+	if (instr->len < 0 || instr->addr + instr->len > mtd->size)
+		return -EINVAL;
+
+	if (instr->addr % mtd->writesize || instr->len % mtd->writesize)
+		return -EINVAL;
+
+	lnum = instr->addr / mtd->erasesize;
+	count = instr->len / mtd->erasesize;
+
+	vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
+	ubi = vol->ubi;
+
+	if (ubi->ro_mode)
+		return -EROFS;
+
+	for (i = 0; i < count; i++) {
+		err = ubi_eba_unmap_leb(ubi, vol->vol_id, lnum + i);
+		if (err)
+			goto out_err;
+	}
+
+	/*
+	 * MTD erase operations are synchronous, so we have to make sure the
+	 * physical eraseblock is wiped out.
+	 */
+	err = ubi_wl_flush(ubi);
+	if (err)
+		goto out_err;
+
+        instr->state = MTD_ERASE_DONE;
+        mtd_erase_callback(instr);
+	return 0;
+
+out_err:
+	instr->state = MTD_ERASE_FAILED;
+	instr->fail_addr = lnum * mtd->erasesize;
+	return err;
+}
+
+/**
+ * ubi_create_gluebi - initialize gluebi for an UBI volume.
+ * @ubi: UBI device description object
+ * @vol: volume description object
+ *
+ * This function is called when an UBI volume is created in order to create
+ * corresponding fake MTD device. Returns zero in case of success and a
+ * negative error code in case of failure.
+ */
+int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
+{
+	int err;
+	struct mtd_info *mtd = &vol->gluebi_mtd;
+
+	mtd->name = kmemdup(vol->name, vol->name_len + 1, GFP_KERNEL);
+	if (!mtd->name)
+		return -ENOMEM;
+
+	mtd->type = MTD_UBIVOLUME;
+	if (!ubi->ro_mode)
+		mtd->flags = MTD_WRITEABLE;
+	mtd->writesize  = ubi->min_io_size;
+	mtd->owner      = THIS_MODULE;
+	mtd->size       = vol->usable_leb_size * vol->reserved_pebs;
+	mtd->erasesize  = vol->usable_leb_size;
+	mtd->read       = gluebi_read;
+	mtd->write      = gluebi_write;
+	mtd->erase      = gluebi_erase;
+	mtd->get_device = gluebi_get_device;
+	mtd->put_device = gluebi_put_device;
+
+	if (add_mtd_device(mtd)) {
+		ubi_err("cannot not add MTD device\n");
+		kfree(mtd->name);
+		return -ENFILE;
+	}
+
+	dbg_msg("added mtd%d (\"%s\"), size %u, EB size %u",
+		mtd->index, mtd->name, mtd->size, mtd->erasesize);
+	return 0;
+}
+
+/**
+ * ubi_destroy_gluebi - close gluebi for an UBI volume.
+ * @vol: volume description object
+ *
+ * This function is called when an UBI volume is removed in order to remove
+ * corresponding fake MTD device. Returns zero in case of success and a
+ * negative error code in case of failure.
+ */
+int ubi_destroy_gluebi(struct ubi_volume *vol)
+{
+	int err;
+	struct mtd_info *mtd = &vol->gluebi_mtd;
+
+	dbg_msg("remove mtd%d", mtd->index);
+	err = del_mtd_device(mtd);
+	if (err)
+		return err;
+	kfree(mtd->name);
+	return 0;
+}
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
new file mode 100644
index 00000000000..438914d0515
--- /dev/null
+++ b/drivers/mtd/ubi/io.c
@@ -0,0 +1,1259 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ * Copyright (c) Nokia Corporation, 2006, 2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * UBI input/output unit.
+ *
+ * This unit provides a uniform way to work with all kinds of the underlying
+ * MTD devices. It also implements handy functions for reading and writing UBI
+ * headers.
+ *
+ * We are trying to have a paranoid mindset and not to trust to what we read
+ * from the flash media in order to be more secure and robust. So this unit
+ * validates every single header it reads from the flash media.
+ *
+ * Some words about how the eraseblock headers are stored.
+ *
+ * The erase counter header is always stored at offset zero. By default, the
+ * VID header is stored after the EC header at the closest aligned offset
+ * (i.e. aligned to the minimum I/O unit size). Data starts next to the VID
+ * header at the closest aligned offset. But this default layout may be
+ * changed. For example, for different reasons (e.g., optimization) UBI may be
+ * asked to put the VID header at further offset, and even at an unaligned
+ * offset. Of course, if the offset of the VID header is unaligned, UBI adds
+ * proper padding in front of it. Data offset may also be changed but it has to
+ * be aligned.
+ *
+ * About minimal I/O units. In general, UBI assumes flash device model where
+ * there is only one minimal I/O unit size. E.g., in case of NOR flash it is 1,
+ * in case of NAND flash it is a NAND page, etc. This is reported by MTD in the
+ * @ubi->mtd->writesize field. But as an exception, UBI admits of using another
+ * (smaller) minimal I/O unit size for EC and VID headers to make it possible
+ * to do different optimizations.
+ *
+ * This is extremely useful in case of NAND flashes which admit of several
+ * write operations to one NAND page. In this case UBI can fit EC and VID
+ * headers at one NAND page. Thus, UBI may use "sub-page" size as the minimal
+ * I/O unit for the headers (the @ubi->hdrs_min_io_size field). But it still
+ * reports NAND page size (@ubi->min_io_size) as a minimal I/O unit for the UBI
+ * users.
+ *
+ * Example: some Samsung NANDs with 2KiB pages allow 4x 512-byte writes, so
+ * although the minimal I/O unit is 2K, UBI uses 512 bytes for EC and VID
+ * headers.
+ *
+ * Q: why not just to treat sub-page as a minimal I/O unit of this flash
+ * device, e.g., make @ubi->min_io_size = 512 in the example above?
+ *
+ * A: because when writing a sub-page, MTD still writes a full 2K page but the
+ * bytes which are no relevant to the sub-page are 0xFF. So, basically, writing
+ * 4x512 sub-pages is 4 times slower then writing one 2KiB NAND page. Thus, we
+ * prefer to use sub-pages only for EV and VID headers.
+ *
+ * As it was noted above, the VID header may start at a non-aligned offset.
+ * For example, in case of a 2KiB page NAND flash with a 512 bytes sub-page,
+ * the VID header may reside at offset 1984 which is the last 64 bytes of the
+ * last sub-page (EC header is always at offset zero). This causes some
+ * difficulties when reading and writing VID headers.
+ *
+ * Suppose we have a 64-byte buffer and we read a VID header at it. We change
+ * the data and want to write this VID header out. As we can only write in
+ * 512-byte chunks, we have to allocate one more buffer and copy our VID header
+ * to offset 448 of this buffer.
+ *
+ * The I/O unit does the following trick in order to avoid this extra copy.
+ * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header
+ * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the
+ * VID header is being written out, it shifts the VID header pointer back and
+ * writes the whole sub-page.
+ */
+
+#include <linux/crc32.h>
+#include <linux/err.h>
+#include "ubi.h"
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum);
+static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum);
+static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum,
+				 const struct ubi_ec_hdr *ec_hdr);
+static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum);
+static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum,
+				  const struct ubi_vid_hdr *vid_hdr);
+static int paranoid_check_all_ff(const struct ubi_device *ubi, int pnum,
+				 int offset, int len);
+#else
+#define paranoid_check_not_bad(ubi, pnum) 0
+#define paranoid_check_peb_ec_hdr(ubi, pnum)  0
+#define paranoid_check_ec_hdr(ubi, pnum, ec_hdr)  0
+#define paranoid_check_peb_vid_hdr(ubi, pnum) 0
+#define paranoid_check_vid_hdr(ubi, pnum, vid_hdr) 0
+#define paranoid_check_all_ff(ubi, pnum, offset, len) 0
+#endif
+
+/**
+ * ubi_io_read - read data from a physical eraseblock.
+ * @ubi: UBI device description object
+ * @buf: buffer where to store the read data
+ * @pnum: physical eraseblock number to read from
+ * @offset: offset within the physical eraseblock from where to read
+ * @len: how many bytes to read
+ *
+ * This function reads data from offset @offset of physical eraseblock @pnum
+ * and stores the read data in the @buf buffer. The following return codes are
+ * possible:
+ *
+ * o %0 if all the requested data were successfully read;
+ * o %UBI_IO_BITFLIPS if all the requested data were successfully read, but
+ *   correctable bit-flips were detected; this is harmless but may indicate
+ *   that this eraseblock may become bad soon (but do not have to);
+ * o %-EBADMSG if the MTD subsystem reported about data data integrity
+ *   problems, for example it can me an ECC error in case of NAND; this most
+ *   probably means that the data is corrupted;
+ * o %-EIO if some I/O error occurred;
+ * o other negative error codes in case of other errors.
+ */
+int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
+		int len)
+{
+	int err, retries = 0;
+	size_t read;
+	loff_t addr;
+
+	dbg_io("read %d bytes from PEB %d:%d", len, pnum, offset);
+
+	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+	ubi_assert(offset >= 0 && offset + len <= ubi->peb_size);
+	ubi_assert(len > 0);
+
+	err = paranoid_check_not_bad(ubi, pnum);
+	if (err)
+		return err > 0 ? -EINVAL : err;
+
+	addr = (loff_t)pnum * ubi->peb_size + offset;
+retry:
+	err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
+	if (err) {
+		if (err == -EUCLEAN) {
+			/*
+			 * -EUCLEAN is reported if there was a bit-flip which
+			 * was corrected, so this is harmless.
+			 */
+			ubi_msg("fixable bit-flip detected at PEB %d", pnum);
+			ubi_assert(len == read);
+			return UBI_IO_BITFLIPS;
+		}
+
+		if (read != len && retries++ < UBI_IO_RETRIES) {
+			dbg_io("error %d while reading %d bytes from PEB %d:%d, "
+			       "read only %zd bytes, retry",
+			       err, len, pnum, offset, read);
+			yield();
+			goto retry;
+		}
+
+		ubi_err("error %d while reading %d bytes from PEB %d:%d, "
+			"read %zd bytes", err, len, pnum, offset, read);
+		ubi_dbg_dump_stack();
+	} else {
+		ubi_assert(len == read);
+
+		if (ubi_dbg_is_bitflip()) {
+			dbg_msg("bit-flip (emulated)");
+			err = UBI_IO_BITFLIPS;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * ubi_io_write - write data to a physical eraseblock.
+ * @ubi: UBI device description object
+ * @buf: buffer with the data to write
+ * @pnum: physical eraseblock number to write to
+ * @offset: offset within the physical eraseblock where to write
+ * @len: how many bytes to write
+ *
+ * This function writes @len bytes of data from buffer @buf to offset @offset
+ * of physical eraseblock @pnum. If all the data were successfully written,
+ * zero is returned. If an error occurred, this function returns a negative
+ * error code. If %-EIO is returned, the physical eraseblock most probably went
+ * bad.
+ *
+ * Note, in case of an error, it is possible that something was still written
+ * to the flash media, but may be some garbage.
+ */
+int ubi_io_write(const struct ubi_device *ubi, const void *buf, int pnum,
+		 int offset, int len)
+{
+	int err;
+	size_t written;
+	loff_t addr;
+
+	dbg_io("write %d bytes to PEB %d:%d", len, pnum, offset);
+
+	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+	ubi_assert(offset >= 0 && offset + len <= ubi->peb_size);
+	ubi_assert(offset % ubi->hdrs_min_io_size == 0);
+	ubi_assert(len > 0 && len % ubi->hdrs_min_io_size == 0);
+
+	if (ubi->ro_mode) {
+		ubi_err("read-only mode");
+		return -EROFS;
+	}
+
+	/* The below has to be compiled out if paranoid checks are disabled */
+
+	err = paranoid_check_not_bad(ubi, pnum);
+	if (err)
+		return err > 0 ? -EINVAL : err;
+
+	/* The area we are writing to has to contain all 0xFF bytes */
+	err = paranoid_check_all_ff(ubi, pnum, offset, len);
+	if (err)
+		return err > 0 ? -EINVAL : err;
+
+	if (offset >= ubi->leb_start) {
+		/*
+		 * We write to the data area of the physical eraseblock. Make
+		 * sure it has valid EC and VID headers.
+		 */
+		err = paranoid_check_peb_ec_hdr(ubi, pnum);
+		if (err)
+			return err > 0 ? -EINVAL : err;
+		err = paranoid_check_peb_vid_hdr(ubi, pnum);
+		if (err)
+			return err > 0 ? -EINVAL : err;
+	}
+
+	if (ubi_dbg_is_write_failure()) {
+		dbg_err("cannot write %d bytes to PEB %d:%d "
+			"(emulated)", len, pnum, offset);
+		ubi_dbg_dump_stack();
+		return -EIO;
+	}
+
+	addr = (loff_t)pnum * ubi->peb_size + offset;
+	err = ubi->mtd->write(ubi->mtd, addr, len, &written, buf);
+	if (err) {
+		ubi_err("error %d while writing %d bytes to PEB %d:%d, written"
+			" %zd bytes", err, len, pnum, offset, written);
+		ubi_dbg_dump_stack();
+	} else
+		ubi_assert(written == len);
+
+	return err;
+}
+
+/**
+ * erase_callback - MTD erasure call-back.
+ * @ei: MTD erase information object.
+ *
+ * Note, even though MTD erase interface is asynchronous, all the current
+ * implementations are synchronous anyway.
+ */
+static void erase_callback(struct erase_info *ei)
+{
+	wake_up_interruptible((wait_queue_head_t *)ei->priv);
+}
+
+/**
+ * do_sync_erase - synchronously erase a physical eraseblock.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to erase
+ *
+ * This function synchronously erases physical eraseblock @pnum and returns
+ * zero in case of success and a negative error code in case of failure. If
+ * %-EIO is returned, the physical eraseblock most probably went bad.
+ */
+static int do_sync_erase(const struct ubi_device *ubi, int pnum)
+{
+	int err, retries = 0;
+	struct erase_info ei;
+	wait_queue_head_t wq;
+
+	dbg_io("erase PEB %d", pnum);
+
+retry:
+	init_waitqueue_head(&wq);
+	memset(&ei, 0, sizeof(struct erase_info));
+
+	ei.mtd      = ubi->mtd;
+	ei.addr     = pnum * ubi->peb_size;
+	ei.len      = ubi->peb_size;
+	ei.callback = erase_callback;
+	ei.priv     = (unsigned long)&wq;
+
+	err = ubi->mtd->erase(ubi->mtd, &ei);
+	if (err) {
+		if (retries++ < UBI_IO_RETRIES) {
+			dbg_io("error %d while erasing PEB %d, retry",
+			       err, pnum);
+			yield();
+			goto retry;
+		}
+		ubi_err("cannot erase PEB %d, error %d", pnum, err);
+		ubi_dbg_dump_stack();
+		return err;
+	}
+
+	err = wait_event_interruptible(wq, ei.state == MTD_ERASE_DONE ||
+					   ei.state == MTD_ERASE_FAILED);
+	if (err) {
+		ubi_err("interrupted PEB %d erasure", pnum);
+		return -EINTR;
+	}
+
+	if (ei.state == MTD_ERASE_FAILED) {
+		if (retries++ < UBI_IO_RETRIES) {
+			dbg_io("error while erasing PEB %d, retry", pnum);
+			yield();
+			goto retry;
+		}
+		ubi_err("cannot erase PEB %d", pnum);
+		ubi_dbg_dump_stack();
+		return -EIO;
+	}
+
+	err = paranoid_check_all_ff(ubi, pnum, 0, ubi->peb_size);
+	if (err)
+		return err > 0 ? -EINVAL : err;
+
+	if (ubi_dbg_is_erase_failure() && !err) {
+		dbg_err("cannot erase PEB %d (emulated)", pnum);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * check_pattern - check if buffer contains only a certain byte pattern.
+ * @buf: buffer to check
+ * @patt: the pattern to check
+ * @size: buffer size in bytes
+ *
+ * This function returns %1 in there are only @patt bytes in @buf, and %0 if
+ * something else was also found.
+ */
+static int check_pattern(const void *buf, uint8_t patt, int size)
+{
+	int i;
+
+	for (i = 0; i < size; i++)
+		if (((const uint8_t *)buf)[i] != patt)
+			return 0;
+	return 1;
+}
+
+/* Patterns to write to a physical eraseblock when torturing it */
+static uint8_t patterns[] = {0xa5, 0x5a, 0x0};
+
+/**
+ * torture_peb - test a supposedly bad physical eraseblock.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to test
+ *
+ * This function returns %-EIO if the physical eraseblock did not pass the
+ * test, a positive number of erase operations done if the test was
+ * successfully passed, and other negative error codes in case of other errors.
+ */
+static int torture_peb(const struct ubi_device *ubi, int pnum)
+{
+	void *buf;
+	int err, i, patt_count;
+
+	buf = kmalloc(ubi->peb_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	patt_count = ARRAY_SIZE(patterns);
+	ubi_assert(patt_count > 0);
+
+	for (i = 0; i < patt_count; i++) {
+		err = do_sync_erase(ubi, pnum);
+		if (err)
+			goto out;
+
+		/* Make sure the PEB contains only 0xFF bytes */
+		err = ubi_io_read(ubi, buf, pnum, 0, ubi->peb_size);
+		if (err)
+			goto out;
+
+		err = check_pattern(buf, 0xFF, ubi->peb_size);
+		if (err == 0) {
+			ubi_err("erased PEB %d, but a non-0xFF byte found",
+				pnum);
+			err = -EIO;
+			goto out;
+		}
+
+		/* Write a pattern and check it */
+		memset(buf, patterns[i], ubi->peb_size);
+		err = ubi_io_write(ubi, buf, pnum, 0, ubi->peb_size);
+		if (err)
+			goto out;
+
+		memset(buf, ~patterns[i], ubi->peb_size);
+		err = ubi_io_read(ubi, buf, pnum, 0, ubi->peb_size);
+		if (err)
+			goto out;
+
+		err = check_pattern(buf, patterns[i], ubi->peb_size);
+		if (err == 0) {
+			ubi_err("pattern %x checking failed for PEB %d",
+				patterns[i], pnum);
+			err = -EIO;
+			goto out;
+		}
+	}
+
+	err = patt_count;
+
+out:
+	if (err == UBI_IO_BITFLIPS || err == -EBADMSG)
+		/*
+		 * If a bit-flip or data integrity error was detected, the test
+		 * has not passed because it happened on a freshly erased
+		 * physical eraseblock which means something is wrong with it.
+		 */
+		err = -EIO;
+	kfree(buf);
+	return err;
+}
+
+/**
+ * ubi_io_sync_erase - synchronously erase a physical eraseblock.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock number to erase
+ * @torture: if this physical eraseblock has to be tortured
+ *
+ * This function synchronously erases physical eraseblock @pnum. If @torture
+ * flag is not zero, the physical eraseblock is checked by means of writing
+ * different patterns to it and reading them back. If the torturing is enabled,
+ * the physical eraseblock is erased more then once.
+ *
+ * This function returns the number of erasures made in case of success, %-EIO
+ * if the erasure failed or the torturing test failed, and other negative error
+ * codes in case of other errors. Note, %-EIO means that the physical
+ * eraseblock is bad.
+ */
+int ubi_io_sync_erase(const struct ubi_device *ubi, int pnum, int torture)
+{
+	int err, ret = 0;
+
+	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+
+	err = paranoid_check_not_bad(ubi, pnum);
+	if (err != 0)
+		return err > 0 ? -EINVAL : err;
+
+	if (ubi->ro_mode) {
+		ubi_err("read-only mode");
+		return -EROFS;
+	}
+
+	if (torture) {
+		ret = torture_peb(ubi, pnum);
+		if (ret < 0)
+			return ret;
+	}
+
+	err = do_sync_erase(ubi, pnum);
+	if (err)
+		return err;
+
+	return ret + 1;
+}
+
+/**
+ * ubi_io_is_bad - check if a physical eraseblock is bad.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ *
+ * This function returns a positive number if the physical eraseblock is bad,
+ * zero if not, and a negative error code if an error occurred.
+ */
+int ubi_io_is_bad(const struct ubi_device *ubi, int pnum)
+{
+	struct mtd_info *mtd = ubi->mtd;
+
+	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+
+	if (ubi->bad_allowed) {
+		int ret;
+
+		ret = mtd->block_isbad(mtd, (loff_t)pnum * ubi->peb_size);
+		if (ret < 0)
+			ubi_err("error %d while checking if PEB %d is bad",
+				ret, pnum);
+		else if (ret)
+			dbg_io("PEB %d is bad", pnum);
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * ubi_io_mark_bad - mark a physical eraseblock as bad.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to mark
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum)
+{
+	int err;
+	struct mtd_info *mtd = ubi->mtd;
+
+	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+
+	if (ubi->ro_mode) {
+		ubi_err("read-only mode");
+		return -EROFS;
+	}
+
+	if (!ubi->bad_allowed)
+		return 0;
+
+	err = mtd->block_markbad(mtd, (loff_t)pnum * ubi->peb_size);
+	if (err)
+		ubi_err("cannot mark PEB %d bad, error %d", pnum, err);
+	return err;
+}
+
+/**
+ * validate_ec_hdr - validate an erase counter header.
+ * @ubi: UBI device description object
+ * @ec_hdr: the erase counter header to check
+ *
+ * This function returns zero if the erase counter header is OK, and %1 if
+ * not.
+ */
+static int validate_ec_hdr(const struct ubi_device *ubi,
+			   const struct ubi_ec_hdr *ec_hdr)
+{
+	long long ec;
+	int vid_hdr_offset, leb_start;
+
+	ec = ubi64_to_cpu(ec_hdr->ec);
+	vid_hdr_offset = ubi32_to_cpu(ec_hdr->vid_hdr_offset);
+	leb_start = ubi32_to_cpu(ec_hdr->data_offset);
+
+	if (ec_hdr->version != UBI_VERSION) {
+		ubi_err("node with incompatible UBI version found: "
+			"this UBI version is %d, image version is %d",
+			UBI_VERSION, (int)ec_hdr->version);
+		goto bad;
+	}
+
+	if (vid_hdr_offset != ubi->vid_hdr_offset) {
+		ubi_err("bad VID header offset %d, expected %d",
+			vid_hdr_offset, ubi->vid_hdr_offset);
+		goto bad;
+	}
+
+	if (leb_start != ubi->leb_start) {
+		ubi_err("bad data offset %d, expected %d",
+			leb_start, ubi->leb_start);
+		goto bad;
+	}
+
+	if (ec < 0 || ec > UBI_MAX_ERASECOUNTER) {
+		ubi_err("bad erase counter %lld", ec);
+		goto bad;
+	}
+
+	return 0;
+
+bad:
+	ubi_err("bad EC header");
+	ubi_dbg_dump_ec_hdr(ec_hdr);
+	ubi_dbg_dump_stack();
+	return 1;
+}
+
+/**
+ * ubi_io_read_ec_hdr - read and check an erase counter header.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock to read from
+ * @ec_hdr: a &struct ubi_ec_hdr object where to store the read erase counter
+ * header
+ * @verbose: be verbose if the header is corrupted or was not found
+ *
+ * This function reads erase counter header from physical eraseblock @pnum and
+ * stores it in @ec_hdr. This function also checks CRC checksum of the read
+ * erase counter header. The following codes may be returned:
+ *
+ * o %0 if the CRC checksum is correct and the header was successfully read;
+ * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected
+ *   and corrected by the flash driver; this is harmless but may indicate that
+ *   this eraseblock may become bad soon (but may be not);
+ * o %UBI_IO_BAD_EC_HDR if the erase counter header is corrupted (a CRC error);
+ * o %UBI_IO_PEB_EMPTY if the physical eraseblock is empty;
+ * o a negative error code in case of failure.
+ */
+int ubi_io_read_ec_hdr(const struct ubi_device *ubi, int pnum,
+		       struct ubi_ec_hdr *ec_hdr, int verbose)
+{
+	int err, read_err = 0;
+	uint32_t crc, magic, hdr_crc;
+
+	dbg_io("read EC header from PEB %d", pnum);
+	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+
+	err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE);
+	if (err) {
+		if (err != UBI_IO_BITFLIPS && err != -EBADMSG)
+			return err;
+
+		/*
+		 * We read all the data, but either a correctable bit-flip
+		 * occurred, or MTD reported about some data integrity error,
+		 * like an ECC error in case of NAND. The former is harmless,
+		 * the later may mean that the read data is corrupted. But we
+		 * have a CRC check-sum and we will detect this. If the EC
+		 * header is still OK, we just report this as there was a
+		 * bit-flip.
+		 */
+		read_err = err;
+	}
+
+	magic = ubi32_to_cpu(ec_hdr->magic);
+	if (magic != UBI_EC_HDR_MAGIC) {
+		/*
+		 * The magic field is wrong. Let's check if we have read all
+		 * 0xFF. If yes, this physical eraseblock is assumed to be
+		 * empty.
+		 *
+		 * But if there was a read error, we do not test it for all
+		 * 0xFFs. Even if it does contain all 0xFFs, this error
+		 * indicates that something is still wrong with this physical
+		 * eraseblock and we anyway cannot treat it as empty.
+		 */
+		if (read_err != -EBADMSG &&
+		    check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) {
+			/* The physical eraseblock is supposedly empty */
+
+			/*
+			 * The below is just a paranoid check, it has to be
+			 * compiled out if paranoid checks are disabled.
+			 */
+			err = paranoid_check_all_ff(ubi, pnum, 0,
+						    ubi->peb_size);
+			if (err)
+				return err > 0 ? UBI_IO_BAD_EC_HDR : err;
+
+			if (verbose)
+				ubi_warn("no EC header found at PEB %d, "
+					 "only 0xFF bytes", pnum);
+			return UBI_IO_PEB_EMPTY;
+		}
+
+		/*
+		 * This is not a valid erase counter header, and these are not
+		 * 0xFF bytes. Report that the header is corrupted.
+		 */
+		if (verbose) {
+			ubi_warn("bad magic number at PEB %d: %08x instead of "
+				 "%08x", pnum, magic, UBI_EC_HDR_MAGIC);
+			ubi_dbg_dump_ec_hdr(ec_hdr);
+		}
+		return UBI_IO_BAD_EC_HDR;
+	}
+
+	crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC);
+	hdr_crc = ubi32_to_cpu(ec_hdr->hdr_crc);
+
+	if (hdr_crc != crc) {
+		if (verbose) {
+			ubi_warn("bad EC header CRC at PEB %d, calculated %#08x,"
+				 " read %#08x", pnum, crc, hdr_crc);
+			ubi_dbg_dump_ec_hdr(ec_hdr);
+		}
+		return UBI_IO_BAD_EC_HDR;
+	}
+
+	/* And of course validate what has just been read from the media */
+	err = validate_ec_hdr(ubi, ec_hdr);
+	if (err) {
+		ubi_err("validation failed for PEB %d", pnum);
+		return -EINVAL;
+	}
+
+	return read_err ? UBI_IO_BITFLIPS : 0;
+}
+
+/**
+ * ubi_io_write_ec_hdr - write an erase counter header.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock to write to
+ * @ec_hdr: the erase counter header to write
+ *
+ * This function writes erase counter header described by @ec_hdr to physical
+ * eraseblock @pnum. It also fills most fields of @ec_hdr before writing, so
+ * the caller do not have to fill them. Callers must only fill the @ec_hdr->ec
+ * field.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure. If %-EIO is returned, the physical eraseblock most probably
+ * went bad.
+ */
+int ubi_io_write_ec_hdr(const struct ubi_device *ubi, int pnum,
+			struct ubi_ec_hdr *ec_hdr)
+{
+	int err;
+	uint32_t crc;
+
+	dbg_io("write EC header to PEB %d", pnum);
+	ubi_assert(pnum >= 0 &&  pnum < ubi->peb_count);
+
+	ec_hdr->magic = cpu_to_ubi32(UBI_EC_HDR_MAGIC);
+	ec_hdr->version = UBI_VERSION;
+	ec_hdr->vid_hdr_offset = cpu_to_ubi32(ubi->vid_hdr_offset);
+	ec_hdr->data_offset = cpu_to_ubi32(ubi->leb_start);
+	crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC);
+	ec_hdr->hdr_crc = cpu_to_ubi32(crc);
+
+	err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr);
+	if (err)
+		return -EINVAL;
+
+	err = ubi_io_write(ubi, ec_hdr, pnum, 0, ubi->ec_hdr_alsize);
+	return err;
+}
+
+/**
+ * validate_vid_hdr - validate a volume identifier header.
+ * @ubi: UBI device description object
+ * @vid_hdr: the volume identifier header to check
+ *
+ * This function checks that data stored in the volume identifier header
+ * @vid_hdr. Returns zero if the VID header is OK and %1 if not.
+ */
+static int validate_vid_hdr(const struct ubi_device *ubi,
+			    const struct ubi_vid_hdr *vid_hdr)
+{
+	int vol_type = vid_hdr->vol_type;
+	int copy_flag = vid_hdr->copy_flag;
+	int vol_id = ubi32_to_cpu(vid_hdr->vol_id);
+	int lnum = ubi32_to_cpu(vid_hdr->lnum);
+	int compat = vid_hdr->compat;
+	int data_size = ubi32_to_cpu(vid_hdr->data_size);
+	int used_ebs = ubi32_to_cpu(vid_hdr->used_ebs);
+	int data_pad = ubi32_to_cpu(vid_hdr->data_pad);
+	int data_crc = ubi32_to_cpu(vid_hdr->data_crc);
+	int usable_leb_size = ubi->leb_size - data_pad;
+
+	if (copy_flag != 0 && copy_flag != 1) {
+		dbg_err("bad copy_flag");
+		goto bad;
+	}
+
+	if (vol_id < 0 || lnum < 0 || data_size < 0 || used_ebs < 0 ||
+	    data_pad < 0) {
+		dbg_err("negative values");
+		goto bad;
+	}
+
+	if (vol_id >= UBI_MAX_VOLUMES && vol_id < UBI_INTERNAL_VOL_START) {
+		dbg_err("bad vol_id");
+		goto bad;
+	}
+
+	if (vol_id < UBI_INTERNAL_VOL_START && compat != 0) {
+		dbg_err("bad compat");
+		goto bad;
+	}
+
+	if (vol_id >= UBI_INTERNAL_VOL_START && compat != UBI_COMPAT_DELETE &&
+	    compat != UBI_COMPAT_RO && compat != UBI_COMPAT_PRESERVE &&
+	    compat != UBI_COMPAT_REJECT) {
+		dbg_err("bad compat");
+		goto bad;
+	}
+
+	if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) {
+		dbg_err("bad vol_type");
+		goto bad;
+	}
+
+	if (data_pad >= ubi->leb_size / 2) {
+		dbg_err("bad data_pad");
+		goto bad;
+	}
+
+	if (vol_type == UBI_VID_STATIC) {
+		/*
+		 * Although from high-level point of view static volumes may
+		 * contain zero bytes of data, but no VID headers can contain
+		 * zero at these fields, because they empty volumes do not have
+		 * mapped logical eraseblocks.
+		 */
+		if (used_ebs == 0) {
+			dbg_err("zero used_ebs");
+			goto bad;
+		}
+		if (data_size == 0) {
+			dbg_err("zero data_size");
+			goto bad;
+		}
+		if (lnum < used_ebs - 1) {
+			if (data_size != usable_leb_size) {
+				dbg_err("bad data_size");
+				goto bad;
+			}
+		} else if (lnum == used_ebs - 1) {
+			if (data_size == 0) {
+				dbg_err("bad data_size at last LEB");
+				goto bad;
+			}
+		} else {
+			dbg_err("too high lnum");
+			goto bad;
+		}
+	} else {
+		if (copy_flag == 0) {
+			if (data_crc != 0) {
+				dbg_err("non-zero data CRC");
+				goto bad;
+			}
+			if (data_size != 0) {
+				dbg_err("non-zero data_size");
+				goto bad;
+			}
+		} else {
+			if (data_size == 0) {
+				dbg_err("zero data_size of copy");
+				goto bad;
+			}
+		}
+		if (used_ebs != 0) {
+			dbg_err("bad used_ebs");
+			goto bad;
+		}
+	}
+
+	return 0;
+
+bad:
+	ubi_err("bad VID header");
+	ubi_dbg_dump_vid_hdr(vid_hdr);
+	ubi_dbg_dump_stack();
+	return 1;
+}
+
+/**
+ * ubi_io_read_vid_hdr - read and check a volume identifier header.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock number to read from
+ * @vid_hdr: &struct ubi_vid_hdr object where to store the read volume
+ * identifier header
+ * @verbose: be verbose if the header is corrupted or wasn't found
+ *
+ * This function reads the volume identifier header from physical eraseblock
+ * @pnum and stores it in @vid_hdr. It also checks CRC checksum of the read
+ * volume identifier header. The following codes may be returned:
+ *
+ * o %0 if the CRC checksum is correct and the header was successfully read;
+ * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected
+ *   and corrected by the flash driver; this is harmless but may indicate that
+ *   this eraseblock may become bad soon;
+ * o %UBI_IO_BAD_VID_HRD if the volume identifier header is corrupted (a CRC
+ *   error detected);
+ * o %UBI_IO_PEB_FREE if the physical eraseblock is free (i.e., there is no VID
+ *   header there);
+ * o a negative error code in case of failure.
+ */
+int ubi_io_read_vid_hdr(const struct ubi_device *ubi, int pnum,
+			struct ubi_vid_hdr *vid_hdr, int verbose)
+{
+	int err, read_err = 0;
+	uint32_t crc, magic, hdr_crc;
+	void *p;
+
+	dbg_io("read VID header from PEB %d", pnum);
+	ubi_assert(pnum >= 0 &&  pnum < ubi->peb_count);
+
+	p = (char *)vid_hdr - ubi->vid_hdr_shift;
+	err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset,
+			  ubi->vid_hdr_alsize);
+	if (err) {
+		if (err != UBI_IO_BITFLIPS && err != -EBADMSG)
+			return err;
+
+		/*
+		 * We read all the data, but either a correctable bit-flip
+		 * occurred, or MTD reported about some data integrity error,
+		 * like an ECC error in case of NAND. The former is harmless,
+		 * the later may mean the read data is corrupted. But we have a
+		 * CRC check-sum and we will identify this. If the VID header is
+		 * still OK, we just report this as there was a bit-flip.
+		 */
+		read_err = err;
+	}
+
+	magic = ubi32_to_cpu(vid_hdr->magic);
+	if (magic != UBI_VID_HDR_MAGIC) {
+		/*
+		 * If we have read all 0xFF bytes, the VID header probably does
+		 * not exist and the physical eraseblock is assumed to be free.
+		 *
+		 * But if there was a read error, we do not test the data for
+		 * 0xFFs. Even if it does contain all 0xFFs, this error
+		 * indicates that something is still wrong with this physical
+		 * eraseblock and it cannot be regarded as free.
+		 */
+		if (read_err != -EBADMSG &&
+		    check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) {
+			/* The physical eraseblock is supposedly free */
+
+			/*
+			 * The below is just a paranoid check, it has to be
+			 * compiled out if paranoid checks are disabled.
+			 */
+			err = paranoid_check_all_ff(ubi, pnum, ubi->leb_start,
+						    ubi->leb_size);
+			if (err)
+				return err > 0 ? UBI_IO_BAD_VID_HDR : err;
+
+			if (verbose)
+				ubi_warn("no VID header found at PEB %d, "
+					 "only 0xFF bytes", pnum);
+			return UBI_IO_PEB_FREE;
+		}
+
+		/*
+		 * This is not a valid VID header, and these are not 0xFF
+		 * bytes. Report that the header is corrupted.
+		 */
+		if (verbose) {
+			ubi_warn("bad magic number at PEB %d: %08x instead of "
+				 "%08x", pnum, magic, UBI_VID_HDR_MAGIC);
+			ubi_dbg_dump_vid_hdr(vid_hdr);
+		}
+		return UBI_IO_BAD_VID_HDR;
+	}
+
+	crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC);
+	hdr_crc = ubi32_to_cpu(vid_hdr->hdr_crc);
+
+	if (hdr_crc != crc) {
+		if (verbose) {
+			ubi_warn("bad CRC at PEB %d, calculated %#08x, "
+				 "read %#08x", pnum, crc, hdr_crc);
+			ubi_dbg_dump_vid_hdr(vid_hdr);
+		}
+		return UBI_IO_BAD_VID_HDR;
+	}
+
+	/* Validate the VID header that we have just read */
+	err = validate_vid_hdr(ubi, vid_hdr);
+	if (err) {
+		ubi_err("validation failed for PEB %d", pnum);
+		return -EINVAL;
+	}
+
+	return read_err ? UBI_IO_BITFLIPS : 0;
+}
+
+/**
+ * ubi_io_write_vid_hdr - write a volume identifier header.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to write to
+ * @vid_hdr: the volume identifier header to write
+ *
+ * This function writes the volume identifier header described by @vid_hdr to
+ * physical eraseblock @pnum. This function automatically fills the
+ * @vid_hdr->magic and the @vid_hdr->version fields, as well as calculates
+ * header CRC checksum and stores it at vid_hdr->hdr_crc.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure. If %-EIO is returned, the physical eraseblock probably went
+ * bad.
+ */
+int ubi_io_write_vid_hdr(const struct ubi_device *ubi, int pnum,
+			 struct ubi_vid_hdr *vid_hdr)
+{
+	int err;
+	uint32_t crc;
+	void *p;
+
+	dbg_io("write VID header to PEB %d", pnum);
+	ubi_assert(pnum >= 0 &&  pnum < ubi->peb_count);
+
+	err = paranoid_check_peb_ec_hdr(ubi, pnum);
+	if (err)
+		return err > 0 ? -EINVAL: err;
+
+	vid_hdr->magic = cpu_to_ubi32(UBI_VID_HDR_MAGIC);
+	vid_hdr->version = UBI_VERSION;
+	crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC);
+	vid_hdr->hdr_crc = cpu_to_ubi32(crc);
+
+	err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr);
+	if (err)
+		return -EINVAL;
+
+	p = (char *)vid_hdr - ubi->vid_hdr_shift;
+	err = ubi_io_write(ubi, p, pnum, ubi->vid_hdr_aloffset,
+			   ubi->vid_hdr_alsize);
+	return err;
+}
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+
+/**
+ * paranoid_check_not_bad - ensure that a physical eraseblock is not bad.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock number to check
+ *
+ * This function returns zero if the physical eraseblock is good, a positive
+ * number if it is bad and a negative error code if an error occurred.
+ */
+static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum)
+{
+	int err;
+
+	err = ubi_io_is_bad(ubi, pnum);
+	if (!err)
+		return err;
+
+	ubi_err("paranoid check failed for PEB %d", pnum);
+	ubi_dbg_dump_stack();
+	return err;
+}
+
+/**
+ * paranoid_check_ec_hdr - check if an erase counter header is all right.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock number the erase counter header belongs to
+ * @ec_hdr: the erase counter header to check
+ *
+ * This function returns zero if the erase counter header contains valid
+ * values, and %1 if not.
+ */
+static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum,
+				 const struct ubi_ec_hdr *ec_hdr)
+{
+	int err;
+	uint32_t magic;
+
+	magic = ubi32_to_cpu(ec_hdr->magic);
+	if (magic != UBI_EC_HDR_MAGIC) {
+		ubi_err("bad magic %#08x, must be %#08x",
+			magic, UBI_EC_HDR_MAGIC);
+		goto fail;
+	}
+
+	err = validate_ec_hdr(ubi, ec_hdr);
+	if (err) {
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	ubi_dbg_dump_ec_hdr(ec_hdr);
+	ubi_dbg_dump_stack();
+	return 1;
+}
+
+/**
+ * paranoid_check_peb_ec_hdr - check that the erase counter header of a
+ * physical eraseblock is in-place and is all right.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ *
+ * This function returns zero if the erase counter header is all right, %1 if
+ * not, and a negative error code if an error occurred.
+ */
+static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum)
+{
+	int err;
+	uint32_t crc, hdr_crc;
+	struct ubi_ec_hdr *ec_hdr;
+
+	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
+	if (!ec_hdr)
+		return -ENOMEM;
+
+	err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE);
+	if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG)
+		goto exit;
+
+	crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC);
+	hdr_crc = ubi32_to_cpu(ec_hdr->hdr_crc);
+	if (hdr_crc != crc) {
+		ubi_err("bad CRC, calculated %#08x, read %#08x", crc, hdr_crc);
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		ubi_dbg_dump_ec_hdr(ec_hdr);
+		ubi_dbg_dump_stack();
+		err = 1;
+		goto exit;
+	}
+
+	err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr);
+
+exit:
+	kfree(ec_hdr);
+	return err;
+}
+
+/**
+ * paranoid_check_vid_hdr - check that a volume identifier header is all right.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock number the volume identifier header belongs to
+ * @vid_hdr: the volume identifier header to check
+ *
+ * This function returns zero if the volume identifier header is all right, and
+ * %1 if not.
+ */
+static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum,
+				  const struct ubi_vid_hdr *vid_hdr)
+{
+	int err;
+	uint32_t magic;
+
+	magic = ubi32_to_cpu(vid_hdr->magic);
+	if (magic != UBI_VID_HDR_MAGIC) {
+		ubi_err("bad VID header magic %#08x at PEB %d, must be %#08x",
+			magic, pnum, UBI_VID_HDR_MAGIC);
+		goto fail;
+	}
+
+	err = validate_vid_hdr(ubi, vid_hdr);
+	if (err) {
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		goto fail;
+	}
+
+	return err;
+
+fail:
+	ubi_err("paranoid check failed for PEB %d", pnum);
+	ubi_dbg_dump_vid_hdr(vid_hdr);
+	ubi_dbg_dump_stack();
+	return 1;
+
+}
+
+/**
+ * paranoid_check_peb_vid_hdr - check that the volume identifier header of a
+ * physical eraseblock is in-place and is all right.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ *
+ * This function returns zero if the volume identifier header is all right,
+ * %1 if not, and a negative error code if an error occurred.
+ */
+static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum)
+{
+	int err;
+	uint32_t crc, hdr_crc;
+	struct ubi_vid_hdr *vid_hdr;
+	void *p;
+
+	vid_hdr = ubi_zalloc_vid_hdr(ubi);
+	if (!vid_hdr)
+		return -ENOMEM;
+
+	p = (char *)vid_hdr - ubi->vid_hdr_shift;
+	err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset,
+			  ubi->vid_hdr_alsize);
+	if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG)
+		goto exit;
+
+	crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_EC_HDR_SIZE_CRC);
+	hdr_crc = ubi32_to_cpu(vid_hdr->hdr_crc);
+	if (hdr_crc != crc) {
+		ubi_err("bad VID header CRC at PEB %d, calculated %#08x, "
+			"read %#08x", pnum, crc, hdr_crc);
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		ubi_dbg_dump_vid_hdr(vid_hdr);
+		ubi_dbg_dump_stack();
+		err = 1;
+		goto exit;
+	}
+
+	err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr);
+
+exit:
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	return err;
+}
+
+/**
+ * paranoid_check_all_ff - check that a region of flash is empty.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ * @offset: the starting offset within the physical eraseblock to check
+ * @len: the length of the region to check
+ *
+ * This function returns zero if only 0xFF bytes are present at offset
+ * @offset of the physical eraseblock @pnum, %1 if not, and a negative error
+ * code if an error occurred.
+ */
+static int paranoid_check_all_ff(const struct ubi_device *ubi, int pnum,
+				 int offset, int len)
+{
+	size_t read;
+	int err;
+	void *buf;
+	loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
+
+	buf = kzalloc(len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
+	if (err && err != -EUCLEAN) {
+		ubi_err("error %d while reading %d bytes from PEB %d:%d, "
+			"read %zd bytes", err, len, pnum, offset, read);
+		goto error;
+	}
+
+	err = check_pattern(buf, 0xFF, len);
+	if (err == 0) {
+		ubi_err("flash region at PEB %d:%d, length %d does not "
+			"contain all 0xFF bytes", pnum, offset, len);
+		goto fail;
+	}
+
+	kfree(buf);
+	return 0;
+
+fail:
+	ubi_err("paranoid check failed for PEB %d", pnum);
+	dbg_msg("hex dump of the %d-%d region", offset, offset + len);
+	ubi_dbg_hexdump(buf, len);
+	err = 1;
+error:
+	ubi_dbg_dump_stack();
+	kfree(buf);
+	return err;
+}
+
+#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
new file mode 100644
index 00000000000..d352c4575c3
--- /dev/null
+++ b/drivers/mtd/ubi/kapi.c
@@ -0,0 +1,575 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/* This file mostly implements UBI kernel API functions */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <asm/div64.h>
+#include "ubi.h"
+
+/**
+ * ubi_get_device_info - get information about UBI device.
+ * @ubi_num: UBI device number
+ * @di: the information is stored here
+ *
+ * This function returns %0 in case of success and a %-ENODEV if there is no
+ * such UBI device.
+ */
+int ubi_get_device_info(int ubi_num, struct ubi_device_info *di)
+{
+	const struct ubi_device *ubi;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES ||
+	    !ubi_devices[ubi_num]) {
+		module_put(THIS_MODULE);
+		return -ENODEV;
+	}
+
+	ubi = ubi_devices[ubi_num];
+	di->ubi_num = ubi->ubi_num;
+	di->leb_size = ubi->leb_size;
+	di->min_io_size = ubi->min_io_size;
+	di->ro_mode = ubi->ro_mode;
+	di->cdev = MKDEV(ubi->major, 0);
+	module_put(THIS_MODULE);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ubi_get_device_info);
+
+/**
+ * ubi_get_volume_info - get information about UBI volume.
+ * @desc: volume descriptor
+ * @vi: the information is stored here
+ */
+void ubi_get_volume_info(struct ubi_volume_desc *desc,
+			 struct ubi_volume_info *vi)
+{
+	const struct ubi_volume *vol = desc->vol;
+	const struct ubi_device *ubi = vol->ubi;
+
+	vi->vol_id = vol->vol_id;
+	vi->ubi_num = ubi->ubi_num;
+	vi->size = vol->reserved_pebs;
+	vi->used_bytes = vol->used_bytes;
+	vi->vol_type = vol->vol_type;
+	vi->corrupted = vol->corrupted;
+	vi->upd_marker = vol->upd_marker;
+	vi->alignment = vol->alignment;
+	vi->usable_leb_size = vol->usable_leb_size;
+	vi->name_len = vol->name_len;
+	vi->name = vol->name;
+	vi->cdev = MKDEV(ubi->major, vi->vol_id + 1);
+}
+EXPORT_SYMBOL_GPL(ubi_get_volume_info);
+
+/**
+ * ubi_open_volume - open UBI volume.
+ * @ubi_num: UBI device number
+ * @vol_id: volume ID
+ * @mode: open mode
+ *
+ * The @mode parameter specifies if the volume should be opened in read-only
+ * mode, read-write mode, or exclusive mode. The exclusive mode guarantees that
+ * nobody else will be able to open this volume. UBI allows to have many volume
+ * readers and one writer at a time.
+ *
+ * If a static volume is being opened for the first time since boot, it will be
+ * checked by this function, which means it will be fully read and the CRC
+ * checksum of each logical eraseblock will be checked.
+ *
+ * This function returns volume descriptor in case of success and a negative
+ * error code in case of failure.
+ */
+struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode)
+{
+	int err;
+	struct ubi_volume_desc *desc;
+	struct ubi_device *ubi = ubi_devices[ubi_num];
+	struct ubi_volume *vol;
+
+	dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
+
+	err = -ENODEV;
+	if (!try_module_get(THIS_MODULE))
+		return ERR_PTR(err);
+
+	if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || !ubi)
+		goto out_put;
+
+	err = -EINVAL;
+	if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
+		goto out_put;
+	if (mode != UBI_READONLY && mode != UBI_READWRITE &&
+	    mode != UBI_EXCLUSIVE)
+		goto out_put;
+
+	desc = kmalloc(sizeof(struct ubi_volume_desc), GFP_KERNEL);
+	if (!desc) {
+		err = -ENOMEM;
+		goto out_put;
+	}
+
+	spin_lock(&ubi->volumes_lock);
+	vol = ubi->volumes[vol_id];
+	if (!vol) {
+		err = -ENODEV;
+		goto out_unlock;
+	}
+
+	err = -EBUSY;
+	switch (mode) {
+	case UBI_READONLY:
+		if (vol->exclusive)
+			goto out_unlock;
+		vol->readers += 1;
+		break;
+
+	case UBI_READWRITE:
+		if (vol->exclusive || vol->writers > 0)
+			goto out_unlock;
+		vol->writers += 1;
+		break;
+
+	case UBI_EXCLUSIVE:
+		if (vol->exclusive || vol->writers || vol->readers)
+			goto out_unlock;
+		vol->exclusive = 1;
+		break;
+	}
+	spin_unlock(&ubi->volumes_lock);
+
+	desc->vol = vol;
+	desc->mode = mode;
+
+	/*
+	 * To prevent simultaneous checks of the same volume we use @vtbl_mutex,
+	 * although it is not the purpose it was introduced for.
+	 */
+	mutex_lock(&ubi->vtbl_mutex);
+	if (!vol->checked) {
+		/* This is the first open - check the volume */
+		err = ubi_check_volume(ubi, vol_id);
+		if (err < 0) {
+			mutex_unlock(&ubi->vtbl_mutex);
+			ubi_close_volume(desc);
+			return ERR_PTR(err);
+		}
+		if (err == 1) {
+			ubi_warn("volume %d on UBI device %d is corrupted",
+				 vol_id, ubi->ubi_num);
+			vol->corrupted = 1;
+		}
+		vol->checked = 1;
+	}
+	mutex_unlock(&ubi->vtbl_mutex);
+	return desc;
+
+out_unlock:
+	spin_unlock(&ubi->volumes_lock);
+	kfree(desc);
+out_put:
+	module_put(THIS_MODULE);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(ubi_open_volume);
+
+/**
+ * ubi_open_volume_nm - open UBI volume by name.
+ * @ubi_num: UBI device number
+ * @name: volume name
+ * @mode: open mode
+ *
+ * This function is similar to 'ubi_open_volume()', but opens a volume by name.
+ */
+struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name,
+					   int mode)
+{
+	int i, vol_id = -1, len;
+	struct ubi_volume_desc *ret;
+	struct ubi_device *ubi;
+
+	dbg_msg("open volume %s, mode %d", name, mode);
+
+	if (!name)
+		return ERR_PTR(-EINVAL);
+
+	len = strnlen(name, UBI_VOL_NAME_MAX + 1);
+	if (len > UBI_VOL_NAME_MAX)
+		return ERR_PTR(-EINVAL);
+
+	ret = ERR_PTR(-ENODEV);
+	if (!try_module_get(THIS_MODULE))
+		return ret;
+
+	if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || !ubi_devices[ubi_num])
+		goto out_put;
+
+	ubi = ubi_devices[ubi_num];
+
+	spin_lock(&ubi->volumes_lock);
+	/* Walk all volumes of this UBI device */
+	for (i = 0; i < ubi->vtbl_slots; i++) {
+		struct ubi_volume *vol = ubi->volumes[i];
+
+		if (vol && len == vol->name_len && !strcmp(name, vol->name)) {
+			vol_id = i;
+			break;
+		}
+	}
+	spin_unlock(&ubi->volumes_lock);
+
+	if (vol_id < 0)
+		goto out_put;
+
+	ret = ubi_open_volume(ubi_num, vol_id, mode);
+
+out_put:
+	module_put(THIS_MODULE);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ubi_open_volume_nm);
+
+/**
+ * ubi_close_volume - close UBI volume.
+ * @desc: volume descriptor
+ */
+void ubi_close_volume(struct ubi_volume_desc *desc)
+{
+	struct ubi_volume *vol = desc->vol;
+
+	dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode);
+
+	spin_lock(&vol->ubi->volumes_lock);
+	switch (desc->mode) {
+	case UBI_READONLY:
+		vol->readers -= 1;
+		break;
+	case UBI_READWRITE:
+		vol->writers -= 1;
+		break;
+	case UBI_EXCLUSIVE:
+		vol->exclusive = 0;
+	}
+	spin_unlock(&vol->ubi->volumes_lock);
+
+	kfree(desc);
+	module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL_GPL(ubi_close_volume);
+
+/**
+ * ubi_leb_read - read data.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number to read from
+ * @buf: buffer where to store the read data
+ * @offset: offset within the logical eraseblock to read from
+ * @len: how many bytes to read
+ * @check: whether UBI has to check the read data's CRC or not.
+ *
+ * This function reads data from offset @offset of logical eraseblock @lnum and
+ * stores the data at @buf. When reading from static volumes, @check specifies
+ * whether the data has to be checked or not. If yes, the whole logical
+ * eraseblock will be read and its CRC checksum will be checked (i.e., the CRC
+ * checksum is per-eraseblock). So checking may substantially slow down the
+ * read speed. The @check argument is ignored for dynamic volumes.
+ *
+ * In case of success, this function returns zero. In case of failure, this
+ * function returns a negative error code.
+ *
+ * %-EBADMSG error code is returned:
+ * o for both static and dynamic volumes if MTD driver has detected a data
+ *   integrity problem (unrecoverable ECC checksum mismatch in case of NAND);
+ * o for static volumes in case of data CRC mismatch.
+ *
+ * If the volume is damaged because of an interrupted update this function just
+ * returns immediately with %-EBADF error code.
+ */
+int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
+		 int len, int check)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int err, vol_id = vol->vol_id;
+
+	dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset);
+
+	if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 ||
+	    lnum >= vol->used_ebs || offset < 0 || len < 0 ||
+	    offset + len > vol->usable_leb_size)
+		return -EINVAL;
+
+	if (vol->vol_type == UBI_STATIC_VOLUME && lnum == vol->used_ebs - 1 &&
+	    offset + len > vol->last_eb_bytes)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+	if (len == 0)
+		return 0;
+
+	err = ubi_eba_read_leb(ubi, vol_id, lnum, buf, offset, len, check);
+	if (err && err == -EBADMSG && vol->vol_type == UBI_STATIC_VOLUME) {
+		ubi_warn("mark volume %d as corrupted", vol_id);
+		vol->corrupted = 1;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(ubi_leb_read);
+
+/**
+ * ubi_leb_write - write data.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number to write to
+ * @buf: data to write
+ * @offset: offset within the logical eraseblock where to write
+ * @len: how many bytes to write
+ * @dtype: expected data type
+ *
+ * This function writes @len bytes of data from @buf to offset @offset of
+ * logical eraseblock @lnum. The @dtype argument describes expected lifetime of
+ * the data.
+ *
+ * This function takes care of physical eraseblock write failures. If write to
+ * the physical eraseblock write operation fails, the logical eraseblock is
+ * re-mapped to another physical eraseblock, the data is recovered, and the
+ * write finishes. UBI has a pool of reserved physical eraseblocks for this.
+ *
+ * If all the data were successfully written, zero is returned. If an error
+ * occurred and UBI has not been able to recover from it, this function returns
+ * a negative error code. Note, in case of an error, it is possible that
+ * something was still written to the flash media, but that may be some
+ * garbage.
+ *
+ * If the volume is damaged because of an interrupted update this function just
+ * returns immediately with %-EBADF code.
+ */
+int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
+		  int offset, int len, int dtype)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int vol_id = vol->vol_id;
+
+	dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset);
+
+	if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
+		return -EINVAL;
+
+	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
+		return -EROFS;
+
+	if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 ||
+	    offset + len > vol->usable_leb_size || offset % ubi->min_io_size ||
+	    len % ubi->min_io_size)
+		return -EINVAL;
+
+	if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
+	    dtype != UBI_UNKNOWN)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+
+	if (len == 0)
+		return 0;
+
+	return ubi_eba_write_leb(ubi, vol_id, lnum, buf, offset, len, dtype);
+}
+EXPORT_SYMBOL_GPL(ubi_leb_write);
+
+/*
+ * ubi_leb_change - change logical eraseblock atomically.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number to change
+ * @buf: data to write
+ * @len: how many bytes to write
+ * @dtype: expected data type
+ *
+ * This function changes the contents of a logical eraseblock atomically. @buf
+ * has to contain new logical eraseblock data, and @len - the length of the
+ * data, which has to be aligned. The length may be shorter then the logical
+ * eraseblock size, ant the logical eraseblock may be appended to more times
+ * later on. This function guarantees that in case of an unclean reboot the old
+ * contents is preserved. Returns zero in case of success and a negative error
+ * code in case of failure.
+ */
+int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
+		   int len, int dtype)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int vol_id = vol->vol_id;
+
+	dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum);
+
+	if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
+		return -EINVAL;
+
+	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
+		return -EROFS;
+
+	if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 ||
+	    len > vol->usable_leb_size || len % ubi->min_io_size)
+		return -EINVAL;
+
+	if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
+	    dtype != UBI_UNKNOWN)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+
+	if (len == 0)
+		return 0;
+
+	return ubi_eba_atomic_leb_change(ubi, vol_id, lnum, buf, len, dtype);
+}
+EXPORT_SYMBOL_GPL(ubi_leb_change);
+
+/**
+ * ubi_leb_erase - erase logical eraseblock.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number
+ *
+ * This function un-maps logical eraseblock @lnum and synchronously erases the
+ * correspondent physical eraseblock. Returns zero in case of success and a
+ * negative error code in case of failure.
+ *
+ * If the volume is damaged because of an interrupted update this function just
+ * returns immediately with %-EBADF code.
+ */
+int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int err, vol_id = vol->vol_id;
+
+	dbg_msg("erase LEB %d:%d", vol_id, lnum);
+
+	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
+		return -EROFS;
+
+	if (lnum < 0 || lnum >= vol->reserved_pebs)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+
+	err = ubi_eba_unmap_leb(ubi, vol_id, lnum);
+	if (err)
+		return err;
+
+	return ubi_wl_flush(ubi);
+}
+EXPORT_SYMBOL_GPL(ubi_leb_erase);
+
+/**
+ * ubi_leb_unmap - un-map logical eraseblock.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number
+ *
+ * This function un-maps logical eraseblock @lnum and schedules the
+ * corresponding physical eraseblock for erasure, so that it will eventually be
+ * physically erased in background. This operation is much faster then the
+ * erase operation.
+ *
+ * Unlike erase, the un-map operation does not guarantee that the logical
+ * eraseblock will contain all 0xFF bytes when UBI is initialized again. For
+ * example, if several logical eraseblocks are un-mapped, and an unclean reboot
+ * happens after this, the logical eraseblocks will not necessarily be
+ * un-mapped again when this MTD device is attached. They may actually be
+ * mapped to the same physical eraseblocks again. So, this function has to be
+ * used with care.
+ *
+ * In other words, when un-mapping a logical eraseblock, UBI does not store
+ * any information about this on the flash media, it just marks the logical
+ * eraseblock as "un-mapped" in RAM. If UBI is detached before the physical
+ * eraseblock is physically erased, it will be mapped again to the same logical
+ * eraseblock when the MTD device is attached again.
+ *
+ * The main and obvious use-case of this function is when the contents of a
+ * logical eraseblock has to be re-written. Then it is much more efficient to
+ * first un-map it, then write new data, rather then first erase it, then write
+ * new data. Note, once new data has been written to the logical eraseblock,
+ * UBI guarantees that the old contents has gone forever. In other words, if an
+ * unclean reboot happens after the logical eraseblock has been un-mapped and
+ * then written to, it will contain the last written data.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure. If the volume is damaged because of an interrupted update
+ * this function just returns immediately with %-EBADF code.
+ */
+int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int vol_id = vol->vol_id;
+
+	dbg_msg("unmap LEB %d:%d", vol_id, lnum);
+
+	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
+		return -EROFS;
+
+	if (lnum < 0 || lnum >= vol->reserved_pebs)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+
+	return ubi_eba_unmap_leb(ubi, vol_id, lnum);
+}
+EXPORT_SYMBOL_GPL(ubi_leb_unmap);
+
+/**
+ * ubi_is_mapped - check if logical eraseblock is mapped.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number
+ *
+ * This function checks if logical eraseblock @lnum is mapped to a physical
+ * eraseblock. If a logical eraseblock is un-mapped, this does not necessarily
+ * mean it will still be un-mapped after the UBI device is re-attached. The
+ * logical eraseblock may become mapped to the physical eraseblock it was last
+ * mapped to.
+ *
+ * This function returns %1 if the LEB is mapped, %0 if not, and a negative
+ * error code in case of failure. If the volume is damaged because of an
+ * interrupted update this function just returns immediately with %-EBADF error
+ * code.
+ */
+int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum)
+{
+	struct ubi_volume *vol = desc->vol;
+
+	dbg_msg("test LEB %d:%d", vol->vol_id, lnum);
+
+	if (lnum < 0 || lnum >= vol->reserved_pebs)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+
+	return vol->eba_tbl[lnum] >= 0;
+}
+EXPORT_SYMBOL_GPL(ubi_is_mapped);
diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c
new file mode 100644
index 00000000000..38d4e6757dc
--- /dev/null
+++ b/drivers/mtd/ubi/misc.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/* Here we keep miscellaneous functions which are used all over the UBI code */
+
+#include "ubi.h"
+
+/**
+ * calc_data_len - calculate how much real data is stored in a buffer.
+ * @ubi: UBI device description object
+ * @buf: a buffer with the contents of the physical eraseblock
+ * @length: the buffer length
+ *
+ * This function calculates how much "real data" is stored in @buf and returnes
+ * the length. Continuous 0xFF bytes at the end of the buffer are not
+ * considered as "real data".
+ */
+int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf,
+		      int length)
+{
+	int i;
+
+	ubi_assert(length % ubi->min_io_size == 0);
+
+	for (i = length - 1; i >= 0; i--)
+		if (((const uint8_t *)buf)[i] != 0xFF)
+			break;
+
+	/* The resulting length must be aligned to the minimum flash I/O size */
+	length = ALIGN(i + 1, ubi->min_io_size);
+	return length;
+}
+
+/**
+ * ubi_check_volume - check the contents of a static volume.
+ * @ubi: UBI device description object
+ * @vol_id: ID of the volume to check
+ *
+ * This function checks if static volume @vol_id is corrupted by fully reading
+ * it and checking data CRC. This function returns %0 if the volume is not
+ * corrupted, %1 if it is corrupted and a negative error code in case of
+ * failure. Dynamic volumes are not checked and zero is returned immediately.
+ */
+int ubi_check_volume(struct ubi_device *ubi, int vol_id)
+{
+	void *buf;
+	int err = 0, i;
+	struct ubi_volume *vol = ubi->volumes[vol_id];
+
+	if (vol->vol_type != UBI_STATIC_VOLUME)
+		return 0;
+
+	buf = kmalloc(vol->usable_leb_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	for (i = 0; i < vol->used_ebs; i++) {
+		int size;
+
+		if (i == vol->used_ebs - 1)
+			size = vol->last_eb_bytes;
+		else
+			size = vol->usable_leb_size;
+
+		err = ubi_eba_read_leb(ubi, vol_id, i, buf, 0, size, 1);
+		if (err) {
+			if (err == -EBADMSG)
+				err = 1;
+			break;
+		}
+	}
+
+	kfree(buf);
+	return err;
+}
+
+/**
+ * ubi_calculate_rsvd_pool - calculate how many PEBs must be reserved for bad
+ * eraseblock handling.
+ * @ubi: UBI device description object
+ */
+void ubi_calculate_reserved(struct ubi_device *ubi)
+{
+	ubi->beb_rsvd_level = ubi->good_peb_count/100;
+	ubi->beb_rsvd_level *= CONFIG_MTD_UBI_BEB_RESERVE;
+	if (ubi->beb_rsvd_level < MIN_RESEVED_PEBS)
+		ubi->beb_rsvd_level = MIN_RESEVED_PEBS;
+}
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
new file mode 100644
index 00000000000..473f3200b86
--- /dev/null
+++ b/drivers/mtd/ubi/scan.c
@@ -0,0 +1,1368 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * UBI scanning unit.
+ *
+ * This unit is responsible for scanning the flash media, checking UBI
+ * headers and providing complete information about the UBI flash image.
+ *
+ * The scanning information is reoresented by a &struct ubi_scan_info' object.
+ * Information about found volumes is represented by &struct ubi_scan_volume
+ * objects which are kept in volume RB-tree with root at the @volumes field.
+ * The RB-tree is indexed by the volume ID.
+ *
+ * Found logical eraseblocks are represented by &struct ubi_scan_leb objects.
+ * These objects are kept in per-volume RB-trees with the root at the
+ * corresponding &struct ubi_scan_volume object. To put it differently, we keep
+ * an RB-tree of per-volume objects and each of these objects is the root of
+ * RB-tree of per-eraseblock objects.
+ *
+ * Corrupted physical eraseblocks are put to the @corr list, free physical
+ * eraseblocks are put to the @free list and the physical eraseblock to be
+ * erased are put to the @erase list.
+ */
+
+#include <linux/err.h>
+#include <linux/crc32.h>
+#include "ubi.h"
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+static int paranoid_check_si(const struct ubi_device *ubi,
+			     struct ubi_scan_info *si);
+#else
+#define paranoid_check_si(ubi, si) 0
+#endif
+
+/* Temporary variables used during scanning */
+static struct ubi_ec_hdr *ech;
+static struct ubi_vid_hdr *vidh;
+
+int ubi_scan_add_to_list(struct ubi_scan_info *si, int pnum, int ec,
+			 struct list_head *list)
+{
+	struct ubi_scan_leb *seb;
+
+	if (list == &si->free)
+		dbg_bld("add to free: PEB %d, EC %d", pnum, ec);
+	else if (list == &si->erase)
+		dbg_bld("add to erase: PEB %d, EC %d", pnum, ec);
+	else if (list == &si->corr)
+		dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec);
+	else if (list == &si->alien)
+		dbg_bld("add to alien: PEB %d, EC %d", pnum, ec);
+	else
+		BUG();
+
+	seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL);
+	if (!seb)
+		return -ENOMEM;
+
+	seb->pnum = pnum;
+	seb->ec = ec;
+	list_add_tail(&seb->u.list, list);
+	return 0;
+}
+
+/**
+ * commit_to_mean_value - commit intermediate results to the final mean erase
+ * counter value.
+ * @si: scanning information
+ *
+ * This is a helper function which calculates partial mean erase counter mean
+ * value and adds it to the resulting mean value. As we can work only in
+ * integer arithmetic and we want to calculate the mean value of erase counter
+ * accurately, we first sum erase counter values in @si->ec_sum variable and
+ * count these components in @si->ec_count. If this temporary @si->ec_sum is
+ * going to overflow, we calculate the partial mean value
+ * (@si->ec_sum/@si->ec_count) and add it to @si->mean_ec.
+ */
+static void commit_to_mean_value(struct ubi_scan_info *si)
+{
+	si->ec_sum /= si->ec_count;
+	if (si->ec_sum % si->ec_count >= si->ec_count / 2)
+		si->mean_ec += 1;
+	si->mean_ec += si->ec_sum;
+}
+
+/**
+ * validate_vid_hdr - check that volume identifier header is correct and
+ * consistent.
+ * @vid_hdr: the volume identifier header to check
+ * @sv: information about the volume this logical eraseblock belongs to
+ * @pnum: physical eraseblock number the VID header came from
+ *
+ * This function checks that data stored in @vid_hdr is consistent. Returns
+ * non-zero if an inconsistency was found and zero if not.
+ *
+ * Note, UBI does sanity check of everything it reads from the flash media.
+ * Most of the checks are done in the I/O unit. Here we check that the
+ * information in the VID header is consistent to the information in other VID
+ * headers of the same volume.
+ */
+static int validate_vid_hdr(const struct ubi_vid_hdr *vid_hdr,
+			    const struct ubi_scan_volume *sv, int pnum)
+{
+	int vol_type = vid_hdr->vol_type;
+	int vol_id = ubi32_to_cpu(vid_hdr->vol_id);
+	int used_ebs = ubi32_to_cpu(vid_hdr->used_ebs);
+	int data_pad = ubi32_to_cpu(vid_hdr->data_pad);
+
+	if (sv->leb_count != 0) {
+		int sv_vol_type;
+
+		/*
+		 * This is not the first logical eraseblock belonging to this
+		 * volume. Ensure that the data in its VID header is consistent
+		 * to the data in previous logical eraseblock headers.
+		 */
+
+		if (vol_id != sv->vol_id) {
+			dbg_err("inconsistent vol_id");
+			goto bad;
+		}
+
+		if (sv->vol_type == UBI_STATIC_VOLUME)
+			sv_vol_type = UBI_VID_STATIC;
+		else
+			sv_vol_type = UBI_VID_DYNAMIC;
+
+		if (vol_type != sv_vol_type) {
+			dbg_err("inconsistent vol_type");
+			goto bad;
+		}
+
+		if (used_ebs != sv->used_ebs) {
+			dbg_err("inconsistent used_ebs");
+			goto bad;
+		}
+
+		if (data_pad != sv->data_pad) {
+			dbg_err("inconsistent data_pad");
+			goto bad;
+		}
+	}
+
+	return 0;
+
+bad:
+	ubi_err("inconsistent VID header at PEB %d", pnum);
+	ubi_dbg_dump_vid_hdr(vid_hdr);
+	ubi_dbg_dump_sv(sv);
+	return -EINVAL;
+}
+
+/**
+ * add_volume - add volume to the scanning information.
+ * @si: scanning information
+ * @vol_id: ID of the volume to add
+ * @pnum: physical eraseblock number
+ * @vid_hdr: volume identifier header
+ *
+ * If the volume corresponding to the @vid_hdr logical eraseblock is already
+ * present in the scanning information, this function does nothing. Otherwise
+ * it adds corresponding volume to the scanning information. Returns a pointer
+ * to the scanning volume object in case of success and a negative error code
+ * in case of failure.
+ */
+static struct ubi_scan_volume *add_volume(struct ubi_scan_info *si, int vol_id,
+					  int pnum,
+					  const struct ubi_vid_hdr *vid_hdr)
+{
+	struct ubi_scan_volume *sv;
+	struct rb_node **p = &si->volumes.rb_node, *parent = NULL;
+
+	ubi_assert(vol_id == ubi32_to_cpu(vid_hdr->vol_id));
+
+	/* Walk the volume RB-tree to look if this volume is already present */
+	while (*p) {
+		parent = *p;
+		sv = rb_entry(parent, struct ubi_scan_volume, rb);
+
+		if (vol_id == sv->vol_id)
+			return sv;
+
+		if (vol_id > sv->vol_id)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	/* The volume is absent - add it */
+	sv = kmalloc(sizeof(struct ubi_scan_volume), GFP_KERNEL);
+	if (!sv)
+		return ERR_PTR(-ENOMEM);
+
+	sv->highest_lnum = sv->leb_count = 0;
+	si->max_sqnum = 0;
+	sv->vol_id = vol_id;
+	sv->root = RB_ROOT;
+	sv->used_ebs = ubi32_to_cpu(vid_hdr->used_ebs);
+	sv->data_pad = ubi32_to_cpu(vid_hdr->data_pad);
+	sv->compat = vid_hdr->compat;
+	sv->vol_type = vid_hdr->vol_type == UBI_VID_DYNAMIC ? UBI_DYNAMIC_VOLUME
+							    : UBI_STATIC_VOLUME;
+	if (vol_id > si->highest_vol_id)
+		si->highest_vol_id = vol_id;
+
+	rb_link_node(&sv->rb, parent, p);
+	rb_insert_color(&sv->rb, &si->volumes);
+	si->vols_found += 1;
+	dbg_bld("added volume %d", vol_id);
+	return sv;
+}
+
+/**
+ * compare_lebs - find out which logical eraseblock is newer.
+ * @ubi: UBI device description object
+ * @seb: first logical eraseblock to compare
+ * @pnum: physical eraseblock number of the second logical eraseblock to
+ * compare
+ * @vid_hdr: volume identifier header of the second logical eraseblock
+ *
+ * This function compares 2 copies of a LEB and informs which one is newer. In
+ * case of success this function returns a positive value, in case of failure, a
+ * negative error code is returned. The success return codes use the following
+ * bits:
+ *     o bit 0 is cleared: the first PEB (described by @seb) is newer then the
+ *       second PEB (described by @pnum and @vid_hdr);
+ *     o bit 0 is set: the second PEB is newer;
+ *     o bit 1 is cleared: no bit-flips were detected in the newer LEB;
+ *     o bit 1 is set: bit-flips were detected in the newer LEB;
+ *     o bit 2 is cleared: the older LEB is not corrupted;
+ *     o bit 2 is set: the older LEB is corrupted.
+ */
+static int compare_lebs(const struct ubi_device *ubi,
+			const struct ubi_scan_leb *seb, int pnum,
+			const struct ubi_vid_hdr *vid_hdr)
+{
+	void *buf;
+	int len, err, second_is_newer, bitflips = 0, corrupted = 0;
+	uint32_t data_crc, crc;
+	struct ubi_vid_hdr *vidh = NULL;
+	unsigned long long sqnum2 = ubi64_to_cpu(vid_hdr->sqnum);
+
+	if (seb->sqnum == 0 && sqnum2 == 0) {
+		long long abs, v1 = seb->leb_ver, v2 = ubi32_to_cpu(vid_hdr->leb_ver);
+
+		/*
+		 * UBI constantly increases the logical eraseblock version
+		 * number and it can overflow. Thus, we have to bear in mind
+		 * that versions that are close to %0xFFFFFFFF are less then
+		 * versions that are close to %0.
+		 *
+		 * The UBI WL unit guarantees that the number of pending tasks
+		 * is not greater then %0x7FFFFFFF. So, if the difference
+		 * between any two versions is greater or equivalent to
+		 * %0x7FFFFFFF, there was an overflow and the logical
+		 * eraseblock with lower version is actually newer then the one
+		 * with higher version.
+		 *
+		 * FIXME: but this is anyway obsolete and will be removed at
+		 * some point.
+		 */
+
+		dbg_bld("using old crappy leb_ver stuff");
+
+		abs = v1 - v2;
+		if (abs < 0)
+			abs = -abs;
+
+		if (abs < 0x7FFFFFFF)
+			/* Non-overflow situation */
+			second_is_newer = (v2 > v1);
+		else
+			second_is_newer = (v2 < v1);
+	} else
+		/* Obviously the LEB with lower sequence counter is older */
+		second_is_newer = sqnum2 > seb->sqnum;
+
+	/*
+	 * Now we know which copy is newer. If the copy flag of the PEB with
+	 * newer version is not set, then we just return, otherwise we have to
+	 * check data CRC. For the second PEB we already have the VID header,
+	 * for the first one - we'll need to re-read it from flash.
+	 *
+	 * FIXME: this may be optimized so that we wouldn't read twice.
+	 */
+
+	if (second_is_newer) {
+		if (!vid_hdr->copy_flag) {
+			/* It is not a copy, so it is newer */
+			dbg_bld("second PEB %d is newer, copy_flag is unset",
+				pnum);
+			return 1;
+		}
+	} else {
+		pnum = seb->pnum;
+
+		vidh = ubi_zalloc_vid_hdr(ubi);
+		if (!vidh)
+			return -ENOMEM;
+
+		err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0);
+		if (err) {
+			if (err == UBI_IO_BITFLIPS)
+				bitflips = 1;
+			else {
+				dbg_err("VID of PEB %d header is bad, but it "
+					"was OK earlier", pnum);
+				if (err > 0)
+					err = -EIO;
+
+				goto out_free_vidh;
+			}
+		}
+
+		if (!vidh->copy_flag) {
+			/* It is not a copy, so it is newer */
+			dbg_bld("first PEB %d is newer, copy_flag is unset",
+				pnum);
+			err = bitflips << 1;
+			goto out_free_vidh;
+		}
+
+		vid_hdr = vidh;
+	}
+
+	/* Read the data of the copy and check the CRC */
+
+	len = ubi32_to_cpu(vid_hdr->data_size);
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf) {
+		err = -ENOMEM;
+		goto out_free_vidh;
+	}
+
+	err = ubi_io_read_data(ubi, buf, pnum, 0, len);
+	if (err && err != UBI_IO_BITFLIPS)
+		goto out_free_buf;
+
+	data_crc = ubi32_to_cpu(vid_hdr->data_crc);
+	crc = crc32(UBI_CRC32_INIT, buf, len);
+	if (crc != data_crc) {
+		dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x",
+			pnum, crc, data_crc);
+		corrupted = 1;
+		bitflips = 0;
+		second_is_newer = !second_is_newer;
+	} else {
+		dbg_bld("PEB %d CRC is OK", pnum);
+		bitflips = !!err;
+	}
+
+	kfree(buf);
+	ubi_free_vid_hdr(ubi, vidh);
+
+	if (second_is_newer)
+		dbg_bld("second PEB %d is newer, copy_flag is set", pnum);
+	else
+		dbg_bld("first PEB %d is newer, copy_flag is set", pnum);
+
+	return second_is_newer | (bitflips << 1) | (corrupted << 2);
+
+out_free_buf:
+	kfree(buf);
+out_free_vidh:
+	ubi_free_vid_hdr(ubi, vidh);
+	ubi_assert(err < 0);
+	return err;
+}
+
+/**
+ * ubi_scan_add_used - add information about a physical eraseblock to the
+ * scanning information.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ * @pnum: the physical eraseblock number
+ * @ec: erase counter
+ * @vid_hdr: the volume identifier header
+ * @bitflips: if bit-flips were detected when this physical eraseblock was read
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si,
+		      int pnum, int ec, const struct ubi_vid_hdr *vid_hdr,
+		      int bitflips)
+{
+	int err, vol_id, lnum;
+	uint32_t leb_ver;
+	unsigned long long sqnum;
+	struct ubi_scan_volume *sv;
+	struct ubi_scan_leb *seb;
+	struct rb_node **p, *parent = NULL;
+
+	vol_id = ubi32_to_cpu(vid_hdr->vol_id);
+	lnum = ubi32_to_cpu(vid_hdr->lnum);
+	sqnum = ubi64_to_cpu(vid_hdr->sqnum);
+	leb_ver = ubi32_to_cpu(vid_hdr->leb_ver);
+
+	dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d",
+		pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips);
+
+	sv = add_volume(si, vol_id, pnum, vid_hdr);
+	if (IS_ERR(sv) < 0)
+		return PTR_ERR(sv);
+
+	/*
+	 * Walk the RB-tree of logical eraseblocks of volume @vol_id to look
+	 * if this is the first instance of this logical eraseblock or not.
+	 */
+	p = &sv->root.rb_node;
+	while (*p) {
+		int cmp_res;
+
+		parent = *p;
+		seb = rb_entry(parent, struct ubi_scan_leb, u.rb);
+		if (lnum != seb->lnum) {
+			if (lnum < seb->lnum)
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+			continue;
+		}
+
+		/*
+		 * There is already a physical eraseblock describing the same
+		 * logical eraseblock present.
+		 */
+
+		dbg_bld("this LEB already exists: PEB %d, sqnum %llu, "
+			"LEB ver %u, EC %d", seb->pnum, seb->sqnum,
+			seb->leb_ver, seb->ec);
+
+		/*
+		 * Make sure that the logical eraseblocks have different
+		 * versions. Otherwise the image is bad.
+		 */
+		if (seb->leb_ver == leb_ver && leb_ver != 0) {
+			ubi_err("two LEBs with same version %u", leb_ver);
+			ubi_dbg_dump_seb(seb, 0);
+			ubi_dbg_dump_vid_hdr(vid_hdr);
+			return -EINVAL;
+		}
+
+		/*
+		 * Make sure that the logical eraseblocks have different
+		 * sequence numbers. Otherwise the image is bad.
+		 *
+		 * FIXME: remove 'sqnum != 0' check when leb_ver is removed.
+		 */
+		if (seb->sqnum == sqnum && sqnum != 0) {
+			ubi_err("two LEBs with same sequence number %llu",
+				sqnum);
+			ubi_dbg_dump_seb(seb, 0);
+			ubi_dbg_dump_vid_hdr(vid_hdr);
+			return -EINVAL;
+		}
+
+		/*
+		 * Now we have to drop the older one and preserve the newer
+		 * one.
+		 */
+		cmp_res = compare_lebs(ubi, seb, pnum, vid_hdr);
+		if (cmp_res < 0)
+			return cmp_res;
+
+		if (cmp_res & 1) {
+			/*
+			 * This logical eraseblock is newer then the one
+			 * found earlier.
+			 */
+			err = validate_vid_hdr(vid_hdr, sv, pnum);
+			if (err)
+				return err;
+
+			if (cmp_res & 4)
+				err = ubi_scan_add_to_list(si, seb->pnum,
+							   seb->ec, &si->corr);
+			else
+				err = ubi_scan_add_to_list(si, seb->pnum,
+							   seb->ec, &si->erase);
+			if (err)
+				return err;
+
+			seb->ec = ec;
+			seb->pnum = pnum;
+			seb->scrub = ((cmp_res & 2) || bitflips);
+			seb->sqnum = sqnum;
+			seb->leb_ver = leb_ver;
+
+			if (sv->highest_lnum == lnum)
+				sv->last_data_size =
+					ubi32_to_cpu(vid_hdr->data_size);
+
+			return 0;
+		} else {
+			/*
+			 * This logical eraseblock is older then the one found
+			 * previously.
+			 */
+			if (cmp_res & 4)
+				return ubi_scan_add_to_list(si, pnum, ec,
+							    &si->corr);
+			else
+				return ubi_scan_add_to_list(si, pnum, ec,
+							    &si->erase);
+		}
+	}
+
+	/*
+	 * We've met this logical eraseblock for the first time, add it to the
+	 * scanning information.
+	 */
+
+	err = validate_vid_hdr(vid_hdr, sv, pnum);
+	if (err)
+		return err;
+
+	seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL);
+	if (!seb)
+		return -ENOMEM;
+
+	seb->ec = ec;
+	seb->pnum = pnum;
+	seb->lnum = lnum;
+	seb->sqnum = sqnum;
+	seb->scrub = bitflips;
+	seb->leb_ver = leb_ver;
+
+	if (sv->highest_lnum <= lnum) {
+		sv->highest_lnum = lnum;
+		sv->last_data_size = ubi32_to_cpu(vid_hdr->data_size);
+	}
+
+	if (si->max_sqnum < sqnum)
+		si->max_sqnum = sqnum;
+
+	sv->leb_count += 1;
+	rb_link_node(&seb->u.rb, parent, p);
+	rb_insert_color(&seb->u.rb, &sv->root);
+	return 0;
+}
+
+/**
+ * ubi_scan_find_sv - find information about a particular volume in the
+ * scanning information.
+ * @si: scanning information
+ * @vol_id: the requested volume ID
+ *
+ * This function returns a pointer to the volume description or %NULL if there
+ * are no data about this volume in the scanning information.
+ */
+struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si,
+					 int vol_id)
+{
+	struct ubi_scan_volume *sv;
+	struct rb_node *p = si->volumes.rb_node;
+
+	while (p) {
+		sv = rb_entry(p, struct ubi_scan_volume, rb);
+
+		if (vol_id == sv->vol_id)
+			return sv;
+
+		if (vol_id > sv->vol_id)
+			p = p->rb_left;
+		else
+			p = p->rb_right;
+	}
+
+	return NULL;
+}
+
+/**
+ * ubi_scan_find_seb - find information about a particular logical
+ * eraseblock in the volume scanning information.
+ * @sv: a pointer to the volume scanning information
+ * @lnum: the requested logical eraseblock
+ *
+ * This function returns a pointer to the scanning logical eraseblock or %NULL
+ * if there are no data about it in the scanning volume information.
+ */
+struct ubi_scan_leb *ubi_scan_find_seb(const struct ubi_scan_volume *sv,
+				       int lnum)
+{
+	struct ubi_scan_leb *seb;
+	struct rb_node *p = sv->root.rb_node;
+
+	while (p) {
+		seb = rb_entry(p, struct ubi_scan_leb, u.rb);
+
+		if (lnum == seb->lnum)
+			return seb;
+
+		if (lnum > seb->lnum)
+			p = p->rb_left;
+		else
+			p = p->rb_right;
+	}
+
+	return NULL;
+}
+
+/**
+ * ubi_scan_rm_volume - delete scanning information about a volume.
+ * @si: scanning information
+ * @sv: the volume scanning information to delete
+ */
+void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv)
+{
+	struct rb_node *rb;
+	struct ubi_scan_leb *seb;
+
+	dbg_bld("remove scanning information about volume %d", sv->vol_id);
+
+	while ((rb = rb_first(&sv->root))) {
+		seb = rb_entry(rb, struct ubi_scan_leb, u.rb);
+		rb_erase(&seb->u.rb, &sv->root);
+		list_add_tail(&seb->u.list, &si->erase);
+	}
+
+	rb_erase(&sv->rb, &si->volumes);
+	kfree(sv);
+	si->vols_found -= 1;
+}
+
+/**
+ * ubi_scan_erase_peb - erase a physical eraseblock.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ * @pnum: physical eraseblock number to erase;
+ * @ec: erase counter value to write (%UBI_SCAN_UNKNOWN_EC if it is unknown)
+ *
+ * This function erases physical eraseblock 'pnum', and writes the erase
+ * counter header to it. This function should only be used on UBI device
+ * initialization stages, when the EBA unit had not been yet initialized. This
+ * function returns zero in case of success and a negative error code in case
+ * of failure.
+ */
+int ubi_scan_erase_peb(const struct ubi_device *ubi,
+		       const struct ubi_scan_info *si, int pnum, int ec)
+{
+	int err;
+	struct ubi_ec_hdr *ec_hdr;
+
+	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
+	if (!ec_hdr)
+		return -ENOMEM;
+
+	if ((long long)ec >= UBI_MAX_ERASECOUNTER) {
+		/*
+		 * Erase counter overflow. Upgrade UBI and use 64-bit
+		 * erase counters internally.
+		 */
+		ubi_err("erase counter overflow at PEB %d, EC %d", pnum, ec);
+		return -EINVAL;
+	}
+
+	ec_hdr->ec = cpu_to_ubi64(ec);
+
+	err = ubi_io_sync_erase(ubi, pnum, 0);
+	if (err < 0)
+		goto out_free;
+
+	err = ubi_io_write_ec_hdr(ubi, pnum, ec_hdr);
+
+out_free:
+	kfree(ec_hdr);
+	return err;
+}
+
+/**
+ * ubi_scan_get_free_peb - get a free physical eraseblock.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ *
+ * This function returns a free physical eraseblock. It is supposed to be
+ * called on the UBI initialization stages when the wear-leveling unit is not
+ * initialized yet. This function picks a physical eraseblocks from one of the
+ * lists, writes the EC header if it is needed, and removes it from the list.
+ *
+ * This function returns scanning physical eraseblock information in case of
+ * success and an error code in case of failure.
+ */
+struct ubi_scan_leb *ubi_scan_get_free_peb(const struct ubi_device *ubi,
+					   struct ubi_scan_info *si)
+{
+	int err = 0, i;
+	struct ubi_scan_leb *seb;
+
+	if (!list_empty(&si->free)) {
+		seb = list_entry(si->free.next, struct ubi_scan_leb, u.list);
+		list_del(&seb->u.list);
+		dbg_bld("return free PEB %d, EC %d", seb->pnum, seb->ec);
+		return seb;
+	}
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *head;
+		struct ubi_scan_leb *tmp_seb;
+
+		if (i == 0)
+			head = &si->erase;
+		else
+			head = &si->corr;
+
+		/*
+		 * We try to erase the first physical eraseblock from the @head
+		 * list and pick it if we succeed, or try to erase the
+		 * next one if not. And so forth. We don't want to take care
+		 * about bad eraseblocks here - they'll be handled later.
+		 */
+		list_for_each_entry_safe(seb, tmp_seb, head, u.list) {
+			if (seb->ec == UBI_SCAN_UNKNOWN_EC)
+				seb->ec = si->mean_ec;
+
+			err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1);
+			if (err)
+				continue;
+
+			seb->ec += 1;
+			list_del(&seb->u.list);
+			dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
+			return seb;
+		}
+	}
+
+	ubi_err("no eraseblocks found");
+	return ERR_PTR(-ENOSPC);
+}
+
+/**
+ * process_eb - read UBI headers, check them and add corresponding data
+ * to the scanning information.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ * @pnum: the physical eraseblock number
+ *
+ * This function returns a zero if the physical eraseblock was succesfully
+ * handled and a negative error code in case of failure.
+ */
+static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum)
+{
+	long long ec;
+	int err, bitflips = 0, vol_id, ec_corr = 0;
+
+	dbg_bld("scan PEB %d", pnum);
+
+	/* Skip bad physical eraseblocks */
+	err = ubi_io_is_bad(ubi, pnum);
+	if (err < 0)
+		return err;
+	else if (err) {
+		/*
+		 * FIXME: this is actually duty of the I/O unit to initialize
+		 * this, but MTD does not provide enough information.
+		 */
+		si->bad_peb_count += 1;
+		return 0;
+	}
+
+	err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0);
+	if (err < 0)
+		return err;
+	else if (err == UBI_IO_BITFLIPS)
+		bitflips = 1;
+	else if (err == UBI_IO_PEB_EMPTY)
+		return ubi_scan_add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC,
+					    &si->erase);
+	else if (err == UBI_IO_BAD_EC_HDR) {
+		/*
+		 * We have to also look at the VID header, possibly it is not
+		 * corrupted. Set %bitflips flag in order to make this PEB be
+		 * moved and EC be re-created.
+		 */
+		ec_corr = 1;
+		ec = UBI_SCAN_UNKNOWN_EC;
+		bitflips = 1;
+	}
+
+	si->is_empty = 0;
+
+	if (!ec_corr) {
+		/* Make sure UBI version is OK */
+		if (ech->version != UBI_VERSION) {
+			ubi_err("this UBI version is %d, image version is %d",
+				UBI_VERSION, (int)ech->version);
+			return -EINVAL;
+		}
+
+		ec = ubi64_to_cpu(ech->ec);
+		if (ec > UBI_MAX_ERASECOUNTER) {
+			/*
+			 * Erase counter overflow. The EC headers have 64 bits
+			 * reserved, but we anyway make use of only 31 bit
+			 * values, as this seems to be enough for any existing
+			 * flash. Upgrade UBI and use 64-bit erase counters
+			 * internally.
+			 */
+			ubi_err("erase counter overflow, max is %d",
+				UBI_MAX_ERASECOUNTER);
+			ubi_dbg_dump_ec_hdr(ech);
+			return -EINVAL;
+		}
+	}
+
+	/* OK, we've done with the EC header, let's look at the VID header */
+
+	err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0);
+	if (err < 0)
+		return err;
+	else if (err == UBI_IO_BITFLIPS)
+		bitflips = 1;
+	else if (err == UBI_IO_BAD_VID_HDR ||
+		 (err == UBI_IO_PEB_FREE && ec_corr)) {
+		/* VID header is corrupted */
+		err = ubi_scan_add_to_list(si, pnum, ec, &si->corr);
+		if (err)
+			return err;
+		goto adjust_mean_ec;
+	} else if (err == UBI_IO_PEB_FREE) {
+		/* No VID header - the physical eraseblock is free */
+		err = ubi_scan_add_to_list(si, pnum, ec, &si->free);
+		if (err)
+			return err;
+		goto adjust_mean_ec;
+	}
+
+	vol_id = ubi32_to_cpu(vidh->vol_id);
+	if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOL_ID) {
+		int lnum = ubi32_to_cpu(vidh->lnum);
+
+		/* Unsupported internal volume */
+		switch (vidh->compat) {
+		case UBI_COMPAT_DELETE:
+			ubi_msg("\"delete\" compatible internal volume %d:%d"
+				" found, remove it", vol_id, lnum);
+			err = ubi_scan_add_to_list(si, pnum, ec, &si->corr);
+			if (err)
+				return err;
+			break;
+
+		case UBI_COMPAT_RO:
+			ubi_msg("read-only compatible internal volume %d:%d"
+				" found, switch to read-only mode",
+				vol_id, lnum);
+			ubi->ro_mode = 1;
+			break;
+
+		case UBI_COMPAT_PRESERVE:
+			ubi_msg("\"preserve\" compatible internal volume %d:%d"
+				" found", vol_id, lnum);
+			err = ubi_scan_add_to_list(si, pnum, ec, &si->alien);
+			if (err)
+				return err;
+			si->alien_peb_count += 1;
+			return 0;
+
+		case UBI_COMPAT_REJECT:
+			ubi_err("incompatible internal volume %d:%d found",
+				vol_id, lnum);
+			return -EINVAL;
+		}
+	}
+
+	/* Both UBI headers seem to be fine */
+	err = ubi_scan_add_used(ubi, si, pnum, ec, vidh, bitflips);
+	if (err)
+		return err;
+
+adjust_mean_ec:
+	if (!ec_corr) {
+		if (si->ec_sum + ec < ec) {
+			commit_to_mean_value(si);
+			si->ec_sum = 0;
+			si->ec_count = 0;
+		} else {
+			si->ec_sum += ec;
+			si->ec_count += 1;
+		}
+
+		if (ec > si->max_ec)
+			si->max_ec = ec;
+		if (ec < si->min_ec)
+			si->min_ec = ec;
+	}
+
+	return 0;
+}
+
+/**
+ * ubi_scan - scan an MTD device.
+ * @ubi: UBI device description object
+ *
+ * This function does full scanning of an MTD device and returns complete
+ * information about it. In case of failure, an error code is returned.
+ */
+struct ubi_scan_info *ubi_scan(struct ubi_device *ubi)
+{
+	int err, pnum;
+	struct rb_node *rb1, *rb2;
+	struct ubi_scan_volume *sv;
+	struct ubi_scan_leb *seb;
+	struct ubi_scan_info *si;
+
+	si = kzalloc(sizeof(struct ubi_scan_info), GFP_KERNEL);
+	if (!si)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&si->corr);
+	INIT_LIST_HEAD(&si->free);
+	INIT_LIST_HEAD(&si->erase);
+	INIT_LIST_HEAD(&si->alien);
+	si->volumes = RB_ROOT;
+	si->is_empty = 1;
+
+	err = -ENOMEM;
+	ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
+	if (!ech)
+		goto out_si;
+
+	vidh = ubi_zalloc_vid_hdr(ubi);
+	if (!vidh)
+		goto out_ech;
+
+	for (pnum = 0; pnum < ubi->peb_count; pnum++) {
+		cond_resched();
+
+		dbg_msg("process PEB %d", pnum);
+		err = process_eb(ubi, si, pnum);
+		if (err < 0)
+			goto out_vidh;
+	}
+
+	dbg_msg("scanning is finished");
+
+	/* Finish mean erase counter calculations */
+	if (si->ec_count)
+		commit_to_mean_value(si);
+
+	if (si->is_empty)
+		ubi_msg("empty MTD device detected");
+
+	/*
+	 * In case of unknown erase counter we use the mean erase counter
+	 * value.
+	 */
+	ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
+		ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb)
+			if (seb->ec == UBI_SCAN_UNKNOWN_EC)
+				seb->ec = si->mean_ec;
+	}
+
+	list_for_each_entry(seb, &si->free, u.list) {
+		if (seb->ec == UBI_SCAN_UNKNOWN_EC)
+			seb->ec = si->mean_ec;
+	}
+
+	list_for_each_entry(seb, &si->corr, u.list)
+		if (seb->ec == UBI_SCAN_UNKNOWN_EC)
+			seb->ec = si->mean_ec;
+
+	list_for_each_entry(seb, &si->erase, u.list)
+		if (seb->ec == UBI_SCAN_UNKNOWN_EC)
+			seb->ec = si->mean_ec;
+
+	err = paranoid_check_si(ubi, si);
+	if (err) {
+		if (err > 0)
+			err = -EINVAL;
+		goto out_vidh;
+	}
+
+	ubi_free_vid_hdr(ubi, vidh);
+	kfree(ech);
+
+	return si;
+
+out_vidh:
+	ubi_free_vid_hdr(ubi, vidh);
+out_ech:
+	kfree(ech);
+out_si:
+	ubi_scan_destroy_si(si);
+	return ERR_PTR(err);
+}
+
+/**
+ * destroy_sv - free the scanning volume information
+ * @sv: scanning volume information
+ *
+ * This function destroys the volume RB-tree (@sv->root) and the scanning
+ * volume information.
+ */
+static void destroy_sv(struct ubi_scan_volume *sv)
+{
+	struct ubi_scan_leb *seb;
+	struct rb_node *this = sv->root.rb_node;
+
+	while (this) {
+		if (this->rb_left)
+			this = this->rb_left;
+		else if (this->rb_right)
+			this = this->rb_right;
+		else {
+			seb = rb_entry(this, struct ubi_scan_leb, u.rb);
+			this = rb_parent(this);
+			if (this) {
+				if (this->rb_left == &seb->u.rb)
+					this->rb_left = NULL;
+				else
+					this->rb_right = NULL;
+			}
+
+			kfree(seb);
+		}
+	}
+	kfree(sv);
+}
+
+/**
+ * ubi_scan_destroy_si - destroy scanning information.
+ * @si: scanning information
+ */
+void ubi_scan_destroy_si(struct ubi_scan_info *si)
+{
+	struct ubi_scan_leb *seb, *seb_tmp;
+	struct ubi_scan_volume *sv;
+	struct rb_node *rb;
+
+	list_for_each_entry_safe(seb, seb_tmp, &si->alien, u.list) {
+		list_del(&seb->u.list);
+		kfree(seb);
+	}
+	list_for_each_entry_safe(seb, seb_tmp, &si->erase, u.list) {
+		list_del(&seb->u.list);
+		kfree(seb);
+	}
+	list_for_each_entry_safe(seb, seb_tmp, &si->corr, u.list) {
+		list_del(&seb->u.list);
+		kfree(seb);
+	}
+	list_for_each_entry_safe(seb, seb_tmp, &si->free, u.list) {
+		list_del(&seb->u.list);
+		kfree(seb);
+	}
+
+	/* Destroy the volume RB-tree */
+	rb = si->volumes.rb_node;
+	while (rb) {
+		if (rb->rb_left)
+			rb = rb->rb_left;
+		else if (rb->rb_right)
+			rb = rb->rb_right;
+		else {
+			sv = rb_entry(rb, struct ubi_scan_volume, rb);
+
+			rb = rb_parent(rb);
+			if (rb) {
+				if (rb->rb_left == &sv->rb)
+					rb->rb_left = NULL;
+				else
+					rb->rb_right = NULL;
+			}
+
+			destroy_sv(sv);
+		}
+	}
+
+	kfree(si);
+}
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+
+/**
+ * paranoid_check_si - check if the scanning information is correct and
+ * consistent.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ *
+ * This function returns zero if the scanning information is all right, %1 if
+ * not and a negative error code if an error occurred.
+ */
+static int paranoid_check_si(const struct ubi_device *ubi,
+			     struct ubi_scan_info *si)
+{
+	int pnum, err, vols_found = 0;
+	struct rb_node *rb1, *rb2;
+	struct ubi_scan_volume *sv;
+	struct ubi_scan_leb *seb, *last_seb;
+	uint8_t *buf;
+
+	/*
+	 * At first, check that scanning information is ok.
+	 */
+	ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
+		int leb_count = 0;
+
+		cond_resched();
+
+		vols_found += 1;
+
+		if (si->is_empty) {
+			ubi_err("bad is_empty flag");
+			goto bad_sv;
+		}
+
+		if (sv->vol_id < 0 || sv->highest_lnum < 0 ||
+		    sv->leb_count < 0 || sv->vol_type < 0 || sv->used_ebs < 0 ||
+		    sv->data_pad < 0 || sv->last_data_size < 0) {
+			ubi_err("negative values");
+			goto bad_sv;
+		}
+
+		if (sv->vol_id >= UBI_MAX_VOLUMES &&
+		    sv->vol_id < UBI_INTERNAL_VOL_START) {
+			ubi_err("bad vol_id");
+			goto bad_sv;
+		}
+
+		if (sv->vol_id > si->highest_vol_id) {
+			ubi_err("highest_vol_id is %d, but vol_id %d is there",
+				si->highest_vol_id, sv->vol_id);
+			goto out;
+		}
+
+		if (sv->vol_type != UBI_DYNAMIC_VOLUME &&
+		    sv->vol_type != UBI_STATIC_VOLUME) {
+			ubi_err("bad vol_type");
+			goto bad_sv;
+		}
+
+		if (sv->data_pad > ubi->leb_size / 2) {
+			ubi_err("bad data_pad");
+			goto bad_sv;
+		}
+
+		last_seb = NULL;
+		ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
+			cond_resched();
+
+			last_seb = seb;
+			leb_count += 1;
+
+			if (seb->pnum < 0 || seb->ec < 0) {
+				ubi_err("negative values");
+				goto bad_seb;
+			}
+
+			if (seb->ec < si->min_ec) {
+				ubi_err("bad si->min_ec (%d), %d found",
+					si->min_ec, seb->ec);
+				goto bad_seb;
+			}
+
+			if (seb->ec > si->max_ec) {
+				ubi_err("bad si->max_ec (%d), %d found",
+					si->max_ec, seb->ec);
+				goto bad_seb;
+			}
+
+			if (seb->pnum >= ubi->peb_count) {
+				ubi_err("too high PEB number %d, total PEBs %d",
+					seb->pnum, ubi->peb_count);
+				goto bad_seb;
+			}
+
+			if (sv->vol_type == UBI_STATIC_VOLUME) {
+				if (seb->lnum >= sv->used_ebs) {
+					ubi_err("bad lnum or used_ebs");
+					goto bad_seb;
+				}
+			} else {
+				if (sv->used_ebs != 0) {
+					ubi_err("non-zero used_ebs");
+					goto bad_seb;
+				}
+			}
+
+			if (seb->lnum > sv->highest_lnum) {
+				ubi_err("incorrect highest_lnum or lnum");
+				goto bad_seb;
+			}
+		}
+
+		if (sv->leb_count != leb_count) {
+			ubi_err("bad leb_count, %d objects in the tree",
+				leb_count);
+			goto bad_sv;
+		}
+
+		if (!last_seb)
+			continue;
+
+		seb = last_seb;
+
+		if (seb->lnum != sv->highest_lnum) {
+			ubi_err("bad highest_lnum");
+			goto bad_seb;
+		}
+	}
+
+	if (vols_found != si->vols_found) {
+		ubi_err("bad si->vols_found %d, should be %d",
+			si->vols_found, vols_found);
+		goto out;
+	}
+
+	/* Check that scanning information is correct */
+	ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
+		last_seb = NULL;
+		ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
+			int vol_type;
+
+			cond_resched();
+
+			last_seb = seb;
+
+			err = ubi_io_read_vid_hdr(ubi, seb->pnum, vidh, 1);
+			if (err && err != UBI_IO_BITFLIPS) {
+				ubi_err("VID header is not OK (%d)", err);
+				if (err > 0)
+					err = -EIO;
+				return err;
+			}
+
+			vol_type = vidh->vol_type == UBI_VID_DYNAMIC ?
+				   UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME;
+			if (sv->vol_type != vol_type) {
+				ubi_err("bad vol_type");
+				goto bad_vid_hdr;
+			}
+
+			if (seb->sqnum != ubi64_to_cpu(vidh->sqnum)) {
+				ubi_err("bad sqnum %llu", seb->sqnum);
+				goto bad_vid_hdr;
+			}
+
+			if (sv->vol_id != ubi32_to_cpu(vidh->vol_id)) {
+				ubi_err("bad vol_id %d", sv->vol_id);
+				goto bad_vid_hdr;
+			}
+
+			if (sv->compat != vidh->compat) {
+				ubi_err("bad compat %d", vidh->compat);
+				goto bad_vid_hdr;
+			}
+
+			if (seb->lnum != ubi32_to_cpu(vidh->lnum)) {
+				ubi_err("bad lnum %d", seb->lnum);
+				goto bad_vid_hdr;
+			}
+
+			if (sv->used_ebs != ubi32_to_cpu(vidh->used_ebs)) {
+				ubi_err("bad used_ebs %d", sv->used_ebs);
+				goto bad_vid_hdr;
+			}
+
+			if (sv->data_pad != ubi32_to_cpu(vidh->data_pad)) {
+				ubi_err("bad data_pad %d", sv->data_pad);
+				goto bad_vid_hdr;
+			}
+
+			if (seb->leb_ver != ubi32_to_cpu(vidh->leb_ver)) {
+				ubi_err("bad leb_ver %u", seb->leb_ver);
+				goto bad_vid_hdr;
+			}
+		}
+
+		if (!last_seb)
+			continue;
+
+		if (sv->highest_lnum != ubi32_to_cpu(vidh->lnum)) {
+			ubi_err("bad highest_lnum %d", sv->highest_lnum);
+			goto bad_vid_hdr;
+		}
+
+		if (sv->last_data_size != ubi32_to_cpu(vidh->data_size)) {
+			ubi_err("bad last_data_size %d", sv->last_data_size);
+			goto bad_vid_hdr;
+		}
+	}
+
+	/*
+	 * Make sure that all the physical eraseblocks are in one of the lists
+	 * or trees.
+	 */
+	buf = kmalloc(ubi->peb_count, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	memset(buf, 1, ubi->peb_count);
+	for (pnum = 0; pnum < ubi->peb_count; pnum++) {
+		err = ubi_io_is_bad(ubi, pnum);
+		if (err < 0)
+			return err;
+		else if (err)
+			buf[pnum] = 0;
+	}
+
+	ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb)
+		ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb)
+			buf[seb->pnum] = 0;
+
+	list_for_each_entry(seb, &si->free, u.list)
+		buf[seb->pnum] = 0;
+
+	list_for_each_entry(seb, &si->corr, u.list)
+		buf[seb->pnum] = 0;
+
+	list_for_each_entry(seb, &si->erase, u.list)
+		buf[seb->pnum] = 0;
+
+	list_for_each_entry(seb, &si->alien, u.list)
+		buf[seb->pnum] = 0;
+
+	err = 0;
+	for (pnum = 0; pnum < ubi->peb_count; pnum++)
+		if (buf[pnum]) {
+			ubi_err("PEB %d is not referred", pnum);
+			err = 1;
+		}
+
+	kfree(buf);
+	if (err)
+		goto out;
+	return 0;
+
+bad_seb:
+	ubi_err("bad scanning information about LEB %d", seb->lnum);
+	ubi_dbg_dump_seb(seb, 0);
+	ubi_dbg_dump_sv(sv);
+	goto out;
+
+bad_sv:
+	ubi_err("bad scanning information about volume %d", sv->vol_id);
+	ubi_dbg_dump_sv(sv);
+	goto out;
+
+bad_vid_hdr:
+	ubi_err("bad scanning information about volume %d", sv->vol_id);
+	ubi_dbg_dump_sv(sv);
+	ubi_dbg_dump_vid_hdr(vidh);
+
+out:
+	ubi_dbg_dump_stack();
+	return 1;
+}
+
+#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h
new file mode 100644
index 00000000000..3949f6192c7
--- /dev/null
+++ b/drivers/mtd/ubi/scan.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+#ifndef __UBI_SCAN_H__
+#define __UBI_SCAN_H__
+
+/* The erase counter value for this physical eraseblock is unknown */
+#define UBI_SCAN_UNKNOWN_EC (-1)
+
+/**
+ * struct ubi_scan_leb - scanning information about a physical eraseblock.
+ * @ec: erase counter (%UBI_SCAN_UNKNOWN_EC if it is unknown)
+ * @pnum: physical eraseblock number
+ * @lnum: logical eraseblock number
+ * @scrub: if this physical eraseblock needs scrubbing
+ * @sqnum: sequence number
+ * @u: unions RB-tree or @list links
+ * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects
+ * @u.list: link in one of the eraseblock lists
+ * @leb_ver: logical eraseblock version (obsolete)
+ *
+ * One object of this type is allocated for each physical eraseblock during
+ * scanning.
+ */
+struct ubi_scan_leb {
+	int ec;
+	int pnum;
+	int lnum;
+	int scrub;
+	unsigned long long sqnum;
+	union {
+		struct rb_node rb;
+		struct list_head list;
+	} u;
+	uint32_t leb_ver;
+};
+
+/**
+ * struct ubi_scan_volume - scanning information about a volume.
+ * @vol_id: volume ID
+ * @highest_lnum: highest logical eraseblock number in this volume
+ * @leb_count: number of logical eraseblocks in this volume
+ * @vol_type: volume type
+ * @used_ebs: number of used logical eraseblocks in this volume (only for
+ * static volumes)
+ * @last_data_size: amount of data in the last logical eraseblock of this
+ * volume (always equivalent to the usable logical eraseblock size in case of
+ * dynamic volumes)
+ * @data_pad: how many bytes at the end of logical eraseblocks of this volume
+ * are not used (due to volume alignment)
+ * @compat: compatibility flags of this volume
+ * @rb: link in the volume RB-tree
+ * @root: root of the RB-tree containing all the eraseblock belonging to this
+ * volume (&struct ubi_scan_leb objects)
+ *
+ * One object of this type is allocated for each volume during scanning.
+ */
+struct ubi_scan_volume {
+	int vol_id;
+	int highest_lnum;
+	int leb_count;
+	int vol_type;
+	int used_ebs;
+	int last_data_size;
+	int data_pad;
+	int compat;
+	struct rb_node rb;
+	struct rb_root root;
+};
+
+/**
+ * struct ubi_scan_info - UBI scanning information.
+ * @volumes: root of the volume RB-tree
+ * @corr: list of corrupted physical eraseblocks
+ * @free: list of free physical eraseblocks
+ * @erase: list of physical eraseblocks which have to be erased
+ * @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
+ * @bad_peb_count: count of bad physical eraseblocks
+ * those belonging to "preserve"-compatible internal volumes)
+ * @vols_found: number of volumes found during scanning
+ * @highest_vol_id: highest volume ID
+ * @alien_peb_count: count of physical eraseblocks in the @alien list
+ * @is_empty: flag indicating whether the MTD device is empty or not
+ * @min_ec: lowest erase counter value
+ * @max_ec: highest erase counter value
+ * @max_sqnum: highest sequence number value
+ * @mean_ec: mean erase counter value
+ * @ec_sum: a temporary variable used when calculating @mean_ec
+ * @ec_count: a temporary variable used when calculating @mean_ec
+ *
+ * This data structure contains the result of scanning and may be used by other
+ * UBI units to build final UBI data structures, further error-recovery and so
+ * on.
+ */
+struct ubi_scan_info {
+	struct rb_root volumes;
+	struct list_head corr;
+	struct list_head free;
+	struct list_head erase;
+	struct list_head alien;
+	int bad_peb_count;
+	int vols_found;
+	int highest_vol_id;
+	int alien_peb_count;
+	int is_empty;
+	int min_ec;
+	int max_ec;
+	unsigned long long max_sqnum;
+	int mean_ec;
+	int ec_sum;
+	int ec_count;
+};
+
+struct ubi_device;
+struct ubi_vid_hdr;
+
+/*
+ * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a
+ * list.
+ *
+ * @sv: volume scanning information
+ * @seb: scanning eraseblock infprmation
+ * @list: the list to move to
+ */
+static inline void ubi_scan_move_to_list(struct ubi_scan_volume *sv,
+					 struct ubi_scan_leb *seb,
+					 struct list_head *list)
+{
+		rb_erase(&seb->u.rb, &sv->root);
+		list_add_tail(&seb->u.list, list);
+}
+
+int ubi_scan_add_to_list(struct ubi_scan_info *si, int pnum, int ec,
+			 struct list_head *list);
+int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si,
+		      int pnum, int ec, const struct ubi_vid_hdr *vid_hdr,
+		      int bitflips);
+struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si,
+					 int vol_id);
+struct ubi_scan_leb *ubi_scan_find_seb(const struct ubi_scan_volume *sv,
+				       int lnum);
+void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv);
+struct ubi_scan_leb *ubi_scan_get_free_peb(const struct ubi_device *ubi,
+					   struct ubi_scan_info *si);
+int ubi_scan_erase_peb(const struct ubi_device *ubi,
+		       const struct ubi_scan_info *si, int pnum, int ec);
+struct ubi_scan_info *ubi_scan(struct ubi_device *ubi);
+void ubi_scan_destroy_si(struct ubi_scan_info *si);
+
+#endif /* !__UBI_SCAN_H__ */
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
new file mode 100644
index 00000000000..feb647f108f
--- /dev/null
+++ b/drivers/mtd/ubi/ubi.h
@@ -0,0 +1,535 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ * Copyright (c) Nokia Corporation, 2006, 2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+#ifndef __UBI_UBI_H__
+#define __UBI_UBI_H__
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/string.h>
+#include <linux/mtd/mtd.h>
+
+#include <mtd/ubi-header.h>
+#include <linux/mtd/ubi.h>
+
+#include "scan.h"
+#include "debug.h"
+
+/* Maximum number of supported UBI devices */
+#define UBI_MAX_DEVICES 32
+
+/* UBI name used for character devices, sysfs, etc */
+#define UBI_NAME_STR "ubi"
+
+/* Normal UBI messages */
+#define ubi_msg(fmt, ...) printk(KERN_NOTICE "UBI: " fmt "\n", ##__VA_ARGS__)
+/* UBI warning messages */
+#define ubi_warn(fmt, ...) printk(KERN_WARNING "UBI warning: %s: " fmt "\n", \
+				  __FUNCTION__, ##__VA_ARGS__)
+/* UBI error messages */
+#define ubi_err(fmt, ...) printk(KERN_ERR "UBI error: %s: " fmt "\n", \
+				 __FUNCTION__, ##__VA_ARGS__)
+
+/* Lowest number PEBs reserved for bad PEB handling */
+#define MIN_RESEVED_PEBS 2
+
+/* Background thread name pattern */
+#define UBI_BGT_NAME_PATTERN "ubi_bgt%dd"
+
+/* This marker in the EBA table means that the LEB is um-mapped */
+#define UBI_LEB_UNMAPPED -1
+
+/*
+ * In case of errors, UBI tries to repeat the operation several times before
+ * returning error. The below constant defines how many times UBI re-tries.
+ */
+#define UBI_IO_RETRIES 3
+
+/*
+ * Error codes returned by the I/O unit.
+ *
+ * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
+ * 0xFF bytes
+ * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a
+ * valid erase counter header, and the rest are %0xFF bytes
+ * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC)
+ * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or
+ * CRC)
+ * UBI_IO_BITFLIPS: bit-flips were detected and corrected
+ */
+enum {
+	UBI_IO_PEB_EMPTY = 1,
+	UBI_IO_PEB_FREE,
+	UBI_IO_BAD_EC_HDR,
+	UBI_IO_BAD_VID_HDR,
+	UBI_IO_BITFLIPS
+};
+
+extern int ubi_devices_cnt;
+extern struct ubi_device *ubi_devices[];
+
+struct ubi_volume_desc;
+
+/**
+ * struct ubi_volume - UBI volume description data structure.
+ * @dev: device object to make use of the the Linux device model
+ * @cdev: character device object to create character device
+ * @ubi: reference to the UBI device description object
+ * @vol_id: volume ID
+ * @readers: number of users holding this volume in read-only mode
+ * @writers: number of users holding this volume in read-write mode
+ * @exclusive: whether somebody holds this volume in exclusive mode
+ * @removed: if the volume was removed
+ * @checked: if this static volume was checked
+ *
+ * @reserved_pebs: how many physical eraseblocks are reserved for this volume
+ * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME)
+ * @usable_leb_size: logical eraseblock size without padding
+ * @used_ebs: how many logical eraseblocks in this volume contain data
+ * @last_eb_bytes: how many bytes are stored in the last logical eraseblock
+ * @used_bytes: how many bytes of data this volume contains
+ * @upd_marker: non-zero if the update marker is set for this volume
+ * @corrupted: non-zero if the volume is corrupted (static volumes only)
+ * @alignment: volume alignment
+ * @data_pad: how many bytes are not used at the end of physical eraseblocks to
+ * satisfy the requested alignment
+ * @name_len: volume name length
+ * @name: volume name
+ *
+ * @updating: whether the volume is being updated
+ * @upd_ebs: how many eraseblocks are expected to be updated
+ * @upd_bytes: how many bytes are expected to be received
+ * @upd_received: how many update bytes were already received
+ * @upd_buf: update buffer which is used to collect update data
+ *
+ * @eba_tbl: EBA table of this volume (LEB->PEB mapping)
+ *
+ * @gluebi_desc: gluebi UBI volume descriptor
+ * @gluebi_refcount: reference count of the gluebi MTD device
+ * @gluebi_mtd: MTD device description object of the gluebi MTD device
+ *
+ * The @corrupted field indicates that the volume's contents is corrupted.
+ * Since UBI protects only static volumes, this field is not relevant to
+ * dynamic volumes - it is user's responsibility to assure their data
+ * integrity.
+ *
+ * The @upd_marker flag indicates that this volume is either being updated at
+ * the moment or is damaged because of an unclean reboot.
+ */
+struct ubi_volume {
+	struct device dev;
+	struct cdev cdev;
+	struct ubi_device *ubi;
+	int vol_id;
+	int readers;
+	int writers;
+	int exclusive;
+	int removed;
+	int checked;
+
+	int reserved_pebs;
+	int vol_type;
+	int usable_leb_size;
+	int used_ebs;
+	int last_eb_bytes;
+	long long used_bytes;
+	int upd_marker;
+	int corrupted;
+	int alignment;
+	int data_pad;
+	int name_len;
+	char name[UBI_VOL_NAME_MAX+1];
+
+	int updating;
+	int upd_ebs;
+	long long upd_bytes;
+	long long upd_received;
+	void *upd_buf;
+
+	int *eba_tbl;
+
+#ifdef CONFIG_MTD_UBI_GLUEBI
+	/* Gluebi-related stuff may be compiled out */
+	struct ubi_volume_desc *gluebi_desc;
+	int gluebi_refcount;
+	struct mtd_info gluebi_mtd;
+#endif
+};
+
+/**
+ * struct ubi_volume_desc - descriptor of the UBI volume returned when it is
+ * opened.
+ * @vol: reference to the corresponding volume description object
+ * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE)
+ */
+struct ubi_volume_desc {
+	struct ubi_volume *vol;
+	int mode;
+};
+
+struct ubi_wl_entry;
+
+/**
+ * struct ubi_device - UBI device description structure
+ * @dev: class device object to use the the Linux device model
+ * @cdev: character device object to create character device
+ * @ubi_num: UBI device number
+ * @ubi_name: UBI device name
+ * @major: character device major number
+ * @vol_count: number of volumes in this UBI device
+ * @volumes: volumes of this UBI device
+ * @volumes_lock: protects @volumes, @rsvd_pebs, @avail_pebs, beb_rsvd_pebs,
+ * @beb_rsvd_level, @bad_peb_count, @good_peb_count, @vol_count, @vol->readers,
+ * @vol->writers, @vol->exclusive, @vol->removed, @vol->mapping and
+ * @vol->eba_tbl.
+ *
+ * @rsvd_pebs: count of reserved physical eraseblocks
+ * @avail_pebs: count of available physical eraseblocks
+ * @beb_rsvd_pebs: how many physical eraseblocks are reserved for bad PEB
+ * handling
+ * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling
+ *
+ * @vtbl_slots: how many slots are available in the volume table
+ * @vtbl_size: size of the volume table in bytes
+ * @vtbl: in-RAM volume table copy
+ *
+ * @max_ec: current highest erase counter value
+ * @mean_ec: current mean erase counter value
+ *
+ * global_sqnum: global sequence number
+ * @ltree_lock: protects the lock tree and @global_sqnum
+ * @ltree: the lock tree
+ * @vtbl_mutex: protects on-flash volume table
+ *
+ * @used: RB-tree of used physical eraseblocks
+ * @free: RB-tree of free physical eraseblocks
+ * @scrub: RB-tree of physical eraseblocks which need scrubbing
+ * @prot: protection trees
+ * @prot.pnum: protection tree indexed by physical eraseblock numbers
+ * @prot.aec: protection tree indexed by absolute erase counter value
+ * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from,
+ * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
+ * fields
+ * @wl_scheduled: non-zero if the wear-leveling was scheduled
+ * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
+ * physical eraseblock
+ * @abs_ec: absolute erase counter
+ * @move_from: physical eraseblock from where the data is being moved
+ * @move_to: physical eraseblock where the data is being moved to
+ * @move_from_put: if the "from" PEB was put
+ * @move_to_put: if the "to" PEB was put
+ * @works: list of pending works
+ * @works_count: count of pending works
+ * @bgt_thread: background thread description object
+ * @thread_enabled: if the background thread is enabled
+ * @bgt_name: background thread name
+ *
+ * @flash_size: underlying MTD device size (in bytes)
+ * @peb_count: count of physical eraseblocks on the MTD device
+ * @peb_size: physical eraseblock size
+ * @bad_peb_count: count of bad physical eraseblocks
+ * @good_peb_count: count of good physical eraseblocks
+ * @min_io_size: minimal input/output unit size of the underlying MTD device
+ * @hdrs_min_io_size: minimal I/O unit size used for VID and EC headers
+ * @ro_mode: if the UBI device is in read-only mode
+ * @leb_size: logical eraseblock size
+ * @leb_start: starting offset of logical eraseblocks within physical
+ * eraseblocks
+ * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size
+ * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size
+ * @vid_hdr_offset: starting offset of the volume identifier header (might be
+ * unaligned)
+ * @vid_hdr_aloffset: starting offset of the VID header aligned to
+ * @hdrs_min_io_size
+ * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset
+ * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or
+ * not
+ * @mtd: MTD device descriptor
+ */
+struct ubi_device {
+	struct cdev cdev;
+	struct device dev;
+	int ubi_num;
+	char ubi_name[sizeof(UBI_NAME_STR)+5];
+	int major;
+	int vol_count;
+	struct ubi_volume *volumes[UBI_MAX_VOLUMES+UBI_INT_VOL_COUNT];
+	spinlock_t volumes_lock;
+
+	int rsvd_pebs;
+	int avail_pebs;
+	int beb_rsvd_pebs;
+	int beb_rsvd_level;
+
+	int vtbl_slots;
+	int vtbl_size;
+	struct ubi_vtbl_record *vtbl;
+	struct mutex vtbl_mutex;
+
+	int max_ec;
+	int mean_ec;
+
+	/* EBA unit's stuff */
+	unsigned long long global_sqnum;
+	spinlock_t ltree_lock;
+	struct rb_root ltree;
+
+	/* Wear-leveling unit's stuff */
+	struct rb_root used;
+	struct rb_root free;
+	struct rb_root scrub;
+	struct {
+		struct rb_root pnum;
+		struct rb_root aec;
+	} prot;
+	spinlock_t wl_lock;
+	int wl_scheduled;
+	struct ubi_wl_entry **lookuptbl;
+	unsigned long long abs_ec;
+	struct ubi_wl_entry *move_from;
+	struct ubi_wl_entry *move_to;
+	int move_from_put;
+	int move_to_put;
+	struct list_head works;
+	int works_count;
+	struct task_struct *bgt_thread;
+	int thread_enabled;
+	char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2];
+
+	/* I/O unit's stuff */
+	long long flash_size;
+	int peb_count;
+	int peb_size;
+	int bad_peb_count;
+	int good_peb_count;
+	int min_io_size;
+	int hdrs_min_io_size;
+	int ro_mode;
+	int leb_size;
+	int leb_start;
+	int ec_hdr_alsize;
+	int vid_hdr_alsize;
+	int vid_hdr_offset;
+	int vid_hdr_aloffset;
+	int vid_hdr_shift;
+	int bad_allowed;
+	struct mtd_info *mtd;
+};
+
+extern struct file_operations ubi_cdev_operations;
+extern struct file_operations ubi_vol_cdev_operations;
+extern struct class *ubi_class;
+
+/* vtbl.c */
+int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
+			   struct ubi_vtbl_record *vtbl_rec);
+int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si);
+
+/* vmt.c */
+int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req);
+int ubi_remove_volume(struct ubi_volume_desc *desc);
+int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs);
+int ubi_add_volume(struct ubi_device *ubi, int vol_id);
+void ubi_free_volume(struct ubi_device *ubi, int vol_id);
+
+/* upd.c */
+int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes);
+int ubi_more_update_data(struct ubi_device *ubi, int vol_id,
+			 const void __user *buf, int count);
+
+/* misc.c */
+int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length);
+int ubi_check_volume(struct ubi_device *ubi, int vol_id);
+void ubi_calculate_reserved(struct ubi_device *ubi);
+
+/* gluebi.c */
+#ifdef CONFIG_MTD_UBI_GLUEBI
+int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol);
+int ubi_destroy_gluebi(struct ubi_volume *vol);
+#else
+#define ubi_create_gluebi(ubi, vol) 0
+#define ubi_destroy_gluebi(vol) 0
+#endif
+
+/* eba.c */
+int ubi_eba_unmap_leb(struct ubi_device *ubi, int vol_id, int lnum);
+int ubi_eba_read_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf,
+		     int offset, int len, int check);
+int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum,
+		      const void *buf, int offset, int len, int dtype);
+int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum,
+			 const void *buf, int len, int dtype,
+			 int used_ebs);
+int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum,
+			      const void *buf, int len, int dtype);
+int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
+		     struct ubi_vid_hdr *vid_hdr);
+int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si);
+void ubi_eba_close(const struct ubi_device *ubi);
+
+/* wl.c */
+int ubi_wl_get_peb(struct ubi_device *ubi, int dtype);
+int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture);
+int ubi_wl_flush(struct ubi_device *ubi);
+int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum);
+int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si);
+void ubi_wl_close(struct ubi_device *ubi);
+
+/* io.c */
+int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
+		int len);
+int ubi_io_write(const struct ubi_device *ubi, const void *buf, int pnum,
+		 int offset, int len);
+int ubi_io_sync_erase(const struct ubi_device *ubi, int pnum, int torture);
+int ubi_io_is_bad(const struct ubi_device *ubi, int pnum);
+int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum);
+int ubi_io_read_ec_hdr(const struct ubi_device *ubi, int pnum,
+		       struct ubi_ec_hdr *ec_hdr, int verbose);
+int ubi_io_write_ec_hdr(const struct ubi_device *ubi, int pnum,
+			struct ubi_ec_hdr *ec_hdr);
+int ubi_io_read_vid_hdr(const struct ubi_device *ubi, int pnum,
+			struct ubi_vid_hdr *vid_hdr, int verbose);
+int ubi_io_write_vid_hdr(const struct ubi_device *ubi, int pnum,
+			 struct ubi_vid_hdr *vid_hdr);
+
+/*
+ * ubi_rb_for_each_entry - walk an RB-tree.
+ * @rb: a pointer to type 'struct rb_node' to to use as a loop counter
+ * @pos: a pointer to RB-tree entry type to use as a loop counter
+ * @root: RB-tree's root
+ * @member: the name of the 'struct rb_node' within the RB-tree entry
+ */
+#define ubi_rb_for_each_entry(rb, pos, root, member)                         \
+	for (rb = rb_first(root),                                            \
+	     pos = (rb ? container_of(rb, typeof(*pos), member) : NULL);     \
+	     rb;                                                             \
+	     rb = rb_next(rb), pos = container_of(rb, typeof(*pos), member))
+
+/**
+ * ubi_zalloc_vid_hdr - allocate a volume identifier header object.
+ * @ubi: UBI device description object
+ *
+ * This function returns a pointer to the newly allocated and zero-filled
+ * volume identifier header object in case of success and %NULL in case of
+ * failure.
+ */
+static inline struct ubi_vid_hdr *ubi_zalloc_vid_hdr(const struct ubi_device *ubi)
+{
+	void *vid_hdr;
+
+	vid_hdr = kzalloc(ubi->vid_hdr_alsize, GFP_KERNEL);
+	if (!vid_hdr)
+		return NULL;
+
+	/*
+	 * VID headers may be stored at un-aligned flash offsets, so we shift
+	 * the pointer.
+	 */
+	return vid_hdr + ubi->vid_hdr_shift;
+}
+
+/**
+ * ubi_free_vid_hdr - free a volume identifier header object.
+ * @ubi: UBI device description object
+ * @vid_hdr: the object to free
+ */
+static inline void ubi_free_vid_hdr(const struct ubi_device *ubi,
+				    struct ubi_vid_hdr *vid_hdr)
+{
+	void *p = vid_hdr;
+
+	if (!p)
+		return;
+
+	kfree(p - ubi->vid_hdr_shift);
+}
+
+/*
+ * This function is equivalent to 'ubi_io_read()', but @offset is relative to
+ * the beginning of the logical eraseblock, not to the beginning of the
+ * physical eraseblock.
+ */
+static inline int ubi_io_read_data(const struct ubi_device *ubi, void *buf,
+				   int pnum, int offset, int len)
+{
+	ubi_assert(offset >= 0);
+	return ubi_io_read(ubi, buf, pnum, offset + ubi->leb_start, len);
+}
+
+/*
+ * This function is equivalent to 'ubi_io_write()', but @offset is relative to
+ * the beginning of the logical eraseblock, not to the beginning of the
+ * physical eraseblock.
+ */
+static inline int ubi_io_write_data(const struct ubi_device *ubi, const void *buf,
+				    int pnum, int offset, int len)
+{
+	ubi_assert(offset >= 0);
+	return ubi_io_write(ubi, buf, pnum, offset + ubi->leb_start, len);
+}
+
+/**
+ * ubi_ro_mode - switch to read-only mode.
+ * @ubi: UBI device description object
+ */
+static inline void ubi_ro_mode(struct ubi_device *ubi)
+{
+	ubi->ro_mode = 1;
+	ubi_warn("switch to read-only mode");
+}
+
+/**
+ * vol_id2idx - get table index by volume ID.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ */
+static inline int vol_id2idx(const struct ubi_device *ubi, int vol_id)
+{
+	if (vol_id >= UBI_INTERNAL_VOL_START)
+		return vol_id - UBI_INTERNAL_VOL_START + ubi->vtbl_slots;
+	else
+		return vol_id;
+}
+
+/**
+ * idx2vol_id - get volume ID by table index.
+ * @ubi: UBI device description object
+ * @idx: table index
+ */
+static inline int idx2vol_id(const struct ubi_device *ubi, int idx)
+{
+	if (idx >= ubi->vtbl_slots)
+		return idx - ubi->vtbl_slots + UBI_INTERNAL_VOL_START;
+	else
+		return idx;
+}
+
+#endif /* !__UBI_UBI_H__ */
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
new file mode 100644
index 00000000000..8925b977e3d
--- /dev/null
+++ b/drivers/mtd/ubi/upd.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ * Copyright (c) Nokia Corporation, 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ *
+ * Jan 2007: Alexander Schmidt, hacked per-volume update.
+ */
+
+/*
+ * This file contains implementation of the volume update functionality.
+ *
+ * The update operation is based on the per-volume update marker which is
+ * stored in the volume table. The update marker is set before the update
+ * starts, and removed after the update has been finished. So if the update was
+ * interrupted by an unclean re-boot or due to some other reasons, the update
+ * marker stays on the flash media and UBI finds it when it attaches the MTD
+ * device next time. If the update marker is set for a volume, the volume is
+ * treated as damaged and most I/O operations are prohibited. Only a new update
+ * operation is allowed.
+ *
+ * Note, in general it is possible to implement the update operation as a
+ * transaction with a roll-back capability.
+ */
+
+#include <linux/err.h>
+#include <asm/uaccess.h>
+#include <asm/div64.h>
+#include "ubi.h"
+
+/**
+ * set_update_marker - set update marker.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ *
+ * This function sets the update marker flag for volume @vol_id. Returns zero
+ * in case of success and a negative error code in case of failure.
+ */
+static int set_update_marker(struct ubi_device *ubi, int vol_id)
+{
+	int err;
+	struct ubi_vtbl_record vtbl_rec;
+	struct ubi_volume *vol = ubi->volumes[vol_id];
+
+	dbg_msg("set update marker for volume %d", vol_id);
+
+	if (vol->upd_marker) {
+		ubi_assert(ubi->vtbl[vol_id].upd_marker);
+		dbg_msg("already set");
+		return 0;
+	}
+
+	memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record));
+	vtbl_rec.upd_marker = 1;
+
+	err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
+	vol->upd_marker = 1;
+	return err;
+}
+
+/**
+ * clear_update_marker - clear update marker.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @bytes: new data size in bytes
+ *
+ * This function clears the update marker for volume @vol_id, sets new volume
+ * data size and clears the "corrupted" flag (static volumes only). Returns
+ * zero in case of success and a negative error code in case of failure.
+ */
+static int clear_update_marker(struct ubi_device *ubi, int vol_id, long long bytes)
+{
+	int err;
+	uint64_t tmp;
+	struct ubi_vtbl_record vtbl_rec;
+	struct ubi_volume *vol = ubi->volumes[vol_id];
+
+	dbg_msg("clear update marker for volume %d", vol_id);
+
+	memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record));
+	ubi_assert(vol->upd_marker && vtbl_rec.upd_marker);
+	vtbl_rec.upd_marker = 0;
+
+	if (vol->vol_type == UBI_STATIC_VOLUME) {
+		vol->corrupted = 0;
+		vol->used_bytes = tmp = bytes;
+		vol->last_eb_bytes = do_div(tmp, vol->usable_leb_size);
+		vol->used_ebs = tmp;
+		if (vol->last_eb_bytes)
+			vol->used_ebs += 1;
+		else
+			vol->last_eb_bytes = vol->usable_leb_size;
+	}
+
+	err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
+	vol->upd_marker = 0;
+	return err;
+}
+
+/**
+ * ubi_start_update - start volume update.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @bytes: update bytes
+ *
+ * This function starts volume update operation. If @bytes is zero, the volume
+ * is just wiped out. Returns zero in case of success and a negative error code
+ * in case of failure.
+ */
+int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes)
+{
+	int i, err;
+	uint64_t tmp;
+	struct ubi_volume *vol = ubi->volumes[vol_id];
+
+	dbg_msg("start update of volume %d, %llu bytes", vol_id, bytes);
+	vol->updating = 1;
+
+	err = set_update_marker(ubi, vol_id);
+	if (err)
+		return err;
+
+	/* Before updating - wipe out the volume */
+	for (i = 0; i < vol->reserved_pebs; i++) {
+		err = ubi_eba_unmap_leb(ubi, vol_id, i);
+		if (err)
+			return err;
+	}
+
+	if (bytes == 0) {
+		err = clear_update_marker(ubi, vol_id, 0);
+		if (err)
+			return err;
+		err = ubi_wl_flush(ubi);
+		if (!err)
+			vol->updating = 0;
+	}
+
+	vol->upd_buf = kmalloc(ubi->leb_size, GFP_KERNEL);
+	if (!vol->upd_buf)
+		return -ENOMEM;
+
+	tmp = bytes;
+	vol->upd_ebs = !!do_div(tmp, vol->usable_leb_size);
+	vol->upd_ebs += tmp;
+	vol->upd_bytes = bytes;
+	vol->upd_received = 0;
+	return 0;
+}
+
+/**
+ * write_leb - write update data.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ * @buf: data to write
+ * @len: data size
+ * @used_ebs: how many logical eraseblocks will this volume contain (static
+ * volumes only)
+ *
+ * This function writes update data to corresponding logical eraseblock. In
+ * case of dynamic volume, this function checks if the data contains 0xFF bytes
+ * at the end. If yes, the 0xFF bytes are cut and not written. So if the whole
+ * buffer contains only 0xFF bytes, the LEB is left unmapped.
+ *
+ * The reason why we skip the trailing 0xFF bytes in case of dynamic volume is
+ * that we want to make sure that more data may be appended to the logical
+ * eraseblock in future. Indeed, writing 0xFF bytes may have side effects and
+ * this PEB won't be writable anymore. So if one writes the file-system image
+ * to the UBI volume where 0xFFs mean free space - UBI makes sure this free
+ * space is writable after the update.
+ *
+ * We do not do this for static volumes because they are read-only. But this
+ * also cannot be done because we have to store per-LEB CRC and the correct
+ * data length.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int write_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf,
+		     int len, int used_ebs)
+{
+	int err, l;
+	struct ubi_volume *vol = ubi->volumes[vol_id];
+
+	if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
+		l = ALIGN(len, ubi->min_io_size);
+		memset(buf + len, 0xFF, l - len);
+
+		l = ubi_calc_data_len(ubi, buf, l);
+		if (l == 0) {
+			dbg_msg("all %d bytes contain 0xFF - skip", len);
+			return 0;
+		}
+		if (len != l)
+			dbg_msg("skip last %d bytes (0xFF)", len - l);
+
+		err = ubi_eba_write_leb(ubi, vol_id, lnum, buf, 0, l,
+					UBI_UNKNOWN);
+	} else {
+		/*
+		 * When writing static volume, and this is the last logical
+		 * eraseblock, the length (@len) does not have to be aligned to
+		 * the minimal flash I/O unit. The 'ubi_eba_write_leb_st()'
+		 * function accepts exact (unaligned) length and stores it in
+		 * the VID header. And it takes care of proper alignment by
+		 * padding the buffer. Here we just make sure the padding will
+		 * contain zeros, not random trash.
+		 */
+		memset(buf + len, 0, vol->usable_leb_size - len);
+		err = ubi_eba_write_leb_st(ubi, vol_id, lnum, buf, len,
+					   UBI_UNKNOWN, used_ebs);
+	}
+
+	return err;
+}
+
+/**
+ * ubi_more_update_data - write more update data.
+ * @vol: volume description object
+ * @buf: write data (user-space memory buffer)
+ * @count: how much bytes to write
+ *
+ * This function writes more data to the volume which is being updated. It may
+ * be called arbitrary number of times until all of the update data arrive.
+ * This function returns %0 in case of success, number of bytes written during
+ * the last call if the whole volume update was successfully finished, and a
+ * negative error code in case of failure.
+ */
+int ubi_more_update_data(struct ubi_device *ubi, int vol_id,
+			 const void __user *buf, int count)
+{
+	uint64_t tmp;
+	struct ubi_volume *vol = ubi->volumes[vol_id];
+	int lnum, offs, err = 0, len, to_write = count;
+
+	dbg_msg("write %d of %lld bytes, %lld already passed",
+		count, vol->upd_bytes, vol->upd_received);
+
+	if (ubi->ro_mode)
+		return -EROFS;
+
+	tmp = vol->upd_received;
+	offs = do_div(tmp, vol->usable_leb_size);
+	lnum = tmp;
+
+	if (vol->upd_received + count > vol->upd_bytes)
+		to_write = count = vol->upd_bytes - vol->upd_received;
+
+	/*
+	 * When updating volumes, we accumulate whole logical eraseblock of
+	 * data and write it at once.
+	 */
+	if (offs != 0) {
+		/*
+		 * This is a write to the middle of the logical eraseblock. We
+		 * copy the data to our update buffer and wait for more data or
+		 * flush it if the whole eraseblock is written or the update
+		 * is finished.
+		 */
+
+		len = vol->usable_leb_size - offs;
+		if (len > count)
+			len = count;
+
+		err = copy_from_user(vol->upd_buf + offs, buf, len);
+		if (err)
+			return -EFAULT;
+
+		if (offs + len == vol->usable_leb_size ||
+		    vol->upd_received + len == vol->upd_bytes) {
+			int flush_len = offs + len;
+
+			/*
+			 * OK, we gathered either the whole eraseblock or this
+			 * is the last chunk, it's time to flush the buffer.
+			 */
+			ubi_assert(flush_len <= vol->usable_leb_size);
+			err = write_leb(ubi, vol_id, lnum, vol->upd_buf,
+					flush_len, vol->upd_ebs);
+			if (err)
+				return err;
+		}
+
+		vol->upd_received += len;
+		count -= len;
+		buf += len;
+		lnum += 1;
+	}
+
+	/*
+	 * If we've got more to write, let's continue. At this point we know we
+	 * are starting from the beginning of an eraseblock.
+	 */
+	while (count) {
+		if (count > vol->usable_leb_size)
+			len = vol->usable_leb_size;
+		else
+			len = count;
+
+		err = copy_from_user(vol->upd_buf, buf, len);
+		if (err)
+			return -EFAULT;
+
+		if (len == vol->usable_leb_size ||
+		    vol->upd_received + len == vol->upd_bytes) {
+			err = write_leb(ubi, vol_id, lnum, vol->upd_buf, len,
+					vol->upd_ebs);
+			if (err)
+				break;
+		}
+
+		vol->upd_received += len;
+		count -= len;
+		lnum += 1;
+		buf += len;
+	}
+
+	ubi_assert(vol->upd_received <= vol->upd_bytes);
+	if (vol->upd_received == vol->upd_bytes) {
+		/* The update is finished, clear the update marker */
+		err = clear_update_marker(ubi, vol_id, vol->upd_bytes);
+		if (err)
+			return err;
+		err = ubi_wl_flush(ubi);
+		if (err == 0) {
+			err = to_write;
+			kfree(vol->upd_buf);
+			vol->updating = 0;
+		}
+	}
+
+	return err;
+}
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
new file mode 100644
index 00000000000..622d0d18952
--- /dev/null
+++ b/drivers/mtd/ubi/vmt.c
@@ -0,0 +1,809 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation;  either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file contains implementation of volume creation, deletion, updating and
+ * resizing.
+ */
+
+#include <linux/err.h>
+#include <asm/div64.h>
+#include "ubi.h"
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+static void paranoid_check_volumes(struct ubi_device *ubi);
+#else
+#define paranoid_check_volumes(ubi)
+#endif
+
+static ssize_t vol_attribute_show(struct device *dev,
+				  struct device_attribute *attr, char *buf);
+
+/* Device attributes corresponding to files in '/<sysfs>/class/ubi/ubiX_Y' */
+static struct device_attribute vol_reserved_ebs =
+	__ATTR(reserved_ebs, S_IRUGO, vol_attribute_show, NULL);
+static struct device_attribute vol_type =
+	__ATTR(type, S_IRUGO, vol_attribute_show, NULL);
+static struct device_attribute vol_name =
+	__ATTR(name, S_IRUGO, vol_attribute_show, NULL);
+static struct device_attribute vol_corrupted =
+	__ATTR(corrupted, S_IRUGO, vol_attribute_show, NULL);
+static struct device_attribute vol_alignment =
+	__ATTR(alignment, S_IRUGO, vol_attribute_show, NULL);
+static struct device_attribute vol_usable_eb_size =
+	__ATTR(usable_eb_size, S_IRUGO, vol_attribute_show, NULL);
+static struct device_attribute vol_data_bytes =
+	__ATTR(data_bytes, S_IRUGO, vol_attribute_show, NULL);
+static struct device_attribute vol_upd_marker =
+	__ATTR(upd_marker, S_IRUGO, vol_attribute_show, NULL);
+
+/*
+ * "Show" method for files in '/<sysfs>/class/ubi/ubiX_Y/'.
+ *
+ * Consider a situation:
+ * A. process 1 opens a sysfs file related to volume Y, say
+ *    /<sysfs>/class/ubi/ubiX_Y/reserved_ebs;
+ * B. process 2 removes volume Y;
+ * C. process 1 starts reading the /<sysfs>/class/ubi/ubiX_Y/reserved_ebs file;
+ *
+ * What we want to do in a situation like that is to return error when the file
+ * is read. This is done by means of the 'removed' flag and the 'vol_lock' of
+ * the UBI volume description object.
+ */
+static ssize_t vol_attribute_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	int ret;
+	struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev);
+
+	spin_lock(&vol->ubi->volumes_lock);
+	if (vol->removed) {
+		spin_unlock(&vol->ubi->volumes_lock);
+		return -ENODEV;
+	}
+	if (attr == &vol_reserved_ebs)
+		ret = sprintf(buf, "%d\n", vol->reserved_pebs);
+	else if (attr == &vol_type) {
+		const char *tp;
+		tp = vol->vol_type == UBI_DYNAMIC_VOLUME ? "dynamic" : "static";
+		ret = sprintf(buf, "%s\n", tp);
+	} else if (attr == &vol_name)
+		ret = sprintf(buf, "%s\n", vol->name);
+	else if (attr == &vol_corrupted)
+		ret = sprintf(buf, "%d\n", vol->corrupted);
+	else if (attr == &vol_alignment)
+		ret = sprintf(buf, "%d\n", vol->alignment);
+	else if (attr == &vol_usable_eb_size) {
+		ret = sprintf(buf, "%d\n", vol->usable_leb_size);
+	} else if (attr == &vol_data_bytes)
+		ret = sprintf(buf, "%lld\n", vol->used_bytes);
+	else if (attr == &vol_upd_marker)
+		ret = sprintf(buf, "%d\n", vol->upd_marker);
+	else
+		BUG();
+	spin_unlock(&vol->ubi->volumes_lock);
+	return ret;
+}
+
+/* Release method for volume devices */
+static void vol_release(struct device *dev)
+{
+	struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev);
+	ubi_assert(vol->removed);
+	kfree(vol);
+}
+
+/**
+ * volume_sysfs_init - initialize sysfs for new volume.
+ * @ubi: UBI device description object
+ * @vol: volume description object
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ *
+ * Note, this function does not free allocated resources in case of failure -
+ * the caller does it. This is because this would cause release() here and the
+ * caller would oops.
+ */
+static int volume_sysfs_init(struct ubi_device *ubi, struct ubi_volume *vol)
+{
+	int err;
+
+	err = device_create_file(&vol->dev, &vol_reserved_ebs);
+	if (err)
+		return err;
+	err = device_create_file(&vol->dev, &vol_type);
+	if (err)
+		return err;
+	err = device_create_file(&vol->dev, &vol_name);
+	if (err)
+		return err;
+	err = device_create_file(&vol->dev, &vol_corrupted);
+	if (err)
+		return err;
+	err = device_create_file(&vol->dev, &vol_alignment);
+	if (err)
+		return err;
+	err = device_create_file(&vol->dev, &vol_usable_eb_size);
+	if (err)
+		return err;
+	err = device_create_file(&vol->dev, &vol_data_bytes);
+	if (err)
+		return err;
+	err = device_create_file(&vol->dev, &vol_upd_marker);
+	if (err)
+		return err;
+	return 0;
+}
+
+/**
+ * volume_sysfs_close - close sysfs for a volume.
+ * @vol: volume description object
+ */
+static void volume_sysfs_close(struct ubi_volume *vol)
+{
+	device_remove_file(&vol->dev, &vol_upd_marker);
+	device_remove_file(&vol->dev, &vol_data_bytes);
+	device_remove_file(&vol->dev, &vol_usable_eb_size);
+	device_remove_file(&vol->dev, &vol_alignment);
+	device_remove_file(&vol->dev, &vol_corrupted);
+	device_remove_file(&vol->dev, &vol_name);
+	device_remove_file(&vol->dev, &vol_type);
+	device_remove_file(&vol->dev, &vol_reserved_ebs);
+	device_unregister(&vol->dev);
+}
+
+/**
+ * ubi_create_volume - create volume.
+ * @ubi: UBI device description object
+ * @req: volume creation request
+ *
+ * This function creates volume described by @req. If @req->vol_id id
+ * %UBI_VOL_NUM_AUTO, this function automatically assigne ID to the new volume
+ * and saves it in @req->vol_id. Returns zero in case of success and a negative
+ * error code in case of failure.
+ */
+int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
+{
+	int i, err, vol_id = req->vol_id;
+	struct ubi_volume *vol;
+	struct ubi_vtbl_record vtbl_rec;
+	uint64_t bytes;
+
+	if (ubi->ro_mode)
+		return -EROFS;
+
+	vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL);
+	if (!vol)
+		return -ENOMEM;
+
+	spin_lock(&ubi->volumes_lock);
+
+	if (vol_id == UBI_VOL_NUM_AUTO) {
+		/* Find unused volume ID */
+		dbg_msg("search for vacant volume ID");
+		for (i = 0; i < ubi->vtbl_slots; i++)
+			if (!ubi->volumes[i]) {
+				vol_id = i;
+				break;
+			}
+
+		if (vol_id == UBI_VOL_NUM_AUTO) {
+			dbg_err("out of volume IDs");
+			err = -ENFILE;
+			goto out_unlock;
+		}
+		req->vol_id = vol_id;
+	}
+
+	dbg_msg("volume ID %d, %llu bytes, type %d, name %s",
+		vol_id, (unsigned long long)req->bytes,
+		(int)req->vol_type, req->name);
+
+	/* Ensure that this volume does not exist */
+	err = -EEXIST;
+	if (ubi->volumes[vol_id]) {
+		dbg_err("volume %d already exists", vol_id);
+		goto out_unlock;
+	}
+
+	/* Ensure that the name is unique */
+	for (i = 0; i < ubi->vtbl_slots; i++)
+		if (ubi->volumes[i] &&
+		    ubi->volumes[i]->name_len == req->name_len &&
+		    strcmp(ubi->volumes[i]->name, req->name) == 0) {
+			dbg_err("volume \"%s\" exists (ID %d)", req->name, i);
+			goto out_unlock;
+		}
+
+        /* Calculate how many eraseblocks are requested */
+	vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment;
+	bytes = req->bytes;
+	if (do_div(bytes, vol->usable_leb_size))
+		vol->reserved_pebs = 1;
+	vol->reserved_pebs += bytes;
+
+	/* Reserve physical eraseblocks */
+	if (vol->reserved_pebs > ubi->avail_pebs) {
+		dbg_err("not enough PEBs, only %d available", ubi->avail_pebs);
+		spin_unlock(&ubi->volumes_lock);
+		err = -ENOSPC;
+		goto out_unlock;
+	}
+	ubi->avail_pebs -= vol->reserved_pebs;
+	ubi->rsvd_pebs += vol->reserved_pebs;
+
+	vol->vol_id    = vol_id;
+	vol->alignment = req->alignment;
+	vol->data_pad  = ubi->leb_size % vol->alignment;
+	vol->vol_type  = req->vol_type;
+	vol->name_len  = req->name_len;
+	memcpy(vol->name, req->name, vol->name_len + 1);
+	vol->exclusive = 1;
+	vol->ubi = ubi;
+	ubi->volumes[vol_id] = vol;
+	spin_unlock(&ubi->volumes_lock);
+
+	/*
+	 * Finish all pending erases because there may be some LEBs belonging
+	 * to the same volume ID.
+	 */
+	err = ubi_wl_flush(ubi);
+	if (err)
+		goto out_acc;
+
+	vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int), GFP_KERNEL);
+	if (!vol->eba_tbl) {
+		err = -ENOMEM;
+		goto out_acc;
+	}
+
+	for (i = 0; i < vol->reserved_pebs; i++)
+		vol->eba_tbl[i] = UBI_LEB_UNMAPPED;
+
+	if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
+		vol->used_ebs = vol->reserved_pebs;
+		vol->last_eb_bytes = vol->usable_leb_size;
+		vol->used_bytes = vol->used_ebs * vol->usable_leb_size;
+	} else {
+		bytes = vol->used_bytes;
+		vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size);
+		vol->used_ebs = bytes;
+		if (vol->last_eb_bytes)
+			vol->used_ebs += 1;
+		else
+			vol->last_eb_bytes = vol->usable_leb_size;
+	}
+
+	/* Register character device for the volume */
+	cdev_init(&vol->cdev, &ubi_vol_cdev_operations);
+	vol->cdev.owner = THIS_MODULE;
+	err = cdev_add(&vol->cdev, MKDEV(ubi->major, vol_id + 1), 1);
+	if (err) {
+		ubi_err("cannot add character device for volume %d", vol_id);
+		goto out_mapping;
+	}
+
+	err = ubi_create_gluebi(ubi, vol);
+	if (err)
+		goto out_cdev;
+
+	vol->dev.release = vol_release;
+	vol->dev.parent = &ubi->dev;
+	vol->dev.devt = MKDEV(ubi->major, vol->vol_id + 1);
+	vol->dev.class = ubi_class;
+	sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id);
+	err = device_register(&vol->dev);
+	if (err)
+		goto out_gluebi;
+
+	err = volume_sysfs_init(ubi, vol);
+	if (err)
+		goto out_sysfs;
+
+	/* Fill volume table record */
+	memset(&vtbl_rec, 0, sizeof(struct ubi_vtbl_record));
+	vtbl_rec.reserved_pebs = cpu_to_ubi32(vol->reserved_pebs);
+	vtbl_rec.alignment     = cpu_to_ubi32(vol->alignment);
+	vtbl_rec.data_pad      = cpu_to_ubi32(vol->data_pad);
+	vtbl_rec.name_len      = cpu_to_ubi16(vol->name_len);
+	if (vol->vol_type == UBI_DYNAMIC_VOLUME)
+		vtbl_rec.vol_type = UBI_VID_DYNAMIC;
+	else
+		vtbl_rec.vol_type = UBI_VID_STATIC;
+	memcpy(vtbl_rec.name, vol->name, vol->name_len + 1);
+
+	err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
+	if (err)
+		goto out_sysfs;
+
+	spin_lock(&ubi->volumes_lock);
+	ubi->vol_count += 1;
+	vol->exclusive = 0;
+	spin_unlock(&ubi->volumes_lock);
+
+	paranoid_check_volumes(ubi);
+	return 0;
+
+out_gluebi:
+	err = ubi_destroy_gluebi(vol);
+out_cdev:
+	cdev_del(&vol->cdev);
+out_mapping:
+	kfree(vol->eba_tbl);
+out_acc:
+	spin_lock(&ubi->volumes_lock);
+	ubi->rsvd_pebs -= vol->reserved_pebs;
+	ubi->avail_pebs += vol->reserved_pebs;
+out_unlock:
+	spin_unlock(&ubi->volumes_lock);
+	kfree(vol);
+	return err;
+
+	/*
+	 * We are registered, so @vol is destroyed in the release function and
+	 * we have to de-initialize differently.
+	 */
+out_sysfs:
+	err = ubi_destroy_gluebi(vol);
+	cdev_del(&vol->cdev);
+	kfree(vol->eba_tbl);
+	spin_lock(&ubi->volumes_lock);
+	ubi->rsvd_pebs -= vol->reserved_pebs;
+	ubi->avail_pebs += vol->reserved_pebs;
+	spin_unlock(&ubi->volumes_lock);
+	volume_sysfs_close(vol);
+	return err;
+}
+
+/**
+ * ubi_remove_volume - remove volume.
+ * @desc: volume descriptor
+ *
+ * This function removes volume described by @desc. The volume has to be opened
+ * in "exclusive" mode. Returns zero in case of success and a negative error
+ * code in case of failure.
+ */
+int ubi_remove_volume(struct ubi_volume_desc *desc)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs;
+
+	dbg_msg("remove UBI volume %d", vol_id);
+	ubi_assert(desc->mode == UBI_EXCLUSIVE);
+	ubi_assert(vol == ubi->volumes[vol_id]);
+
+	if (ubi->ro_mode)
+		return -EROFS;
+
+	err = ubi_destroy_gluebi(vol);
+	if (err)
+		return err;
+
+	err = ubi_change_vtbl_record(ubi, vol_id, NULL);
+	if (err)
+		return err;
+
+	for (i = 0; i < vol->reserved_pebs; i++) {
+		err = ubi_eba_unmap_leb(ubi, vol_id, i);
+		if (err)
+			return err;
+	}
+
+	spin_lock(&ubi->volumes_lock);
+	vol->removed = 1;
+	ubi->volumes[vol_id] = NULL;
+	spin_unlock(&ubi->volumes_lock);
+
+	kfree(vol->eba_tbl);
+	vol->eba_tbl = NULL;
+	cdev_del(&vol->cdev);
+	volume_sysfs_close(vol);
+	kfree(desc);
+
+	spin_lock(&ubi->volumes_lock);
+	ubi->rsvd_pebs -= reserved_pebs;
+	ubi->avail_pebs += reserved_pebs;
+	i = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs;
+	if (i > 0) {
+		i = ubi->avail_pebs >= i ? i : ubi->avail_pebs;
+		ubi->avail_pebs -= i;
+		ubi->rsvd_pebs += i;
+		ubi->beb_rsvd_pebs += i;
+		if (i > 0)
+			ubi_msg("reserve more %d PEBs", i);
+	}
+	ubi->vol_count -= 1;
+	spin_unlock(&ubi->volumes_lock);
+
+	paranoid_check_volumes(ubi);
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+/**
+ * ubi_resize_volume - re-size volume.
+ * @desc: volume descriptor
+ * @reserved_pebs: new size in physical eraseblocks
+ *
+ * This function returns zero in case of success, and a negative error code in
+ * case of failure.
+ */
+int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
+{
+	int i, err, pebs, *new_mapping;
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	struct ubi_vtbl_record vtbl_rec;
+	int vol_id = vol->vol_id;
+
+	if (ubi->ro_mode)
+		return -EROFS;
+
+	dbg_msg("re-size volume %d to from %d to %d PEBs",
+		vol_id, vol->reserved_pebs, reserved_pebs);
+	ubi_assert(desc->mode == UBI_EXCLUSIVE);
+	ubi_assert(vol == ubi->volumes[vol_id]);
+
+	if (vol->vol_type == UBI_STATIC_VOLUME &&
+	    reserved_pebs < vol->used_ebs) {
+		dbg_err("too small size %d, %d LEBs contain data",
+			reserved_pebs, vol->used_ebs);
+		return -EINVAL;
+	}
+
+	/* If the size is the same, we have nothing to do */
+	if (reserved_pebs == vol->reserved_pebs)
+		return 0;
+
+	new_mapping = kmalloc(reserved_pebs * sizeof(int), GFP_KERNEL);
+	if (!new_mapping)
+		return -ENOMEM;
+
+	for (i = 0; i < reserved_pebs; i++)
+		new_mapping[i] = UBI_LEB_UNMAPPED;
+
+	/* Reserve physical eraseblocks */
+	pebs = reserved_pebs - vol->reserved_pebs;
+	if (pebs > 0) {
+		spin_lock(&ubi->volumes_lock);
+		if (pebs > ubi->avail_pebs) {
+			dbg_err("not enough PEBs: requested %d, available %d",
+				pebs, ubi->avail_pebs);
+			spin_unlock(&ubi->volumes_lock);
+			err = -ENOSPC;
+			goto out_free;
+		}
+		ubi->avail_pebs -= pebs;
+		ubi->rsvd_pebs += pebs;
+		for (i = 0; i < vol->reserved_pebs; i++)
+			new_mapping[i] = vol->eba_tbl[i];
+		kfree(vol->eba_tbl);
+		vol->eba_tbl = new_mapping;
+		spin_unlock(&ubi->volumes_lock);
+	}
+
+	/* Change volume table record */
+	memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record));
+	vtbl_rec.reserved_pebs = cpu_to_ubi32(reserved_pebs);
+	err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
+	if (err)
+		goto out_acc;
+
+	if (pebs < 0) {
+		for (i = 0; i < -pebs; i++) {
+			err = ubi_eba_unmap_leb(ubi, vol_id, reserved_pebs + i);
+			if (err)
+				goto out_acc;
+		}
+		spin_lock(&ubi->volumes_lock);
+		ubi->rsvd_pebs += pebs;
+		ubi->avail_pebs -= pebs;
+		pebs = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs;
+		if (pebs > 0) {
+			pebs = ubi->avail_pebs >= pebs ? pebs : ubi->avail_pebs;
+			ubi->avail_pebs -= pebs;
+			ubi->rsvd_pebs += pebs;
+			ubi->beb_rsvd_pebs += pebs;
+			if (pebs > 0)
+				ubi_msg("reserve more %d PEBs", pebs);
+		}
+		for (i = 0; i < reserved_pebs; i++)
+			new_mapping[i] = vol->eba_tbl[i];
+		kfree(vol->eba_tbl);
+		vol->eba_tbl = new_mapping;
+		spin_unlock(&ubi->volumes_lock);
+	}
+
+	vol->reserved_pebs = reserved_pebs;
+	if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
+		vol->used_ebs = reserved_pebs;
+		vol->last_eb_bytes = vol->usable_leb_size;
+		vol->used_bytes = vol->used_ebs * vol->usable_leb_size;
+	}
+
+	paranoid_check_volumes(ubi);
+	return 0;
+
+out_acc:
+	if (pebs > 0) {
+		spin_lock(&ubi->volumes_lock);
+		ubi->rsvd_pebs -= pebs;
+		ubi->avail_pebs += pebs;
+		spin_unlock(&ubi->volumes_lock);
+	}
+out_free:
+	kfree(new_mapping);
+	return err;
+}
+
+/**
+ * ubi_add_volume - add volume.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ *
+ * This function adds an existin volume and initializes all its data
+ * structures. Returnes zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubi_add_volume(struct ubi_device *ubi, int vol_id)
+{
+	int err;
+	struct ubi_volume *vol = ubi->volumes[vol_id];
+
+	dbg_msg("add volume %d", vol_id);
+	ubi_dbg_dump_vol_info(vol);
+	ubi_assert(vol);
+
+	/* Register character device for the volume */
+	cdev_init(&vol->cdev, &ubi_vol_cdev_operations);
+	vol->cdev.owner = THIS_MODULE;
+	err = cdev_add(&vol->cdev, MKDEV(ubi->major, vol->vol_id + 1), 1);
+	if (err) {
+		ubi_err("cannot add character device for volume %d", vol_id);
+		return err;
+	}
+
+	err = ubi_create_gluebi(ubi, vol);
+	if (err)
+		goto out_cdev;
+
+	vol->dev.release = vol_release;
+	vol->dev.parent = &ubi->dev;
+	vol->dev.devt = MKDEV(ubi->major, vol->vol_id + 1);
+	vol->dev.class = ubi_class;
+	sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id);
+	err = device_register(&vol->dev);
+	if (err)
+		goto out_gluebi;
+
+	err = volume_sysfs_init(ubi, vol);
+	if (err) {
+		cdev_del(&vol->cdev);
+		err = ubi_destroy_gluebi(vol);
+		volume_sysfs_close(vol);
+		return err;
+	}
+
+	paranoid_check_volumes(ubi);
+	return 0;
+
+out_gluebi:
+	err = ubi_destroy_gluebi(vol);
+out_cdev:
+	cdev_del(&vol->cdev);
+	return err;
+}
+
+/**
+ * ubi_free_volume - free volume.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ *
+ * This function frees all resources for volume @vol_id but does not remove it.
+ * Used only when the UBI device is detached.
+ */
+void ubi_free_volume(struct ubi_device *ubi, int vol_id)
+{
+	int err;
+	struct ubi_volume *vol = ubi->volumes[vol_id];
+
+	dbg_msg("free volume %d", vol_id);
+	ubi_assert(vol);
+
+	vol->removed = 1;
+	err = ubi_destroy_gluebi(vol);
+	ubi->volumes[vol_id] = NULL;
+	cdev_del(&vol->cdev);
+	volume_sysfs_close(vol);
+}
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+
+/**
+ * paranoid_check_volume - check volume information.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ */
+static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id)
+{
+	int idx = vol_id2idx(ubi, vol_id);
+	int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker;
+	const struct ubi_volume *vol = ubi->volumes[idx];
+	long long n;
+	const char *name;
+
+	reserved_pebs = ubi32_to_cpu(ubi->vtbl[vol_id].reserved_pebs);
+
+	if (!vol) {
+		if (reserved_pebs) {
+			ubi_err("no volume info, but volume exists");
+			goto fail;
+		}
+		return;
+	}
+
+	if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 ||
+	    vol->name_len < 0) {
+		ubi_err("negative values");
+		goto fail;
+	}
+	if (vol->alignment > ubi->leb_size || vol->alignment == 0) {
+		ubi_err("bad alignment");
+		goto fail;
+	}
+
+	n = vol->alignment % ubi->min_io_size;
+	if (vol->alignment != 1 && n) {
+		ubi_err("alignment is not multiple of min I/O unit");
+		goto fail;
+	}
+
+	n = ubi->leb_size % vol->alignment;
+	if (vol->data_pad != n) {
+		ubi_err("bad data_pad, has to be %lld", n);
+		goto fail;
+	}
+
+	if (vol->vol_type != UBI_DYNAMIC_VOLUME &&
+	    vol->vol_type != UBI_STATIC_VOLUME) {
+		ubi_err("bad vol_type");
+		goto fail;
+	}
+
+	if (vol->upd_marker != 0 && vol->upd_marker != 1) {
+		ubi_err("bad upd_marker");
+		goto fail;
+	}
+
+	if (vol->upd_marker && vol->corrupted) {
+		dbg_err("update marker and corrupted simultaneously");
+		goto fail;
+	}
+
+	if (vol->reserved_pebs > ubi->good_peb_count) {
+		ubi_err("too large reserved_pebs");
+		goto fail;
+	}
+
+	n = ubi->leb_size - vol->data_pad;
+	if (vol->usable_leb_size != ubi->leb_size - vol->data_pad) {
+		ubi_err("bad usable_leb_size, has to be %lld", n);
+		goto fail;
+	}
+
+	if (vol->name_len > UBI_VOL_NAME_MAX) {
+		ubi_err("too long volume name, max is %d", UBI_VOL_NAME_MAX);
+		goto fail;
+	}
+
+	if (!vol->name) {
+		ubi_err("NULL volume name");
+		goto fail;
+	}
+
+	n = strnlen(vol->name, vol->name_len + 1);
+	if (n != vol->name_len) {
+		ubi_err("bad name_len %lld", n);
+		goto fail;
+	}
+
+	n = vol->used_ebs * vol->usable_leb_size;
+	if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
+		if (vol->corrupted != 0) {
+			ubi_err("corrupted dynamic volume");
+			goto fail;
+		}
+		if (vol->used_ebs != vol->reserved_pebs) {
+			ubi_err("bad used_ebs");
+			goto fail;
+		}
+		if (vol->last_eb_bytes != vol->usable_leb_size) {
+			ubi_err("bad last_eb_bytes");
+			goto fail;
+		}
+		if (vol->used_bytes != n) {
+			ubi_err("bad used_bytes");
+			goto fail;
+		}
+	} else {
+		if (vol->corrupted != 0 && vol->corrupted != 1) {
+			ubi_err("bad corrupted");
+			goto fail;
+		}
+		if (vol->used_ebs < 0 || vol->used_ebs > vol->reserved_pebs) {
+			ubi_err("bad used_ebs");
+			goto fail;
+		}
+		if (vol->last_eb_bytes < 0 ||
+		    vol->last_eb_bytes > vol->usable_leb_size) {
+			ubi_err("bad last_eb_bytes");
+			goto fail;
+		}
+		if (vol->used_bytes < 0 || vol->used_bytes > n ||
+		    vol->used_bytes < n - vol->usable_leb_size) {
+			ubi_err("bad used_bytes");
+			goto fail;
+		}
+	}
+
+	alignment  = ubi32_to_cpu(ubi->vtbl[vol_id].alignment);
+	data_pad   = ubi32_to_cpu(ubi->vtbl[vol_id].data_pad);
+	name_len   = ubi16_to_cpu(ubi->vtbl[vol_id].name_len);
+	upd_marker = ubi->vtbl[vol_id].upd_marker;
+	name       = &ubi->vtbl[vol_id].name[0];
+	if (ubi->vtbl[vol_id].vol_type == UBI_VID_DYNAMIC)
+		vol_type = UBI_DYNAMIC_VOLUME;
+	else
+		vol_type = UBI_STATIC_VOLUME;
+
+	if (alignment != vol->alignment || data_pad != vol->data_pad ||
+	    upd_marker != vol->upd_marker || vol_type != vol->vol_type ||
+	    name_len!= vol->name_len || strncmp(name, vol->name, name_len)) {
+		ubi_err("volume info is different");
+		goto fail;
+	}
+
+	return;
+
+fail:
+	ubi_err("paranoid check failed");
+	ubi_dbg_dump_vol_info(vol);
+	ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id);
+	BUG();
+}
+
+/**
+ * paranoid_check_volumes - check information about all volumes.
+ * @ubi: UBI device description object
+ */
+static void paranoid_check_volumes(struct ubi_device *ubi)
+{
+	int i;
+
+	mutex_lock(&ubi->vtbl_mutex);
+	spin_lock(&ubi->volumes_lock);
+	for (i = 0; i < ubi->vtbl_slots; i++)
+		paranoid_check_volume(ubi, i);
+	spin_unlock(&ubi->volumes_lock);
+	mutex_unlock(&ubi->vtbl_mutex);
+}
+#endif
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
new file mode 100644
index 00000000000..b6fd6bbd941
--- /dev/null
+++ b/drivers/mtd/ubi/vtbl.c
@@ -0,0 +1,809 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ * Copyright (c) Nokia Corporation, 2006, 2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file includes volume table manipulation code. The volume table is an
+ * on-flash table containing volume meta-data like name, number of reserved
+ * physical eraseblocks, type, etc. The volume table is stored in the so-called
+ * "layout volume".
+ *
+ * The layout volume is an internal volume which is organized as follows. It
+ * consists of two logical eraseblocks - LEB 0 and LEB 1. Each logical
+ * eraseblock stores one volume table copy, i.e. LEB 0 and LEB 1 duplicate each
+ * other. This redundancy guarantees robustness to unclean reboots. The volume
+ * table is basically an array of volume table records. Each record contains
+ * full information about the volume and protected by a CRC checksum.
+ *
+ * The volume table is changed, it is first changed in RAM. Then LEB 0 is
+ * erased, and the updated volume table is written back to LEB 0. Then same for
+ * LEB 1. This scheme guarantees recoverability from unclean reboots.
+ *
+ * In this UBI implementation the on-flash volume table does not contain any
+ * information about how many data static volumes contain. This information may
+ * be found from the scanning data.
+ *
+ * But it would still be beneficial to store this information in the volume
+ * table. For example, suppose we have a static volume X, and all its physical
+ * eraseblocks became bad for some reasons. Suppose we are attaching the
+ * corresponding MTD device, the scanning has found no logical eraseblocks
+ * corresponding to the volume X. According to the volume table volume X does
+ * exist. So we don't know whether it is just empty or all its physical
+ * eraseblocks went bad. So we cannot alarm the user about this corruption.
+ *
+ * The volume table also stores so-called "update marker", which is used for
+ * volume updates. Before updating the volume, the update marker is set, and
+ * after the update operation is finished, the update marker is cleared. So if
+ * the update operation was interrupted (e.g. by an unclean reboot) - the
+ * update marker is still there and we know that the volume's contents is
+ * damaged.
+ */
+
+#include <linux/crc32.h>
+#include <linux/err.h>
+#include <asm/div64.h>
+#include "ubi.h"
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+static void paranoid_vtbl_check(const struct ubi_device *ubi);
+#else
+#define paranoid_vtbl_check(ubi)
+#endif
+
+/* Empty volume table record */
+static struct ubi_vtbl_record empty_vtbl_record;
+
+/**
+ * ubi_change_vtbl_record - change volume table record.
+ * @ubi: UBI device description object
+ * @idx: table index to change
+ * @vtbl_rec: new volume table record
+ *
+ * This function changes volume table record @idx. If @vtbl_rec is %NULL, empty
+ * volume table record is written. The caller does not have to calculate CRC of
+ * the record as it is done by this function. Returns zero in case of success
+ * and a negative error code in case of failure.
+ */
+int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
+			   struct ubi_vtbl_record *vtbl_rec)
+{
+	int i, err;
+	uint32_t crc;
+
+	ubi_assert(idx >= 0 && idx < ubi->vtbl_slots);
+
+	if (!vtbl_rec)
+		vtbl_rec = &empty_vtbl_record;
+	else {
+		crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC);
+		vtbl_rec->crc = cpu_to_ubi32(crc);
+	}
+
+	dbg_msg("change record %d", idx);
+	ubi_dbg_dump_vtbl_record(vtbl_rec, idx);
+
+	mutex_lock(&ubi->vtbl_mutex);
+	memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record));
+	for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) {
+		err = ubi_eba_unmap_leb(ubi, UBI_LAYOUT_VOL_ID, i);
+		if (err) {
+			mutex_unlock(&ubi->vtbl_mutex);
+			return err;
+		}
+		err = ubi_eba_write_leb(ubi, UBI_LAYOUT_VOL_ID, i, ubi->vtbl, 0,
+					ubi->vtbl_size, UBI_LONGTERM);
+		if (err) {
+			mutex_unlock(&ubi->vtbl_mutex);
+			return err;
+		}
+	}
+
+	paranoid_vtbl_check(ubi);
+	mutex_unlock(&ubi->vtbl_mutex);
+	return ubi_wl_flush(ubi);
+}
+
+/**
+ * vol_til_check - check if volume table is not corrupted and contains sensible
+ * data.
+ *
+ * @ubi: UBI device description object
+ * @vtbl: volume table
+ *
+ * This function returns zero if @vtbl is all right, %1 if CRC is incorrect,
+ * and %-EINVAL if it contains inconsistent data.
+ */
+static int vtbl_check(const struct ubi_device *ubi,
+		      const struct ubi_vtbl_record *vtbl)
+{
+	int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len;
+	int upd_marker;
+	uint32_t crc;
+	const char *name;
+
+	for (i = 0; i < ubi->vtbl_slots; i++) {
+		cond_resched();
+
+		reserved_pebs = ubi32_to_cpu(vtbl[i].reserved_pebs);
+		alignment = ubi32_to_cpu(vtbl[i].alignment);
+		data_pad = ubi32_to_cpu(vtbl[i].data_pad);
+		upd_marker = vtbl[i].upd_marker;
+		vol_type = vtbl[i].vol_type;
+		name_len = ubi16_to_cpu(vtbl[i].name_len);
+		name = &vtbl[i].name[0];
+
+		crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC);
+		if (ubi32_to_cpu(vtbl[i].crc) != crc) {
+			ubi_err("bad CRC at record %u: %#08x, not %#08x",
+				 i, crc, ubi32_to_cpu(vtbl[i].crc));
+			ubi_dbg_dump_vtbl_record(&vtbl[i], i);
+			return 1;
+		}
+
+		if (reserved_pebs == 0) {
+			if (memcmp(&vtbl[i], &empty_vtbl_record,
+						UBI_VTBL_RECORD_SIZE)) {
+				dbg_err("bad empty record");
+				goto bad;
+			}
+			continue;
+		}
+
+		if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 ||
+		    name_len < 0) {
+			dbg_err("negative values");
+			goto bad;
+		}
+
+		if (alignment > ubi->leb_size || alignment == 0) {
+			dbg_err("bad alignment");
+			goto bad;
+		}
+
+		n = alignment % ubi->min_io_size;
+		if (alignment != 1 && n) {
+			dbg_err("alignment is not multiple of min I/O unit");
+			goto bad;
+		}
+
+		n = ubi->leb_size % alignment;
+		if (data_pad != n) {
+			dbg_err("bad data_pad, has to be %d", n);
+			goto bad;
+		}
+
+		if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) {
+			dbg_err("bad vol_type");
+			goto bad;
+		}
+
+		if (upd_marker != 0 && upd_marker != 1) {
+			dbg_err("bad upd_marker");
+			goto bad;
+		}
+
+		if (reserved_pebs > ubi->good_peb_count) {
+			dbg_err("too large reserved_pebs, good PEBs %d",
+				ubi->good_peb_count);
+			goto bad;
+		}
+
+		if (name_len > UBI_VOL_NAME_MAX) {
+			dbg_err("too long volume name, max %d",
+				UBI_VOL_NAME_MAX);
+			goto bad;
+		}
+
+		if (name[0] == '\0') {
+			dbg_err("NULL volume name");
+			goto bad;
+		}
+
+		if (name_len != strnlen(name, name_len + 1)) {
+			dbg_err("bad name_len");
+			goto bad;
+		}
+	}
+
+	/* Checks that all names are unique */
+	for (i = 0; i < ubi->vtbl_slots - 1; i++) {
+		for (n = i + 1; n < ubi->vtbl_slots; n++) {
+			int len1 = ubi16_to_cpu(vtbl[i].name_len);
+			int len2 = ubi16_to_cpu(vtbl[n].name_len);
+
+			if (len1 > 0 && len1 == len2 &&
+			    !strncmp(vtbl[i].name, vtbl[n].name, len1)) {
+				ubi_err("volumes %d and %d have the same name"
+					" \"%s\"", i, n, vtbl[i].name);
+				ubi_dbg_dump_vtbl_record(&vtbl[i], i);
+				ubi_dbg_dump_vtbl_record(&vtbl[n], n);
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+
+bad:
+	ubi_err("volume table check failed, record %d", i);
+	ubi_dbg_dump_vtbl_record(&vtbl[i], i);
+	return -EINVAL;
+}
+
+/**
+ * create_vtbl - create a copy of volume table.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ * @copy: number of the volume table copy
+ * @vtbl: contents of the volume table
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int create_vtbl(const struct ubi_device *ubi, struct ubi_scan_info *si,
+		       int copy, void *vtbl)
+{
+	int err, tries = 0;
+	static struct ubi_vid_hdr *vid_hdr;
+	struct ubi_scan_volume *sv;
+	struct ubi_scan_leb *new_seb, *old_seb = NULL;
+
+	ubi_msg("create volume table (copy #%d)", copy + 1);
+
+	vid_hdr = ubi_zalloc_vid_hdr(ubi);
+	if (!vid_hdr)
+		return -ENOMEM;
+
+	/*
+	 * Check if there is a logical eraseblock which would have to contain
+	 * this volume table copy was found during scanning. It has to be wiped
+	 * out.
+	 */
+	sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOL_ID);
+	if (sv)
+		old_seb = ubi_scan_find_seb(sv, copy);
+
+retry:
+	new_seb = ubi_scan_get_free_peb(ubi, si);
+	if (IS_ERR(new_seb)) {
+		err = PTR_ERR(new_seb);
+		goto out_free;
+	}
+
+	vid_hdr->vol_type = UBI_VID_DYNAMIC;
+	vid_hdr->vol_id = cpu_to_ubi32(UBI_LAYOUT_VOL_ID);
+	vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT;
+	vid_hdr->data_size = vid_hdr->used_ebs =
+			     vid_hdr->data_pad = cpu_to_ubi32(0);
+	vid_hdr->lnum = cpu_to_ubi32(copy);
+	vid_hdr->sqnum = cpu_to_ubi64(++si->max_sqnum);
+	vid_hdr->leb_ver = cpu_to_ubi32(old_seb ? old_seb->leb_ver + 1: 0);
+
+	/* The EC header is already there, write the VID header */
+	err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr);
+	if (err)
+		goto write_error;
+
+	/* Write the layout volume contents */
+	err = ubi_io_write_data(ubi, vtbl, new_seb->pnum, 0, ubi->vtbl_size);
+	if (err)
+		goto write_error;
+
+	/*
+	 * And add it to the scanning information. Don't delete the old
+	 * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'.
+	 */
+	err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec,
+				vid_hdr, 0);
+	kfree(new_seb);
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	return err;
+
+write_error:
+	kfree(new_seb);
+	/* May be this physical eraseblock went bad, try to pick another one */
+	if (++tries <= 5) {
+		err = ubi_scan_add_to_list(si, new_seb->pnum, new_seb->ec,
+					   &si->corr);
+		if (!err)
+			goto retry;
+	}
+out_free:
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	return err;
+
+}
+
+/**
+ * process_lvol - process the layout volume.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ * @sv: layout volume scanning information
+ *
+ * This function is responsible for reading the layout volume, ensuring it is
+ * not corrupted, and recovering from corruptions if needed. Returns volume
+ * table in case of success and a negative error code in case of failure.
+ */
+static struct ubi_vtbl_record *process_lvol(const struct ubi_device *ubi,
+					    struct ubi_scan_info *si,
+					    struct ubi_scan_volume *sv)
+{
+	int err;
+	struct rb_node *rb;
+	struct ubi_scan_leb *seb;
+	struct ubi_vtbl_record *leb[UBI_LAYOUT_VOLUME_EBS] = { NULL, NULL };
+	int leb_corrupted[UBI_LAYOUT_VOLUME_EBS] = {1, 1};
+
+	/*
+	 * UBI goes through the following steps when it changes the layout
+	 * volume:
+	 * a. erase LEB 0;
+	 * b. write new data to LEB 0;
+	 * c. erase LEB 1;
+	 * d. write new data to LEB 1.
+	 *
+	 * Before the change, both LEBs contain the same data.
+	 *
+	 * Due to unclean reboots, the contents of LEB 0 may be lost, but there
+	 * should LEB 1. So it is OK if LEB 0 is corrupted while LEB 1 is not.
+	 * Similarly, LEB 1 may be lost, but there should be LEB 0. And
+	 * finally, unclean reboots may result in a situation when neither LEB
+	 * 0 nor LEB 1 are corrupted, but they are different. In this case, LEB
+	 * 0 contains more recent information.
+	 *
+	 * So the plan is to first check LEB 0. Then
+	 * a. if LEB 0 is OK, it must be containing the most resent data; then
+	 *    we compare it with LEB 1, and if they are different, we copy LEB
+	 *    0 to LEB 1;
+	 * b. if LEB 0 is corrupted, but LEB 1 has to be OK, and we copy LEB 1
+	 *    to LEB 0.
+	 */
+
+	dbg_msg("check layout volume");
+
+	/* Read both LEB 0 and LEB 1 into memory */
+	ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
+		leb[seb->lnum] = kzalloc(ubi->vtbl_size, GFP_KERNEL);
+		if (!leb[seb->lnum]) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+
+		err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0,
+				       ubi->vtbl_size);
+		if (err == UBI_IO_BITFLIPS || err == -EBADMSG)
+			/* Scrub the PEB later */
+			seb->scrub = 1;
+		else if (err)
+			goto out_free;
+	}
+
+	err = -EINVAL;
+	if (leb[0]) {
+		leb_corrupted[0] = vtbl_check(ubi, leb[0]);
+		if (leb_corrupted[0] < 0)
+			goto out_free;
+	}
+
+	if (!leb_corrupted[0]) {
+		/* LEB 0 is OK */
+		if (leb[1])
+			leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size);
+		if (leb_corrupted[1]) {
+			ubi_warn("volume table copy #2 is corrupted");
+			err = create_vtbl(ubi, si, 1, leb[0]);
+			if (err)
+				goto out_free;
+			ubi_msg("volume table was restored");
+		}
+
+		/* Both LEB 1 and LEB 2 are OK and consistent */
+		kfree(leb[1]);
+		return leb[0];
+	} else {
+		/* LEB 0 is corrupted or does not exist */
+		if (leb[1]) {
+			leb_corrupted[1] = vtbl_check(ubi, leb[1]);
+			if (leb_corrupted[1] < 0)
+				goto out_free;
+		}
+		if (leb_corrupted[1]) {
+			/* Both LEB 0 and LEB 1 are corrupted */
+			ubi_err("both volume tables are corrupted");
+			goto out_free;
+		}
+
+		ubi_warn("volume table copy #1 is corrupted");
+		err = create_vtbl(ubi, si, 0, leb[1]);
+		if (err)
+			goto out_free;
+		ubi_msg("volume table was restored");
+
+		kfree(leb[0]);
+		return leb[1];
+	}
+
+out_free:
+	kfree(leb[0]);
+	kfree(leb[1]);
+	return ERR_PTR(err);
+}
+
+/**
+ * create_empty_lvol - create empty layout volume.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ *
+ * This function returns volume table contents in case of success and a
+ * negative error code in case of failure.
+ */
+static struct ubi_vtbl_record *create_empty_lvol(const struct ubi_device *ubi,
+						 struct ubi_scan_info *si)
+{
+	int i;
+	struct ubi_vtbl_record *vtbl;
+
+	vtbl = kzalloc(ubi->vtbl_size, GFP_KERNEL);
+	if (!vtbl)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < ubi->vtbl_slots; i++)
+		memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE);
+
+	for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) {
+		int err;
+
+		err = create_vtbl(ubi, si, i, vtbl);
+		if (err) {
+			kfree(vtbl);
+			return ERR_PTR(err);
+		}
+	}
+
+	return vtbl;
+}
+
+/**
+ * init_volumes - initialize volume information for existing volumes.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ * @vtbl: volume table
+ *
+ * This function allocates volume description objects for existing volumes.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
+			const struct ubi_vtbl_record *vtbl)
+{
+	int i, reserved_pebs = 0;
+	struct ubi_scan_volume *sv;
+	struct ubi_volume *vol;
+
+	for (i = 0; i < ubi->vtbl_slots; i++) {
+		cond_resched();
+
+		if (ubi32_to_cpu(vtbl[i].reserved_pebs) == 0)
+			continue; /* Empty record */
+
+		vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL);
+		if (!vol)
+			return -ENOMEM;
+
+		vol->reserved_pebs = ubi32_to_cpu(vtbl[i].reserved_pebs);
+		vol->alignment = ubi32_to_cpu(vtbl[i].alignment);
+		vol->data_pad = ubi32_to_cpu(vtbl[i].data_pad);
+		vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ?
+					UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME;
+		vol->name_len = ubi16_to_cpu(vtbl[i].name_len);
+		vol->usable_leb_size = ubi->leb_size - vol->data_pad;
+		memcpy(vol->name, vtbl[i].name, vol->name_len);
+		vol->name[vol->name_len] = '\0';
+		vol->vol_id = i;
+
+		ubi_assert(!ubi->volumes[i]);
+		ubi->volumes[i] = vol;
+		ubi->vol_count += 1;
+		vol->ubi = ubi;
+		reserved_pebs += vol->reserved_pebs;
+
+		/*
+		 * In case of dynamic volume UBI knows nothing about how many
+		 * data is stored there. So assume the whole volume is used.
+		 */
+		if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
+			vol->used_ebs = vol->reserved_pebs;
+			vol->last_eb_bytes = vol->usable_leb_size;
+			vol->used_bytes = vol->used_ebs * vol->usable_leb_size;
+			continue;
+		}
+
+		/* Static volumes only */
+		sv = ubi_scan_find_sv(si, i);
+		if (!sv) {
+			/*
+			 * No eraseblocks belonging to this volume found. We
+			 * don't actually know whether this static volume is
+			 * completely corrupted or just contains no data. And
+			 * we cannot know this as long as data size is not
+			 * stored on flash. So we just assume the volume is
+			 * empty. FIXME: this should be handled.
+			 */
+			continue;
+		}
+
+		if (sv->leb_count != sv->used_ebs) {
+			/*
+			 * We found a static volume which misses several
+			 * eraseblocks. Treat it as corrupted.
+			 */
+			ubi_warn("static volume %d misses %d LEBs - corrupted",
+				 sv->vol_id, sv->used_ebs - sv->leb_count);
+			vol->corrupted = 1;
+			continue;
+		}
+
+		vol->used_ebs = sv->used_ebs;
+		vol->used_bytes = (vol->used_ebs - 1) * vol->usable_leb_size;
+		vol->used_bytes += sv->last_data_size;
+		vol->last_eb_bytes = sv->last_data_size;
+	}
+
+	vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL);
+	if (!vol)
+		return -ENOMEM;
+
+	vol->reserved_pebs = UBI_LAYOUT_VOLUME_EBS;
+	vol->alignment = 1;
+	vol->vol_type = UBI_DYNAMIC_VOLUME;
+	vol->name_len = sizeof(UBI_LAYOUT_VOLUME_NAME) - 1;
+	memcpy(vol->name, UBI_LAYOUT_VOLUME_NAME, vol->name_len + 1);
+	vol->usable_leb_size = ubi->leb_size;
+	vol->used_ebs = vol->reserved_pebs;
+	vol->last_eb_bytes = vol->reserved_pebs;
+	vol->used_bytes = vol->used_ebs * (ubi->leb_size - vol->data_pad);
+	vol->vol_id = UBI_LAYOUT_VOL_ID;
+
+	ubi_assert(!ubi->volumes[i]);
+	ubi->volumes[vol_id2idx(ubi, vol->vol_id)] = vol;
+	reserved_pebs += vol->reserved_pebs;
+	ubi->vol_count += 1;
+	vol->ubi = ubi;
+
+	if (reserved_pebs > ubi->avail_pebs)
+		ubi_err("not enough PEBs, required %d, available %d",
+			reserved_pebs, ubi->avail_pebs);
+	ubi->rsvd_pebs += reserved_pebs;
+	ubi->avail_pebs -= reserved_pebs;
+
+	return 0;
+}
+
+/**
+ * check_sv - check volume scanning information.
+ * @vol: UBI volume description object
+ * @sv: volume scanning information
+ *
+ * This function returns zero if the volume scanning information is consistent
+ * to the data read from the volume tabla, and %-EINVAL if not.
+ */
+static int check_sv(const struct ubi_volume *vol,
+		    const struct ubi_scan_volume *sv)
+{
+	if (sv->highest_lnum >= vol->reserved_pebs) {
+		dbg_err("bad highest_lnum");
+		goto bad;
+	}
+	if (sv->leb_count > vol->reserved_pebs) {
+		dbg_err("bad leb_count");
+		goto bad;
+	}
+	if (sv->vol_type != vol->vol_type) {
+		dbg_err("bad vol_type");
+		goto bad;
+	}
+	if (sv->used_ebs > vol->reserved_pebs) {
+		dbg_err("bad used_ebs");
+		goto bad;
+	}
+	if (sv->data_pad != vol->data_pad) {
+		dbg_err("bad data_pad");
+		goto bad;
+	}
+	return 0;
+
+bad:
+	ubi_err("bad scanning information");
+	ubi_dbg_dump_sv(sv);
+	ubi_dbg_dump_vol_info(vol);
+	return -EINVAL;
+}
+
+/**
+ * check_scanning_info - check that scanning information.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ *
+ * Even though we protect on-flash data by CRC checksums, we still don't trust
+ * the media. This function ensures that scanning information is consistent to
+ * the information read from the volume table. Returns zero if the scanning
+ * information is OK and %-EINVAL if it is not.
+ */
+static int check_scanning_info(const struct ubi_device *ubi,
+			       struct ubi_scan_info *si)
+{
+	int err, i;
+	struct ubi_scan_volume *sv;
+	struct ubi_volume *vol;
+
+	if (si->vols_found > UBI_INT_VOL_COUNT + ubi->vtbl_slots) {
+		ubi_err("scanning found %d volumes, maximum is %d + %d",
+			si->vols_found, UBI_INT_VOL_COUNT, ubi->vtbl_slots);
+		return -EINVAL;
+	}
+
+	if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT&&
+	    si->highest_vol_id < UBI_INTERNAL_VOL_START) {
+		ubi_err("too large volume ID %d found by scanning",
+			si->highest_vol_id);
+		return -EINVAL;
+	}
+
+
+	for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
+		cond_resched();
+
+		sv = ubi_scan_find_sv(si, i);
+		vol = ubi->volumes[i];
+		if (!vol) {
+			if (sv)
+				ubi_scan_rm_volume(si, sv);
+			continue;
+		}
+
+		if (vol->reserved_pebs == 0) {
+			ubi_assert(i < ubi->vtbl_slots);
+
+			if (!sv)
+				continue;
+
+			/*
+			 * During scanning we found a volume which does not
+			 * exist according to the information in the volume
+			 * table. This must have happened due to an unclean
+			 * reboot while the volume was being removed. Discard
+			 * these eraseblocks.
+			 */
+			ubi_msg("finish volume %d removal", sv->vol_id);
+			ubi_scan_rm_volume(si, sv);
+		} else if (sv) {
+			err = check_sv(vol, sv);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ubi_read_volume_table - read volume table.
+ * information.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ *
+ * This function reads volume table, checks it, recover from errors if needed,
+ * or creates it if needed. Returns zero in case of success and a negative
+ * error code in case of failure.
+ */
+int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
+{
+	int i, err;
+	struct ubi_scan_volume *sv;
+
+	empty_vtbl_record.crc = cpu_to_ubi32(0xf116c36b);
+
+	/*
+	 * The number of supported volumes is limited by the eraseblock size
+	 * and by the UBI_MAX_VOLUMES constant.
+	 */
+	ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE;
+	if (ubi->vtbl_slots > UBI_MAX_VOLUMES)
+		ubi->vtbl_slots = UBI_MAX_VOLUMES;
+
+	ubi->vtbl_size = ubi->vtbl_slots * UBI_VTBL_RECORD_SIZE;
+	ubi->vtbl_size = ALIGN(ubi->vtbl_size, ubi->min_io_size);
+
+	sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOL_ID);
+	if (!sv) {
+		/*
+		 * No logical eraseblocks belonging to the layout volume were
+		 * found. This could mean that the flash is just empty. In
+		 * this case we create empty layout volume.
+		 *
+		 * But if flash is not empty this must be a corruption or the
+		 * MTD device just contains garbage.
+		 */
+		if (si->is_empty) {
+			ubi->vtbl = create_empty_lvol(ubi, si);
+			if (IS_ERR(ubi->vtbl))
+				return PTR_ERR(ubi->vtbl);
+		} else {
+			ubi_err("the layout volume was not found");
+			return -EINVAL;
+		}
+	} else {
+		if (sv->leb_count > UBI_LAYOUT_VOLUME_EBS) {
+			/* This must not happen with proper UBI images */
+			dbg_err("too many LEBs (%d) in layout volume",
+				sv->leb_count);
+			return -EINVAL;
+		}
+
+		ubi->vtbl = process_lvol(ubi, si, sv);
+		if (IS_ERR(ubi->vtbl))
+			return PTR_ERR(ubi->vtbl);
+	}
+
+	ubi->avail_pebs = ubi->good_peb_count;
+
+	/*
+	 * The layout volume is OK, initialize the corresponding in-RAM data
+	 * structures.
+	 */
+	err = init_volumes(ubi, si, ubi->vtbl);
+	if (err)
+		goto out_free;
+
+	/*
+	 * Get sure that the scanning information is consistent to the
+	 * information stored in the volume table.
+	 */
+	err = check_scanning_info(ubi, si);
+	if (err)
+		goto out_free;
+
+	return 0;
+
+out_free:
+	kfree(ubi->vtbl);
+	for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++)
+		if (ubi->volumes[i]) {
+			kfree(ubi->volumes[i]);
+			ubi->volumes[i] = NULL;
+		}
+	return err;
+}
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+
+/**
+ * paranoid_vtbl_check - check volume table.
+ * @ubi: UBI device description object
+ */
+static void paranoid_vtbl_check(const struct ubi_device *ubi)
+{
+	if (vtbl_check(ubi, ubi->vtbl)) {
+		ubi_err("paranoid check failed");
+		BUG();
+	}
+}
+
+#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
new file mode 100644
index 00000000000..9ecaf77eca9
--- /dev/null
+++ b/drivers/mtd/ubi/wl.c
@@ -0,0 +1,1671 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner
+ */
+
+/*
+ * UBI wear-leveling unit.
+ *
+ * This unit is responsible for wear-leveling. It works in terms of physical
+ * eraseblocks and erase counters and knows nothing about logical eraseblocks,
+ * volumes, etc. From this unit's perspective all physical eraseblocks are of
+ * two types - used and free. Used physical eraseblocks are those that were
+ * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are
+ * those that were put by the 'ubi_wl_put_peb()' function.
+ *
+ * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
+ * header. The rest of the physical eraseblock contains only 0xFF bytes.
+ *
+ * When physical eraseblocks are returned to the WL unit by means of the
+ * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
+ * done asynchronously in context of the per-UBI device background thread,
+ * which is also managed by the WL unit.
+ *
+ * The wear-leveling is ensured by means of moving the contents of used
+ * physical eraseblocks with low erase counter to free physical eraseblocks
+ * with high erase counter.
+ *
+ * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick
+ * an "optimal" physical eraseblock. For example, when it is known that the
+ * physical eraseblock will be "put" soon because it contains short-term data,
+ * the WL unit may pick a free physical eraseblock with low erase counter, and
+ * so forth.
+ *
+ * If the WL unit fails to erase a physical eraseblock, it marks it as bad.
+ *
+ * This unit is also responsible for scrubbing. If a bit-flip is detected in a
+ * physical eraseblock, it has to be moved. Technically this is the same as
+ * moving it for wear-leveling reasons.
+ *
+ * As it was said, for the UBI unit all physical eraseblocks are either "free"
+ * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used
+ * eraseblocks are kept in a set of different RB-trees: @wl->used,
+ * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
+ *
+ * Note, in this implementation, we keep a small in-RAM object for each physical
+ * eraseblock. This is surely not a scalable solution. But it appears to be good
+ * enough for moderately large flashes and it is simple. In future, one may
+ * re-work this unit and make it more scalable.
+ *
+ * At the moment this unit does not utilize the sequence number, which was
+ * introduced relatively recently. But it would be wise to do this because the
+ * sequence number of a logical eraseblock characterizes how old is it. For
+ * example, when we move a PEB with low erase counter, and we need to pick the
+ * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
+ * pick target PEB with an average EC if our PEB is not very "old". This is a
+ * room for future re-works of the WL unit.
+ *
+ * FIXME: looks too complex, should be simplified (later).
+ */
+
+#include <linux/slab.h>
+#include <linux/crc32.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include "ubi.h"
+
+/* Number of physical eraseblocks reserved for wear-leveling purposes */
+#define WL_RESERVED_PEBS 1
+
+/*
+ * How many erase cycles are short term, unknown, and long term physical
+ * eraseblocks protected.
+ */
+#define ST_PROTECTION 16
+#define U_PROTECTION  10
+#define LT_PROTECTION 4
+
+/*
+ * Maximum difference between two erase counters. If this threshold is
+ * exceeded, the WL unit starts moving data from used physical eraseblocks with
+ * low erase counter to free physical eraseblocks with high erase counter.
+ */
+#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
+
+/*
+ * When a physical eraseblock is moved, the WL unit has to pick the target
+ * physical eraseblock to move to. The simplest way would be just to pick the
+ * one with the highest erase counter. But in certain workloads this could lead
+ * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a
+ * situation when the picked physical eraseblock is constantly erased after the
+ * data is written to it. So, we have a constant which limits the highest erase
+ * counter of the free physical eraseblock to pick. Namely, the WL unit does
+ * not pick eraseblocks with erase counter greater then the lowest erase
+ * counter plus %WL_FREE_MAX_DIFF.
+ */
+#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
+
+/*
+ * Maximum number of consecutive background thread failures which is enough to
+ * switch to read-only mode.
+ */
+#define WL_MAX_FAILURES 32
+
+/**
+ * struct ubi_wl_entry - wear-leveling entry.
+ * @rb: link in the corresponding RB-tree
+ * @ec: erase counter
+ * @pnum: physical eraseblock number
+ *
+ * Each physical eraseblock has a corresponding &struct wl_entry object which
+ * may be kept in different RB-trees.
+ */
+struct ubi_wl_entry {
+	struct rb_node rb;
+	int ec;
+	int pnum;
+};
+
+/**
+ * struct ubi_wl_prot_entry - PEB protection entry.
+ * @rb_pnum: link in the @wl->prot.pnum RB-tree
+ * @rb_aec: link in the @wl->prot.aec RB-tree
+ * @abs_ec: the absolute erase counter value when the protection ends
+ * @e: the wear-leveling entry of the physical eraseblock under protection
+ *
+ * When the WL unit returns a physical eraseblock, the physical eraseblock is
+ * protected from being moved for some "time". For this reason, the physical
+ * eraseblock is not directly moved from the @wl->free tree to the @wl->used
+ * tree. There is one more tree in between where this physical eraseblock is
+ * temporarily stored (@wl->prot).
+ *
+ * All this protection stuff is needed because:
+ *  o we don't want to move physical eraseblocks just after we have given them
+ *    to the user; instead, we first want to let users fill them up with data;
+ *
+ *  o there is a chance that the user will put the physical eraseblock very
+ *    soon, so it makes sense not to move it for some time, but wait; this is
+ *    especially important in case of "short term" physical eraseblocks.
+ *
+ * Physical eraseblocks stay protected only for limited time. But the "time" is
+ * measured in erase cycles in this case. This is implemented with help of the
+ * absolute erase counter (@wl->abs_ec). When it reaches certain value, the
+ * physical eraseblocks are moved from the protection trees (@wl->prot.*) to
+ * the @wl->used tree.
+ *
+ * Protected physical eraseblocks are searched by physical eraseblock number
+ * (when they are put) and by the absolute erase counter (to check if it is
+ * time to move them to the @wl->used tree). So there are actually 2 RB-trees
+ * storing the protected physical eraseblocks: @wl->prot.pnum and
+ * @wl->prot.aec. They are referred to as the "protection" trees. The
+ * first one is indexed by the physical eraseblock number. The second one is
+ * indexed by the absolute erase counter. Both trees store
+ * &struct ubi_wl_prot_entry objects.
+ *
+ * Each physical eraseblock has 2 main states: free and used. The former state
+ * corresponds to the @wl->free tree. The latter state is split up on several
+ * sub-states:
+ * o the WL movement is allowed (@wl->used tree);
+ * o the WL movement is temporarily prohibited (@wl->prot.pnum and
+ * @wl->prot.aec trees);
+ * o scrubbing is needed (@wl->scrub tree).
+ *
+ * Depending on the sub-state, wear-leveling entries of the used physical
+ * eraseblocks may be kept in one of those trees.
+ */
+struct ubi_wl_prot_entry {
+	struct rb_node rb_pnum;
+	struct rb_node rb_aec;
+	unsigned long long abs_ec;
+	struct ubi_wl_entry *e;
+};
+
+/**
+ * struct ubi_work - UBI work description data structure.
+ * @list: a link in the list of pending works
+ * @func: worker function
+ * @priv: private data of the worker function
+ *
+ * @e: physical eraseblock to erase
+ * @torture: if the physical eraseblock has to be tortured
+ *
+ * The @func pointer points to the worker function. If the @cancel argument is
+ * not zero, the worker has to free the resources and exit immediately. The
+ * worker has to return zero in case of success and a negative error code in
+ * case of failure.
+ */
+struct ubi_work {
+	struct list_head list;
+	int (*func)(struct ubi_device *ubi, struct ubi_work *wrk, int cancel);
+	/* The below fields are only relevant to erasure works */
+	struct ubi_wl_entry *e;
+	int torture;
+};
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+static int paranoid_check_ec(const struct ubi_device *ubi, int pnum, int ec);
+static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
+				     struct rb_root *root);
+#else
+#define paranoid_check_ec(ubi, pnum, ec) 0
+#define paranoid_check_in_wl_tree(e, root)
+#endif
+
+/* Slab cache for wear-leveling entries */
+static struct kmem_cache *wl_entries_slab;
+
+/**
+ * tree_empty - a helper function to check if an RB-tree is empty.
+ * @root: the root of the tree
+ *
+ * This function returns non-zero if the RB-tree is empty and zero if not.
+ */
+static inline int tree_empty(struct rb_root *root)
+{
+	return root->rb_node == NULL;
+}
+
+/**
+ * wl_tree_add - add a wear-leveling entry to a WL RB-tree.
+ * @e: the wear-leveling entry to add
+ * @root: the root of the tree
+ *
+ * Note, we use (erase counter, physical eraseblock number) pairs as keys in
+ * the @ubi->used and @ubi->free RB-trees.
+ */
+static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
+{
+	struct rb_node **p, *parent = NULL;
+
+	p = &root->rb_node;
+	while (*p) {
+		struct ubi_wl_entry *e1;
+
+		parent = *p;
+		e1 = rb_entry(parent, struct ubi_wl_entry, rb);
+
+		if (e->ec < e1->ec)
+			p = &(*p)->rb_left;
+		else if (e->ec > e1->ec)
+			p = &(*p)->rb_right;
+		else {
+			ubi_assert(e->pnum != e1->pnum);
+			if (e->pnum < e1->pnum)
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+		}
+	}
+
+	rb_link_node(&e->rb, parent, p);
+	rb_insert_color(&e->rb, root);
+}
+
+
+/*
+ * Helper functions to add and delete wear-leveling entries from different
+ * trees.
+ */
+
+static void free_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e)
+{
+	wl_tree_add(e, &ubi->free);
+}
+static inline void used_tree_add(struct ubi_device *ubi,
+				 struct ubi_wl_entry *e)
+{
+	wl_tree_add(e, &ubi->used);
+}
+static inline void scrub_tree_add(struct ubi_device *ubi,
+				  struct ubi_wl_entry *e)
+{
+	wl_tree_add(e, &ubi->scrub);
+}
+static inline void free_tree_del(struct ubi_device *ubi,
+				 struct ubi_wl_entry *e)
+{
+	paranoid_check_in_wl_tree(e, &ubi->free);
+	rb_erase(&e->rb, &ubi->free);
+}
+static inline void used_tree_del(struct ubi_device *ubi,
+				 struct ubi_wl_entry *e)
+{
+	paranoid_check_in_wl_tree(e, &ubi->used);
+	rb_erase(&e->rb, &ubi->used);
+}
+static inline void scrub_tree_del(struct ubi_device *ubi,
+				  struct ubi_wl_entry *e)
+{
+	paranoid_check_in_wl_tree(e, &ubi->scrub);
+	rb_erase(&e->rb, &ubi->scrub);
+}
+
+/**
+ * do_work - do one pending work.
+ * @ubi: UBI device description object
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int do_work(struct ubi_device *ubi)
+{
+	int err;
+	struct ubi_work *wrk;
+
+	spin_lock(&ubi->wl_lock);
+
+	if (list_empty(&ubi->works)) {
+		spin_unlock(&ubi->wl_lock);
+		return 0;
+	}
+
+	wrk = list_entry(ubi->works.next, struct ubi_work, list);
+	list_del(&wrk->list);
+	spin_unlock(&ubi->wl_lock);
+
+	/*
+	 * Call the worker function. Do not touch the work structure
+	 * after this call as it will have been freed or reused by that
+	 * time by the worker function.
+	 */
+	err = wrk->func(ubi, wrk, 0);
+	if (err)
+		ubi_err("work failed with error code %d", err);
+
+	spin_lock(&ubi->wl_lock);
+	ubi->works_count -= 1;
+	ubi_assert(ubi->works_count >= 0);
+	spin_unlock(&ubi->wl_lock);
+	return err;
+}
+
+/**
+ * produce_free_peb - produce a free physical eraseblock.
+ * @ubi: UBI device description object
+ *
+ * This function tries to make a free PEB by means of synchronous execution of
+ * pending works. This may be needed if, for example the background thread is
+ * disabled. Returns zero in case of success and a negative error code in case
+ * of failure.
+ */
+static int produce_free_peb(struct ubi_device *ubi)
+{
+	int err;
+
+	spin_lock(&ubi->wl_lock);
+	while (tree_empty(&ubi->free)) {
+		spin_unlock(&ubi->wl_lock);
+
+		dbg_wl("do one work synchronously");
+		err = do_work(ubi);
+		if (err)
+			return err;
+
+		spin_lock(&ubi->wl_lock);
+	}
+	spin_unlock(&ubi->wl_lock);
+
+	return 0;
+}
+
+/**
+ * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree.
+ * @e: the wear-leveling entry to check
+ * @root: the root of the tree
+ *
+ * This function returns non-zero if @e is in the @root RB-tree and zero if it
+ * is not.
+ */
+static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
+{
+	struct rb_node *p;
+
+	p = root->rb_node;
+	while (p) {
+		struct ubi_wl_entry *e1;
+
+		e1 = rb_entry(p, struct ubi_wl_entry, rb);
+
+		if (e->pnum == e1->pnum) {
+			ubi_assert(e == e1);
+			return 1;
+		}
+
+		if (e->ec < e1->ec)
+			p = p->rb_left;
+		else if (e->ec > e1->ec)
+			p = p->rb_right;
+		else {
+			ubi_assert(e->pnum != e1->pnum);
+			if (e->pnum < e1->pnum)
+				p = p->rb_left;
+			else
+				p = p->rb_right;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * prot_tree_add - add physical eraseblock to protection trees.
+ * @ubi: UBI device description object
+ * @e: the physical eraseblock to add
+ * @pe: protection entry object to use
+ * @abs_ec: absolute erase counter value when this physical eraseblock has
+ * to be removed from the protection trees.
+ *
+ * @wl->lock has to be locked.
+ */
+static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e,
+			  struct ubi_wl_prot_entry *pe, int abs_ec)
+{
+	struct rb_node **p, *parent = NULL;
+	struct ubi_wl_prot_entry *pe1;
+
+	pe->e = e;
+	pe->abs_ec = ubi->abs_ec + abs_ec;
+
+	p = &ubi->prot.pnum.rb_node;
+	while (*p) {
+		parent = *p;
+		pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum);
+
+		if (e->pnum < pe1->e->pnum)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&pe->rb_pnum, parent, p);
+	rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum);
+
+	p = &ubi->prot.aec.rb_node;
+	parent = NULL;
+	while (*p) {
+		parent = *p;
+		pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec);
+
+		if (pe->abs_ec < pe1->abs_ec)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&pe->rb_aec, parent, p);
+	rb_insert_color(&pe->rb_aec, &ubi->prot.aec);
+}
+
+/**
+ * find_wl_entry - find wear-leveling entry closest to certain erase counter.
+ * @root: the RB-tree where to look for
+ * @max: highest possible erase counter
+ *
+ * This function looks for a wear leveling entry with erase counter closest to
+ * @max and less then @max.
+ */
+static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
+{
+	struct rb_node *p;
+	struct ubi_wl_entry *e;
+
+	e = rb_entry(rb_first(root), struct ubi_wl_entry, rb);
+	max += e->ec;
+
+	p = root->rb_node;
+	while (p) {
+		struct ubi_wl_entry *e1;
+
+		e1 = rb_entry(p, struct ubi_wl_entry, rb);
+		if (e1->ec >= max)
+			p = p->rb_left;
+		else {
+			p = p->rb_right;
+			e = e1;
+		}
+	}
+
+	return e;
+}
+
+/**
+ * ubi_wl_get_peb - get a physical eraseblock.
+ * @ubi: UBI device description object
+ * @dtype: type of data which will be stored in this physical eraseblock
+ *
+ * This function returns a physical eraseblock in case of success and a
+ * negative error code in case of failure. Might sleep.
+ */
+int ubi_wl_get_peb(struct ubi_device *ubi, int dtype)
+{
+	int err, protect, medium_ec;
+	struct ubi_wl_entry *e, *first, *last;
+	struct ubi_wl_prot_entry *pe;
+
+	ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM ||
+		   dtype == UBI_UNKNOWN);
+
+	pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_KERNEL);
+	if (!pe)
+		return -ENOMEM;
+
+retry:
+	spin_lock(&ubi->wl_lock);
+	if (tree_empty(&ubi->free)) {
+		if (ubi->works_count == 0) {
+			ubi_assert(list_empty(&ubi->works));
+			ubi_err("no free eraseblocks");
+			spin_unlock(&ubi->wl_lock);
+			kfree(pe);
+			return -ENOSPC;
+		}
+		spin_unlock(&ubi->wl_lock);
+
+		err = produce_free_peb(ubi);
+		if (err < 0) {
+			kfree(pe);
+			return err;
+		}
+		goto retry;
+	}
+
+	switch (dtype) {
+		case UBI_LONGTERM:
+			/*
+			 * For long term data we pick a physical eraseblock
+			 * with high erase counter. But the highest erase
+			 * counter we can pick is bounded by the the lowest
+			 * erase counter plus %WL_FREE_MAX_DIFF.
+			 */
+			e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
+			protect = LT_PROTECTION;
+			break;
+		case UBI_UNKNOWN:
+			/*
+			 * For unknown data we pick a physical eraseblock with
+			 * medium erase counter. But we by no means can pick a
+			 * physical eraseblock with erase counter greater or
+			 * equivalent than the lowest erase counter plus
+			 * %WL_FREE_MAX_DIFF.
+			 */
+			first = rb_entry(rb_first(&ubi->free),
+					 struct ubi_wl_entry, rb);
+			last = rb_entry(rb_last(&ubi->free),
+					struct ubi_wl_entry, rb);
+
+			if (last->ec - first->ec < WL_FREE_MAX_DIFF)
+				e = rb_entry(ubi->free.rb_node,
+						struct ubi_wl_entry, rb);
+			else {
+				medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
+				e = find_wl_entry(&ubi->free, medium_ec);
+			}
+			protect = U_PROTECTION;
+			break;
+		case UBI_SHORTTERM:
+			/*
+			 * For short term data we pick a physical eraseblock
+			 * with the lowest erase counter as we expect it will
+			 * be erased soon.
+			 */
+			e = rb_entry(rb_first(&ubi->free),
+				     struct ubi_wl_entry, rb);
+			protect = ST_PROTECTION;
+			break;
+		default:
+			protect = 0;
+			e = NULL;
+			BUG();
+	}
+
+	/*
+	 * Move the physical eraseblock to the protection trees where it will
+	 * be protected from being moved for some time.
+	 */
+	free_tree_del(ubi, e);
+	prot_tree_add(ubi, e, pe, protect);
+
+	dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect);
+	spin_unlock(&ubi->wl_lock);
+
+	return e->pnum;
+}
+
+/**
+ * prot_tree_del - remove a physical eraseblock from the protection trees
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock to remove
+ */
+static void prot_tree_del(struct ubi_device *ubi, int pnum)
+{
+	struct rb_node *p;
+	struct ubi_wl_prot_entry *pe = NULL;
+
+	p = ubi->prot.pnum.rb_node;
+	while (p) {
+
+		pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum);
+
+		if (pnum == pe->e->pnum)
+			break;
+
+		if (pnum < pe->e->pnum)
+			p = p->rb_left;
+		else
+			p = p->rb_right;
+	}
+
+	ubi_assert(pe->e->pnum == pnum);
+	rb_erase(&pe->rb_aec, &ubi->prot.aec);
+	rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
+	kfree(pe);
+}
+
+/**
+ * sync_erase - synchronously erase a physical eraseblock.
+ * @ubi: UBI device description object
+ * @e: the the physical eraseblock to erase
+ * @torture: if the physical eraseblock has to be tortured
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture)
+{
+	int err;
+	struct ubi_ec_hdr *ec_hdr;
+	unsigned long long ec = e->ec;
+
+	dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec);
+
+	err = paranoid_check_ec(ubi, e->pnum, e->ec);
+	if (err > 0)
+		return -EINVAL;
+
+	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
+	if (!ec_hdr)
+		return -ENOMEM;
+
+	err = ubi_io_sync_erase(ubi, e->pnum, torture);
+	if (err < 0)
+		goto out_free;
+
+	ec += err;
+	if (ec > UBI_MAX_ERASECOUNTER) {
+		/*
+		 * Erase counter overflow. Upgrade UBI and use 64-bit
+		 * erase counters internally.
+		 */
+		ubi_err("erase counter overflow at PEB %d, EC %llu",
+			e->pnum, ec);
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec);
+
+	ec_hdr->ec = cpu_to_ubi64(ec);
+
+	err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr);
+	if (err)
+		goto out_free;
+
+	e->ec = ec;
+	spin_lock(&ubi->wl_lock);
+	if (e->ec > ubi->max_ec)
+		ubi->max_ec = e->ec;
+	spin_unlock(&ubi->wl_lock);
+
+out_free:
+	kfree(ec_hdr);
+	return err;
+}
+
+/**
+ * check_protection_over - check if it is time to stop protecting some
+ * physical eraseblocks.
+ * @ubi: UBI device description object
+ *
+ * This function is called after each erase operation, when the absolute erase
+ * counter is incremented, to check if some physical eraseblock  have not to be
+ * protected any longer. These physical eraseblocks are moved from the
+ * protection trees to the used tree.
+ */
+static void check_protection_over(struct ubi_device *ubi)
+{
+	struct ubi_wl_prot_entry *pe;
+
+	/*
+	 * There may be several protected physical eraseblock to remove,
+	 * process them all.
+	 */
+	while (1) {
+		spin_lock(&ubi->wl_lock);
+		if (tree_empty(&ubi->prot.aec)) {
+			spin_unlock(&ubi->wl_lock);
+			break;
+		}
+
+		pe = rb_entry(rb_first(&ubi->prot.aec),
+			      struct ubi_wl_prot_entry, rb_aec);
+
+		if (pe->abs_ec > ubi->abs_ec) {
+			spin_unlock(&ubi->wl_lock);
+			break;
+		}
+
+		dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu",
+		       pe->e->pnum, ubi->abs_ec, pe->abs_ec);
+		rb_erase(&pe->rb_aec, &ubi->prot.aec);
+		rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
+		used_tree_add(ubi, pe->e);
+		spin_unlock(&ubi->wl_lock);
+
+		kfree(pe);
+		cond_resched();
+	}
+}
+
+/**
+ * schedule_ubi_work - schedule a work.
+ * @ubi: UBI device description object
+ * @wrk: the work to schedule
+ *
+ * This function enqueues a work defined by @wrk to the tail of the pending
+ * works list.
+ */
+static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
+{
+	spin_lock(&ubi->wl_lock);
+	list_add_tail(&wrk->list, &ubi->works);
+	ubi_assert(ubi->works_count >= 0);
+	ubi->works_count += 1;
+	if (ubi->thread_enabled)
+		wake_up_process(ubi->bgt_thread);
+	spin_unlock(&ubi->wl_lock);
+}
+
+static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
+			int cancel);
+
+/**
+ * schedule_erase - schedule an erase work.
+ * @ubi: UBI device description object
+ * @e: the WL entry of the physical eraseblock to erase
+ * @torture: if the physical eraseblock has to be tortured
+ *
+ * This function returns zero in case of success and a %-ENOMEM in case of
+ * failure.
+ */
+static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
+			  int torture)
+{
+	struct ubi_work *wl_wrk;
+
+	dbg_wl("schedule erasure of PEB %d, EC %d, torture %d",
+	       e->pnum, e->ec, torture);
+
+	wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_KERNEL);
+	if (!wl_wrk)
+		return -ENOMEM;
+
+	wl_wrk->func = &erase_worker;
+	wl_wrk->e = e;
+	wl_wrk->torture = torture;
+
+	schedule_ubi_work(ubi, wl_wrk);
+	return 0;
+}
+
+/**
+ * wear_leveling_worker - wear-leveling worker function.
+ * @ubi: UBI device description object
+ * @wrk: the work object
+ * @cancel: non-zero if the worker has to free memory and exit
+ *
+ * This function copies a more worn out physical eraseblock to a less worn out
+ * one. Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
+				int cancel)
+{
+	int err, put = 0;
+	struct ubi_wl_entry *e1, *e2;
+	struct ubi_vid_hdr *vid_hdr;
+
+	kfree(wrk);
+
+	if (cancel)
+		return 0;
+
+	vid_hdr = ubi_zalloc_vid_hdr(ubi);
+	if (!vid_hdr)
+		return -ENOMEM;
+
+	spin_lock(&ubi->wl_lock);
+
+	/*
+	 * Only one WL worker at a time is supported at this implementation, so
+	 * make sure a PEB is not being moved already.
+	 */
+	if (ubi->move_to || tree_empty(&ubi->free) ||
+	    (tree_empty(&ubi->used) && tree_empty(&ubi->scrub))) {
+		/*
+		 * Only one WL worker at a time is supported at this
+		 * implementation, so if a LEB is already being moved, cancel.
+		 *
+		 * No free physical eraseblocks? Well, we cancel wear-leveling
+		 * then. It will be triggered again when a free physical
+		 * eraseblock appears.
+		 *
+		 * No used physical eraseblocks? They must be temporarily
+		 * protected from being moved. They will be moved to the
+		 * @ubi->used tree later and the wear-leveling will be
+		 * triggered again.
+		 */
+		dbg_wl("cancel WL, a list is empty: free %d, used %d",
+		       tree_empty(&ubi->free), tree_empty(&ubi->used));
+		ubi->wl_scheduled = 0;
+		spin_unlock(&ubi->wl_lock);
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return 0;
+	}
+
+	if (tree_empty(&ubi->scrub)) {
+		/*
+		 * Now pick the least worn-out used physical eraseblock and a
+		 * highly worn-out free physical eraseblock. If the erase
+		 * counters differ much enough, start wear-leveling.
+		 */
+		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb);
+		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
+
+		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
+			dbg_wl("no WL needed: min used EC %d, max free EC %d",
+			       e1->ec, e2->ec);
+			ubi->wl_scheduled = 0;
+			spin_unlock(&ubi->wl_lock);
+			ubi_free_vid_hdr(ubi, vid_hdr);
+			return 0;
+		}
+		used_tree_del(ubi, e1);
+		dbg_wl("move PEB %d EC %d to PEB %d EC %d",
+		       e1->pnum, e1->ec, e2->pnum, e2->ec);
+	} else {
+		e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb);
+		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
+		scrub_tree_del(ubi, e1);
+		dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
+	}
+
+	free_tree_del(ubi, e2);
+	ubi_assert(!ubi->move_from && !ubi->move_to);
+	ubi_assert(!ubi->move_to_put && !ubi->move_from_put);
+	ubi->move_from = e1;
+	ubi->move_to = e2;
+	spin_unlock(&ubi->wl_lock);
+
+	/*
+	 * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum.
+	 * We so far do not know which logical eraseblock our physical
+	 * eraseblock (@e1) belongs to. We have to read the volume identifier
+	 * header first.
+	 */
+
+	err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
+	if (err && err != UBI_IO_BITFLIPS) {
+		if (err == UBI_IO_PEB_FREE) {
+			/*
+			 * We are trying to move PEB without a VID header. UBI
+			 * always write VID headers shortly after the PEB was
+			 * given, so we have a situation when it did not have
+			 * chance to write it down because it was preempted.
+			 * Just re-schedule the work, so that next time it will
+			 * likely have the VID header in place.
+			 */
+			dbg_wl("PEB %d has no VID header", e1->pnum);
+			err = 0;
+		} else {
+			ubi_err("error %d while reading VID header from PEB %d",
+				err, e1->pnum);
+			if (err > 0)
+				err = -EIO;
+		}
+		goto error;
+	}
+
+	err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
+	if (err) {
+		if (err == UBI_IO_BITFLIPS)
+			err = 0;
+		goto error;
+	}
+
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	spin_lock(&ubi->wl_lock);
+	if (!ubi->move_to_put)
+		used_tree_add(ubi, e2);
+	else
+		put = 1;
+	ubi->move_from = ubi->move_to = NULL;
+	ubi->move_from_put = ubi->move_to_put = 0;
+	ubi->wl_scheduled = 0;
+	spin_unlock(&ubi->wl_lock);
+
+	if (put) {
+		/*
+		 * Well, the target PEB was put meanwhile, schedule it for
+		 * erasure.
+		 */
+		dbg_wl("PEB %d was put meanwhile, erase", e2->pnum);
+		err = schedule_erase(ubi, e2, 0);
+		if (err) {
+			kmem_cache_free(wl_entries_slab, e2);
+			ubi_ro_mode(ubi);
+		}
+	}
+
+	err = schedule_erase(ubi, e1, 0);
+	if (err) {
+		kmem_cache_free(wl_entries_slab, e1);
+		ubi_ro_mode(ubi);
+	}
+
+	dbg_wl("done");
+	return err;
+
+	/*
+	 * Some error occurred. @e1 was not changed, so return it back. @e2
+	 * might be changed, schedule it for erasure.
+	 */
+error:
+	if (err)
+		dbg_wl("error %d occurred, cancel operation", err);
+	ubi_assert(err <= 0);
+
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	spin_lock(&ubi->wl_lock);
+	ubi->wl_scheduled = 0;
+	if (ubi->move_from_put)
+		put = 1;
+	else
+		used_tree_add(ubi, e1);
+	ubi->move_from = ubi->move_to = NULL;
+	ubi->move_from_put = ubi->move_to_put = 0;
+	spin_unlock(&ubi->wl_lock);
+
+	if (put) {
+		/*
+		 * Well, the target PEB was put meanwhile, schedule it for
+		 * erasure.
+		 */
+		dbg_wl("PEB %d was put meanwhile, erase", e1->pnum);
+		err = schedule_erase(ubi, e1, 0);
+		if (err) {
+			kmem_cache_free(wl_entries_slab, e1);
+			ubi_ro_mode(ubi);
+		}
+	}
+
+	err = schedule_erase(ubi, e2, 0);
+	if (err) {
+		kmem_cache_free(wl_entries_slab, e2);
+		ubi_ro_mode(ubi);
+	}
+
+	yield();
+	return err;
+}
+
+/**
+ * ensure_wear_leveling - schedule wear-leveling if it is needed.
+ * @ubi: UBI device description object
+ *
+ * This function checks if it is time to start wear-leveling and schedules it
+ * if yes. This function returns zero in case of success and a negative error
+ * code in case of failure.
+ */
+static int ensure_wear_leveling(struct ubi_device *ubi)
+{
+	int err = 0;
+	struct ubi_wl_entry *e1;
+	struct ubi_wl_entry *e2;
+	struct ubi_work *wrk;
+
+	spin_lock(&ubi->wl_lock);
+	if (ubi->wl_scheduled)
+		/* Wear-leveling is already in the work queue */
+		goto out_unlock;
+
+	/*
+	 * If the ubi->scrub tree is not empty, scrubbing is needed, and the
+	 * the WL worker has to be scheduled anyway.
+	 */
+	if (tree_empty(&ubi->scrub)) {
+		if (tree_empty(&ubi->used) || tree_empty(&ubi->free))
+			/* No physical eraseblocks - no deal */
+			goto out_unlock;
+
+		/*
+		 * We schedule wear-leveling only if the difference between the
+		 * lowest erase counter of used physical eraseblocks and a high
+		 * erase counter of free physical eraseblocks is greater then
+		 * %UBI_WL_THRESHOLD.
+		 */
+		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb);
+		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
+
+		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
+			goto out_unlock;
+		dbg_wl("schedule wear-leveling");
+	} else
+		dbg_wl("schedule scrubbing");
+
+	ubi->wl_scheduled = 1;
+	spin_unlock(&ubi->wl_lock);
+
+	wrk = kmalloc(sizeof(struct ubi_work), GFP_KERNEL);
+	if (!wrk) {
+		err = -ENOMEM;
+		goto out_cancel;
+	}
+
+	wrk->func = &wear_leveling_worker;
+	schedule_ubi_work(ubi, wrk);
+	return err;
+
+out_cancel:
+	spin_lock(&ubi->wl_lock);
+	ubi->wl_scheduled = 0;
+out_unlock:
+	spin_unlock(&ubi->wl_lock);
+	return err;
+}
+
+/**
+ * erase_worker - physical eraseblock erase worker function.
+ * @ubi: UBI device description object
+ * @wl_wrk: the work object
+ * @cancel: non-zero if the worker has to free memory and exit
+ *
+ * This function erases a physical eraseblock and perform torture testing if
+ * needed. It also takes care about marking the physical eraseblock bad if
+ * needed. Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
+			int cancel)
+{
+	int err;
+	struct ubi_wl_entry *e = wl_wrk->e;
+	int pnum = e->pnum;
+
+	if (cancel) {
+		dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec);
+		kfree(wl_wrk);
+		kmem_cache_free(wl_entries_slab, e);
+		return 0;
+	}
+
+	dbg_wl("erase PEB %d EC %d", pnum, e->ec);
+
+	err = sync_erase(ubi, e, wl_wrk->torture);
+	if (!err) {
+		/* Fine, we've erased it successfully */
+		kfree(wl_wrk);
+
+		spin_lock(&ubi->wl_lock);
+		ubi->abs_ec += 1;
+		free_tree_add(ubi, e);
+		spin_unlock(&ubi->wl_lock);
+
+		/*
+		 * One more erase operation has happened, take care about protected
+		 * physical eraseblocks.
+		 */
+		check_protection_over(ubi);
+
+		/* And take care about wear-leveling */
+		err = ensure_wear_leveling(ubi);
+		return err;
+	}
+
+	kfree(wl_wrk);
+	kmem_cache_free(wl_entries_slab, e);
+
+	if (err != -EIO) {
+		/*
+		 * If this is not %-EIO, we have no idea what to do. Scheduling
+		 * this physical eraseblock for erasure again would cause
+		 * errors again and again. Well, lets switch to RO mode.
+		 */
+		ubi_ro_mode(ubi);
+		return err;
+	}
+
+	/* It is %-EIO, the PEB went bad */
+
+	if (!ubi->bad_allowed) {
+		ubi_err("bad physical eraseblock %d detected", pnum);
+		ubi_ro_mode(ubi);
+		err = -EIO;
+	} else {
+		int need;
+
+		spin_lock(&ubi->volumes_lock);
+		need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1;
+		if (need > 0) {
+			need = ubi->avail_pebs >= need ? need : ubi->avail_pebs;
+			ubi->avail_pebs -= need;
+			ubi->rsvd_pebs += need;
+			ubi->beb_rsvd_pebs += need;
+			if (need > 0)
+				ubi_msg("reserve more %d PEBs", need);
+		}
+
+		if (ubi->beb_rsvd_pebs == 0) {
+			spin_unlock(&ubi->volumes_lock);
+			ubi_err("no reserved physical eraseblocks");
+			ubi_ro_mode(ubi);
+			return -EIO;
+		}
+
+		spin_unlock(&ubi->volumes_lock);
+		ubi_msg("mark PEB %d as bad", pnum);
+
+		err = ubi_io_mark_bad(ubi, pnum);
+		if (err) {
+			ubi_ro_mode(ubi);
+			return err;
+		}
+
+		spin_lock(&ubi->volumes_lock);
+		ubi->beb_rsvd_pebs -= 1;
+		ubi->bad_peb_count += 1;
+		ubi->good_peb_count -= 1;
+		ubi_calculate_reserved(ubi);
+		if (ubi->beb_rsvd_pebs == 0)
+			ubi_warn("last PEB from the reserved pool was used");
+		spin_unlock(&ubi->volumes_lock);
+	}
+
+	return err;
+}
+
+/**
+ * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling
+ * unit.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock to return
+ * @torture: if this physical eraseblock has to be tortured
+ *
+ * This function is called to return physical eraseblock @pnum to the pool of
+ * free physical eraseblocks. The @torture flag has to be set if an I/O error
+ * occurred to this @pnum and it has to be tested. This function returns zero
+ * in case of success and a negative error code in case of failure.
+ */
+int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
+{
+	int err;
+	struct ubi_wl_entry *e;
+
+	dbg_wl("PEB %d", pnum);
+	ubi_assert(pnum >= 0);
+	ubi_assert(pnum < ubi->peb_count);
+
+	spin_lock(&ubi->wl_lock);
+
+	e = ubi->lookuptbl[pnum];
+	if (e == ubi->move_from) {
+		/*
+		 * User is putting the physical eraseblock which was selected to
+		 * be moved. It will be scheduled for erasure in the
+		 * wear-leveling worker.
+		 */
+		dbg_wl("PEB %d is being moved", pnum);
+		ubi_assert(!ubi->move_from_put);
+		ubi->move_from_put = 1;
+		spin_unlock(&ubi->wl_lock);
+		return 0;
+	} else if (e == ubi->move_to) {
+		/*
+		 * User is putting the physical eraseblock which was selected
+		 * as the target the data is moved to. It may happen if the EBA
+		 * unit already re-mapped the LEB but the WL unit did has not
+		 * put the PEB to the "used" tree.
+		 */
+		dbg_wl("PEB %d is the target of data moving", pnum);
+		ubi_assert(!ubi->move_to_put);
+		ubi->move_to_put = 1;
+		spin_unlock(&ubi->wl_lock);
+		return 0;
+	} else {
+		if (in_wl_tree(e, &ubi->used))
+			used_tree_del(ubi, e);
+		else if (in_wl_tree(e, &ubi->scrub))
+			scrub_tree_del(ubi, e);
+		else
+			prot_tree_del(ubi, e->pnum);
+	}
+	spin_unlock(&ubi->wl_lock);
+
+	err = schedule_erase(ubi, e, torture);
+	if (err) {
+		spin_lock(&ubi->wl_lock);
+		used_tree_add(ubi, e);
+		spin_unlock(&ubi->wl_lock);
+	}
+
+	return err;
+}
+
+/**
+ * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock to schedule
+ *
+ * If a bit-flip in a physical eraseblock is detected, this physical eraseblock
+ * needs scrubbing. This function schedules a physical eraseblock for
+ * scrubbing which is done in background. This function returns zero in case of
+ * success and a negative error code in case of failure.
+ */
+int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
+{
+	struct ubi_wl_entry *e;
+
+	ubi_msg("schedule PEB %d for scrubbing", pnum);
+
+retry:
+	spin_lock(&ubi->wl_lock);
+	e = ubi->lookuptbl[pnum];
+	if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub)) {
+		spin_unlock(&ubi->wl_lock);
+		return 0;
+	}
+
+	if (e == ubi->move_to) {
+		/*
+		 * This physical eraseblock was used to move data to. The data
+		 * was moved but the PEB was not yet inserted to the proper
+		 * tree. We should just wait a little and let the WL worker
+		 * proceed.
+		 */
+		spin_unlock(&ubi->wl_lock);
+		dbg_wl("the PEB %d is not in proper tree, retry", pnum);
+		yield();
+		goto retry;
+	}
+
+	if (in_wl_tree(e, &ubi->used))
+		used_tree_del(ubi, e);
+	else
+		prot_tree_del(ubi, pnum);
+
+	scrub_tree_add(ubi, e);
+	spin_unlock(&ubi->wl_lock);
+
+	/*
+	 * Technically scrubbing is the same as wear-leveling, so it is done
+	 * by the WL worker.
+	 */
+	return ensure_wear_leveling(ubi);
+}
+
+/**
+ * ubi_wl_flush - flush all pending works.
+ * @ubi: UBI device description object
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubi_wl_flush(struct ubi_device *ubi)
+{
+	int err, pending_count;
+
+	pending_count = ubi->works_count;
+
+	dbg_wl("flush (%d pending works)", pending_count);
+
+	/*
+	 * Erase while the pending works queue is not empty, but not more then
+	 * the number of currently pending works.
+	 */
+	while (pending_count-- > 0) {
+		err = do_work(ubi);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * tree_destroy - destroy an RB-tree.
+ * @root: the root of the tree to destroy
+ */
+static void tree_destroy(struct rb_root *root)
+{
+	struct rb_node *rb;
+	struct ubi_wl_entry *e;
+
+	rb = root->rb_node;
+	while (rb) {
+		if (rb->rb_left)
+			rb = rb->rb_left;
+		else if (rb->rb_right)
+			rb = rb->rb_right;
+		else {
+			e = rb_entry(rb, struct ubi_wl_entry, rb);
+
+			rb = rb_parent(rb);
+			if (rb) {
+				if (rb->rb_left == &e->rb)
+					rb->rb_left = NULL;
+				else
+					rb->rb_right = NULL;
+			}
+
+			kmem_cache_free(wl_entries_slab, e);
+		}
+	}
+}
+
+/**
+ * ubi_thread - UBI background thread.
+ * @u: the UBI device description object pointer
+ */
+static int ubi_thread(void *u)
+{
+	int failures = 0;
+	struct ubi_device *ubi = u;
+
+	ubi_msg("background thread \"%s\" started, PID %d",
+		ubi->bgt_name, current->pid);
+
+	for (;;) {
+		int err;
+
+		if (kthread_should_stop())
+			goto out;
+
+		if (try_to_freeze())
+			continue;
+
+		spin_lock(&ubi->wl_lock);
+		if (list_empty(&ubi->works) || ubi->ro_mode ||
+			       !ubi->thread_enabled) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			spin_unlock(&ubi->wl_lock);
+			schedule();
+			continue;
+		}
+		spin_unlock(&ubi->wl_lock);
+
+		err = do_work(ubi);
+		if (err) {
+			ubi_err("%s: work failed with error code %d",
+				ubi->bgt_name, err);
+			if (failures++ > WL_MAX_FAILURES) {
+				/*
+				 * Too many failures, disable the thread and
+				 * switch to read-only mode.
+				 */
+				ubi_msg("%s: %d consecutive failures",
+					ubi->bgt_name, WL_MAX_FAILURES);
+				ubi_ro_mode(ubi);
+				break;
+			}
+		} else
+			failures = 0;
+
+		cond_resched();
+	}
+
+out:
+	dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
+	return 0;
+}
+
+/**
+ * cancel_pending - cancel all pending works.
+ * @ubi: UBI device description object
+ */
+static void cancel_pending(struct ubi_device *ubi)
+{
+	while (!list_empty(&ubi->works)) {
+		struct ubi_work *wrk;
+
+		wrk = list_entry(ubi->works.next, struct ubi_work, list);
+		list_del(&wrk->list);
+		wrk->func(ubi, wrk, 1);
+		ubi->works_count -= 1;
+		ubi_assert(ubi->works_count >= 0);
+	}
+}
+
+/**
+ * ubi_wl_init_scan - initialize the wear-leveling unit using scanning
+ * information.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ *
+ * This function returns zero in case of success, and a negative error code in
+ * case of failure.
+ */
+int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
+{
+	int err;
+	struct rb_node *rb1, *rb2;
+	struct ubi_scan_volume *sv;
+	struct ubi_scan_leb *seb, *tmp;
+	struct ubi_wl_entry *e;
+
+
+	ubi->used = ubi->free = ubi->scrub = RB_ROOT;
+	ubi->prot.pnum = ubi->prot.aec = RB_ROOT;
+	spin_lock_init(&ubi->wl_lock);
+	ubi->max_ec = si->max_ec;
+	INIT_LIST_HEAD(&ubi->works);
+
+	sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num);
+
+	ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name);
+	if (IS_ERR(ubi->bgt_thread)) {
+		err = PTR_ERR(ubi->bgt_thread);
+		ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name,
+			err);
+		return err;
+	}
+
+	if (ubi_devices_cnt == 0) {
+		wl_entries_slab = kmem_cache_create("ubi_wl_entry_slab",
+						    sizeof(struct ubi_wl_entry),
+						    0, 0, NULL, NULL);
+		if (!wl_entries_slab)
+			return -ENOMEM;
+	}
+
+	err = -ENOMEM;
+	ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL);
+	if (!ubi->lookuptbl)
+		goto out_free;
+
+	list_for_each_entry_safe(seb, tmp, &si->erase, u.list) {
+		cond_resched();
+
+		e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL);
+		if (!e)
+			goto out_free;
+
+		e->pnum = seb->pnum;
+		e->ec = seb->ec;
+		ubi->lookuptbl[e->pnum] = e;
+		if (schedule_erase(ubi, e, 0)) {
+			kmem_cache_free(wl_entries_slab, e);
+			goto out_free;
+		}
+	}
+
+	list_for_each_entry(seb, &si->free, u.list) {
+		cond_resched();
+
+		e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL);
+		if (!e)
+			goto out_free;
+
+		e->pnum = seb->pnum;
+		e->ec = seb->ec;
+		ubi_assert(e->ec >= 0);
+		free_tree_add(ubi, e);
+		ubi->lookuptbl[e->pnum] = e;
+	}
+
+	list_for_each_entry(seb, &si->corr, u.list) {
+		cond_resched();
+
+		e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL);
+		if (!e)
+			goto out_free;
+
+		e->pnum = seb->pnum;
+		e->ec = seb->ec;
+		ubi->lookuptbl[e->pnum] = e;
+		if (schedule_erase(ubi, e, 0)) {
+			kmem_cache_free(wl_entries_slab, e);
+			goto out_free;
+		}
+	}
+
+	ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
+		ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
+			cond_resched();
+
+			e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL);
+			if (!e)
+				goto out_free;
+
+			e->pnum = seb->pnum;
+			e->ec = seb->ec;
+			ubi->lookuptbl[e->pnum] = e;
+			if (!seb->scrub) {
+				dbg_wl("add PEB %d EC %d to the used tree",
+				       e->pnum, e->ec);
+				used_tree_add(ubi, e);
+			} else {
+				dbg_wl("add PEB %d EC %d to the scrub tree",
+				       e->pnum, e->ec);
+				scrub_tree_add(ubi, e);
+			}
+		}
+	}
+
+	if (WL_RESERVED_PEBS > ubi->avail_pebs) {
+		ubi_err("no enough physical eraseblocks (%d, need %d)",
+			ubi->avail_pebs, WL_RESERVED_PEBS);
+		goto out_free;
+	}
+	ubi->avail_pebs -= WL_RESERVED_PEBS;
+	ubi->rsvd_pebs += WL_RESERVED_PEBS;
+
+	/* Schedule wear-leveling if needed */
+	err = ensure_wear_leveling(ubi);
+	if (err)
+		goto out_free;
+
+	return 0;
+
+out_free:
+	cancel_pending(ubi);
+	tree_destroy(&ubi->used);
+	tree_destroy(&ubi->free);
+	tree_destroy(&ubi->scrub);
+	kfree(ubi->lookuptbl);
+	if (ubi_devices_cnt == 0)
+		kmem_cache_destroy(wl_entries_slab);
+	return err;
+}
+
+/**
+ * protection_trees_destroy - destroy the protection RB-trees.
+ * @ubi: UBI device description object
+ */
+static void protection_trees_destroy(struct ubi_device *ubi)
+{
+	struct rb_node *rb;
+	struct ubi_wl_prot_entry *pe;
+
+	rb = ubi->prot.aec.rb_node;
+	while (rb) {
+		if (rb->rb_left)
+			rb = rb->rb_left;
+		else if (rb->rb_right)
+			rb = rb->rb_right;
+		else {
+			pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec);
+
+			rb = rb_parent(rb);
+			if (rb) {
+				if (rb->rb_left == &pe->rb_aec)
+					rb->rb_left = NULL;
+				else
+					rb->rb_right = NULL;
+			}
+
+			kmem_cache_free(wl_entries_slab, pe->e);
+			kfree(pe);
+		}
+	}
+}
+
+/**
+ * ubi_wl_close - close the wear-leveling unit.
+ * @ubi: UBI device description object
+ */
+void ubi_wl_close(struct ubi_device *ubi)
+{
+	dbg_wl("disable \"%s\"", ubi->bgt_name);
+	if (ubi->bgt_thread)
+		kthread_stop(ubi->bgt_thread);
+
+	dbg_wl("close the UBI wear-leveling unit");
+
+	cancel_pending(ubi);
+	protection_trees_destroy(ubi);
+	tree_destroy(&ubi->used);
+	tree_destroy(&ubi->free);
+	tree_destroy(&ubi->scrub);
+	kfree(ubi->lookuptbl);
+	if (ubi_devices_cnt == 1)
+		kmem_cache_destroy(wl_entries_slab);
+}
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+
+/**
+ * paranoid_check_ec - make sure that the erase counter of a physical eraseblock
+ * is correct.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ * @ec: the erase counter to check
+ *
+ * This function returns zero if the erase counter of physical eraseblock @pnum
+ * is equivalent to @ec, %1 if not, and a negative error code if an error
+ * occurred.
+ */
+static int paranoid_check_ec(const struct ubi_device *ubi, int pnum, int ec)
+{
+	int err;
+	long long read_ec;
+	struct ubi_ec_hdr *ec_hdr;
+
+	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
+	if (!ec_hdr)
+		return -ENOMEM;
+
+	err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0);
+	if (err && err != UBI_IO_BITFLIPS) {
+		/* The header does not have to exist */
+		err = 0;
+		goto out_free;
+	}
+
+	read_ec = ubi64_to_cpu(ec_hdr->ec);
+	if (ec != read_ec) {
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		ubi_err("read EC is %lld, should be %d", read_ec, ec);
+		ubi_dbg_dump_stack();
+		err = 1;
+	} else
+		err = 0;
+
+out_free:
+	kfree(ec_hdr);
+	return err;
+}
+
+/**
+ * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present
+ * in a WL RB-tree.
+ * @e: the wear-leveling entry to check
+ * @root: the root of the tree
+ *
+ * This function returns zero if @e is in the @root RB-tree and %1 if it
+ * is not.
+ */
+static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
+				     struct rb_root *root)
+{
+	if (in_wl_tree(e, root))
+		return 0;
+
+	ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ",
+		e->pnum, e->ec, root);
+	ubi_dbg_dump_stack();
+	return 1;
+}
+
+#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
-- 
cgit v1.2.3


From d468a030026017008286919aa6127b1190efb2c2 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Fri, 27 Apr 2007 15:11:44 +0300
Subject: UBI: remove unused variable

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/gluebi.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c
index c8bbfd1e67a..fc9478d605f 100644
--- a/drivers/mtd/ubi/gluebi.c
+++ b/drivers/mtd/ubi/gluebi.c
@@ -271,7 +271,6 @@ out_err:
  */
 int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
 {
-	int err;
 	struct mtd_info *mtd = &vol->gluebi_mtd;
 
 	mtd->name = kmemdup(vol->name, vol->name_len + 1, GFP_KERNEL);
-- 
cgit v1.2.3


From 6fc321fd7dd91f0592f37503219196835314fbb7 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:25 +0200
Subject: [S390] cio/ipl: Clean interface between cio and ipl code.

Clean interface between cio and ipl code, so Peter stops complaining.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 drivers/s390/cio/cio.c | 38 ++++++++++----------------------------
 1 file changed, 10 insertions(+), 28 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 9cb129ab5be..21af446c1f2 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -1048,37 +1048,19 @@ void reipl_ccw_dev(struct ccw_dev_id *devid)
 	do_reipl_asm(*((__u32*)&schid));
 }
 
-static struct schib __initdata ipl_schib;
-
-/*
- * ipl_save_parameters gets called very early. It is not allowed to access
- * anything in the bss section at all. The bss section is not cleared yet,
- * but may contain some ipl parameters written by the firmware.
- * These parameters (if present) are copied to 0x2000.
- * To avoid corruption of the ipl parameters, all variables used by this
- * function must reside on the stack or in the data section.
- */
-void ipl_save_parameters(void)
+int __init cio_get_iplinfo(struct cio_iplinfo *iplinfo)
 {
 	struct subchannel_id schid;
-	unsigned int *ipl_ptr;
-	void *src, *dst;
+	struct schib schib;
 
 	schid = *(struct subchannel_id *)__LC_SUBCHANNEL_ID;
 	if (!schid.one)
-		return;
-	if (stsch(schid, &ipl_schib))
-		return;
-	if (!ipl_schib.pmcw.dnv)
-		return;
-	ipl_devno = ipl_schib.pmcw.dev;
-	ipl_flags |= IPL_DEVNO_VALID;
-	if (!ipl_schib.pmcw.qf)
-		return;
-	ipl_flags |= IPL_PARMBLOCK_VALID;
-	ipl_ptr = (unsigned int *)__LC_IPL_PARMBLOCK_PTR;
-	src = (void *)(unsigned long)*ipl_ptr;
-	dst = (void *)IPL_PARMBLOCK_ORIGIN;
-	memmove(dst, src, PAGE_SIZE);
-	*ipl_ptr = IPL_PARMBLOCK_ORIGIN;
+		return -ENODEV;
+	if (stsch(schid, &schib))
+		return -ENODEV;
+	if (!schib.pmcw.dnv)
+		return -ENODEV;
+	iplinfo->devno = schib.pmcw.dev;
+	iplinfo->is_qdio = schib.pmcw.qf;
+	return 0;
 }
-- 
cgit v1.2.3


From f86635fad14c4a6810cf0e08488fc9129a3b3b32 Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:26 +0200
Subject: [S390] cio: Introduce struct chp_id.

Introduce data type for channel-path IDs.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 drivers/s390/cio/chpid.h      |  51 ++++++++++++
 drivers/s390/cio/chsc.c       | 178 +++++++++++++++++++++++-------------------
 drivers/s390/cio/chsc.h       |   8 +-
 drivers/s390/cio/cio.c        |   5 +-
 drivers/s390/cio/device_fsm.c |   6 +-
 drivers/s390/cio/ioasm.h      |   5 +-
 6 files changed, 167 insertions(+), 86 deletions(-)
 create mode 100644 drivers/s390/cio/chpid.h

(limited to 'drivers')

diff --git a/drivers/s390/cio/chpid.h b/drivers/s390/cio/chpid.h
new file mode 100644
index 00000000000..44cc00ed9b5
--- /dev/null
+++ b/drivers/s390/cio/chpid.h
@@ -0,0 +1,51 @@
+/*
+ *  drivers/s390/cio/chpid.h
+ *
+ *    Copyright IBM Corp. 2007
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef S390_CHP_ID_H
+#define S390_CHP_ID_H S390_CHP_ID_H
+
+#include <linux/string.h>
+#include <asm/types.h>
+#include "css.h"
+
+struct chp_id {
+	u8 reserved1;
+	u8 cssid;
+	u8 reserved2;
+	u8 id;
+} __attribute__((packed));
+
+static inline void chp_id_init(struct chp_id *chpid)
+{
+	memset(chpid, 0, sizeof(struct chp_id));
+}
+
+static inline int chp_id_is_equal(struct chp_id *a, struct chp_id *b)
+{
+	return (a->id == b->id) && (a->cssid == b->cssid);
+}
+
+static inline void chp_id_next(struct chp_id *chpid)
+{
+	if (chpid->id < __MAX_CHPID)
+		chpid->id++;
+	else {
+		chpid->id = 0;
+		chpid->cssid++;
+	}
+}
+
+static inline int chp_id_is_valid(struct chp_id *chpid)
+{
+	return (chpid->cssid <= __MAX_CSSID);
+}
+
+
+#define chp_id_for_each(c) \
+	for (chp_id_init(c); chp_id_is_valid(c); chp_id_next(c))
+
+#endif /* S390_CHP_ID_H */
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 6f05a44e381..bb6f876e53c 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -20,43 +20,48 @@
 #include "cio.h"
 #include "cio_debug.h"
 #include "ioasm.h"
+#include "chpid.h"
 #include "chsc.h"
 
 static void *sei_page;
 
-static int new_channel_path(int chpid);
+static int new_channel_path(struct chp_id chpid);
 
-static inline void
-set_chp_logically_online(int chp, int onoff)
+static inline struct channel_path *chpid_to_chp(struct chp_id chpid)
 {
-	css[0]->chps[chp]->state = onoff;
+	return css[chpid.cssid]->chps[chpid.id];
 }
 
-static int
-get_chp_status(int chp)
+static void set_chp_logically_online(struct chp_id chpid, int onoff)
 {
-	return (css[0]->chps[chp] ? css[0]->chps[chp]->state : -ENODEV);
+	chpid_to_chp(chpid)->state = onoff;
 }
 
-void
-chsc_validate_chpids(struct subchannel *sch)
+static int get_chp_status(struct chp_id chpid)
+{
+	return (chpid_to_chp(chpid) ? chpid_to_chp(chpid)->state : -ENODEV);
+}
+
+void chsc_validate_chpids(struct subchannel *sch)
 {
 	int mask, chp;
+	struct chp_id chpid;
 
+	chp_id_init(&chpid);
 	for (chp = 0; chp <= 7; chp++) {
 		mask = 0x80 >> chp;
-		if (!get_chp_status(sch->schib.pmcw.chpid[chp]))
+		chpid.id = sch->schib.pmcw.chpid[chp];
+		if (!get_chp_status(chpid))
 			/* disable using this path */
 			sch->opm &= ~mask;
 	}
 }
 
-void
-chpid_is_actually_online(int chp)
+void chpid_is_actually_online(struct chp_id chpid)
 {
 	int state;
 
-	state = get_chp_status(chp);
+	state = get_chp_status(chpid);
 	if (state < 0) {
 		need_rescan = 1;
 		queue_work(slow_path_wq, &slow_path_work);
@@ -200,11 +205,14 @@ css_get_ssd_info(struct subchannel *sch)
 	spin_unlock_irq(sch->lock);
 	free_page((unsigned long)page);
 	if (!ret) {
-		int j, chpid, mask;
+		int j, mask;
+		struct chp_id chpid;
+
+		chp_id_init(&chpid);
 		/* Allocate channel path structures, if needed. */
 		for (j = 0; j < 8; j++) {
 			mask = 0x80 >> j;
-			chpid = sch->ssd_info.chpid[j];
+			chpid.id = sch->ssd_info.chpid[j];
 			if ((sch->schib.pmcw.pim & mask) &&
 			    (get_chp_status(chpid) < 0))
 			    new_channel_path(chpid);
@@ -227,7 +235,7 @@ s390_subchannel_remove_chpid(struct device *dev, void *data)
 	for (j = 0; j < 8; j++) {
 		mask = 0x80 >> j;
 		if ((sch->schib.pmcw.pim & mask) &&
-		    (sch->schib.pmcw.chpid[j] == chpid->id))
+		    (sch->schib.pmcw.chpid[j] == chpid->chpid.id))
 			break;
 	}
 	if (j >= 8)
@@ -277,18 +285,17 @@ out_unreg:
 	return 0;
 }
 
-static void
-s390_set_chpid_offline( __u8 chpid)
+static void s390_set_chpid_offline(struct chp_id chpid)
 {
 	char dbf_txt[15];
 	struct device *dev;
 
-	sprintf(dbf_txt, "chpr%x", chpid);
+	sprintf(dbf_txt, "chpr%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
 
 	if (get_chp_status(chpid) <= 0)
 		return;
-	dev = get_device(&css[0]->chps[chpid]->dev);
+	dev = get_device(&(chpid_to_chp(chpid)->dev));
 	bus_for_each_dev(&css_bus_type, NULL, to_channelpath(dev),
 			 s390_subchannel_remove_chpid);
 
@@ -316,7 +323,7 @@ s390_process_res_acc_sch(struct res_acc_data *res_data, struct subchannel *sch)
 		 * check if chpid is in information updated by ssd
 		 */
 		if (sch->ssd_info.valid &&
-		    sch->ssd_info.chpid[chp] == res_data->chp->id &&
+		    sch->ssd_info.chpid[chp] == res_data->chp->chpid.id &&
 		    (sch->ssd_info.fla[chp] & res_data->fla_mask)
 		    == res_data->fla) {
 			found = 1;
@@ -409,7 +416,8 @@ s390_process_res_acc (struct res_acc_data *res_data)
 	int rc;
 	char dbf_txt[15];
 
-	sprintf(dbf_txt, "accpr%x", res_data->chp->id);
+	sprintf(dbf_txt, "accpr%x.%02x", res_data->chp->chpid.cssid,
+		res_data->chp->chpid.id);
 	CIO_TRACE_EVENT( 2, dbf_txt);
 	if (res_data->fla != 0) {
 		sprintf(dbf_txt, "fla%x", res_data->fla);
@@ -482,17 +490,21 @@ struct chsc_sei_area {
 
 static int chsc_process_sei_link_incident(struct chsc_sei_area *sei_area)
 {
-	int chpid;
+	struct chp_id chpid;
+	int id;
 
 	CIO_CRW_EVENT(4, "chsc: link incident (rs=%02x, rs_id=%04x)\n",
 		      sei_area->rs, sei_area->rsid);
 	if (sei_area->rs != 4)
 		return 0;
-	chpid = __get_chpid_from_lir(sei_area->ccdf);
-	if (chpid < 0)
+	id = __get_chpid_from_lir(sei_area->ccdf);
+	if (id < 0)
 		CIO_CRW_EVENT(4, "chsc: link incident - invalid LIR\n");
-	else
+	else {
+		chp_id_init(&chpid);
+		chpid.id = id;
 		s390_set_chpid_offline(chpid);
+	}
 
 	return 0;
 }
@@ -501,6 +513,7 @@ static int chsc_process_sei_res_acc(struct chsc_sei_area *sei_area)
 {
 	struct res_acc_data res_data;
 	struct device *dev;
+	struct chp_id chpid;
 	int status;
 	int rc;
 
@@ -508,13 +521,15 @@ static int chsc_process_sei_res_acc(struct chsc_sei_area *sei_area)
 		      "rs_id=%04x)\n", sei_area->rs, sei_area->rsid);
 	if (sei_area->rs != 4)
 		return 0;
+	chp_id_init(&chpid);
+	chpid.id = sei_area->rsid;
 	/* allocate a new channel path structure, if needed */
-	status = get_chp_status(sei_area->rsid);
+	status = get_chp_status(chpid);
 	if (status < 0)
-		new_channel_path(sei_area->rsid);
+		new_channel_path(chpid);
 	else if (!status)
 		return 0;
-	dev = get_device(&css[0]->chps[sei_area->rsid]->dev);
+	dev = get_device(&(chpid_to_chp(chpid)->dev));
 	memset(&res_data, 0, sizeof(struct res_acc_data));
 	res_data.chp = to_channelpath(dev);
 	if ((sei_area->vf & 0xc0) != 0) {
@@ -631,7 +646,7 @@ __chp_add(struct subchannel_id schid, void *data)
 	for (i=0; i<8; i++) {
 		mask = 0x80 >> i;
 		if ((sch->schib.pmcw.pim & mask) &&
-		    (sch->schib.pmcw.chpid[i] == chp->id)) {
+		    (sch->schib.pmcw.chpid[i] == chp->chpid.id)) {
 			if (stsch(sch->schid, &sch->schib) != 0) {
 				/* Endgame. */
 				spin_unlock_irq(sch->lock);
@@ -657,8 +672,7 @@ __chp_add(struct subchannel_id schid, void *data)
 	return 0;
 }
 
-static int
-chp_add(int chpid)
+static int chp_add(struct chp_id chpid)
 {
 	int rc;
 	char dbf_txt[15];
@@ -667,10 +681,10 @@ chp_add(int chpid)
 	if (!get_chp_status(chpid))
 		return 0; /* no need to do the rest */
 	
-	sprintf(dbf_txt, "cadd%x", chpid);
+	sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
 
-	dev = get_device(&css[0]->chps[chpid]->dev);
+	dev = get_device(&(chpid_to_chp(chpid)->dev));
 	rc = for_each_subchannel(__chp_add, to_channelpath(dev));
 	if (css_slow_subchannels_exist())
 		rc = -EAGAIN;
@@ -683,9 +697,12 @@ chp_add(int chpid)
 /* 
  * Handling of crw machine checks with channel path source.
  */
-int
-chp_process_crw(int chpid, int on)
+int chp_process_crw(int id, int on)
 {
+	struct chp_id chpid;
+
+	chp_id_init(&chpid);
+	chpid.id = id;
 	if (on == 0) {
 		/* Path has gone. We use the link incident routine.*/
 		s390_set_chpid_offline(chpid);
@@ -733,8 +750,8 @@ static void terminate_internal_io(struct subchannel *sch)
 		sch->driver->termination(&sch->dev);
 }
 
-static void
-__s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on)
+static void __s390_subchannel_vary_chpid(struct subchannel *sch,
+					 struct chp_id chpid, int on)
 {
 	int chp, old_lpm;
 	unsigned long flags;
@@ -745,7 +762,7 @@ __s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on)
 	spin_lock_irqsave(sch->lock, flags);
 	old_lpm = sch->lpm;
 	for (chp = 0; chp < 8; chp++) {
-		if (sch->ssd_info.chpid[chp] != chpid)
+		if (sch->ssd_info.chpid[chp] != chpid.id)
 			continue;
 
 		if (on) {
@@ -780,11 +797,10 @@ __s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on)
 	spin_unlock_irqrestore(sch->lock, flags);
 }
 
-static int
-s390_subchannel_vary_chpid_off(struct device *dev, void *data)
+static int s390_subchannel_vary_chpid_off(struct device *dev, void *data)
 {
 	struct subchannel *sch;
-	__u8 *chpid;
+	struct chp_id *chpid;
 
 	sch = to_subchannel(dev);
 	chpid = data;
@@ -793,11 +809,10 @@ s390_subchannel_vary_chpid_off(struct device *dev, void *data)
 	return 0;
 }
 
-static int
-s390_subchannel_vary_chpid_on(struct device *dev, void *data)
+static int s390_subchannel_vary_chpid_on(struct device *dev, void *data)
 {
 	struct subchannel *sch;
-	__u8 *chpid;
+	struct chp_id *chpid;
 
 	sch = to_subchannel(dev);
 	chpid = data;
@@ -833,23 +848,25 @@ __s390_vary_chpid_on(struct subchannel_id schid, void *data)
  * Function: s390_vary_chpid
  * Varies the specified chpid online or offline
  */
-static int
-s390_vary_chpid( __u8 chpid, int on)
+static int s390_vary_chpid(struct chp_id chpid, int on)
 {
 	char dbf_text[15];
 	int status;
 
-	sprintf(dbf_text, on?"varyon%x":"varyoff%x", chpid);
+	sprintf(dbf_text, on?"varyon%x.%02x":"varyoff%x.%02x", chpid.cssid,
+		chpid.id);
 	CIO_TRACE_EVENT( 2, dbf_text);
 
 	status = get_chp_status(chpid);
 	if (status < 0) {
-		printk(KERN_ERR "Can't vary unknown chpid %02X\n", chpid);
+		printk(KERN_ERR "Can't vary unknown chpid %x.%02x\n",
+		       chpid.cssid, chpid.id);
 		return -EINVAL;
 	}
 
 	if (!on && !status) {
-		printk(KERN_ERR "chpid %x is already offline\n", chpid);
+		printk(KERN_ERR "chpid %x.%02x is already offline\n",
+		       chpid.cssid, chpid.id);
 		return -EINVAL;
 	}
 
@@ -904,20 +921,19 @@ static struct bin_attribute chp_measurement_chars_attr = {
 	.read = chp_measurement_chars_read,
 };
 
-static void
-chp_measurement_copy_block(struct cmg_entry *buf,
-			   struct channel_subsystem *css, int chpid)
+static void chp_measurement_copy_block(struct cmg_entry *buf,
+			struct channel_subsystem *css, struct chp_id chpid)
 {
 	void *area;
 	struct cmg_entry *entry, reference_buf;
 	int idx;
 
-	if (chpid < 128) {
+	if (chpid.id < 128) {
 		area = css->cub_addr1;
-		idx = chpid;
+		idx = chpid.id;
 	} else {
 		area = css->cub_addr2;
-		idx = chpid - 128;
+		idx = chpid.id - 128;
 	}
 	entry = area + (idx * sizeof(struct cmg_entry));
 	do {
@@ -941,7 +957,7 @@ chp_measurement_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
 	/* Only allow single reads. */
 	if (off || count < size)
 		return 0;
-	chp_measurement_copy_block((struct cmg_entry *)buf, css, chp->id);
+	chp_measurement_copy_block((struct cmg_entry *)buf, css, chp->chpid);
 	count = size;
 	return count;
 }
@@ -1137,7 +1153,7 @@ chp_status_show(struct device *dev, struct device_attribute *attr, char *buf)
 
 	if (!chp)
 		return 0;
-	return (get_chp_status(chp->id) ? sprintf(buf, "online\n") :
+	return (get_chp_status(chp->chpid) ? sprintf(buf, "online\n") :
 		sprintf(buf, "offline\n"));
 }
 
@@ -1154,9 +1170,9 @@ chp_status_write(struct device *dev, struct device_attribute *attr, const char *
 		return count;
 
 	if (!strnicmp(cmd, "on", 2))
-		error = s390_vary_chpid(cp->id, 1);
+		error = s390_vary_chpid(cp->chpid, 1);
 	else if (!strnicmp(cmd, "off", 3))
-		error = s390_vary_chpid(cp->id, 0);
+		error = s390_vary_chpid(cp->chpid, 0);
 	else
 		error = -EINVAL;
 
@@ -1227,9 +1243,8 @@ chp_release(struct device *dev)
 	kfree(cp);
 }
 
-static int
-chsc_determine_channel_path_description(int chpid,
-					struct channel_path_desc *desc)
+static int chsc_determine_channel_path_description(struct chp_id chpid,
+						struct channel_path_desc *desc)
 {
 	int ccode, ret;
 
@@ -1252,8 +1267,8 @@ chsc_determine_channel_path_description(int chpid,
 	scpd_area->request.length = 0x0010;
 	scpd_area->request.code = 0x0002;
 
-	scpd_area->first_chpid = chpid;
-	scpd_area->last_chpid = chpid;
+	scpd_area->first_chpid = chpid.id;
+	scpd_area->last_chpid = chpid.id;
 
 	ccode = chsc(scpd_area);
 	if (ccode > 0) {
@@ -1349,8 +1364,8 @@ chsc_get_channel_measurement_chars(struct channel_path *chp)
 	scmc_area->request.length = 0x0010;
 	scmc_area->request.code = 0x0022;
 
-	scmc_area->first_chpid = chp->id;
-	scmc_area->last_chpid = chp->id;
+	scmc_area->first_chpid = chp->chpid.id;
+	scmc_area->last_chpid = chp->chpid.id;
 
 	ccode = chsc(scmc_area);
 	if (ccode > 0) {
@@ -1396,8 +1411,7 @@ out:
  * Entries for chpids on the system bus.
  * This replaces /proc/chpids.
  */
-static int
-new_channel_path(int chpid)
+static int new_channel_path(struct chp_id chpid)
 {
 	struct channel_path *chp;
 	int ret;
@@ -1407,11 +1421,12 @@ new_channel_path(int chpid)
 		return -ENOMEM;
 
 	/* fill in status, etc. */
-	chp->id = chpid;
+	chp->chpid = chpid;
 	chp->state = 1;
-	chp->dev.parent = &css[0]->device;
+	chp->dev.parent = &css[chpid.cssid]->device;
 	chp->dev.release = chp_release;
-	snprintf(chp->dev.bus_id, BUS_ID_SIZE, "chp0.%x", chpid);
+	snprintf(chp->dev.bus_id, BUS_ID_SIZE, "chp%x.%02x", chpid.cssid,
+		 chpid.id);
 
 	/* Obtain channel path description and fill it in. */
 	ret = chsc_determine_channel_path_description(chpid, &chp->desc);
@@ -1437,8 +1452,8 @@ new_channel_path(int chpid)
 	/* make it known to the system */
 	ret = device_register(&chp->dev);
 	if (ret) {
-		printk(KERN_WARNING "%s: could not register %02x\n",
-		       __func__, chpid);
+		printk(KERN_WARNING "%s: could not register %x.%02x\n",
+		       __func__, chpid.cssid, chpid.id);
 		goto out_free;
 	}
 	ret = sysfs_create_group(&chp->dev.kobj, &chp_attr_group);
@@ -1446,18 +1461,18 @@ new_channel_path(int chpid)
 		device_unregister(&chp->dev);
 		goto out_free;
 	}
-	mutex_lock(&css[0]->mutex);
-	if (css[0]->cm_enabled) {
+	mutex_lock(&css[chpid.cssid]->mutex);
+	if (css[chpid.cssid]->cm_enabled) {
 		ret = chsc_add_chp_cmg_attr(chp);
 		if (ret) {
 			sysfs_remove_group(&chp->dev.kobj, &chp_attr_group);
 			device_unregister(&chp->dev);
-			mutex_unlock(&css[0]->mutex);
+			mutex_unlock(&css[chpid.cssid]->mutex);
 			goto out_free;
 		}
 	}
-	css[0]->chps[chpid] = chp;
-	mutex_unlock(&css[0]->mutex);
+	css[chpid.cssid]->chps[chpid.id] = chp;
+	mutex_unlock(&css[chpid.cssid]->mutex);
 	return ret;
 out_free:
 	kfree(chp);
@@ -1469,8 +1484,11 @@ chsc_get_chp_desc(struct subchannel *sch, int chp_no)
 {
 	struct channel_path *chp;
 	struct channel_path_desc *desc;
+	struct chp_id chpid;
 
-	chp = css[0]->chps[sch->schib.pmcw.chpid[chp_no]];
+	chp_id_init(&chpid);
+	chpid.id = sch->schib.pmcw.chpid[chp_no];
+	chp = chpid_to_chp(chpid);
 	if (!chp)
 		return NULL;
 	desc = kmalloc(sizeof(struct channel_path_desc), GFP_KERNEL);
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 0fb2b024208..2949c85b6d9 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -1,6 +1,10 @@
 #ifndef S390_CHSC_H
 #define S390_CHSC_H
 
+#include <linux/types.h>
+#include <linux/device.h>
+#include "chpid.h"
+
 #define CHSC_SEI_ACC_CHPID        1
 #define CHSC_SEI_ACC_LINKADDR     2
 #define CHSC_SEI_ACC_FULLLINKADDR 3
@@ -34,7 +38,7 @@ struct channel_path_desc {
 } __attribute__ ((packed));
 
 struct channel_path {
-	int id;
+	struct chp_id chpid;
 	int state;
 	struct channel_path_desc desc;
 	/* Channel-measurement related stuff: */
@@ -46,7 +50,7 @@ struct channel_path {
 
 extern void s390_process_css( void );
 extern void chsc_validate_chpids(struct subchannel *);
-extern void chpid_is_actually_online(int);
+extern void chpid_is_actually_online(struct chp_id);
 extern int css_get_ssd_info(struct subchannel *);
 extern int chsc_process_crw(void);
 extern int chp_process_crw(int, int);
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 21af446c1f2..bab729202f4 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -954,6 +954,7 @@ static void css_reset(void)
 {
 	int i, ret;
 	unsigned long long timeout;
+	struct chp_id chpid;
 
 	/* Reset subchannels. */
 	for_each_subchannel(__shutdown_subchannel_easy,  NULL);
@@ -963,8 +964,10 @@ static void css_reset(void)
 	__ctl_set_bit(14, 28);
 	/* Temporarily reenable machine checks. */
 	local_mcck_enable();
+	chp_id_init(&chpid);
 	for (i = 0; i <= __MAX_CHPID; i++) {
-		ret = rchp(i);
+		chpid.id = i;
+		ret = rchp(chpid);
 		if ((ret == 0) || (ret == 2))
 			/*
 			 * rchp either succeeded, or another rchp is already
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 089a3ddd626..5f5ee1eef07 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -22,6 +22,7 @@
 #include "device.h"
 #include "chsc.h"
 #include "ioasm.h"
+#include "chpid.h"
 
 int
 device_is_online(struct subchannel *sch)
@@ -210,14 +211,17 @@ static void
 __recover_lost_chpids(struct subchannel *sch, int old_lpm)
 {
 	int mask, i;
+	struct chp_id chpid;
 
+	chp_id_init(&chpid);
 	for (i = 0; i<8; i++) {
 		mask = 0x80 >> i;
 		if (!(sch->lpm & mask))
 			continue;
 		if (old_lpm & mask)
 			continue;
-		chpid_is_actually_online(sch->schib.pmcw.chpid[i]);
+		chpid.id = sch->schib.pmcw.chpid[i];
+		chpid_is_actually_online(chpid);
 	}
 }
 
diff --git a/drivers/s390/cio/ioasm.h b/drivers/s390/cio/ioasm.h
index ad6d8294006..78c7db6daa8 100644
--- a/drivers/s390/cio/ioasm.h
+++ b/drivers/s390/cio/ioasm.h
@@ -2,6 +2,7 @@
 #define S390_CIO_IOASM_H
 
 #include "schid.h"
+#include "chpid.h"
 
 /*
  * TPI info structure
@@ -189,9 +190,9 @@ static inline int chsc(void *chsc_area)
 	return cc;
 }
 
-static inline int rchp(int chpid)
+static inline int rchp(struct chp_id chpid)
 {
-	register unsigned int reg1 asm ("1") = chpid;
+	register struct chp_id reg1 asm ("1") = chpid;
 	int ccode;
 
 	asm volatile(
-- 
cgit v1.2.3


From d120b2a4e60cc9e62e7cc5dcf049100af3745cc4 Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:27 +0200
Subject: [S390] cio: Allow 0 and 1 as input for channel path status attribute.

Channel path status can now be modified by writing '0' and '1'
to the sysfs status attribute in addition to 'offline' and
'online' respectively.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 drivers/s390/cio/chsc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index bb6f876e53c..b329851f7b5 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -1169,9 +1169,9 @@ chp_status_write(struct device *dev, struct device_attribute *attr, const char *
 	if (!num_args)
 		return count;
 
-	if (!strnicmp(cmd, "on", 2))
+	if (!strnicmp(cmd, "on", 2) || !strcmp(cmd, "1"))
 		error = s390_vary_chpid(cp->chpid, 1);
-	else if (!strnicmp(cmd, "off", 3))
+	else if (!strnicmp(cmd, "off", 3) || !strcmp(cmd, "0"))
 		error = s390_vary_chpid(cp->chpid, 0);
 	else
 		error = -EINVAL;
-- 
cgit v1.2.3


From e6b6e10ac1de116fc6d2288f185393014851cccf Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:28 +0200
Subject: [S390] cio: Introduce separate files for channel-path related code.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 drivers/s390/cio/Makefile     |   2 +-
 drivers/s390/cio/chp.c        | 433 ++++++++++++++++++++++++++++++++++++++
 drivers/s390/cio/chp.h        |  37 ++++
 drivers/s390/cio/chsc.c       | 473 ++++--------------------------------------
 drivers/s390/cio/chsc.h       |  28 +--
 drivers/s390/cio/cio.c        |   8 +-
 drivers/s390/cio/css.h        |   1 +
 drivers/s390/cio/device_fsm.c |   6 +-
 drivers/s390/cio/device_ops.c |   7 +-
 drivers/s390/s390mach.c       |   1 +
 10 files changed, 534 insertions(+), 462 deletions(-)
 create mode 100644 drivers/s390/cio/chp.c
 create mode 100644 drivers/s390/cio/chp.h

(limited to 'drivers')

diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
index c490c2a1c2f..fe7b3ffa1ea 100644
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the S/390 common i/o drivers
 #
 
-obj-y += airq.o blacklist.o chsc.o cio.o css.o
+obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o
 ccw_device-objs += device.o device_fsm.o device_ops.o
 ccw_device-objs += device_id.o device_pgid.o device_status.o
 obj-y += ccw_device.o cmf.o
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
new file mode 100644
index 00000000000..6e04521accd
--- /dev/null
+++ b/drivers/s390/cio/chp.c
@@ -0,0 +1,433 @@
+/*
+ *  drivers/s390/cio/chp.c
+ *
+ *    Copyright IBM Corp. 1999,2007
+ *    Author(s): Cornelia Huck (cornelia.huck@de.ibm.com)
+ *		 Arnd Bergmann (arndb@de.ibm.com)
+ *		 Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#include <linux/bug.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <asm/errno.h>
+
+#include "chpid.h"
+#include "cio.h"
+#include "css.h"
+#include "ioasm.h"
+#include "cio_debug.h"
+#include "chp.h"
+
+#define to_channelpath(device) container_of(device, struct channel_path, dev)
+
+/* Return channel_path struct for given chpid. */
+static inline struct channel_path *chpid_to_chp(struct chp_id chpid)
+{
+	return css[chpid.cssid]->chps[chpid.id];
+}
+
+/* Set vary state for given chpid. */
+static void set_chp_logically_online(struct chp_id chpid, int onoff)
+{
+	chpid_to_chp(chpid)->state = onoff;
+}
+
+/* On succes return 0 if channel-path is varied offline, 1 if it is varied
+ * online. Return -ENODEV if channel-path is not registered. */
+int chp_get_status(struct chp_id chpid)
+{
+	return (chpid_to_chp(chpid) ? chpid_to_chp(chpid)->state : -ENODEV);
+}
+
+/**
+ * chp_get_sch_opm - return opm for subchannel
+ * @sch: subchannel
+ *
+ * Calculate and return the operational path mask (opm) based on the chpids
+ * used by the subchannel and the status of the associated channel-paths.
+ */
+u8 chp_get_sch_opm(struct subchannel *sch)
+{
+	struct chp_id chpid;
+	int opm;
+	int i;
+
+	opm = 0;
+	chp_id_init(&chpid);
+	for (i=0; i < 8; i++) {
+		opm <<= 1;
+		chpid.id = sch->schib.pmcw.chpid[i];
+		if (chp_get_status(chpid) != 0)
+			opm |= 1;
+	}
+	return opm;
+}
+
+/**
+ * chp_is_registered - check if a channel-path is registered
+ * @chpid: channel-path ID
+ *
+ * Return non-zero if a channel-path with the given chpid is registered,
+ * zero otherwise.
+ */
+int chp_is_registered(struct chp_id chpid)
+{
+	return chpid_to_chp(chpid) != NULL;
+}
+
+/*
+ * Function: s390_vary_chpid
+ * Varies the specified chpid online or offline
+ */
+static int s390_vary_chpid(struct chp_id chpid, int on)
+{
+	char dbf_text[15];
+	int status;
+
+	sprintf(dbf_text, on?"varyon%x.%02x":"varyoff%x.%02x", chpid.cssid,
+		chpid.id);
+	CIO_TRACE_EVENT( 2, dbf_text);
+
+	status = chp_get_status(chpid);
+	if (status < 0) {
+		printk(KERN_ERR "Can't vary unknown chpid %x.%02x\n",
+		       chpid.cssid, chpid.id);
+		return -EINVAL;
+	}
+
+	if (!on && !status) {
+		printk(KERN_ERR "chpid %x.%02x is already offline\n",
+		       chpid.cssid, chpid.id);
+		return -EINVAL;
+	}
+
+	set_chp_logically_online(chpid, on);
+	chsc_chp_vary(chpid, on);
+	return 0;
+}
+
+/*
+ * Channel measurement related functions
+ */
+static ssize_t chp_measurement_chars_read(struct kobject *kobj, char *buf,
+					  loff_t off, size_t count)
+{
+	struct channel_path *chp;
+	unsigned int size;
+
+	chp = to_channelpath(container_of(kobj, struct device, kobj));
+	if (!chp->cmg_chars)
+		return 0;
+
+	size = sizeof(struct cmg_chars);
+
+	if (off > size)
+		return 0;
+	if (off + count > size)
+		count = size - off;
+	memcpy(buf, chp->cmg_chars + off, count);
+	return count;
+}
+
+static struct bin_attribute chp_measurement_chars_attr = {
+	.attr = {
+		.name = "measurement_chars",
+		.mode = S_IRUSR,
+		.owner = THIS_MODULE,
+	},
+	.size = sizeof(struct cmg_chars),
+	.read = chp_measurement_chars_read,
+};
+
+static void chp_measurement_copy_block(struct cmg_entry *buf,
+				       struct channel_subsystem *css,
+				       struct chp_id chpid)
+{
+	void *area;
+	struct cmg_entry *entry, reference_buf;
+	int idx;
+
+	if (chpid.id < 128) {
+		area = css->cub_addr1;
+		idx = chpid.id;
+	} else {
+		area = css->cub_addr2;
+		idx = chpid.id - 128;
+	}
+	entry = area + (idx * sizeof(struct cmg_entry));
+	do {
+		memcpy(buf, entry, sizeof(*entry));
+		memcpy(&reference_buf, entry, sizeof(*entry));
+	} while (reference_buf.values[0] != buf->values[0]);
+}
+
+static ssize_t chp_measurement_read(struct kobject *kobj, char *buf,
+				    loff_t off, size_t count)
+{
+	struct channel_path *chp;
+	struct channel_subsystem *css;
+	unsigned int size;
+
+	chp = to_channelpath(container_of(kobj, struct device, kobj));
+	css = to_css(chp->dev.parent);
+
+	size = sizeof(struct cmg_entry);
+
+	/* Only allow single reads. */
+	if (off || count < size)
+		return 0;
+	chp_measurement_copy_block((struct cmg_entry *)buf, css, chp->chpid);
+	count = size;
+	return count;
+}
+
+static struct bin_attribute chp_measurement_attr = {
+	.attr = {
+		.name = "measurement",
+		.mode = S_IRUSR,
+		.owner = THIS_MODULE,
+	},
+	.size = sizeof(struct cmg_entry),
+	.read = chp_measurement_read,
+};
+
+void chp_remove_cmg_attr(struct channel_path *chp)
+{
+	device_remove_bin_file(&chp->dev, &chp_measurement_chars_attr);
+	device_remove_bin_file(&chp->dev, &chp_measurement_attr);
+}
+
+int chp_add_cmg_attr(struct channel_path *chp)
+{
+	int ret;
+
+	ret = device_create_bin_file(&chp->dev, &chp_measurement_chars_attr);
+	if (ret)
+		return ret;
+	ret = device_create_bin_file(&chp->dev, &chp_measurement_attr);
+	if (ret)
+		device_remove_bin_file(&chp->dev, &chp_measurement_chars_attr);
+	return ret;
+}
+
+/*
+ * Files for the channel path entries.
+ */
+static ssize_t chp_status_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct channel_path *chp = container_of(dev, struct channel_path, dev);
+
+	if (!chp)
+		return 0;
+	return (chp_get_status(chp->chpid) ? sprintf(buf, "online\n") :
+		sprintf(buf, "offline\n"));
+}
+
+static ssize_t chp_status_write(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct channel_path *cp = container_of(dev, struct channel_path, dev);
+	char cmd[10];
+	int num_args;
+	int error;
+
+	num_args = sscanf(buf, "%5s", cmd);
+	if (!num_args)
+		return count;
+
+	if (!strnicmp(cmd, "on", 2) || !strcmp(cmd, "1"))
+		error = s390_vary_chpid(cp->chpid, 1);
+	else if (!strnicmp(cmd, "off", 3) || !strcmp(cmd, "0"))
+		error = s390_vary_chpid(cp->chpid, 0);
+	else
+		error = -EINVAL;
+
+	return error < 0 ? error : count;
+
+}
+
+static DEVICE_ATTR(status, 0644, chp_status_show, chp_status_write);
+
+static ssize_t chp_type_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct channel_path *chp = container_of(dev, struct channel_path, dev);
+
+	if (!chp)
+		return 0;
+	return sprintf(buf, "%x\n", chp->desc.desc);
+}
+
+static DEVICE_ATTR(type, 0444, chp_type_show, NULL);
+
+static ssize_t chp_cmg_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct channel_path *chp = to_channelpath(dev);
+
+	if (!chp)
+		return 0;
+	if (chp->cmg == -1) /* channel measurements not available */
+		return sprintf(buf, "unknown\n");
+	return sprintf(buf, "%x\n", chp->cmg);
+}
+
+static DEVICE_ATTR(cmg, 0444, chp_cmg_show, NULL);
+
+static ssize_t chp_shared_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct channel_path *chp = to_channelpath(dev);
+
+	if (!chp)
+		return 0;
+	if (chp->shared == -1) /* channel measurements not available */
+		return sprintf(buf, "unknown\n");
+	return sprintf(buf, "%x\n", chp->shared);
+}
+
+static DEVICE_ATTR(shared, 0444, chp_shared_show, NULL);
+
+static struct attribute * chp_attrs[] = {
+	&dev_attr_status.attr,
+	&dev_attr_type.attr,
+	&dev_attr_cmg.attr,
+	&dev_attr_shared.attr,
+	NULL,
+};
+
+static struct attribute_group chp_attr_group = {
+	.attrs = chp_attrs,
+};
+
+static void chp_release(struct device *dev)
+{
+	struct channel_path *cp;
+
+	cp = container_of(dev, struct channel_path, dev);
+	kfree(cp);
+}
+
+/**
+ * chp_new - register a new channel-path
+ * @chpid - channel-path ID
+ *
+ * Create and register data structure representing new channel-path. Return
+ * zero on success, non-zero otherwise.
+ */
+int chp_new(struct chp_id chpid)
+{
+	struct channel_path *chp;
+	int ret;
+
+	chp = kzalloc(sizeof(struct channel_path), GFP_KERNEL);
+	if (!chp)
+		return -ENOMEM;
+
+	/* fill in status, etc. */
+	chp->chpid = chpid;
+	chp->state = 1;
+	chp->dev.parent = &css[chpid.cssid]->device;
+	chp->dev.release = chp_release;
+	snprintf(chp->dev.bus_id, BUS_ID_SIZE, "chp%x.%02x", chpid.cssid,
+		 chpid.id);
+
+	/* Obtain channel path description and fill it in. */
+	ret = chsc_determine_channel_path_description(chpid, &chp->desc);
+	if (ret)
+		goto out_free;
+	/* Get channel-measurement characteristics. */
+	if (css_characteristics_avail && css_chsc_characteristics.scmc
+	    && css_chsc_characteristics.secm) {
+		ret = chsc_get_channel_measurement_chars(chp);
+		if (ret)
+			goto out_free;
+	} else {
+		static int msg_done;
+
+		if (!msg_done) {
+			printk(KERN_WARNING "cio: Channel measurements not "
+			       "available, continuing.\n");
+			msg_done = 1;
+		}
+		chp->cmg = -1;
+	}
+
+	/* make it known to the system */
+	ret = device_register(&chp->dev);
+	if (ret) {
+		printk(KERN_WARNING "%s: could not register %x.%02x\n",
+		       __func__, chpid.cssid, chpid.id);
+		goto out_free;
+	}
+	ret = sysfs_create_group(&chp->dev.kobj, &chp_attr_group);
+	if (ret) {
+		device_unregister(&chp->dev);
+		goto out_free;
+	}
+	mutex_lock(&css[chpid.cssid]->mutex);
+	if (css[chpid.cssid]->cm_enabled) {
+		ret = chp_add_cmg_attr(chp);
+		if (ret) {
+			sysfs_remove_group(&chp->dev.kobj, &chp_attr_group);
+			device_unregister(&chp->dev);
+			mutex_unlock(&css[chpid.cssid]->mutex);
+			goto out_free;
+		}
+	}
+	css[chpid.cssid]->chps[chpid.id] = chp;
+	mutex_unlock(&css[chpid.cssid]->mutex);
+	return ret;
+out_free:
+	kfree(chp);
+	return ret;
+}
+
+/**
+ * chp_get_chp_desc - return newly allocated channel-path description
+ * @chpid: channel-path ID
+ *
+ * On success return a newly allocated copy of the channel-path description
+ * data associated with the given channel-path ID. Return %NULL on error.
+ */
+void *chp_get_chp_desc(struct chp_id chpid)
+{
+	struct channel_path *chp;
+	struct channel_path_desc *desc;
+
+	chp = chpid_to_chp(chpid);
+	if (!chp)
+		return NULL;
+	desc = kmalloc(sizeof(struct channel_path_desc), GFP_KERNEL);
+	if (!desc)
+		return NULL;
+	memcpy(desc, &chp->desc, sizeof(struct channel_path_desc));
+	return desc;
+}
+
+/**
+ * chp_process_crw - process channel-path status change
+ * @id: channel-path ID number
+ * @status: non-zero if channel-path has become available, zero otherwise
+ *
+ * Handle channel-report-words indicating that the status of a channel-path
+ * has changed.
+ */
+int chp_process_crw(int id, int status)
+{
+	struct chp_id chpid;
+
+	chp_id_init(&chpid);
+	chpid.id = id;
+	if (status) {
+		if (!chp_is_registered(chpid))
+			chp_new(chpid);
+		return chsc_chp_online(chpid);
+	} else {
+		chsc_chp_offline(chpid);
+		return 0;
+	}
+}
diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h
new file mode 100644
index 00000000000..ac2b1a9c3bc
--- /dev/null
+++ b/drivers/s390/cio/chp.h
@@ -0,0 +1,37 @@
+/*
+ *  drivers/s390/cio/chp.h
+ *
+ *    Copyright IBM Corp. 2007
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef S390_CHP_H
+#define S390_CHP_H S390_CHP_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+
+#include "chpid.h"
+#include "chsc.h"
+
+struct channel_path {
+	struct chp_id chpid;
+	int state;
+	struct channel_path_desc desc;
+	/* Channel-measurement related stuff: */
+	int cmg;
+	int shared;
+	void *cmg_chars;
+	struct device dev;
+};
+
+int chp_get_status(struct chp_id chpid);
+u8 chp_get_sch_opm(struct subchannel *sch);
+int chp_is_registered(struct chp_id chpid);
+void *chp_get_chp_desc(struct chp_id chpid);
+int chp_process_crw(int id, int available);
+void chp_remove_cmg_attr(struct channel_path *chp);
+int chp_add_cmg_attr(struct channel_path *chp);
+int chp_new(struct chp_id chpid);
+
+#endif /* S390_CHP_H */
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index b329851f7b5..d99f525eac0 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -21,54 +21,11 @@
 #include "cio_debug.h"
 #include "ioasm.h"
 #include "chpid.h"
+#include "chp.h"
 #include "chsc.h"
 
 static void *sei_page;
 
-static int new_channel_path(struct chp_id chpid);
-
-static inline struct channel_path *chpid_to_chp(struct chp_id chpid)
-{
-	return css[chpid.cssid]->chps[chpid.id];
-}
-
-static void set_chp_logically_online(struct chp_id chpid, int onoff)
-{
-	chpid_to_chp(chpid)->state = onoff;
-}
-
-static int get_chp_status(struct chp_id chpid)
-{
-	return (chpid_to_chp(chpid) ? chpid_to_chp(chpid)->state : -ENODEV);
-}
-
-void chsc_validate_chpids(struct subchannel *sch)
-{
-	int mask, chp;
-	struct chp_id chpid;
-
-	chp_id_init(&chpid);
-	for (chp = 0; chp <= 7; chp++) {
-		mask = 0x80 >> chp;
-		chpid.id = sch->schib.pmcw.chpid[chp];
-		if (!get_chp_status(chpid))
-			/* disable using this path */
-			sch->opm &= ~mask;
-	}
-}
-
-void chpid_is_actually_online(struct chp_id chpid)
-{
-	int state;
-
-	state = get_chp_status(chpid);
-	if (state < 0) {
-		need_rescan = 1;
-		queue_work(slow_path_wq, &slow_path_work);
-	} else
-		WARN_ON(!state);
-}
-
 /* FIXME: this is _always_ called for every subchannel. shouldn't we
  *	  process more than one at a time? */
 static int
@@ -214,8 +171,8 @@ css_get_ssd_info(struct subchannel *sch)
 			mask = 0x80 >> j;
 			chpid.id = sch->ssd_info.chpid[j];
 			if ((sch->schib.pmcw.pim & mask) &&
-			    (get_chp_status(chpid) < 0))
-			    new_channel_path(chpid);
+			    !chp_is_registered(chpid))
+				chp_new(chpid);
 		}
 	}
 	return ret;
@@ -227,7 +184,7 @@ s390_subchannel_remove_chpid(struct device *dev, void *data)
 	int j;
 	int mask;
 	struct subchannel *sch;
-	struct channel_path *chpid;
+	struct chp_id *chpid;
 	struct schib schib;
 
 	sch = to_subchannel(dev);
@@ -235,7 +192,7 @@ s390_subchannel_remove_chpid(struct device *dev, void *data)
 	for (j = 0; j < 8; j++) {
 		mask = 0x80 >> j;
 		if ((sch->schib.pmcw.pim & mask) &&
-		    (sch->schib.pmcw.chpid[j] == chpid->chpid.id))
+		    (sch->schib.pmcw.chpid[j] == chpid->id))
 			break;
 	}
 	if (j >= 8)
@@ -285,51 +242,48 @@ out_unreg:
 	return 0;
 }
 
-static void s390_set_chpid_offline(struct chp_id chpid)
+void chsc_chp_offline(struct chp_id chpid)
 {
 	char dbf_txt[15];
-	struct device *dev;
 
 	sprintf(dbf_txt, "chpr%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
 
-	if (get_chp_status(chpid) <= 0)
+	if (chp_get_status(chpid) <= 0)
 		return;
-	dev = get_device(&(chpid_to_chp(chpid)->dev));
-	bus_for_each_dev(&css_bus_type, NULL, to_channelpath(dev),
+	bus_for_each_dev(&css_bus_type, NULL, &chpid,
 			 s390_subchannel_remove_chpid);
 
 	if (need_rescan || css_slow_subchannels_exist())
 		queue_work(slow_path_wq, &slow_path_work);
-	put_device(dev);
 }
 
 struct res_acc_data {
-	struct channel_path *chp;
+	struct chp_id chpid;
 	u32 fla_mask;
 	u16 fla;
 };
 
-static int
-s390_process_res_acc_sch(struct res_acc_data *res_data, struct subchannel *sch)
+static int s390_process_res_acc_sch(struct res_acc_data *res_data,
+				    struct subchannel *sch)
 {
 	int found;
 	int chp;
 	int ccode;
-	
+
 	found = 0;
 	for (chp = 0; chp <= 7; chp++)
 		/*
 		 * check if chpid is in information updated by ssd
 		 */
 		if (sch->ssd_info.valid &&
-		    sch->ssd_info.chpid[chp] == res_data->chp->chpid.id &&
+		    sch->ssd_info.chpid[chp] == res_data->chpid.id &&
 		    (sch->ssd_info.fla[chp] & res_data->fla_mask)
 		    == res_data->fla) {
 			found = 1;
 			break;
 		}
-	
+
 	if (found == 0)
 		return 0;
 
@@ -416,8 +370,8 @@ s390_process_res_acc (struct res_acc_data *res_data)
 	int rc;
 	char dbf_txt[15];
 
-	sprintf(dbf_txt, "accpr%x.%02x", res_data->chp->chpid.cssid,
-		res_data->chp->chpid.id);
+	sprintf(dbf_txt, "accpr%x.%02x", res_data->chpid.cssid,
+		res_data->chpid.id);
 	CIO_TRACE_EVENT( 2, dbf_txt);
 	if (res_data->fla != 0) {
 		sprintf(dbf_txt, "fla%x", res_data->fla);
@@ -503,7 +457,7 @@ static int chsc_process_sei_link_incident(struct chsc_sei_area *sei_area)
 	else {
 		chp_id_init(&chpid);
 		chpid.id = id;
-		s390_set_chpid_offline(chpid);
+		chsc_chp_offline(chpid);
 	}
 
 	return 0;
@@ -512,7 +466,6 @@ static int chsc_process_sei_link_incident(struct chsc_sei_area *sei_area)
 static int chsc_process_sei_res_acc(struct chsc_sei_area *sei_area)
 {
 	struct res_acc_data res_data;
-	struct device *dev;
 	struct chp_id chpid;
 	int status;
 	int rc;
@@ -524,14 +477,13 @@ static int chsc_process_sei_res_acc(struct chsc_sei_area *sei_area)
 	chp_id_init(&chpid);
 	chpid.id = sei_area->rsid;
 	/* allocate a new channel path structure, if needed */
-	status = get_chp_status(chpid);
+	status = chp_get_status(chpid);
 	if (status < 0)
-		new_channel_path(chpid);
+		chp_new(chpid);
 	else if (!status)
 		return 0;
-	dev = get_device(&(chpid_to_chp(chpid)->dev));
 	memset(&res_data, 0, sizeof(struct res_acc_data));
-	res_data.chp = to_channelpath(dev);
+	res_data.chpid = chpid;
 	if ((sei_area->vf & 0xc0) != 0) {
 		res_data.fla = sei_area->fla;
 		if ((sei_area->vf & 0xc0) == 0xc0)
@@ -542,7 +494,6 @@ static int chsc_process_sei_res_acc(struct chsc_sei_area *sei_area)
 			res_data.fla_mask = 0xff00;
 	}
 	rc = s390_process_res_acc(&res_data);
-	put_device(dev);
 
 	return rc;
 }
@@ -634,10 +585,10 @@ static int
 __chp_add(struct subchannel_id schid, void *data)
 {
 	int i, mask;
-	struct channel_path *chp;
+	struct chp_id *chpid;
 	struct subchannel *sch;
 
-	chp = data;
+	chpid = data;
 	sch = get_subchannel_by_schid(schid);
 	if (!sch)
 		/* Check if the subchannel is now available. */
@@ -646,7 +597,7 @@ __chp_add(struct subchannel_id schid, void *data)
 	for (i=0; i<8; i++) {
 		mask = 0x80 >> i;
 		if ((sch->schib.pmcw.pim & mask) &&
-		    (sch->schib.pmcw.chpid[i] == chp->chpid.id)) {
+		    (sch->schib.pmcw.chpid[i] == chpid->id)) {
 			if (stsch(sch->schid, &sch->schib) != 0) {
 				/* Endgame. */
 				spin_unlock_irq(sch->lock);
@@ -672,52 +623,24 @@ __chp_add(struct subchannel_id schid, void *data)
 	return 0;
 }
 
-static int chp_add(struct chp_id chpid)
+int chsc_chp_online(struct chp_id chpid)
 {
 	int rc;
 	char dbf_txt[15];
-	struct device *dev;
 
-	if (!get_chp_status(chpid))
-		return 0; /* no need to do the rest */
-	
 	sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
 
-	dev = get_device(&(chpid_to_chp(chpid)->dev));
-	rc = for_each_subchannel(__chp_add, to_channelpath(dev));
+	if (chp_get_status(chpid) == 0)
+		return 0;
+	rc = for_each_subchannel(__chp_add, &chpid);
 	if (css_slow_subchannels_exist())
 		rc = -EAGAIN;
 	if (rc != -EAGAIN)
 		rc = 0;
-	put_device(dev);
 	return rc;
 }
 
-/* 
- * Handling of crw machine checks with channel path source.
- */
-int chp_process_crw(int id, int on)
-{
-	struct chp_id chpid;
-
-	chp_id_init(&chpid);
-	chpid.id = id;
-	if (on == 0) {
-		/* Path has gone. We use the link incident routine.*/
-		s390_set_chpid_offline(chpid);
-		return 0; /* De-register is async anyway. */
-	}
-	/*
-	 * Path has come. Allocate a new channel path structure,
-	 * if needed.
-	 */
-	if (get_chp_status(chpid) < 0)
-		new_channel_path(chpid);
-	/* Avoid the extra overhead in process_rec_acc. */
-	return chp_add(chpid);
-}
-
 static int check_for_io_on_path(struct subchannel *sch, int index)
 {
 	int cc;
@@ -844,34 +767,13 @@ __s390_vary_chpid_on(struct subchannel_id schid, void *data)
 	return 0;
 }
 
-/*
- * Function: s390_vary_chpid
- * Varies the specified chpid online or offline
+/**
+ * chsc_chp_vary - propagate channel-path vary operation to subchannels
+ * @chpid: channl-path ID
+ * @on: non-zero for vary online, zero for vary offline
  */
-static int s390_vary_chpid(struct chp_id chpid, int on)
+int chsc_chp_vary(struct chp_id chpid, int on)
 {
-	char dbf_text[15];
-	int status;
-
-	sprintf(dbf_text, on?"varyon%x.%02x":"varyoff%x.%02x", chpid.cssid,
-		chpid.id);
-	CIO_TRACE_EVENT( 2, dbf_text);
-
-	status = get_chp_status(chpid);
-	if (status < 0) {
-		printk(KERN_ERR "Can't vary unknown chpid %x.%02x\n",
-		       chpid.cssid, chpid.id);
-		return -EINVAL;
-	}
-
-	if (!on && !status) {
-		printk(KERN_ERR "chpid %x.%02x is already offline\n",
-		       chpid.cssid, chpid.id);
-		return -EINVAL;
-	}
-
-	set_chp_logically_online(chpid, on);
-
 	/*
 	 * Redo PathVerification on the devices the chpid connects to
 	 */
@@ -887,112 +789,6 @@ static int s390_vary_chpid(struct chp_id chpid, int on)
 	return 0;
 }
 
-/*
- * Channel measurement related functions
- */
-static ssize_t
-chp_measurement_chars_read(struct kobject *kobj, char *buf, loff_t off,
-			   size_t count)
-{
-	struct channel_path *chp;
-	unsigned int size;
-
-	chp = to_channelpath(container_of(kobj, struct device, kobj));
-	if (!chp->cmg_chars)
-		return 0;
-
-	size = sizeof(struct cmg_chars);
-
-	if (off > size)
-		return 0;
-	if (off + count > size)
-		count = size - off;
-	memcpy(buf, chp->cmg_chars + off, count);
-	return count;
-}
-
-static struct bin_attribute chp_measurement_chars_attr = {
-	.attr = {
-		.name = "measurement_chars",
-		.mode = S_IRUSR,
-		.owner = THIS_MODULE,
-	},
-	.size = sizeof(struct cmg_chars),
-	.read = chp_measurement_chars_read,
-};
-
-static void chp_measurement_copy_block(struct cmg_entry *buf,
-			struct channel_subsystem *css, struct chp_id chpid)
-{
-	void *area;
-	struct cmg_entry *entry, reference_buf;
-	int idx;
-
-	if (chpid.id < 128) {
-		area = css->cub_addr1;
-		idx = chpid.id;
-	} else {
-		area = css->cub_addr2;
-		idx = chpid.id - 128;
-	}
-	entry = area + (idx * sizeof(struct cmg_entry));
-	do {
-		memcpy(buf, entry, sizeof(*entry));
-		memcpy(&reference_buf, entry, sizeof(*entry));
-	} while (reference_buf.values[0] != buf->values[0]);
-}
-
-static ssize_t
-chp_measurement_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
-{
-	struct channel_path *chp;
-	struct channel_subsystem *css;
-	unsigned int size;
-
-	chp = to_channelpath(container_of(kobj, struct device, kobj));
-	css = to_css(chp->dev.parent);
-
-	size = sizeof(struct cmg_entry);
-
-	/* Only allow single reads. */
-	if (off || count < size)
-		return 0;
-	chp_measurement_copy_block((struct cmg_entry *)buf, css, chp->chpid);
-	count = size;
-	return count;
-}
-
-static struct bin_attribute chp_measurement_attr = {
-	.attr = {
-		.name = "measurement",
-		.mode = S_IRUSR,
-		.owner = THIS_MODULE,
-	},
-	.size = sizeof(struct cmg_entry),
-	.read = chp_measurement_read,
-};
-
-static void
-chsc_remove_chp_cmg_attr(struct channel_path *chp)
-{
-	device_remove_bin_file(&chp->dev, &chp_measurement_chars_attr);
-	device_remove_bin_file(&chp->dev, &chp_measurement_attr);
-}
-
-static int
-chsc_add_chp_cmg_attr(struct channel_path *chp)
-{
-	int ret;
-
-	ret = device_create_bin_file(&chp->dev, &chp_measurement_chars_attr);
-	if (ret)
-		return ret;
-	ret = device_create_bin_file(&chp->dev, &chp_measurement_attr);
-	if (ret)
-		device_remove_bin_file(&chp->dev, &chp_measurement_chars_attr);
-	return ret;
-}
-
 static void
 chsc_remove_cmg_attr(struct channel_subsystem *css)
 {
@@ -1001,7 +797,7 @@ chsc_remove_cmg_attr(struct channel_subsystem *css)
 	for (i = 0; i <= __MAX_CHPID; i++) {
 		if (!css->chps[i])
 			continue;
-		chsc_remove_chp_cmg_attr(css->chps[i]);
+		chp_remove_cmg_attr(css->chps[i]);
 	}
 }
 
@@ -1014,7 +810,7 @@ chsc_add_cmg_attr(struct channel_subsystem *css)
 	for (i = 0; i <= __MAX_CHPID; i++) {
 		if (!css->chps[i])
 			continue;
-		ret = chsc_add_chp_cmg_attr(css->chps[i]);
+		ret = chp_add_cmg_attr(css->chps[i]);
 		if (ret)
 			goto cleanup;
 	}
@@ -1023,12 +819,11 @@ cleanup:
 	for (--i; i >= 0; i--) {
 		if (!css->chps[i])
 			continue;
-		chsc_remove_chp_cmg_attr(css->chps[i]);
+		chp_remove_cmg_attr(css->chps[i]);
 	}
 	return ret;
 }
 
-
 static int
 __chsc_do_secm(struct channel_subsystem *css, int enable, void *page)
 {
@@ -1143,108 +938,8 @@ chsc_secm(struct channel_subsystem *css, int enable)
 	return ret;
 }
 
-/*
- * Files for the channel path entries.
- */
-static ssize_t
-chp_status_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct channel_path *chp = container_of(dev, struct channel_path, dev);
-
-	if (!chp)
-		return 0;
-	return (get_chp_status(chp->chpid) ? sprintf(buf, "online\n") :
-		sprintf(buf, "offline\n"));
-}
-
-static ssize_t
-chp_status_write(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
-{
-	struct channel_path *cp = container_of(dev, struct channel_path, dev);
-	char cmd[10];
-	int num_args;
-	int error;
-
-	num_args = sscanf(buf, "%5s", cmd);
-	if (!num_args)
-		return count;
-
-	if (!strnicmp(cmd, "on", 2) || !strcmp(cmd, "1"))
-		error = s390_vary_chpid(cp->chpid, 1);
-	else if (!strnicmp(cmd, "off", 3) || !strcmp(cmd, "0"))
-		error = s390_vary_chpid(cp->chpid, 0);
-	else
-		error = -EINVAL;
-
-	return error < 0 ? error : count;
-
-}
-
-static DEVICE_ATTR(status, 0644, chp_status_show, chp_status_write);
-
-static ssize_t
-chp_type_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct channel_path *chp = container_of(dev, struct channel_path, dev);
-
-	if (!chp)
-		return 0;
-	return sprintf(buf, "%x\n", chp->desc.desc);
-}
-
-static DEVICE_ATTR(type, 0444, chp_type_show, NULL);
-
-static ssize_t
-chp_cmg_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct channel_path *chp = to_channelpath(dev);
-
-	if (!chp)
-		return 0;
-	if (chp->cmg == -1) /* channel measurements not available */
-		return sprintf(buf, "unknown\n");
-	return sprintf(buf, "%x\n", chp->cmg);
-}
-
-static DEVICE_ATTR(cmg, 0444, chp_cmg_show, NULL);
-
-static ssize_t
-chp_shared_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct channel_path *chp = to_channelpath(dev);
-
-	if (!chp)
-		return 0;
-	if (chp->shared == -1) /* channel measurements not available */
-		return sprintf(buf, "unknown\n");
-	return sprintf(buf, "%x\n", chp->shared);
-}
-
-static DEVICE_ATTR(shared, 0444, chp_shared_show, NULL);
-
-static struct attribute * chp_attrs[] = {
-	&dev_attr_status.attr,
-	&dev_attr_type.attr,
-	&dev_attr_cmg.attr,
-	&dev_attr_shared.attr,
-	NULL,
-};
-
-static struct attribute_group chp_attr_group = {
-	.attrs = chp_attrs,
-};
-
-static void
-chp_release(struct device *dev)
-{
-	struct channel_path *cp;
-	
-	cp = container_of(dev, struct channel_path, dev);
-	kfree(cp);
-}
-
-static int chsc_determine_channel_path_description(struct chp_id chpid,
-						struct channel_path_desc *desc)
+int chsc_determine_channel_path_description(struct chp_id chpid,
+					    struct channel_path_desc *desc)
 {
 	int ccode, ret;
 
@@ -1331,8 +1026,7 @@ chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv,
 	}
 }
 
-static int
-chsc_get_channel_measurement_chars(struct channel_path *chp)
+int chsc_get_channel_measurement_chars(struct channel_path *chp)
 {
 	int ccode, ret;
 
@@ -1407,97 +1101,6 @@ out:
 	return ret;
 }
 
-/*
- * Entries for chpids on the system bus.
- * This replaces /proc/chpids.
- */
-static int new_channel_path(struct chp_id chpid)
-{
-	struct channel_path *chp;
-	int ret;
-
-	chp = kzalloc(sizeof(struct channel_path), GFP_KERNEL);
-	if (!chp)
-		return -ENOMEM;
-
-	/* fill in status, etc. */
-	chp->chpid = chpid;
-	chp->state = 1;
-	chp->dev.parent = &css[chpid.cssid]->device;
-	chp->dev.release = chp_release;
-	snprintf(chp->dev.bus_id, BUS_ID_SIZE, "chp%x.%02x", chpid.cssid,
-		 chpid.id);
-
-	/* Obtain channel path description and fill it in. */
-	ret = chsc_determine_channel_path_description(chpid, &chp->desc);
-	if (ret)
-		goto out_free;
-	/* Get channel-measurement characteristics. */
-	if (css_characteristics_avail && css_chsc_characteristics.scmc
-	    && css_chsc_characteristics.secm) {
-		ret = chsc_get_channel_measurement_chars(chp);
-		if (ret)
-			goto out_free;
-	} else {
-		static int msg_done;
-
-		if (!msg_done) {
-			printk(KERN_WARNING "cio: Channel measurements not "
-			       "available, continuing.\n");
-			msg_done = 1;
-		}
-		chp->cmg = -1;
-	}
-
-	/* make it known to the system */
-	ret = device_register(&chp->dev);
-	if (ret) {
-		printk(KERN_WARNING "%s: could not register %x.%02x\n",
-		       __func__, chpid.cssid, chpid.id);
-		goto out_free;
-	}
-	ret = sysfs_create_group(&chp->dev.kobj, &chp_attr_group);
-	if (ret) {
-		device_unregister(&chp->dev);
-		goto out_free;
-	}
-	mutex_lock(&css[chpid.cssid]->mutex);
-	if (css[chpid.cssid]->cm_enabled) {
-		ret = chsc_add_chp_cmg_attr(chp);
-		if (ret) {
-			sysfs_remove_group(&chp->dev.kobj, &chp_attr_group);
-			device_unregister(&chp->dev);
-			mutex_unlock(&css[chpid.cssid]->mutex);
-			goto out_free;
-		}
-	}
-	css[chpid.cssid]->chps[chpid.id] = chp;
-	mutex_unlock(&css[chpid.cssid]->mutex);
-	return ret;
-out_free:
-	kfree(chp);
-	return ret;
-}
-
-void *
-chsc_get_chp_desc(struct subchannel *sch, int chp_no)
-{
-	struct channel_path *chp;
-	struct channel_path_desc *desc;
-	struct chp_id chpid;
-
-	chp_id_init(&chpid);
-	chpid.id = sch->schib.pmcw.chpid[chp_no];
-	chp = chpid_to_chp(chpid);
-	if (!chp)
-		return NULL;
-	desc = kmalloc(sizeof(struct channel_path_desc), GFP_KERNEL);
-	if (!desc)
-		return NULL;
-	memcpy(desc, &chp->desc, sizeof(struct channel_path_desc));
-	return desc;
-}
-
 static int __init
 chsc_alloc_sei_area(void)
 {
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 2949c85b6d9..0e40defc608 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -5,10 +5,6 @@
 #include <linux/device.h>
 #include "chpid.h"
 
-#define CHSC_SEI_ACC_CHPID        1
-#define CHSC_SEI_ACC_LINKADDR     2
-#define CHSC_SEI_ACC_FULLLINKADDR 3
-
 #define CHSC_SDA_OC_MSS   0x2
 
 struct chsc_header {
@@ -37,23 +33,10 @@ struct channel_path_desc {
 	u8 chpp;
 } __attribute__ ((packed));
 
-struct channel_path {
-	struct chp_id chpid;
-	int state;
-	struct channel_path_desc desc;
-	/* Channel-measurement related stuff: */
-	int cmg;
-	int shared;
-	void *cmg_chars;
-	struct device dev;
-};
+struct channel_path;
 
-extern void s390_process_css( void );
-extern void chsc_validate_chpids(struct subchannel *);
-extern void chpid_is_actually_online(struct chp_id);
 extern int css_get_ssd_info(struct subchannel *);
 extern int chsc_process_crw(void);
-extern int chp_process_crw(int, int);
 
 struct css_general_char {
 	u64 : 41;
@@ -89,12 +72,15 @@ extern struct css_chsc_char css_chsc_characteristics;
 extern int chsc_determine_css_characteristics(void);
 extern int css_characteristics_avail;
 
-extern void *chsc_get_chp_desc(struct subchannel*, int);
-
 extern int chsc_enable_facility(int);
 struct channel_subsystem;
 extern int chsc_secm(struct channel_subsystem *, int);
 
-#define to_channelpath(device) container_of(device, struct channel_path, dev)
+int chsc_chp_vary(struct chp_id chpid, int on);
+int chsc_determine_channel_path_description(struct chp_id chpid,
+					    struct channel_path_desc *desc);
+int chsc_chp_online(struct chp_id chpid);
+void chsc_chp_offline(struct chp_id chpid);
+int chsc_get_channel_measurement_chars(struct channel_path *chp);
 
 #endif
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index bab729202f4..7dd0649c95d 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -29,6 +29,7 @@
 #include "ioasm.h"
 #include "blacklist.h"
 #include "cio_debug.h"
+#include "chp.h"
 #include "../s390mach.h"
 
 debug_info_t *cio_debug_msg_id;
@@ -592,9 +593,10 @@ cio_validate_subchannel (struct subchannel *sch, struct subchannel_id schid)
 		err = -ENODEV;
 		goto out;
 	}
-	sch->opm = 0xff;
-	if (!cio_is_console(sch->schid))
-		chsc_validate_chpids(sch);
+	if (cio_is_console(sch->schid))
+		sch->opm = 0xff;
+	else
+		sch->opm = chp_get_sch_opm(sch);
 	sch->lpm = sch->schib.pmcw.pam & sch->opm;
 
 	CIO_DEBUG(KERN_INFO, 0,
diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h
index ca2bab932a8..4319e1ced79 100644
--- a/drivers/s390/cio/css.h
+++ b/drivers/s390/cio/css.h
@@ -4,6 +4,7 @@
 #include <linux/mutex.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
+#include <linux/device.h>
 
 #include <asm/cio.h>
 
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 5f5ee1eef07..db3d1b990f5 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -23,6 +23,7 @@
 #include "chsc.h"
 #include "ioasm.h"
 #include "chpid.h"
+#include "chp.h"
 
 int
 device_is_online(struct subchannel *sch)
@@ -221,7 +222,10 @@ __recover_lost_chpids(struct subchannel *sch, int old_lpm)
 		if (old_lpm & mask)
 			continue;
 		chpid.id = sch->schib.pmcw.chpid[i];
-		chpid_is_actually_online(chpid);
+		if (!chp_is_registered(chpid)) {
+			need_rescan = 1;
+			queue_work(slow_path_wq, &slow_path_work);
+		}
 	}
 }
 
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 7c7775aae38..d819ae2ee9a 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -22,6 +22,8 @@
 #include "css.h"
 #include "chsc.h"
 #include "device.h"
+#include "chpid.h"
+#include "chp.h"
 
 int ccw_device_set_options_mask(struct ccw_device *cdev, unsigned long flags)
 {
@@ -606,9 +608,12 @@ void *
 ccw_device_get_chp_desc(struct ccw_device *cdev, int chp_no)
 {
 	struct subchannel *sch;
+	struct chp_id chpid;
 
 	sch = to_subchannel(cdev->dev.parent);
-	return chsc_get_chp_desc(sch, chp_no);
+	chp_id_init(&chpid);
+	chpid.id = sch->schib.pmcw.chpid[chp_no];
+	return chp_get_chp_desc(chpid);
 }
 
 // FIXME: these have to go:
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 806bb1a921e..afd8a3c0f8d 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -21,6 +21,7 @@
 #include "cio/cio.h"
 #include "cio/chsc.h"
 #include "cio/css.h"
+#include "cio/chp.h"
 #include "s390mach.h"
 
 static struct semaphore m_sem;
-- 
cgit v1.2.3


From c9182e0f42c5646e670c2166b6d6638052d574af Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:29 +0200
Subject: [S390] cio: observe chpid valid flag

Check validity flag of CHPID description data before continuing with
channel-path initialization.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/chp.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers')

diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 6e04521accd..8a5839147f8 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -339,6 +339,10 @@ int chp_new(struct chp_id chpid)
 	ret = chsc_determine_channel_path_description(chpid, &chp->desc);
 	if (ret)
 		goto out_free;
+	if ((chp->desc.flags & 0x80) == 0) {
+		ret = -ENODEV;
+		goto out_free;
+	}
 	/* Get channel-measurement characteristics. */
 	if (css_characteristics_avail && css_chsc_characteristics.scmc
 	    && css_chsc_characteristics.secm) {
-- 
cgit v1.2.3


From f5ba6c863617c15d22cce5f8666ff4c832773025 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:30 +0200
Subject: [S390] cio: Clean up online_store.

Detangle the online_store code and make it more readable.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/device.c | 109 ++++++++++++++++++++++++++--------------------
 1 file changed, 62 insertions(+), 47 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index e322111fb36..c3fc205e3bc 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -413,11 +413,60 @@ ccw_device_set_online(struct ccw_device *cdev)
 	return (ret == 0) ? -ENODEV : ret;
 }
 
-static ssize_t
-online_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+static void online_store_handle_offline(struct ccw_device *cdev)
+{
+	if (cdev->private->state == DEV_STATE_DISCONNECTED)
+		ccw_device_remove_disconnected(cdev);
+	else if (cdev->drv && cdev->drv->set_offline)
+		ccw_device_set_offline(cdev);
+}
+
+static int online_store_recog_and_online(struct ccw_device *cdev)
+{
+	int ret;
+
+	/* Do device recognition, if needed. */
+	if (cdev->id.cu_type == 0) {
+		ret = ccw_device_recognition(cdev);
+		if (ret) {
+			printk(KERN_WARNING"Couldn't start recognition "
+			       "for device %s (ret=%d)\n",
+			       cdev->dev.bus_id, ret);
+			return ret;
+		}
+		wait_event(cdev->private->wait_q,
+			   cdev->private->flags.recog_done);
+	}
+	if (cdev->drv && cdev->drv->set_online)
+		ccw_device_set_online(cdev);
+	return 0;
+}
+static void online_store_handle_online(struct ccw_device *cdev, int force)
+{
+	int ret;
+
+	ret = online_store_recog_and_online(cdev);
+	if (ret)
+		return;
+	if (force && cdev->private->state == DEV_STATE_BOXED) {
+		ret = ccw_device_stlck(cdev);
+		if (ret) {
+			printk(KERN_WARNING"ccw_device_stlck for device %s "
+			       "returned %d!\n", cdev->dev.bus_id, ret);
+			return;
+		}
+		if (cdev->id.cu_type == 0)
+			cdev->private->state = DEV_STATE_NOT_OPER;
+		online_store_recog_and_online(cdev);
+	}
+
+}
+
+static ssize_t online_store (struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
 {
 	struct ccw_device *cdev = to_ccwdev(dev);
-	int i, force, ret;
+	int i, force;
 	char *tmp;
 
 	if (atomic_cmpxchg(&cdev->private->onoff, 0, 1) != 0)
@@ -434,51 +483,17 @@ online_store (struct device *dev, struct device_attribute *attr, const char *buf
 		force = 0;
 		i = simple_strtoul(buf, &tmp, 16);
 	}
-	if (i == 1) {
-		/* Do device recognition, if needed. */
-		if (cdev->id.cu_type == 0) {
-			ret = ccw_device_recognition(cdev);
-			if (ret) {
-				printk(KERN_WARNING"Couldn't start recognition "
-				       "for device %s (ret=%d)\n",
-				       cdev->dev.bus_id, ret);
-				goto out;
-			}
-			wait_event(cdev->private->wait_q,
-				   cdev->private->flags.recog_done);
-		}
-		if (cdev->drv && cdev->drv->set_online)
-			ccw_device_set_online(cdev);
-	} else if (i == 0) {
-		if (cdev->private->state == DEV_STATE_DISCONNECTED)
-			ccw_device_remove_disconnected(cdev);
-		else if (cdev->drv && cdev->drv->set_offline)
-			ccw_device_set_offline(cdev);
-	}
-	if (force && cdev->private->state == DEV_STATE_BOXED) {
-		ret = ccw_device_stlck(cdev);
-		if (ret) {
-			printk(KERN_WARNING"ccw_device_stlck for device %s "
-			       "returned %d!\n", cdev->dev.bus_id, ret);
-			goto out;
-		}
-		/* Do device recognition, if needed. */
-		if (cdev->id.cu_type == 0) {
-			cdev->private->state = DEV_STATE_NOT_OPER;
-			ret = ccw_device_recognition(cdev);
-			if (ret) {
-				printk(KERN_WARNING"Couldn't start recognition "
-				       "for device %s (ret=%d)\n",
-				       cdev->dev.bus_id, ret);
-				goto out;
-			}
-			wait_event(cdev->private->wait_q,
-				   cdev->private->flags.recog_done);
-		}
-		if (cdev->drv && cdev->drv->set_online)
-			ccw_device_set_online(cdev);
+
+	switch (i) {
+	case 0:
+		online_store_handle_offline(cdev);
+		break;
+	case 1:
+		online_store_handle_online(cdev, force);
+		break;
+	default:
+		count = -EINVAL;
 	}
-	out:
 	if (cdev->drv)
 		module_put(cdev->drv->owner);
 	atomic_set(&cdev->private->onoff, 0);
-- 
cgit v1.2.3


From e5854a5839fa426a7873f038080f63587de5f1f1 Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:31 +0200
Subject: [S390] cio: Channel-path configure function.

Add a new attribute to the channel-path sysfs directory through which
channel-path configure operations can be triggered. Also listen for
hardware events requesting channel-path configure operations and
process them accordingly.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 drivers/s390/char/Makefile    |   2 +-
 drivers/s390/char/sclp_chp.c  | 196 +++++++++++++++++++++++++++++++++
 drivers/s390/cio/chp.c        | 250 +++++++++++++++++++++++++++++++++++++++++-
 drivers/s390/cio/chp.h        |  20 +++-
 drivers/s390/cio/chpid.h      |  51 ---------
 drivers/s390/cio/chsc.c       |  44 +++++++-
 drivers/s390/cio/chsc.h       |   2 +-
 drivers/s390/cio/cio.c        |   1 +
 drivers/s390/cio/cio.h        |   1 +
 drivers/s390/cio/css.h        |   4 +-
 drivers/s390/cio/device_fsm.c |   2 +-
 drivers/s390/cio/device_ops.c |   2 +-
 drivers/s390/cio/ioasm.h      |   2 +-
 13 files changed, 515 insertions(+), 62 deletions(-)
 create mode 100644 drivers/s390/char/sclp_chp.c
 delete mode 100644 drivers/s390/cio/chpid.h

(limited to 'drivers')

diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index 293e667b50f..5fd581c22db 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \
-	 sclp_info.o
+	 sclp_info.o sclp_chp.o
 
 obj-$(CONFIG_TN3270) += raw3270.o
 obj-$(CONFIG_TN3270_CONSOLE) += con3270.o
diff --git a/drivers/s390/char/sclp_chp.c b/drivers/s390/char/sclp_chp.c
new file mode 100644
index 00000000000..a66b914519b
--- /dev/null
+++ b/drivers/s390/char/sclp_chp.c
@@ -0,0 +1,196 @@
+/*
+ *  drivers/s390/char/sclp_chp.c
+ *
+ *    Copyright IBM Corp. 2007
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/gfp.h>
+#include <linux/errno.h>
+#include <linux/completion.h>
+#include <asm/sclp.h>
+#include <asm/chpid.h>
+
+#include "sclp.h"
+
+#define TAG	"sclp_chp: "
+
+#define SCLP_CMDW_CONFIGURE_CHANNEL_PATH	0x000f0001
+#define SCLP_CMDW_DECONFIGURE_CHANNEL_PATH	0x000e0001
+#define SCLP_CMDW_READ_CHANNEL_PATH_INFORMATION	0x00030001
+
+static inline sclp_cmdw_t get_configure_cmdw(struct chp_id chpid)
+{
+	return SCLP_CMDW_CONFIGURE_CHANNEL_PATH | chpid.id << 8;
+}
+
+static inline sclp_cmdw_t get_deconfigure_cmdw(struct chp_id chpid)
+{
+	return SCLP_CMDW_DECONFIGURE_CHANNEL_PATH | chpid.id << 8;
+}
+
+static void chp_callback(struct sclp_req *req, void *data)
+{
+	struct completion *completion = data;
+
+	complete(completion);
+}
+
+struct chp_cfg_sccb {
+	struct sccb_header header;
+	u8 ccm;
+	u8 reserved[6];
+	u8 cssid;
+} __attribute__((packed));
+
+struct chp_cfg_data {
+	struct chp_cfg_sccb sccb;
+	struct sclp_req req;
+	struct completion completion;
+} __attribute__((packed));
+
+static int do_configure(sclp_cmdw_t cmd)
+{
+	struct chp_cfg_data *data;
+	int rc;
+
+	/* Prepare sccb. */
+	data = (struct chp_cfg_data *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	if (!data)
+		return -ENOMEM;
+	data->sccb.header.length = sizeof(struct chp_cfg_sccb);
+	data->req.command = cmd;
+	data->req.sccb = &(data->sccb);
+	data->req.status = SCLP_REQ_FILLED;
+	data->req.callback = chp_callback;
+	data->req.callback_data = &(data->completion);
+	init_completion(&data->completion);
+
+	/* Perform sclp request. */
+	rc = sclp_add_request(&(data->req));
+	if (rc)
+		goto out;
+	wait_for_completion(&data->completion);
+
+	/* Check response .*/
+	if (data->req.status != SCLP_REQ_DONE) {
+		printk(KERN_WARNING TAG "configure channel-path request failed "
+		       "(status=0x%02x)\n", data->req.status);
+		rc = -EIO;
+		goto out;
+	}
+	switch (data->sccb.header.response_code) {
+	case 0x0020:
+	case 0x0120:
+	case 0x0440:
+	case 0x0450:
+		break;
+	default:
+		printk(KERN_WARNING TAG "configure channel-path failed "
+		       "(cmd=0x%08x, response=0x%04x)\n", cmd,
+		       data->sccb.header.response_code);
+		rc = -EIO;
+		break;
+	}
+out:
+	free_page((unsigned long) data);
+
+	return rc;
+}
+
+/**
+ * sclp_chp_configure - perform configure channel-path sclp command
+ * @chpid: channel-path ID
+ *
+ * Perform configure channel-path command sclp command for specified chpid.
+ * Return 0 after command successfully finished, non-zero otherwise.
+ */
+int sclp_chp_configure(struct chp_id chpid)
+{
+	return do_configure(get_configure_cmdw(chpid));
+}
+
+/**
+ * sclp_chp_deconfigure - perform deconfigure channel-path sclp command
+ * @chpid: channel-path ID
+ *
+ * Perform deconfigure channel-path command sclp command for specified chpid
+ * and wait for completion. On success return 0. Return non-zero otherwise.
+ */
+int sclp_chp_deconfigure(struct chp_id chpid)
+{
+	return do_configure(get_deconfigure_cmdw(chpid));
+}
+
+struct chp_info_sccb {
+	struct sccb_header header;
+	u8 recognized[SCLP_CHP_INFO_MASK_SIZE];
+	u8 standby[SCLP_CHP_INFO_MASK_SIZE];
+	u8 configured[SCLP_CHP_INFO_MASK_SIZE];
+	u8 ccm;
+	u8 reserved[6];
+	u8 cssid;
+} __attribute__((packed));
+
+struct chp_info_data {
+	struct chp_info_sccb sccb;
+	struct sclp_req req;
+	struct completion completion;
+} __attribute__((packed));
+
+/**
+ * sclp_chp_read_info - perform read channel-path information sclp command
+ * @info: resulting channel-path information data
+ *
+ * Perform read channel-path information sclp command and wait for completion.
+ * On success, store channel-path information in @info and return 0. Return
+ * non-zero otherwise.
+ */
+int sclp_chp_read_info(struct sclp_chp_info *info)
+{
+	struct chp_info_data *data;
+	int rc;
+
+	/* Prepare sccb. */
+	data = (struct chp_info_data *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	if (!data)
+		return -ENOMEM;
+	data->sccb.header.length = sizeof(struct chp_info_sccb);
+	data->req.command = SCLP_CMDW_READ_CHANNEL_PATH_INFORMATION;
+	data->req.sccb = &(data->sccb);
+	data->req.status = SCLP_REQ_FILLED;
+	data->req.callback = chp_callback;
+	data->req.callback_data = &(data->completion);
+	init_completion(&data->completion);
+
+	/* Perform sclp request. */
+	rc = sclp_add_request(&(data->req));
+	if (rc)
+		goto out;
+	wait_for_completion(&data->completion);
+
+	/* Check response .*/
+	if (data->req.status != SCLP_REQ_DONE) {
+		printk(KERN_WARNING TAG "read channel-path info request failed "
+		       "(status=0x%02x)\n", data->req.status);
+		rc = -EIO;
+		goto out;
+	}
+	if (data->sccb.header.response_code != 0x0010) {
+		printk(KERN_WARNING TAG "read channel-path info failed "
+		       "(response=0x%04x)\n", data->sccb.header.response_code);
+		rc = -EIO;
+		goto out;
+	}
+	memcpy(info->recognized, data->sccb.recognized,
+	       SCLP_CHP_INFO_MASK_SIZE);
+	memcpy(info->standby, data->sccb.standby,
+	       SCLP_CHP_INFO_MASK_SIZE);
+	memcpy(info->configured, data->sccb.configured,
+	       SCLP_CHP_INFO_MASK_SIZE);
+out:
+	free_page((unsigned long) data);
+
+	return rc;
+}
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 8a5839147f8..0e92c8c8986 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -10,9 +10,14 @@
 #include <linux/bug.h>
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/wait.h>
+#include <linux/mutex.h>
 #include <asm/errno.h>
+#include <asm/chpid.h>
+#include <asm/sclp.h>
 
-#include "chpid.h"
 #include "cio.h"
 #include "css.h"
 #include "ioasm.h"
@@ -20,6 +25,32 @@
 #include "chp.h"
 
 #define to_channelpath(device) container_of(device, struct channel_path, dev)
+#define CHP_INFO_UPDATE_INTERVAL	1*HZ
+
+enum cfg_task_t {
+	cfg_none,
+	cfg_configure,
+	cfg_deconfigure
+};
+
+/* Map for pending configure tasks. */
+static enum cfg_task_t chp_cfg_task[__MAX_CSSID + 1][__MAX_CHPID + 1];
+static DEFINE_MUTEX(cfg_lock);
+static int cfg_busy;
+
+/* Map for channel-path status. */
+static struct sclp_chp_info chp_info;
+static DEFINE_MUTEX(info_lock);
+
+/* Time after which channel-path status may be outdated. */
+static unsigned long chp_info_expires;
+
+/* Workqueue to perform pending configure tasks. */
+static struct workqueue_struct *chp_wq;
+static struct work_struct cfg_work;
+
+/* Wait queue for configure completion events. */
+static wait_queue_head_t cfg_wait_queue;
 
 /* Return channel_path struct for given chpid. */
 static inline struct channel_path *chpid_to_chp(struct chp_id chpid)
@@ -251,6 +282,43 @@ static ssize_t chp_status_write(struct device *dev,
 
 static DEVICE_ATTR(status, 0644, chp_status_show, chp_status_write);
 
+static ssize_t chp_configure_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct channel_path *cp;
+	int status;
+
+	cp = container_of(dev, struct channel_path, dev);
+	status = chp_info_get_status(cp->chpid);
+	if (status < 0)
+		return status;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", status);
+}
+
+static int cfg_wait_idle(void);
+
+static ssize_t chp_configure_write(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct channel_path *cp;
+	int val;
+	char delim;
+
+	if (sscanf(buf, "%d %c", &val, &delim) != 1)
+		return -EINVAL;
+	if (val != 0 && val != 1)
+		return -EINVAL;
+	cp = container_of(dev, struct channel_path, dev);
+	chp_cfg_schedule(cp->chpid, val);
+	cfg_wait_idle();
+
+	return count;
+}
+
+static DEVICE_ATTR(configure, 0644, chp_configure_show, chp_configure_write);
+
 static ssize_t chp_type_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
@@ -293,6 +361,7 @@ static DEVICE_ATTR(shared, 0444, chp_shared_show, NULL);
 
 static struct attribute * chp_attrs[] = {
 	&dev_attr_status.attr,
+	&dev_attr_configure.attr,
 	&dev_attr_type.attr,
 	&dev_attr_cmg.attr,
 	&dev_attr_shared.attr,
@@ -323,6 +392,8 @@ int chp_new(struct chp_id chpid)
 	struct channel_path *chp;
 	int ret;
 
+	if (chp_is_registered(chpid))
+		return 0;
 	chp = kzalloc(sizeof(struct channel_path), GFP_KERNEL);
 	if (!chp)
 		return -ENOMEM;
@@ -435,3 +506,180 @@ int chp_process_crw(int id, int status)
 		return 0;
 	}
 }
+
+static inline int info_bit_num(struct chp_id id)
+{
+	return id.id + id.cssid * (__MAX_CHPID + 1);
+}
+
+/* Force chp_info refresh on next call to info_validate(). */
+static void info_expire(void)
+{
+	mutex_lock(&info_lock);
+	chp_info_expires = jiffies - 1;
+	mutex_unlock(&info_lock);
+}
+
+/* Ensure that chp_info is up-to-date. */
+static int info_update(void)
+{
+	int rc;
+
+	mutex_lock(&info_lock);
+	rc = 0;
+	if (time_after(jiffies, chp_info_expires)) {
+		/* Data is too old, update. */
+		rc = sclp_chp_read_info(&chp_info);
+		chp_info_expires = jiffies + CHP_INFO_UPDATE_INTERVAL ;
+	}
+	mutex_unlock(&info_lock);
+
+	return rc;
+}
+
+/**
+ * chp_info_get_status - retrieve configure status of a channel-path
+ * @chpid: channel-path ID
+ *
+ * On success, return 0 for standby, 1 for configured, 2 for reserved,
+ * 3 for not recognized. Return negative error code on error.
+ */
+int chp_info_get_status(struct chp_id chpid)
+{
+	int rc;
+	int bit;
+
+	rc = info_update();
+	if (rc)
+		return rc;
+
+	bit = info_bit_num(chpid);
+	mutex_lock(&info_lock);
+	if (!chp_test_bit(chp_info.recognized, bit))
+		rc = CHP_STATUS_NOT_RECOGNIZED;
+	else if (chp_test_bit(chp_info.configured, bit))
+		rc = CHP_STATUS_CONFIGURED;
+	else if (chp_test_bit(chp_info.standby, bit))
+		rc = CHP_STATUS_STANDBY;
+	else
+		rc = CHP_STATUS_RESERVED;
+	mutex_unlock(&info_lock);
+
+	return rc;
+}
+
+/* Return configure task for chpid. */
+static enum cfg_task_t cfg_get_task(struct chp_id chpid)
+{
+	return chp_cfg_task[chpid.cssid][chpid.id];
+}
+
+/* Set configure task for chpid. */
+static void cfg_set_task(struct chp_id chpid, enum cfg_task_t cfg)
+{
+	chp_cfg_task[chpid.cssid][chpid.id] = cfg;
+}
+
+/* Perform one configure/deconfigure request. Reschedule work function until
+ * last request. */
+static void cfg_func(struct work_struct *work)
+{
+	struct chp_id chpid;
+	enum cfg_task_t t;
+
+	mutex_lock(&cfg_lock);
+	t = cfg_none;
+	chp_id_for_each(&chpid) {
+		t = cfg_get_task(chpid);
+		if (t != cfg_none) {
+			cfg_set_task(chpid, cfg_none);
+			break;
+		}
+	}
+	mutex_unlock(&cfg_lock);
+
+	switch (t) {
+	case cfg_configure:
+		sclp_chp_configure(chpid);
+		info_expire();
+		chsc_chp_online(chpid);
+		break;
+	case cfg_deconfigure:
+		sclp_chp_deconfigure(chpid);
+		info_expire();
+		chsc_chp_offline(chpid);
+		break;
+	case cfg_none:
+		/* Get updated information after last change. */
+		info_update();
+		mutex_lock(&cfg_lock);
+		cfg_busy = 0;
+		mutex_unlock(&cfg_lock);
+		wake_up_interruptible(&cfg_wait_queue);
+		return;
+	}
+	queue_work(chp_wq, &cfg_work);
+}
+
+/**
+ * chp_cfg_schedule - schedule chpid configuration request
+ * @chpid - channel-path ID
+ * @configure - Non-zero for configure, zero for deconfigure
+ *
+ * Schedule a channel-path configuration/deconfiguration request.
+ */
+void chp_cfg_schedule(struct chp_id chpid, int configure)
+{
+	CIO_MSG_EVENT(2, "chp_cfg_sched%x.%02x=%d\n", chpid.cssid, chpid.id,
+		      configure);
+	mutex_lock(&cfg_lock);
+	cfg_set_task(chpid, configure ? cfg_configure : cfg_deconfigure);
+	cfg_busy = 1;
+	mutex_unlock(&cfg_lock);
+	queue_work(chp_wq, &cfg_work);
+}
+
+/**
+ * chp_cfg_cancel_deconfigure - cancel chpid deconfiguration request
+ * @chpid - channel-path ID
+ *
+ * Cancel an active channel-path deconfiguration request if it has not yet
+ * been performed.
+ */
+void chp_cfg_cancel_deconfigure(struct chp_id chpid)
+{
+	CIO_MSG_EVENT(2, "chp_cfg_cancel:%x.%02x\n", chpid.cssid, chpid.id);
+	mutex_lock(&cfg_lock);
+	if (cfg_get_task(chpid) == cfg_deconfigure)
+		cfg_set_task(chpid, cfg_none);
+	mutex_unlock(&cfg_lock);
+}
+
+static int cfg_wait_idle(void)
+{
+	if (wait_event_interruptible(cfg_wait_queue, !cfg_busy))
+		return -ERESTARTSYS;
+	return 0;
+}
+
+static int __init chp_init(void)
+{
+	struct chp_id chpid;
+
+	chp_wq = create_singlethread_workqueue("cio_chp");
+	if (!chp_wq)
+		return -ENOMEM;
+	INIT_WORK(&cfg_work, cfg_func);
+	init_waitqueue_head(&cfg_wait_queue);
+	if (info_update())
+		return 0;
+	/* Register available channel-paths. */
+	chp_id_for_each(&chpid) {
+		if (chp_info_get_status(chpid) != CHP_STATUS_NOT_RECOGNIZED)
+			chp_new(chpid);
+	}
+
+	return 0;
+}
+
+subsys_initcall(chp_init);
diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h
index ac2b1a9c3bc..862af69d970 100644
--- a/drivers/s390/cio/chp.h
+++ b/drivers/s390/cio/chp.h
@@ -10,10 +10,23 @@
 
 #include <linux/types.h>
 #include <linux/device.h>
-
-#include "chpid.h"
+#include <asm/chpid.h>
 #include "chsc.h"
 
+#define CHP_STATUS_STANDBY		0
+#define CHP_STATUS_CONFIGURED		1
+#define CHP_STATUS_RESERVED		2
+#define CHP_STATUS_NOT_RECOGNIZED	3
+
+static inline int chp_test_bit(u8 *bitmap, int num)
+{
+	int byte = num >> 3;
+	int mask = 128 >> (num & 7);
+
+	return (bitmap[byte] & mask) ? 1 : 0;
+}
+
+
 struct channel_path {
 	struct chp_id chpid;
 	int state;
@@ -33,5 +46,8 @@ int chp_process_crw(int id, int available);
 void chp_remove_cmg_attr(struct channel_path *chp);
 int chp_add_cmg_attr(struct channel_path *chp);
 int chp_new(struct chp_id chpid);
+void chp_cfg_schedule(struct chp_id chpid, int configure);
+void chp_cfg_cancel_deconfigure(struct chp_id chpid);
+int chp_info_get_status(struct chp_id chpid);
 
 #endif /* S390_CHP_H */
diff --git a/drivers/s390/cio/chpid.h b/drivers/s390/cio/chpid.h
deleted file mode 100644
index 44cc00ed9b5..00000000000
--- a/drivers/s390/cio/chpid.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- *  drivers/s390/cio/chpid.h
- *
- *    Copyright IBM Corp. 2007
- *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
- */
-
-#ifndef S390_CHP_ID_H
-#define S390_CHP_ID_H S390_CHP_ID_H
-
-#include <linux/string.h>
-#include <asm/types.h>
-#include "css.h"
-
-struct chp_id {
-	u8 reserved1;
-	u8 cssid;
-	u8 reserved2;
-	u8 id;
-} __attribute__((packed));
-
-static inline void chp_id_init(struct chp_id *chpid)
-{
-	memset(chpid, 0, sizeof(struct chp_id));
-}
-
-static inline int chp_id_is_equal(struct chp_id *a, struct chp_id *b)
-{
-	return (a->id == b->id) && (a->cssid == b->cssid);
-}
-
-static inline void chp_id_next(struct chp_id *chpid)
-{
-	if (chpid->id < __MAX_CHPID)
-		chpid->id++;
-	else {
-		chpid->id = 0;
-		chpid->cssid++;
-	}
-}
-
-static inline int chp_id_is_valid(struct chp_id *chpid)
-{
-	return (chpid->cssid <= __MAX_CSSID);
-}
-
-
-#define chp_id_for_each(c) \
-	for (chp_id_init(c); chp_id_is_valid(c); chp_id_next(c))
-
-#endif /* S390_CHP_ID_H */
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index d99f525eac0..3dec460bba2 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -15,12 +15,12 @@
 #include <linux/device.h>
 
 #include <asm/cio.h>
+#include <asm/chpid.h>
 
 #include "css.h"
 #include "cio.h"
 #include "cio_debug.h"
 #include "ioasm.h"
-#include "chpid.h"
 #include "chp.h"
 #include "chsc.h"
 
@@ -498,6 +498,45 @@ static int chsc_process_sei_res_acc(struct chsc_sei_area *sei_area)
 	return rc;
 }
 
+struct chp_config_data {
+	u8 map[32];
+	u8 op;
+	u8 pc;
+};
+
+static int chsc_process_sei_chp_config(struct chsc_sei_area *sei_area)
+{
+	struct chp_config_data *data;
+	struct chp_id chpid;
+	int num;
+
+	CIO_CRW_EVENT(4, "chsc: channel-path-configuration notification\n");
+	if (sei_area->rs != 0)
+		return 0;
+	data = (struct chp_config_data *) &(sei_area->ccdf);
+	chp_id_init(&chpid);
+	for (num = 0; num <= __MAX_CHPID; num++) {
+		if (!chp_test_bit(data->map, num))
+			continue;
+		chpid.id = num;
+		printk(KERN_WARNING "cio: processing configure event %d for "
+		       "chpid %x.%02x\n", data->op, chpid.cssid, chpid.id);
+		switch (data->op) {
+		case 0:
+			chp_cfg_schedule(chpid, 1);
+			break;
+		case 1:
+			chp_cfg_schedule(chpid, 0);
+			break;
+		case 2:
+			chp_cfg_cancel_deconfigure(chpid);
+			break;
+		}
+	}
+
+	return 0;
+}
+
 static int chsc_process_sei(struct chsc_sei_area *sei_area)
 {
 	int rc;
@@ -514,6 +553,9 @@ static int chsc_process_sei(struct chsc_sei_area *sei_area)
 	case 2: /* i/o resource accessibiliy */
 		rc = chsc_process_sei_res_acc(sei_area);
 		break;
+	case 8: /* channel-path-configuration notification */
+		rc = chsc_process_sei_chp_config(sei_area);
+		break;
 	default: /* other stuff */
 		CIO_CRW_EVENT(4, "chsc: unhandled sei content code %d\n",
 			      sei_area->cc);
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 0e40defc608..322586f27cc 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -3,7 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/device.h>
-#include "chpid.h"
+#include <asm/chpid.h>
 
 #define CHSC_SDA_OC_MSS   0x2
 
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 7dd0649c95d..ea1defba569 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -22,6 +22,7 @@
 #include <asm/setup.h>
 #include <asm/reset.h>
 #include <asm/ipl.h>
+#include <asm/chpid.h>
 #include "airq.h"
 #include "cio.h"
 #include "css.h"
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index 35154a21035..e62ab5c5286 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -3,6 +3,7 @@
 
 #include "schid.h"
 #include <linux/mutex.h>
+#include <linux/device.h>
 
 /*
  * where we put the ssd info
diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h
index 4319e1ced79..b2b1a265c60 100644
--- a/drivers/s390/cio/css.h
+++ b/drivers/s390/cio/css.h
@@ -5,8 +5,10 @@
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/device.h>
+#include <linux/types.h>
 
 #include <asm/cio.h>
+#include <asm/chpid.h>
 
 #include "schid.h"
 
@@ -149,8 +151,6 @@ extern void css_reiterate_subchannels(void);
 
 #define __MAX_SUBCHANNEL 65535
 #define __MAX_SSID 3
-#define __MAX_CHPID 255
-#define __MAX_CSSID 0
 
 struct channel_subsystem {
 	u8 cssid;
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index db3d1b990f5..d6226881d0d 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -15,6 +15,7 @@
 
 #include <asm/ccwdev.h>
 #include <asm/cio.h>
+#include <asm/chpid.h>
 
 #include "cio.h"
 #include "cio_debug.h"
@@ -22,7 +23,6 @@
 #include "device.h"
 #include "chsc.h"
 #include "ioasm.h"
-#include "chpid.h"
 #include "chp.h"
 
 int
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index d819ae2ee9a..16f59fcb66b 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -16,13 +16,13 @@
 
 #include <asm/ccwdev.h>
 #include <asm/idals.h>
+#include <asm/chpid.h>
 
 #include "cio.h"
 #include "cio_debug.h"
 #include "css.h"
 #include "chsc.h"
 #include "device.h"
-#include "chpid.h"
 #include "chp.h"
 
 int ccw_device_set_options_mask(struct ccw_device *cdev, unsigned long flags)
diff --git a/drivers/s390/cio/ioasm.h b/drivers/s390/cio/ioasm.h
index 78c7db6daa8..7153dd95908 100644
--- a/drivers/s390/cio/ioasm.h
+++ b/drivers/s390/cio/ioasm.h
@@ -1,8 +1,8 @@
 #ifndef S390_CIO_IOASM_H
 #define S390_CIO_IOASM_H
 
+#include <asm/chpid.h>
 #include "schid.h"
-#include "chpid.h"
 
 /*
  * TPI info structure
-- 
cgit v1.2.3


From cfbe9bb2fb5de1da58d351432a9465c22d6d3ee5 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:32 +0200
Subject: [S390] cio: Use add_uevent_var.

Convert ccw_uevent to use add_uevent_var and adapt snprint_alias.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/device.c | 66 +++++++++++++++++++----------------------------
 1 file changed, 26 insertions(+), 40 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index c3fc205e3bc..34e7d77b997 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -56,13 +56,12 @@ ccw_bus_match (struct device * dev, struct device_driver * drv)
 /* Store modalias string delimited by prefix/suffix string into buffer with
  * specified size. Return length of resulting string (excluding trailing '\0')
  * even if string doesn't fit buffer (snprintf semantics). */
-static int snprint_alias(char *buf, size_t size, const char *prefix,
+static int snprint_alias(char *buf, size_t size,
 			 struct ccw_device_id *id, const char *suffix)
 {
 	int len;
 
-	len = snprintf(buf, size, "%sccw:t%04Xm%02X", prefix, id->cu_type,
-		       id->cu_model);
+	len = snprintf(buf, size, "ccw:t%04Xm%02X", id->cu_type, id->cu_model);
 	if (len > size)
 		return len;
 	buf += len;
@@ -85,53 +84,40 @@ static int ccw_uevent(struct device *dev, char **envp, int num_envp,
 	struct ccw_device *cdev = to_ccwdev(dev);
 	struct ccw_device_id *id = &(cdev->id);
 	int i = 0;
-	int len;
+	int len = 0;
+	int ret;
+	char modalias_buf[30];
 
 	/* CU_TYPE= */
-	len = snprintf(buffer, buffer_size, "CU_TYPE=%04X", id->cu_type) + 1;
-	if (len > buffer_size || i >= num_envp)
-		return -ENOMEM;
-	envp[i++] = buffer;
-	buffer += len;
-	buffer_size -= len;
+	ret = add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len,
+			     "CU_TYPE=%04X", id->cu_type);
+	if (ret)
+		return ret;
 
 	/* CU_MODEL= */
-	len = snprintf(buffer, buffer_size, "CU_MODEL=%02X", id->cu_model) + 1;
-	if (len > buffer_size || i >= num_envp)
-		return -ENOMEM;
-	envp[i++] = buffer;
-	buffer += len;
-	buffer_size -= len;
+	ret = add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len,
+			     "CU_MODEL=%02X", id->cu_model);
+	if (ret)
+		return ret;
 
 	/* The next two can be zero, that's ok for us */
 	/* DEV_TYPE= */
-	len = snprintf(buffer, buffer_size, "DEV_TYPE=%04X", id->dev_type) + 1;
-	if (len > buffer_size || i >= num_envp)
-		return -ENOMEM;
-	envp[i++] = buffer;
-	buffer += len;
-	buffer_size -= len;
+	ret = add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len,
+			     "DEV_TYPE=%04X", id->dev_type);
+	if (ret)
+		return ret;
 
 	/* DEV_MODEL= */
-	len = snprintf(buffer, buffer_size, "DEV_MODEL=%02X",
-			(unsigned char) id->dev_model) + 1;
-	if (len > buffer_size || i >= num_envp)
-		return -ENOMEM;
-	envp[i++] = buffer;
-	buffer += len;
-	buffer_size -= len;
+	ret = add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len,
+			     "DEV_MODEL=%02X", id->dev_model);
+	if (ret)
+		return ret;
 
 	/* MODALIAS=  */
-	len = snprint_alias(buffer, buffer_size, "MODALIAS=", id, "") + 1;
-	if (len > buffer_size || i >= num_envp)
-		return -ENOMEM;
-	envp[i++] = buffer;
-	buffer += len;
-	buffer_size -= len;
-
-	envp[i] = NULL;
-
-	return 0;
+	snprint_alias(modalias_buf, sizeof(modalias_buf), id, "");
+	ret = add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len,
+			     "MODALIAS=%s", modalias_buf);
+	return ret;
 }
 
 struct bus_type ccw_bus_type;
@@ -280,7 +266,7 @@ modalias_show (struct device *dev, struct device_attribute *attr, char *buf)
 	struct ccw_device_id *id = &(cdev->id);
 	int len;
 
-	len = snprint_alias(buf, PAGE_SIZE, "", id, "\n") + 1;
+	len = snprint_alias(buf, PAGE_SIZE, id, "\n") + 1;
 
 	return len > PAGE_SIZE ? PAGE_SIZE : len;
 }
-- 
cgit v1.2.3


From 387b734fc2b55f776b192c7afdfd892ba42347d4 Mon Sep 17 00:00:00 2001
From: Stefan Bader <shbader@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:33 +0200
Subject: [S390] cio: Re-start path verification after aborting internal I/O.

Path verification triggered by changes to the available CHPIDs will be
interrupted by another change but not re-started. This results in an
invalid path mask.
To solve this make sure to completely re-start path verification when
changing the available paths.

Signed-off-by: Stefan Bader <shbader@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/chsc.c | 108 ++++++++++++++++++++++++------------------------
 1 file changed, 54 insertions(+), 54 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 3dec460bba2..02615eb4398 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -178,6 +178,38 @@ css_get_ssd_info(struct subchannel *sch)
 	return ret;
 }
 
+static int check_for_io_on_path(struct subchannel *sch, int mask)
+{
+	int cc;
+
+	cc = stsch(sch->schid, &sch->schib);
+	if (cc)
+		return 0;
+	if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == mask)
+		return 1;
+	return 0;
+}
+
+static void terminate_internal_io(struct subchannel *sch)
+{
+	if (cio_clear(sch)) {
+		/* Recheck device in case clear failed. */
+		sch->lpm = 0;
+		if (device_trigger_verify(sch) != 0) {
+			if(css_enqueue_subchannel_slow(sch->schid)) {
+				css_clear_subchannel_slow_list();
+				need_rescan = 1;
+			}
+		}
+		return;
+	}
+	/* Request retry of internal operation. */
+	device_set_intretry(sch);
+	/* Call handler. */
+	if (sch->driver && sch->driver->termination)
+		sch->driver->termination(&sch->dev);
+}
+
 static int
 s390_subchannel_remove_chpid(struct device *dev, void *data)
 {
@@ -208,37 +240,33 @@ s390_subchannel_remove_chpid(struct device *dev, void *data)
 	if (sch->schib.pmcw.pim == 0x80)
 		goto out_unreg;
 
-	if ((sch->schib.scsw.actl & SCSW_ACTL_DEVACT) &&
-	    (sch->schib.scsw.actl & SCSW_ACTL_SCHACT) &&
-	    (sch->schib.pmcw.lpum == mask)) {
-		int cc;
-
-		cc = cio_clear(sch);
-		if (cc == -ENODEV)
+	if (check_for_io_on_path(sch, mask)) {
+		if (device_is_online(sch))
+			device_kill_io(sch);
+		else {
+			terminate_internal_io(sch);
+			/* Re-start path verification. */
+			if (sch->driver && sch->driver->verify)
+				sch->driver->verify(&sch->dev);
+		}
+	} else {
+		/* trigger path verification. */
+		if (sch->driver && sch->driver->verify)
+			sch->driver->verify(&sch->dev);
+		else if (sch->lpm == mask)
 			goto out_unreg;
-		/* Request retry of internal operation. */
-		device_set_intretry(sch);
-		/* Call handler. */
-		if (sch->driver && sch->driver->termination)
-			sch->driver->termination(&sch->dev);
-		goto out_unlock;
 	}
 
-	/* trigger path verification. */
-	if (sch->driver && sch->driver->verify)
-		sch->driver->verify(&sch->dev);
-	else if (sch->lpm == mask)
-		goto out_unreg;
-out_unlock:
 	spin_unlock_irq(sch->lock);
 	return 0;
+
 out_unreg:
-	spin_unlock_irq(sch->lock);
 	sch->lpm = 0;
 	if (css_enqueue_subchannel_slow(sch->schid)) {
 		css_clear_subchannel_slow_list();
 		need_rescan = 1;
 	}
+	spin_unlock_irq(sch->lock);
 	return 0;
 }
 
@@ -683,38 +711,6 @@ int chsc_chp_online(struct chp_id chpid)
 	return rc;
 }
 
-static int check_for_io_on_path(struct subchannel *sch, int index)
-{
-	int cc;
-
-	cc = stsch(sch->schid, &sch->schib);
-	if (cc)
-		return 0;
-	if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == (0x80 >> index))
-		return 1;
-	return 0;
-}
-
-static void terminate_internal_io(struct subchannel *sch)
-{
-	if (cio_clear(sch)) {
-		/* Recheck device in case clear failed. */
-		sch->lpm = 0;
-		if (device_trigger_verify(sch) != 0) {
-			if(css_enqueue_subchannel_slow(sch->schid)) {
-				css_clear_subchannel_slow_list();
-				need_rescan = 1;
-			}
-		}
-		return;
-	}
-	/* Request retry of internal operation. */
-	device_set_intretry(sch);
-	/* Call handler. */
-	if (sch->driver && sch->driver->termination)
-		sch->driver->termination(&sch->dev);
-}
-
 static void __s390_subchannel_vary_chpid(struct subchannel *sch,
 					 struct chp_id chpid, int on)
 {
@@ -741,13 +737,17 @@ static void __s390_subchannel_vary_chpid(struct subchannel *sch,
 		}
 		sch->opm &= ~(0x80 >> chp);
 		sch->lpm &= ~(0x80 >> chp);
-		if (check_for_io_on_path(sch, chp)) {
+		if (check_for_io_on_path(sch, (0x80 >> chp))) {
 			if (device_is_online(sch))
 				/* Path verification is done after killing. */
 				device_kill_io(sch);
-			else
+			else {
 				/* Kill and retry internal I/O. */
 				terminate_internal_io(sch);
+				/* Re-start path verification. */
+				if (sch->driver && sch->driver->verify)
+					sch->driver->verify(&sch->dev);
+			}
 		} else if (!sch->lpm) {
 			if (device_trigger_verify(sch) != 0) {
 				if (css_enqueue_subchannel_slow(sch->schid)) {
-- 
cgit v1.2.3


From 83b3370c79b91b9be3f6540c3c914e689134b45f Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:34 +0200
Subject: [S390] cio: replace subchannel evaluation queue with bitmap

Use a bitmap for indicating which subchannels require evaluation
instead of allocating memory for each evaluation request. This
approach reduces memory consumption during recovery in case of
massive evaluation request occurrence and removes the need for
memory allocation failure handling.

Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/Makefile     |   2 +-
 drivers/s390/cio/chp.c        |   8 +--
 drivers/s390/cio/chp.h        |   2 +-
 drivers/s390/cio/chsc.c       | 130 ++++++++++---------------------------
 drivers/s390/cio/chsc.h       |   4 +-
 drivers/s390/cio/css.c        | 148 ++++++++++++++----------------------------
 drivers/s390/cio/css.h        |  10 +--
 drivers/s390/cio/device_fsm.c |   6 +-
 drivers/s390/cio/idset.c      | 112 ++++++++++++++++++++++++++++++++
 drivers/s390/cio/idset.h      |  25 +++++++
 drivers/s390/s390mach.c       |  24 ++-----
 11 files changed, 235 insertions(+), 236 deletions(-)
 create mode 100644 drivers/s390/cio/idset.c
 create mode 100644 drivers/s390/cio/idset.h

(limited to 'drivers')

diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
index fe7b3ffa1ea..cfaf77b320f 100644
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the S/390 common i/o drivers
 #
 
-obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o
+obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o idset.o
 ccw_device-objs += device.o device_fsm.o device_ops.o
 ccw_device-objs += device_id.o device_pgid.o device_status.o
 obj-y += ccw_device.o cmf.o
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 0e92c8c8986..ac289e6eadf 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -491,7 +491,7 @@ void *chp_get_chp_desc(struct chp_id chpid)
  * Handle channel-report-words indicating that the status of a channel-path
  * has changed.
  */
-int chp_process_crw(int id, int status)
+void chp_process_crw(int id, int status)
 {
 	struct chp_id chpid;
 
@@ -500,11 +500,9 @@ int chp_process_crw(int id, int status)
 	if (status) {
 		if (!chp_is_registered(chpid))
 			chp_new(chpid);
-		return chsc_chp_online(chpid);
-	} else {
+		chsc_chp_online(chpid);
+	} else
 		chsc_chp_offline(chpid);
-		return 0;
-	}
 }
 
 static inline int info_bit_num(struct chp_id id)
diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h
index 862af69d970..65286563c59 100644
--- a/drivers/s390/cio/chp.h
+++ b/drivers/s390/cio/chp.h
@@ -42,7 +42,7 @@ int chp_get_status(struct chp_id chpid);
 u8 chp_get_sch_opm(struct subchannel *sch);
 int chp_is_registered(struct chp_id chpid);
 void *chp_get_chp_desc(struct chp_id chpid);
-int chp_process_crw(int id, int available);
+void chp_process_crw(int id, int available);
 void chp_remove_cmg_attr(struct channel_path *chp);
 int chp_add_cmg_attr(struct channel_path *chp);
 int chp_new(struct chp_id chpid);
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 02615eb4398..89a130a6265 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -195,12 +195,8 @@ static void terminate_internal_io(struct subchannel *sch)
 	if (cio_clear(sch)) {
 		/* Recheck device in case clear failed. */
 		sch->lpm = 0;
-		if (device_trigger_verify(sch) != 0) {
-			if(css_enqueue_subchannel_slow(sch->schid)) {
-				css_clear_subchannel_slow_list();
-				need_rescan = 1;
-			}
-		}
+		if (device_trigger_verify(sch) != 0)
+			css_schedule_eval(sch->schid);
 		return;
 	}
 	/* Request retry of internal operation. */
@@ -262,11 +258,8 @@ s390_subchannel_remove_chpid(struct device *dev, void *data)
 
 out_unreg:
 	sch->lpm = 0;
-	if (css_enqueue_subchannel_slow(sch->schid)) {
-		css_clear_subchannel_slow_list();
-		need_rescan = 1;
-	}
 	spin_unlock_irq(sch->lock);
+	css_schedule_eval(sch->schid);
 	return 0;
 }
 
@@ -281,9 +274,6 @@ void chsc_chp_offline(struct chp_id chpid)
 		return;
 	bus_for_each_dev(&css_bus_type, NULL, &chpid,
 			 s390_subchannel_remove_chpid);
-
-	if (need_rescan || css_slow_subchannels_exist())
-		queue_work(slow_path_wq, &slow_path_work);
 }
 
 struct res_acc_data {
@@ -331,7 +321,6 @@ static int
 s390_process_res_acc_new_sch(struct subchannel_id schid)
 {
 	struct schib schib;
-	int ret;
 	/*
 	 * We don't know the device yet, but since a path
 	 * may be available now to the device we'll have
@@ -342,15 +331,10 @@ s390_process_res_acc_new_sch(struct subchannel_id schid)
 	 */
 	if (stsch_err(schid, &schib))
 		/* We're through */
-		return need_rescan ? -EAGAIN : -ENXIO;
+		return -ENXIO;
 
 	/* Put it on the slow path. */
-	ret = css_enqueue_subchannel_slow(schid);
-	if (ret) {
-		css_clear_subchannel_slow_list();
-		need_rescan = 1;
-		return -EAGAIN;
-	}
+	css_schedule_eval(schid);
 	return 0;
 }
 
@@ -392,10 +376,8 @@ __s390_process_res_acc(struct subchannel_id schid, void *data)
 }
 
 
-static int
-s390_process_res_acc (struct res_acc_data *res_data)
+static void s390_process_res_acc (struct res_acc_data *res_data)
 {
-	int rc;
 	char dbf_txt[15];
 
 	sprintf(dbf_txt, "accpr%x.%02x", res_data->chpid.cssid,
@@ -413,12 +395,7 @@ s390_process_res_acc (struct res_acc_data *res_data)
 	 * The more information we have (info), the less scanning
 	 * will we have to do.
 	 */
-	rc = for_each_subchannel(__s390_process_res_acc, res_data);
-	if (css_slow_subchannels_exist())
-		rc = -EAGAIN;
-	else if (rc != -EAGAIN)
-		rc = 0;
-	return rc;
+	for_each_subchannel(__s390_process_res_acc, res_data);
 }
 
 static int
@@ -470,7 +447,7 @@ struct chsc_sei_area {
 	/* ccdf has to be big enough for a link-incident record */
 } __attribute__ ((packed));
 
-static int chsc_process_sei_link_incident(struct chsc_sei_area *sei_area)
+static void chsc_process_sei_link_incident(struct chsc_sei_area *sei_area)
 {
 	struct chp_id chpid;
 	int id;
@@ -478,7 +455,7 @@ static int chsc_process_sei_link_incident(struct chsc_sei_area *sei_area)
 	CIO_CRW_EVENT(4, "chsc: link incident (rs=%02x, rs_id=%04x)\n",
 		      sei_area->rs, sei_area->rsid);
 	if (sei_area->rs != 4)
-		return 0;
+		return;
 	id = __get_chpid_from_lir(sei_area->ccdf);
 	if (id < 0)
 		CIO_CRW_EVENT(4, "chsc: link incident - invalid LIR\n");
@@ -487,21 +464,18 @@ static int chsc_process_sei_link_incident(struct chsc_sei_area *sei_area)
 		chpid.id = id;
 		chsc_chp_offline(chpid);
 	}
-
-	return 0;
 }
 
-static int chsc_process_sei_res_acc(struct chsc_sei_area *sei_area)
+static void chsc_process_sei_res_acc(struct chsc_sei_area *sei_area)
 {
 	struct res_acc_data res_data;
 	struct chp_id chpid;
 	int status;
-	int rc;
 
 	CIO_CRW_EVENT(4, "chsc: resource accessibility event (rs=%02x, "
 		      "rs_id=%04x)\n", sei_area->rs, sei_area->rsid);
 	if (sei_area->rs != 4)
-		return 0;
+		return;
 	chp_id_init(&chpid);
 	chpid.id = sei_area->rsid;
 	/* allocate a new channel path structure, if needed */
@@ -509,7 +483,7 @@ static int chsc_process_sei_res_acc(struct chsc_sei_area *sei_area)
 	if (status < 0)
 		chp_new(chpid);
 	else if (!status)
-		return 0;
+		return;
 	memset(&res_data, 0, sizeof(struct res_acc_data));
 	res_data.chpid = chpid;
 	if ((sei_area->vf & 0xc0) != 0) {
@@ -521,9 +495,7 @@ static int chsc_process_sei_res_acc(struct chsc_sei_area *sei_area)
 			/* link address */
 			res_data.fla_mask = 0xff00;
 	}
-	rc = s390_process_res_acc(&res_data);
-
-	return rc;
+	s390_process_res_acc(&res_data);
 }
 
 struct chp_config_data {
@@ -532,7 +504,7 @@ struct chp_config_data {
 	u8 pc;
 };
 
-static int chsc_process_sei_chp_config(struct chsc_sei_area *sei_area)
+static void chsc_process_sei_chp_config(struct chsc_sei_area *sei_area)
 {
 	struct chp_config_data *data;
 	struct chp_id chpid;
@@ -540,7 +512,7 @@ static int chsc_process_sei_chp_config(struct chsc_sei_area *sei_area)
 
 	CIO_CRW_EVENT(4, "chsc: channel-path-configuration notification\n");
 	if (sei_area->rs != 0)
-		return 0;
+		return;
 	data = (struct chp_config_data *) &(sei_area->ccdf);
 	chp_id_init(&chpid);
 	for (num = 0; num <= __MAX_CHPID; num++) {
@@ -561,52 +533,44 @@ static int chsc_process_sei_chp_config(struct chsc_sei_area *sei_area)
 			break;
 		}
 	}
-
-	return 0;
 }
 
-static int chsc_process_sei(struct chsc_sei_area *sei_area)
+static void chsc_process_sei(struct chsc_sei_area *sei_area)
 {
-	int rc;
-
 	/* Check if we might have lost some information. */
-	if (sei_area->flags & 0x40)
+	if (sei_area->flags & 0x40) {
 		CIO_CRW_EVENT(2, "chsc: event overflow\n");
+		css_schedule_eval_all();
+	}
 	/* which kind of information was stored? */
-	rc = 0;
 	switch (sei_area->cc) {
 	case 1: /* link incident*/
-		rc = chsc_process_sei_link_incident(sei_area);
+		chsc_process_sei_link_incident(sei_area);
 		break;
 	case 2: /* i/o resource accessibiliy */
-		rc = chsc_process_sei_res_acc(sei_area);
+		chsc_process_sei_res_acc(sei_area);
 		break;
 	case 8: /* channel-path-configuration notification */
-		rc = chsc_process_sei_chp_config(sei_area);
+		chsc_process_sei_chp_config(sei_area);
 		break;
 	default: /* other stuff */
 		CIO_CRW_EVENT(4, "chsc: unhandled sei content code %d\n",
 			      sei_area->cc);
 		break;
 	}
-
-	return rc;
 }
 
-int chsc_process_crw(void)
+void chsc_process_crw(void)
 {
 	struct chsc_sei_area *sei_area;
-	int ret;
-	int rc;
 
 	if (!sei_page)
-		return 0;
+		return;
 	/* Access to sei_page is serialized through machine check handler
 	 * thread, so no need for locking. */
 	sei_area = sei_page;
 
 	CIO_TRACE_EVENT( 2, "prcss");
-	ret = 0;
 	do {
 		memset(sei_area, 0, sizeof(*sei_area));
 		sei_area->request.length = 0x0010;
@@ -616,37 +580,26 @@ int chsc_process_crw(void)
 
 		if (sei_area->response.code == 0x0001) {
 			CIO_CRW_EVENT(4, "chsc: sei successful\n");
-			rc = chsc_process_sei(sei_area);
-			if (rc)
-				ret = rc;
+			chsc_process_sei(sei_area);
 		} else {
 			CIO_CRW_EVENT(2, "chsc: sei failed (rc=%04x)\n",
 				      sei_area->response.code);
-			ret = 0;
 			break;
 		}
 	} while (sei_area->flags & 0x80);
-
-	return ret;
 }
 
 static int
 __chp_add_new_sch(struct subchannel_id schid)
 {
 	struct schib schib;
-	int ret;
 
 	if (stsch_err(schid, &schib))
 		/* We're through */
-		return need_rescan ? -EAGAIN : -ENXIO;
+		return -ENXIO;
 
 	/* Put it on the slow path. */
-	ret = css_enqueue_subchannel_slow(schid);
-	if (ret) {
-		css_clear_subchannel_slow_list();
-		need_rescan = 1;
-		return -EAGAIN;
-	}
+	css_schedule_eval(schid);
 	return 0;
 }
 
@@ -693,22 +646,15 @@ __chp_add(struct subchannel_id schid, void *data)
 	return 0;
 }
 
-int chsc_chp_online(struct chp_id chpid)
+void chsc_chp_online(struct chp_id chpid)
 {
-	int rc;
 	char dbf_txt[15];
 
 	sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
 
-	if (chp_get_status(chpid) == 0)
-		return 0;
-	rc = for_each_subchannel(__chp_add, &chpid);
-	if (css_slow_subchannels_exist())
-		rc = -EAGAIN;
-	if (rc != -EAGAIN)
-		rc = 0;
-	return rc;
+	if (chp_get_status(chpid) != 0)
+		for_each_subchannel(__chp_add, &chpid);
 }
 
 static void __s390_subchannel_vary_chpid(struct subchannel *sch,
@@ -749,12 +695,8 @@ static void __s390_subchannel_vary_chpid(struct subchannel *sch,
 					sch->driver->verify(&sch->dev);
 			}
 		} else if (!sch->lpm) {
-			if (device_trigger_verify(sch) != 0) {
-				if (css_enqueue_subchannel_slow(sch->schid)) {
-					css_clear_subchannel_slow_list();
-					need_rescan = 1;
-				}
-			}
+			if (device_trigger_verify(sch) != 0)
+				css_schedule_eval(sch->schid);
 		} else if (sch->driver && sch->driver->verify)
 			sch->driver->verify(&sch->dev);
 		break;
@@ -801,11 +743,7 @@ __s390_vary_chpid_on(struct subchannel_id schid, void *data)
 		/* We're through */
 		return -ENXIO;
 	/* Put it on the slow path. */
-	if (css_enqueue_subchannel_slow(schid)) {
-		css_clear_subchannel_slow_list();
-		need_rescan = 1;
-		return -EAGAIN;
-	}
+	css_schedule_eval(schid);
 	return 0;
 }
 
@@ -826,8 +764,6 @@ int chsc_chp_vary(struct chp_id chpid, int on)
 	if (on)
 		/* Scan for new devices on varied on path. */
 		for_each_subchannel(__s390_vary_chpid_on, NULL);
-	if (need_rescan || css_slow_subchannels_exist())
-		queue_work(slow_path_wq, &slow_path_work);
 	return 0;
 }
 
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 322586f27cc..742ef57d2c5 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -36,7 +36,7 @@ struct channel_path_desc {
 struct channel_path;
 
 extern int css_get_ssd_info(struct subchannel *);
-extern int chsc_process_crw(void);
+extern void chsc_process_crw(void);
 
 struct css_general_char {
 	u64 : 41;
@@ -79,7 +79,7 @@ extern int chsc_secm(struct channel_subsystem *, int);
 int chsc_chp_vary(struct chp_id chpid, int on);
 int chsc_determine_channel_path_description(struct chp_id chpid,
 					    struct channel_path_desc *desc);
-int chsc_chp_online(struct chp_id chpid);
+void chsc_chp_online(struct chp_id chpid);
 void chsc_chp_offline(struct chp_id chpid);
 int chsc_get_channel_measurement_chars(struct channel_path *chp);
 
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index fe0ace7aece..fcc641e578f 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -20,8 +20,8 @@
 #include "ioasm.h"
 #include "chsc.h"
 #include "device.h"
+#include "idset.h"
 
-int need_rescan = 0;
 int css_init_done = 0;
 static int need_reprobe = 0;
 static int max_ssid = 0;
@@ -306,7 +306,7 @@ static int css_evaluate_new_subchannel(struct subchannel_id schid, int slow)
 	return css_probe_device(schid);
 }
 
-static int css_evaluate_subchannel(struct subchannel_id schid, int slow)
+static void css_evaluate_subchannel(struct subchannel_id schid, int slow)
 {
 	struct subchannel *sch;
 	int ret;
@@ -317,53 +317,66 @@ static int css_evaluate_subchannel(struct subchannel_id schid, int slow)
 		put_device(&sch->dev);
 	} else
 		ret = css_evaluate_new_subchannel(schid, slow);
-
-	return ret;
+	if (ret == -EAGAIN)
+		css_schedule_eval(schid);
 }
 
-static int
-css_rescan_devices(struct subchannel_id schid, void *data)
+static struct idset *slow_subchannel_set;
+static spinlock_t slow_subchannel_lock;
+
+static int __init slow_subchannel_init(void)
 {
-	return css_evaluate_subchannel(schid, 1);
+	spin_lock_init(&slow_subchannel_lock);
+	slow_subchannel_set = idset_sch_new();
+	if (!slow_subchannel_set) {
+		printk(KERN_WARNING "cio: could not allocate slow subchannel "
+		       "set\n");
+		return -ENOMEM;
+	}
+	return 0;
 }
 
-struct slow_subchannel {
-	struct list_head slow_list;
-	struct subchannel_id schid;
-};
-
-static LIST_HEAD(slow_subchannels_head);
-static DEFINE_SPINLOCK(slow_subchannel_lock);
+subsys_initcall(slow_subchannel_init);
 
-static void
-css_trigger_slow_path(struct work_struct *unused)
+static void css_slow_path_func(struct work_struct *unused)
 {
-	CIO_TRACE_EVENT(4, "slowpath");
-
-	if (need_rescan) {
-		need_rescan = 0;
-		for_each_subchannel(css_rescan_devices, NULL);
-		return;
-	}
+	struct subchannel_id schid;
 
+	CIO_TRACE_EVENT(4, "slowpath");
 	spin_lock_irq(&slow_subchannel_lock);
-	while (!list_empty(&slow_subchannels_head)) {
-		struct slow_subchannel *slow_sch =
-			list_entry(slow_subchannels_head.next,
-				   struct slow_subchannel, slow_list);
-
-		list_del_init(slow_subchannels_head.next);
+	init_subchannel_id(&schid);
+	while (idset_sch_get_first(slow_subchannel_set, &schid)) {
+		idset_sch_del(slow_subchannel_set, schid);
 		spin_unlock_irq(&slow_subchannel_lock);
-		css_evaluate_subchannel(slow_sch->schid, 1);
+		css_evaluate_subchannel(schid, 1);
 		spin_lock_irq(&slow_subchannel_lock);
-		kfree(slow_sch);
 	}
 	spin_unlock_irq(&slow_subchannel_lock);
 }
 
-DECLARE_WORK(slow_path_work, css_trigger_slow_path);
+static DECLARE_WORK(slow_path_work, css_slow_path_func);
 struct workqueue_struct *slow_path_wq;
 
+void css_schedule_eval(struct subchannel_id schid)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&slow_subchannel_lock, flags);
+	idset_sch_add(slow_subchannel_set, schid);
+	queue_work(slow_path_wq, &slow_path_work);
+	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
+}
+
+void css_schedule_eval_all(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&slow_subchannel_lock, flags);
+	idset_fill(slow_subchannel_set);
+	queue_work(slow_path_wq, &slow_path_work);
+	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
+}
+
 /* Reprobe subchannel if unregistered. */
 static int reprobe_subchannel(struct subchannel_id schid, void *data)
 {
@@ -425,34 +438,15 @@ void css_schedule_reprobe(void)
 
 EXPORT_SYMBOL_GPL(css_schedule_reprobe);
 
-/*
- * Rescan for new devices. FIXME: This is slow.
- * This function is called when we have lost CRWs due to overflows and we have
- * to do subchannel housekeeping.
- */
-void
-css_reiterate_subchannels(void)
-{
-	css_clear_subchannel_slow_list();
-	need_rescan = 1;
-}
-
 /*
  * Called from the machine check handler for subchannel report words.
  */
-int
-css_process_crw(int rsid1, int rsid2)
+void css_process_crw(int rsid1, int rsid2)
 {
-	int ret;
 	struct subchannel_id mchk_schid;
 
 	CIO_CRW_EVENT(2, "source is subchannel %04X, subsystem id %x\n",
 		      rsid1, rsid2);
-
-	if (need_rescan)
-		/* We need to iterate all subchannels anyway. */
-		return -EAGAIN;
-
 	init_subchannel_id(&mchk_schid);
 	mchk_schid.sch_no = rsid1;
 	if (rsid2 != 0)
@@ -463,14 +457,7 @@ css_process_crw(int rsid1, int rsid2)
 	 * use stsch() to find out if the subchannel in question has come
 	 * or gone.
 	 */
-	ret = css_evaluate_subchannel(mchk_schid, 0);
-	if (ret == -EAGAIN) {
-		if (css_enqueue_subchannel_slow(mchk_schid)) {
-			css_clear_subchannel_slow_list();
-			need_rescan = 1;
-		}
-	}
-	return ret;
+	css_evaluate_subchannel(mchk_schid, 0);
 }
 
 static int __init
@@ -745,47 +732,6 @@ struct bus_type css_bus_type = {
 
 subsys_initcall(init_channel_subsystem);
 
-int
-css_enqueue_subchannel_slow(struct subchannel_id schid)
-{
-	struct slow_subchannel *new_slow_sch;
-	unsigned long flags;
-
-	new_slow_sch = kzalloc(sizeof(struct slow_subchannel), GFP_ATOMIC);
-	if (!new_slow_sch)
-		return -ENOMEM;
-	new_slow_sch->schid = schid;
-	spin_lock_irqsave(&slow_subchannel_lock, flags);
-	list_add_tail(&new_slow_sch->slow_list, &slow_subchannels_head);
-	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
-	return 0;
-}
-
-void
-css_clear_subchannel_slow_list(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&slow_subchannel_lock, flags);
-	while (!list_empty(&slow_subchannels_head)) {
-		struct slow_subchannel *slow_sch =
-			list_entry(slow_subchannels_head.next,
-				   struct slow_subchannel, slow_list);
-
-		list_del_init(slow_subchannels_head.next);
-		kfree(slow_sch);
-	}
-	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
-}
-
-
-
-int
-css_slow_subchannels_exist(void)
-{
-	return (!list_empty(&slow_subchannels_head));
-}
-
 MODULE_LICENSE("GPL");
 EXPORT_SYMBOL(css_bus_type);
 EXPORT_SYMBOL_GPL(css_characteristics_avail);
diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h
index b2b1a265c60..4b3133a7bae 100644
--- a/drivers/s390/cio/css.h
+++ b/drivers/s390/cio/css.h
@@ -146,7 +146,7 @@ extern void css_sch_device_unregister(struct subchannel *);
 extern struct subchannel * get_subchannel_by_schid(struct subchannel_id);
 extern int css_init_done;
 extern int for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *);
-extern int css_process_crw(int, int);
+extern void css_process_crw(int, int);
 extern void css_reiterate_subchannels(void);
 
 #define __MAX_SUBCHANNEL 65535
@@ -186,16 +186,12 @@ int device_trigger_verify(struct subchannel *sch);
 void device_kill_pending_timer(struct subchannel *);
 
 /* Helper functions to build lists for the slow path. */
-extern int css_enqueue_subchannel_slow(struct subchannel_id schid);
-void css_walk_subchannel_slow_list(void (*fn)(unsigned long));
-void css_clear_subchannel_slow_list(void);
-int css_slow_subchannels_exist(void);
-extern int need_rescan;
+void css_schedule_eval(struct subchannel_id schid);
+void css_schedule_eval_all(void);
 
 int sch_is_pseudo_sch(struct subchannel *);
 
 extern struct workqueue_struct *slow_path_wq;
-extern struct work_struct slow_path_work;
 
 int subchannel_add_files (struct device *);
 extern struct attribute_group *subch_attr_groups[];
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index d6226881d0d..898ec3b2beb 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -222,10 +222,8 @@ __recover_lost_chpids(struct subchannel *sch, int old_lpm)
 		if (old_lpm & mask)
 			continue;
 		chpid.id = sch->schib.pmcw.chpid[i];
-		if (!chp_is_registered(chpid)) {
-			need_rescan = 1;
-			queue_work(slow_path_wq, &slow_path_work);
-		}
+		if (!chp_is_registered(chpid))
+			css_schedule_eval_all();
 	}
 }
 
diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c
new file mode 100644
index 00000000000..16ea828e99f
--- /dev/null
+++ b/drivers/s390/cio/idset.c
@@ -0,0 +1,112 @@
+/*
+ *  drivers/s390/cio/idset.c
+ *
+ *    Copyright IBM Corp. 2007
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#include <linux/slab.h>
+#include <asm/bitops.h>
+#include "idset.h"
+#include "css.h"
+
+struct idset {
+	int num_ssid;
+	int num_id;
+	unsigned long bitmap[0];
+};
+
+static inline unsigned long bitmap_size(int num_ssid, int num_id)
+{
+	return __BITOPS_WORDS(num_ssid * num_id) * sizeof(unsigned long);
+}
+
+static struct idset *idset_new(int num_ssid, int num_id)
+{
+	struct idset *set;
+
+	set = kzalloc(sizeof(struct idset) + bitmap_size(num_ssid, num_id),
+		      GFP_KERNEL);
+	if (set) {
+		set->num_ssid = num_ssid;
+		set->num_id = num_id;
+	}
+	return set;
+}
+
+void idset_free(struct idset *set)
+{
+	kfree(set);
+}
+
+void idset_clear(struct idset *set)
+{
+	memset(set->bitmap, 0, bitmap_size(set->num_ssid, set->num_id));
+}
+
+void idset_fill(struct idset *set)
+{
+	memset(set->bitmap, 0xff, bitmap_size(set->num_ssid, set->num_id));
+}
+
+static inline void idset_add(struct idset *set, int ssid, int id)
+{
+	set_bit(ssid * set->num_id + id, set->bitmap);
+}
+
+static inline void idset_del(struct idset *set, int ssid, int id)
+{
+	clear_bit(ssid * set->num_id + id, set->bitmap);
+}
+
+static inline int idset_contains(struct idset *set, int ssid, int id)
+{
+	return test_bit(ssid * set->num_id + id, set->bitmap);
+}
+
+static inline int idset_get_first(struct idset *set, int *ssid, int *id)
+{
+	int bitnum;
+
+	bitnum = find_first_bit(set->bitmap, set->num_ssid * set->num_id);
+	if (bitnum >= set->num_ssid * set->num_id)
+		return 0;
+	*ssid = bitnum / set->num_id;
+	*id = bitnum % set->num_id;
+	return 1;
+}
+
+struct idset *idset_sch_new(void)
+{
+	return idset_new(__MAX_SSID + 1, __MAX_SUBCHANNEL + 1);
+}
+
+void idset_sch_add(struct idset *set, struct subchannel_id schid)
+{
+	idset_add(set, schid.ssid, schid.sch_no);
+}
+
+void idset_sch_del(struct idset *set, struct subchannel_id schid)
+{
+	idset_del(set, schid.ssid, schid.sch_no);
+}
+
+int idset_sch_contains(struct idset *set, struct subchannel_id schid)
+{
+	return idset_contains(set, schid.ssid, schid.sch_no);
+}
+
+int idset_sch_get_first(struct idset *set, struct subchannel_id *schid)
+{
+	int ssid = 0;
+	int id = 0;
+	int rc;
+
+	rc = idset_get_first(set, &ssid, &id);
+	if (rc) {
+		init_subchannel_id(schid);
+		schid->ssid = ssid;
+		schid->sch_no = id;
+	}
+	return rc;
+}
diff --git a/drivers/s390/cio/idset.h b/drivers/s390/cio/idset.h
new file mode 100644
index 00000000000..144466ab8c1
--- /dev/null
+++ b/drivers/s390/cio/idset.h
@@ -0,0 +1,25 @@
+/*
+ *  drivers/s390/cio/idset.h
+ *
+ *    Copyright IBM Corp. 2007
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef S390_IDSET_H
+#define S390_IDSET_H S390_IDSET_H
+
+#include "schid.h"
+
+struct idset;
+
+void idset_free(struct idset *set);
+void idset_clear(struct idset *set);
+void idset_fill(struct idset *set);
+
+struct idset *idset_sch_new(void);
+void idset_sch_add(struct idset *set, struct subchannel_id id);
+void idset_sch_del(struct idset *set, struct subchannel_id id);
+int idset_sch_contains(struct idset *set, struct subchannel_id id);
+int idset_sch_get_first(struct idset *set, struct subchannel_id *id);
+
+#endif /* S390_IDSET_H */
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index afd8a3c0f8d..644a06eba82 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -45,14 +45,13 @@ static int
 s390_collect_crw_info(void *param)
 {
 	struct crw crw[2];
-	int ccode, ret, slow;
+	int ccode;
 	struct semaphore *sem;
 	unsigned int chain;
 
 	sem = (struct semaphore *)param;
 repeat:
 	down_interruptible(sem);
-	slow = 0;
 	chain = 0;
 	while (1) {
 		if (unlikely(chain > 1)) {
@@ -85,9 +84,8 @@ repeat:
 		/* Check for overflows. */
 		if (crw[chain].oflw) {
 			pr_debug("%s: crw overflow detected!\n", __FUNCTION__);
-			css_reiterate_subchannels();
+			css_schedule_eval_all();
 			chain = 0;
-			slow = 1;
 			continue;
 		}
 		switch (crw[chain].rsc) {
@@ -95,10 +93,7 @@ repeat:
 			if (crw[0].chn && !chain)
 				break;
 			pr_debug("source is subchannel %04X\n", crw[0].rsid);
-			ret = css_process_crw (crw[0].rsid,
-					       chain ? crw[1].rsid : 0);
-			if (ret == -EAGAIN)
-				slow = 1;
+			css_process_crw(crw[0].rsid, chain ? crw[1].rsid : 0);
 			break;
 		case CRW_RSC_MONITOR:
 			pr_debug("source is monitoring facility\n");
@@ -117,28 +112,23 @@ repeat:
 			}
 			switch (crw[0].erc) {
 			case CRW_ERC_IPARM: /* Path has come. */
-				ret = chp_process_crw(crw[0].rsid, 1);
+				chp_process_crw(crw[0].rsid, 1);
 				break;
 			case CRW_ERC_PERRI: /* Path has gone. */
 			case CRW_ERC_PERRN:
-				ret = chp_process_crw(crw[0].rsid, 0);
+				chp_process_crw(crw[0].rsid, 0);
 				break;
 			default:
 				pr_debug("Don't know how to handle erc=%x\n",
 					 crw[0].erc);
-				ret = 0;
 			}
-			if (ret == -EAGAIN)
-				slow = 1;
 			break;
 		case CRW_RSC_CONFIG:
 			pr_debug("source is configuration-alert facility\n");
 			break;
 		case CRW_RSC_CSS:
 			pr_debug("source is channel subsystem\n");
-			ret = chsc_process_crw();
-			if (ret == -EAGAIN)
-				slow = 1;
+			chsc_process_crw();
 			break;
 		default:
 			pr_debug("unknown source\n");
@@ -147,8 +137,6 @@ repeat:
 		/* chain is always 0 or 1 here. */
 		chain = crw[chain].chn ? chain + 1 : 0;
 	}
-	if (slow)
-		queue_work(slow_path_wq, &slow_path_work);
 	goto repeat;
 	return 0;
 }
-- 
cgit v1.2.3


From 7ad6a24970325294a22a08446d473384c15b928e Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:35 +0200
Subject: [S390] cio: fix subchannel channel-path data usage

Ensure that channel-path related subchannel data is only retrieved and
used when it is valid and that it is updated when it may have changed.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/chsc.c       | 303 +++++++++++++++---------------------------
 drivers/s390/cio/chsc.h       |  10 +-
 drivers/s390/cio/cio.h        |  16 +--
 drivers/s390/cio/css.c        |  53 +++++++-
 drivers/s390/cio/css.h        |   1 +
 drivers/s390/cio/device.c     |  12 +-
 drivers/s390/cio/device_fsm.c |   1 +
 7 files changed, 179 insertions(+), 217 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 89a130a6265..0841e16b6a8 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -26,155 +26,84 @@
 
 static void *sei_page;
 
-/* FIXME: this is _always_ called for every subchannel. shouldn't we
- *	  process more than one at a time? */
-static int
-chsc_get_sch_desc_irq(struct subchannel *sch, void *page)
-{
-	int ccode, j;
+struct chsc_ssd_area {
+	struct chsc_header request;
+	u16 :10;
+	u16 ssid:2;
+	u16 :4;
+	u16 f_sch;	  /* first subchannel */
+	u16 :16;
+	u16 l_sch;	  /* last subchannel */
+	u32 :32;
+	struct chsc_header response;
+	u32 :32;
+	u8 sch_valid : 1;
+	u8 dev_valid : 1;
+	u8 st	     : 3; /* subchannel type */
+	u8 zeroes    : 3;
+	u8  unit_addr;	  /* unit address */
+	u16 devno;	  /* device number */
+	u8 path_mask;
+	u8 fla_valid_mask;
+	u16 sch;	  /* subchannel */
+	u8 chpid[8];	  /* chpids 0-7 */
+	u16 fla[8];	  /* full link addresses 0-7 */
+} __attribute__ ((packed));
 
-	struct {
-		struct chsc_header request;
-		u16 reserved1a:10;
-		u16 ssid:2;
-		u16 reserved1b:4;
-		u16 f_sch;	  /* first subchannel */
-		u16 reserved2;
-		u16 l_sch;	  /* last subchannel */
-		u32 reserved3;
-		struct chsc_header response;
-		u32 reserved4;
-		u8 sch_valid : 1;
-		u8 dev_valid : 1;
-		u8 st	     : 3; /* subchannel type */
-		u8 zeroes    : 3;
-		u8  unit_addr;	  /* unit address */
-		u16 devno;	  /* device number */
-		u8 path_mask;
-		u8 fla_valid_mask;
-		u16 sch;	  /* subchannel */
-		u8 chpid[8];	  /* chpids 0-7 */
-		u16 fla[8];	  /* full link addresses 0-7 */
-	} __attribute__ ((packed)) *ssd_area;
-
-	ssd_area = page;
+int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd)
+{
+	unsigned long page;
+	struct chsc_ssd_area *ssd_area;
+	int ccode;
+	int ret;
+	int i;
+	int mask;
 
+	page = get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	if (!page)
+		return -ENOMEM;
+	ssd_area = (struct chsc_ssd_area *) page;
 	ssd_area->request.length = 0x0010;
 	ssd_area->request.code = 0x0004;
-
-	ssd_area->ssid = sch->schid.ssid;
-	ssd_area->f_sch = sch->schid.sch_no;
-	ssd_area->l_sch = sch->schid.sch_no;
+	ssd_area->ssid = schid.ssid;
+	ssd_area->f_sch = schid.sch_no;
+	ssd_area->l_sch = schid.sch_no;
 
 	ccode = chsc(ssd_area);
+	/* Check response. */
 	if (ccode > 0) {
-		pr_debug("chsc returned with ccode = %d\n", ccode);
-		return (ccode == 3) ? -ENODEV : -EBUSY;
+		ret = (ccode == 3) ? -ENODEV : -EBUSY;
+		goto out_free;
 	}
-
-	switch (ssd_area->response.code) {
-	case 0x0001: /* everything ok */
-		break;
-	case 0x0002:
-		CIO_CRW_EVENT(2, "Invalid command!\n");
-		return -EINVAL;
-	case 0x0003:
-		CIO_CRW_EVENT(2, "Error in chsc request block!\n");
-		return -EINVAL;
-	case 0x0004:
-		CIO_CRW_EVENT(2, "Model does not provide ssd\n");
-		return -EOPNOTSUPP;
-	default:
-		CIO_CRW_EVENT(2, "Unknown CHSC response %d\n",
+	if (ssd_area->response.code != 0x0001) {
+		CIO_MSG_EVENT(2, "chsc: ssd failed for 0.%x.%04x (rc=%04x)\n",
+			      schid.ssid, schid.sch_no,
 			      ssd_area->response.code);
-		return -EIO;
-	}
-
-	/*
-	 * ssd_area->st stores the type of the detected
-	 * subchannel, with the following definitions:
-	 *
-	 * 0: I/O subchannel:	  All fields have meaning
-	 * 1: CHSC subchannel:	  Only sch_val, st and sch
-	 *			  have meaning
-	 * 2: Message subchannel: All fields except unit_addr
-	 *			  have meaning
-	 * 3: ADM subchannel:	  Only sch_val, st and sch
-	 *			  have meaning
-	 *
-	 * Other types are currently undefined.
-	 */
-	if (ssd_area->st > 3) { /* uhm, that looks strange... */
-		CIO_CRW_EVENT(0, "Strange subchannel type %d"
-			      " for sch 0.%x.%04x\n", ssd_area->st,
-			      sch->schid.ssid, sch->schid.sch_no);
-		/*
-		 * There may have been a new subchannel type defined in the
-		 * time since this code was written; since we don't know which
-		 * fields have meaning and what to do with it we just jump out
-		 */
-		return 0;
-	} else {
-		const char *type[4] = {"I/O", "chsc", "message", "ADM"};
-		CIO_CRW_EVENT(6, "ssd: sch 0.%x.%04x is %s subchannel\n",
-			      sch->schid.ssid, sch->schid.sch_no,
-			      type[ssd_area->st]);
-
-		sch->ssd_info.valid = 1;
-		sch->ssd_info.type = ssd_area->st;
+		ret = -EIO;
+		goto out_free;
 	}
-
-	if (ssd_area->st == 0 || ssd_area->st == 2) {
-		for (j = 0; j < 8; j++) {
-			if (!((0x80 >> j) & ssd_area->path_mask &
-			      ssd_area->fla_valid_mask))
-				continue;
-			sch->ssd_info.chpid[j] = ssd_area->chpid[j];
-			sch->ssd_info.fla[j]   = ssd_area->fla[j];
-		}
+	if (!ssd_area->sch_valid) {
+		ret = -ENODEV;
+		goto out_free;
 	}
-	return 0;
-}
-
-int
-css_get_ssd_info(struct subchannel *sch)
-{
-	int ret;
-	void *page;
-
-	page = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
-	if (!page)
-		return -ENOMEM;
-	spin_lock_irq(sch->lock);
-	ret = chsc_get_sch_desc_irq(sch, page);
-	if (ret) {
-		static int cio_chsc_err_msg;
-		
-		if (!cio_chsc_err_msg) {
-			printk(KERN_ERR
-			       "chsc_get_sch_descriptions:"
-			       " Error %d while doing chsc; "
-			       "processing some machine checks may "
-			       "not work\n", ret);
-			cio_chsc_err_msg = 1;
-		}
-	}
-	spin_unlock_irq(sch->lock);
-	free_page((unsigned long)page);
-	if (!ret) {
-		int j, mask;
-		struct chp_id chpid;
-
-		chp_id_init(&chpid);
-		/* Allocate channel path structures, if needed. */
-		for (j = 0; j < 8; j++) {
-			mask = 0x80 >> j;
-			chpid.id = sch->ssd_info.chpid[j];
-			if ((sch->schib.pmcw.pim & mask) &&
-			    !chp_is_registered(chpid))
-				chp_new(chpid);
+	/* Copy data */
+	ret = 0;
+	memset(ssd, 0, sizeof(struct chsc_ssd_info));
+	if ((ssd_area->st != 0) && (ssd_area->st != 2))
+		goto out_free;
+	ssd->path_mask = ssd_area->path_mask;
+	ssd->fla_valid_mask = ssd_area->fla_valid_mask;
+	for (i = 0; i < 8; i++) {
+		mask = 0x80 >> i;
+		if (ssd_area->path_mask & mask) {
+			chp_id_init(&ssd->chpid[i]);
+			ssd->chpid[i].id = ssd_area->chpid[i];
 		}
+		if (ssd_area->fla_valid_mask & mask)
+			ssd->fla[i] = ssd_area->fla[i];
 	}
+out_free:
+	free_page(page);
 	return ret;
 }
 
@@ -276,47 +205,6 @@ void chsc_chp_offline(struct chp_id chpid)
 			 s390_subchannel_remove_chpid);
 }
 
-struct res_acc_data {
-	struct chp_id chpid;
-	u32 fla_mask;
-	u16 fla;
-};
-
-static int s390_process_res_acc_sch(struct res_acc_data *res_data,
-				    struct subchannel *sch)
-{
-	int found;
-	int chp;
-	int ccode;
-
-	found = 0;
-	for (chp = 0; chp <= 7; chp++)
-		/*
-		 * check if chpid is in information updated by ssd
-		 */
-		if (sch->ssd_info.valid &&
-		    sch->ssd_info.chpid[chp] == res_data->chpid.id &&
-		    (sch->ssd_info.fla[chp] & res_data->fla_mask)
-		    == res_data->fla) {
-			found = 1;
-			break;
-		}
-
-	if (found == 0)
-		return 0;
-
-	/*
-	 * Do a stsch to update our subchannel structure with the
-	 * new path information and eventually check for logically
-	 * offline chpids.
-	 */
-	ccode = stsch(sch->schid, &sch->schib);
-	if (ccode > 0)
-		return 0;
-
-	return 0x80 >> chp;
-}
-
 static int
 s390_process_res_acc_new_sch(struct subchannel_id schid)
 {
@@ -338,6 +226,32 @@ s390_process_res_acc_new_sch(struct subchannel_id schid)
 	return 0;
 }
 
+struct res_acc_data {
+	struct chp_id chpid;
+	u32 fla_mask;
+	u16 fla;
+};
+
+static int get_res_chpid_mask(struct chsc_ssd_info *ssd,
+			      struct res_acc_data *data)
+{
+	int i;
+	int mask;
+
+	for (i = 0; i < 8; i++) {
+		mask = 0x80 >> i;
+		if (!(ssd->path_mask & mask))
+			continue;
+		if (!chp_id_is_equal(&ssd->chpid[i], &data->chpid))
+			continue;
+		if ((ssd->fla_valid_mask & mask) &&
+		    ((ssd->fla[i] & data->fla_mask) != data->fla))
+			continue;
+		return mask;
+	}
+	return 0;
+}
+
 static int
 __s390_process_res_acc(struct subchannel_id schid, void *data)
 {
@@ -352,14 +266,11 @@ __s390_process_res_acc(struct subchannel_id schid, void *data)
 		return s390_process_res_acc_new_sch(schid);
 
 	spin_lock_irq(sch->lock);
-
-	chp_mask = s390_process_res_acc_sch(res_data, sch);
-
-	if (chp_mask == 0) {
-		spin_unlock_irq(sch->lock);
-		put_device(&sch->dev);
-		return 0;
-	}
+	chp_mask = get_res_chpid_mask(&sch->ssd_info, res_data);
+	if (chp_mask == 0)
+		goto out;
+	if (stsch(sch->schid, &sch->schib))
+		goto out;
 	old_lpm = sch->lpm;
 	sch->lpm = ((sch->schib.pmcw.pim &
 		     sch->schib.pmcw.pam &
@@ -369,13 +280,12 @@ __s390_process_res_acc(struct subchannel_id schid, void *data)
 		device_trigger_reprobe(sch);
 	else if (sch->driver && sch->driver->verify)
 		sch->driver->verify(&sch->dev);
-
+out:
 	spin_unlock_irq(sch->lock);
 	put_device(&sch->dev);
 	return 0;
 }
 
-
 static void s390_process_res_acc (struct res_acc_data *res_data)
 {
 	char dbf_txt[15];
@@ -661,29 +571,30 @@ static void __s390_subchannel_vary_chpid(struct subchannel *sch,
 					 struct chp_id chpid, int on)
 {
 	int chp, old_lpm;
+	int mask;
 	unsigned long flags;
 
-	if (!sch->ssd_info.valid)
-		return;
-	
 	spin_lock_irqsave(sch->lock, flags);
 	old_lpm = sch->lpm;
 	for (chp = 0; chp < 8; chp++) {
-		if (sch->ssd_info.chpid[chp] != chpid.id)
+		mask = 0x80 >> chp;
+		if (!(sch->ssd_info.path_mask & mask))
+			continue;
+		if (!chp_id_is_equal(&sch->ssd_info.chpid[chp], &chpid))
 			continue;
 
 		if (on) {
-			sch->opm |= (0x80 >> chp);
-			sch->lpm |= (0x80 >> chp);
+			sch->opm |= mask;
+			sch->lpm |= mask;
 			if (!old_lpm)
 				device_trigger_reprobe(sch);
 			else if (sch->driver && sch->driver->verify)
 				sch->driver->verify(&sch->dev);
 			break;
 		}
-		sch->opm &= ~(0x80 >> chp);
-		sch->lpm &= ~(0x80 >> chp);
-		if (check_for_io_on_path(sch, (0x80 >> chp))) {
+		sch->opm &= ~mask;
+		sch->lpm &= ~mask;
+		if (check_for_io_on_path(sch, mask)) {
 			if (device_is_online(sch))
 				/* Path verification is done after killing. */
 				device_kill_io(sch);
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 742ef57d2c5..2ad81d11cf7 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <linux/device.h>
 #include <asm/chpid.h>
+#include "schid.h"
 
 #define CHSC_SDA_OC_MSS   0x2
 
@@ -35,7 +36,6 @@ struct channel_path_desc {
 
 struct channel_path;
 
-extern int css_get_ssd_info(struct subchannel *);
 extern void chsc_process_crw(void);
 
 struct css_general_char {
@@ -69,6 +69,14 @@ struct css_chsc_char {
 extern struct css_general_char css_general_characteristics;
 extern struct css_chsc_char css_chsc_characteristics;
 
+struct chsc_ssd_info {
+	u8 path_mask;
+	u8 fla_valid_mask;
+	struct chp_id chpid[8];
+	u16 fla[8];
+};
+extern int chsc_get_ssd_info(struct subchannel_id schid,
+			     struct chsc_ssd_info *ssd);
 extern int chsc_determine_css_characteristics(void);
 extern int css_characteristics_avail;
 
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index e62ab5c5286..7446c39951a 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -1,19 +1,11 @@
 #ifndef S390_CIO_H
 #define S390_CIO_H
 
-#include "schid.h"
 #include <linux/mutex.h>
 #include <linux/device.h>
-
-/*
- * where we put the ssd info
- */
-struct ssd_info {
-	__u8  valid:1;
-	__u8  type:7;		/* subchannel type */
-	__u8  chpid[8];		/* chpids */
-	__u16 fla[8];		/* full link addresses */
-} __attribute__ ((packed));
+#include <asm/chpid.h>
+#include "chsc.h"
+#include "schid.h"
 
 /*
  * path management control word
@@ -109,7 +101,7 @@ struct subchannel {
 	struct schib schib;	/* subchannel information block */
 	struct orb orb;		/* operation request block */
 	struct ccw1 sense_ccw;	/* static ccw for sense command */
-	struct ssd_info ssd_info;	/* subchannel description */
+	struct chsc_ssd_info ssd_info;	/* subchannel description */
 	struct device dev;	/* entry in device tree */
 	struct css_driver *driver;
 } __attribute__ ((aligned(8)));
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index fcc641e578f..27c6d9e55b2 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -21,6 +21,7 @@
 #include "chsc.h"
 #include "device.h"
 #include "idset.h"
+#include "chp.h"
 
 int css_init_done = 0;
 static int need_reprobe = 0;
@@ -125,8 +126,52 @@ void css_sch_device_unregister(struct subchannel *sch)
 	mutex_unlock(&sch->reg_mutex);
 }
 
-static int
-css_register_subchannel(struct subchannel *sch)
+static void ssd_from_pmcw(struct chsc_ssd_info *ssd, struct pmcw *pmcw)
+{
+	int i;
+	int mask;
+
+	memset(ssd, 0, sizeof(struct chsc_ssd_info));
+	ssd->path_mask = pmcw->pim;
+	for (i = 0; i < 8; i++) {
+		mask = 0x80 >> i;
+		if (pmcw->pim & mask) {
+			chp_id_init(&ssd->chpid[i]);
+			ssd->chpid[i].id = pmcw->chpid[i];
+		}
+	}
+}
+
+static void ssd_register_chpids(struct chsc_ssd_info *ssd)
+{
+	int i;
+	int mask;
+
+	for (i = 0; i < 8; i++) {
+		mask = 0x80 >> i;
+		if (ssd->path_mask & mask)
+			if (!chp_is_registered(ssd->chpid[i]))
+				chp_new(ssd->chpid[i]);
+	}
+}
+
+void css_update_ssd_info(struct subchannel *sch)
+{
+	int ret;
+
+	if (cio_is_console(sch->schid)) {
+		/* Console is initialized too early for functions requiring
+		 * memory allocation. */
+		ssd_from_pmcw(&sch->ssd_info, &sch->schib.pmcw);
+	} else {
+		ret = chsc_get_ssd_info(sch->schid, &sch->ssd_info);
+		if (ret)
+			ssd_from_pmcw(&sch->ssd_info, &sch->schib.pmcw);
+		ssd_register_chpids(&sch->ssd_info);
+	}
+}
+
+static int css_register_subchannel(struct subchannel *sch)
 {
 	int ret;
 
@@ -135,9 +180,7 @@ css_register_subchannel(struct subchannel *sch)
 	sch->dev.bus = &css_bus_type;
 	sch->dev.release = &css_subchannel_release;
 	sch->dev.groups = subch_attr_groups;
-
-	css_get_ssd_info(sch);
-
+	css_update_ssd_info(sch);
 	/* make it known to the system */
 	ret = css_sch_device_register(sch);
 	if (ret) {
diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h
index 4b3133a7bae..71fcfdc4280 100644
--- a/drivers/s390/cio/css.h
+++ b/drivers/s390/cio/css.h
@@ -148,6 +148,7 @@ extern int css_init_done;
 extern int for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *);
 extern void css_process_crw(int, int);
 extern void css_reiterate_subchannels(void);
+void css_update_ssd_info(struct subchannel *sch);
 
 #define __MAX_SUBCHANNEL 65535
 #define __MAX_SSID 3
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 34e7d77b997..7bb44e73ea9 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -216,12 +216,18 @@ static ssize_t
 chpids_show (struct device * dev, struct device_attribute *attr, char * buf)
 {
 	struct subchannel *sch = to_subchannel(dev);
-	struct ssd_info *ssd = &sch->ssd_info;
+	struct chsc_ssd_info *ssd = &sch->ssd_info;
 	ssize_t ret = 0;
 	int chp;
+	int mask;
 
-	for (chp = 0; chp < 8; chp++)
-		ret += sprintf (buf+ret, "%02x ", ssd->chpid[chp]);
+	for (chp = 0; chp < 8; chp++) {
+		mask = 0x80 >> chp;
+		if (ssd->path_mask & mask)
+			ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id);
+		else
+			ret += sprintf(buf + ret, "00 ");
+	}
 	ret += sprintf (buf+ret, "\n");
 	return min((ssize_t)PAGE_SIZE, ret);
 }
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 898ec3b2beb..aadd2fd4a86 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -246,6 +246,7 @@ ccw_device_recog_done(struct ccw_device *cdev, int state)
 	 */
 	old_lpm = sch->lpm;
 	stsch(sch->schid, &sch->schib);
+	css_update_ssd_info(sch);
 	sch->lpm = sch->schib.pmcw.pam & sch->opm;
 	/* Check since device may again have become not operational. */
 	if (!sch->schib.pmcw.dnv)
-- 
cgit v1.2.3


From 82b7ac058f60e0c92f9237fbaf440671f437ecdf Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:36 +0200
Subject: [S390] cio: Dont call css_update_ssd_info from interrupt context.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/device.c     | 3 ++-
 drivers/s390/cio/device_fsm.c | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 7bb44e73ea9..3b56f373da3 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -743,6 +743,7 @@ static int io_subchannel_recog(struct ccw_device *, struct subchannel *);
 static void sch_attach_device(struct subchannel *sch,
 			      struct ccw_device *cdev)
 {
+	css_update_ssd_info(sch);
 	spin_lock_irq(sch->lock);
 	sch->dev.driver_data = cdev;
 	cdev->private->schid = sch->schid;
@@ -878,7 +879,7 @@ io_subchannel_register(struct work_struct *work)
 	priv = container_of(work, struct ccw_device_private, kick_work);
 	cdev = priv->cdev;
 	sch = to_subchannel(cdev->dev.parent);
-
+	css_update_ssd_info(sch);
 	/*
 	 * io_subchannel_register() will also be called after device
 	 * recognition has been done for a boxed device (which will already
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index aadd2fd4a86..898ec3b2beb 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -246,7 +246,6 @@ ccw_device_recog_done(struct ccw_device *cdev, int state)
 	 */
 	old_lpm = sch->lpm;
 	stsch(sch->schid, &sch->schib);
-	css_update_ssd_info(sch);
 	sch->lpm = sch->schib.pmcw.pam & sch->opm;
 	/* Check since device may again have become not operational. */
 	if (!sch->schib.pmcw.dnv)
-- 
cgit v1.2.3


From d76123eb357a4baa653714183df286c1bb99f707 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:37 +0200
Subject: [S390] cio: ccwgroup register vs. unregister.

Introduce a mutex for struct ccwgroup to prevent simuntaneous
register/unregister on the same ccwgroup device.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/ccwgroup.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index 5aeb68e732b..e5ccda63e88 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -75,8 +75,10 @@ static void ccwgroup_ungroup_callback(struct device *dev)
 {
 	struct ccwgroup_device *gdev = to_ccwgroupdev(dev);
 
+	mutex_lock(&gdev->reg_mutex);
 	__ccwgroup_remove_symlinks(gdev);
 	device_unregister(dev);
+	mutex_unlock(&gdev->reg_mutex);
 }
 
 static ssize_t
@@ -173,7 +175,8 @@ ccwgroup_create(struct device *root,
 		return -ENOMEM;
 
 	atomic_set(&gdev->onoff, 0);
-
+	mutex_init(&gdev->reg_mutex);
+	mutex_lock(&gdev->reg_mutex);
 	for (i = 0; i < argc; i++) {
 		gdev->cdev[i] = get_ccwdev_by_busid(cdrv, argv[i]);
 
@@ -183,12 +186,12 @@ ccwgroup_create(struct device *root,
 		    || gdev->cdev[i]->id.driver_info !=
 		    gdev->cdev[0]->id.driver_info) {
 			rc = -EINVAL;
-			goto free_dev;
+			goto error;
 		}
 		/* Don't allow a device to belong to more than one group. */
 		if (gdev->cdev[i]->dev.driver_data) {
 			rc = -EINVAL;
-			goto free_dev;
+			goto error;
 		}
 		gdev->cdev[i]->dev.driver_data = gdev;
 	}
@@ -203,9 +206,8 @@ ccwgroup_create(struct device *root,
 			gdev->cdev[0]->dev.bus_id);
 
 	rc = device_register(&gdev->dev);
-	
 	if (rc)
-		goto free_dev;
+		goto error;
 	get_device(&gdev->dev);
 	rc = device_create_file(&gdev->dev, &dev_attr_ungroup);
 
@@ -216,27 +218,21 @@ ccwgroup_create(struct device *root,
 
 	rc = __ccwgroup_create_symlinks(gdev);
 	if (!rc) {
+		mutex_unlock(&gdev->reg_mutex);
 		put_device(&gdev->dev);
 		return 0;
 	}
 	device_remove_file(&gdev->dev, &dev_attr_ungroup);
 	device_unregister(&gdev->dev);
 error:
-	for (i = 0; i < argc; i++)
-		if (gdev->cdev[i]) {
-			put_device(&gdev->cdev[i]->dev);
-			gdev->cdev[i]->dev.driver_data = NULL;
-		}
-	put_device(&gdev->dev);
-	return rc;
-free_dev:
 	for (i = 0; i < argc; i++)
 		if (gdev->cdev[i]) {
 			if (gdev->cdev[i]->dev.driver_data == gdev)
 				gdev->cdev[i]->dev.driver_data = NULL;
 			put_device(&gdev->cdev[i]->dev);
 		}
-	kfree(gdev);
+	mutex_unlock(&gdev->reg_mutex);
+	put_device(&gdev->dev);
 	return rc;
 }
 
@@ -422,8 +418,12 @@ ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver)
 	get_driver(&cdriver->driver);
 	while ((dev = driver_find_device(&cdriver->driver, NULL, NULL,
 					 __ccwgroup_match_all))) {
-		__ccwgroup_remove_symlinks(to_ccwgroupdev(dev));
+		struct ccwgroup_device *gdev = to_ccwgroupdev(dev);
+
+		mutex_lock(&gdev->reg_mutex);
+		__ccwgroup_remove_symlinks(gdev);
 		device_unregister(dev);
+		mutex_unlock(&gdev->reg_mutex);
 		put_device(dev);
 	}
 	put_driver(&cdriver->driver);
@@ -444,8 +444,10 @@ __ccwgroup_get_gdev_by_cdev(struct ccw_device *cdev)
 	if (cdev->dev.driver_data) {
 		gdev = (struct ccwgroup_device *)cdev->dev.driver_data;
 		if (get_device(&gdev->dev)) {
+			mutex_lock(&gdev->reg_mutex);
 			if (device_is_registered(&gdev->dev))
 				return gdev;
+			mutex_unlock(&gdev->reg_mutex);
 			put_device(&gdev->dev);
 		}
 		return NULL;
@@ -465,6 +467,7 @@ ccwgroup_remove_ccwdev(struct ccw_device *cdev)
 	if (gdev) {
 		__ccwgroup_remove_symlinks(gdev);
 		device_unregister(&gdev->dev);
+		mutex_unlock(&gdev->reg_mutex);
 		put_device(&gdev->dev);
 	}
 }
-- 
cgit v1.2.3


From 8c4941c53b14e5a08ed2f270e9f087b410a9abcc Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:38 +0200
Subject: [S390] cio: cm_enable memory leak.

We allocage two pages when channel path measurements are enabled
via cm_enable. We must not forget to free them again when
channel path measurements are disabled again.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/chsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 0841e16b6a8..ea92ac4d657 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -818,7 +818,7 @@ chsc_secm(struct channel_subsystem *css, int enable)
 		} else
 			chsc_remove_cmg_attr(css);
 	}
-	if (enable && !css->cm_enabled) {
+	if (!css->cm_enabled) {
 		free_page((unsigned long)css->cub_addr1);
 		free_page((unsigned long)css->cub_addr2);
 	}
-- 
cgit v1.2.3


From ef99516c9646802c3d38c3eb83de302e05b3c1b5 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:39 +0200
Subject: [S390] cio: Unregister ccw devices directly.

We used to unregister ccw devices not directly from the I/O
subchannel remove function in order to avoid lifelocks on the
css bus semaphore. This semaphore is gone, and there is no reason
to not unregister the ccw device directly (it is even better since
it is more in keeping with the goal of immediate disconnect).

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/device.c | 56 ++++++++++-------------------------------------
 1 file changed, 12 insertions(+), 44 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 3b56f373da3..03355902c58 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -290,16 +290,10 @@ int ccw_device_is_orphan(struct ccw_device *cdev)
 	return sch_is_pseudo_sch(to_subchannel(cdev->dev.parent));
 }
 
-static void ccw_device_unregister(struct work_struct *work)
+static void ccw_device_unregister(struct ccw_device *cdev)
 {
-	struct ccw_device_private *priv;
-	struct ccw_device *cdev;
-
-	priv = container_of(work, struct ccw_device_private, kick_work);
-	cdev = priv->cdev;
 	if (test_and_clear_bit(1, &cdev->private->registered))
-		device_unregister(&cdev->dev);
-	put_device(&cdev->dev);
+		device_del(&cdev->dev);
 }
 
 static void
@@ -316,11 +310,8 @@ ccw_device_remove_disconnected(struct ccw_device *cdev)
 		spin_lock_irqsave(cdev->ccwlock, flags);
 		cdev->private->state = DEV_STATE_NOT_OPER;
 		spin_unlock_irqrestore(cdev->ccwlock, flags);
-		if (get_device(&cdev->dev)) {
-			PREPARE_WORK(&cdev->private->kick_work,
-				     ccw_device_unregister);
-			queue_work(ccw_device_work, &cdev->private->kick_work);
-		}
+		ccw_device_unregister(cdev);
+		put_device(&cdev->dev);
 		return ;
 	}
 	sch = to_subchannel(cdev->dev.parent);
@@ -555,17 +546,10 @@ static struct attribute_group ccwdev_attr_group = {
 	.attrs = ccwdev_attrs,
 };
 
-static int
-device_add_files (struct device *dev)
-{
-	return sysfs_create_group(&dev->kobj, &ccwdev_attr_group);
-}
-
-static void
-device_remove_files(struct device *dev)
-{
-	sysfs_remove_group(&dev->kobj, &ccwdev_attr_group);
-}
+struct attribute_group *ccwdev_attr_groups[] = {
+	&ccwdev_attr_group,
+	NULL,
+};
 
 /* this is a simple abstraction for device_register that sets the
  * correct bus type and adds the bus specific files */
@@ -580,10 +564,6 @@ static int ccw_device_register(struct ccw_device *cdev)
 		return ret;
 
 	set_bit(1, &cdev->private->registered);
-	if ((ret = device_add_files(dev))) {
-		if (test_and_clear_bit(1, &cdev->private->registered))
-			device_del(dev);
-	}
 	return ret;
 }
 
@@ -655,10 +635,6 @@ ccw_device_add_changed(struct work_struct *work)
 		return;
 	}
 	set_bit(1, &cdev->private->registered);
-	if (device_add_files(&cdev->dev)) {
-		if (test_and_clear_bit(1, &cdev->private->registered))
-			device_unregister(&cdev->dev);
-	}
 }
 
 void ccw_device_do_unreg_rereg(struct work_struct *work)
@@ -671,9 +647,7 @@ void ccw_device_do_unreg_rereg(struct work_struct *work)
 	cdev = priv->cdev;
 	sch = to_subchannel(cdev->dev.parent);
 
-	device_remove_files(&cdev->dev);
-	if (test_and_clear_bit(1, &cdev->private->registered))
-		device_del(&cdev->dev);
+	ccw_device_unregister(cdev);
 	PREPARE_WORK(&cdev->private->kick_work,
 		     ccw_device_add_changed);
 	queue_work(ccw_device_work, &cdev->private->kick_work);
@@ -712,6 +686,7 @@ static int io_subchannel_initialize_dev(struct subchannel *sch,
 	cdev->dev.parent = &sch->dev;
 	cdev->dev.release = ccw_device_release;
 	INIT_LIST_HEAD(&cdev->private->kick_work.entry);
+	cdev->dev.groups = ccwdev_attr_groups;
 	/* Do first half of device_register. */
 	device_initialize(&cdev->dev);
 	if (!get_device(&sch->dev)) {
@@ -1141,15 +1116,8 @@ io_subchannel_remove (struct subchannel *sch)
 	sch->dev.driver_data = NULL;
 	cdev->private->state = DEV_STATE_NOT_OPER;
 	spin_unlock_irqrestore(cdev->ccwlock, flags);
-	/*
-	 * Put unregistration on workqueue to avoid livelocks on the css bus
-	 * semaphore.
-	 */
-	if (get_device(&cdev->dev)) {
-		PREPARE_WORK(&cdev->private->kick_work,
-			     ccw_device_unregister);
-		queue_work(ccw_device_work, &cdev->private->kick_work);
-	}
+	ccw_device_unregister(cdev);
+	put_device(&cdev->dev);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 60691d3c2c0fe9ecc264741ff41f283fef579b8a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:45 +0200
Subject: [S390] Get rid of console setup functions.

We get this:

Section mismatch: reference to .init.text:con3270_consetup from .data
		  between 'con3270' (at offset 0x45c8) and 'con3270_fn'
Section mismatch: reference to .init.text:con3215_consetup from .data
		  between 'con3215' (at offset 0x4678) and
		  'raw3215_ccw_driver'

Since there is no difference between a non present console setup
function and one that returns only 0 remove them.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 drivers/s390/char/con3215.c | 7 -------
 drivers/s390/char/con3270.c | 7 -------
 2 files changed, 14 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c
index 9a328f14a64..6000bdee408 100644
--- a/drivers/s390/char/con3215.c
+++ b/drivers/s390/char/con3215.c
@@ -813,12 +813,6 @@ con3215_unblank(void)
 	spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags);
 }
 
-static int __init 
-con3215_consetup(struct console *co, char *options)
-{
-	return 0;
-}
-
 /*
  *  The console structure for the 3215 console
  */
@@ -827,7 +821,6 @@ static struct console con3215 = {
 	.write	 = con3215_write,
 	.device	 = con3215_device,
 	.unblank = con3215_unblank,
-	.setup	 = con3215_consetup,
 	.flags	 = CON_PRINTBUFFER,
 };
 
diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c
index 8e7f2d7633d..fd3479119eb 100644
--- a/drivers/s390/char/con3270.c
+++ b/drivers/s390/char/con3270.c
@@ -555,12 +555,6 @@ con3270_unblank(void)
 	spin_unlock_irqrestore(&cp->view.lock, flags);
 }
 
-static int __init 
-con3270_consetup(struct console *co, char *options)
-{
-	return 0;
-}
-
 /*
  *  The console structure for the 3270 console
  */
@@ -569,7 +563,6 @@ static struct console con3270 = {
 	.write	 = con3270_write,
 	.device	 = con3270_device,
 	.unblank = con3270_unblank,
-	.setup	 = con3270_consetup,
 	.flags	 = CON_PRINTBUFFER,
 };
 
-- 
cgit v1.2.3


From 4dfd5c4593e69e9d399dd9e01d184dc534408f7e Mon Sep 17 00:00:00 2001
From: Horst Hummel <horst.hummel@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:47 +0200
Subject: [S390] dasd: Add sysfs attribute status and generate uevents.

This patch adds a sysfs-attribute 'status' to make the DASD device-status
accessible from user-space. In addition, the DASD driver generates an
uevent(CHANGE) for the ccw-device on each device-status change.
This enables user-space applications (e.g. udev) to do related processing.

Signed-off-by: Horst Hummel <horst.hummel@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd.c        |  3 +++
 drivers/s390/block/dasd_devmap.c | 41 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

(limited to 'drivers')

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index eb5dc62f0d9..e71929db8b0 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -398,6 +398,9 @@ dasd_change_state(struct dasd_device *device)
 
 	if (device->state == device->target)
 		wake_up(&dasd_init_waitq);
+
+	/* let user-space know that the device status changed */
+	kobject_uevent(&device->cdev->dev.kobj, KOBJ_CHANGE);
 }
 
 /*
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index ed70852cc91..66958b8d7c4 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -828,6 +828,46 @@ dasd_discipline_show(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR(discipline, 0444, dasd_discipline_show, NULL);
 
+static ssize_t
+dasd_device_status_show(struct device *dev, struct device_attribute *attr,
+		     char *buf)
+{
+	struct dasd_device *device;
+	ssize_t len;
+
+	device = dasd_device_from_cdev(to_ccwdev(dev));
+	if (!IS_ERR(device)) {
+		switch (device->state) {
+		case DASD_STATE_NEW:
+			len = snprintf(buf, PAGE_SIZE, "new\n");
+			break;
+		case DASD_STATE_KNOWN:
+			len = snprintf(buf, PAGE_SIZE, "detected\n");
+			break;
+		case DASD_STATE_BASIC:
+			len = snprintf(buf, PAGE_SIZE, "basic\n");
+			break;
+		case DASD_STATE_UNFMT:
+			len = snprintf(buf, PAGE_SIZE, "unformatted\n");
+			break;
+		case DASD_STATE_READY:
+			len = snprintf(buf, PAGE_SIZE, "ready\n");
+			break;
+		case DASD_STATE_ONLINE:
+			len = snprintf(buf, PAGE_SIZE, "online\n");
+			break;
+		default:
+			len = snprintf(buf, PAGE_SIZE, "no stat\n");
+			break;
+		}
+		dasd_put_device(device);
+	} else
+		len = snprintf(buf, PAGE_SIZE, "unknown\n");
+	return len;
+}
+
+static DEVICE_ATTR(status, 0444, dasd_device_status_show, NULL);
+
 static ssize_t
 dasd_alias_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
@@ -939,6 +979,7 @@ static DEVICE_ATTR(eer_enabled, 0644, dasd_eer_show, dasd_eer_store);
 static struct attribute * dasd_attrs[] = {
 	&dev_attr_readonly.attr,
 	&dev_attr_discipline.attr,
+	&dev_attr_status.attr,
 	&dev_attr_alias.attr,
 	&dev_attr_vendor.attr,
 	&dev_attr_uid.attr,
-- 
cgit v1.2.3


From 7039d3a11c4b4b59f9ef933b4b0a28304bdd07d1 Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:48 +0200
Subject: [S390] dasd: Add ipldev parameter.

Specifying 'ipldev' in the dasd= kernel parameter will automatically
activate the boot device for use by the dasd driver.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd_devmap.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'drivers')

diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 66958b8d7c4..6a89cefe99b 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -19,6 +19,7 @@
 
 #include <asm/debug.h>
 #include <asm/uaccess.h>
+#include <asm/ipl.h>
 
 /* This is ugly... */
 #define PRINTK_HEADER "dasd_devmap:"
@@ -133,6 +134,8 @@ dasd_call_setup(char *str)
 __setup ("dasd=", dasd_call_setup);
 #endif	/* #ifndef MODULE */
 
+#define	DASD_IPLDEV	"ipldev"
+
 /*
  * Read a device busid/devno from a string.
  */
@@ -141,6 +144,20 @@ dasd_busid(char **str, int *id0, int *id1, int *devno)
 {
 	int val, old_style;
 
+	/* Interpret ipldev busid */
+	if (strncmp(DASD_IPLDEV, *str, strlen(DASD_IPLDEV)) == 0) {
+		if (ipl_info.type != IPL_TYPE_CCW) {
+			MESSAGE(KERN_ERR, "%s", "ipl device is not a ccw "
+				"device");
+			return -EINVAL;
+		}
+		*id0 = 0;
+		*id1 = ipl_info.data.ccw.dev_id.ssid;
+		*devno = ipl_info.data.ccw.dev_id.devno;
+		*str += strlen(DASD_IPLDEV);
+
+		return 0;
+	}
 	/* check for leading '0x' */
 	old_style = 0;
 	if ((*str)[0] == '0' && (*str)[1] == 'x') {
-- 
cgit v1.2.3


From 411ed3225733dbd83b4cbaaa992ef80d6ec1534e Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:49 +0200
Subject: [S390] zfcpdump support.

s390 machines provide hardware support for creating Linux dumps on SCSI
disks. For creating a dump a special purpose dump Linux is used. The first
32 MB of memory are saved by the hardware before the dump Linux is
booted. Via an SCLP interface, the saved memory can be accessed from
Linux. This patch exports memory and registers of the crashed Linux to
userspace via a debugfs file. For more information refer to
Documentation/s390/zfcpdump.txt, which is included in this patch.

Signed-off-by: Michael Holzheu <holzheu@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 drivers/s390/char/Makefile     |   3 +
 drivers/s390/char/sclp.h       |   2 +
 drivers/s390/char/sclp_sdias.c | 255 ++++++++++++++++
 drivers/s390/char/zcore.c      | 651 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 911 insertions(+)
 create mode 100644 drivers/s390/char/sclp_sdias.c
 create mode 100644 drivers/s390/char/zcore.c

(limited to 'drivers')

diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index 5fd581c22db..a0f6db21855 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -29,3 +29,6 @@ obj-$(CONFIG_S390_TAPE_34XX) += tape_34xx.o
 obj-$(CONFIG_S390_TAPE_3590) += tape_3590.o
 obj-$(CONFIG_MONREADER) += monreader.o
 obj-$(CONFIG_MONWRITER) += monwriter.o
+
+zcore_mod-objs := sclp_sdias.o zcore.o
+obj-$(CONFIG_ZFCPDUMP) += zcore_mod.o
diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h
index 7d29ab45a6e..6402e943436 100644
--- a/drivers/s390/char/sclp.h
+++ b/drivers/s390/char/sclp.h
@@ -27,6 +27,7 @@
 #define EvTyp_CntlProgIdent	0x0B
 #define EvTyp_SigQuiesce	0x1D
 #define EvTyp_VT220Msg		0x1A
+#define EvTyp_SDIAS		0x1C
 
 #define EvTyp_OpCmd_Mask	0x80000000
 #define EvTyp_Msg_Mask		0x40000000
@@ -36,6 +37,7 @@
 #define EvTyp_CtlProgIdent_Mask	0x00200000
 #define EvTyp_SigQuiesce_Mask	0x00000008
 #define EvTyp_VT220Msg_Mask	0x00000040
+#define EvTyp_SDIAS_Mask	0x00000010
 
 #define GnrlMsgFlgs_DOM		0x8000
 #define GnrlMsgFlgs_SndAlrm	0x4000
diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c
new file mode 100644
index 00000000000..06a4d089723
--- /dev/null
+++ b/drivers/s390/char/sclp_sdias.c
@@ -0,0 +1,255 @@
+/*
+ * Sclp "store data in absolut storage"
+ *
+ * Copyright IBM Corp. 2003,2007
+ * Author(s): Michael Holzheu
+ */
+
+#include <linux/sched.h>
+#include <asm/sclp.h>
+#include <asm/debug.h>
+#include <asm/ipl.h>
+#include "sclp.h"
+#include "sclp_rw.h"
+
+#define TRACE(x...) debug_sprintf_event(sdias_dbf, 1, x)
+#define ERROR_MSG(x...) printk ( KERN_ALERT "SDIAS: " x )
+
+#define SDIAS_RETRIES 300
+#define SDIAS_SLEEP_TICKS 50
+
+#define EQ_STORE_DATA	0x0
+#define EQ_SIZE		0x1
+#define DI_FCP_DUMP	0x0
+#define ASA_SIZE_32	0x0
+#define ASA_SIZE_64	0x1
+#define EVSTATE_ALL_STORED	0x0
+#define EVSTATE_NO_DATA		0x3
+#define EVSTATE_PART_STORED	0x10
+
+static struct debug_info *sdias_dbf;
+
+static struct sclp_register sclp_sdias_register = {
+	.send_mask = EvTyp_SDIAS_Mask,
+};
+
+struct sdias_evbuf {
+	struct	evbuf_header hdr;
+	u8	event_qual;
+	u8	data_id;
+	u64	reserved2;
+	u32	event_id;
+	u16	reserved3;
+	u8	asa_size;
+	u8	event_status;
+	u32	reserved4;
+	u32	blk_cnt;
+	u64	asa;
+	u32	reserved5;
+	u32	fbn;
+	u32	reserved6;
+	u32	lbn;
+	u16	reserved7;
+	u16	dbs;
+} __attribute__((packed));
+
+struct sdias_sccb {
+	struct sccb_header  hdr;
+	struct sdias_evbuf  evbuf;
+} __attribute__((packed));
+
+static struct sdias_sccb sccb __attribute__((aligned(4096)));
+
+static int sclp_req_done;
+static wait_queue_head_t sdias_wq;
+static DEFINE_MUTEX(sdias_mutex);
+
+static void sdias_callback(struct sclp_req *request, void *data)
+{
+	struct sdias_sccb *sccb;
+
+	sccb = (struct sdias_sccb *) request->sccb;
+	sclp_req_done = 1;
+	wake_up(&sdias_wq); /* Inform caller, that request is complete */
+	TRACE("callback done\n");
+}
+
+static int sdias_sclp_send(struct sclp_req *req)
+{
+	int retries;
+	int rc;
+
+	for (retries = SDIAS_RETRIES; retries; retries--) {
+		sclp_req_done = 0;
+		TRACE("add request\n");
+		rc = sclp_add_request(req);
+		if (rc) {
+			/* not initiated, wait some time and retry */
+			set_current_state(TASK_INTERRUPTIBLE);
+			TRACE("add request failed: rc = %i\n",rc);
+			schedule_timeout(SDIAS_SLEEP_TICKS);
+			continue;
+		}
+		/* initiated, wait for completion of service call */
+		wait_event(sdias_wq, (sclp_req_done == 1));
+		if (req->status == SCLP_REQ_FAILED) {
+			TRACE("sclp request failed\n");
+			rc = -EIO;
+			continue;
+		}
+		TRACE("request done\n");
+		break;
+	}
+	return rc;
+}
+
+/*
+ * Get number of blocks (4K) available in the HSA
+ */
+int sclp_sdias_blk_count(void)
+{
+	struct sclp_req request;
+	int rc;
+
+	mutex_lock(&sdias_mutex);
+
+	memset(&sccb, 0, sizeof(sccb));
+	memset(&request, 0, sizeof(request));
+
+	sccb.hdr.length = sizeof(sccb);
+	sccb.evbuf.hdr.length = sizeof(struct sdias_evbuf);
+	sccb.evbuf.hdr.type = EvTyp_SDIAS;
+	sccb.evbuf.event_qual = EQ_SIZE;
+	sccb.evbuf.data_id = DI_FCP_DUMP;
+	sccb.evbuf.event_id = 4712;
+	sccb.evbuf.dbs = 1;
+
+	request.sccb = &sccb;
+	request.command = SCLP_CMDW_WRITE_EVENT_DATA;
+	request.status = SCLP_REQ_FILLED;
+	request.callback = sdias_callback;
+
+	rc = sdias_sclp_send(&request);
+	if (rc) {
+		ERROR_MSG("sclp_send failed for get_nr_blocks\n");
+		goto out;
+	}
+	if (sccb.hdr.response_code != 0x0020) {
+		TRACE("send failed: %x\n", sccb.hdr.response_code);
+		rc = -EIO;
+		goto out;
+	}
+
+	switch (sccb.evbuf.event_status) {
+		case 0:
+			rc = sccb.evbuf.blk_cnt;
+			break;
+		default:
+			ERROR_MSG("SCLP error: %x\n", sccb.evbuf.event_status);
+			rc = -EIO;
+			goto out;
+	}
+	TRACE("%i blocks\n", rc);
+out:
+	mutex_unlock(&sdias_mutex);
+	return rc;
+}
+
+/*
+ * Copy from HSA to absolute storage (not reentrant):
+ *
+ * @dest     : Address of buffer where data should be copied
+ * @start_blk: Start Block (beginning with 1)
+ * @nr_blks  : Number of 4K blocks to copy
+ *
+ * Return Value: 0 : Requested 'number' of blocks of data copied
+ *		 <0: ERROR - negative event status
+ */
+int sclp_sdias_copy(void *dest, int start_blk, int nr_blks)
+{
+	struct sclp_req request;
+	int rc;
+
+	mutex_lock(&sdias_mutex);
+
+	memset(&sccb, 0, sizeof(sccb));
+	memset(&request, 0, sizeof(request));
+
+	sccb.hdr.length = sizeof(sccb);
+	sccb.evbuf.hdr.length = sizeof(struct sdias_evbuf);
+	sccb.evbuf.hdr.type = EvTyp_SDIAS;
+	sccb.evbuf.hdr.flags = 0;
+	sccb.evbuf.event_qual = EQ_STORE_DATA;
+	sccb.evbuf.data_id = DI_FCP_DUMP;
+	sccb.evbuf.event_id = 4712;
+#ifdef __s390x__
+	sccb.evbuf.asa_size = ASA_SIZE_64;
+#else
+	sccb.evbuf.asa_size = ASA_SIZE_32;
+#endif
+	sccb.evbuf.event_status = 0;
+	sccb.evbuf.blk_cnt = nr_blks;
+	sccb.evbuf.asa = (unsigned long)dest;
+	sccb.evbuf.fbn = start_blk;
+	sccb.evbuf.lbn = 0;
+	sccb.evbuf.dbs = 1;
+
+	request.sccb	 = &sccb;
+	request.command  = SCLP_CMDW_WRITE_EVENT_DATA;
+	request.status	 = SCLP_REQ_FILLED;
+	request.callback = sdias_callback;
+
+	rc = sdias_sclp_send(&request);
+	if (rc) {
+		ERROR_MSG("sclp_send failed: %x\n", rc);
+		goto out;
+	}
+	if (sccb.hdr.response_code != 0x0020) {
+		TRACE("copy failed: %x\n", sccb.hdr.response_code);
+		rc = -EIO;
+		goto out;
+	}
+
+	switch (sccb.evbuf.event_status) {
+		case EVSTATE_ALL_STORED:
+			TRACE("all stored\n");
+		case EVSTATE_PART_STORED:
+			TRACE("part stored: %i\n", sccb.evbuf.blk_cnt);
+			break;
+		case EVSTATE_NO_DATA:
+			TRACE("no data\n");
+		default:
+			ERROR_MSG("Error from SCLP while copying hsa. "
+				  "Event status = %x\n",
+				sccb.evbuf.event_status);
+			rc = -EIO;
+	}
+out:
+	mutex_unlock(&sdias_mutex);
+	return rc;
+}
+
+int __init sdias_init(void)
+{
+	int rc;
+
+	if (ipl_info.type != IPL_TYPE_FCP_DUMP)
+		return 0;
+	sdias_dbf = debug_register("dump_sdias", 4, 1, 4 * sizeof(long));
+	debug_register_view(sdias_dbf, &debug_sprintf_view);
+	debug_set_level(sdias_dbf, 6);
+	rc = sclp_register(&sclp_sdias_register);
+	if (rc) {
+		ERROR_MSG("sclp register failed\n");
+		return rc;
+	}
+	init_waitqueue_head(&sdias_wq);
+	TRACE("init done\n");
+	return 0;
+}
+
+void __exit sdias_exit(void)
+{
+	debug_unregister(sdias_dbf);
+	sclp_unregister(&sclp_sdias_register);
+}
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
new file mode 100644
index 00000000000..89d439316a5
--- /dev/null
+++ b/drivers/s390/char/zcore.c
@@ -0,0 +1,651 @@
+/*
+ * zcore module to export memory content and register sets for creating system
+ * dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same
+ * dump format as s390 standalone dumps.
+ *
+ * For more information please refer to Documentation/s390/zfcpdump.txt
+ *
+ * Copyright IBM Corp. 2003,2007
+ * Author(s): Michael Holzheu
+ */
+
+#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/utsname.h>
+#include <linux/debugfs.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+#include <asm/setup.h>
+#include <asm/sigp.h>
+#include <asm/uaccess.h>
+#include <asm/debug.h>
+#include <asm/processor.h>
+#include <asm/irqflags.h>
+
+#define TRACE(x...) debug_sprintf_event(zcore_dbf, 1, x)
+#define MSG(x...) printk( KERN_ALERT x )
+#define ERROR_MSG(x...) printk ( KERN_ALERT "DUMP: " x )
+
+#define TO_USER		0
+#define TO_KERNEL	1
+
+enum arch_id {
+	ARCH_S390	= 0,
+	ARCH_S390X	= 1,
+};
+
+/* dump system info */
+
+struct sys_info {
+	enum arch_id	arch;
+	unsigned long	sa_base;
+	u32		sa_size;
+	int		cpu_map[NR_CPUS];
+	unsigned long	mem_size;
+	union save_area	lc_mask;
+};
+
+static struct sys_info sys_info;
+static struct debug_info *zcore_dbf;
+static int hsa_available;
+static struct dentry *zcore_dir;
+static struct dentry *zcore_file;
+
+/*
+ * Copy memory from HSA to kernel or user memory (not reentrant):
+ *
+ * @dest:  Kernel or user buffer where memory should be copied to
+ * @src:   Start address within HSA where data should be copied
+ * @count: Size of buffer, which should be copied
+ * @mode:  Either TO_KERNEL or TO_USER
+ */
+static int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode)
+{
+	int offs, blk_num;
+	static char buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+
+	if (count == 0)
+		return 0;
+
+	/* copy first block */
+	offs = 0;
+	if ((src % PAGE_SIZE) != 0) {
+		blk_num = src / PAGE_SIZE + 2;
+		if (sclp_sdias_copy(buf, blk_num, 1)) {
+			TRACE("sclp_sdias_copy() failed\n");
+			return -EIO;
+		}
+		offs = min((PAGE_SIZE - (src % PAGE_SIZE)), count);
+		if (mode == TO_USER) {
+			if (copy_to_user((__force __user void*) dest,
+					 buf + (src % PAGE_SIZE), offs))
+				return -EFAULT;
+		} else
+			memcpy(dest, buf + (src % PAGE_SIZE), offs);
+	}
+	if (offs == count)
+		goto out;
+
+	/* copy middle */
+	for (; (offs + PAGE_SIZE) <= count; offs += PAGE_SIZE) {
+		blk_num = (src + offs) / PAGE_SIZE + 2;
+		if (sclp_sdias_copy(buf, blk_num, 1)) {
+			TRACE("sclp_sdias_copy() failed\n");
+			return -EIO;
+		}
+		if (mode == TO_USER) {
+			if (copy_to_user((__force __user void*) dest + offs,
+					 buf, PAGE_SIZE))
+				return -EFAULT;
+		} else
+			memcpy(dest + offs, buf, PAGE_SIZE);
+	}
+	if (offs == count)
+		goto out;
+
+	/* copy last block */
+	blk_num = (src + offs) / PAGE_SIZE + 2;
+	if (sclp_sdias_copy(buf, blk_num, 1)) {
+		TRACE("sclp_sdias_copy() failed\n");
+		return -EIO;
+	}
+	if (mode == TO_USER) {
+		if (copy_to_user((__force __user void*) dest + offs, buf,
+				 PAGE_SIZE))
+			return -EFAULT;
+	} else
+		memcpy(dest + offs, buf, count - offs);
+out:
+	return 0;
+}
+
+static int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
+{
+	return memcpy_hsa((void __force *) dest, src, count, TO_USER);
+}
+
+static int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count)
+{
+	return memcpy_hsa(dest, src, count, TO_KERNEL);
+}
+
+static int memcpy_real(void *dest, unsigned long src, size_t count)
+{
+	unsigned long flags;
+	int rc = -EFAULT;
+	register unsigned long _dest asm("2") = (unsigned long) dest;
+	register unsigned long _len1 asm("3") = (unsigned long) count;
+	register unsigned long _src  asm("4") = src;
+	register unsigned long _len2 asm("5") = (unsigned long) count;
+
+	if (count == 0)
+		return 0;
+	flags = __raw_local_irq_stnsm(0xf8); /* switch to real mode */
+	asm volatile (
+		"0:	mvcle	%1,%2,0x0\n"
+		"1:	jo	0b\n"
+		"	lhi	%0,0x0\n"
+		"2:\n"
+		EX_TABLE(1b,2b)
+		: "+d" (rc)
+		: "d" (_dest), "d" (_src), "d" (_len1), "d" (_len2)
+		: "cc", "memory");
+	__raw_local_irq_ssm(flags);
+
+	return rc;
+}
+
+static int memcpy_real_user(__user void *dest, unsigned long src, size_t count)
+{
+	static char buf[4096];
+	int offs = 0, size;
+
+	while (offs < count) {
+		size = min(sizeof(buf), count - offs);
+		if (memcpy_real(buf, src + offs, size))
+			return -EFAULT;
+		if (copy_to_user(dest + offs, buf, size))
+			return -EFAULT;
+		offs += size;
+	}
+	return 0;
+}
+
+#ifdef __s390x__
+/*
+ * Convert s390x (64 bit) cpu info to s390 (32 bit) cpu info
+ */
+static void __init s390x_to_s390_regs(union save_area *out, union save_area *in,
+				      int cpu)
+{
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		out->s390.gp_regs[i] = in->s390x.gp_regs[i] & 0x00000000ffffffff;
+		out->s390.acc_regs[i] = in->s390x.acc_regs[i];
+		out->s390.ctrl_regs[i] =
+			in->s390x.ctrl_regs[i] & 0x00000000ffffffff;
+	}
+	/* locore for 31 bit has only space for fpregs 0,2,4,6 */
+	out->s390.fp_regs[0] = in->s390x.fp_regs[0];
+	out->s390.fp_regs[1] = in->s390x.fp_regs[2];
+	out->s390.fp_regs[2] = in->s390x.fp_regs[4];
+	out->s390.fp_regs[3] = in->s390x.fp_regs[6];
+	memcpy(&(out->s390.psw[0]), &(in->s390x.psw[0]), 4);
+	out->s390.psw[1] |= 0x8; /* set bit 12 */
+	memcpy(&(out->s390.psw[4]),&(in->s390x.psw[12]), 4);
+	out->s390.psw[4] |= 0x80; /* set (31bit) addressing bit */
+	out->s390.pref_reg = in->s390x.pref_reg;
+	out->s390.timer = in->s390x.timer;
+	out->s390.clk_cmp = in->s390x.clk_cmp;
+}
+
+static void __init s390x_to_s390_save_areas(void)
+{
+	int i = 1;
+	static union save_area tmp;
+
+	while (zfcpdump_save_areas[i]) {
+		s390x_to_s390_regs(&tmp, zfcpdump_save_areas[i], i);
+		memcpy(zfcpdump_save_areas[i], &tmp, sizeof(tmp));
+		i++;
+	}
+}
+
+#endif /* __s390x__ */
+
+static int __init init_cpu_info(enum arch_id arch)
+{
+	union save_area *sa;
+
+	/* get info for boot cpu from lowcore, stored in the HSA */
+
+	sa = kmalloc(sizeof(*sa), GFP_KERNEL);
+	if (!sa) {
+		ERROR_MSG("kmalloc failed: %s: %i\n",__FUNCTION__, __LINE__);
+		return -ENOMEM;
+	}
+	if (memcpy_hsa_kernel(sa, sys_info.sa_base, sys_info.sa_size) < 0) {
+		ERROR_MSG("could not copy from HSA\n");
+		kfree(sa);
+		return -EIO;
+	}
+	zfcpdump_save_areas[0] = sa;
+
+#ifdef __s390x__
+	/* convert s390x regs to s390, if we are dumping an s390 Linux */
+
+	if (arch == ARCH_S390)
+		s390x_to_s390_save_areas();
+#endif
+
+	return 0;
+}
+
+static DEFINE_MUTEX(zcore_mutex);
+
+#define DUMP_VERSION	0x3
+#define DUMP_MAGIC	0xa8190173618f23fdULL
+#define DUMP_ARCH_S390X	2
+#define DUMP_ARCH_S390	1
+#define HEADER_SIZE	4096
+
+/* dump header dumped according to s390 crash dump format */
+
+struct zcore_header {
+	u64 magic;
+	u32 version;
+	u32 header_size;
+	u32 dump_level;
+	u32 page_size;
+	u64 mem_size;
+	u64 mem_start;
+	u64 mem_end;
+	u32 num_pages;
+	u32 pad1;
+	u64 tod;
+	cpuid_t cpu_id;
+	u32 arch_id;
+	u32 build_arch;
+	char pad2[4016];
+} __attribute__((packed,__aligned__(16)));
+
+static struct zcore_header zcore_header = {
+	.magic		= DUMP_MAGIC,
+	.version	= DUMP_VERSION,
+	.header_size	= 4096,
+	.dump_level	= 0,
+	.page_size	= PAGE_SIZE,
+	.mem_start	= 0,
+#ifdef __s390x__
+	.build_arch	= DUMP_ARCH_S390X,
+#else
+	.build_arch	= DUMP_ARCH_S390,
+#endif
+};
+
+/*
+ * Copy lowcore info to buffer. Use map in order to copy only register parts.
+ *
+ * @buf:    User buffer
+ * @sa:     Pointer to save area
+ * @sa_off: Offset in save area to copy
+ * @len:    Number of bytes to copy
+ */
+static int copy_lc(void __user *buf, void *sa, int sa_off, int len)
+{
+	int i;
+	char *lc_mask = (char*)&sys_info.lc_mask;
+
+	for (i = 0; i < len; i++) {
+		if (!lc_mask[i + sa_off])
+			continue;
+		if (copy_to_user(buf + i, sa + sa_off + i, 1))
+			return -EFAULT;
+	}
+	return 0;
+}
+
+/*
+ * Copy lowcores info to memory, if necessary
+ *
+ * @buf:   User buffer
+ * @addr:  Start address of buffer in dump memory
+ * @count: Size of buffer
+ */
+static int zcore_add_lc(char __user *buf, unsigned long start, size_t count)
+{
+	unsigned long end;
+	int i = 0;
+
+	if (count == 0)
+		return 0;
+
+	end = start + count;
+	while (zfcpdump_save_areas[i]) {
+		unsigned long cp_start, cp_end; /* copy range */
+		unsigned long sa_start, sa_end; /* save area range */
+		unsigned long prefix;
+		unsigned long sa_off, len, buf_off;
+
+		if (sys_info.arch == ARCH_S390)
+			prefix = zfcpdump_save_areas[i]->s390.pref_reg;
+		else
+			prefix = zfcpdump_save_areas[i]->s390x.pref_reg;
+
+		sa_start = prefix + sys_info.sa_base;
+		sa_end = prefix + sys_info.sa_base + sys_info.sa_size;
+
+		if ((end < sa_start) || (start > sa_end))
+			goto next;
+		cp_start = max(start, sa_start);
+		cp_end = min(end, sa_end);
+
+		buf_off = cp_start - start;
+		sa_off = cp_start - sa_start;
+		len = cp_end - cp_start;
+
+		TRACE("copy_lc for: %lx\n", start);
+		if (copy_lc(buf + buf_off, zfcpdump_save_areas[i], sa_off, len))
+			return -EFAULT;
+next:
+		i++;
+	}
+	return 0;
+}
+
+/*
+ * Read routine for zcore character device
+ * First 4K are dump header
+ * Next 32MB are HSA Memory
+ * Rest is read from absolute Memory
+ */
+static ssize_t zcore_read(struct file *file, char __user *buf, size_t count,
+			  loff_t *ppos)
+{
+	unsigned long mem_start; /* Start address in memory */
+	size_t mem_offs;	 /* Offset in dump memory */
+	size_t hdr_count;	 /* Size of header part of output buffer */
+	size_t size;
+	int rc;
+
+	mutex_lock(&zcore_mutex);
+
+	if (*ppos > (sys_info.mem_size + HEADER_SIZE)) {
+		rc = -EINVAL;
+		goto fail;
+	}
+
+	count = min(count, (size_t) (sys_info.mem_size + HEADER_SIZE - *ppos));
+
+	/* Copy dump header */
+	if (*ppos < HEADER_SIZE) {
+		size = min(count, (size_t) (HEADER_SIZE - *ppos));
+		if (copy_to_user(buf, &zcore_header + *ppos, size)) {
+			rc = -EFAULT;
+			goto fail;
+		}
+		hdr_count = size;
+		mem_start = 0;
+	} else {
+		hdr_count = 0;
+		mem_start = *ppos - HEADER_SIZE;
+	}
+
+	mem_offs = 0;
+
+	/* Copy from HSA data */
+	if (*ppos < (ZFCPDUMP_HSA_SIZE + HEADER_SIZE)) {
+		size = min((count - hdr_count), (size_t) (ZFCPDUMP_HSA_SIZE
+			   - mem_start));
+		rc = memcpy_hsa_user(buf + hdr_count, mem_start, size);
+		if (rc)
+			goto fail;
+
+		mem_offs += size;
+	}
+
+	/* Copy from real mem */
+	size = count - mem_offs - hdr_count;
+	rc = memcpy_real_user(buf + hdr_count + mem_offs, mem_start + mem_offs,
+			      size);
+	if (rc)
+		goto fail;
+
+	/*
+	 * Since s390 dump analysis tools like lcrash or crash
+	 * expect register sets in the prefix pages of the cpus,
+	 * we copy them into the read buffer, if necessary.
+	 * buf + hdr_count: Start of memory part of output buffer
+	 * mem_start: Start memory address to copy from
+	 * count - hdr_count: Size of memory area to copy
+	 */
+	if (zcore_add_lc(buf + hdr_count, mem_start, count - hdr_count)) {
+		rc = -EFAULT;
+		goto fail;
+	}
+	*ppos += count;
+fail:
+	mutex_unlock(&zcore_mutex);
+	return (rc < 0) ? rc : count;
+}
+
+static int zcore_open(struct inode *inode, struct file *filp)
+{
+	if (!hsa_available)
+		return -ENODATA;
+	else
+		return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+static int zcore_release(struct inode *inode, struct file *filep)
+{
+	diag308(DIAG308_REL_HSA, NULL);
+	hsa_available = 0;
+	return 0;
+}
+
+static loff_t zcore_lseek(struct file *file, loff_t offset, int orig)
+{
+	loff_t rc;
+
+	mutex_lock(&zcore_mutex);
+	switch (orig) {
+	case 0:
+		file->f_pos = offset;
+		rc = file->f_pos;
+		break;
+	case 1:
+		file->f_pos += offset;
+		rc = file->f_pos;
+		break;
+	default:
+		rc = -EINVAL;
+	}
+	mutex_unlock(&zcore_mutex);
+	return rc;
+}
+
+static struct file_operations zcore_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= zcore_lseek,
+	.read		= zcore_read,
+	.open		= zcore_open,
+	.release	= zcore_release,
+};
+
+
+static void __init set_s390_lc_mask(union save_area *map)
+{
+	memset(&map->s390.ext_save, 0xff, sizeof(map->s390.ext_save));
+	memset(&map->s390.timer, 0xff, sizeof(map->s390.timer));
+	memset(&map->s390.clk_cmp, 0xff, sizeof(map->s390.clk_cmp));
+	memset(&map->s390.psw, 0xff, sizeof(map->s390.psw));
+	memset(&map->s390.pref_reg, 0xff, sizeof(map->s390.pref_reg));
+	memset(&map->s390.acc_regs, 0xff, sizeof(map->s390.acc_regs));
+	memset(&map->s390.fp_regs, 0xff, sizeof(map->s390.fp_regs));
+	memset(&map->s390.gp_regs, 0xff, sizeof(map->s390.gp_regs));
+	memset(&map->s390.ctrl_regs, 0xff, sizeof(map->s390.ctrl_regs));
+}
+
+static void __init set_s390x_lc_mask(union save_area *map)
+{
+	memset(&map->s390x.fp_regs, 0xff, sizeof(map->s390x.fp_regs));
+	memset(&map->s390x.gp_regs, 0xff, sizeof(map->s390x.gp_regs));
+	memset(&map->s390x.psw, 0xff, sizeof(map->s390x.psw));
+	memset(&map->s390x.pref_reg, 0xff, sizeof(map->s390x.pref_reg));
+	memset(&map->s390x.fp_ctrl_reg, 0xff, sizeof(map->s390x.fp_ctrl_reg));
+	memset(&map->s390x.tod_reg, 0xff, sizeof(map->s390x.tod_reg));
+	memset(&map->s390x.timer, 0xff, sizeof(map->s390x.timer));
+	memset(&map->s390x.clk_cmp, 0xff, sizeof(map->s390x.clk_cmp));
+	memset(&map->s390x.acc_regs, 0xff, sizeof(map->s390x.acc_regs));
+	memset(&map->s390x.ctrl_regs, 0xff, sizeof(map->s390x.ctrl_regs));
+}
+
+/*
+ * Initialize dump globals for a given architecture
+ */
+static int __init sys_info_init(enum arch_id arch)
+{
+	switch (arch) {
+	case ARCH_S390X:
+		MSG("DETECTED 'S390X (64 bit) OS'\n");
+		sys_info.sa_base = SAVE_AREA_BASE_S390X;
+		sys_info.sa_size = sizeof(struct save_area_s390x);
+		set_s390x_lc_mask(&sys_info.lc_mask);
+		break;
+	case ARCH_S390:
+		MSG("DETECTED 'S390 (32 bit) OS'\n");
+		sys_info.sa_base = SAVE_AREA_BASE_S390;
+		sys_info.sa_size = sizeof(struct save_area_s390);
+		set_s390_lc_mask(&sys_info.lc_mask);
+		break;
+	default:
+		ERROR_MSG("unknown architecture 0x%x.\n",arch);
+		return -EINVAL;
+	}
+	sys_info.arch = arch;
+	if (init_cpu_info(arch)) {
+		ERROR_MSG("get cpu info failed\n");
+		return -ENOMEM;
+	}
+	sys_info.mem_size = real_memory_size;
+
+	return 0;
+}
+
+static int __init check_sdias(void)
+{
+	int rc, act_hsa_size;
+
+	rc = sclp_sdias_blk_count();
+	if (rc < 0) {
+		ERROR_MSG("Could not determine HSA size\n");
+		return rc;
+	}
+	act_hsa_size = (rc - 1) * PAGE_SIZE;
+	if (act_hsa_size < ZFCPDUMP_HSA_SIZE) {
+		ERROR_MSG("HSA size too small: %i\n", act_hsa_size);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void __init zcore_header_init(int arch, struct zcore_header *hdr)
+{
+	if (arch == ARCH_S390X)
+		hdr->arch_id = DUMP_ARCH_S390X;
+	else
+		hdr->arch_id = DUMP_ARCH_S390;
+	hdr->mem_size = sys_info.mem_size;
+	hdr->mem_end = sys_info.mem_size;
+	hdr->num_pages = sys_info.mem_size / PAGE_SIZE;
+	hdr->tod = get_clock();
+	get_cpu_id(&hdr->cpu_id);
+}
+
+extern int sdias_init(void);
+
+static int __init zcore_init(void)
+{
+	unsigned char arch;
+	int rc;
+
+	if (ipl_info.type != IPL_TYPE_FCP_DUMP)
+		return -ENODATA;
+
+	zcore_dbf = debug_register("zcore", 4, 1, 4 * sizeof(long));
+	debug_register_view(zcore_dbf, &debug_sprintf_view);
+	debug_set_level(zcore_dbf, 6);
+
+	TRACE("devno:  %x\n", ipl_info.data.fcp.dev_id.devno);
+	TRACE("wwpn:   %llx\n", (unsigned long long) ipl_info.data.fcp.wwpn);
+	TRACE("lun:    %llx\n", (unsigned long long) ipl_info.data.fcp.lun);
+
+	rc = sdias_init();
+	if (rc)
+		goto fail;
+
+	rc = check_sdias();
+	if (rc) {
+		ERROR_MSG("Dump initialization failed\n");
+		goto fail;
+	}
+
+	rc = memcpy_hsa_kernel(&arch, __LC_AR_MODE_ID, 1);
+	if (rc) {
+		ERROR_MSG("sdial memcpy for arch id failed\n");
+		goto fail;
+	}
+
+#ifndef __s390x__
+	if (arch == ARCH_S390X) {
+		ERROR_MSG("32 bit dumper can't dump 64 bit system!\n");
+		rc = -EINVAL;
+		goto fail;
+	}
+#endif
+
+	rc = sys_info_init(arch);
+	if (rc) {
+		ERROR_MSG("arch init failed\n");
+		goto fail;
+	}
+
+	zcore_header_init(arch, &zcore_header);
+
+	zcore_dir = debugfs_create_dir("zcore" , NULL);
+	if (!zcore_dir) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	zcore_file = debugfs_create_file("mem", S_IRUSR, zcore_dir, NULL,
+					 &zcore_fops);
+	if (!zcore_file) {
+		debugfs_remove(zcore_dir);
+		rc = -ENOMEM;
+		goto fail;
+	}
+	hsa_available = 1;
+	return 0;
+
+fail:
+	diag308(DIAG308_REL_HSA, NULL);
+	return rc;
+}
+
+extern void sdias_exit(void);
+
+static void __exit zcore_exit(void)
+{
+	debug_unregister(zcore_dbf);
+	sdias_exit();
+	diag308(DIAG308_REL_HSA, NULL);
+}
+
+MODULE_AUTHOR("Copyright IBM Corp. 2003,2007");
+MODULE_DESCRIPTION("zcore module for zfcpdump support");
+MODULE_LICENSE("GPL");
+
+subsys_initcall(zcore_init);
+module_exit(zcore_exit);
-- 
cgit v1.2.3


From b7127dfeed3252a76aa31f016aac5fba53d99711 Mon Sep 17 00:00:00 2001
From: "Ahmed S. Darwish" <darwish.07@gmail.com>
Date: Fri, 27 Apr 2007 16:01:50 +0200
Subject: [S390] ctc: kmalloc->kzalloc/casting cleanups.

A patch for the CTC / ESCON network driver.  Switch from kmalloc to kzalloc
when appropriate, remove some unnecessary kmalloc casts too.  Since I have no
s390 machine, I didn't compile it but I examined it carefully.

Signed-off-by: "Ahmed S. Darwish" <darwish.07@gmail.com>
Cc: Frank Pavlic <fpavlic@de.ibm.com>
Cc: Ursula Braun <braunu@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/net/ctcmain.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c
index 0d6d5fcc128..570a960bfb5 100644
--- a/drivers/s390/net/ctcmain.c
+++ b/drivers/s390/net/ctcmain.c
@@ -1638,21 +1638,19 @@ add_channel(struct ccw_device *cdev, enum channel_types type)
 	struct channel *ch;
 
 	DBF_TEXT(trace, 2, __FUNCTION__);
-	if ((ch =
-	     (struct channel *) kmalloc(sizeof (struct channel),
-					GFP_KERNEL)) == NULL) {
+	ch = kzalloc(sizeof(struct channel), GFP_KERNEL);
+	if (!ch) {
 		ctc_pr_warn("ctc: Out of memory in add_channel\n");
 		return -1;
 	}
-	memset(ch, 0, sizeof (struct channel));
-	if ((ch->ccw = kmalloc(8*sizeof(struct ccw1),
-					       GFP_KERNEL | GFP_DMA)) == NULL) {
+	/* assure all flags and counters are reset */
+	ch->ccw = kzalloc(8 * sizeof(struct ccw1), GFP_KERNEL | GFP_DMA);
+	if (!ch->ccw) {
 		kfree(ch);
 		ctc_pr_warn("ctc: Out of memory in add_channel\n");
 		return -1;
 	}
 
-	memset(ch->ccw, 0, 8*sizeof(struct ccw1));	// assure all flags and counters are reset
 
 	/**
 	 * "static" ccws are used in the following way:
@@ -1692,15 +1690,14 @@ add_channel(struct ccw_device *cdev, enum channel_types type)
 		return -1;
 	}
 	fsm_newstate(ch->fsm, CH_STATE_IDLE);
-	if ((ch->irb = kmalloc(sizeof (struct irb),
-					      GFP_KERNEL)) == NULL) {
+	ch->irb = kzalloc(sizeof(struct irb), GFP_KERNEL);
+	if (!ch->irb) {
 		ctc_pr_warn("ctc: Out of memory in add_channel\n");
 		kfree_fsm(ch->fsm);
 		kfree(ch->ccw);
 		kfree(ch);
 		return -1;
 	}
-	memset(ch->irb, 0, sizeof (struct irb));
 	while (*c && less_than((*c)->id, ch->id))
 		c = &(*c)->next;
 	if (*c && (!strncmp((*c)->id, ch->id, CTC_ID_SIZE))) {
@@ -2745,14 +2742,13 @@ ctc_probe_device(struct ccwgroup_device *cgdev)
 	if (!get_device(&cgdev->dev))
 		return -ENODEV;
 
-	priv = kmalloc(sizeof (struct ctc_priv), GFP_KERNEL);
+	priv = kzalloc(sizeof(struct ctc_priv), GFP_KERNEL);
 	if (!priv) {
 		ctc_pr_err("%s: Out of memory\n", __func__);
 		put_device(&cgdev->dev);
 		return -ENOMEM;
 	}
 
-	memset(priv, 0, sizeof (struct ctc_priv));
 	rc = ctc_add_files(&cgdev->dev);
 	if (rc) {
 		kfree(priv);
@@ -2793,10 +2789,9 @@ ctc_init_netdevice(struct net_device * dev, int alloc_device,
 	DBF_TEXT(setup, 3, __FUNCTION__);
 
 	if (alloc_device) {
-		dev = kmalloc(sizeof (struct net_device), GFP_KERNEL);
+		dev = kzalloc(sizeof(struct net_device), GFP_KERNEL);
 		if (!dev)
 			return NULL;
-		memset(dev, 0, sizeof (struct net_device));
 	}
 
 	dev->priv = privptr;
-- 
cgit v1.2.3


From b3d00c3b9278876b84a808bc513048b145fdef90 Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:51 +0200
Subject: [S390] sclp: initialize early.

Add explicit sclp initialization for those sclp users that do not
register with the interface.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/char/sclp.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'drivers')

diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index f171de3b0b1..bd219ab2286 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -15,6 +15,7 @@
 #include <linux/timer.h>
 #include <linux/reboot.h>
 #include <linux/jiffies.h>
+#include <linux/init.h>
 #include <asm/types.h>
 #include <asm/s390_ext.h>
 
@@ -930,3 +931,10 @@ sclp_init(void)
 	sclp_init_mask(1);
 	return 0;
 }
+
+static __init int sclp_initcall(void)
+{
+	return sclp_init();
+}
+
+arch_initcall(sclp_initcall);
-- 
cgit v1.2.3


From 66b494a7178cbd84d8fc0e5f1e92d81fb6ec9f6e Mon Sep 17 00:00:00 2001
From: Ursula Braun <braunu@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:52 +0200
Subject: [S390] vmlogrdr: stop IUCV connection in vmlogrdr_release.

Reopen of /dev/account failed. The IUCV path has to be terminated
in vmlogrdr_release.

Signed-off-by: Ursula Braun <braunu@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/char/vmlogrdr.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers')

diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index b87d3b01993..08797fa4506 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -385,6 +385,9 @@ static int vmlogrdr_release (struct inode *inode, struct file *filp)
 
 	struct vmlogrdr_priv_t * logptr = filp->private_data;
 
+	iucv_path_sever(logptr->path, NULL);
+	kfree(logptr->path);
+	logptr->path = NULL;
 	if (logptr->autorecording) {
 		ret = vmlogrdr_recording(logptr,0,logptr->autopurge);
 		if (ret)
-- 
cgit v1.2.3


From 6d4740c89c187ee8f5ac7355c4eeffda26493d1f Mon Sep 17 00:00:00 2001
From: Stefan Haberland <stefan.haberland@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:53 +0200
Subject: [S390] sclp: fix coding style.

Use only capital letters for defines.

Cc: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Stefan Haberland <stefan.haberland@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/char/sclp.c         |  2 +-
 drivers/s390/char/sclp.h         | 72 ++++++++++++++++++++--------------------
 drivers/s390/char/sclp_cpi.c     |  4 +--
 drivers/s390/char/sclp_quiesce.c |  2 +-
 drivers/s390/char/sclp_rw.c      | 16 ++++-----
 drivers/s390/char/sclp_sdias.c   |  6 ++--
 drivers/s390/char/sclp_tty.c     |  6 ++--
 drivers/s390/char/sclp_vt220.c   |  8 ++---
 8 files changed, 58 insertions(+), 58 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index bd219ab2286..fa62e694405 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -511,7 +511,7 @@ sclp_state_change_cb(struct evbuf_header *evbuf)
 }
 
 static struct sclp_register sclp_state_change_event = {
-	.receive_mask = EvTyp_StateChange_Mask,
+	.receive_mask = EVTYP_STATECHANGE_MASK,
 	.receiver_fn = sclp_state_change_cb
 };
 
diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h
index 6402e943436..e03dcf4c5fc 100644
--- a/drivers/s390/char/sclp.h
+++ b/drivers/s390/char/sclp.h
@@ -19,35 +19,35 @@
 #define MAX_KMEM_PAGES (sizeof(unsigned long) << 3)
 #define MAX_CONSOLE_PAGES	4
 
-#define EvTyp_OpCmd		0x01
-#define EvTyp_Msg		0x02
-#define EvTyp_StateChange	0x08
-#define EvTyp_PMsgCmd		0x09
-#define EvTyp_CntlProgOpCmd	0x20
-#define EvTyp_CntlProgIdent	0x0B
-#define EvTyp_SigQuiesce	0x1D
-#define EvTyp_VT220Msg		0x1A
-#define EvTyp_SDIAS		0x1C
-
-#define EvTyp_OpCmd_Mask	0x80000000
-#define EvTyp_Msg_Mask		0x40000000
-#define EvTyp_StateChange_Mask	0x01000000
-#define EvTyp_PMsgCmd_Mask	0x00800000
-#define EvTyp_CtlProgOpCmd_Mask	0x00000001
-#define EvTyp_CtlProgIdent_Mask	0x00200000
-#define EvTyp_SigQuiesce_Mask	0x00000008
-#define EvTyp_VT220Msg_Mask	0x00000040
-#define EvTyp_SDIAS_Mask	0x00000010
-
-#define GnrlMsgFlgs_DOM		0x8000
-#define GnrlMsgFlgs_SndAlrm	0x4000
-#define GnrlMsgFlgs_HoldMsg	0x2000
-
-#define LnTpFlgs_CntlText	0x8000
-#define LnTpFlgs_LabelText	0x4000
-#define LnTpFlgs_DataText	0x2000
-#define LnTpFlgs_EndText	0x1000
-#define LnTpFlgs_PromptText	0x0800
+#define EVTYP_OPCMD		0x01
+#define EVTYP_MSG		0x02
+#define EVTYP_STATECHANGE	0x08
+#define EVTYP_PMSGCMD		0x09
+#define EVTYP_CNTLPROGOPCMD	0x20
+#define EVTYP_CNTLPROGIDENT	0x0B
+#define EVTYP_SIGQUIESCE	0x1D
+#define EVTYP_VT220MSG		0x1A
+#define EVTYP_SDIAS		0x1C
+
+#define EVTYP_OPCMD_MASK	0x80000000
+#define EVTYP_MSG_MASK		0x40000000
+#define EVTYP_STATECHANGE_MASK	0x01000000
+#define EVTYP_PMSGCMD_MASK	0x00800000
+#define EVTYP_CTLPROGOPCMD_MASK	0x00000001
+#define EVTYP_CTLPROGIDENT_MASK	0x00200000
+#define EVTYP_SIGQUIESCE_MASK	0x00000008
+#define EVTYP_VT220MSG_MASK	0x00000040
+#define EVTYP_SDIAS_MASK	0x00000010
+
+#define GNRLMSGFLGS_DOM		0x8000
+#define GNRLMSGFLGS_SNDALRM	0x4000
+#define GNRLMSGFLGS_HOLDMSG	0x2000
+
+#define LNTPFLGS_CNTLTEXT	0x8000
+#define LNTPFLGS_LABELTEXT	0x4000
+#define LNTPFLGS_DATATEXT	0x2000
+#define LNTPFLGS_ENDTEXT	0x1000
+#define LNTPFLGS_PROMPTTEXT	0x0800
 
 typedef unsigned int sclp_cmdw_t;
 
@@ -58,15 +58,15 @@ typedef unsigned int sclp_cmdw_t;
 #define SCLP_CMDW_READ_SCP_INFO_FORCED	0x00120001
 
 #define GDS_ID_MDSMU		0x1310
-#define GDS_ID_MDSRouteInfo	0x1311
-#define GDS_ID_AgUnWrkCorr	0x1549
-#define GDS_ID_SNACondReport	0x1532
+#define GDS_ID_MDSROUTEINFO	0x1311
+#define GDS_ID_AGUNWRKCORR	0x1549
+#define GDS_ID_SNACONDREPORT	0x1532
 #define GDS_ID_CPMSU		0x1212
-#define GDS_ID_RoutTargInstr	0x154D
-#define GDS_ID_OpReq		0x8070
-#define GDS_ID_TextCmd		0x1320
+#define GDS_ID_ROUTTARGINSTR	0x154D
+#define GDS_ID_OPREQ		0x8070
+#define GDS_ID_TEXTCMD		0x1320
 
-#define GDS_KEY_SelfDefTextMsg	0x31
+#define GDS_KEY_SELFDEFTEXTMSG	0x31
 
 typedef u32 sccb_mask_t;	/* ATTENTION: assumes 32bit mask !!! */
 
diff --git a/drivers/s390/char/sclp_cpi.c b/drivers/s390/char/sclp_cpi.c
index 65aa2c85737..29fe2a5ec2f 100644
--- a/drivers/s390/char/sclp_cpi.c
+++ b/drivers/s390/char/sclp_cpi.c
@@ -46,7 +46,7 @@ struct cpi_sccb {
 /* Event type structure for write message and write priority message */
 static struct sclp_register sclp_cpi_event =
 {
-	.send_mask = EvTyp_CtlProgIdent_Mask
+	.send_mask = EVTYP_CTLPROGIDENT_MASK
 };
 
 MODULE_LICENSE("GPL");
@@ -201,7 +201,7 @@ cpi_module_init(void)
 		       "console.\n");
 		return -EINVAL;
 	}
-	if (!(sclp_cpi_event.sclp_send_mask & EvTyp_CtlProgIdent_Mask)) {
+	if (!(sclp_cpi_event.sclp_send_mask & EVTYP_CTLPROGIDENT_MASK)) {
 		printk(KERN_WARNING "cpi: no control program identification "
 		       "support\n");
 		sclp_unregister(&sclp_cpi_event);
diff --git a/drivers/s390/char/sclp_quiesce.c b/drivers/s390/char/sclp_quiesce.c
index baa8fe669ed..45ff25e787c 100644
--- a/drivers/s390/char/sclp_quiesce.c
+++ b/drivers/s390/char/sclp_quiesce.c
@@ -43,7 +43,7 @@ sclp_quiesce_handler(struct evbuf_header *evbuf)
 }
 
 static struct sclp_register sclp_quiesce_event = {
-	.receive_mask = EvTyp_SigQuiesce_Mask,
+	.receive_mask = EVTYP_SIGQUIESCE_MASK,
 	.receiver_fn = sclp_quiesce_handler
 };
 
diff --git a/drivers/s390/char/sclp_rw.c b/drivers/s390/char/sclp_rw.c
index 2486783ea58..bbd5b8b66f4 100644
--- a/drivers/s390/char/sclp_rw.c
+++ b/drivers/s390/char/sclp_rw.c
@@ -30,7 +30,7 @@
 
 /* Event type structure for write message and write priority message */
 static struct sclp_register sclp_rw_event = {
-	.send_mask = EvTyp_Msg_Mask | EvTyp_PMsgCmd_Mask
+	.send_mask = EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK
 };
 
 /*
@@ -64,7 +64,7 @@ sclp_make_buffer(void *page, unsigned short columns, unsigned short htab)
 	memset(sccb, 0, sizeof(struct write_sccb));
 	sccb->header.length = sizeof(struct write_sccb);
 	sccb->msg_buf.header.length = sizeof(struct msg_buf);
-	sccb->msg_buf.header.type = EvTyp_Msg;
+	sccb->msg_buf.header.type = EVTYP_MSG;
 	sccb->msg_buf.mdb.header.length = sizeof(struct mdb);
 	sccb->msg_buf.mdb.header.type = 1;
 	sccb->msg_buf.mdb.header.tag = 0xD4C4C240;	/* ebcdic "MDB " */
@@ -114,7 +114,7 @@ sclp_initialize_mto(struct sclp_buffer *buffer, int max_len)
 	memset(mto, 0, sizeof(struct mto));
 	mto->length = sizeof(struct mto);
 	mto->type = 4;	/* message text object */
-	mto->line_type_flags = LnTpFlgs_EndText; /* end text */
+	mto->line_type_flags = LNTPFLGS_ENDTEXT; /* end text */
 
 	/* set pointer to first byte after struct mto. */
 	buffer->current_line = (char *) (mto + 1);
@@ -215,7 +215,7 @@ sclp_write(struct sclp_buffer *buffer, const unsigned char *msg, int count)
 		case '\a':	/* bell, one for several times	*/
 			/* set SCLP sound alarm bit in General Object */
 			buffer->sccb->msg_buf.mdb.go.general_msg_flags |=
-				GnrlMsgFlgs_SndAlrm;
+				GNRLMSGFLGS_SNDALRM;
 			break;
 		case '\t':	/* horizontal tabulator	 */
 			/* check if new mto needs to be created */
@@ -452,12 +452,12 @@ sclp_emit_buffer(struct sclp_buffer *buffer,
 		return -EIO;
 
 	sccb = buffer->sccb;
-	if (sclp_rw_event.sclp_send_mask & EvTyp_Msg_Mask)
+	if (sclp_rw_event.sclp_send_mask & EVTYP_MSG_MASK)
 		/* Use normal write message */
-		sccb->msg_buf.header.type = EvTyp_Msg;
-	else if (sclp_rw_event.sclp_send_mask & EvTyp_PMsgCmd_Mask)
+		sccb->msg_buf.header.type = EVTYP_MSG;
+	else if (sclp_rw_event.sclp_send_mask & EVTYP_PMSGCMD_MASK)
 		/* Use write priority message */
-		sccb->msg_buf.header.type = EvTyp_PMsgCmd;
+		sccb->msg_buf.header.type = EVTYP_PMSGCMD;
 	else
 		return -ENOSYS;
 	buffer->request.command = SCLP_CMDW_WRITE_EVENT_DATA;
diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c
index 06a4d089723..52283daddae 100644
--- a/drivers/s390/char/sclp_sdias.c
+++ b/drivers/s390/char/sclp_sdias.c
@@ -30,7 +30,7 @@
 static struct debug_info *sdias_dbf;
 
 static struct sclp_register sclp_sdias_register = {
-	.send_mask = EvTyp_SDIAS_Mask,
+	.send_mask = EVTYP_SDIAS_MASK,
 };
 
 struct sdias_evbuf {
@@ -118,7 +118,7 @@ int sclp_sdias_blk_count(void)
 
 	sccb.hdr.length = sizeof(sccb);
 	sccb.evbuf.hdr.length = sizeof(struct sdias_evbuf);
-	sccb.evbuf.hdr.type = EvTyp_SDIAS;
+	sccb.evbuf.hdr.type = EVTYP_SDIAS;
 	sccb.evbuf.event_qual = EQ_SIZE;
 	sccb.evbuf.data_id = DI_FCP_DUMP;
 	sccb.evbuf.event_id = 4712;
@@ -177,7 +177,7 @@ int sclp_sdias_copy(void *dest, int start_blk, int nr_blks)
 
 	sccb.hdr.length = sizeof(sccb);
 	sccb.evbuf.hdr.length = sizeof(struct sdias_evbuf);
-	sccb.evbuf.hdr.type = EvTyp_SDIAS;
+	sccb.evbuf.hdr.type = EVTYP_SDIAS;
 	sccb.evbuf.hdr.flags = 0;
 	sccb.evbuf.event_qual = EQ_STORE_DATA;
 	sccb.evbuf.data_id = DI_FCP_DUMP;
diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c
index 076816b9d52..e3b3d390b4a 100644
--- a/drivers/s390/char/sclp_tty.c
+++ b/drivers/s390/char/sclp_tty.c
@@ -648,7 +648,7 @@ sclp_eval_textcmd(struct gds_subvector *start,
 	subvec = start;
 	while (subvec < end) {
 		subvec = find_gds_subvector(subvec, end,
-					    GDS_KEY_SelfDefTextMsg);
+					    GDS_KEY_SELFDEFTEXTMSG);
 		if (!subvec)
 			break;
 		sclp_eval_selfdeftextmsg((struct gds_subvector *)(subvec + 1),
@@ -664,7 +664,7 @@ sclp_eval_cpmsu(struct gds_vector *start, struct gds_vector *end)
 
 	vec = start;
 	while (vec < end) {
-		vec = find_gds_vector(vec, end, GDS_ID_TextCmd);
+		vec = find_gds_vector(vec, end, GDS_ID_TEXTCMD);
 		if (!vec)
 			break;
 		sclp_eval_textcmd((struct gds_subvector *)(vec + 1),
@@ -703,7 +703,7 @@ sclp_tty_state_change(struct sclp_register *reg)
 
 static struct sclp_register sclp_input_event =
 {
-	.receive_mask = EvTyp_OpCmd_Mask | EvTyp_PMsgCmd_Mask,
+	.receive_mask = EVTYP_OPCMD_MASK | EVTYP_PMSGCMD_MASK,
 	.state_change_fn = sclp_tty_state_change,
 	.receiver_fn = sclp_tty_receiver
 };
diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c
index f77dc33b5f8..726334757bb 100644
--- a/drivers/s390/char/sclp_vt220.c
+++ b/drivers/s390/char/sclp_vt220.c
@@ -99,8 +99,8 @@ static void sclp_vt220_emit_current(void);
 
 /* Registration structure for our interest in SCLP event buffers */
 static struct sclp_register sclp_vt220_register = {
-	.send_mask		= EvTyp_VT220Msg_Mask,
-	.receive_mask		= EvTyp_VT220Msg_Mask,
+	.send_mask		= EVTYP_VT220MSG_MASK,
+	.receive_mask		= EVTYP_VT220MSG_MASK,
 	.state_change_fn	= NULL,
 	.receiver_fn		= sclp_vt220_receiver_fn
 };
@@ -202,7 +202,7 @@ sclp_vt220_callback(struct sclp_req *request, void *data)
 static int
 __sclp_vt220_emit(struct sclp_vt220_request *request)
 {
-	if (!(sclp_vt220_register.sclp_send_mask & EvTyp_VT220Msg_Mask)) {
+	if (!(sclp_vt220_register.sclp_send_mask & EVTYP_VT220MSG_MASK)) {
 		request->sclp_req.status = SCLP_REQ_FAILED;
 		return -EIO;
 	}
@@ -284,7 +284,7 @@ sclp_vt220_initialize_page(void *page)
 	sccb->header.length = sizeof(struct sclp_vt220_sccb);
 	sccb->header.function_code = SCLP_NORMAL_WRITE;
 	sccb->header.response_code = 0x0000;
-	sccb->evbuf.type = EvTyp_VT220Msg;
+	sccb->evbuf.type = EVTYP_VT220MSG;
 	sccb->evbuf.length = sizeof(struct evbuf_header);
 
 	return request;
-- 
cgit v1.2.3


From 2fc2d1e9ffcde78af7ab63ed640d9a4901797de2 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 27 Apr 2007 16:01:56 +0200
Subject: [S390] Processor degradation notification.

Generate uevents for all cpus if cpu capability changes. This can
happen e.g. because the cpus are overheating. The cpu capability can
be read via /sys/devices/system/cpu/cpuN/capability.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 drivers/s390/char/Makefile      |  2 +-
 drivers/s390/char/sclp.h        |  2 ++
 drivers/s390/char/sclp_config.c | 75 +++++++++++++++++++++++++++++++++++++++++
 drivers/s390/sysinfo.c          | 18 ++++++++++
 4 files changed, 96 insertions(+), 1 deletion(-)
 create mode 100644 drivers/s390/char/sclp_config.c

(limited to 'drivers')

diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index a0f6db21855..c210784bdf4 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \
-	 sclp_info.o sclp_chp.o
+	 sclp_info.o sclp_config.o sclp_chp.o
 
 obj-$(CONFIG_TN3270) += raw3270.o
 obj-$(CONFIG_TN3270_CONSOLE) += con3270.o
diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h
index e03dcf4c5fc..87ac4a3ad49 100644
--- a/drivers/s390/char/sclp.h
+++ b/drivers/s390/char/sclp.h
@@ -27,6 +27,7 @@
 #define EVTYP_CNTLPROGIDENT	0x0B
 #define EVTYP_SIGQUIESCE	0x1D
 #define EVTYP_VT220MSG		0x1A
+#define EVTYP_CONFMGMDATA	0x04
 #define EVTYP_SDIAS		0x1C
 
 #define EVTYP_OPCMD_MASK	0x80000000
@@ -37,6 +38,7 @@
 #define EVTYP_CTLPROGIDENT_MASK	0x00200000
 #define EVTYP_SIGQUIESCE_MASK	0x00000008
 #define EVTYP_VT220MSG_MASK	0x00000040
+#define EVTYP_CONFMGMDATA_MASK	0x10000000
 #define EVTYP_SDIAS_MASK	0x00000010
 
 #define GNRLMSGFLGS_DOM		0x8000
diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c
new file mode 100644
index 00000000000..5322e5e54a9
--- /dev/null
+++ b/drivers/s390/char/sclp_config.c
@@ -0,0 +1,75 @@
+/*
+ *  drivers/s390/char/sclp_config.c
+ *
+ *    Copyright IBM Corp. 2007
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/cpu.h>
+#include <linux/sysdev.h>
+#include <linux/workqueue.h>
+#include "sclp.h"
+
+#define TAG	"sclp_config: "
+
+struct conf_mgm_data {
+	u8 reserved;
+	u8 ev_qualifier;
+} __attribute__((packed));
+
+#define EV_QUAL_CAP_CHANGE	3
+
+static struct work_struct sclp_cpu_capability_work;
+
+static void sclp_cpu_capability_notify(struct work_struct *work)
+{
+	int cpu;
+	struct sys_device *sysdev;
+
+	printk(KERN_WARNING TAG "cpu capability changed.\n");
+	lock_cpu_hotplug();
+	for_each_online_cpu(cpu) {
+		sysdev = get_cpu_sysdev(cpu);
+		kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
+	}
+	unlock_cpu_hotplug();
+}
+
+static void sclp_conf_receiver_fn(struct evbuf_header *evbuf)
+{
+	struct conf_mgm_data *cdata;
+
+	cdata = (struct conf_mgm_data *)(evbuf + 1);
+	if (cdata->ev_qualifier == EV_QUAL_CAP_CHANGE)
+		schedule_work(&sclp_cpu_capability_work);
+}
+
+static struct sclp_register sclp_conf_register =
+{
+	.receive_mask = EVTYP_CONFMGMDATA_MASK,
+	.receiver_fn  = sclp_conf_receiver_fn,
+};
+
+static int __init sclp_conf_init(void)
+{
+	int rc;
+
+	INIT_WORK(&sclp_cpu_capability_work, sclp_cpu_capability_notify);
+
+	rc = sclp_register(&sclp_conf_register);
+	if (rc) {
+		printk(KERN_ERR TAG "failed to register (%d).\n", rc);
+		return rc;
+	}
+
+	if (!(sclp_conf_register.sclp_receive_mask & EVTYP_CONFMGMDATA_MASK)) {
+		printk(KERN_WARNING TAG "no configuration management.\n");
+		sclp_unregister(&sclp_conf_register);
+		rc = -ENOSYS;
+	}
+	return rc;
+}
+
+__initcall(sclp_conf_init);
diff --git a/drivers/s390/sysinfo.c b/drivers/s390/sysinfo.c
index 090743d2f91..19343f9675c 100644
--- a/drivers/s390/sysinfo.c
+++ b/drivers/s390/sysinfo.c
@@ -357,6 +357,24 @@ static __init int create_proc_sysinfo(void)
 
 __initcall(create_proc_sysinfo);
 
+int get_cpu_capability(unsigned int *capability)
+{
+	struct sysinfo_1_2_2 *info;
+	int rc;
+
+	info = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+	rc = stsi(info, 1, 2, 2);
+	if (rc == -ENOSYS)
+		goto out;
+	rc = 0;
+	*capability = info->capability;
+out:
+	free_page((unsigned long) info);
+	return rc;
+}
+
 /*
  * CPU capability might have changed. Therefore recalculate loops_per_jiffy.
  */
-- 
cgit v1.2.3


From cb629a01bb5bca951287e761c590a5686c6ca416 Mon Sep 17 00:00:00 2001
From: Milind Arun Choudhary <milindchoudhary@gmail.com>
Date: Fri, 27 Apr 2007 16:02:01 +0200
Subject: [S390] SPIN_LOCK_UNLOCKED cleanup in drivers/s390

SPIN_LOCK_UNLOCKED cleanup,use __SPIN_LOCK_UNLOCKED instead.

Signed-off-by: Milind Arun Choudhary <milindchoudhary@gmail.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/char/vmlogrdr.c | 6 +++---
 drivers/s390/cio/cmf.c       | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index 08797fa4506..a5a00e9ae4d 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -125,7 +125,7 @@ static struct vmlogrdr_priv_t sys_ser[] = {
 	  .recording_name = "EREP",
 	  .minor_num      = 0,
 	  .buffer_free    = 1,
-	  .priv_lock      = SPIN_LOCK_UNLOCKED,
+	  .priv_lock	  = __SPIN_LOCK_UNLOCKED(sys_ser[0].priv_lock),
 	  .autorecording  = 1,
 	  .autopurge      = 1,
 	},
@@ -134,7 +134,7 @@ static struct vmlogrdr_priv_t sys_ser[] = {
 	  .recording_name = "ACCOUNT",
 	  .minor_num      = 1,
 	  .buffer_free    = 1,
-	  .priv_lock      = SPIN_LOCK_UNLOCKED,
+	  .priv_lock	  = __SPIN_LOCK_UNLOCKED(sys_ser[1].priv_lock),
 	  .autorecording  = 1,
 	  .autopurge      = 1,
 	},
@@ -143,7 +143,7 @@ static struct vmlogrdr_priv_t sys_ser[] = {
 	  .recording_name = "SYMPTOM",
 	  .minor_num      = 2,
 	  .buffer_free    = 1,
-	  .priv_lock      = SPIN_LOCK_UNLOCKED,
+	  .priv_lock	  = __SPIN_LOCK_UNLOCKED(sys_ser[2].priv_lock),
 	  .autorecording  = 1,
 	  .autopurge      = 1,
 	}
diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c
index 90b22faabbf..28abd697be1 100644
--- a/drivers/s390/cio/cmf.c
+++ b/drivers/s390/cio/cmf.c
@@ -476,7 +476,7 @@ struct cmb_area {
 };
 
 static struct cmb_area cmb_area = {
-	.lock = SPIN_LOCK_UNLOCKED,
+	.lock = __SPIN_LOCK_UNLOCKED(cmb_area.lock),
 	.list = LIST_HEAD_INIT(cmb_area.list),
 	.num_channels  = 1024,
 };
-- 
cgit v1.2.3