From 4c521e422f2837b9652fa00a064a01d009f939b6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 9 Jul 2007 22:23:51 -0700
Subject: [SPARC64]: Add Sun LDOM virtual network driver.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig   |    6 +
 drivers/net/Makefile  |    1 +
 drivers/net/sunvnet.c | 1156 +++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/net/sunvnet.h |   70 +++
 4 files changed, 1233 insertions(+)
 create mode 100644 drivers/net/sunvnet.c
 create mode 100644 drivers/net/sunvnet.h

(limited to 'drivers')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index d17d64eb706..7903f9c7839 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -604,6 +604,12 @@ config CASSINI
 	  Support for the Sun Cassini chip, aka Sun GigaSwift Ethernet. See also
 	  <http://www.sun.com/products-n-solutions/hardware/docs/pdf/817-4341-10.pdf>
 
+config SUNVNET
+	tristate "Sun Virtual Network support"
+	depends on SUN_LDOMS
+	help
+	  Support for virtual network devices under Sun Logical Domains.
+
 config NET_VENDOR_3COM
 	bool "3COM cards"
 	depends on ISA || EISA || MCA || PCI
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index c26b8674213..b95b1b237a2 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_SUNBMAC) += sunbmac.o
 obj-$(CONFIG_MYRI_SBUS) += myri_sbus.o
 obj-$(CONFIG_SUNGEM) += sungem.o sungem_phy.o
 obj-$(CONFIG_CASSINI) += cassini.o
+obj-$(CONFIG_SUNVNET) += sunvnet.o
 
 obj-$(CONFIG_MACE) += mace.o
 obj-$(CONFIG_BMAC) += bmac.o
diff --git a/drivers/net/sunvnet.c b/drivers/net/sunvnet.c
new file mode 100644
index 00000000000..d764e4ccba5
--- /dev/null
+++ b/drivers/net/sunvnet.c
@@ -0,0 +1,1156 @@
+/* sunvnet.c: Sun LDOM Virtual Network Driver.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/etherdevice.h>
+
+#include <asm/vio.h>
+#include <asm/ldc.h>
+
+#include "sunvnet.h"
+
+#define DRV_MODULE_NAME		"sunvnet"
+#define PFX DRV_MODULE_NAME	": "
+#define DRV_MODULE_VERSION	"1.0"
+#define DRV_MODULE_RELDATE	"June 25, 2007"
+
+static char version[] __devinitdata =
+	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_DESCRIPTION("Sun LDOM virtual network driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+/* Ordered from largest major to lowest */
+static struct vio_version vnet_versions[] = {
+	{ .major = 1, .minor = 0 },
+};
+
+/* Thin wrapper: ask vio_dring_avail() about @dr, passing
+ * VNET_TX_RING_SIZE as the ring size.  Callers in this file compare
+ * the result against small slot counts and the wakeup threshold.
+ */
+static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
+{
+	u32 avail = vio_dring_avail(dr, VNET_TX_RING_SIZE);
+
+	return avail;
+}
+
+/* Deal with a message whose tag type the event handler does not
+ * recognise: log the tag fields, disconnect the port's LDC channel
+ * and report a connection reset.
+ *
+ * @arg points at the received message, which starts with a
+ * struct vio_msg_tag.  Always returns -ECONNRESET.
+ */
+static int vnet_handle_unknown(struct vnet_port *port, void *arg)
+{
+	struct vio_msg_tag *tag = arg;
+
+	printk(KERN_ERR PFX "Received unknown msg [%02x:%02x:%04x:%08x]\n",
+	       tag->type, tag->stype, tag->stype_env, tag->sid);
+	printk(KERN_ERR PFX "Resetting connection.\n");
+
+	ldc_disconnect(port->vio.lp);
+
+	return -ECONNRESET;
+}
+
+/* Build and send our attribute INFO control message (installed as the
+ * send_attr op in vnet_vio_ops).
+ *
+ * The message advertises descriptor-ring transfer mode, an Ethernet
+ * MAC address type, an ack frequency of 0, the device MAC address and
+ * an MTU of ETH_FRAME_LEN.
+ *
+ * Returns whatever vio_ldc_send() returns.
+ */
+static int vnet_send_attr(struct vio_driver_state *vio)
+{
+	struct vnet_port *port = to_vnet_port(vio);
+	struct net_device *dev = port->vp->dev;
+	struct vio_net_attr_info attr;
+	u64 mac = 0;
+	int i;
+
+	/* Pack the six address bytes into the low 48 bits:
+	 * dev_addr[0] lands in bits 47..40, dev_addr[5] in bits 7..0.
+	 */
+	for (i = 0; i < 6; i++)
+		mac = (mac << 8) | dev->dev_addr[i];
+
+	memset(&attr, 0, sizeof(attr));
+
+	attr.tag.type = VIO_TYPE_CTRL;
+	attr.tag.stype = VIO_SUBTYPE_INFO;
+	attr.tag.stype_env = VIO_ATTR_INFO;
+	attr.tag.sid = vio_send_sid(vio);
+
+	attr.xfer_mode = VIO_DRING_MODE;
+	attr.addr_type = VNET_ADDR_ETHERMAC;
+	attr.ack_freq = 0;
+	attr.addr |= mac;
+	attr.mtu = ETH_FRAME_LEN;
+
+	viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
+	       "ackfreq[%u] mtu[%llu]\n",
+	       attr.xfer_mode, attr.addr_type,
+	       (unsigned long long) attr.addr,
+	       attr.ack_freq,
+	       (unsigned long long) attr.mtu);
+
+	return vio_ldc_send(vio, &attr, sizeof(attr));
+}
+
+/* Answer an attribute INFO message from the peer.
+ *
+ * The received message is reused as the reply: its sid is overwritten
+ * with ours and its subtype becomes ACK or NACK.  We ACK only when the
+ * peer asks for descriptor-ring mode, an Ethernet MAC address type and
+ * an MTU of exactly ETH_FRAME_LEN.
+ *
+ * Returns the vio_ldc_send() result when ACKing, or -ECONNRESET after
+ * a NACK has been sent (the result of sending the NACK is ignored).
+ */
+static int handle_attr_info(struct vio_driver_state *vio,
+			    struct vio_net_attr_info *pkt)
+{
+	int compatible;
+
+	viodbg(HS, "GOT NET ATTR INFO xmode[0x%x] atype[0x%x] addr[%llx] "
+	       "ackfreq[%u] mtu[%llu]\n",
+	       pkt->xfer_mode, pkt->addr_type,
+	       (unsigned long long) pkt->addr,
+	       pkt->ack_freq,
+	       (unsigned long long) pkt->mtu);
+
+	pkt->tag.sid = vio_send_sid(vio);
+
+	compatible = (pkt->xfer_mode == VIO_DRING_MODE &&
+		      pkt->addr_type == VNET_ADDR_ETHERMAC &&
+		      pkt->mtu == ETH_FRAME_LEN);
+	if (compatible) {
+		viodbg(HS, "SEND NET ATTR ACK\n");
+
+		pkt->tag.stype = VIO_SUBTYPE_ACK;
+
+		return vio_ldc_send(vio, pkt, sizeof(*pkt));
+	}
+
+	viodbg(HS, "SEND NET ATTR NACK\n");
+
+	pkt->tag.stype = VIO_SUBTYPE_NACK;
+
+	(void) vio_ldc_send(vio, pkt, sizeof(*pkt));
+
+	return -ECONNRESET;
+}
+
+/* The peer ACKed our attribute message: nothing to do besides the
+ * debug trace.  Always returns 0.
+ */
+static int handle_attr_ack(struct vio_driver_state *vio,
+			   struct vio_net_attr_info *pkt)
+{
+	viodbg(HS, "GOT NET ATTR ACK\n");
+
+	return 0;
+}
+
+/* The peer NACKed our attribute message: trace it and report a
+ * connection reset.  Always returns -ECONNRESET.
+ */
+static int handle_attr_nack(struct vio_driver_state *vio,
+			    struct vio_net_attr_info *pkt)
+{
+	viodbg(HS, "GOT NET ATTR NACK\n");
+
+	return -ECONNRESET;
+}
+
+/* Dispatch a received attribute message on its subtype (installed as
+ * the handle_attr op in vnet_vio_ops).
+ *
+ * INFO, ACK and NACK go to their respective handlers; any other
+ * subtype yields -ECONNRESET.
+ */
+static int vnet_handle_attr(struct vio_driver_state *vio, void *arg)
+{
+	struct vio_net_attr_info *attr = arg;
+
+	if (attr->tag.stype == VIO_SUBTYPE_INFO)
+		return handle_attr_info(vio, attr);
+
+	if (attr->tag.stype == VIO_SUBTYPE_ACK)
+		return handle_attr_ack(vio, attr);
+
+	if (attr->tag.stype == VIO_SUBTYPE_NACK)
+		return handle_attr_nack(vio, attr);
+
+	return -ECONNRESET;
+}
+
+/* Handshake-complete hook (see vnet_vio_ops): restart the send and
+ * receive sequence numbers of both the RX and the TX ring at 1.
+ */
+static void vnet_handshake_complete(struct vio_driver_state *vio)
+{
+	struct vio_dring_state *rx = &vio->drings[VIO_DRIVER_RX_RING];
+	struct vio_dring_state *tx = &vio->drings[VIO_DRIVER_TX_RING];
+
+	rx->rcv_nxt = 1;
+	rx->snd_nxt = 1;
+
+	tx->rcv_nxt = 1;
+	tx->snd_nxt = 1;
+}
+
+/* Allocate an skb for receiving a frame of @len bytes with skb->data
+ * on an 8-byte boundary.
+ *
+ * The hypervisor interface used to copy to/from memory imported from
+ * another domain requires 8-byte aligned buffers and copy lengths
+ * that are multiples of 8.  So skb->data is aligned to 8 bytes and the
+ * data area is over-allocated, letting the caller round the copy
+ * length up to the next multiple of 8.
+ *
+ * The transmitter places the start of the packet 6 bytes into the
+ * buffer it sends over, so that the IP header following the ethernet
+ * header is properly aligned.  Those 6 bytes are implied rather than
+ * counted in the descriptor length; VNET_PACKET_SKIP represents them.
+ *
+ * Returns the skb, or NULL if the allocation failed.
+ */
+static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
+					   unsigned int len)
+{
+	struct sk_buff *skb;
+	unsigned long misalign;
+
+	/* Room for the skip bytes, up to 7 bytes of alignment slack and
+	 * up to 7 bytes of length round-up.
+	 */
+	skb = netdev_alloc_skb(dev, len+VNET_PACKET_SKIP+8+8);
+	if (unlikely(!skb))
+		return NULL;
+
+	misalign = (unsigned long) skb->data & 7UL;
+	if (misalign)
+		skb_reserve(skb, 8UL - misalign);
+
+	return skb;
+}
+
+/* Receive one frame of @len bytes described by @cookies/@ncookies.
+ *
+ * Checks the length, allocates an aligned skb, copies the frame in
+ * with ldc_copy() and passes it to the stack with netif_rx().
+ *
+ * Returns 0 on success, -EMSGSIZE for a length outside
+ * [ETH_ZLEN, ETH_FRAME_LEN], -ENOMEM if no skb could be allocated, or
+ * the negative ldc_copy() result.  Every failure path also increments
+ * rx_dropped in addition to its specific error counter.
+ */
+static int vnet_rx_one(struct vnet_port *port, unsigned int len,
+		       struct ldc_trans_cookie *cookies, int ncookies)
+{
+	struct net_device *dev = port->vp->dev;
+	unsigned int copy_len;
+	struct sk_buff *skb;
+	int err;
+
+	err = -EMSGSIZE;
+	if (unlikely(len < ETH_ZLEN || len > ETH_FRAME_LEN)) {
+		dev->stats.rx_length_errors++;
+		goto out_dropped;
+	}
+
+	skb = alloc_and_align_skb(dev, len);
+	err = -ENOMEM;
+	if (unlikely(!skb)) {
+		dev->stats.rx_missed_errors++;
+		goto out_dropped;
+	}
+
+	/* The copy covers the implied VNET_PACKET_SKIP bytes and is
+	 * rounded up to a multiple of 8.
+	 */
+	copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
+	skb_put(skb, copy_len);
+	err = ldc_copy(port->vio.lp, LDC_COPY_IN,
+		       skb->data, copy_len, 0,
+		       cookies, ncookies);
+	if (unlikely(err < 0)) {
+		dev->stats.rx_frame_errors++;
+		goto out_free_skb;
+	}
+
+	/* Drop the skip bytes at the front and the round-up padding at
+	 * the end so the skb holds exactly @len bytes of frame.
+	 */
+	skb_pull(skb, VNET_PACKET_SKIP);
+	skb_trim(skb, len);
+	skb->protocol = eth_type_trans(skb, dev);
+
+	dev->stats.rx_packets++;
+	dev->stats.rx_bytes += len;
+
+	netif_rx(skb);
+
+	return 0;
+
+out_free_skb:
+	kfree_skb(skb);
+
+out_dropped:
+	dev->stats.rx_dropped++;
+	return err;
+}
+
+/* Send a descriptor-ring data ACK for entries @start..@end of @dr,
+ * carrying @vio_dring_state in the message's state field.
+ *
+ * The message uses dr->snd_nxt as its sequence number; snd_nxt is
+ * advanced only once vio_ldc_send() reports a positive result.  While
+ * the send returns -EAGAIN it is retried after a udelay() that starts
+ * at 1 and doubles up to a cap of 128.
+ *
+ * Returns the last vio_ldc_send() result.
+ */
+static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
+			 u32 start, u32 end, u8 vio_dring_state)
+{
+	struct vio_dring_data hdr = {
+		.tag = {
+			.type		= VIO_TYPE_DATA,
+			.stype		= VIO_SUBTYPE_ACK,
+			.stype_env	= VIO_DRING_DATA,
+			.sid		= vio_send_sid(&port->vio),
+		},
+		.dring_ident		= dr->ident,
+		.start_idx		= start,
+		.end_idx		= end,
+		.state			= vio_dring_state,
+	};
+	int err, delay;
+
+	hdr.seq = dr->snd_nxt;
+	delay = 1;
+	do {
+		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
+		if (err > 0) {
+			dr->snd_nxt++;
+			break;
+		}
+		/* Back off before the next attempt; only -EAGAIN loops. */
+		udelay(delay);
+		if ((delay <<= 1) > 128)
+			delay = 128;
+	} while (err == -EAGAIN);
+
+	return err;
+}
+
+/* Return the ring index following @idx, wrapping to 0 once
+ * dr->num_entries is reached.
+ */
+static u32 next_idx(u32 idx, struct vio_dring_state *dr)
+{
+	idx++;
+	if (idx == dr->num_entries)
+		return 0;
+
+	return idx;
+}
+
+/* Return the ring index preceding @idx, wrapping from 0 to the last
+ * entry (dr->num_entries - 1).
+ */
+static u32 prev_idx(u32 idx, struct vio_dring_state *dr)
+{
+	if (idx != 0)
+		return idx - 1;
+
+	idx = dr->num_entries - 1;
+	return idx;
+}
+
+/* Fetch descriptor @index of ring @dr into the port's descriptor
+ * buffer (port->vio.desc_buf) using ldc_get_dring_entry().
+ *
+ * Returns a pointer to that buffer, or an ERR_PTR() carrying the
+ * negative ldc_get_dring_entry() result.  The buffer is shared, so
+ * each call overwrites the previously fetched descriptor.
+ */
+static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
+					struct vio_dring_state *dr,
+					u32 index)
+{
+	struct vio_net_desc *buf = port->vio.desc_buf;
+	int rc;
+
+	rc = ldc_get_dring_entry(port->vio.lp, buf, dr->entry_size,
+				 (index * dr->entry_size),
+				 dr->cookies, dr->ncookies);
+	if (rc < 0)
+		return ERR_PTR(rc);
+
+	return buf;
+}
+
+/* Write @desc back to slot @index of ring @dr using
+ * ldc_put_dring_entry().
+ *
+ * Returns 0 on success (any non-negative result is folded to 0) or
+ * the negative error from ldc_put_dring_entry().
+ */
+static int put_rx_desc(struct vnet_port *port,
+		       struct vio_dring_state *dr,
+		       struct vio_net_desc *desc,
+		       u32 index)
+{
+	int rc = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
+				     (index * dr->entry_size),
+				     dr->cookies, dr->ncookies);
+
+	return (rc < 0) ? rc : 0;
+}
+
+/* Process the descriptor at @index of ring @dr.
+ *
+ * Returns:
+ *   a negative error if the descriptor could not be fetched or written
+ *     back, or -ECONNRESET if vnet_rx_one() reported a reset;
+ *   1 if the descriptor is not in the VIO_DESC_READY state;
+ *   0 once the descriptor has been marked VIO_DESC_DONE, in which case
+ *     *@needs_ack is set to the descriptor's ack field.
+ *
+ * Any vnet_rx_one() failure other than -ECONNRESET still completes the
+ * descriptor, so the frame is dropped rather than retried.
+ */
+static int vnet_walk_rx_one(struct vnet_port *port,
+			    struct vio_dring_state *dr,
+			    u32 index, int *needs_ack)
+{
+	struct vio_net_desc *desc = get_rx_desc(port, dr, index);
+	/* Not used directly below; presumably referenced by viodbg(). */
+	struct vio_driver_state *vio = &port->vio;
+	int err;
+
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+
+	viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%lx:%lx]\n",
+	       desc->hdr.state, desc->hdr.ack,
+	       desc->size, desc->ncookies,
+	       desc->cookies[0].cookie_addr,
+	       desc->cookies[0].cookie_size);
+
+	if (desc->hdr.state != VIO_DESC_READY)
+		return 1;
+	err = vnet_rx_one(port, desc->size, desc->cookies, desc->ncookies);
+	if (err == -ECONNRESET)
+		return err;
+	desc->hdr.state = VIO_DESC_DONE;
+	err = put_rx_desc(port, dr, desc, index);
+	if (err < 0)
+		return err;
+	*needs_ack = desc->hdr.ack;
+	return 0;
+}
+
+/* Walk the descriptors of @dr starting at @start and acknowledge them.
+ *
+ * @end is first turned into an exclusive stop index: (u32)-1 becomes
+ * the index before @start, so the walk may cover the rest of the ring;
+ * any other value is advanced by one.
+ *
+ * The walk stops early at the first descriptor that
+ * vnet_walk_rx_one() does not complete.  When a completed descriptor
+ * asked for an ack and more descriptors remain, an intermediate
+ * VIO_DRING_ACTIVE ack is sent for the range completed so far.  A
+ * final VIO_DRING_STOPPED ack is always sent on the normal exit path.
+ *
+ * Returns -ECONNRESET as soon as a reset is seen, otherwise the result
+ * of sending the final ack.
+ */
+static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
+			u32 start, u32 end)
+{
+	/* Not used directly below; presumably referenced by viodbg(). */
+	struct vio_driver_state *vio = &port->vio;
+	/* -1 means no un-acked range is currently open. */
+	int ack_start = -1, ack_end = -1;
+
+	end = (end == (u32) -1) ? prev_idx(start, dr) : next_idx(end, dr);
+
+	viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);
+
+	while (start != end) {
+		int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);
+		if (err == -ECONNRESET)
+			return err;
+		if (err != 0)
+			break;
+		if (ack_start == -1)
+			ack_start = start;
+		ack_end = start;
+		start = next_idx(start, dr);
+		if (ack && start != end) {
+			err = vnet_send_ack(port, dr, ack_start, ack_end,
+					    VIO_DRING_ACTIVE);
+			if (err == -ECONNRESET)
+				return err;
+			ack_start = -1;
+		}
+	}
+	/* No open range: report the index just before where we stopped. */
+	if (unlikely(ack_start == -1))
+		ack_start = ack_end = prev_idx(start, dr);
+	return vnet_send_ack(port, dr, ack_start, ack_end, VIO_DRING_STOPPED);
+}
+
+/* Handle a data INFO message from the peer on the RX ring.
+ *
+ * Messages that are not VIO_DRING_DATA, or whose sequence number
+ * differs from dr->rcv_nxt, are ignored (return 0; the latter is also
+ * logged).  Otherwise rcv_nxt is advanced and the descriptor range
+ * named by the message is walked; the vnet_walk_rx() result is
+ * returned.
+ *
+ * NOTE(review): start_idx/end_idx come from the peer and are not
+ * range-checked here, as the XXX below already records.
+ */
+static int vnet_rx(struct vnet_port *port, void *msgbuf)
+{
+	struct vio_dring_data *pkt = msgbuf;
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
+	/* Not used directly below; presumably referenced by viodbg(). */
+	struct vio_driver_state *vio = &port->vio;
+
+	viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016lx] rcv_nxt[%016lx]\n",
+	       pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);
+
+	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
+		return 0;
+	if (unlikely(pkt->seq != dr->rcv_nxt)) {
+		printk(KERN_ERR PFX "RX out of sequence seq[0x%lx] "
+		       "rcv_nxt[0x%lx]\n", pkt->seq, dr->rcv_nxt);
+		return 0;
+	}
+
+	dr->rcv_nxt++;
+
+	/* XXX Validate pkt->start_idx and pkt->end_idx XXX */
+
+	return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx);
+}
+
+/* Return 1 if @end lies in the half-open index range
+ * [dr->cons, dr->prod) of the ring, walking forward with wraparound;
+ * return 0 otherwise (including when cons == prod).
+ */
+static int idx_is_pending(struct vio_dring_state *dr, u32 end)
+{
+	u32 idx;
+
+	for (idx = dr->cons; idx != dr->prod; idx = next_idx(idx, dr)) {
+		if (idx == end)
+			return 1;
+	}
+
+	return 0;
+}
+
+/* Handle a data ACK from the peer for the TX ring.
+ *
+ * If the message is VIO_DRING_DATA and its end_idx is one of our
+ * pending descriptors, the consumer index moves just past end_idx.
+ *
+ * Returns 1 when the device queue is stopped and the ring now has at
+ * least VNET_TX_WAKEUP_THRESH() entries available (the caller uses
+ * this as a "try to wake the queue" hint), otherwise 0.
+ */
+static int vnet_ack(struct vnet_port *port, void *msgbuf)
+{
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	struct vio_dring_data *ack = msgbuf;
+	struct net_device *dev;
+
+	if (unlikely(ack->tag.stype_env != VIO_DRING_DATA))
+		return 0;
+
+	if (unlikely(!idx_is_pending(dr, ack->end_idx)))
+		return 0;
+
+	dr->cons = next_idx(ack->end_idx, dr);
+
+	dev = port->vp->dev;
+	if (likely(!netif_queue_stopped(dev)))
+		return 0;
+
+	if (vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr))
+		return 1;
+
+	return 0;
+}
+
+/* Handle a data NACK from the peer.  Currently a stub that ignores
+ * the message and returns 0.
+ */
+static int vnet_nack(struct vnet_port *port, void *msgbuf)
+{
+	/* XXX just reset or similar XXX */
+	return 0;
+}
+
+/* Wake the device's TX queue if it is stopped and every port on
+ * vp->port_list has at least VNET_TX_WAKEUP_THRESH() TX entries
+ * available.  Runs under netif_tx_lock().
+ */
+static void maybe_tx_wakeup(struct vnet *vp)
+{
+	struct net_device *dev = vp->dev;
+	struct vio_dring_state *dr;
+	struct vnet_port *port;
+
+	netif_tx_lock(dev);
+
+	if (unlikely(!netif_queue_stopped(dev)))
+		goto out_unlock;
+
+	/* A single port that is still short on space vetoes the wakeup. */
+	list_for_each_entry(port, &vp->port_list, list) {
+		dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+		if (vnet_tx_dring_avail(dr) < VNET_TX_WAKEUP_THRESH(dr))
+			goto out_unlock;
+	}
+
+	netif_wake_queue(dev);
+
+out_unlock:
+	netif_tx_unlock(dev);
+}
+
+/* LDC event callback for a port (installed via vnet_ldc_cfg).
+ *
+ * @arg is the struct vnet_port passed to vio_ldc_alloc().  RESET and
+ * UP events are forwarded to vio_link_state_change(); any event other
+ * than DATA_READY is logged and ignored.  For DATA_READY, messages are
+ * read until the channel is empty or an error occurs and dispatched on
+ * their tag: data INFO/ACK/NACK to vnet_rx()/vnet_ack()/vnet_nack(),
+ * control messages to vio_control_pkt_engine(), anything else to
+ * vnet_handle_unknown().
+ */
+static void vnet_event(void *arg, int event)
+{
+	struct vnet_port *port = arg;
+	struct vio_driver_state *vio = &port->vio;
+	unsigned long flags;
+	int tx_wakeup, err;
+
+	spin_lock_irqsave(&vio->lock, flags);
+
+	if (unlikely(event == LDC_EVENT_RESET ||
+		     event == LDC_EVENT_UP)) {
+		vio_link_state_change(vio, event);
+		spin_unlock_irqrestore(&vio->lock, flags);
+
+		return;
+	}
+
+	if (unlikely(event != LDC_EVENT_DATA_READY)) {
+		printk(KERN_WARNING PFX "Unexpected LDC event %d\n", event);
+		spin_unlock_irqrestore(&vio->lock, flags);
+		return;
+	}
+
+	tx_wakeup = err = 0;
+	while (1) {
+		/* Receive buffer: 64 bytes, viewed through its leading tag. */
+		union {
+			struct vio_msg_tag tag;
+			u64 raw[8];
+		} msgbuf;
+
+		err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
+		if (unlikely(err < 0)) {
+			if (err == -ECONNRESET)
+				vio_conn_reset(vio);
+			break;
+		}
+		/* Nothing more to read. */
+		if (err == 0)
+			break;
+		viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
+		       msgbuf.tag.type,
+		       msgbuf.tag.stype,
+		       msgbuf.tag.stype_env,
+		       msgbuf.tag.sid);
+		err = vio_validate_sid(vio, &msgbuf.tag);
+		if (err < 0)
+			break;
+
+		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
+			if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
+				err = vnet_rx(port, &msgbuf);
+			} else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
+				err = vnet_ack(port, &msgbuf);
+				/* A positive result is the wakeup hint. */
+				if (err > 0)
+					tx_wakeup |= err;
+			} else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
+				err = vnet_nack(port, &msgbuf);
+			}
+		} else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
+			err = vio_control_pkt_engine(vio, &msgbuf);
+			if (err)
+				break;
+		} else {
+			err = vnet_handle_unknown(port, &msgbuf);
+		}
+		if (err == -ECONNRESET)
+			break;
+	}
+	/* Drop the vio lock before the wakeup check, but keep interrupts
+	 * disabled until after it: flags are only restored at the end.
+	 */
+	spin_unlock(&vio->lock);
+	if (unlikely(tx_wakeup && err != -ECONNRESET))
+		maybe_tx_wakeup(port->vp);
+	local_irq_restore(flags);
+}
+
+/* Tell the peer that TX descriptors are ready, starting at the
+ * current producer index.
+ *
+ * Sends a descriptor-ring data INFO message with start_idx = dr->prod
+ * and end_idx = (u32)-1, using dr->snd_nxt as sequence number.
+ * snd_nxt is advanced only when vio_ldc_send() returns a positive
+ * value.  A send that returns -EAGAIN is retried after a udelay()
+ * that starts at 1 and doubles up to a cap of 128.
+ *
+ * Called from vnet_start_xmit() with port->vio.lock held.
+ * Returns the last vio_ldc_send() result.
+ */
+static int __vnet_tx_trigger(struct vnet_port *port)
+{
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	struct vio_dring_data hdr = {
+		.tag = {
+			.type		= VIO_TYPE_DATA,
+			.stype		= VIO_SUBTYPE_INFO,
+			.stype_env	= VIO_DRING_DATA,
+			.sid		= vio_send_sid(&port->vio),
+		},
+		.dring_ident		= dr->ident,
+		.start_idx		= dr->prod,
+		.end_idx		= (u32) -1,
+	};
+	int err, delay;
+
+	hdr.seq = dr->snd_nxt;
+	delay = 1;
+	do {
+		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
+		if (err > 0) {
+			dr->snd_nxt++;
+			break;
+		}
+		/* Back off before the next attempt; only -EAGAIN loops. */
+		udelay(delay);
+		if ((delay <<= 1) > 128)
+			delay = 128;
+	} while (err == -EAGAIN);
+
+	return err;
+}
+
+/* Pick the port a frame should be transmitted on.
+ *
+ * The first six bytes of skb->data (the destination MAC) are hashed
+ * and compared against the remote address of each port in that hash
+ * bucket.  If no port matches, the first port on vp->port_list is
+ * used; vnet_port_probe() puts a switch port at the head of that list.
+ * Returns NULL when there are no ports at all.
+ *
+ * The caller is expected to hold vp->lock (see tx_port_find()).
+ *
+ * NOTE(review): not declared in sunvnet.h and only used in this file,
+ * so this looks like it could be static.
+ */
+struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb)
+{
+	struct hlist_head *bucket = &vp->port_hash[vnet_hashfn(skb->data)];
+	struct vnet_port *port;
+	struct hlist_node *pos;
+
+	hlist_for_each_entry(port, pos, bucket, hash) {
+		if (!compare_ether_addr(port->raddr, skb->data))
+			return port;
+	}
+
+	if (list_empty(&vp->port_list))
+		return NULL;
+
+	return list_entry(vp->port_list.next, struct vnet_port, list);
+}
+
+/* Locked wrapper around __tx_port_find(): performs the lookup with
+ * vp->lock held and interrupts disabled.
+ *
+ * The lock is released before returning, so nothing here keeps the
+ * returned port alive afterwards.
+ */
+struct vnet_port *tx_port_find(struct vnet *vp, struct sk_buff *skb)
+{
+	struct vnet_port *port;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vp->lock, flags);
+	port = __tx_port_find(vp, skb);
+	spin_unlock_irqrestore(&vp->lock, flags);
+
+	return port;
+}
+
+/* hard_start_xmit handler: queue one frame on the TX ring of the port
+ * chosen by tx_port_find().
+ *
+ * The frame is copied into the port's pre-mapped buffer for the
+ * current producer slot (VNET_PACKET_SKIP bytes in), zero-padded up to
+ * ETH_ZLEN if short, the descriptor is filled in and marked READY, and
+ * the peer is notified with __vnet_tx_trigger().
+ *
+ * Returns NETDEV_TX_BUSY when the ring has fewer than 2 free entries
+ * (the skb is left for the stack to retry), otherwise NETDEV_TX_OK.
+ * Frames that cannot be sent (no port, oversized, trigger failure) are
+ * freed and counted in tx_dropped.
+ */
+static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct vnet *vp = netdev_priv(dev);
+	struct vnet_port *port = tx_port_find(vp, skb);
+	struct vio_dring_state *dr;
+	struct vio_net_desc *d;
+	unsigned long flags;
+	unsigned int len;
+	void *tx_buf;
+	int i, err;
+
+	if (unlikely(!port))
+		goto out_dropped;
+
+	/* Each TX buffer is ETH_FRAME_LEN + 8 bytes and the frame is
+	 * copied in at offset VNET_PACKET_SKIP, so anything longer than
+	 * ETH_FRAME_LEN would overrun the buffer.  Drop it instead.
+	 */
+	if (unlikely(skb->len > ETH_FRAME_LEN))
+		goto out_dropped;
+
+	spin_lock_irqsave(&port->vio.lock, flags);
+
+	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
+		if (!netif_queue_stopped(dev)) {
+			netif_stop_queue(dev);
+
+			/* This is a hard error, log it. */
+			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when "
+			       "queue awake!\n", dev->name);
+			dev->stats.tx_errors++;
+		}
+		spin_unlock_irqrestore(&port->vio.lock, flags);
+		return NETDEV_TX_BUSY;
+	}
+
+	d = vio_dring_cur(dr);
+
+	tx_buf = port->tx_bufs[dr->prod].buf;
+	skb_copy_from_linear_data(skb, tx_buf + VNET_PACKET_SKIP, skb->len);
+
+	/* Pad short frames with zeroes up to the minimum length. */
+	len = skb->len;
+	if (len < ETH_ZLEN) {
+		len = ETH_ZLEN;
+		memset(tx_buf+VNET_PACKET_SKIP+skb->len, 0, len - skb->len);
+	}
+
+	d->hdr.ack = VIO_ACK_ENABLE;
+	d->size = len;
+	d->ncookies = port->tx_bufs[dr->prod].ncookies;
+	for (i = 0; i < d->ncookies; i++)
+		d->cookies[i] = port->tx_bufs[dr->prod].cookies[i];
+
+	/* This has to be a non-SMP write barrier because we are writing
+	 * to memory which is shared with the peer LDOM.
+	 */
+	wmb();
+
+	d->hdr.state = VIO_DESC_READY;
+
+	err = __vnet_tx_trigger(port);
+	if (unlikely(err < 0)) {
+		printk(KERN_INFO PFX "%s: TX trigger error %d\n",
+		       dev->name, err);
+		/* Take the descriptor back; the producer index has not
+		 * been advanced yet.
+		 */
+		d->hdr.state = VIO_DESC_FREE;
+		dev->stats.tx_carrier_errors++;
+		goto out_dropped_unlock;
+	}
+
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+
+	dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
+	if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
+		netif_stop_queue(dev);
+		if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
+			netif_wake_queue(dev);
+	}
+
+	spin_unlock_irqrestore(&port->vio.lock, flags);
+
+	dev_kfree_skb(skb);
+
+	dev->trans_start = jiffies;
+	return NETDEV_TX_OK;
+
+out_dropped_unlock:
+	spin_unlock_irqrestore(&port->vio.lock, flags);
+
+out_dropped:
+	dev_kfree_skb(skb);
+	dev->stats.tx_dropped++;
+	return NETDEV_TX_OK;
+}
+
+/* tx_timeout handler.  Currently an empty stub: a TX watchdog timeout
+ * takes no recovery action.
+ */
+static void vnet_tx_timeout(struct net_device *dev)
+{
+	/* XXX Implement me XXX */
+}
+
+/* open handler: mark the carrier as on and start the TX queue.
+ * Always returns 0.
+ */
+static int vnet_open(struct net_device *dev)
+{
+	netif_carrier_on(dev);
+	netif_start_queue(dev);
+
+	return 0;
+}
+
+/* stop handler: stop the TX queue and mark the carrier as off.
+ * Always returns 0.
+ */
+static int vnet_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	netif_carrier_off(dev);
+
+	return 0;
+}
+
+/* set_multicast_list handler.  Currently an empty stub, so changes to
+ * the device's RX filtering mode are ignored.
+ */
+static void vnet_set_rx_mode(struct net_device *dev)
+{
+	/* XXX Implement multicast support XXX */
+}
+
+/* change_mtu handler: the only MTU accepted is ETH_DATA_LEN.
+ *
+ * Returns 0 after storing that value, -EINVAL for anything else.
+ */
+static int vnet_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if (new_mtu == ETH_DATA_LEN) {
+		dev->mtu = new_mtu;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* set_mac_address handler: changing the MAC address is not supported,
+ * so every request is rejected with -EINVAL.
+ */
+static int vnet_set_mac_addr(struct net_device *dev, void *p)
+{
+	return -EINVAL;
+}
+
+/* ethtool get_drvinfo: report the driver name and version strings.
+ * Both sources are short compile-time literals (DRV_MODULE_NAME and
+ * DRV_MODULE_VERSION).
+ */
+static void vnet_get_drvinfo(struct net_device *dev,
+			     struct ethtool_drvinfo *info)
+{
+	strcpy(info->driver, DRV_MODULE_NAME);
+	strcpy(info->version, DRV_MODULE_VERSION);
+}
+
+/* ethtool get_msglevel: return the msg_enable word kept in the
+ * device's private struct vnet.
+ */
+static u32 vnet_get_msglevel(struct net_device *dev)
+{
+	struct vnet *priv = netdev_priv(dev);
+
+	return priv->msg_enable;
+}
+
+/* ethtool set_msglevel: store @value as the msg_enable word in the
+ * device's private struct vnet.
+ */
+static void vnet_set_msglevel(struct net_device *dev, u32 value)
+{
+	struct vnet *priv = netdev_priv(dev);
+
+	priv->msg_enable = value;
+}
+
+/* ethtool operations: driver info and message level come from this
+ * driver, link state and permanent address from the generic helpers.
+ */
+static const struct ethtool_ops vnet_ethtool_ops = {
+	.get_drvinfo		= vnet_get_drvinfo,
+	.get_msglevel		= vnet_get_msglevel,
+	.set_msglevel		= vnet_set_msglevel,
+	.get_link		= ethtool_op_get_link,
+	.get_perm_addr		= ethtool_op_get_perm_addr,
+};
+
+/* Release everything vnet_port_alloc_tx_bufs() set up for @port: the
+ * exported TX descriptor ring (if one was allocated) and every TX
+ * buffer that is still present.
+ *
+ * Entries whose buf pointer is NULL are skipped and freed pointers are
+ * reset to NULL, so this also works on a partially initialised port.
+ */
+static void vnet_port_free_tx_bufs(struct vnet_port *port)
+{
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	struct vnet_tx_entry *ent;
+	int i;
+
+	if (dr->base) {
+		ldc_free_exp_dring(port->vio.lp, dr->base,
+				   (dr->entry_size * dr->num_entries),
+				   dr->cookies, dr->ncookies);
+		dr->base = NULL;
+		dr->entry_size = 0;
+		dr->num_entries = 0;
+		dr->pending = 0;
+		dr->ncookies = 0;
+	}
+
+	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
+		ent = &port->tx_bufs[i];
+		if (ent->buf) {
+			ldc_unmap(port->vio.lp, ent->cookies, ent->ncookies);
+
+			kfree(ent->buf);
+			ent->buf = NULL;
+		}
+	}
+}
+
+/* Allocate and map the TX resources of @port: one buffer per ring
+ * slot, then the exported descriptor ring itself.
+ *
+ * Returns 0 on success.  On failure everything allocated so far is
+ * released through vnet_port_free_tx_bufs() and a negative error is
+ * returned: -ENOMEM for a failed buffer allocation, -EFAULT for a
+ * buffer that is not 8-byte aligned, or the error reported by
+ * ldc_map_single() / ldc_alloc_exp_dring().
+ */
+static int __devinit vnet_port_alloc_tx_bufs(struct vnet_port *port)
+{
+	struct vio_dring_state *dr;
+	unsigned long len;
+	int i, err, ncookies;
+	void *dring;
+
+	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
+		void *buf = kzalloc(ETH_FRAME_LEN + 8, GFP_KERNEL);
+		/* Mapped length: ETH_FRAME_LEN rounded up to 8 bytes. */
+		int map_len = (ETH_FRAME_LEN + 7) & ~7;
+
+		err = -ENOMEM;
+		if (!buf) {
+			printk(KERN_ERR "TX buffer allocation failure\n");
+			goto err_out;
+		}
+		err = -EFAULT;
+		if ((unsigned long)buf & (8UL - 1)) {
+			printk(KERN_ERR "TX buffer misaligned\n");
+			kfree(buf);
+			goto err_out;
+		}
+
+		err = ldc_map_single(port->vio.lp, buf, map_len,
+				     port->tx_bufs[i].cookies, 2,
+				     (LDC_MAP_SHADOW |
+				      LDC_MAP_DIRECT |
+				      LDC_MAP_RW));
+		if (err < 0) {
+			/* Not yet recorded in tx_bufs[], so free it here. */
+			kfree(buf);
+			goto err_out;
+		}
+		/* A non-negative result is stored as the cookie count. */
+		port->tx_bufs[i].buf = buf;
+		port->tx_bufs[i].ncookies = err;
+	}
+
+	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+
+	/* Each ring entry is a descriptor followed by two cookies. */
+	len = (VNET_TX_RING_SIZE *
+	       (sizeof(struct vio_net_desc) +
+		(sizeof(struct ldc_trans_cookie) * 2)));
+
+	ncookies = VIO_MAX_RING_COOKIES;
+	dring = ldc_alloc_exp_dring(port->vio.lp, len,
+				    dr->cookies, &ncookies,
+				    (LDC_MAP_SHADOW |
+				     LDC_MAP_DIRECT |
+				     LDC_MAP_RW));
+	if (IS_ERR(dring)) {
+		err = PTR_ERR(dring);
+		goto err_out;
+	}
+
+	dr->base = dring;
+	dr->entry_size = (sizeof(struct vio_net_desc) +
+			  (sizeof(struct ldc_trans_cookie) * 2));
+	dr->num_entries = VNET_TX_RING_SIZE;
+	dr->prod = dr->cons = 0;
+	dr->pending = VNET_TX_RING_SIZE;
+	dr->ncookies = ncookies;
+
+	return 0;
+
+err_out:
+	vnet_port_free_tx_bufs(port);
+
+	return err;
+}
+
+/* LDC channel configuration handed to vio_ldc_alloc() for every port:
+ * events go to vnet_event(), with an mtu of 64 in unreliable mode.
+ */
+static struct ldc_channel_config vnet_ldc_cfg = {
+	.event		= vnet_event,
+	.mtu		= 64,
+	.mode		= LDC_MODE_UNRELIABLE,
+};
+
+/* Callbacks handed to vio_driver_init() for every port: attribute
+ * exchange and the handshake-complete hook.
+ */
+static struct vio_driver_ops vnet_vio_ops = {
+	.send_attr		= vnet_send_attr,
+	.handle_attr		= vnet_handle_attr,
+	.handshake_complete	= vnet_handshake_complete,
+};
+
+/* Machine-description property holding a port's remote MAC address. */
+const char *remote_macaddr_prop = "remote-mac-address";
+
+/* Probe a "vnet-port" vio device.
+ *
+ * Looks up the parent struct vnet from the parent device's driver
+ * data, reads the remote MAC address and channel endpoint from the
+ * machine description, sets up the VIO/LDC state and TX buffers, links
+ * the port into the parent's list and hash table, and finally brings
+ * the port up.
+ *
+ * Returns 0 on success, -ENODEV when the parent, the MAC property or
+ * the endpoint is missing, -ENOMEM when the port cannot be allocated,
+ * or the error from one of the setup helpers.
+ */
+static int __devinit vnet_port_probe(struct vio_dev *vdev,
+				     const struct vio_device_id *id)
+{
+	struct mdesc_node *endp;
+	struct vnet_port *port;
+	unsigned long flags;
+	struct vnet *vp;
+	const u64 *rmac;
+	int len, i, err, switch_port;
+
+	vp = dev_get_drvdata(vdev->dev.parent);
+	if (!vp) {
+		printk(KERN_ERR PFX "Cannot find port parent vnet.\n");
+		return -ENODEV;
+	}
+
+	rmac = md_get_property(vdev->mp, remote_macaddr_prop, &len);
+	if (!rmac) {
+		printk(KERN_ERR PFX "Port lacks %s property.\n",
+		       remote_macaddr_prop);
+		return -ENODEV;
+	}
+
+	endp = vio_find_endpoint(vdev);
+	if (!endp) {
+		printk(KERN_ERR PFX "Port lacks channel-endpoint.\n");
+		return -ENODEV;
+	}
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port) {
+		printk(KERN_ERR PFX "Cannot allocate vnet_port.\n");
+		return -ENOMEM;
+	}
+
+	/* Unpack the low 48 bits of the property, most significant
+	 * byte first, into raddr[0..5].
+	 */
+	for (i = 0; i < ETH_ALEN; i++)
+		port->raddr[i] = (*rmac >> (5 - i) * 8) & 0xff;
+
+	port->vp = vp;
+
+	err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK, endp,
+			      vnet_versions, ARRAY_SIZE(vnet_versions),
+			      &vnet_vio_ops, vp->dev->name);
+	if (err)
+		goto err_out_free_port;
+
+	err = vio_ldc_alloc(&port->vio, &vnet_ldc_cfg, port);
+	if (err)
+		goto err_out_free_port;
+
+	err = vnet_port_alloc_tx_bufs(port);
+	if (err)
+		goto err_out_free_ldc;
+
+	INIT_HLIST_NODE(&port->hash);
+	INIT_LIST_HEAD(&port->list);
+
+	switch_port = 0;
+	if (md_get_property(vdev->mp, "switch-port", NULL) != NULL)
+		switch_port = 1;
+
+	/* A switch port goes to the head of the list, where
+	 * __tx_port_find() picks its fallback port from; other ports
+	 * are appended.
+	 */
+	spin_lock_irqsave(&vp->lock, flags);
+	if (switch_port)
+		list_add(&port->list, &vp->port_list);
+	else
+		list_add_tail(&port->list, &vp->port_list);
+	hlist_add_head(&port->hash, &vp->port_hash[vnet_hashfn(port->raddr)]);
+	spin_unlock_irqrestore(&vp->lock, flags);
+
+	dev_set_drvdata(&vdev->dev, port);
+
+	printk(KERN_INFO "%s: PORT ( remote-mac ", vp->dev->name);
+	for (i = 0; i < 6; i++)
+		printk("%2.2x%c", port->raddr[i], i == 5 ? ' ' : ':');
+	if (switch_port)
+		printk("switch-port ");
+	printk(")\n");
+
+	vio_port_up(&port->vio);
+
+	return 0;
+
+err_out_free_ldc:
+	vio_ldc_free(&port->vio);
+
+err_out_free_port:
+	kfree(port);
+
+	return err;
+}
+
+/* Remove a "vnet-port" vio device: stop the VIO timer, unlink the
+ * port from the parent's list and hash table, then release its TX
+ * buffers, its LDC channel and the port structure itself.
+ *
+ * Does nothing if no port is attached to @vdev.  Always returns 0.
+ */
+static int vnet_port_remove(struct vio_dev *vdev)
+{
+	struct vnet_port *port = dev_get_drvdata(&vdev->dev);
+	unsigned long flags;
+	struct vnet *vp;
+
+	if (!port)
+		return 0;
+
+	vp = port->vp;
+
+	del_timer_sync(&port->vio.timer);
+
+	spin_lock_irqsave(&vp->lock, flags);
+	list_del(&port->list);
+	hlist_del(&port->hash);
+	spin_unlock_irqrestore(&vp->lock, flags);
+
+	vnet_port_free_tx_bufs(port);
+	vio_ldc_free(&port->vio);
+
+	dev_set_drvdata(&vdev->dev, NULL);
+
+	kfree(port);
+
+	return 0;
+}
+
+/* vio device types claimed by the port driver (vnet_port_driver). */
+static struct vio_device_id vnet_port_match[] = {
+	{
+		.type = "vnet-port",
+	},
+	{},
+};
+/* Export this table under its own name; vnet_match is the separate
+ * table belonging to the "network" driver.
+ */
+MODULE_DEVICE_TABLE(vio, vnet_port_match);
+
+/* vio driver for the individual ports ("vnet-port" devices). */
+static struct vio_driver vnet_port_driver = {
+	.id_table	= vnet_port_match,
+	.probe		= vnet_port_probe,
+	.remove		= vnet_port_remove,
+	.driver		= {
+		.name	= "vnet_port",
+		.owner	= THIS_MODULE,
+	}
+};
+
+/* Machine-description property holding the device's own MAC address. */
+const char *local_mac_prop = "local-mac-address";
+
+/* Probe a "network" vio device: allocate and register the net_device
+ * that the ports will later attach to.
+ *
+ * The MAC address is read from the machine description, the private
+ * struct vnet is initialised with an empty port list and hash table,
+ * and the struct vnet is stored as the vio device's driver data, which
+ * is where vnet_port_probe() looks it up.
+ *
+ * Returns 0 on success, -ENODEV if the MAC property is missing,
+ * -ENOMEM if the net_device cannot be allocated, or the
+ * register_netdev() error.
+ */
+static int __devinit vnet_probe(struct vio_dev *vdev,
+				const struct vio_device_id *id)
+{
+	/* Print the version banner on the first probe only. */
+	static int vnet_version_printed;
+	struct net_device *dev;
+	struct vnet *vp;
+	const u64 *mac;
+	int err, i, len;
+
+	if (vnet_version_printed++ == 0)
+		printk(KERN_INFO "%s", version);
+
+	mac = md_get_property(vdev->mp, local_mac_prop, &len);
+	if (!mac) {
+		printk(KERN_ERR PFX "vnet lacks %s property.\n",
+		       local_mac_prop);
+		err = -ENODEV;
+		goto err_out;
+	}
+
+	dev = alloc_etherdev(sizeof(*vp));
+	if (!dev) {
+		printk(KERN_ERR PFX "Etherdev alloc failed, aborting.\n");
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	/* Unpack the low 48 bits of the property, most significant
+	 * byte first, into dev_addr[0..5].
+	 */
+	for (i = 0; i < ETH_ALEN; i++)
+		dev->dev_addr[i] = (*mac >> (5 - i) * 8) & 0xff;
+
+	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
+
+	SET_NETDEV_DEV(dev, &vdev->dev);
+
+	vp = netdev_priv(dev);
+
+	spin_lock_init(&vp->lock);
+	vp->dev = dev;
+	vp->vdev = vdev;
+
+	INIT_LIST_HEAD(&vp->port_list);
+	for (i = 0; i < VNET_PORT_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&vp->port_hash[i]);
+
+	dev->open = vnet_open;
+	dev->stop = vnet_close;
+	dev->set_multicast_list = vnet_set_rx_mode;
+	dev->set_mac_address = vnet_set_mac_addr;
+	dev->tx_timeout = vnet_tx_timeout;
+	dev->ethtool_ops = &vnet_ethtool_ops;
+	dev->watchdog_timeo = VNET_TX_TIMEOUT;
+	dev->change_mtu = vnet_change_mtu;
+	dev->hard_start_xmit = vnet_start_xmit;
+
+	err = register_netdev(dev);
+	if (err) {
+		printk(KERN_ERR PFX "Cannot register net device, "
+		       "aborting.\n");
+		goto err_out_free_dev;
+	}
+
+	printk(KERN_INFO "%s: Sun LDOM vnet ", dev->name);
+
+	for (i = 0; i < 6; i++)
+		printk("%2.2x%c", dev->dev_addr[i], i == 5 ? '\n' : ':');
+
+	dev_set_drvdata(&vdev->dev, vp);
+
+	return 0;
+
+err_out_free_dev:
+	free_netdev(dev);
+
+err_out:
+	return err;
+}
+
+/* Remove a "network" vio device: unregister the net_device that
+ * vnet_probe() registered and clear the driver data pointer.
+ *
+ * Does nothing if no struct vnet is attached.  Always returns 0.
+ */
+static int vnet_remove(struct vio_dev *vdev)
+{
+	struct vnet *vp = dev_get_drvdata(&vdev->dev);
+
+	if (vp) {
+		/* XXX unregister port, or at least check XXX */
+
+		/* unregister_netdev() takes the RTNL lock itself.
+		 * unregister_netdevice() may only be called with RTNL
+		 * already held, which is not the case on this path.
+		 */
+		unregister_netdev(vp->dev);
+		dev_set_drvdata(&vdev->dev, NULL);
+
+		/* NOTE(review): the net_device (which embeds *vp) is
+		 * not freed here.  Ports may still reference it through
+		 * port->vp, so free_netdev() has to wait until the port
+		 * teardown noted above is sorted out.
+		 */
+	}
+	return 0;
+}
+
+/* vio device types claimed by the network driver (vnet_driver). */
+static struct vio_device_id vnet_match[] = {
+	{
+		.type = "network",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(vio, vnet_match);
+
+/* vio driver for the network device itself ("network" devices). */
+static struct vio_driver vnet_driver = {
+	.id_table	= vnet_match,
+	.probe		= vnet_probe,
+	.remove		= vnet_remove,
+	.driver		= {
+		.name	= "vnet",
+		.owner	= THIS_MODULE,
+	}
+};
+
+/* Module init: register the network driver first, then the port
+ * driver.  If the port driver fails to register, the network driver
+ * is unregistered again.
+ *
+ * Returns 0 on success or the first registration error.
+ */
+static int __init vnet_init(void)
+{
+	int err;
+
+	err = vio_register_driver(&vnet_driver);
+	if (err)
+		return err;
+
+	err = vio_register_driver(&vnet_port_driver);
+	if (err)
+		vio_unregister_driver(&vnet_driver);
+
+	return err;
+}
+
+/* Module exit: unregister both drivers in the reverse order of
+ * vnet_init(), port driver first.
+ */
+static void __exit vnet_exit(void)
+{
+	vio_unregister_driver(&vnet_port_driver);
+	vio_unregister_driver(&vnet_driver);
+}
+
+module_init(vnet_init);
+module_exit(vnet_exit);
diff --git a/drivers/net/sunvnet.h b/drivers/net/sunvnet.h
new file mode 100644
index 00000000000..1c887302d46
--- /dev/null
+++ b/drivers/net/sunvnet.h
@@ -0,0 +1,70 @@
+#ifndef _SUNVNET_H
+#define _SUNVNET_H
+
+/* Size in bytes of the part of a ring entry that follows the
+ * struct vio_net_desc header.
+ *
+ * NOTE(review): despite the name, the result is a byte count and is
+ * not divided by sizeof(struct ldc_trans_cookie).  No user is visible
+ * in this patch; confirm the intent before relying on it.
+ */
+#define DESC_NCOOKIES(entry_size)	\
+	((entry_size) - sizeof(struct vio_net_desc))
+
+/* length of time before we decide the hardware is borked,
+ * and dev->tx_timeout() should be called to fix the problem
+ */
+#define VNET_TX_TIMEOUT			(5 * HZ)
+
+#define VNET_TX_RING_SIZE		512
+#define VNET_TX_WAKEUP_THRESH(dr)	((dr)->pending / 4)
+
+/* VNET packets are sent in buffers with the first 6 bytes skipped
+ * so that after the ethernet header the IPv4/IPv6 headers are aligned
+ * properly.
+ */
+#define VNET_PACKET_SKIP		6
+
+/* Per-slot TX buffer state, one entry for each TX ring descriptor. */
+struct vnet_tx_entry {
+	/* Buffer the outgoing frame is copied into; NULL when unset. */
+	void			*buf;
+	/* Number of valid entries in cookies[], from ldc_map_single(). */
+	unsigned int		ncookies;
+	/* LDC cookies describing the mapping of buf. */
+	struct ldc_trans_cookie	cookies[2];
+};
+
+struct vnet;
+/* One port of a virtual network device, i.e. one LDC channel to a
+ * remote peer identified by its MAC address.
+ */
+struct vnet_port {
+	/* Generic VIO state; to_vnet_port() maps back from it. */
+	struct vio_driver_state	vio;
+
+	/* Link in vp->port_hash, bucket chosen by vnet_hashfn(raddr). */
+	struct hlist_node	hash;
+	/* MAC address of the remote end of this port. */
+	u8			raddr[ETH_ALEN];
+
+	/* The network device this port belongs to. */
+	struct vnet		*vp;
+
+	/* TX buffers, indexed like the TX descriptor ring. */
+	struct vnet_tx_entry	tx_bufs[VNET_TX_RING_SIZE];
+
+	/* Link in vp->port_list. */
+	struct list_head	list;
+};
+
+/* Map a pointer to the embedded vio member back to its enclosing
+ * struct vnet_port.
+ */
+static inline struct vnet_port *to_vnet_port(struct vio_driver_state *vio)
+{
+	return container_of(vio, struct vnet_port, vio);
+}
+
+#define VNET_PORT_HASH_SIZE	16
+#define VNET_PORT_HASH_MASK	(VNET_PORT_HASH_SIZE - 1)
+
+/* Hash a MAC address into a port_hash bucket index: XOR of its last
+ * two bytes, reduced to the range [0, VNET_PORT_HASH_SIZE).
+ */
+static inline unsigned int vnet_hashfn(u8 *mac)
+{
+	return (mac[4] ^ mac[5]) & VNET_PORT_HASH_MASK;
+}
+
+/* Private state of one virtual network device (netdev_priv()). */
+struct vnet {
+	/* Protects port_list and port_hash.  */
+	spinlock_t		lock;
+
+	/* Back pointer to the net_device this state is embedded in. */
+	struct net_device	*dev;
+
+	/* Message level word read and written through ethtool. */
+	u32			msg_enable;
+	/* The "network" vio device this net_device was probed from. */
+	struct vio_dev		*vdev;
+
+	/* All ports; a switch port, if any, is added at the head. */
+	struct list_head	port_list;
+
+	/* Ports hashed by remote MAC address, see vnet_hashfn(). */
+	struct hlist_head	port_hash[VNET_PORT_HASH_SIZE];
+};
+
+#endif /* _SUNVNET_H */
-- 
cgit v1.2.3


From 667ef3c3968e4e2ddc3f3f84f05e11fb2453d5b6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 16 Jul 2007 04:03:56 -0700
Subject: [SPARC64]: Add Sun LDOM virtual disk driver.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/Kconfig  |   7 +
 drivers/block/Makefile |   1 +
 drivers/block/sunvdc.c | 970 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 978 insertions(+)
 create mode 100644 drivers/block/sunvdc.c

(limited to 'drivers')

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 6e23af1ecbd..d9867fd2f2a 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -423,6 +423,13 @@ config ATA_OVER_ETH
 	This driver provides Support for ATA over Ethernet block
 	devices like the Coraid EtherDrive (R) Storage Blade.
 
+config SUNVDC
+	tristate "Sun Virtual Disk Client support"
+	depends on SUN_LDOMS
+	help
+	  Support for virtual disk devices as a client under Sun
+	  Logical Domains.
+
 source "drivers/s390/block/Kconfig"
 
 endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index e5f98acc5d5..43371c59623 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_BLK_CPQ_DA)	+= cpqarray.o
 obj-$(CONFIG_BLK_CPQ_CISS_DA)  += cciss.o
 obj-$(CONFIG_BLK_DEV_DAC960)	+= DAC960.o
 obj-$(CONFIG_CDROM_PKTCDVD)	+= pktcdvd.o
+obj-$(CONFIG_SUNVDC)		+= sunvdc.o
 
 obj-$(CONFIG_BLK_DEV_UMEM)	+= umem.o
 obj-$(CONFIG_BLK_DEV_NBD)	+= nbd.o
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
new file mode 100644
index 00000000000..8dbbeace52a
--- /dev/null
+++ b/drivers/block/sunvdc.c
@@ -0,0 +1,970 @@
+/* sunvdc.c: Sun LDOM Virtual Disk Client.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/blkdev.h>
+#include <linux/hdreg.h>
+#include <linux/genhd.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/vio.h>
+#include <asm/ldc.h>
+
+#define DRV_MODULE_NAME		"sunvdc"
+#define PFX DRV_MODULE_NAME	": "
+#define DRV_MODULE_VERSION	"1.0"
+#define DRV_MODULE_RELDATE	"June 25, 2007"
+
+static char version[] __devinitdata =
+	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_DESCRIPTION("Sun LDOM virtual disk client driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+#define VDC_TX_RING_SIZE	256
+
+#define WAITING_FOR_LINK_UP	0x01
+#define WAITING_FOR_TX_SPACE	0x02
+#define WAITING_FOR_GEN_CMD	0x04
+#define WAITING_FOR_ANY		-1
+
+struct vdc_req_entry {
+	struct request		*req;
+};
+
+struct vdc_port {
+	struct vio_driver_state	vio;
+
+	struct vdc		*vp;
+
+	struct gendisk		*disk;
+
+	struct vdc_completion	*cmp;
+
+	u64			req_id;
+	u64			seq;
+	struct vdc_req_entry	rq_arr[VDC_TX_RING_SIZE];
+
+	unsigned long		ring_cookies;
+
+	u64			max_xfer_size;
+	u32			vdisk_block_size;
+
+	/* The server fills these in for us in the disk attribute
+	 * ACK packet.
+	 */
+	u64			operations;
+	u32			vdisk_size;
+	u8			vdisk_type;
+	u8			dev_no;
+
+	char			disk_name[32];
+
+	struct vio_disk_geom	geom;
+	struct vio_disk_vtoc	label;
+
+	struct list_head	list;
+};
+
+static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
+{
+	return container_of(vio, struct vdc_port, vio);
+}
+
+struct vdc {
+	/* Protects port_list.  */
+	spinlock_t		lock;
+
+	struct vio_dev		*dev;
+
+	struct list_head	port_list;
+};
+
+/* Ordered from largest major to lowest */
+static struct vio_version vdc_versions[] = {
+	{ .major = 1, .minor = 0 },
+};
+
+#define VDCBLK_NAME	"vdisk"
+static int vdc_major;
+#define PARTITION_SHIFT	3
+
+static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr)
+{
+	return vio_dring_avail(dr, VDC_TX_RING_SIZE);
+}
+
+static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	struct gendisk *disk = bdev->bd_disk;
+	struct vdc_port *port = disk->private_data;
+
+	geo->heads = (u8) port->geom.num_hd;
+	geo->sectors = (u8) port->geom.num_sec;
+	geo->cylinders = port->geom.num_cyl;
+
+	return 0;
+}
+
+static struct block_device_operations vdc_fops = {
+	.owner		= THIS_MODULE,
+	.getgeo		= vdc_getgeo,
+};
+
+static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
+{
+	if (vio->cmp &&
+	    (waiting_for == -1 ||
+	     vio->cmp->waiting_for == waiting_for)) {
+		vio->cmp->err = err;
+		complete(&vio->cmp->com);
+		vio->cmp = NULL;
+	}
+}
+
+static void vdc_handshake_complete(struct vio_driver_state *vio)
+{
+	vdc_finish(vio, 0, WAITING_FOR_LINK_UP);
+}
+
+static int vdc_handle_unknown(struct vdc_port *port, void *arg)
+{
+	struct vio_msg_tag *pkt = arg;
+
+	printk(KERN_ERR PFX "Received unknown msg [%02x:%02x:%04x:%08x]\n",
+	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
+	printk(KERN_ERR PFX "Resetting connection.\n");
+
+	ldc_disconnect(port->vio.lp);
+
+	return -ECONNRESET;
+}
+
+static int vdc_send_attr(struct vio_driver_state *vio)
+{
+	struct vdc_port *port = to_vdc_port(vio);
+	struct vio_disk_attr_info pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.tag.type = VIO_TYPE_CTRL;
+	pkt.tag.stype = VIO_SUBTYPE_INFO;
+	pkt.tag.stype_env = VIO_ATTR_INFO;
+	pkt.tag.sid = vio_send_sid(vio);
+
+	pkt.xfer_mode = VIO_DRING_MODE;
+	pkt.vdisk_block_size = port->vdisk_block_size;
+	pkt.max_xfer_size = port->max_xfer_size;
+
+	viodbg(HS, "SEND ATTR xfer_mode[0x%x] blksz[%u] max_xfer[%lu]\n",
+	       pkt.xfer_mode, pkt.vdisk_block_size, pkt.max_xfer_size);
+
+	return vio_ldc_send(&port->vio, &pkt, sizeof(pkt));
+}
+
+static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
+{
+	struct vdc_port *port = to_vdc_port(vio);
+	struct vio_disk_attr_info *pkt = arg;
+
+	viodbg(HS, "GOT ATTR stype[0x%x] ops[%lx] disk_size[%lu] disk_type[%x] "
+	       "xfer_mode[0x%x] blksz[%u] max_xfer[%lu]\n",
+	       pkt->tag.stype, pkt->operations,
+	       pkt->vdisk_size, pkt->vdisk_type,
+	       pkt->xfer_mode, pkt->vdisk_block_size,
+	       pkt->max_xfer_size);
+
+	if (pkt->tag.stype == VIO_SUBTYPE_ACK) {
+		switch (pkt->vdisk_type) {
+		case VD_DISK_TYPE_DISK:
+		case VD_DISK_TYPE_SLICE:
+			break;
+
+		default:
+			printk(KERN_ERR PFX "%s: Bogus vdisk_type 0x%x\n",
+			       vio->name, pkt->vdisk_type);
+			return -ECONNRESET;
+		}
+
+		if (pkt->vdisk_block_size > port->vdisk_block_size) {
+			printk(KERN_ERR PFX "%s: BLOCK size increased "
+			       "%u --> %u\n",
+			       vio->name,
+			       port->vdisk_block_size, pkt->vdisk_block_size);
+			return -ECONNRESET;
+		}
+
+		port->operations = pkt->operations;
+		port->vdisk_size = pkt->vdisk_size;
+		port->vdisk_type = pkt->vdisk_type;
+		if (pkt->max_xfer_size < port->max_xfer_size)
+			port->max_xfer_size = pkt->max_xfer_size;
+		port->vdisk_block_size = pkt->vdisk_block_size;
+		return 0;
+	} else {
+		printk(KERN_ERR PFX "%s: Attribute NACK\n", vio->name);
+
+		return -ECONNRESET;
+	}
+}
+
+static void vdc_end_special(struct vdc_port *port, struct vio_disk_desc *desc)
+{
+	int err = desc->status;
+
+	vdc_finish(&port->vio, -err, WAITING_FOR_GEN_CMD);
+}
+
+static void vdc_end_request(struct request *req, int uptodate, int num_sectors)
+{
+	if (end_that_request_first(req, uptodate, num_sectors))
+		return;
+	add_disk_randomness(req->rq_disk);
+	end_that_request_last(req, uptodate);
+}
+
+static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
+			unsigned int index)
+{
+	struct vio_disk_desc *desc = vio_dring_entry(dr, index);
+	struct vdc_req_entry *rqe = &port->rq_arr[index];
+	struct request *req;
+
+	if (unlikely(desc->hdr.state != VIO_DESC_DONE))
+		return;
+
+	ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
+	desc->hdr.state = VIO_DESC_FREE;
+	dr->cons = (index + 1) & (VDC_TX_RING_SIZE - 1);
+
+	req = rqe->req;
+	if (req == NULL) {
+		vdc_end_special(port, desc);
+		return;
+	}
+
+	rqe->req = NULL;
+
+	vdc_end_request(req, !desc->status, desc->size >> 9);
+
+	if (blk_queue_stopped(port->disk->queue))
+		blk_start_queue(port->disk->queue);
+}
+
+static int vdc_ack(struct vdc_port *port, void *msgbuf)
+{
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	struct vio_dring_data *pkt = msgbuf;
+
+	if (unlikely(pkt->dring_ident != dr->ident ||
+		     pkt->start_idx != pkt->end_idx ||
+		     pkt->start_idx >= VDC_TX_RING_SIZE))
+		return 0;
+
+	vdc_end_one(port, dr, pkt->start_idx);
+
+	return 0;
+}
+
+static int vdc_nack(struct vdc_port *port, void *msgbuf)
+{
+	/* XXX Implement me XXX */
+	return 0;
+}
+
+static void vdc_event(void *arg, int event)
+{
+	struct vdc_port *port = arg;
+	struct vio_driver_state *vio = &port->vio;
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&vio->lock, flags);
+
+	if (unlikely(event == LDC_EVENT_RESET ||
+		     event == LDC_EVENT_UP)) {
+		vio_link_state_change(vio, event);
+		spin_unlock_irqrestore(&vio->lock, flags);
+		return;
+	}
+
+	if (unlikely(event != LDC_EVENT_DATA_READY)) {
+		printk(KERN_WARNING PFX "Unexpected LDC event %d\n", event);
+		spin_unlock_irqrestore(&vio->lock, flags);
+		return;
+	}
+
+	err = 0;
+	while (1) {
+		union {
+			struct vio_msg_tag tag;
+			u64 raw[8];
+		} msgbuf;
+
+		err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
+		if (unlikely(err < 0)) {
+			if (err == -ECONNRESET)
+				vio_conn_reset(vio);
+			break;
+		}
+		if (err == 0)
+			break;
+		viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
+		       msgbuf.tag.type,
+		       msgbuf.tag.stype,
+		       msgbuf.tag.stype_env,
+		       msgbuf.tag.sid);
+		err = vio_validate_sid(vio, &msgbuf.tag);
+		if (err < 0)
+			break;
+
+		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
+			if (msgbuf.tag.stype == VIO_SUBTYPE_ACK)
+				err = vdc_ack(port, &msgbuf);
+			else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK)
+				err = vdc_nack(port, &msgbuf);
+			else
+				err = vdc_handle_unknown(port, &msgbuf);
+		} else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
+			err = vio_control_pkt_engine(vio, &msgbuf);
+		} else {
+			err = vdc_handle_unknown(port, &msgbuf);
+		}
+		if (err < 0)
+			break;
+	}
+	if (err < 0)
+		vdc_finish(&port->vio, err, WAITING_FOR_ANY);
+	spin_unlock_irqrestore(&vio->lock, flags);
+}
+
+static int __vdc_tx_trigger(struct vdc_port *port)
+{
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	struct vio_dring_data hdr = {
+		.tag = {
+			.type		= VIO_TYPE_DATA,
+			.stype		= VIO_SUBTYPE_INFO,
+			.stype_env	= VIO_DRING_DATA,
+			.sid		= vio_send_sid(&port->vio),
+		},
+		.dring_ident		= dr->ident,
+		.start_idx		= dr->prod,
+		.end_idx		= dr->prod,
+	};
+	int err, delay;
+
+	hdr.seq = dr->snd_nxt;
+	delay = 1;
+	do {
+		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
+		if (err > 0) {
+			dr->snd_nxt++;
+			break;
+		}
+		udelay(delay);
+		if ((delay <<= 1) > 128)
+			delay = 128;
+	} while (err == -EAGAIN);
+
+	return err;
+}
+
+static int __send_request(struct request *req)
+{
+	struct vdc_port *port = req->rq_disk->private_data;
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	struct scatterlist sg[port->ring_cookies];
+	struct vdc_req_entry *rqe;
+	struct vio_disk_desc *desc;
+	unsigned int map_perm;
+	int nsg, err, i;
+	u64 len;
+	u8 op;
+
+	map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
+
+	if (rq_data_dir(req) == READ) {
+		map_perm |= LDC_MAP_W;
+		op = VD_OP_BREAD;
+	} else {
+		map_perm |= LDC_MAP_R;
+		op = VD_OP_BWRITE;
+	}
+
+	nsg = blk_rq_map_sg(req->q, req, sg);
+
+	len = 0;
+	for (i = 0; i < nsg; i++)
+		len += sg[i].length;
+
+	if (unlikely(vdc_tx_dring_avail(dr) < 1)) {
+		blk_stop_queue(port->disk->queue);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	desc = vio_dring_cur(dr);
+
+	err = ldc_map_sg(port->vio.lp, sg, nsg,
+			 desc->cookies, port->ring_cookies,
+			 map_perm);
+	if (err < 0) {
+		printk(KERN_ERR PFX "ldc_map_sg() failure, err=%d.\n", err);
+		return err;
+	}
+
+	rqe = &port->rq_arr[dr->prod];
+	rqe->req = req;
+
+	desc->hdr.ack = VIO_ACK_ENABLE;
+	desc->req_id = port->req_id;
+	desc->operation = op;
+	if (port->vdisk_type == VD_DISK_TYPE_DISK) {
+		desc->slice = 2;
+	} else {
+		desc->slice = 0;
+	}
+	desc->status = ~0;
+	desc->offset = (req->sector << 9) / port->vdisk_block_size;
+	desc->size = len;
+	desc->ncookies = err;
+
+	/* This has to be a non-SMP write barrier because we are writing
+	 * to memory which is shared with the peer LDOM.
+	 */
+	wmb();
+	desc->hdr.state = VIO_DESC_READY;
+
+	err = __vdc_tx_trigger(port);
+	if (err < 0) {
+		printk(KERN_ERR PFX "vdc_tx_trigger() failure, err=%d\n", err);
+	} else {
+		port->req_id++;
+		dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1);
+	}
+out:
+
+	return err;
+}
+
+static void do_vdc_request(request_queue_t *q)
+{
+	while (1) {
+		struct request *req = elv_next_request(q);
+
+		if (!req)
+			break;
+
+		blkdev_dequeue_request(req);
+		if (__send_request(req) < 0)
+			vdc_end_request(req, 0, req->hard_nr_sectors);
+	}
+}
+
+static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
+{
+	struct vio_dring_state *dr;
+	struct vio_completion comp;
+	struct vio_disk_desc *desc;
+	unsigned int map_perm;
+	unsigned long flags;
+	int op_len, err;
+	void *req_buf;
+
+	if (!(((u64)1 << ((u64)op - 1)) & port->operations))
+		return -EOPNOTSUPP;
+
+	switch (op) {
+	case VD_OP_BREAD:
+	case VD_OP_BWRITE:
+	default:
+		return -EINVAL;
+
+	case VD_OP_FLUSH:
+		op_len = 0;
+		map_perm = 0;
+		break;
+
+	case VD_OP_GET_WCE:
+		op_len = sizeof(u32);
+		map_perm = LDC_MAP_W;
+		break;
+
+	case VD_OP_SET_WCE:
+		op_len = sizeof(u32);
+		map_perm = LDC_MAP_R;
+		break;
+
+	case VD_OP_GET_VTOC:
+		op_len = sizeof(struct vio_disk_vtoc);
+		map_perm = LDC_MAP_W;
+		break;
+
+	case VD_OP_SET_VTOC:
+		op_len = sizeof(struct vio_disk_vtoc);
+		map_perm = LDC_MAP_R;
+		break;
+
+	case VD_OP_GET_DISKGEOM:
+		op_len = sizeof(struct vio_disk_geom);
+		map_perm = LDC_MAP_W;
+		break;
+
+	case VD_OP_SET_DISKGEOM:
+		op_len = sizeof(struct vio_disk_geom);
+		map_perm = LDC_MAP_R;
+		break;
+
+	case VD_OP_SCSICMD:
+		op_len = 16;
+		map_perm = LDC_MAP_RW;
+		break;
+
+	case VD_OP_GET_DEVID:
+		op_len = sizeof(struct vio_disk_devid);
+		map_perm = LDC_MAP_W;
+		break;
+
+	case VD_OP_GET_EFI:
+	case VD_OP_SET_EFI:
+		return -EOPNOTSUPP;
+		break;
+	};
+
+	map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
+
+	op_len = (op_len + 7) & ~7;
+	req_buf = kzalloc(op_len, GFP_KERNEL);
+	if (!req_buf)
+		return -ENOMEM;
+
+	if (len > op_len)
+		len = op_len;
+
+	if (map_perm & LDC_MAP_R)
+		memcpy(req_buf, buf, len);
+
+	spin_lock_irqsave(&port->vio.lock, flags);
+
+	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+
+	/* XXX If we want to use this code generically we have to
+	 * XXX handle TX ring exhaustion etc.
+	 */
+	desc = vio_dring_cur(dr);
+
+	err = ldc_map_single(port->vio.lp, req_buf, op_len,
+			     desc->cookies, port->ring_cookies,
+			     map_perm);
+	if (err < 0) {
+		spin_unlock_irqrestore(&port->vio.lock, flags);
+		kfree(req_buf);
+		return err;
+	}
+
+	init_completion(&comp.com);
+	comp.waiting_for = WAITING_FOR_GEN_CMD;
+	port->vio.cmp = &comp;
+
+	desc->hdr.ack = VIO_ACK_ENABLE;
+	desc->req_id = port->req_id;
+	desc->operation = op;
+	desc->slice = 0;
+	desc->status = ~0;
+	desc->offset = 0;
+	desc->size = op_len;
+	desc->ncookies = err;
+
+	/* This has to be a non-SMP write barrier because we are writing
+	 * to memory which is shared with the peer LDOM.
+	 */
+	wmb();
+	desc->hdr.state = VIO_DESC_READY;
+
+	err = __vdc_tx_trigger(port);
+	if (err >= 0) {
+		port->req_id++;
+		dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1);
+		spin_unlock_irqrestore(&port->vio.lock, flags);
+
+		wait_for_completion(&comp.com);
+		err = comp.err;
+	} else {
+		port->vio.cmp = NULL;
+		spin_unlock_irqrestore(&port->vio.lock, flags);
+	}
+
+	if (map_perm & LDC_MAP_W)
+		memcpy(buf, req_buf, len);
+
+	kfree(req_buf);
+
+	return err;
+}
+
+static int __devinit vdc_alloc_tx_ring(struct vdc_port *port)
+{
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	unsigned long len, entry_size;
+	int ncookies;
+	void *dring;
+
+	entry_size = sizeof(struct vio_disk_desc) +
+		(sizeof(struct ldc_trans_cookie) * port->ring_cookies);
+	len = (VDC_TX_RING_SIZE * entry_size);
+
+	ncookies = VIO_MAX_RING_COOKIES;
+	dring = ldc_alloc_exp_dring(port->vio.lp, len,
+				    dr->cookies, &ncookies,
+				    (LDC_MAP_SHADOW |
+				     LDC_MAP_DIRECT |
+				     LDC_MAP_RW));
+	if (IS_ERR(dring))
+		return PTR_ERR(dring);
+
+	dr->base = dring;
+	dr->entry_size = entry_size;
+	dr->num_entries = VDC_TX_RING_SIZE;
+	dr->prod = dr->cons = 0;
+	dr->pending = VDC_TX_RING_SIZE;
+	dr->ncookies = ncookies;
+
+	return 0;
+}
+
+static void vdc_free_tx_ring(struct vdc_port *port)
+{
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+
+	if (dr->base) {
+		ldc_free_exp_dring(port->vio.lp, dr->base,
+				   (dr->entry_size * dr->num_entries),
+				   dr->cookies, dr->ncookies);
+		dr->base = NULL;
+		dr->entry_size = 0;
+		dr->num_entries = 0;
+		dr->pending = 0;
+		dr->ncookies = 0;
+	}
+}
+
+static int probe_disk(struct vdc_port *port)
+{
+	struct vio_completion comp;
+	struct request_queue *q;
+	struct gendisk *g;
+	int err;
+
+	init_completion(&comp.com);
+	comp.err = 0;
+	comp.waiting_for = WAITING_FOR_LINK_UP;
+	port->vio.cmp = &comp;
+
+	vio_port_up(&port->vio);
+
+	wait_for_completion(&comp.com);
+	if (comp.err)
+		return comp.err;
+
+	err = generic_request(port, VD_OP_GET_VTOC,
+			      &port->label, sizeof(port->label));
+	if (err < 0) {
+		printk(KERN_ERR PFX "VD_OP_GET_VTOC returns error %d\n", err);
+		return err;
+	}
+
+	err = generic_request(port, VD_OP_GET_DISKGEOM,
+			      &port->geom, sizeof(port->geom));
+	if (err < 0) {
+		printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
+		       "error %d\n", err);
+		return err;
+	}
+
+	port->vdisk_size = ((u64)port->geom.num_cyl *
+			    (u64)port->geom.num_hd *
+			    (u64)port->geom.num_sec);
+
+	q = blk_init_queue(do_vdc_request, &port->vio.lock);
+	if (!q) {
+		printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
+		       port->vio.name);
+		return -ENOMEM;
+	}
+	g = alloc_disk(1 << PARTITION_SHIFT);
+	if (!g) {
+		printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
+		       port->vio.name);
+		blk_cleanup_queue(q);
+		return -ENOMEM;
+	}
+
+	port->disk = g;
+
+	blk_queue_max_hw_segments(q, port->ring_cookies);
+	blk_queue_max_phys_segments(q, port->ring_cookies);
+	blk_queue_max_sectors(q, port->max_xfer_size);
+	g->major = vdc_major;
+	g->first_minor = port->dev_no << PARTITION_SHIFT;
+	strcpy(g->disk_name, port->disk_name);
+
+	g->fops = &vdc_fops;
+	g->queue = q;
+	g->private_data = port;
+	g->driverfs_dev = &port->vio.vdev->dev;
+
+	set_capacity(g, port->vdisk_size);
+
+	printk(KERN_INFO PFX "%s: %u sectors (%u MB)\n",
+	       g->disk_name,
+	       port->vdisk_size, (port->vdisk_size >> (20 - 9)));
+
+	add_disk(g);
+
+	return 0;
+}
+
+static struct ldc_channel_config vdc_ldc_cfg = {
+	.event		= vdc_event,
+	.mtu		= 64,
+	.mode		= LDC_MODE_UNRELIABLE,
+};
+
+static struct vio_driver_ops vdc_vio_ops = {
+	.send_attr		= vdc_send_attr,
+	.handle_attr		= vdc_handle_attr,
+	.handshake_complete	= vdc_handshake_complete,
+};
+
+static int __devinit vdc_port_probe(struct vio_dev *vdev,
+				    const struct vio_device_id *id)
+{
+	struct mdesc_node *endp;
+	struct vdc_port *port;
+	unsigned long flags;
+	struct vdc *vp;
+	const u64 *port_id;
+	int err;
+
+	vp = dev_get_drvdata(vdev->dev.parent);
+	if (!vp) {
+		printk(KERN_ERR PFX "Cannot find port parent vdc.\n");
+		return -ENODEV;
+	}
+
+	endp = vio_find_endpoint(vdev);
+	if (!endp) {
+		printk(KERN_ERR PFX "Port lacks channel-endpoint.\n");
+		return -ENODEV;
+	}
+
+	port_id = md_get_property(vdev->mp, "id", NULL);
+	if (!port_id) {
+		printk(KERN_ERR PFX "Port lacks id property.\n");
+		return -ENODEV;
+	}
+	if ((*port_id << PARTITION_SHIFT) & ~(u64)MINORMASK) {
+		printk(KERN_ERR PFX "Port id [%lu] too large.\n", *port_id);
+		return -ENODEV;
+	}
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port) {
+		printk(KERN_ERR PFX "Cannot allocate vdc_port.\n");
+		return -ENOMEM;
+	}
+
+	port->vp = vp;
+	port->dev_no = *port_id;
+
+	if (port->dev_no >= 26)
+		snprintf(port->disk_name, sizeof(port->disk_name),
+			 VDCBLK_NAME "%c%c",
+			 'a' + (port->dev_no / 26) - 1,
+			 'a' + (port->dev_no % 26));
+	else
+		snprintf(port->disk_name, sizeof(port->disk_name),
+			 VDCBLK_NAME "%c", 'a' + (port->dev_no % 26));
+
+	err = vio_driver_init(&port->vio, vdev, VDEV_DISK, endp,
+			      vdc_versions, ARRAY_SIZE(vdc_versions),
+			      &vdc_vio_ops, port->disk_name);
+	if (err)
+		goto err_out_free_port;
+
+	port->vdisk_block_size = 512;
+	port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size);
+	port->ring_cookies = ((port->max_xfer_size *
+			       port->vdisk_block_size) / PAGE_SIZE) + 2;
+
+	err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
+	if (err)
+		goto err_out_free_port;
+
+	err = vdc_alloc_tx_ring(port);
+	if (err)
+		goto err_out_free_ldc;
+
+	err = probe_disk(port);
+	if (err)
+		goto err_out_free_tx_ring;
+
+	INIT_LIST_HEAD(&port->list);
+
+	spin_lock_irqsave(&vp->lock, flags);
+	list_add(&port->list, &vp->port_list);
+	spin_unlock_irqrestore(&vp->lock, flags);
+
+	dev_set_drvdata(&vdev->dev, port);
+
+	return 0;
+
+err_out_free_tx_ring:
+	vdc_free_tx_ring(port);
+
+err_out_free_ldc:
+	vio_ldc_free(&port->vio);
+
+err_out_free_port:
+	kfree(port);
+
+	return err;
+}
+
+static int vdc_port_remove(struct vio_dev *vdev)
+{
+	struct vdc_port *port = dev_get_drvdata(&vdev->dev);
+
+	if (port) {
+		del_timer_sync(&port->vio.timer);
+
+		vdc_free_tx_ring(port);
+		vio_ldc_free(&port->vio);
+
+		dev_set_drvdata(&vdev->dev, NULL);
+
+		kfree(port);
+	}
+	return 0;
+}
+
+static struct vio_device_id vdc_port_match[] = {
+	{
+		.type = "vdc-port",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(vio, vdc_port_match);
+
+static struct vio_driver vdc_port_driver = {
+	.id_table	= vdc_port_match,
+	.probe		= vdc_port_probe,
+	.remove		= vdc_port_remove,
+	.driver		= {
+		.name	= "vdc_port",
+		.owner	= THIS_MODULE,
+	}
+};
+
+static int __devinit vdc_probe(struct vio_dev *vdev,
+			       const struct vio_device_id *id)
+{
+	static int vdc_version_printed;
+	struct vdc *vp;
+
+	if (vdc_version_printed++ == 0)
+		printk(KERN_INFO "%s", version);
+
+	vp = kzalloc(sizeof(struct vdc), GFP_KERNEL);
+	if (!vp)
+		return -ENOMEM;
+
+	spin_lock_init(&vp->lock);
+	vp->dev = vdev;
+	INIT_LIST_HEAD(&vp->port_list);
+
+	dev_set_drvdata(&vdev->dev, vp);
+
+	return 0;
+}
+
+static int vdc_remove(struct vio_dev *vdev)
+{
+
+	struct vdc *vp = dev_get_drvdata(&vdev->dev);
+
+	if (vp) {
+		kfree(vp);
+		dev_set_drvdata(&vdev->dev, NULL);
+	}
+	return 0;
+}
+
+static struct vio_device_id vdc_match[] = {
+	{
+		.type = "block",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(vio, vdc_match);
+
+static struct vio_driver vdc_driver = {
+	.id_table	= vdc_match,
+	.probe		= vdc_probe,
+	.remove		= vdc_remove,
+	.driver		= {
+		.name	= "vdc",
+		.owner	= THIS_MODULE,
+	}
+};
+
+static int __init vdc_init(void)
+{
+	int err;
+
+	err = register_blkdev(0, VDCBLK_NAME);
+	if (err < 0)
+		goto out_err;
+
+	vdc_major = err;
+	err = vio_register_driver(&vdc_driver);
+	if (err)
+		goto out_unregister_blkdev;
+
+	err = vio_register_driver(&vdc_port_driver);
+	if (err)
+		goto out_unregister_vdc;
+
+	return 0;
+
+out_unregister_vdc:
+	vio_unregister_driver(&vdc_driver);
+
+out_unregister_blkdev:
+	unregister_blkdev(vdc_major, VDCBLK_NAME);
+	vdc_major = 0;
+
+out_err:
+	return err;
+}
+
+static void __exit vdc_exit(void)
+{
+	vio_unregister_driver(&vdc_port_driver);
+	vio_unregister_driver(&vdc_driver);
+	unregister_blkdev(vdc_major, VDCBLK_NAME);
+}
+
+module_init(vdc_init);
+module_exit(vdc_exit);
-- 
cgit v1.2.3


From 43fdf27470b216ebdef47e09ff83bed2f2894b13 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 12 Jul 2007 13:47:50 -0700
Subject: [SPARC64]: Abstract out mdesc accesses for better MD update handling.

Since we have to be able to handle MD updates, having an in-tree
set of data structures representing the MD objects actually makes
things more painful.

The MD itself is easy to parse, and we can implement the existing
interfaces using direct parsing of the MD binary image.

The MD is now reference counted, so accesses have to now take the
form:

	handle = mdesc_grab();

	... operations on MD ...

	mdesc_release(handle);

The only remaining issue is the cases where code holds on to references
to MD property values.  mdesc_get_property() returns a direct pointer
to the property value; most cases just pull in the information they
need and discard the pointer, but there are a few that use the pointer
directly over a long lifetime.  Those will be fixed up in a subsequent
changeset.

A preliminary handler for MD update events from domain services is
there; it is rudimentary, but it works and handles all of the reference
counting.  It does not check the generation number of the MDs,
and it does not generate an "add/delete" list for notification to
interested parties about MD changes, but that will be forthcoming.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/sunvdc.c | 24 +++++++++++++-----------
 drivers/net/sunvnet.c  | 34 +++++++++++++++++++++-------------
 2 files changed, 34 insertions(+), 24 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 8dbbeace52a..0f5e3caf85d 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -750,7 +750,7 @@ static struct vio_driver_ops vdc_vio_ops = {
 static int __devinit vdc_port_probe(struct vio_dev *vdev,
 				    const struct vio_device_id *id)
 {
-	struct mdesc_node *endp;
+	struct mdesc_handle *hp;
 	struct vdc_port *port;
 	unsigned long flags;
 	struct vdc *vp;
@@ -763,26 +763,24 @@ static int __devinit vdc_port_probe(struct vio_dev *vdev,
 		return -ENODEV;
 	}
 
-	endp = vio_find_endpoint(vdev);
-	if (!endp) {
-		printk(KERN_ERR PFX "Port lacks channel-endpoint.\n");
-		return -ENODEV;
-	}
+	hp = mdesc_grab();
 
-	port_id = md_get_property(vdev->mp, "id", NULL);
+	port_id = mdesc_get_property(hp, vdev->mp, "id", NULL);
+	err = -ENODEV;
 	if (!port_id) {
 		printk(KERN_ERR PFX "Port lacks id property.\n");
-		return -ENODEV;
+		goto err_out_release_mdesc;
 	}
 	if ((*port_id << PARTITION_SHIFT) & ~(u64)MINORMASK) {
 		printk(KERN_ERR PFX "Port id [%lu] too large.\n", *port_id);
-		return -ENODEV;
+		goto err_out_release_mdesc;
 	}
 
 	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	err = -ENOMEM;
 	if (!port) {
 		printk(KERN_ERR PFX "Cannot allocate vdc_port.\n");
-		return -ENOMEM;
+		goto err_out_release_mdesc;
 	}
 
 	port->vp = vp;
@@ -797,7 +795,7 @@ static int __devinit vdc_port_probe(struct vio_dev *vdev,
 		snprintf(port->disk_name, sizeof(port->disk_name),
 			 VDCBLK_NAME "%c", 'a' + (port->dev_no % 26));
 
-	err = vio_driver_init(&port->vio, vdev, VDEV_DISK, endp,
+	err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
 			      vdc_versions, ARRAY_SIZE(vdc_versions),
 			      &vdc_vio_ops, port->disk_name);
 	if (err)
@@ -828,6 +826,8 @@ static int __devinit vdc_port_probe(struct vio_dev *vdev,
 
 	dev_set_drvdata(&vdev->dev, port);
 
+	mdesc_release(hp);
+
 	return 0;
 
 err_out_free_tx_ring:
@@ -839,6 +839,8 @@ err_out_free_ldc:
 err_out_free_port:
 	kfree(port);
 
+err_out_release_mdesc:
+	mdesc_release(hp);
 	return err;
 }
 
diff --git a/drivers/net/sunvnet.c b/drivers/net/sunvnet.c
index d764e4ccba5..8a667c13fae 100644
--- a/drivers/net/sunvnet.c
+++ b/drivers/net/sunvnet.c
@@ -892,7 +892,7 @@ const char *remote_macaddr_prop = "remote-mac-address";
 static int __devinit vnet_port_probe(struct vio_dev *vdev,
 				     const struct vio_device_id *id)
 {
-	struct mdesc_node *endp;
+	struct mdesc_handle *hp;
 	struct vnet_port *port;
 	unsigned long flags;
 	struct vnet *vp;
@@ -905,23 +905,21 @@ static int __devinit vnet_port_probe(struct vio_dev *vdev,
 		return -ENODEV;
 	}
 
-	rmac = md_get_property(vdev->mp, remote_macaddr_prop, &len);
+	hp = mdesc_grab();
+
+	rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len);
+	err = -ENODEV;
 	if (!rmac) {
 		printk(KERN_ERR PFX "Port lacks %s property.\n",
 		       remote_macaddr_prop);
-		return -ENODEV;
-	}
-
-	endp = vio_find_endpoint(vdev);
-	if (!endp) {
-		printk(KERN_ERR PFX "Port lacks channel-endpoint.\n");
-		return -ENODEV;
+		goto err_out_put_mdesc;
 	}
 
 	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	err = -ENOMEM;
 	if (!port) {
 		printk(KERN_ERR PFX "Cannot allocate vnet_port.\n");
-		return -ENOMEM;
+		goto err_out_put_mdesc;
 	}
 
 	for (i = 0; i < ETH_ALEN; i++)
@@ -929,7 +927,7 @@ static int __devinit vnet_port_probe(struct vio_dev *vdev,
 
 	port->vp = vp;
 
-	err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK, endp,
+	err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK,
 			      vnet_versions, ARRAY_SIZE(vnet_versions),
 			      &vnet_vio_ops, vp->dev->name);
 	if (err)
@@ -947,7 +945,7 @@ static int __devinit vnet_port_probe(struct vio_dev *vdev,
 	INIT_LIST_HEAD(&port->list);
 
 	switch_port = 0;
-	if (md_get_property(vdev->mp, "switch-port", NULL) != NULL)
+	if (mdesc_get_property(hp, vdev->mp, "switch-port", NULL) != NULL)
 		switch_port = 1;
 
 	spin_lock_irqsave(&vp->lock, flags);
@@ -969,6 +967,8 @@ static int __devinit vnet_port_probe(struct vio_dev *vdev,
 
 	vio_port_up(&port->vio);
 
+	mdesc_release(hp);
+
 	return 0;
 
 err_out_free_ldc:
@@ -977,6 +977,8 @@ err_out_free_ldc:
 err_out_free_port:
 	kfree(port);
 
+err_out_put_mdesc:
+	mdesc_release(hp);
 	return err;
 }
 
@@ -1029,6 +1031,7 @@ static int __devinit vnet_probe(struct vio_dev *vdev,
 				const struct vio_device_id *id)
 {
 	static int vnet_version_printed;
+	struct mdesc_handle *hp;
 	struct net_device *dev;
 	struct vnet *vp;
 	const u64 *mac;
@@ -1037,7 +1040,9 @@ static int __devinit vnet_probe(struct vio_dev *vdev,
 	if (vnet_version_printed++ == 0)
 		printk(KERN_INFO "%s", version);
 
-	mac = md_get_property(vdev->mp, local_mac_prop, &len);
+	hp = mdesc_grab();
+
+	mac = mdesc_get_property(hp, vdev->mp, local_mac_prop, &len);
 	if (!mac) {
 		printk(KERN_ERR PFX "vnet lacks %s property.\n",
 		       local_mac_prop);
@@ -1093,12 +1098,15 @@ static int __devinit vnet_probe(struct vio_dev *vdev,
 
 	dev_set_drvdata(&vdev->dev, vp);
 
+	mdesc_release(hp);
+
 	return 0;
 
 err_out_free_dev:
 	free_netdev(dev);
 
 err_out:
+	mdesc_release(hp);
 	return err;
 }
 
-- 
cgit v1.2.3


From f3c681c028846bd5d39f563909409832a295ca69 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Sun, 15 Jul 2007 23:53:32 -0700
Subject: [SERIAL]: Fix console write locking in sparc drivers.

Mirror the logic in 8250 for proper console write locking
when SYSRQ is triggered or an OOPS is in progress.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/serial/sunhv.c    | 30 ++++++++++++++++++++++++++----
 drivers/serial/sunsab.c   | 19 ++++++++++++++-----
 drivers/serial/sunsu.c    | 14 ++++++++++++++
 drivers/serial/sunzilog.c | 17 ++++++++++++++---
 4 files changed, 68 insertions(+), 12 deletions(-)

(limited to 'drivers')

diff --git a/drivers/serial/sunhv.c b/drivers/serial/sunhv.c
index 96557e6dba6..17bcca53d6a 100644
--- a/drivers/serial/sunhv.c
+++ b/drivers/serial/sunhv.c
@@ -440,8 +440,16 @@ static void sunhv_console_write_paged(struct console *con, const char *s, unsign
 {
 	struct uart_port *port = sunhv_port;
 	unsigned long flags;
+	int locked = 1;
+
+	local_irq_save(flags);
+	if (port->sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&port->lock);
+	} else
+		spin_lock(&port->lock);
 
-	spin_lock_irqsave(&port->lock, flags);
 	while (n > 0) {
 		unsigned long ra = __pa(con_write_page);
 		unsigned long page_bytes;
@@ -469,7 +477,10 @@ static void sunhv_console_write_paged(struct console *con, const char *s, unsign
 			ra += written;
 		}
 	}
-	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (locked)
+		spin_unlock(&port->lock);
+	local_irq_restore(flags);
 }
 
 static inline void sunhv_console_putchar(struct uart_port *port, char c)
@@ -488,7 +499,15 @@ static void sunhv_console_write_bychar(struct console *con, const char *s, unsig
 {
 	struct uart_port *port = sunhv_port;
 	unsigned long flags;
-	int i;
+	int i, locked = 1;
+
+	local_irq_save(flags);
+	if (port->sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&port->lock);
+	} else
+		spin_lock(&port->lock);
 
 	spin_lock_irqsave(&port->lock, flags);
 	for (i = 0; i < n; i++) {
@@ -496,7 +515,10 @@ static void sunhv_console_write_bychar(struct console *con, const char *s, unsig
 			sunhv_console_putchar(port, '\r');
 		sunhv_console_putchar(port, *s++);
 	}
-	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (locked)
+		spin_unlock(&port->lock);
+	local_irq_restore(flags);
 }
 
 static struct console sunhv_console = {
diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c
index deb9ab4b5a0..8a0f9e4408d 100644
--- a/drivers/serial/sunsab.c
+++ b/drivers/serial/sunsab.c
@@ -860,22 +860,31 @@ static int num_channels;
 static void sunsab_console_putchar(struct uart_port *port, int c)
 {
 	struct uart_sunsab_port *up = (struct uart_sunsab_port *)port;
-	unsigned long flags;
-
-	spin_lock_irqsave(&up->port.lock, flags);
 
 	sunsab_tec_wait(up);
 	writeb(c, &up->regs->w.tic);
-
-	spin_unlock_irqrestore(&up->port.lock, flags);
 }
 
 static void sunsab_console_write(struct console *con, const char *s, unsigned n)
 {
 	struct uart_sunsab_port *up = &sunsab_ports[con->index];
+	unsigned long flags;
+	int locked = 1;	/* cleared when up->port.lock is not taken below */
+
+	local_irq_save(flags);	/* locking now spans the whole write, not each putchar */
+	if (up->port.sysrq) {	/* NOTE(review): lock presumably already held -- confirm */
+		locked = 0;
+	} else if (oops_in_progress) {	/* do not spin during an oops */
+		locked = spin_trylock(&up->port.lock);	/* 0 if not acquired */
+	} else
+		spin_lock(&up->port.lock);
 
 	uart_console_write(&up->port, s, n, sunsab_console_putchar);
 	sunsab_tec_wait(up);
+
+	if (locked)	/* release only if this call took the lock */
+		spin_unlock(&up->port.lock);
+	local_irq_restore(flags);
 }
 
 static int sunsab_console_setup(struct console *con, char *options)
diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c
index 2a63cdba320..26d720baf88 100644
--- a/drivers/serial/sunsu.c
+++ b/drivers/serial/sunsu.c
@@ -1288,7 +1288,17 @@ static void sunsu_console_write(struct console *co, const char *s,
 				unsigned int count)
 {
 	struct uart_sunsu_port *up = &sunsu_ports[co->index];
+	unsigned long flags;
 	unsigned int ier;
+	int locked = 1;	/* cleared when up->port.lock is not taken below */
+
+	local_irq_save(flags);	/* IRQs stay off for the whole write */
+	if (up->port.sysrq) {	/* NOTE(review): lock presumably already held -- confirm */
+		locked = 0;
+	} else if (oops_in_progress) {	/* do not spin during an oops */
+		locked = spin_trylock(&up->port.lock);	/* 0 if not acquired */
+	} else
+		spin_lock(&up->port.lock);
 
 	/*
 	 *	First save the UER then disable the interrupts
@@ -1304,6 +1314,10 @@ static void sunsu_console_write(struct console *co, const char *s,
 	 */
 	wait_for_xmitr(up);
 	serial_out(up, UART_IER, ier);
+
+	if (locked)	/* release only if this call took the lock */
+		spin_unlock(&up->port.lock);
+	local_irq_restore(flags);
 }
 
 /*
diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c
index 15b6e1cb040..0a3e10a4a35 100644
--- a/drivers/serial/sunzilog.c
+++ b/drivers/serial/sunzilog.c
@@ -9,7 +9,7 @@
  * C. Dost, Pete Zaitcev, Ted Ts'o and Alex Buell for their
  * work there.
  *
- *  Copyright (C) 2002, 2006 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2002, 2006, 2007 David S. Miller (davem@davemloft.net)
  */
 
 #include <linux/module.h>
@@ -1151,11 +1151,22 @@ sunzilog_console_write(struct console *con, const char *s, unsigned int count)
 {
 	struct uart_sunzilog_port *up = &sunzilog_port_table[con->index];
 	unsigned long flags;
+	int locked = 1;	/* cleared when up->port.lock is not taken below */
+
+	local_irq_save(flags);	/* IRQs stay off for the whole write */
+	if (up->port.sysrq) {	/* NOTE(review): lock presumably already held -- confirm */
+		locked = 0;
+	} else if (oops_in_progress) {	/* do not spin during an oops */
+		locked = spin_trylock(&up->port.lock);	/* 0 if not acquired */
+	} else
+		spin_lock(&up->port.lock);
 
-	spin_lock_irqsave(&up->port.lock, flags);
 	uart_console_write(&up->port, s, count, sunzilog_putchar);
 	udelay(2);
-	spin_unlock_irqrestore(&up->port.lock, flags);
+
+	if (locked)	/* release only if this call took the lock */
+		spin_unlock(&up->port.lock);
+	local_irq_restore(flags);
 }
 
 static int __init sunzilog_console_setup(struct console *con, char *options)
-- 
cgit v1.2.3