From a586d4f6016f7139d8c26df0e6927131168d3b5b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 4 Feb 2008 23:49:56 -0500 Subject: virtio: simplify config mechanism. Previously we used a type/len pair within the config space, but this seems overkill. We now simply define a structure which represents the layout in the config space: the config space can now only be extended at the end. The main driver-visible changes: 1) We indicate what fields are present with an explicit feature bit. 2) Virtqueues are explicitly numbered, and not in the config space. Signed-off-by: Rusty Russell --- drivers/virtio/virtio.c | 45 --------------------------------------------- 1 file changed, 45 deletions(-) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 69d7ea02cd4..303cb6f9010 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -148,51 +148,6 @@ void unregister_virtio_device(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(unregister_virtio_device); -int __virtio_config_val(struct virtio_device *vdev, - u8 type, void *val, size_t size) -{ - void *token; - unsigned int len; - - token = vdev->config->find(vdev, type, &len); - if (!token) - return -ENOENT; - - if (len != size) - return -EIO; - - vdev->config->get(vdev, token, val, size); - return 0; -} -EXPORT_SYMBOL_GPL(__virtio_config_val); - -int virtio_use_bit(struct virtio_device *vdev, - void *token, unsigned int len, unsigned int bitnum) -{ - unsigned long bits[16]; - - /* This makes it convenient to pass-through find() results. */ - if (!token) - return 0; - - /* bit not in range of this bitfield? */ - if (bitnum * 8 >= len / 2) - return 0; - - /* Giant feature bitfields are silly. */ - BUG_ON(len > sizeof(bits)); - vdev->config->get(vdev, token, bits, len); - - if (!test_bit(bitnum, bits)) - return 0; - - /* Set acknowledge bit, and write it back. */ - set_bit(bitnum + len * 8 / 2, bits); - vdev->config->set(vdev, token, bits, len); - return 1; -} -EXPORT_SYMBOL_GPL(virtio_use_bit); - static int virtio_init(void) { if (bus_register(&virtio_bus) != 0) -- cgit v1.2.3 From 18445c4d501b9ab4336f66ef46b092661ddaf336 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 4 Feb 2008 23:49:57 -0500 Subject: virtio: explicit enable_cb/disable_cb rather than callback return. It seems that virtio_net wants to disable callbacks (interrupts) before calling netif_rx_schedule(), so we can't use the return value to do so. Rename "restart" to "cb_enable" and introduce "cb_disable" hook: callback now returns void, rather than a boolean. Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 1dc04b6684e..342bb0363fb 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -220,7 +220,17 @@ static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len) return ret; } -static bool vring_restart(struct virtqueue *_vq) +static void vring_disable_cb(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + START_USE(vq); + BUG_ON(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT); + vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; + END_USE(vq); +} + +static bool vring_enable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -254,8 +264,8 @@ irqreturn_t vring_interrupt(int irq, void *_vq) return IRQ_HANDLED; pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); - if (vq->vq.callback && !vq->vq.callback(&vq->vq)) - vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; + if (vq->vq.callback) + vq->vq.callback(&vq->vq); return IRQ_HANDLED; } @@ -264,7 +274,8 @@ static struct virtqueue_ops vring_vq_ops = { .add_buf = vring_add_buf, .get_buf = vring_get_buf, .kick = vring_kick, - .restart = vring_restart, + .disable_cb = vring_disable_cb, + .enable_cb = vring_enable_cb, .shutdown = vring_shutdown, }; @@ -272,7 +283,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, struct virtio_device *vdev, void *pages, void (*notify)(struct virtqueue *), - bool (*callback)(struct virtqueue *)) + void (*callback)(struct virtqueue *)) { struct vring_virtqueue *vq; unsigned int i; -- cgit v1.2.3 From 426e3e0af5d2473e67d4256fc1340b7faebd1cc7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 4 Feb 2008 23:49:59 -0500 Subject: virtio: clarify NO_NOTIFY flag usage The other side (host) can set the NO_NOTIFY flag as an optimization, to say "no need to kick me when you add things". Make it clear that this is advisory only; especially that we should always notify when the ring is full. Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 342bb0363fb..dbe1d35db32 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -87,6 +87,8 @@ static int vring_add_buf(struct virtqueue *_vq, if (vq->num_free < out + in) { pr_debug("Can't add buf len %i - avail = %i\n", out + in, vq->num_free); + /* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */ + vq->notify(&vq->vq); END_USE(vq); return -ENOSPC; } -- cgit v1.2.3 From 6e5aa7efb27aec7e55b6463fa2c8db594c4226fa Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 4 Feb 2008 23:50:03 -0500 Subject: virtio: reset function A reset function solves three problems: 1) It allows us to renegotiate features, eg. if we want to upgrade a guest driver without rebooting the guest. 2) It gives us a clean way of shutting down virtqueues: after a reset, we know that the buffers won't be used by the host, and 3) It helps the guest recover from messed-up drivers. So we remove the ->shutdown hook, and the only way we now remove feature bits is via reset. We leave it to the driver to do the reset before it deletes queues: the balloon driver, for example, needs to chat to the host in its remove function. Signed-off-by: Rusty Russell --- drivers/virtio/virtio.c | 12 ++++++++++-- drivers/virtio/virtio_ring.c | 11 ----------- 2 files changed, 10 insertions(+), 13 deletions(-) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 303cb6f9010..7dddb186093 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -102,9 +102,13 @@ static int virtio_dev_remove(struct device *_d) struct virtio_driver *drv = container_of(dev->dev.driver, struct virtio_driver, driver); - dev->config->set_status(dev, dev->config->get_status(dev) - & ~VIRTIO_CONFIG_S_DRIVER); drv->remove(dev); + + /* Driver should have reset device. */ + BUG_ON(dev->config->get_status(dev)); + + /* Acknowledge the device's existence again. */ + add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); return 0; } @@ -130,6 +134,10 @@ int register_virtio_device(struct virtio_device *dev) dev->dev.bus = &virtio_bus; sprintf(dev->dev.bus_id, "%u", dev->index); + /* We always start by resetting the device, in case a previous + * driver messed it up. This also tests that code path a little. */ + dev->config->reset(dev); + /* Acknowledge that we've seen the device. */ add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index dbe1d35db32..9849babd6b3 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -173,16 +173,6 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) vq->num_free++; } -/* FIXME: We need to tell other side about removal, to synchronize. */ -static void vring_shutdown(struct virtqueue *_vq) -{ - struct vring_virtqueue *vq = to_vvq(_vq); - unsigned int i; - - for (i = 0; i < vq->vring.num; i++) - detach_buf(vq, i); -} - static inline bool more_used(const struct vring_virtqueue *vq) { return vq->last_used_idx != vq->vring.used->idx; @@ -278,7 +268,6 @@ static struct virtqueue_ops vring_vq_ops = { .kick = vring_kick, .disable_cb = vring_disable_cb, .enable_cb = vring_enable_cb, - .shutdown = vring_shutdown, }; struct virtqueue *vring_new_virtqueue(unsigned int num, -- cgit v1.2.3 From 81a8deab1ce3816c6a89e3429e234e7d3686da94 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 4 Feb 2008 23:50:04 -0500 Subject: virtio: handle interrupts after callbacks turned off Anthony Liguori found double interrupt suppression in the virtio_net driver, triggered by two skb_recv_done's in a row. This is because virtio_ring's interrupt suppression is a best-effort optimization: it contains no synchronization so the host can miss it and still send interrupts. But it's certainly nicer for virtio users if calling disable_cb actually disables callbacks, so we check for the race in the interrupt routine. Note: SMP guests might require syncronization here, but since disable_cb is actually called from interrupt context, there has to be some form of synchronization before the next same interrupt handler is called (Linux guarantees that the same device's irq handler will never run simultanously on multiple CPUs). Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 9849babd6b3..9859213aa65 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -255,6 +255,13 @@ irqreturn_t vring_interrupt(int irq, void *_vq) if (unlikely(vq->broken)) return IRQ_HANDLED; + /* Other side may have missed us turning off the interrupt, + * but we should preserve disable semantic for virtio users. */ + if (unlikely(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { + pr_debug("virtqueue interrupt after disable for %p\n", vq); + return IRQ_HANDLED; + } + pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); if (vq->vq.callback) vq->vq.callback(&vq->vq); -- cgit v1.2.3 From 15f9c8903cbdb02aee0f1bcf86a97c2e238b9a3d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 4 Feb 2008 23:50:05 -0500 Subject: virtio: Use the sg_phys convenience function. Simple cleanup. Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 9859213aa65..74c245092b5 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -99,16 +99,14 @@ static int vring_add_buf(struct virtqueue *_vq, head = vq->free_head; for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { vq->vring.desc[i].flags = VRING_DESC_F_NEXT; - vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<offset; + vq->vring.desc[i].addr = sg_phys(sg); vq->vring.desc[i].len = sg->length; prev = i; sg++; } for (; in; i = vq->vring.desc[i].next, in--) { vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; - vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<offset; + vq->vring.desc[i].addr = sg_phys(sg); vq->vring.desc[i].len = sg->length; prev = i; sg++; -- cgit v1.2.3 From c6fd47011b4bdebad3f1513bac75fe4895e332ee Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 4 Feb 2008 23:50:05 -0500 Subject: virtio: Allow virtio to be modular and used by modules This is needed for the virtio PCI device to be compiled as a module. Signed-off-by: Anthony Liguori Signed-off-by: Rusty Russell --- drivers/virtio/Kconfig | 4 ++-- drivers/virtio/virtio.c | 8 ++++++++ drivers/virtio/virtio_ring.c | 4 ++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/virtio') diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 9e33fc4da87..de0c8c2654e 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -1,8 +1,8 @@ # Virtio always gets selected by whoever wants it. config VIRTIO - bool + tristate # Similarly the virtio ring implementation. config VIRTIO_RING - bool + tristate depends on VIRTIO diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 7dddb186093..b535483bc55 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -162,4 +162,12 @@ static int virtio_init(void) panic("virtio bus registration failed"); return 0; } + +static void __exit virtio_exit(void) +{ + bus_unregister(&virtio_bus); +} core_initcall(virtio_init); +module_exit(virtio_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 74c245092b5..3a28c138213 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -266,6 +266,7 @@ irqreturn_t vring_interrupt(int irq, void *_vq) return IRQ_HANDLED; } +EXPORT_SYMBOL_GPL(vring_interrupt); static struct virtqueue_ops vring_vq_ops = { .add_buf = vring_add_buf, @@ -318,9 +319,12 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, return &vq->vq; } +EXPORT_SYMBOL_GPL(vring_new_virtqueue); void vring_del_virtqueue(struct virtqueue *vq) { kfree(to_vvq(vq)); } +EXPORT_SYMBOL_GPL(vring_del_virtqueue); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 3343660d8c62c6b00b2f15324ef3fcb6be207bfa Mon Sep 17 00:00:00 2001 From: Anthony Liguori Date: Mon, 12 Nov 2007 21:30:26 -0600 Subject: virtio: PCI device This is a PCI device that implements a transport for virtio. It allows virtio devices to be used by QEMU based VMMs like KVM or Xen. Signed-off-by: Anthony Liguori Signed-off-by: Rusty Russell --- drivers/virtio/Kconfig | 17 ++ drivers/virtio/Makefile | 1 + drivers/virtio/virtio_pci.c | 440 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 458 insertions(+) create mode 100644 drivers/virtio/virtio_pci.c (limited to 'drivers/virtio') diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index de0c8c2654e..833db2f36e9 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -6,3 +6,20 @@ config VIRTIO config VIRTIO_RING tristate depends on VIRTIO + +config VIRTIO_PCI + tristate "PCI driver for virtio devices (EXPERIMENTAL)" + depends on PCI && EXPERIMENTAL + select VIRTIO + select VIRTIO_RING + ---help--- + This drivers provides support for virtio based paravirtual device + drivers over PCI. This requires that your VMM has appropriate PCI + virtio backends. Most QEMU based VMMs should support these devices + (like KVM or Xen). + + Currently, the ABI is not considered stable so there is no guarantee + that this version of the driver will work with your VMM. + + If unsure, say M. + diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index f70e40971dd..cc84999f305 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o +obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c new file mode 100644 index 00000000000..192687e3a56 --- /dev/null +++ b/drivers/virtio/virtio_pci.c @@ -0,0 +1,440 @@ +/* + * Virtio PCI driver + * + * This module allows virtio devices to be used over a virtual PCI device. + * This can be used with QEMU based VMMs like KVM or Xen. + * + * Copyright IBM Corp. 2007 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Anthony Liguori "); +MODULE_DESCRIPTION("virtio-pci"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1"); + +/* Our device structure */ +struct virtio_pci_device +{ + struct virtio_device vdev; + struct pci_dev *pci_dev; + + /* the IO mapping for the PCI config space */ + void *ioaddr; + + /* a list of queues so we can dispatch IRQs */ + spinlock_t lock; + struct list_head virtqueues; +}; + +struct virtio_pci_vq_info +{ + /* the actual virtqueue */ + struct virtqueue *vq; + + /* the number of entries in the queue */ + int num; + + /* the index of the queue */ + int queue_index; + + /* the virtual address of the ring queue */ + void *queue; + + /* the list node for the virtqueues list */ + struct list_head node; +}; + +/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ +static struct pci_device_id virtio_pci_id_table[] = { + { 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, + { 0 }, +}; + +MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); + +/* A PCI device has it's own struct device and so does a virtio device so + * we create a place for the virtio devices to show up in sysfs. I think it + * would make more sense for virtio to not insist on having it's own device. */ +static struct device virtio_pci_root = { + .parent = NULL, + .bus_id = "virtio-pci", +}; + +/* Unique numbering for devices under the kvm root */ +static unsigned int dev_index; + +/* Convert a generic virtio device to our structure */ +static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) +{ + return container_of(vdev, struct virtio_pci_device, vdev); +} + +/* virtio config->feature() implementation */ +static bool vp_feature(struct virtio_device *vdev, unsigned bit) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + u32 mask; + + /* Since this function is supposed to have the side effect of + * enabling a queried feature, we simulate that by doing a read + * from the host feature bitmask and then writing to the guest + * feature bitmask */ + mask = ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); + if (mask & (1 << bit)) { + mask |= (1 << bit); + iowrite32(mask, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); + } + + return !!(mask & (1 << bit)); +} + +/* virtio config->get() implementation */ +static void vp_get(struct virtio_device *vdev, unsigned offset, + void *buf, unsigned len) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + void *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; + u8 *ptr = buf; + int i; + + for (i = 0; i < len; i++) + ptr[i] = ioread8(ioaddr + i); +} + +/* the config->set() implementation. it's symmetric to the config->get() + * implementation */ +static void vp_set(struct virtio_device *vdev, unsigned offset, + const void *buf, unsigned len) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + void *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; + const u8 *ptr = buf; + int i; + + for (i = 0; i < len; i++) + iowrite8(ptr[i], ioaddr + i); +} + +/* config->{get,set}_status() implementations */ +static u8 vp_get_status(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); +} + +static void vp_set_status(struct virtio_device *vdev, u8 status) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + /* We should never be setting status to 0. */ + BUG_ON(status == 0); + return iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS); +} + +static void vp_reset(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + /* 0 status means a reset. */ + return iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS); +} + +/* the notify function used when creating a virt queue */ +static void vp_notify(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_vq_info *info = vq->priv; + + /* we write the queue's selector into the notification register to + * signal the other end */ + iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); +} + +/* A small wrapper to also acknowledge the interrupt when it's handled. + * I really need an EIO hook for the vring so I can ack the interrupt once we + * know that we'll be handling the IRQ but before we invoke the callback since + * the callback may notify the host which results in the host attempting to + * raise an interrupt that we would then mask once we acknowledged the + * interrupt. */ +static irqreturn_t vp_interrupt(int irq, void *opaque) +{ + struct virtio_pci_device *vp_dev = opaque; + struct virtio_pci_vq_info *info; + irqreturn_t ret = IRQ_NONE; + u8 isr; + + /* reading the ISR has the effect of also clearing it so it's very + * important to save off the value. */ + isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); + + /* It's definitely not us if the ISR was not high */ + if (!isr) + return IRQ_NONE; + + /* Configuration change? Tell driver if it wants to know. */ + if (isr & VIRTIO_PCI_ISR_CONFIG) { + struct virtio_driver *drv; + drv = container_of(vp_dev->vdev.dev.driver, + struct virtio_driver, driver); + + if (drv->config_changed) + drv->config_changed(&vp_dev->vdev); + } + + spin_lock(&vp_dev->lock); + list_for_each_entry(info, &vp_dev->virtqueues, node) { + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + spin_unlock(&vp_dev->lock); + + return ret; +} + +/* the config->find_vq() implementation */ +static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, + void (*callback)(struct virtqueue *vq)) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_vq_info *info; + struct virtqueue *vq; + u16 num; + int err; + + /* Select the queue we're interested in */ + iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + + /* Check if queue is either not available or already active. */ + num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM); + if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) + return ERR_PTR(-ENOENT); + + /* allocate and fill out our structure the represents an active + * queue */ + info = kmalloc(sizeof(struct virtio_pci_vq_info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + info->queue_index = index; + info->num = num; + + info->queue = kzalloc(PAGE_ALIGN(vring_size(num,PAGE_SIZE)), GFP_KERNEL); + if (info->queue == NULL) { + err = -ENOMEM; + goto out_info; + } + + /* activate the queue */ + iowrite32(virt_to_phys(info->queue) >> PAGE_SHIFT, + vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + + /* create the vring */ + vq = vring_new_virtqueue(info->num, vdev, info->queue, + vp_notify, callback); + if (!vq) { + err = -ENOMEM; + goto out_activate_queue; + } + + vq->priv = info; + info->vq = vq; + + spin_lock(&vp_dev->lock); + list_add(&info->node, &vp_dev->virtqueues); + spin_unlock(&vp_dev->lock); + + return vq; + +out_activate_queue: + iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + kfree(info->queue); +out_info: + kfree(info); + return ERR_PTR(err); +} + +/* the config->del_vq() implementation */ +static void vp_del_vq(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_vq_info *info = vq->priv; + + spin_lock(&vp_dev->lock); + list_del(&info->node); + spin_unlock(&vp_dev->lock); + + vring_del_virtqueue(vq); + + /* Select and deactivate the queue */ + iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + + kfree(info->queue); + kfree(info); +} + +static struct virtio_config_ops virtio_pci_config_ops = { + .feature = vp_feature, + .get = vp_get, + .set = vp_set, + .get_status = vp_get_status, + .set_status = vp_set_status, + .reset = vp_reset, + .find_vq = vp_find_vq, + .del_vq = vp_del_vq, +}; + +/* the PCI probing function */ +static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, + const struct pci_device_id *id) +{ + struct virtio_pci_device *vp_dev; + int err; + + /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ + if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) + return -ENODEV; + + /* allocate our structure and fill it out */ + vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL); + if (vp_dev == NULL) + return -ENOMEM; + + snprintf(vp_dev->vdev.dev.bus_id, BUS_ID_SIZE, "virtio%d", dev_index); + vp_dev->vdev.index = dev_index; + dev_index++; + + vp_dev->vdev.dev.parent = &virtio_pci_root; + vp_dev->vdev.config = &virtio_pci_config_ops; + vp_dev->pci_dev = pci_dev; + INIT_LIST_HEAD(&vp_dev->virtqueues); + spin_lock_init(&vp_dev->lock); + + /* enable the device */ + err = pci_enable_device(pci_dev); + if (err) + goto out; + + err = pci_request_regions(pci_dev, "virtio-pci"); + if (err) + goto out_enable_device; + + vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); + if (vp_dev->ioaddr == NULL) + goto out_req_regions; + + pci_set_drvdata(pci_dev, vp_dev); + + /* we use the subsystem vendor/device id as the virtio vendor/device + * id. this allows us to use the same PCI vendor/device id for all + * virtio devices and to identify the particular virtio driver by + * the subsytem ids */ + vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; + vp_dev->vdev.id.device = pci_dev->subsystem_device; + + /* register a handler for the queue with the PCI device's interrupt */ + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, + vp_dev->vdev.dev.bus_id, vp_dev); + if (err) + goto out_set_drvdata; + + /* finally register the virtio device */ + err = register_virtio_device(&vp_dev->vdev); + if (err) + goto out_req_irq; + + return 0; + +out_req_irq: + free_irq(pci_dev->irq, vp_dev); +out_set_drvdata: + pci_set_drvdata(pci_dev, NULL); + pci_iounmap(pci_dev, vp_dev->ioaddr); +out_req_regions: + pci_release_regions(pci_dev); +out_enable_device: + pci_disable_device(pci_dev); +out: + kfree(vp_dev); + return err; +} + +static void __devexit virtio_pci_remove(struct pci_dev *pci_dev) +{ + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + + free_irq(pci_dev->irq, vp_dev); + pci_set_drvdata(pci_dev, NULL); + pci_iounmap(pci_dev, vp_dev->ioaddr); + pci_release_regions(pci_dev); + pci_disable_device(pci_dev); + kfree(vp_dev); +} + +#ifdef CONFIG_PM +static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state) +{ + pci_save_state(pci_dev); + pci_set_power_state(pci_dev, PCI_D3hot); + return 0; +} + +static int virtio_pci_resume(struct pci_dev *pci_dev) +{ + pci_restore_state(pci_dev); + pci_set_power_state(pci_dev, PCI_D0); + return 0; +} +#endif + +static struct pci_driver virtio_pci_driver = { + .name = "virtio-pci", + .id_table = virtio_pci_id_table, + .probe = virtio_pci_probe, + .remove = virtio_pci_remove, +#ifdef CONFIG_PM + .suspend = virtio_pci_suspend, + .resume = virtio_pci_resume, +#endif +}; + +static int __init virtio_pci_init(void) +{ + int err; + + err = device_register(&virtio_pci_root); + if (err) + return err; + + err = pci_register_driver(&virtio_pci_driver); + if (err) + device_unregister(&virtio_pci_root); + + return err; +} + +module_init(virtio_pci_init); + +static void __exit virtio_pci_exit(void) +{ + device_unregister(&virtio_pci_root); + pci_unregister_driver(&virtio_pci_driver); +} + +module_exit(virtio_pci_exit); -- cgit v1.2.3 From 55a7c066041e7850948d29ed813f62821a9ec046 Mon Sep 17 00:00:00 2001 From: Anthony Liguori Date: Mon, 28 Jan 2008 09:59:59 -0600 Subject: virtio: Use PCI revision field to indicate virtio PCI ABI version As Avi pointed out, as we continue to massage the virtio PCI ABI, we can make things a little more friendly to users by utilizing the PCI revision field to indicate which version of the ABI we're using. This is a hard ABI version and incrementing it will cause the guest driver to break. This is the necessary changes to virtio_pci to support this. Signed-off-by: Anthony Liguori Signed-off-by: Rusty Russell --- drivers/virtio/virtio_pci.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 192687e3a56..26f787ddd5f 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -311,6 +311,12 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) return -ENODEV; + if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) { + printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n", + VIRTIO_PCI_ABI_VERSION, pci_dev->revision); + return -ENODEV; + } + /* allocate our structure and fill it out */ vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL); if (vp_dev == NULL) -- cgit v1.2.3 From 6b35e40767c6c1ac783330109ae8e0c09ea6bc82 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 4 Feb 2008 23:50:12 -0500 Subject: virtio: balloon driver After discussions with Anthony Liguori, it seems that the virtio balloon can be made even simpler. Here's my attempt. The device configuration tells the driver how much memory it should take from the guest (ie. balloon size). The guest feeds the page numbers it has taken via one virtqueue. A second virtqueue feeds the page numbers the driver wants back: if the device has the VIRTIO_BALLOON_F_MUST_TELL_HOST bit, then this queue is compulsory, otherwise it's advisory (and the guest can simply fault the pages back in). This driver can be enhanced later to deflate the balloon via a shrinker, oom callback or we could even go for a complete set of in-guest regulators. Signed-off-by: Rusty Russell --- drivers/virtio/Kconfig | 10 ++ drivers/virtio/Makefile | 1 + drivers/virtio/virtio_balloon.c | 284 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 295 insertions(+) create mode 100644 drivers/virtio/virtio_balloon.c (limited to 'drivers/virtio') diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 833db2f36e9..3dd6294d10b 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -23,3 +23,13 @@ config VIRTIO_PCI If unsure, say M. +config VIRTIO_BALLOON + tristate "Virtio balloon driver (EXPERIMENTAL)" + select VIRTIO + select VIRTIO_RING + ---help--- + This driver supports increasing and decreasing the amount + of memory within a KVM guest. + + If unsure, say M. + diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index cc84999f305..6738c446c19 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o +obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c new file mode 100644 index 00000000000..622aece1acc --- /dev/null +++ b/drivers/virtio/virtio_balloon.c @@ -0,0 +1,284 @@ +/* Virtio balloon implementation, inspired by Dor Loar and Marcelo + * Tosatti's implementations. + * + * Copyright 2008 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +//#define DEBUG +#include +#include +#include +#include +#include + +struct virtio_balloon +{ + struct virtio_device *vdev; + struct virtqueue *inflate_vq, *deflate_vq; + + /* Where the ballooning thread waits for config to change. */ + wait_queue_head_t config_change; + + /* The thread servicing the balloon. */ + struct task_struct *thread; + + /* Waiting for host to ack the pages we released. */ + struct completion acked; + + /* Do we have to tell Host *before* we reuse pages? */ + bool tell_host_first; + + /* The pages we've told the Host we're not using. */ + unsigned int num_pages; + struct list_head pages; + + /* The array of pfns we tell the Host about. */ + unsigned int num_pfns; + u32 pfns[256]; +}; + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static void balloon_ack(struct virtqueue *vq) +{ + struct virtio_balloon *vb; + unsigned int len; + + vb = vq->vq_ops->get_buf(vq, &len); + if (vb) + complete(&vb->acked); +} + +static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) +{ + struct scatterlist sg; + + sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); + + init_completion(&vb->acked); + + /* We should always be able to add one buffer to an empty queue. */ + if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) != 0) + BUG(); + vq->vq_ops->kick(vq); + + /* When host has read buffer, this completes via balloon_ack */ + wait_for_completion(&vb->acked); +} + +static void fill_balloon(struct virtio_balloon *vb, size_t num) +{ + /* We can only do one array worth at a time. */ + num = min(num, ARRAY_SIZE(vb->pfns)); + + for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { + struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY); + if (!page) { + if (printk_ratelimit()) + dev_printk(KERN_INFO, &vb->vdev->dev, + "Out of puff! Can't get %zu pages\n", + num); + /* Sleep for at least 1/5 of a second before retry. */ + msleep(200); + break; + } + vb->pfns[vb->num_pfns] = page_to_pfn(page); + totalram_pages--; + vb->num_pages++; + list_add(&page->lru, &vb->pages); + } + + /* Didn't get any? Oh well. */ + if (vb->num_pfns == 0) + return; + + tell_host(vb, vb->inflate_vq); +} + +static void release_pages_by_pfn(const u32 pfns[], unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + __free_page(pfn_to_page(pfns[i])); + totalram_pages++; + } +} + +static void leak_balloon(struct virtio_balloon *vb, size_t num) +{ + struct page *page; + + /* We can only do one array worth at a time. */ + num = min(num, ARRAY_SIZE(vb->pfns)); + + for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { + page = list_first_entry(&vb->pages, struct page, lru); + list_del(&page->lru); + vb->pfns[vb->num_pfns] = page_to_pfn(page); + vb->num_pages--; + } + + if (vb->tell_host_first) { + tell_host(vb, vb->deflate_vq); + release_pages_by_pfn(vb->pfns, vb->num_pfns); + } else { + release_pages_by_pfn(vb->pfns, vb->num_pfns); + tell_host(vb, vb->deflate_vq); + } +} + +static void virtballoon_changed(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + + wake_up(&vb->config_change); +} + +static inline int towards_target(struct virtio_balloon *vb) +{ + u32 v; + __virtio_config_val(vb->vdev, + offsetof(struct virtio_balloon_config, num_pages), + &v); + return v - vb->num_pages; +} + +static void update_balloon_size(struct virtio_balloon *vb) +{ + __le32 actual = cpu_to_le32(vb->num_pages); + + vb->vdev->config->set(vb->vdev, + offsetof(struct virtio_balloon_config, actual), + &actual, sizeof(actual)); +} + +static int balloon(void *_vballoon) +{ + struct virtio_balloon *vb = _vballoon; + + set_freezable(); + while (!kthread_should_stop()) { + int diff; + + try_to_freeze(); + wait_event_interruptible(vb->config_change, + (diff = towards_target(vb)) != 0 + || kthread_should_stop()); + if (diff > 0) + fill_balloon(vb, diff); + else if (diff < 0) + leak_balloon(vb, -diff); + update_balloon_size(vb); + } + return 0; +} + +static int virtballoon_probe(struct virtio_device *vdev) +{ + struct virtio_balloon *vb; + int err; + + vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); + if (!vb) { + err = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&vb->pages); + vb->num_pages = 0; + init_waitqueue_head(&vb->config_change); + vb->vdev = vdev; + + /* We expect two virtqueues. */ + vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack); + if (IS_ERR(vb->inflate_vq)) { + err = PTR_ERR(vb->inflate_vq); + goto out_free_vb; + } + + vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack); + if (IS_ERR(vb->deflate_vq)) { + err = PTR_ERR(vb->deflate_vq); + goto out_del_inflate_vq; + } + + vb->thread = kthread_run(balloon, vb, "vballoon"); + if (IS_ERR(vb->thread)) { + err = PTR_ERR(vb->thread); + goto out_del_deflate_vq; + } + + vb->tell_host_first + = vdev->config->feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); + + return 0; + +out_del_deflate_vq: + vdev->config->del_vq(vb->deflate_vq); +out_del_inflate_vq: + vdev->config->del_vq(vb->inflate_vq); +out_free_vb: + kfree(vb); +out: + return err; +} + +static void virtballoon_remove(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + + kthread_stop(vb->thread); + + /* There might be pages left in the balloon: free them. */ + while (vb->num_pages) + leak_balloon(vb, vb->num_pages); + + /* Now we reset the device so we can clean up the queues. */ + vdev->config->reset(vdev); + + vdev->config->del_vq(vb->deflate_vq); + vdev->config->del_vq(vb->inflate_vq); + kfree(vb); +} + +static struct virtio_driver virtio_balloon = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtballoon_probe, + .remove = __devexit_p(virtballoon_remove), + .config_changed = virtballoon_changed, +}; + +static int __init init(void) +{ + return register_virtio_driver(&virtio_balloon); +} + +static void __exit fini(void) +{ + unregister_virtio_driver(&virtio_balloon); +} +module_init(init); +module_exit(fini); + +MODULE_DEVICE_TABLE(virtio, id_table); +MODULE_DESCRIPTION("Virtio balloon driver"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3